1 /* 2 * Copyright (c) Meta Platforms, Inc. and affiliates. 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 17 package com.facebook.ktfmt.format 18 19 import java.util.regex.Pattern 20 import org.jetbrains.kotlin.com.intellij.psi.PsiComment 21 import org.jetbrains.kotlin.com.intellij.psi.PsiElement 22 import org.jetbrains.kotlin.com.intellij.psi.PsiWhiteSpace 23 import org.jetbrains.kotlin.com.intellij.psi.impl.source.tree.LeafPsiElement 24 import org.jetbrains.kotlin.lexer.KtTokens 25 import org.jetbrains.kotlin.psi.KtFile 26 import org.jetbrains.kotlin.psi.KtStringTemplateExpression 27 import org.jetbrains.kotlin.psi.KtTreeVisitorVoid 28 import org.jetbrains.kotlin.psi.psiUtil.endOffset 29 import org.jetbrains.kotlin.psi.psiUtil.startOffset 30 31 /** 32 * Tokenizer traverses a Kotlin parse tree (which blessedly contains whitespaces and comments, 33 * unlike Javac) and constructs a list of 'Tok's. 34 * 35 * <p>The google-java-format infra expects newline Toks to be separate from maximal-whitespace Toks, 36 * but Kotlin emits them together. So, we split them using Java's \R regex matcher. We don't use 37 * 'split' et al. because we want Toks for the newlines themselves. 38 */ 39 class Tokenizer(private val fileText: String, val file: KtFile) : KtTreeVisitorVoid() { 40 41 companion object { 42 private val WHITESPACE_NEWLINE_REGEX: Pattern = Pattern.compile("\\R|( )+") 43 } 44 45 val toks = mutableListOf<KotlinTok>() 46 var index = 0 47 visitElementnull48 override fun visitElement(element: PsiElement) { 49 val startIndex = element.startOffset 50 when (element) { 51 is PsiComment -> { 52 toks.add( 53 KotlinTok( 54 index, 55 fileText.substring(startIndex, element.endOffset), 56 element.text, 57 startIndex, 58 0, 59 false, 60 KtTokens.EOF)) 61 index++ 62 return 63 } 64 is KtStringTemplateExpression -> { 65 toks.add( 66 KotlinTok( 67 index, 68 WhitespaceTombstones.replaceTrailingWhitespaceWithTombstone( 69 fileText.substring(startIndex, element.endOffset)), 70 element.text, 71 startIndex, 72 0, 73 true, 74 KtTokens.EOF)) 75 index++ 76 return 77 } 78 is LeafPsiElement -> { 79 val elementText = element.text 80 val endIndex = element.endOffset 81 if (element is PsiWhiteSpace) { 82 val matcher = WHITESPACE_NEWLINE_REGEX.matcher(elementText) 83 while (matcher.find()) { 84 val text = matcher.group() 85 toks.add( 86 KotlinTok( 87 -1, 88 fileText.substring(startIndex + matcher.start(), startIndex + matcher.end()), 89 text, 90 startIndex + matcher.start(), 91 0, 92 false, 93 KtTokens.EOF)) 94 } 95 } else { 96 toks.add( 97 KotlinTok( 98 index, 99 fileText.substring(startIndex, endIndex), 100 elementText, 101 startIndex, 102 0, 103 true, 104 KtTokens.EOF)) 105 index++ 106 } 107 } 108 } 109 super.visitElement(element) 110 } 111 } 112