/*
 * Copyright (c) Meta Platforms, Inc. and affiliates.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package com.facebook.ktfmt.format

import java.util.regex.Pattern
import org.jetbrains.kotlin.com.intellij.openapi.util.text.StringUtil
import org.jetbrains.kotlin.com.intellij.psi.PsiComment
import org.jetbrains.kotlin.com.intellij.psi.PsiElement
import org.jetbrains.kotlin.com.intellij.psi.PsiWhiteSpace
import org.jetbrains.kotlin.com.intellij.psi.impl.source.tree.LeafPsiElement
import org.jetbrains.kotlin.lexer.KtTokens
import org.jetbrains.kotlin.psi.KtFile
import org.jetbrains.kotlin.psi.KtStringTemplateExpression
import org.jetbrains.kotlin.psi.KtTreeVisitorVoid
import org.jetbrains.kotlin.psi.psiUtil.endOffset
import org.jetbrains.kotlin.psi.psiUtil.startOffset

/**
 * Tokenizer traverses a Kotlin parse tree (which blessedly contains whitespaces and comments,
 * unlike Javac) and constructs a list of 'Tok's.
 *
 * The google-java-format infra expects newline Toks to be separate from maximal-whitespace Toks,
 * but Kotlin emits them together. So, we split them using Java's `\R` regex matcher. We don't use
 * 'split' et al. because we want Toks for the newlines themselves.
 *
 * How each visited element is turned into entries of [toks]:
 * - a [PsiComment] becomes one tok with `isToken = false`; its children are not visited,
 * - a [KtStringTemplateExpression] becomes one tok with `isToken = true` whose original text is
 *   passed through `WhitespaceTombstones.replaceTrailingWhitespaceWithTombstone`; its children are
 *   not visited,
 * - a [PsiWhiteSpace] leaf becomes one tok per line break and one per run of spaces, each with
 *   `index = -1` and `isToken = false`,
 * - any other [LeafPsiElement] becomes one tok with `isToken = true`,
 * - every other element contributes nothing itself and is simply traversed.
 *
 * Every tok is created with `column = 0` and `kind = KtTokens.EOF`.
 *
 * @property file the parsed file this tokenizer belongs to; it is not read by this class itself.
 * @throws ParseError from [visitElement] when a block comment is not terminated.
 */
class Tokenizer(private val fileText: String, val file: KtFile) : KtTreeVisitorVoid() {

  companion object {
    /**
     * Matches either a single line break (`\R`) or a run of one or more space characters.
     *
     * NOTE(review): whitespace characters that are neither a line break nor a space (for example
     * tabs) are not matched and therefore produce no tok - confirm this is intended.
     */
    private val WHITESPACE_NEWLINE_REGEX: Pattern = Pattern.compile("\\R|( )+")

    private const val BLOCK_COMMENT_OPEN = "/*"
    private const val BLOCK_COMMENT_CLOSE = "*/"

    /**
     * Returns whether [text], the full text of a comment starting with the block comment opener,
     * is properly terminated.
     *
     * A comment is considered terminated when its text ends with the closer AND every opener in
     * it, including nested ones, is matched by a closer. The text is scanned left to right and
     * two-character delimiters are consumed as a unit, so the slash shared by an opener and a
     * would-be closer (as in the three-character text slash-star-slash) is never counted twice.
     */
    private fun isClosedBlockComment(text: String): Boolean {
      if (!text.endsWith(BLOCK_COMMENT_CLOSE)) {
        return false
      }
      var depth = 0
      var i = 0
      while (i < text.length) {
        when {
          text.startsWith(BLOCK_COMMENT_OPEN, i) -> {
            depth++
            i += 2
          }
          text.startsWith(BLOCK_COMMENT_CLOSE, i) -> {
            depth--
            i += 2
          }
          else -> i++
        }
      }
      return depth == 0
    }
  }

  /** The toks collected so far, in visiting order. */
  val toks: MutableList<KotlinTok> = mutableListOf()

  /**
   * The index that will be given to the next non-whitespace tok. It is incremented once for every
   * comment, string template and non-whitespace leaf that has been added to [toks].
   */
  var index: Int = 0
    private set

  /**
   * Appends the tok(s) for [element] to [toks] and, unless [element] is a comment or a string
   * template, continues the traversal through `super.visitElement`.
   *
   * The element text and the matching slice of the file text are only computed in the branches
   * that emit a tok, so composite elements that are merely traversed cost nothing extra.
   *
   * @throws ParseError if [element] is a comment that starts with the block comment opener but is
   *   not terminated (see [isClosedBlockComment]).
   */
  override fun visitElement(element: PsiElement) {
    when (element) {
      is PsiComment -> {
        val commentText = element.text
        if (commentText.startsWith(BLOCK_COMMENT_OPEN) && !isClosedBlockComment(commentText)) {
          throw ParseError(
              "Unclosed comment", StringUtil.offsetToLineColumn(fileText, element.startOffset))
        }
        addIndexedTok(element, originalTextOf(element), isToken = false)
        return
      }
      is KtStringTemplateExpression -> {
        addIndexedTok(
            element,
            WhitespaceTombstones.replaceTrailingWhitespaceWithTombstone(originalTextOf(element)),
            isToken = true,
        )
        return
      }
      is LeafPsiElement -> {
        if (element is PsiWhiteSpace) {
          addWhitespaceToks(element)
        } else {
          addIndexedTok(element, originalTextOf(element), isToken = true)
        }
      }
    }
    super.visitElement(element)
  }

  /** Returns the slice of the file text covered by [element]'s start and end offsets. */
  private fun originalTextOf(element: PsiElement): String =
      fileText.substring(element.startOffset, element.endOffset)

  /**
   * Appends one tok for [element] carrying the current [index], then advances [index].
   *
   * @param originalText the text to record as the tok's original text.
   * @param isToken the value for the tok's `isToken` flag.
   */
  private fun addIndexedTok(element: PsiElement, originalText: String, isToken: Boolean) {
    toks.add(
        KotlinTok(
            index = index,
            originalText = originalText,
            text = element.text,
            position = element.startOffset,
            column = 0,
            isToken = isToken,
            kind = KtTokens.EOF,
        ),
    )
    index++
  }

  /**
   * Splits [whitespace] with [WHITESPACE_NEWLINE_REGEX] and appends one tok per match. These toks
   * get `index = -1` and do not advance [index].
   */
  private fun addWhitespaceToks(whitespace: PsiElement) {
    val base = whitespace.startOffset
    val matcher = WHITESPACE_NEWLINE_REGEX.matcher(whitespace.text)
    while (matcher.find()) {
      val from = base + matcher.start()
      val to = base + matcher.end()
      toks.add(
          KotlinTok(
              index = -1,
              // The original text is taken from the file text at the offsets of the match.
              originalText = fileText.substring(from, to),
              text = matcher.group(),
              position = from,
              column = 0,
              isToken = false,
              kind = KtTokens.EOF,
          ),
      )
    }
  }
}