/*
 * Copyright (C) 2017 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package lexer

import java.io.File

/**
 * Contract for lexers that turn source text into a list of [Token]s.
 *
 * The companion object holds string helpers that pad delimiter tokens with
 * spaces (so they can be split on whitespace) and that undo that padding.
 */
interface ILexer {
    /**
     * Tokenizes the given text.
     *
     * @param str the text to tokenize
     * @return the tokens produced by the implementation
     */
    fun tokenize(str: String): List<Token>

    /**
     * Reads the entire [file] as text and tokenizes it with the string overload.
     *
     * @param file the file whose content is tokenized
     * @return the tokens produced for the file content
     */
    fun tokenize(file: File): List<Token> = tokenize(file.readText())

    companion object {

        // Patterns are compiled once instead of once per processed line/match.
        private val PADDED_DECIMAL = Regex("(\\d+)\\s*\\.\\s*(\\d+)")
        private val SPLIT_PKG_SCOPE = Regex(":\\s+:")
        private val SPLIT_LSHIFT = Regex("<\\s+<")
        private val SPLIT_RSHIFT = Regex(">\\s+>")
        private val SPLIT_LEQ = Regex("<\\s+=")
        private val SPLIT_GEQ = Regex(">\\s+=")
        private val TRAILING_PERIOD = Regex("\\s+\\.\\s*$")

        /**
         * Surrounds every delimiter token in [str] with a space on each side.
         *
         * The input is processed line by line. After padding, a few sequences
         * that padding splits apart are joined again: decimal numbers and the
         * operators given by [TokenGrammar.PKG_SCOPE], [TokenGrammar.LSHIFT],
         * [TokenGrammar.RSHIFT], [TokenGrammar.LEQ] and [TokenGrammar.GEQ].
         *
         * @param str the text to pad
         * @return the padded text; every line, including the last one, is
         * terminated with the platform line separator
         */
        fun padDelimiters(str: String): String {
            val delimiters = TokenGrammar.values()
                .filter {
                    //don't convert '/**' to '/* *'
                    it.category == TokenCategory.Delimiter && it != TokenGrammar.COMMENT_START
                }
                .map { it.value } //string representation of the token
            val lineSeparator = System.lineSeparator()

            return buildString {
                str.lineSequence().forEach { line ->
                    var padded = delimiters.fold(line) { acc, token -> acc.replace(token, " $token ") }

                    //delimiter corrections
                    padded = unpadDecimal(padded) //'nn . nn' => 'nn.nn'
                    padded = padded.replace(SPLIT_PKG_SCOPE, TokenGrammar.PKG_SCOPE.value) //': :' => '::'
                    //squeeze multi-char ops with chevrons
                    padded = padded.replace(SPLIT_LSHIFT, TokenGrammar.LSHIFT.value)
                    padded = padded.replace(SPLIT_RSHIFT, TokenGrammar.RSHIFT.value)
                    padded = padded.replace(SPLIT_LEQ, TokenGrammar.LEQ.value)
                    padded = padded.replace(SPLIT_GEQ, TokenGrammar.GEQ.value)

                    append(padded).append(lineSeparator)
                }
            }
        }

        /**
         * Replaces 'nn . nn' with 'nn.nn'.
         *
         * Decimals that are missing a prefix or suffix, e.g. '9.' or '.9', are
         * not handled.
         *
         * Each occurrence is rewritten exactly where it was matched, in a single
         * left-to-right pass, so one match can never alter text that belongs to
         * another part of the string.
         */
        private fun unpadDecimal(str: String): String =
            PADDED_DECIMAL.replace(str, "\$1.\$2")

        /**
         * Cleans up a padded and tokenized doc block (reverse of [padDelimiters]).
         *
         * Every delimiter surrounded by single spaces is collapsed back to the
         * bare delimiter, whitespace before a period at the end of the text is
         * removed, and every comma is followed by one space.
         *
         * @param str the padded text
         * @return the text with delimiter padding removed
         */
        fun unpadDelimiters(str: String): String {
            val delimiters = TokenGrammar.values()
                .filter { it.category == TokenCategory.Delimiter }
                .map { it.value } //string representation of the token

            val unpadded = delimiters.fold(str) { acc, token -> acc.replace(" $token ", token) }

            //special cases
            return unpadded
                .replace(TRAILING_PERIOD, ".") //end-of-line sentence periods
                .replace(",", ", ") //give comma some breathing room
        }
    }
}