/*
 * Copyright (C) 2017 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package lexer

import java.io.File
interface ILexer {
    /** Tokenizes the given source string into a list of [Token]s. */
    fun tokenize(str: String): List<Token>

    /** Tokenizes the full text of [file]. Reads the file eagerly. */
    fun tokenize(file: File): List<Token> {
        return this.tokenize(file.readText())
    }

    companion object {

        // Patterns compiled once instead of once per line / per match —
        // regex compilation is comparatively expensive and these never change.
        private val PADDED_PKG_SCOPE = Regex(":\\s+:")
        private val PADDED_LSHIFT = Regex("<\\s+<")
        private val PADDED_RSHIFT = Regex(">\\s+>")
        private val PADDED_LEQ = Regex("<\\s+=")
        private val PADDED_GEQ = Regex(">\\s+=")
        private val PADDED_DECIMAL = Regex("(\\d+)\\s*\\.\\s*(\\d+)")
        private val EOL_PERIOD = Regex("\\s+\\.\\s*$")

        /**
         * Surrounds every delimiter token in [str] with single spaces so a
         * whitespace tokenizer can split on them, then repairs constructs
         * that must stay glued together: decimals ('nn . nn' => 'nn.nn'),
         * '::', '<<', '>>', '<=', and '>='.
         *
         * COMMENT_START is excluded so '/**' isn't broken into '/* *'.
         * Output lines are terminated with '\n' (appendLine), which replaces
         * the deprecated appendln and makes the result platform-independent.
         */
        fun padDelimiters(str: String): String {
            val delimiters = TokenGrammar.values()
                    .filter { it.category == TokenCategory.Delimiter }
                    .filter { it != TokenGrammar.COMMENT_START } //don't convert '/**' to '/* *'
                    .map { it.value } //return string representation

            val sb = StringBuilder()
            str.lineSequence().forEach { line ->
                var newLine = line
                for (token in delimiters) {
                    newLine = newLine.replace(token, " $token ")
                }
                //delimiter corrections
                newLine = unpadDecimal(newLine) //'nn . nn' => 'n.n'
                newLine = newLine.replace(PADDED_PKG_SCOPE, TokenGrammar.PKG_SCOPE.value) //': :' => '::'
                //squeeze multi-char ops with chevrons
                newLine = newLine.replace(PADDED_LSHIFT, TokenGrammar.LSHIFT.value)
                newLine = newLine.replace(PADDED_RSHIFT, TokenGrammar.RSHIFT.value)
                newLine = newLine.replace(PADDED_LEQ, TokenGrammar.LEQ.value)
                newLine = newLine.replace(PADDED_GEQ, TokenGrammar.GEQ.value)

                sb.appendLine(newLine)
            }
            return sb.toString()
        }

        /**
         * Replaces 'nn . nn' with 'nn.nn'.
         * Doesn't handle decimals missing a prefix or suffix, e.g. '9.' or '.9'.
         */
        private fun unpadDecimal(str: String): String {
            // Single backreference replacement — equivalent to the previous
            // find-then-rebuild loop, without constructing a regex per match.
            return str.replace(PADDED_DECIMAL, "$1.$2")
        }

        /**
         * Cleans up a padded and tokenized doc block (reverses [padDelimiters]):
         * collapses ' token ' back to 'token', restores end-of-sentence periods,
         * and re-spaces commas.
         */
        fun unpadDelimiters(str: String): String {
            var newStr = str
            val delimiters = TokenGrammar.values()
                    .filter { it.category == TokenCategory.Delimiter }
                    .map { it.value } //return string representation

            for (token in delimiters) {
                newStr = newStr.replace(" $token ", token)
            }
            //special case
            newStr = newStr.replace(EOL_PERIOD, ".") //end-of-line sentence periods
            newStr = newStr.replace(",", ", ") //give comma some breathing room
            return newStr
        }

    }
}