1 /* 2 * Copyright (c) 2022 Huawei Device Co., Ltd. 3 * Licensed under the Apache License, Version 2.0 (the "License"); 4 * you may not use this file except in compliance with the License. 5 * You may obtain a copy of the License at 6 * 7 * http://www.apache.org/licenses/LICENSE-2.0 8 * 9 * Unless required by applicable law or agreed to in writing, software 10 * distributed under the License is distributed on an "AS IS" BASIS, 11 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 * See the License for the specific language governing permissions and 13 * limitations under the License. 14 */ 15 16 #include "parser/lexer.h" 17 18 #include <utility> 19 #include "util/string_builder.h" 20 21 namespace OHOS { 22 namespace Idl { Lexer()23Lexer::Lexer() 24 { 25 InitializeKeywords(); 26 } 27 ~Lexer()28Lexer::~Lexer() 29 { 30 if (currentFile_ != nullptr) { 31 currentFile_->Close(); 32 } 33 } 34 InitializeKeywords()35void Lexer::InitializeKeywords() 36 { 37 keywords_[String("boolean")] = Token::BOOLEAN; 38 keywords_[String("byte")] = Token::BYTE; 39 keywords_[String("char")] = Token::CHAR; 40 keywords_[String("double")] = Token::DOUBLE; 41 keywords_[String("float")] = Token::FLOAT; 42 keywords_[String("in")] = Token::IN; 43 keywords_[String("inout")] = Token::INOUT; 44 keywords_[String("int")] = Token::INTEGER; 45 keywords_[String("interface")] = Token::INTERFACE; 46 keywords_[String("List")] = Token::LIST; 47 keywords_[String("long")] = Token::LONG; 48 keywords_[String("Map")] = Token::MAP; 49 keywords_[String("oneway")] = Token::ONEWAY; 50 keywords_[String("out")] = Token::OUT; 51 keywords_[String("sequenceable")] = Token::SEQUENCEABLE; 52 keywords_[String("short")] = Token::SHORT; 53 keywords_[String("String")] = Token::STRING; 54 } 55 OpenSourceFile(const String & filePath)56bool Lexer::OpenSourceFile(const String& filePath) 57 { 58 currentFile_ = std::make_shared<File>(filePath, File::READ); 59 if (!currentFile_->IsValid()) { 60 return false; 61 } 62 63 return true; 64 } 65 GetToken(bool skipComment)66Token Lexer::GetToken(bool skipComment) 67 { 68 if (!havePeek_) { 69 currentToken_ = ReadToken(skipComment); 70 } 71 havePeek_ = false; 72 return currentToken_; 73 } 74 PeekToken(bool skipComment)75Token Lexer::PeekToken(bool skipComment) 76 { 77 if (!havePeek_) { 78 currentToken_ = ReadToken(skipComment); 79 havePeek_ = true; 80 } 81 return currentToken_; 82 } 83 ReadToken(bool skipComment)84Token Lexer::ReadToken(bool skipComment) 85 { 86 while (!currentFile_->IsEof()) { 87 char c = currentFile_->GetChar(); 88 tokenLineNo_ = currentFile_->GetCharLineNumber(); 89 tokenColumnNo_ = currentFile_->GetCharColumnNumber(); 90 if (IsSpace(c)) { 91 continue; 92 } else if (IsAlphabet(c) || c == '_') { 93 return ReadIdentifier(c); 94 } 95 switch (c) { 96 case '<': 97 currentToken_ = Token::ANGLE_BRACKETS_LEFT; 98 return currentToken_; 99 case '>': 100 currentToken_ = Token::ANGLE_BRACKETS_RIGHT; 101 return currentToken_; 102 case '{': 103 currentToken_ = Token::BRACES_LEFT; 104 return currentToken_; 105 case '}': 106 currentToken_ = Token::BRACES_RIGHT; 107 return currentToken_; 108 case '[': 109 currentToken_ = Token::BRACKETS_LEFT; 110 return currentToken_; 111 case ']': 112 currentToken_ = Token::BRACKETS_RIGHT; 113 return currentToken_; 114 case ',': 115 currentToken_ = Token::COMMA; 116 return currentToken_; 117 case '/': 118 if (currentFile_->PeekChar() == '/') { 119 ReadLineComment(c); 120 if (!skipComment) { 121 return currentToken_; 122 } 123 continue; 124 } else if (currentFile_->PeekChar() == '*') { 125 ReadBlockComment(c); 126 if (!skipComment) { 127 return currentToken_; 128 } 129 continue; 130 } 131 currentToken_ = Token::UNKNOWN; 132 return currentToken_; 133 case '(': 134 currentToken_ = Token::PARENTHESES_LEFT; 135 return currentToken_; 136 case ')': 137 currentToken_ = Token::PARENTHESES_RIGHT; 138 return currentToken_; 139 case '.': 140 currentToken_ = Token::DOT; 141 return currentToken_; 142 case ';': 143 currentToken_ = Token::SEMICOLON; 144 return currentToken_; 145 default: 146 currentToken_ = Token::UNKNOWN; 147 return currentToken_; 148 } 149 } 150 currentToken_ = Token::END_OF_FILE; 151 return currentToken_; 152 } 153 ReadIdentifier(char c)154Token Lexer::ReadIdentifier(char c) 155 { 156 StringBuilder sb; 157 158 sb.Append(c); 159 while (!currentFile_->IsEof()) { 160 c = currentFile_->PeekChar(); 161 if (IsAlphabet(c) || c == '_' || IsDecimalDigital(c) || c == '.') { 162 c = currentFile_->GetChar(); 163 sb.Append(c); 164 continue; 165 } 166 if (IsSpace(c)) { 167 currentFile_->GetChar(); 168 } 169 break; 170 } 171 String key = sb.ToString(); 172 auto it = keywords_.find(key); 173 if (it == keywords_.end()) { 174 identifier_ = key; 175 currentToken_ = Token::IDENTIFIER; 176 } else { 177 currentToken_ = it->second; 178 } 179 return currentToken_; 180 } 181 ReadLineComment(char c)182Token Lexer::ReadLineComment(char c) 183 { 184 StringBuilder sb; 185 186 sb.Append(c); 187 while (!currentFile_->IsEof()) { 188 c = currentFile_->GetChar(); 189 if (c == '\n') { 190 break; 191 } 192 sb.Append(c); 193 } 194 comment_ = sb.ToString(); 195 currentToken_ = Token::COMMENT_LINE; 196 return currentToken_; 197 } 198 ReadBlockComment(char c)199Token Lexer::ReadBlockComment(char c) 200 { 201 StringBuilder sb; 202 203 sb.Append(c); 204 while (!currentFile_->IsEof()) { 205 c = currentFile_->GetChar(); 206 sb.Append(c); 207 if (c == '*' && currentFile_->PeekChar() == '/') { 208 c = currentFile_->GetChar(); 209 sb.Append(c); 210 break; 211 } 212 } 213 comment_ = sb.ToString(); 214 currentToken_ = Token::COMMENT_BLOCK; 215 return currentToken_; 216 } 217 SkipCurrentLine()218void Lexer::SkipCurrentLine() 219 { 220 while (!currentFile_->IsEof()) { 221 char c = currentFile_->GetChar(); 222 if (c == '\n') { 223 currentFile_->GetChar(); 224 return; 225 } 226 } 227 } 228 SkipCurrentLine(char untilChar)229bool Lexer::SkipCurrentLine(char untilChar) 230 { 231 while (!currentFile_->IsEof()) { 232 int c = currentFile_->GetChar(); 233 if (c == untilChar) { 234 return true; 235 } 236 if (c == '\n') { 237 currentFile_->GetChar(); 238 return false; 239 } 240 } 241 return true; 242 } 243 TokenToChar(Token token)244int Lexer::TokenToChar(Token token) 245 { 246 switch (token) { 247 case Token::ANGLE_BRACKETS_LEFT: 248 return '<'; 249 case Token::ANGLE_BRACKETS_RIGHT: 250 return '>'; 251 case Token::BRACES_LEFT: 252 return '{'; 253 case Token::BRACES_RIGHT: 254 return '}'; 255 case Token::BRACKETS_LEFT: 256 return '['; 257 case Token::BRACKETS_RIGHT: 258 return ']'; 259 case Token::COMMA: 260 return ','; 261 case Token::DOT: 262 return '.'; 263 case Token::PARENTHESES_LEFT: 264 return '('; 265 case Token::PARENTHESES_RIGHT: 266 return ')'; 267 case Token::SEMICOLON: 268 return ';'; 269 default: 270 return -1; 271 } 272 } 273 DumpToken() const274String Lexer::DumpToken() const 275 { 276 switch (currentToken_) { 277 case Token::ANGLE_BRACKETS_LEFT: 278 return "<"; 279 case Token::ANGLE_BRACKETS_RIGHT: 280 return ">"; 281 case Token::BOOLEAN: 282 return "boolean"; 283 case Token::BRACES_LEFT: 284 return "{"; 285 case Token::BRACES_RIGHT: 286 return "}"; 287 case Token::BRACKETS_LEFT: 288 return "["; 289 case Token::BRACKETS_RIGHT: 290 return "]"; 291 case Token::BYTE: 292 return "byte"; 293 case Token::CHAR: 294 return "char"; 295 case Token::COMMA: 296 return ","; 297 case Token::COMMENT_BLOCK: 298 case Token::COMMENT_LINE: 299 return comment_; 300 case Token::DOT: 301 return "."; 302 case Token::DOUBLE: 303 return "double"; 304 case Token::END_OF_FILE: 305 return "eof"; 306 case Token::FLOAT: 307 return "float"; 308 case Token::IDENTIFIER: 309 return identifier_; 310 case Token::IN: 311 return "in"; 312 case Token::INOUT: 313 return "inout"; 314 case Token::INTEGER: 315 return "int"; 316 case Token::LIST: 317 return "List"; 318 case Token::LONG: 319 return "long"; 320 case Token::MAP: 321 return "Map"; 322 case Token::ONEWAY: 323 return "oneway"; 324 case Token::OUT: 325 return "out"; 326 case Token::SEQUENCEABLE: 327 return "sequenceable"; 328 case Token::PARENTHESES_LEFT: 329 return "("; 330 case Token::PARENTHESES_RIGHT: 331 return ")"; 332 case Token::SEMICOLON: 333 return ";"; 334 case Token::SHORT: 335 return "short"; 336 case Token::STRING: 337 return "String"; 338 default: 339 return "unknown token"; 340 } 341 } 342 } // namespace idl 343 } // namespace OHOS 344