1 // Copyright 2018 The Amber Authors. 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 15 #ifndef SRC_TOKENIZER_H_ 16 #define SRC_TOKENIZER_H_ 17 18 #include <cstdlib> 19 #include <memory> 20 #include <string> 21 22 #include "amber/result.h" 23 24 namespace amber { 25 26 enum class TokenType : uint8_t { 27 kEOS = 0, 28 kEOL, 29 kIdentifier, 30 kString, 31 kInteger, 32 kDouble, 33 kHex, 34 }; 35 36 /// A token read from the input source. 37 class Token { 38 public: 39 explicit Token(TokenType type); 40 ~Token(); 41 IsHex()42 bool IsHex() const { return type_ == TokenType::kHex; } IsInteger()43 bool IsInteger() const { return type_ == TokenType::kInteger; } IsDouble()44 bool IsDouble() const { return type_ == TokenType::kDouble; } IsIdentifier()45 bool IsIdentifier() const { return type_ == TokenType::kIdentifier; } IsString()46 bool IsString() const { return type_ == TokenType::kString; } IsEOS()47 bool IsEOS() const { return type_ == TokenType::kEOS; } IsEOL()48 bool IsEOL() const { return type_ == TokenType::kEOL; } 49 IsComma()50 bool IsComma() const { 51 return type_ == TokenType::kIdentifier && string_value_ == ","; 52 } IsOpenBracket()53 bool IsOpenBracket() const { 54 return type_ == TokenType::kIdentifier && string_value_ == "("; 55 } IsCloseBracket()56 bool IsCloseBracket() const { 57 return type_ == TokenType::kIdentifier && string_value_ == ")"; 58 } 59 SetNegative()60 void SetNegative() { is_negative_ = true; } SetStringValue(const std::string & val)61 void SetStringValue(const std::string& val) { string_value_ = val; } SetUint64Value(uint64_t val)62 void SetUint64Value(uint64_t val) { uint_value_ = val; } SetDoubleValue(double val)63 void SetDoubleValue(double val) { double_value_ = val; } 64 AsString()65 const std::string& AsString() const { return string_value_; } 66 AsUint8()67 uint8_t AsUint8() const { return static_cast<uint8_t>(uint_value_); } AsUint16()68 uint16_t AsUint16() const { return static_cast<uint16_t>(uint_value_); } AsUint32()69 uint32_t AsUint32() const { return static_cast<uint32_t>(uint_value_); } AsUint64()70 uint64_t AsUint64() const { return static_cast<uint64_t>(uint_value_); } 71 AsInt8()72 int8_t AsInt8() const { return static_cast<int8_t>(uint_value_); } AsInt16()73 int16_t AsInt16() const { return static_cast<int16_t>(uint_value_); } AsInt32()74 int32_t AsInt32() const { return static_cast<int32_t>(uint_value_); } AsInt64()75 int64_t AsInt64() const { return static_cast<int64_t>(uint_value_); } 76 77 Result ConvertToDouble(); 78 AsFloat()79 float AsFloat() const { return static_cast<float>(double_value_); } AsDouble()80 double AsDouble() const { return double_value_; } 81 AsHex()82 uint64_t AsHex() const { 83 return uint64_t(std::strtoull(string_value_.c_str(), nullptr, 16)); 84 } 85 86 /// The OriginalString is set for integer and double values to store the 87 /// unparsed number which we can return in error messages. SetOriginalString(const std::string & orig_string)88 void SetOriginalString(const std::string& orig_string) { 89 string_value_ = orig_string; 90 } ToOriginalString()91 std::string ToOriginalString() const { return string_value_; } 92 93 private: 94 TokenType type_; 95 std::string string_value_; 96 uint64_t uint_value_ = 0; 97 double double_value_ = 0.0; 98 bool is_negative_ = false; 99 }; 100 101 /// Splits the provided input into a stream of tokens. 102 class Tokenizer { 103 public: 104 explicit Tokenizer(const std::string& data); 105 ~Tokenizer(); 106 107 std::unique_ptr<Token> NextToken(); 108 std::unique_ptr<Token> PeekNextToken(); 109 std::string ExtractToNext(const std::string& str); 110 SetCurrentLine(size_t line)111 void SetCurrentLine(size_t line) { current_line_ = line; } GetCurrentLine()112 size_t GetCurrentLine() const { return current_line_; } 113 114 private: 115 bool IsWhitespace(char ch); 116 void SkipWhitespace(); 117 void SkipComment(); 118 119 std::string data_; 120 size_t current_position_ = 0; 121 size_t current_line_ = 1; 122 }; 123 124 } // namespace amber 125 126 #endif // SRC_TOKENIZER_H_ 127