1 // Copyright 2018 The Amber Authors. 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 15 #ifndef SRC_TOKENIZER_H_ 16 #define SRC_TOKENIZER_H_ 17 18 #include <cstdlib> 19 #include <memory> 20 #include <string> 21 22 #include "amber/result.h" 23 24 namespace amber { 25 26 enum class TokenType : uint8_t { 27 kEOS = 0, 28 kEOL, 29 kString, 30 kInteger, 31 kDouble, 32 kHex, 33 }; 34 35 /// A token read from the input source. 36 class Token { 37 public: 38 explicit Token(TokenType type); 39 ~Token(); 40 IsHex()41 bool IsHex() const { return type_ == TokenType::kHex; } IsInteger()42 bool IsInteger() const { return type_ == TokenType::kInteger; } IsDouble()43 bool IsDouble() const { return type_ == TokenType::kDouble; } IsString()44 bool IsString() const { return type_ == TokenType::kString; } IsEOS()45 bool IsEOS() const { return type_ == TokenType::kEOS; } IsEOL()46 bool IsEOL() const { return type_ == TokenType::kEOL; } 47 IsComma()48 bool IsComma() const { 49 return type_ == TokenType::kString && string_value_ == ","; 50 } IsOpenBracket()51 bool IsOpenBracket() const { 52 return type_ == TokenType::kString && string_value_ == "("; 53 } IsCloseBracket()54 bool IsCloseBracket() const { 55 return type_ == TokenType::kString && string_value_ == ")"; 56 } 57 SetNegative()58 void SetNegative() { is_negative_ = true; } SetStringValue(const std::string & val)59 void SetStringValue(const std::string& val) { string_value_ = val; } SetUint64Value(uint64_t val)60 void SetUint64Value(uint64_t val) { uint_value_ = val; } SetDoubleValue(double val)61 void SetDoubleValue(double val) { double_value_ = val; } 62 AsString()63 const std::string& AsString() const { return string_value_; } 64 AsUint8()65 uint8_t AsUint8() const { return static_cast<uint8_t>(uint_value_); } AsUint16()66 uint16_t AsUint16() const { return static_cast<uint16_t>(uint_value_); } AsUint32()67 uint32_t AsUint32() const { return static_cast<uint32_t>(uint_value_); } AsUint64()68 uint64_t AsUint64() const { return static_cast<uint64_t>(uint_value_); } 69 AsInt8()70 int8_t AsInt8() const { return static_cast<int8_t>(uint_value_); } AsInt16()71 int16_t AsInt16() const { return static_cast<int16_t>(uint_value_); } AsInt32()72 int32_t AsInt32() const { return static_cast<int32_t>(uint_value_); } AsInt64()73 int64_t AsInt64() const { return static_cast<int64_t>(uint_value_); } 74 75 Result ConvertToDouble(); 76 AsFloat()77 float AsFloat() const { return static_cast<float>(double_value_); } AsDouble()78 double AsDouble() const { return double_value_; } 79 AsHex()80 uint64_t AsHex() const { 81 return uint64_t(std::strtoull(string_value_.c_str(), nullptr, 16)); 82 } 83 84 /// The OriginalString is set for integer and double values to store the 85 /// unparsed number which we can return in error messages. SetOriginalString(const std::string & orig_string)86 void SetOriginalString(const std::string& orig_string) { 87 string_value_ = orig_string; 88 } ToOriginalString()89 std::string ToOriginalString() const { return string_value_; } 90 91 private: 92 TokenType type_; 93 std::string string_value_; 94 uint64_t uint_value_ = 0; 95 double double_value_ = 0.0; 96 bool is_negative_ = false; 97 }; 98 99 /// Splits the provided input into a stream of tokens. 100 class Tokenizer { 101 public: 102 explicit Tokenizer(const std::string& data); 103 ~Tokenizer(); 104 105 std::unique_ptr<Token> NextToken(); 106 std::string ExtractToNext(const std::string& str); 107 SetCurrentLine(size_t line)108 void SetCurrentLine(size_t line) { current_line_ = line; } GetCurrentLine()109 size_t GetCurrentLine() const { return current_line_; } 110 111 private: 112 bool IsWhitespace(char ch); 113 void SkipWhitespace(); 114 void SkipComment(); 115 116 std::string data_; 117 size_t current_position_ = 0; 118 size_t current_line_ = 1; 119 }; 120 121 } // namespace amber 122 123 #endif // SRC_TOKENIZER_H_ 124