1 /* Copyright 2017 The TensorFlow Authors. All Rights Reserved. 2 3 Licensed under the Apache License, Version 2.0 (the "License"); 4 you may not use this file except in compliance with the License. 5 You may obtain a copy of the License at 6 7 http://www.apache.org/licenses/LICENSE-2.0 8 9 Unless required by applicable law or agreed to in writing, software 10 distributed under the License is distributed on an "AS IS" BASIS, 11 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 See the License for the specific language governing permissions and 13 limitations under the License. 14 ==============================================================================*/ 15 16 #ifndef TENSORFLOW_COMPILER_XLA_SERVICE_HLO_LEXER_H_ 17 #define TENSORFLOW_COMPILER_XLA_SERVICE_HLO_LEXER_H_ 18 19 #include <string> 20 21 #include "absl/strings/string_view.h" 22 #include "tensorflow/compiler/xla/shape.h" 23 #include "tensorflow/compiler/xla/types.h" 24 #include "tensorflow/compiler/xla/xla_data.pb.h" 25 #include "tensorflow/core/platform/logging.h" 26 #include "tensorflow/core/platform/regexp.h" 27 #include "tensorflow/core/platform/types.h" 28 29 namespace xla { 30 31 // Defines different kinds of tokens used by the HLO lexer. 32 // 33 // You shouldn't need to use this directly unless you're using HloLexer 34 // directly, and you probably don't need to do that. Use hlo_parser instead. 35 enum class TokKind { 36 // Markers 37 kEof, 38 kError, 39 40 // Tokens with no info. 41 kEqual, // = 42 kComma, // , 43 kColon, // : 44 kAsterisk, // * 45 kLsquare, 46 kRsquare, // [ ] 47 kLbrace, 48 kRbrace, // { } 49 kLparen, 50 kRparen, // ( ) 51 kDots, // ... 52 53 kArrow, // -> 54 kLeq, // <= 55 56 // Keywords 57 kw_HloModule, 58 kw_ENTRY, 59 kw_ROOT, 60 kw_true, 61 kw_false, 62 kw_maximal, 63 kw_replicated, 64 kw_nan, 65 kw_inf, 66 kw_sparse, 67 68 kNegInf, // -inf 69 70 // Typed tokens. 71 kPrimitiveType, // F32, PRED, etc. 72 kName, // %foo 73 kAttributeName, // dimensions= 74 kDimLabels, // [0-9bf]{2,}_[0-9io]{2,}->[0-9bf]{2,} 75 kDxD, // [0-9]+(x[0-9]+)+ 76 kPad, // [0-9]+_[0-9]+(_[0-9]+)?(x[0-9]+_[0-9]+(_[0-9]+)?)* 77 kIdent, // other identifiers 78 kString, // "abcd\"\n" 79 kInt, // 42 80 kDecimal, // 4.2 81 }; 82 83 string TokKindToString(TokKind kind); 84 85 // Lexer for the HloModule::ToString() format text. 86 // 87 // This class is meant to be used by hlo_parser.cc. You shouldn't need to use 88 // it directly. 89 class HloLexer { 90 public: HloLexer(absl::string_view buf)91 explicit HloLexer(absl::string_view buf) : buf_(buf) { 92 current_ptr_ = buf_.begin(); 93 } 94 Lex()95 TokKind Lex() { return token_state_.current_kind = LexToken(); } 96 GetKind()97 TokKind GetKind() const { return token_state_.current_kind; } GetStrVal()98 string GetStrVal() const { 99 switch (GetKind()) { 100 case TokKind::kName: 101 case TokKind::kAttributeName: 102 case TokKind::kDimLabels: 103 case TokKind::kDxD: 104 case TokKind::kPad: 105 case TokKind::kString: 106 case TokKind::kIdent: 107 return token_state_.str_val; 108 default: 109 LOG(FATAL) << "This token does not have string value"; 110 } 111 } GetInt64Val()112 int64 GetInt64Val() const { 113 CHECK(GetKind() == TokKind::kInt); 114 return token_state_.int64_val; 115 } GetDecimalVal()116 double GetDecimalVal() const { 117 CHECK(GetKind() == TokKind::kDecimal); 118 return token_state_.decimal_val; 119 } GetPrimitiveTypeVal()120 PrimitiveType GetPrimitiveTypeVal() const { 121 CHECK(GetKind() == TokKind::kPrimitiveType); 122 return token_state_.primitive_type_val; 123 } 124 125 typedef const char* LocTy; 126 127 // Returns the location of the current token. GetLoc()128 LocTy GetLoc() const { return token_state_.token_start; } 129 130 // Returns the line and column of a location in the buffer. 131 std::pair<unsigned, unsigned> GetLineAndColumn(LocTy location) const; 132 133 // Returns the whole line given the location. 134 absl::string_view GetLine(LocTy loc) const; 135 136 // Looks ahead one token and returns it. Lexer state is unchanged. 137 TokKind LookAhead(); 138 139 private: 140 // Returns the current character. If it's neither the end of input buffer nor 141 // an invalid character, moves the pointer forward. 142 int GetNextChar(); 143 144 // Returns the current character. 145 int PeekCurrentChar() const; 146 147 // Creates StringPiece with the given begin and end. Exits if the begin > end, 148 // or it's out of the range of the current buffer. 149 absl::string_view StringPieceFromPointers(const char* begin, 150 const char* end) const; 151 tensorflow::RegexpStringPiece RegexpStringPieceFromPointers( 152 const char* begin, const char* end) const; 153 154 // Returns true if the given ptr is dereferenceable within the range of the 155 // current buffer. 156 bool CanDereference(const char* ptr) const; 157 158 TokKind LexToken(); 159 160 TokKind LexIdentifier(); 161 TokKind LexPercent(); 162 TokKind LexShape(); 163 TokKind LexConstant(); 164 TokKind LexNumberOrPattern(); 165 TokKind LexString(); 166 167 const absl::string_view buf_; 168 const char* current_ptr_; 169 170 // Information about the current token. 171 struct TokenState { 172 const char* token_start = nullptr; 173 TokKind current_kind; 174 string str_val; 175 int64 int64_val; 176 double decimal_val; 177 PrimitiveType primitive_type_val; 178 }; 179 TokenState token_state_; 180 181 struct LineNoCacheTy { 182 const char* last_query; 183 unsigned line_no_of_query; 184 }; 185 // This caches the line number of the previous query. 186 mutable LineNoCacheTy line_no_cache_{nullptr, 0}; 187 }; 188 189 } // namespace xla 190 191 #endif // TENSORFLOW_COMPILER_XLA_SERVICE_HLO_LEXER_H_ 192