1 // © 2024 and later: Unicode, Inc. and others. 2 // License & terms of use: http://www.unicode.org/copyright.html 3 4 #include "unicode/utypes.h" 5 6 #ifndef U_HIDE_DEPRECATED_API 7 8 #ifndef MESSAGEFORMAT_PARSER_H 9 #define MESSAGEFORMAT_PARSER_H 10 11 #include "unicode/messageformat2_data_model.h" 12 #include "unicode/parseerr.h" 13 14 #include "messageformat2_allocation.h" 15 #include "messageformat2_errors.h" 16 17 #if U_SHOW_CPLUSPLUS_API 18 19 #if !UCONFIG_NO_FORMATTING 20 21 #if !UCONFIG_NO_MF2 22 23 U_NAMESPACE_BEGIN 24 25 namespace message2 { 26 27 using namespace data_model; 28 29 // Used for parameterizing options parsing code 30 // over the two builders that use it (Operator and Markup) 31 template <class T> 32 class OptionAdder { 33 private: 34 T& builder; 35 public: OptionAdder(T & b)36 OptionAdder(T& b) : builder(b) {} addOption(const UnicodeString & k,Operand && r,UErrorCode & s)37 void addOption(const UnicodeString& k, Operand&& r, UErrorCode& s) { 38 builder.addOption(k, std::move(r), s); 39 } 40 }; 41 42 // Used for parameterizing attributes parsing code 43 // over the two builders that use it (Expression and Markup) 44 // Unfortunately the same OptionAdder class can't just be reused, 45 // becaues duplicate options are forbidden while duplicate attributes are not 46 template <class T> 47 class AttributeAdder { 48 private: 49 T& builder; 50 public: AttributeAdder(T & b)51 AttributeAdder(T& b) : builder(b) {} addAttribute(const UnicodeString & k,Operand && r,UErrorCode & s)52 void addAttribute(const UnicodeString& k, Operand&& r, UErrorCode& s) { 53 builder.addAttribute(k, std::move(r), s); 54 } 55 }; 56 57 // Parser class (private) 58 class Parser : public UMemory { 59 public: 60 virtual ~Parser(); 61 private: 62 friend class MessageFormatter; 63 64 void parse(UParseError&, UErrorCode&); 65 66 /* 67 Use an internal "parse error" structure to make it easier to translate 68 absolute offsets to line offsets. 69 This is translated back to a `UParseError` at the end of parsing. 70 */ 71 typedef struct MessageParseError { 72 // The line on which the error occurred 73 uint32_t line; 74 // The offset, relative to the erroneous line, on which the error occurred 75 uint32_t offset; 76 // The total number of characters seen before advancing to the current line. It has a value of 0 if line == 0. 77 // It includes newline characters, because the index does too. 78 uint32_t lengthBeforeCurrentLine; 79 80 // This parser doesn't yet use the last two fields. 81 UChar preContext[U_PARSE_CONTEXT_LEN]; 82 UChar postContext[U_PARSE_CONTEXT_LEN]; 83 } MessageParseError; 84 Parser(const UnicodeString & input,MFDataModel::Builder & dataModelBuilder,StaticErrors & e,UnicodeString & normalizedInputRef)85 Parser(const UnicodeString &input, MFDataModel::Builder& dataModelBuilder, StaticErrors& e, UnicodeString& normalizedInputRef) 86 : source(input), index(0), errors(e), normalizedInput(normalizedInputRef), dataModel(dataModelBuilder) { 87 parseError.line = 0; 88 parseError.offset = 0; 89 parseError.lengthBeforeCurrentLine = 0; 90 parseError.preContext[0] = '\0'; 91 parseError.postContext[0] = '\0'; 92 } 93 94 static void translateParseError(const MessageParseError&, UParseError&); 95 static void setParseError(MessageParseError&, uint32_t); 96 void maybeAdvanceLine(); 97 Pattern parseSimpleMessage(UErrorCode&); 98 void parseBody(UErrorCode&); 99 void parseDeclarations(UErrorCode&); 100 void parseUnsupportedStatement(UErrorCode&); 101 void parseLocalDeclaration(UErrorCode&); 102 void parseInputDeclaration(UErrorCode&); 103 void parseSelectors(UErrorCode&); 104 105 void parseWhitespaceMaybeRequired(bool, UErrorCode&); 106 void parseRequiredWhitespace(UErrorCode&); 107 void parseOptionalWhitespace(UErrorCode&); 108 void parseToken(UChar32, UErrorCode&); 109 void parseTokenWithWhitespace(UChar32, UErrorCode&); 110 void parseToken(const std::u16string_view&, UErrorCode&); 111 void parseTokenWithWhitespace(const std::u16string_view&, UErrorCode&); 112 bool nextIs(const std::u16string_view&) const; 113 UnicodeString parseName(UErrorCode&); 114 UnicodeString parseIdentifier(UErrorCode&); 115 UnicodeString parseDigits(UErrorCode&); 116 VariableName parseVariableName(UErrorCode&); 117 FunctionName parseFunction(UErrorCode&); 118 UnicodeString parseEscapeSequence(UErrorCode&); 119 Literal parseUnquotedLiteral(UErrorCode&); 120 Literal parseQuotedLiteral(UErrorCode&); 121 Literal parseLiteral(UErrorCode&); 122 template<class T> 123 void parseAttribute(AttributeAdder<T>&, UErrorCode&); 124 template<class T> 125 void parseAttributes(AttributeAdder<T>&, UErrorCode&); 126 template<class T> 127 void parseOption(OptionAdder<T>&, UErrorCode&); 128 template<class T> 129 void parseOptions(OptionAdder<T>&, UErrorCode&); 130 Operator parseAnnotation(UErrorCode&); 131 void parseLiteralOrVariableWithAnnotation(bool, Expression::Builder&, UErrorCode&); 132 Markup parseMarkup(UErrorCode&); 133 Expression parseExpression(UErrorCode&); 134 std::variant<Expression, Markup> parsePlaceholder(UErrorCode&); 135 UnicodeString parseTextChar(UErrorCode&); 136 Key parseKey(UErrorCode&); 137 SelectorKeys parseNonEmptyKeys(UErrorCode&); 138 void errorPattern(UErrorCode& status); 139 Pattern parseQuotedPattern(UErrorCode&); 140 bool isDeclarationStart(); 141 peek()142 UChar32 peek() const { return source.char32At(index) ; } peek(uint32_t i)143 UChar32 peek(uint32_t i) const { 144 return source.char32At(source.moveIndex32(index, i)); 145 } next()146 void next() { index = source.moveIndex32(index, 1); } 147 inBounds()148 bool inBounds() const { return (int32_t) index < source.length(); } inBounds(uint32_t i)149 bool inBounds(uint32_t i) const { return source.moveIndex32(index, i) < source.length(); } allConsumed()150 bool allConsumed() const { return (int32_t) index == source.length(); } 151 152 // The input string 153 const UnicodeString &source; 154 // The current position within the input string -- counting in UChar32 155 uint32_t index; 156 // Represents the current line (and when an error is indicated), 157 // character offset within the line of the parse error 158 MessageParseError parseError; 159 160 // The structure to use for recording errors 161 StaticErrors& errors; 162 163 // Normalized version of the input string (optional whitespace removed) 164 UnicodeString& normalizedInput; 165 166 // The parent builder 167 MFDataModel::Builder &dataModel; 168 }; // class Parser 169 170 } // namespace message2 171 172 U_NAMESPACE_END 173 174 #endif /* #if !UCONFIG_NO_MF2 */ 175 176 #endif /* #if !UCONFIG_NO_FORMATTING */ 177 178 #endif /* U_SHOW_CPLUSPLUS_API */ 179 180 #endif // MESSAGEFORMAT_PARSER_H 181 182 #endif // U_HIDE_DEPRECATED_API 183 // eof 184