• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // © 2024 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html
3 
4 #include "unicode/utypes.h"
5 
6 #ifndef U_HIDE_DEPRECATED_API
7 
8 #ifndef MESSAGEFORMAT_PARSER_H
9 #define MESSAGEFORMAT_PARSER_H
10 
11 #include "unicode/messageformat2_data_model.h"
12 #include "unicode/parseerr.h"
13 
14 #include "messageformat2_allocation.h"
15 #include "messageformat2_errors.h"
16 
17 #if U_SHOW_CPLUSPLUS_API
18 
19 #if !UCONFIG_NO_FORMATTING
20 
21 #if !UCONFIG_NO_MF2
22 
23 U_NAMESPACE_BEGIN
24 
25 namespace message2 {
26 
27     using namespace data_model;
28 
29     // Used for parameterizing options parsing code
30     // over the two builders that use it (Operator and Markup)
31     template <class T>
32     class OptionAdder {
33         private:
34             T& builder;
35         public:
OptionAdder(T & b)36             OptionAdder(T& b) : builder(b) {}
addOption(const UnicodeString & k,Operand && r,UErrorCode & s)37             void addOption(const UnicodeString& k, Operand&& r, UErrorCode& s) {
38                 builder.addOption(k, std::move(r), s);
39             }
40     };
41 
42     // Used for parameterizing attributes parsing code
43     // over the two builders that use it (Expression and Markup)
44     // Unfortunately the same OptionAdder class can't just be reused,
45     // because duplicate options are forbidden while duplicate attributes are not
46     template <class T>
47     class AttributeAdder {
48         private:
49             T& builder;
50         public:
AttributeAdder(T & b)51             AttributeAdder(T& b) : builder(b) {}
addAttribute(const UnicodeString & k,Operand && r,UErrorCode & s)52             void addAttribute(const UnicodeString& k, Operand&& r, UErrorCode& s) {
53                 builder.addAttribute(k, std::move(r), s);
54             }
55     };
56 
57     // Parser class (private)
58     class Parser : public UMemory {
59     public:
60 	virtual ~Parser();
61     private:
62         friend class MessageFormatter;
63 
64         void parse(UParseError&, UErrorCode&);
65 
66 	/*
67 	  Use an internal "parse error" structure to make it easier to translate
68 	  absolute offsets to line offsets.
69 	  This is translated back to a `UParseError` at the end of parsing.
70 	*/
71 	typedef struct MessageParseError {
72 	    // The line on which the error occurred
73 	    uint32_t line;
74 	    // The offset, relative to the erroneous line, on which the error occurred
75 	    uint32_t offset;
76 	    // The total number of characters seen before advancing to the current line. It has a value of 0 if line == 0.
77 	    // It includes newline characters, because the index does too.
78 	    uint32_t lengthBeforeCurrentLine;
79 
80 	    // This parser doesn't yet use the last two fields.
81 	    UChar   preContext[U_PARSE_CONTEXT_LEN];
82 	    UChar   postContext[U_PARSE_CONTEXT_LEN];
83 	} MessageParseError;
84 
Parser(const UnicodeString & input,MFDataModel::Builder & dataModelBuilder,StaticErrors & e,UnicodeString & normalizedInputRef)85 	Parser(const UnicodeString &input, MFDataModel::Builder& dataModelBuilder, StaticErrors& e, UnicodeString& normalizedInputRef)
86 	  : source(input), index(0), errors(e), normalizedInput(normalizedInputRef), dataModel(dataModelBuilder) {
87 	  parseError.line = 0;
88 	  parseError.offset = 0;
89 	  parseError.lengthBeforeCurrentLine = 0;
90 	  parseError.preContext[0] = '\0';
91 	  parseError.postContext[0] = '\0';
92 	}
93 
94 	static void translateParseError(const MessageParseError&, UParseError&);
95 	static void setParseError(MessageParseError&, uint32_t);
96 	void maybeAdvanceLine();
97         Pattern parseSimpleMessage(UErrorCode&);
98         void parseBody(UErrorCode&);
99 	void parseDeclarations(UErrorCode&);
100         void parseUnsupportedStatement(UErrorCode&);
101         void parseLocalDeclaration(UErrorCode&);
102         void parseInputDeclaration(UErrorCode&);
103 	void parseSelectors(UErrorCode&);
104 
105 	void parseWhitespaceMaybeRequired(bool, UErrorCode&);
106 	void parseRequiredWhitespace(UErrorCode&);
107 	void parseOptionalWhitespace(UErrorCode&);
108 	void parseToken(UChar32, UErrorCode&);
109 	void parseTokenWithWhitespace(UChar32, UErrorCode&);
110 	void parseToken(const std::u16string_view&, UErrorCode&);
111 	void parseTokenWithWhitespace(const std::u16string_view&, UErrorCode&);
112         bool nextIs(const std::u16string_view&) const;
113 	UnicodeString parseName(UErrorCode&);
114         UnicodeString parseIdentifier(UErrorCode&);
115         UnicodeString parseDigits(UErrorCode&);
116 	VariableName parseVariableName(UErrorCode&);
117 	FunctionName parseFunction(UErrorCode&);
118 	UnicodeString parseEscapeSequence(UErrorCode&);
119 	Literal parseUnquotedLiteral(UErrorCode&);
120         Literal parseQuotedLiteral(UErrorCode&);
121 	Literal parseLiteral(UErrorCode&);
122         template<class T>
123         void parseAttribute(AttributeAdder<T>&, UErrorCode&);
124         template<class T>
125         void parseAttributes(AttributeAdder<T>&, UErrorCode&);
126         template<class T>
127         void parseOption(OptionAdder<T>&, UErrorCode&);
128         template<class T>
129         void parseOptions(OptionAdder<T>&, UErrorCode&);
130 	Operator parseAnnotation(UErrorCode&);
131 	void parseLiteralOrVariableWithAnnotation(bool, Expression::Builder&, UErrorCode&);
132         Markup parseMarkup(UErrorCode&);
133 	Expression parseExpression(UErrorCode&);
134         std::variant<Expression, Markup> parsePlaceholder(UErrorCode&);
135 	UnicodeString parseTextChar(UErrorCode&);
136 	Key parseKey(UErrorCode&);
137 	SelectorKeys parseNonEmptyKeys(UErrorCode&);
138 	void errorPattern(UErrorCode& status);
139 	Pattern parseQuotedPattern(UErrorCode&);
140         bool isDeclarationStart();
141 
peek()142         UChar32 peek() const { return source.char32At(index) ; }
peek(uint32_t i)143         UChar32 peek(uint32_t i) const {
144             return source.char32At(source.moveIndex32(index, i));
145         }
next()146         void next() { index = source.moveIndex32(index, 1); }
147 
inBounds()148         bool inBounds() const { return (int32_t) index < source.length(); }
inBounds(uint32_t i)149         bool inBounds(uint32_t i) const { return source.moveIndex32(index, i) < source.length(); }
allConsumed()150         bool allConsumed() const { return (int32_t) index == source.length(); }
151 
152 	// The input string
153 	const UnicodeString &source;
154 	// The current position within the input string -- counting in UChar32
155 	uint32_t index;
156 	// Represents the current line (and when an error is indicated),
157 	// character offset within the line of the parse error
158 	MessageParseError parseError;
159 
160 	// The structure to use for recording errors
161 	StaticErrors& errors;
162 
163 	// Normalized version of the input string (optional whitespace removed)
164 	UnicodeString& normalizedInput;
165 
166 	// The parent builder
167 	MFDataModel::Builder &dataModel;
168     }; // class Parser
169 
170 } // namespace message2
171 
172 U_NAMESPACE_END
173 
174 #endif /* #if !UCONFIG_NO_MF2 */
175 
176 #endif /* #if !UCONFIG_NO_FORMATTING */
177 
178 #endif /* U_SHOW_CPLUSPLUS_API */
179 
180 #endif // MESSAGEFORMAT_PARSER_H
181 
182 #endif // U_HIDE_DEPRECATED_API
183 // eof
184