1 // © 2024 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html
3
4 #include "unicode/utypes.h"
5
6 #if !UCONFIG_NO_FORMATTING
7
8 #if !UCONFIG_NO_MF2
9
10 #include "unicode/messageformat2_data_model.h"
11 #include "messageformat2_macros.h"
12 #include "messageformat2_serializer.h"
13 #include "uvector.h" // U_ASSERT
14
15 U_NAMESPACE_BEGIN
16
17 namespace message2 {
18
19 // Generates a string representation of a data model
20 // ------------------------------------------------
21
22 using namespace data_model;
23
24 // Private helper methods
25
whitespace()26 void Serializer::whitespace() {
27 result += SPACE;
28 }
29
emit(UChar32 c)30 void Serializer::emit(UChar32 c) {
31 result += c;
32 }
33
emit(const UnicodeString & s)34 void Serializer::emit(const UnicodeString& s) {
35 result += s;
36 }
37
emit(const std::u16string_view & token)38 void Serializer::emit(const std::u16string_view& token) {
39 result.append(token);
40 }
41
emit(const Literal & l)42 void Serializer::emit(const Literal& l) {
43 if (l.isQuoted()) {
44 emit(PIPE);
45 }
46 const UnicodeString& contents = l.unquoted();
47 for (int32_t i = 0; ((int32_t) i) < contents.length(); i++) {
48 // Re-escape any escaped-char characters
49 switch(contents[i]) {
50 case BACKSLASH:
51 case PIPE:
52 case LEFT_CURLY_BRACE:
53 case RIGHT_CURLY_BRACE: {
54 emit(BACKSLASH);
55 break;
56 }
57 default: {
58 break;
59 }
60 }
61 emit(contents[i]);
62 }
63 if (l.isQuoted()) {
64 emit(PIPE);
65 }
66 }
67
emit(const Key & k)68 void Serializer::emit(const Key& k) {
69 if (k.isWildcard()) {
70 emit(ASTERISK);
71 return;
72 }
73 emit(k.asLiteral());
74 }
75
emit(const SelectorKeys & k)76 void Serializer::emit(const SelectorKeys& k) {
77 const Key* ks = k.getKeysInternal();
78 int32_t len = k.len;
79 // It would be an error for `keys` to be empty;
80 // that would mean this is the single `pattern`
81 // variant, and in that case, this method shouldn't be called
82 U_ASSERT(len > 0);
83 for (int32_t i = 0; i < len; i++) {
84 if (i != 0) {
85 whitespace();
86 }
87 emit(ks[i]);
88 }
89 }
90
emit(const Operand & rand)91 void Serializer::emit(const Operand& rand) {
92 U_ASSERT(!rand.isNull());
93
94 if (rand.isVariable()) {
95 emit(DOLLAR);
96 emit(rand.asVariable());
97 } else {
98 // Literal: quoted or unquoted
99 emit(rand.asLiteral());
100 }
101 }
102
emit(const OptionMap & options)103 void Serializer::emit(const OptionMap& options) {
104 // Errors should have been checked before this point
105 UErrorCode localStatus = U_ZERO_ERROR;
106 U_ASSERT(!options.bogus);
107 for (int32_t i = 0; i < options.size(); i++) {
108 const Option& opt = options.getOption(i, localStatus);
109 // No need to check error code, since we already checked
110 // that !bogus
111 whitespace();
112 emit(opt.getName());
113 emit(EQUALS);
114 emit(opt.getValue());
115 }
116 }
117
emitAttributes(const OptionMap & attributes)118 void Serializer::emitAttributes(const OptionMap& attributes) {
119 // Errors should have been checked before this point
120 UErrorCode localStatus = U_ZERO_ERROR;
121 U_ASSERT(!attributes.bogus);
122 for (int32_t i = 0; i < attributes.size(); i++) {
123 const Option& attr = attributes.getOption(i, localStatus);
124 // No need to check error code, since we already checked
125 // that !bogus
126 whitespace();
127 emit(AT);
128 emit(attr.getName());
129 const Operand& v = attr.getValue();
130 if (!v.isNull()) {
131 emit(EQUALS);
132 emit(v);
133 }
134 }
135 }
136
emit(const Expression & expr)137 void Serializer::emit(const Expression& expr) {
138 emit(LEFT_CURLY_BRACE);
139
140 if (!expr.isFunctionCall()) {
141 // Literal or variable, no annotation
142 emit(expr.getOperand());
143 } else {
144 // Function call or reserved
145 if (!expr.isStandaloneAnnotation()) {
146 // Must be a function call that has an operand
147 emit(expr.getOperand());
148 whitespace();
149 }
150 UErrorCode localStatus = U_ZERO_ERROR;
151 const Operator* rator = expr.getOperator(localStatus);
152 U_ASSERT(U_SUCCESS(localStatus));
153 emit(COLON);
154 emit(rator->getFunctionName());
155 // No whitespace after function name, in case it has
156 // no options. (when there are options, emit(OptionMap) will
157 // emit the leading whitespace)
158 emit(rator->getOptionsInternal());
159 }
160 emitAttributes(expr.getAttributesInternal());
161 emit(RIGHT_CURLY_BRACE);
162 }
163
emit(const PatternPart & part)164 void Serializer::emit(const PatternPart& part) {
165 if (part.isText()) {
166 // Raw text
167 const UnicodeString& text = part.asText();
168 // Re-escape '{'/'}'/'\''|'
169 for (int32_t i = 0; ((int32_t) i) < text.length(); i++) {
170 switch(text[i]) {
171 case PIPE:
172 case BACKSLASH:
173 case LEFT_CURLY_BRACE:
174 case RIGHT_CURLY_BRACE: {
175 emit(BACKSLASH);
176 break;
177 }
178 default:
179 break;
180 }
181 emit(text[i]);
182 }
183 return;
184 }
185 // Markup
186 if (part.isMarkup()) {
187 const Markup& markup = part.asMarkup();
188 emit(LEFT_CURLY_BRACE);
189 if (markup.isClose()) {
190 emit(SLASH);
191 } else {
192 emit(NUMBER_SIGN);
193 }
194 emit(markup.getName());
195 emit(markup.getOptionsInternal());
196 emitAttributes(markup.getAttributesInternal());
197 if (markup.isStandalone()) {
198 emit(SLASH);
199 }
200 emit(RIGHT_CURLY_BRACE);
201 return;
202 }
203 // Expression
204 emit(part.contents());
205 }
206
emit(const Pattern & pat)207 void Serializer::emit(const Pattern& pat) {
208 int32_t len = pat.numParts();
209 // Always quote pattern, which should match the normalized input
210 // if the parser is constructing it correctly
211 emit(LEFT_CURLY_BRACE);
212 emit(LEFT_CURLY_BRACE);
213 for (int32_t i = 0; i < len; i++) {
214 // No whitespace is needed here -- see the `pattern` nonterminal in the grammar
215 emit(pat.getPart(i));
216 }
217 emit(RIGHT_CURLY_BRACE);
218 emit(RIGHT_CURLY_BRACE);
219 }
220
serializeDeclarations()221 void Serializer::serializeDeclarations() {
222 const Binding* bindings = dataModel.getLocalVariablesInternal();
223 U_ASSERT(dataModel.bindingsLen == 0 || bindings != nullptr);
224
225 for (int32_t i = 0; i < dataModel.bindingsLen; i++) {
226 const Binding& b = bindings[i];
227 if (b.isLocal()) {
228 // No whitespace needed here -- see `message` in the grammar
229 emit(ID_LOCAL);
230 whitespace();
231 emit(DOLLAR);
232 emit(b.getVariable());
233 // No whitespace needed here -- see `local-declaration` in the grammar
234 emit(EQUALS);
235 // No whitespace needed here -- see `local-declaration` in the grammar
236 } else {
237 // Input declaration
238 emit(ID_INPUT);
239 // No whitespace needed here -- see `input-declaration` in the grammar
240 }
241 emit(b.getValue());
242 }
243 }
244
serializeSelectors()245 void Serializer::serializeSelectors() {
246 U_ASSERT(!dataModel.hasPattern());
247 const Expression* selectors = dataModel.getSelectorsInternal();
248
249 emit(ID_MATCH);
250 for (int32_t i = 0; i < dataModel.numSelectors(); i++) {
251 // No whitespace needed here -- see `selectors` in the grammar
252 emit(selectors[i]);
253 }
254 }
255
serializeVariants()256 void Serializer::serializeVariants() {
257 U_ASSERT(!dataModel.hasPattern());
258 const Variant* variants = dataModel.getVariantsInternal();
259 for (int32_t i = 0; i < dataModel.numVariants(); i++) {
260 const Variant& v = variants[i];
261 emit(v.getKeys());
262 // No whitespace needed here -- see `variant` in the grammar
263 emit(v.getPattern());
264 }
265 }
266
267
268 // Main (public) serializer method
serialize()269 void Serializer::serialize() {
270 serializeDeclarations();
271 // Pattern message
272 if (dataModel.hasPattern()) {
273 emit(dataModel.getPattern());
274 } else {
275 // Selectors message
276 serializeSelectors();
277 serializeVariants();
278 }
279 }
280
281 } // namespace message2
282 U_NAMESPACE_END
283
284 #endif /* #if !UCONFIG_NO_MF2 */
285
286 #endif /* #if !UCONFIG_NO_FORMATTING */
287
288