1 // © 2022 and later: Unicode, Inc. and others. 2 // License & terms of use: http://www.unicode.org/copyright.html 3 4 package com.ibm.icu.message2; 5 6 import java.util.ArrayList; 7 import java.util.List; 8 9 import com.ibm.icu.message2.Mf2DataModel.Expression; 10 import com.ibm.icu.message2.Mf2DataModel.Pattern; 11 import com.ibm.icu.message2.Mf2DataModel.SelectorKeys; 12 import com.ibm.icu.message2.Mf2DataModel.Text; 13 import com.ibm.icu.message2.Mf2DataModel.Value; 14 import com.ibm.icu.message2.Mf2Parser.EventHandler; 15 import com.ibm.icu.message2.Mf2Serializer.Token.Type; 16 17 // TODO: find a better name for this class 18 class Mf2Serializer implements EventHandler { 19 private String input; 20 private final List<Token> tokens = new ArrayList<>(); 21 22 static class Token { 23 final String name; 24 final int begin; 25 final int end; 26 final Kind kind; 27 private final Type type; 28 private final String input; 29 30 enum Kind { 31 TERMINAL, 32 NONTERMINAL_START, 33 NONTERMINAL_END 34 } 35 36 enum Type { 37 MESSAGE, 38 PATTERN, 39 TEXT, 40 PLACEHOLDER, 41 EXPRESSION, 42 OPERAND, 43 VARIABLE, 44 IGNORE, 45 FUNCTION, 46 OPTION, 47 NAME, 48 NMTOKEN, 49 LITERAL, 50 SELECTOR, 51 VARIANT, 52 DECLARATION, VARIANTKEY, DEFAULT, 53 } 54 Token(Kind kind, String name, int begin, int end, String input)55 Token(Kind kind, String name, int begin, int end, String input) { 56 this.kind = kind; 57 this.name = name; 58 this.begin = begin; 59 this.end = end; 60 this.input = input; 61 switch (name) { 62 case "Message": type = Type.MESSAGE; break; 63 case "Pattern": type = Type.PATTERN; break; 64 case "Text": type = Type.TEXT; break; 65 case "Placeholder": type = Type.PLACEHOLDER; break; 66 case "Expression": type = Type.EXPRESSION; break; 67 case "Operand": type = Type.OPERAND; break; 68 case "Variable": type = Type.VARIABLE; break; 69 case "Function": type = Type.FUNCTION; break; 70 case "Option": type = Type.OPTION; break; 71 case "Annotation": type = Type.IGNORE; break; 72 case "Name": type = Type.NAME; break; 73 case "Nmtoken": type = Type.NMTOKEN; break; 74 case "Literal": type = Type.LITERAL; break; 75 case "Selector": type = Type.SELECTOR; break; 76 case "Variant": type = Type.VARIANT; break; 77 case "VariantKey": type = Type.VARIANTKEY; break; 78 case "Declaration": type = Type.DECLARATION; break; 79 80 case "Markup": type = Type.IGNORE; break; 81 case "MarkupStart": type = Type.IGNORE; break; 82 case "MarkupEnd": type = Type.IGNORE; break; 83 84 case "'['": type = Type.IGNORE; break; 85 case "']'": type = Type.IGNORE; break; 86 case "'{'": type = Type.IGNORE; break; 87 case "'}'": type = Type.IGNORE; break; 88 case "'='": type = Type.IGNORE; break; 89 case "'match'": type = Type.IGNORE; break; 90 case "'when'": type = Type.IGNORE; break; 91 case "'let'": type = Type.IGNORE; break; 92 case "'*'": type = Type.DEFAULT; break; 93 default: 94 throw new IllegalArgumentException("Parse error: Unknown token \"" + name + "\""); 95 } 96 } 97 isStart()98 boolean isStart() { 99 return Kind.NONTERMINAL_START.equals(kind); 100 } 101 isEnd()102 boolean isEnd() { 103 return Kind.NONTERMINAL_END.equals(kind); 104 } 105 isTerminal()106 boolean isTerminal() { 107 return Kind.TERMINAL.equals(kind); 108 } 109 110 @Override toString()111 public String toString() { 112 int from = begin == -1 ? 0 : begin; 113 String strval = end == -1 ? input.substring(from) : input.substring(from, end); 114 return String.format("Token(\"%s\", [%d, %d], %s) // \"%s\"", name, begin, end, kind, strval); 115 } 116 } 117 Mf2Serializer()118 Mf2Serializer() {} 119 120 @Override reset(CharSequence input)121 public void reset(CharSequence input) { 122 this.input = input.toString(); 123 tokens.clear(); 124 } 125 126 @Override startNonterminal(String name, int begin)127 public void startNonterminal(String name, int begin) { 128 tokens.add(new Token(Token.Kind.NONTERMINAL_START, name, begin, -1, input)); 129 } 130 131 @Override endNonterminal(String name, int end)132 public void endNonterminal(String name, int end) { 133 tokens.add(new Token(Token.Kind.NONTERMINAL_END, name, -1, end, input)); 134 } 135 136 @Override terminal(String name, int begin, int end)137 public void terminal(String name, int begin, int end) { 138 tokens.add(new Token(Token.Kind.TERMINAL, name, begin, end, input)); 139 } 140 141 @Override whitespace(int begin, int end)142 public void whitespace(int begin, int end) { 143 } 144 build()145 Mf2DataModel build() { 146 if (!tokens.isEmpty()) { 147 Token firstToken = tokens.get(0); 148 if (Type.MESSAGE.equals(firstToken.type) && firstToken.isStart()) { 149 return parseMessage(); 150 } 151 } 152 return null; 153 } 154 parseMessage()155 private Mf2DataModel parseMessage() { 156 Mf2DataModel.Builder result = Mf2DataModel.builder(); 157 158 for (int i = 0; i < tokens.size(); i++) { 159 Token token = tokens.get(i); 160 switch (token.type) { 161 case MESSAGE: 162 if (token.isStart() && i == 0) { 163 // all good 164 } else if (token.isEnd() && i == tokens.size() - 1) { 165 // We check if this last token is at the end of the input 166 if (token.end != input.length()) { 167 String leftover = input.substring(token.end) 168 .replace("\n", "") 169 .replace("\r", "") 170 .replace(" ", "") 171 .replace("\t", "") 172 ; 173 if (!leftover.isEmpty()) { 174 throw new IllegalArgumentException("Parse error: Content detected after the end of the message: '" 175 + input.substring(token.end) + "'"); 176 } 177 } 178 return result.build(); 179 } else { 180 // End of message, we ignore the rest 181 throw new IllegalArgumentException("Parse error: Extra tokens at the end of the message"); 182 } 183 break; 184 case PATTERN: 185 ParseResult<Pattern> patternResult = parsePattern(i); 186 i = patternResult.skipLen; 187 result.setPattern(patternResult.resultValue); 188 break; 189 case DECLARATION: 190 Declaration declaration = new Declaration(); 191 i = parseDeclaration(i, declaration); 192 result.addLocalVariable(declaration.variableName, declaration.expr); 193 break; 194 case SELECTOR: 195 ParseResult<List<Expression>> selectorResult = parseSelector(i); 196 result.addSelectors(selectorResult.resultValue); 197 i = selectorResult.skipLen; 198 break; 199 case VARIANT: 200 ParseResult<Variant> variantResult = parseVariant(i); 201 i = variantResult.skipLen; 202 Variant variant = variantResult.resultValue; 203 result.addVariant(variant.getSelectorKeys(), variant.getPattern()); 204 break; 205 case IGNORE: 206 break; 207 default: 208 throw new IllegalArgumentException("Parse error: parseMessage UNEXPECTED TOKEN: '" + token + "'"); 209 } 210 } 211 throw new IllegalArgumentException("Parse error: Error parsing MessageFormatter"); 212 } 213 parseVariant(int startToken)214 private ParseResult<Variant> parseVariant(int startToken) { 215 Variant.Builder result = Variant.builder(); 216 217 for (int i = startToken; i < tokens.size(); i++) { 218 Token token = tokens.get(i); 219 switch (token.type) { 220 case VARIANT: 221 if (token.isStart()) { // all good 222 } else if (token.isEnd()) { 223 return new ParseResult<>(i, result.build()); 224 } 225 break; 226 case LITERAL: 227 result.addSelectorKey(input.substring(token.begin + 1, token.end - 1)); 228 break; 229 case NMTOKEN: 230 result.addSelectorKey(input.substring(token.begin, token.end)); 231 break; 232 case DEFAULT: 233 result.addSelectorKey("*"); 234 break; 235 case PATTERN: 236 ParseResult<Pattern> patternResult = parsePattern(i); 237 i = patternResult.skipLen; 238 result.setPattern(patternResult.resultValue); 239 break; 240 case VARIANTKEY: 241 // variant.variantKey = new VariantKey(input.substring(token.begin, token.end)); 242 break; 243 case IGNORE: 244 break; 245 default: 246 throw new IllegalArgumentException("Parse error: parseVariant UNEXPECTED TOKEN: '" + token + "'"); 247 } 248 } 249 throw new IllegalArgumentException("Parse error: Error parsing Variant"); 250 } 251 parseSelector(int startToken)252 private ParseResult<List<Expression>> parseSelector(int startToken) { 253 List<Expression> result = new ArrayList<>(); 254 255 for (int i = startToken; i < tokens.size(); i++) { 256 Token token = tokens.get(i); 257 switch (token.type) { 258 case SELECTOR: 259 if (token.isStart()) { // all good, do nothing 260 } else if (token.isEnd()) { 261 return new ParseResult<>(i, result); 262 } 263 break; 264 case EXPRESSION: 265 ParseResult<Expression> exprResult = parseExpression(i); 266 i = exprResult.skipLen; 267 result.add(exprResult.resultValue); 268 break; 269 case IGNORE: 270 break; 271 default: 272 throw new IllegalArgumentException("Parse error: parseSelector UNEXPECTED TOKEN: '" + token + "'"); 273 } 274 } 275 throw new IllegalArgumentException("Parse error: Error parsing selectors"); 276 } 277 parseDeclaration(int startToken, Declaration declaration)278 private int parseDeclaration(int startToken, Declaration declaration) { 279 for (int i = startToken; i < tokens.size(); i++) { 280 Token token = tokens.get(i); 281 switch (token.type) { 282 case DECLARATION: 283 if (token.isStart()) { // all good 284 } else if (token.isEnd()) { 285 return i; 286 } 287 break; 288 case VARIABLE: 289 declaration.variableName = input.substring(token.begin + 1, token.end); 290 break; 291 case EXPRESSION: 292 ParseResult<Expression> exprResult = parseExpression(i); 293 i = exprResult.skipLen; 294 declaration.expr = exprResult.resultValue; 295 break; 296 case IGNORE: 297 break; 298 default: 299 throw new IllegalArgumentException("Parse error: parseDeclaration UNEXPECTED TOKEN: '" + token + "'"); 300 } 301 } 302 throw new IllegalArgumentException("Parse error: Error parsing Declaration"); 303 } 304 parsePattern(int startToken)305 private ParseResult<Pattern> parsePattern(int startToken) { 306 Pattern.Builder result = Pattern.builder(); 307 308 for (int i = startToken; i < tokens.size(); i++) { 309 Token token = tokens.get(i); 310 switch (token.type) { 311 case TEXT: 312 Text text = new Text(input.substring(token.begin, token.end)); 313 result.add(text); 314 break; 315 case PLACEHOLDER: 316 break; 317 case EXPRESSION: 318 ParseResult<Expression> exprResult = parseExpression(i); 319 i = exprResult.skipLen; 320 result.add(exprResult.resultValue); 321 break; 322 case VARIABLE: 323 case IGNORE: 324 break; 325 case PATTERN: 326 if (token.isStart() && i == startToken) { // all good, do nothing 327 } else if (token.isEnd()) { 328 return new ParseResult<>(i, result.build()); 329 } 330 break; 331 default: 332 throw new IllegalArgumentException("Parse error: parsePattern UNEXPECTED TOKEN: '" + token + "'"); 333 } 334 } 335 throw new IllegalArgumentException("Parse error: Error parsing Pattern"); 336 } 337 338 static class Option { 339 String name; 340 Value value; 341 } 342 343 static class Declaration { 344 String variableName; 345 Expression expr; 346 } 347 348 static class Variant { 349 private final SelectorKeys selectorKeys; 350 private final Pattern pattern; 351 Variant(Builder builder)352 private Variant(Builder builder) { 353 this.selectorKeys = builder.selectorKeys.build(); 354 this.pattern = builder.pattern; 355 } 356 357 /** 358 * Creates a builder. 359 * 360 * @return the Builder. 361 */ builder()362 public static Builder builder() { 363 return new Builder(); 364 } 365 getSelectorKeys()366 public SelectorKeys getSelectorKeys() { 367 return selectorKeys; 368 } 369 getPattern()370 public Pattern getPattern() { 371 return pattern; 372 } 373 374 public static class Builder { 375 private final SelectorKeys.Builder selectorKeys = SelectorKeys.builder(); 376 private Pattern pattern = Pattern.builder().build(); 377 378 // Prevent direct creation Builder()379 private Builder() { 380 } 381 setSelectorKeys(SelectorKeys selectorKeys)382 public Builder setSelectorKeys(SelectorKeys selectorKeys) { 383 this.selectorKeys.addAll(selectorKeys.getKeys()); 384 return this; 385 } 386 addSelectorKey(String selectorKey)387 public Builder addSelectorKey(String selectorKey) { 388 this.selectorKeys.add(selectorKey); 389 return this; 390 } 391 setPattern(Pattern pattern)392 public Builder setPattern(Pattern pattern) { 393 this.pattern = pattern; 394 return this; 395 } 396 build()397 public Variant build() { 398 return new Variant(this); 399 } 400 } 401 } 402 403 static class ParseResult<T> { 404 final int skipLen; 405 final T resultValue; 406 ParseResult(int skipLen, T resultValue)407 public ParseResult(int skipLen, T resultValue) { 408 this.skipLen = skipLen; 409 this.resultValue = resultValue; 410 } 411 } 412 parseExpression(int startToken)413 private ParseResult<Expression> parseExpression(int startToken) { 414 Expression.Builder result = Expression.builder(); 415 416 for (int i = startToken; i < tokens.size(); i++) { 417 Token token = tokens.get(i); 418 switch (token.type) { 419 case EXPRESSION: // intentional fall-through 420 case PLACEHOLDER: 421 if (token.isStart() && i == startToken) { 422 // all good 423 } else if (token.isEnd()) { 424 return new ParseResult<>(i, result.build()); 425 } 426 break; 427 case FUNCTION: 428 result.setFunctionName(input.substring(token.begin + 1, token.end)); 429 break; 430 case LITERAL: 431 result.setOperand(Value.builder() 432 .setLiteral(input.substring(token.begin + 1, token.end - 1)) 433 .build()); 434 break; 435 case VARIABLE: 436 result.setOperand(Value.builder() 437 .setVariableName(input.substring(token.begin + 1, token.end)) 438 .build()); 439 break; 440 case OPTION: 441 Option option = new Option(); 442 i = parseOptions(i, option); 443 result.addOption(option.name, option.value); 444 break; 445 case OPERAND: 446 break; 447 case IGNORE: 448 break; 449 default: 450 throw new IllegalArgumentException("Parse error: parseExpression UNEXPECTED TOKEN: '" + token + "'"); 451 } 452 } 453 throw new IllegalArgumentException("Parse error: Error parsing Expression"); 454 } 455 parseOptions(int startToken, Option option)456 private int parseOptions(int startToken, Option option) { 457 for (int i = startToken; i < tokens.size(); i++) { 458 Token token = tokens.get(i); 459 switch (token.type) { 460 case OPTION: 461 if (token.isStart() && i == startToken) { 462 // all good 463 } else if (token.isEnd()) { 464 return i; 465 } 466 break; 467 case NAME: 468 option.name = input.substring(token.begin, token.end); 469 break; 470 case LITERAL: 471 option.value = Value.builder() 472 .setLiteral(input.substring(token.begin + 1, token.end - 1)) 473 .build(); 474 break; 475 case NMTOKEN: 476 option.value = Value.builder() 477 .setLiteral(input.substring(token.begin, token.end)) 478 .build(); 479 break; 480 case VARIABLE: 481 option.value = Value.builder() 482 .setVariableName(input.substring(token.begin + 1, token.end)) 483 .build(); 484 break; 485 case IGNORE: 486 break; 487 default: 488 throw new IllegalArgumentException("Parse error: parseOptions UNEXPECTED TOKEN: '" + token + "'"); 489 } 490 } 491 throw new IllegalArgumentException("Parse error: Error parsing Option"); 492 } 493 dataModelToString(Mf2DataModel dataModel)494 static String dataModelToString(Mf2DataModel dataModel) { 495 return dataModel.toString(); 496 } 497 } 498