<?xml version="1.0" encoding="UTF-8" ?>
<!DOCTYPE supplementalData SYSTEM "../../common/dtd/ldmlSupplemental.dtd">
<!--
Copyright © 1991-2018 Unicode, Inc.
CLDR data files are interpreted according to the LDML specification (http://unicode.org/reports/tr35/)
For terms of use, see http://www.unicode.org/copyright.html
-->
<supplementalData>
  <version number="$Revision: 14381 $"/>
  <transforms>
    <transform source="Zawgyi" target="my" direction="forward" alias="my-t-my-s0-zawgyi">
      <tRule><![CDATA[
# This transform converts Zawgyi "encoded" Burmese into proper
# unicode. Zawgyi is a popular encoding scheme in Myanmar. It uses
# the Myanmar unicode range but assigns different characters or
# glyphs to some codepoints. In addition to the character mapping,
# there is reordering of codepoints needed to match the expected
# unicode order. This reordering is context-based.
#
# This transform is done in two main stages:
#   (1) Map all Zawgyi codepoints to their Unicode counterpart.
#   (2) Perform reordering.
#
# Each "::Null;" below ends one pass and starts the next, so the rules
# after it see the complete output of all the rules before it.

# Modern Burmese digits & Unicode code points.
$nondigits = [^\u1040-\u1049];
$consonant = [\u1000-\u1021];
$vowelsign = [\u102B-\u1030\u1032];  # Unicode vowel signs except E (1031)
$vowelsAndConsonants = [\u1000-\u102a];

$umedial = [\u103B-\u103E];  # Medial codepoints in Unicode
# Union of vowel signs and medials. Every member must be written as a full
# \uXXXX escape: a malformed escape silently drops ANUSVARA (1036) from the set.
$vowelmedial = [\u102B-\u1030\u1032\u1036\u1037\u103A-\u103F];
$ukinzi = \u1004\u103A\u1039;  # Codepoints representing kinzi in Unicode

# Zawgyi medial ra has multiple representations
$zmedialra = [\u103B\u107E-\u1084];

$wspace = [\u0020\u00a0\u1680\u2000-\u200d\u2060\u202f\u205f\u3000\ufeff];


####
#### STAGE 1: CODEPOINT MAPPING FROM ZAWGYI TO UNICODE
####

# Kinzi (predefined ligatures)
# Move base character to the right
($consonant) \u103A \u1064 → $ukinzi $1 \u103B;
($consonant) \u1064 → $ukinzi $1;
\u1064 → $ukinzi;

# Special cases moving base character to right before vowel signs
($consonant) \u108B → $ukinzi $1 \u102D;
($consonant) \u108C → $ukinzi $1 \u102E;
($consonant) \u108D → $ukinzi $1 \u1036;

# Special cases moving Kinzi block to left
($consonant) \u103A \u1033 \u108B → $ukinzi $1 \u103B \u102D \u102F;
($consonant) \u103A \u108b → $ukinzi $1 \u103B \u102D ;
($consonant) \u103A \u108C → $ukinzi $1 \u103B \u102E ;
($consonant) \u103A \u108D → $ukinzi $1 \u103B \u1036 ;
($consonant) \u103A \u108e → $1 \u103B \u102D \u1036 ;

\u108B → $ukinzi \u102D ;
\u108C → $ukinzi \u102E ;
\u108D → $ukinzi \u1036 ;

# Consonants (only the ones that have to change)
\u106A → \u1009 ;  # NYA
\u106B → \u100A ;
\u108F → \u1014 ;
\u1090 → \u101B ;
\u1086 → \u103F ;

# yapin
# The set lists its members directly; a "|" inside [...] would be a literal
# member of the set, not an alternation operator.
[\u103A\u107d] → \u103B ;

# yayit
($zmedialra)+ → \u103C ;

# wasway
\u103C* \u108A → \u103D \u103E;  # To avoid duplicate medials
\u103C → \u103D ;

# hatoh
# As above, no "|" between the set members.
[\u103D\u1087] → \u103E ;
\u1088 → \u103E \u102F ;
\u1089 → \u103E \u1030 ;

# Vowels
\u1033 → \u102F ;
\u1034 → \u1030 ;

# asat
\u1039 → \u103A ;

# lower dot
[\u1094\u1095] → \u1037 ;

# Special cases for 1025 vs 1009;
\u1025 \u1039 → \u1009 \u103a;
\u1025 \u1061 → \u1009 \u1039 \u1001;
\u1025 \u1062 → \u1009 \u1039 \u1002;
\u1025 \u1065 → \u1009 \u1039 \u1005;
\u1025 \u1068 → \u1009 \u1039 \u1007;
\u1025 \u1076 → \u1009 \u1039 \u1013;
\u1025 \u1078 → \u1009 \u1039 \u1015;
\u1025 \u107A → \u1009 \u1039 \u1017;
\u1025 \u1079 → \u1009 \u1039 \u1016;

# Stacked Consonants
\u105A → \u102B \u103A ;
\u1060 → \u1039 \u1000 ;
\u1061 → \u1039 \u1001 ;
\u1062 → \u1039 \u1002 ;
\u1063 → \u1039 \u1003 ;
\u1065 → \u1039 \u1005 ;
[\u1066\u1067] → \u1039 \u1006 ;
\u1068 → \u1039 \u1007 ;
\u1069 → \u1039 \u1008 ;
\u106C → \u1039 \u100B ;
\u106D → \u1039 \u100C ;
\u1070 → \u1039 \u100F ;
[\u1071\u1072] → \u1039 \u1010 ;
\u1096 → \u1039 \u1010 \u103D;
[\u1073\u1074] → \u1039 \u1011 ;
\u1075 → \u1039 \u1012 ;
\u1076 → \u1039 \u1013 ;
\u1077 → \u1039 \u1014 ;
\u1078 → \u1039 \u1015 ;
\u1079 → \u1039 \u1016 ;
\u107A → \u1039 \u1017 ;
[\u107B\u1093] → \u1039 \u1018 ;
\u107C → \u1039 \u1019 ;
\u1085 → \u1039 \u101C ;
\u108E → \u102D \u1036 ;

# Pre-defined ligatures
\u106E → \u100D\u1039\u100D ;
\u106F → \u100D\u1039\u100E ;
\u1091 → \u100F\u1039\u100D ;
\u1092 → \u100B\u1039\u100C ;
\u1097 → \u100B\u1039\u100B ;
\u104E → \u104E\u1004\u103A\u1038 ;


####
#### STAGE 1.01: Digits 0 and 4 used instead of letters
# Case of MYANMAR digit being used instead of a letter
# Lone digit zero and four at start
::Null;
^ \u1040 ($nondigits) → \u101D $1;
^ \u1044 ($nondigits) → | \u104E $1 ;

# Lone digit zero or four at end
($nondigits) \u1040 $ → $1 \u101D;
($nondigits) \u1044 $ → $1 \u104e;

# Evowel and dependent vowel signs before 0 or 4 only
# -> convert to the consonant.
([\u102b-\u103f]) \u1040 ($nondigits) → $1 \u101d $2;
([\u102b-\u103f]) \u1044 ($nondigits) → $1 \u104E $2;


####
#### STAGE 1.1: Strip spaces immediately before combining characters.
####            Move e-vowel after consonants and medials
#### Now every codepoint is Unicode. This starts conversion
#### from semi-visual order to logical order.
####
::Null;

# Don't remove spaces before E vowel or medial Ra at this stage
# (1031 and 103c are deliberately absent from the set in the second rule).
# The first rule moves a dot below in front of the whitespace that precedes it.
($wspace) \u1037 → \u1037 $1;
($wspace+) ([\u102b-\u1030\u1032-\u103b\u103d\u103e]) → $2;

# Remove a duplicate early
\u1037+ → \u1037;

# Move e-vowel after medials and consonants.
\u1031+ $ukinzi ($consonant) → $ukinzi $1 \u1031;
\u1031+ \u1037+ ($consonant) → $1 \u1031 \u1037 ;
\u1031+ \u103c ($consonant) → $1 \u103c \u1031;

# Move medials other than 103c before the 1031. Leave 103c for
# the next consonant.
\u1031+ ($consonant) ([\u103b\u103d\u103e]+) → $1 $2 \u1031;
\u1031+ ($vowelsAndConsonants) → $1 \u1031;


####
#### STAGE 2: POST REORDERING RULES FOR UNICODE RENDERING
####
::Null;

\u103b \u103a → \u103a \u103b;

# Simpler replacements for Zawgyi 1025
\u1025 \u102E → \u1026;

# Asat and dot below reordering, to Unicode NFC.
\u103A\u1037 → \u1037\u103A;

# Reorder some vowel signs
\u1036 ($umedial*) ($vowelsign+) → $1 $2 \u1036 ;
([\u102B\u102C\u102F\u1030]) ([\u102D\u102E\u1032]) → $2 $1;

# Move ra medial which precedes consonant, but not other medials.
\u103C ($consonant) → $1 \u103C;


####
#### Stage 3
#### Move \u1036, and \u103C after consonants.
::Null;

($umedial) \u1039 ($consonant) → \u1039 $2 $1;

\u103C \u103A \u1039 ($consonant) → \u103A \u1039 $1 \u103C;

\u1036 ($umedial+) → $1 \u1036;


####
#### Stage 4
#### Reordering medials, dot below, contractions, E sign, and asat.
::Null;

# Reorder the medials
([\u103C\u103D\u103E]+) \u103B → \u103B $1;
([\u103D\u103E]+) \u103C → \u103C $1;
\u103E\u103D → \u103D\u103E ;

# Contractions with vowel signs
([\u1031]+) ($vowelsign*) \u1039 ($consonant) → \u1039 $3 $1 $2;
($vowelsign+) \u1039 ($consonant) → \u1039 $2 $1;

# Move vowel sign E \u1031 after medials, but not across consonants
($umedial*) ([\u1031]+) ($umedial*) → $1 $3 $2;

# Reorder dot below after medials and vowel diacritics
\u1037 ([\u102D-\u1030\u1032\u1036\u103b-\u103e]+) → $1 \u1037;

# Move vowel signs after medials
($vowelsign+) ($umedial+) → $2 $1;

# Reorder modifiers and asat
($consonant) ([\u102B-\u1032\u1036\u103B-\u103E]) \u103A ($consonant) → $1 \u103A $2 $3;


####
#### Stage 5.  More reorderings
#### Vowel signs after medials, sort medials,
####
::Null;

# Replace CA + YA with JHA after moving other things beyond the medials.
\u1005 \u103b → \u1008;

# More moving vowel signs after medials
([\u102b-\u1032]) ($umedial) → $2 $1;

# Sort the medials
([\u103C\u103D\u103E]) \u103B → \u103B $1;
([\u103D\u103E]) \u103C → \u103C $1;
\u103E\u103D → \u103D\u103E ;

# Move visarga after other signs
\u1038 ($vowelmedial) → $1 \u1038;

# Reorder
\u1036 \u102f → \u102f \u1036;


###
### Stage 6
### Finish conflicting and extra diacritics. Remove some white space
###
::Null;

# Fix duplicate combiners
\u102D \u102D+ → \u102D;
\u102E \u102E+ → \u102E;
\u102F \u102F+ → \u102F;
\u1030 \u1030+ → \u1030;
\u1032 \u1032+ → \u1032;
\u1036 \u1036+ → \u1036;
\u1037 \u1037+ → \u1037;
\u1039 \u1039+ → \u1039;
\u103a \u103a+ → \u103a;
\u103b \u103b+ → \u103b;
\u103c \u103c+ → \u103c;
\u103d \u103d+ → \u103d;
\u103e \u103e+ → \u103e;  # http://unicode.org/cldr/trac/ticket/10386

# Fix overlapping signs
\u102F [\u1030\u103a] → \u102F;
\u102D \u102E → \u102E;

# Remove space directly before diacritics.
($wspace)+ ([\u102b-\u1032\u1036-\u103e]) → $2;

# Remove ZWSP at start and end
^ \u200b+ → ;
\u200b+ $ → ;

# Fix multiple spaces around ZWSP to single ZWSP.
$wspace* \u200b $wspace* → \u200b;
      ]]></tRule>
    </transform>
  </transforms>
</supplementalData>