// Copyright 2013 The Flutter Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

// TODO(mdebbar): To reduce the size of generated code, we could pack the data
//   into a smaller format, e.g:
//
// ```dart
// const _rawData = [
//   0x000A, 0x000A, 1,
//   0x000B, 0x000C, 2,
//   0x000D, 0x000D, 3,
//   0x0020, 0x0020, 4,
//   // ...
// ];
// ```
//
// Then we could lazily build the lookup instance on demand.
import 'dart:io';
import 'package:path/path.dart' as path;

/// A tuple that holds a [start] and [end] of a unicode range and a [property].
///
/// Both [start] and [end] are inclusive code point values.
class PropertyTuple {
  const PropertyTuple(this.start, this.end, this.property);

  /// First code point of the range (inclusive).
  final int start;

  /// Last code point of the range (inclusive).
  final int end;

  /// Property name exactly as it appears in the properties file
  /// (e.g. `Double_Quote`).
  final String property;

  /// Checks if there's an overlap between this tuple's range and [other]'s
  /// range.
  bool isOverlapping(PropertyTuple other) {
    return start <= other.end && end >= other.start;
  }

  /// Checks if the [other] tuple is adjacent to this tuple.
  ///
  /// Two tuples are considered adjacent if:
  /// - The new tuple's range immediately follows this tuple's range, and
  /// - The new tuple has the same property as this tuple.
  bool isAdjacent(PropertyTuple other) {
    return other.start == end + 1 && property == other.property;
  }

  /// Merges the ranges of the two [PropertyTuple]s.
  ///
  /// [extension] must be adjacent to this tuple (see [isAdjacent]); this is
  /// only checked by an `assert`. The result spans from this tuple's [start]
  /// to [extension]'s [end] and keeps this tuple's [property].
  PropertyTuple extendRange(PropertyTuple extension) {
    assert(isAdjacent(extension));
    return PropertyTuple(start, extension.end, property);
  }
}

/// Usage (from the root of the project):
///
/// ```
/// dart tool/unicode_sync_script.dart <path/to/word/break/properties>
/// ```
///
/// This script parses the unicode word break properties(1) and generates Dart
/// code(2) that can perform lookups in the unicode ranges to find what property
/// a letter has.
///
/// (1) The properties file can be downloaded from:
///     https://www.unicode.org/Public/11.0.0/ucd/auxiliary/WordBreakProperty.txt
///
/// (2) The codegen'd Dart file is located at:
///     lib/src/text/word_break_properties.dart
///
/// When no argument is given, a usage message is printed to stderr and the
/// process exit code is set to 64 instead of crashing on `arguments[0]`.
Future<void> main(List<String> arguments) async {
  if (arguments.isEmpty) {
    stderr.writeln(
      'Usage: dart tool/unicode_sync_script.dart '
      '<path/to/word/break/properties>',
    );
    exitCode = 64;
    return;
  }

  final String propertiesFile = arguments[0];
  // The destination is resolved relative to the directory holding this script.
  final String codegenFile = path.join(
    path.dirname(Platform.script.toFilePath()),
    '../lib/src/text/word_break_properties.dart',
  );
  // Await the sync so that failures surface as an error from `main` rather
  // than being dropped.
  await WordBreakPropertiesSyncer(propertiesFile, codegenFile).perform();
}

/// Base class that provides common logic for syncing all kinds of unicode
/// properties (e.g. word break properties, line break properties, etc).
///
/// Subclasses implement the [template] method which receives as argument the
/// header extracted by [extractHeader] and the list of data parsed by
/// [processLines].
abstract class PropertiesSyncer {
  PropertiesSyncer(this._src, this._dest);

  /// Path of the properties file to read.
  final String _src;

  /// Path of the Dart file to generate.
  final String _dest;

  /// Reads the source file, renders [template] and writes the result to the
  /// destination file, replacing any previous content.
  ///
  /// The returned future completes once the file has been written, and
  /// completes with an error if reading, parsing or writing fails.
  Future<void> perform() async {
    final List<String> lines = await File(_src).readAsLines();
    final List<String> header = extractHeader(lines);
    final List<PropertyTuple> data = processLines(lines);

    await File(_dest).writeAsString(template(header, data));
  }

  /// Returns the full text of the generated file for the given [header] lines
  /// and parsed [data].
  String template(List<String> header, List<PropertyTuple> data);
}

/// Syncs Unicode's word break properties.
class WordBreakPropertiesSyncer extends PropertiesSyncer {
  WordBreakPropertiesSyncer(String src, String dest) : super(src, dest);

  @override
  String template(List<String> header, List<PropertyTuple> data) {
    return '''
// AUTO-GENERATED FILE.
// Generated by: bin/unicode_sync_script.dart
//
// Source:
// ${header.join('\n// ')}

import 'unicode_range.dart';

CharProperty getCharProperty(String text, int index) {
  if (index < 0 || index >= text.length) {
    return null;
  }
  return lookup.find(text.codeUnitAt(index));
}

enum CharProperty {
  ${getEnumValues(data).join(',\n  ')}
}

const UnicodePropertyLookup<CharProperty> lookup =
    UnicodePropertyLookup<CharProperty>([
  ${getLookupEntries(data).join(',\n  ')}
]);
''';
  }

  /// Returns the distinct property names found in [data], normalized with
  /// [normalizePropertyName], in order of first appearance.
  Iterable<String> getEnumValues(List<PropertyTuple> data) {
    return Set<String>.from(
            data.map<String>((PropertyTuple tuple) => tuple.property))
        .map(normalizePropertyName);
  }

  /// Returns one `UnicodeRange(...)` source expression per merged range.
  ///
  /// The ranges are sorted by start, verified to be non-overlapping and then
  /// adjacent ranges with the same property are merged. Sorting is done on a
  /// copy so the caller's [data] list keeps its original order.
  ///
  /// Throws an [Exception] if any two ranges overlap.
  Iterable<String> getLookupEntries(List<PropertyTuple> data) {
    final List<PropertyTuple> sorted = List<PropertyTuple>.of(data)
      ..sort(
        // Ranges are expected not to overlap (verified right below), so
        // ordering by the start of each range is sufficient.
        (PropertyTuple tuple1, PropertyTuple tuple2) =>
            tuple1.start.compareTo(tuple2.start),
      );
    verifyNoOverlappingRanges(sorted);
    return combineAdjacentRanges(sorted).map(generateLookupEntry);
  }

  /// Returns the `UnicodeRange(start, end, CharProperty.x)` expression for
  /// [tuple], with the bounds written in hexadecimal.
  String generateLookupEntry(PropertyTuple tuple) {
    final String propertyStr =
        'CharProperty.${normalizePropertyName(tuple.property)}';
    return 'UnicodeRange(${toHex(tuple.start)}, ${toHex(tuple.end)}, $propertyStr)';
  }
}

/// Merges consecutive entries of [data] that are adjacent (see
/// [PropertyTuple.isAdjacent]) into a single range.
///
/// [data] is expected to be sorted by range start. Returns a new list; an
/// empty [data] yields an empty list.
///
/// Example:
///
///     UnicodeRange(0x01C4, 0x0293, CharProperty.ALetter),
///     UnicodeRange(0x0294, 0x0294, CharProperty.ALetter),
///     UnicodeRange(0x0295, 0x02AF, CharProperty.ALetter),
///
/// will get combined into:
///
///     UnicodeRange(0x01C4, 0x02AF, CharProperty.ALetter)
List<PropertyTuple> combineAdjacentRanges(List<PropertyTuple> data) {
  final List<PropertyTuple> result = <PropertyTuple>[];
  for (final PropertyTuple tuple in data) {
    if (result.isNotEmpty && result.last.isAdjacent(tuple)) {
      // Grow the most recent range instead of starting a new one.
      result.last = result.last.extendRange(tuple);
    } else {
      result.add(tuple);
    }
  }
  return result;
}

/// Returns the first code point of [range], which is either a single
/// hexadecimal value (`037F`) or a `start..end` pair (`00C0..00D6`).
///
/// Throws a [FormatException] if the value is not valid hexadecimal.
int getRangeStart(String range) {
  return int.parse(range.split('..')[0], radix: 16);
}

/// Returns the last code point of [range], which is either a single
/// hexadecimal value (`037F`) or a `start..end` pair (`00C0..00D6`).
///
/// For a single value the end equals the start. Throws a [FormatException] if
/// the value is not valid hexadecimal.
int getRangeEnd(String range) {
  if (range.contains('..')) {
    return int.parse(range.split('..')[1], radix: 16);
  }
  return int.parse(range, radix: 16);
}

/// Formats [value] as an upper-case hexadecimal literal, zero-padded to at
/// least four digits (e.g. `10` becomes `0x000A`).
String toHex(int value) {
  return '0x${value.toRadixString(16).padLeft(4, '0').toUpperCase()}';
}

/// Throws an [Exception] if any two consecutive entries of [data] overlap.
///
/// Only neighbours are compared, so [data] must already be sorted by range
/// start for the check to be exhaustive.
void verifyNoOverlappingRanges(List<PropertyTuple> data) {
  for (int i = 1; i < data.length; i++) {
    if (data[i].isOverlapping(data[i - 1])) {
      throw Exception('Data contains overlapping ranges.');
    }
  }
}

/// Returns the non-empty lines of [lines] that come before the first line
/// containing `=======`.
List<String> extractHeader(List<String> lines) {
  final List<String> headerLines = <String>[];
  for (final String line in lines) {
    if (line.contains('=======')) {
      break;
    }
    if (line.isNotEmpty) {
      headerLines.add(line);
    }
  }
  return headerLines;
}

/// Parses every data line of [lines] into a [PropertyTuple].
///
/// Comments are stripped first; lines that are empty or contain only
/// whitespace after stripping are skipped.
List<PropertyTuple> processLines(List<String> lines) {
  return lines
      // Trim so that whitespace-only remnants (e.g. an indented comment) are
      // filtered out instead of being handed to the parser.
      .map((String line) => removeCommentFromLine(line).trim())
      .where((String line) => line.isNotEmpty)
      .map(parseLineIntoPropertyTuple)
      .toList();
}

/// Removes every underscore from [property] (e.g. `Double_Quote` becomes
/// `DoubleQuote`).
String normalizePropertyName(String property) {
  return property.replaceAll('_', '');
}

/// Returns [line] up to, but not including, the first `#`.
///
/// Lines without a `#` are returned unchanged.
String removeCommentFromLine(String line) {
  final int poundIdx = line.indexOf('#');
  return (poundIdx == -1) ? line : line.substring(0, poundIdx);
}

/// Parses a single `<range> ; <property>` line.
///
/// Examples:
///
///     00C0..00D6    ; ALetter
///     037F          ; ALetter
///
/// Would be parsed into:
///
/// ```dart
/// PropertyTuple(192, 214, 'ALetter');
/// PropertyTuple(895, 895, 'ALetter');
/// ```
///
/// Throws a [FormatException] if [line] has no `;` separator or if the range
/// is not valid hexadecimal.
PropertyTuple parseLineIntoPropertyTuple(String line) {
  final List<String> split = line.split(';');
  if (split.length < 2) {
    throw FormatException('Expected "<range> ; <property>"', line);
  }
  final String rangeStr = split[0].trim();
  final String propertyStr = split[1].trim();

  return PropertyTuple(
    getRangeStart(rangeStr),
    getRangeEnd(rangeStr),
    propertyStr,
  );
}