• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1// Copyright 2013 The Flutter Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style license that can be
3// found in the LICENSE file.
4
5// TODO(mdebbar): To reduce the size of generated code, we could pack the data
6//   into a smaller format, e.g:
7//
8// ```dart
9// const _rawData = [
10//   0x000A, 0x000A, 1,
11//   0x000B, 0x000C, 2,
12//   0x000D, 0x000D, 3,
13//   0x0020, 0x0020, 4,
14//   // ...
15// ];
16// ```
17//
18// Then we could lazily build the lookup instance on demand.
19import 'dart:io';
20import 'package:path/path.dart' as path;
21
/// An inclusive unicode range, from [start] to [end], paired with the name of
/// the [property] assigned to that range.
class PropertyTuple {
  const PropertyTuple(this.start, this.end, this.property);

  /// The first value covered by the range.
  final int start;

  /// The last value covered by the range (inclusive).
  final int end;

  /// The property name attached to the range.
  final String property;

  /// Whether this tuple's range and [other]'s range share at least one value.
  bool isOverlapping(PropertyTuple other) {
    // Two inclusive ranges are disjoint only when one ends before the other
    // begins.
    final bool disjoint = other.end < start || other.start > end;
    return !disjoint;
  }

  /// Whether [other] can be appended to this tuple to form a single range.
  ///
  /// That is the case when both of the following hold:
  /// - [other] carries the same property as this tuple, and
  /// - [other] starts exactly one past the end of this tuple.
  bool isAdjacent(PropertyTuple other) {
    if (other.property != property) {
      return false;
    }
    return other.start - end == 1;
  }

  /// Returns a new tuple spanning from this tuple's [start] to the [end] of
  /// [extension], keeping this tuple's [property].
  ///
  /// [extension] must be adjacent to this tuple (see [isAdjacent]); this is
  /// only checked by an assertion.
  PropertyTuple extendRange(PropertyTuple extension) {
    assert(isAdjacent(extension));
    final int mergedEnd = extension.end;
    return PropertyTuple(start, mergedEnd, property);
  }
}
51
/// Usage (from the root of the project):
///
/// ```
/// dart tool/unicode_sync_script.dart <path/to/word/break/properties>
/// ```
///
/// This script parses the unicode word break properties(1) and generates Dart
/// code(2) that can perform lookups in the unicode ranges to find what property
/// a letter has.
///
/// (1) The properties file can be downloaded from:
///     https://www.unicode.org/Public/11.0.0/ucd/auxiliary/WordBreakProperty.txt
///
/// (2) The codegen'd Dart file is located at:
///     lib/src/text/word_break_properties.dart
///
/// The first entry of [arguments] is the path to the properties file; any
/// further entries are ignored. When no argument is given, a usage message is
/// printed to stderr and the process exit code is set to a non-zero value.
void main(List<String> arguments) async {
  if (arguments.isEmpty) {
    // Fail with a readable message instead of a RangeError from
    // `arguments[0]`.
    stderr.writeln(
      'Usage: dart tool/unicode_sync_script.dart '
      '<path/to/word/break/properties>',
    );
    // 64 is the conventional "command line usage error" exit code.
    exitCode = 64;
    return;
  }

  final String propertiesFile = arguments[0];
  // The generated file is resolved relative to the location of this script.
  final String codegenFile = path.join(
    path.dirname(Platform.script.toFilePath()),
    '../lib/src/text/word_break_properties.dart',
  );
  WordBreakPropertiesSyncer(propertiesFile, codegenFile).perform();
}
75
/// Base class that provides common logic for syncing all kinds of unicode
/// properties (e.g. word break properties, line break properties, etc).
///
/// Subclasses implement the [template] method which receives as arguments the
/// header lines found by [extractHeader] and the list of data parsed by
/// [processLines].
abstract class PropertiesSyncer {
  /// Creates a syncer that reads the properties file at path [_src] and
  /// writes the generated code to path [_dest].
  PropertiesSyncer(this._src, this._dest);

  final String _src;
  final String _dest;

  /// Reads the source file, renders it through [template] and writes the
  /// result to the destination file.
  ///
  /// The returned future completes once the output has been written, so
  /// callers may await it; ignoring it keeps the previous fire-and-forget
  /// behavior.
  Future<void> perform() async {
    final List<String> lines = await File(_src).readAsLines();
    final List<String> header = extractHeader(lines);
    final List<PropertyTuple> data = processLines(lines);

    // Render first so that a failure while generating the code (e.g.
    // overlapping ranges) happens before the destination file is opened.
    final String generated = template(header, data);

    // `writeAsString` opens, writes, flushes and closes the file; the
    // previous `openWrite()` sink was never closed.
    await File(_dest).writeAsString(generated);
  }

  /// Returns the full text of the generated file for the given [header] lines
  /// and parsed [data].
  String template(List<String> header, List<PropertyTuple> data);
}
98
/// Syncs Unicode's word break properties.
class WordBreakPropertiesSyncer extends PropertiesSyncer {
  WordBreakPropertiesSyncer(String src, String dest) : super(src, dest);

  /// Renders the generated Dart file: a comment block built from [header], a
  /// `CharProperty` enum with one value per distinct property in [data], and
  /// a constant lookup table with one entry per combined range.
  @override
  String template(List<String> header, List<PropertyTuple> data) {
    return '''
// AUTO-GENERATED FILE.
// Generated by: bin/unicode_sync_script.dart
//
// Source:
// ${header.join('\n// ')}

import 'unicode_range.dart';

CharProperty getCharProperty(String text, int index) {
  if (index < 0 || index >= text.length) {
    return null;
  }
  return lookup.find(text.codeUnitAt(index));
}

enum CharProperty {
  ${getEnumValues(data).join(',\n  ')}
}

const UnicodePropertyLookup<CharProperty> lookup =
    UnicodePropertyLookup<CharProperty>([
  ${getLookupEntries(data).join(',\n  ')}
]);
''';
  }

  /// Returns the distinct property names in [data], in order of first
  /// appearance, each passed through [normalizePropertyName].
  Iterable<String> getEnumValues(List<PropertyTuple> data) {
    return Set<String>.from(
            data.map<String>((PropertyTuple tuple) => tuple.property))
        .map(normalizePropertyName);
  }

  /// Returns one generated lookup entry per range in [data], after sorting
  /// the ranges by start and merging adjacent ranges that share a property.
  ///
  /// [data] itself is left untouched; sorting happens on a copy.
  ///
  /// Throws an [Exception] if any two ranges overlap.
  Iterable<String> getLookupEntries(List<PropertyTuple> data) {
    // Sort a copy so the caller's list keeps its original order.
    final List<PropertyTuple> sorted = List<PropertyTuple>.of(data)
      ..sort(
        // Ranges are expected not to overlap (verified right below), so
        // ordering by the start of each range is sufficient.
        (PropertyTuple tuple1, PropertyTuple tuple2) =>
            tuple1.start.compareTo(tuple2.start),
      );
    verifyNoOverlappingRanges(sorted);
    return combineAdjacentRanges(sorted).map(generateLookupEntry);
  }

  /// Returns the source text of a single `UnicodeRange(...)` entry for
  /// [tuple], with both bounds formatted by [toHex].
  String generateLookupEntry(PropertyTuple tuple) {
    final String propertyStr =
        'CharProperty.${normalizePropertyName(tuple.property)}';
    return 'UnicodeRange(${toHex(tuple.start)}, ${toHex(tuple.end)}, $propertyStr)';
  }
}
156
/// Merges consecutive entries of [data] that are adjacent (see
/// [PropertyTuple.isAdjacent]) into single, wider ranges.
///
/// Example:
///    UnicodeRange(0x01C4, 0x0293, CharProperty.ALetter),
///    UnicodeRange(0x0294, 0x0294, CharProperty.ALetter),
///    UnicodeRange(0x0295, 0x02AF, CharProperty.ALetter),
///
/// will get combined into:
///    UnicodeRange(0x01C4, 0x02AF, CharProperty.ALetter)
///
/// Only neighbouring list entries are compared, so [data] should already be
/// ordered by range start. Returns a new list; [data] is not modified. An
/// empty [data] yields an empty list.
List<PropertyTuple> combineAdjacentRanges(List<PropertyTuple> data) {
  final List<PropertyTuple> result = <PropertyTuple>[];
  for (final PropertyTuple tuple in data) {
    // The emptiness check covers the first element (and empty input), which
    // previously went through `data.first` and threw on an empty list.
    if (result.isNotEmpty && result.last.isAdjacent(tuple)) {
      result.last = result.last.extendRange(tuple);
    } else {
      result.add(tuple);
    }
  }
  return result;
}
175
/// Parses the start of a hexadecimal [range] such as `00C0..00D6` or `037F`.
///
/// For a single value (no `..`), the value itself is the start.
int getRangeStart(String range) {
  final int separatorIdx = range.indexOf('..');
  final String startHex =
      separatorIdx == -1 ? range : range.substring(0, separatorIdx);
  return int.parse(startHex, radix: 16);
}
179
/// Parses the end of a hexadecimal [range] such as `00C0..00D6` or `037F`.
///
/// For a single value (no `..`), the value itself is the end.
int getRangeEnd(String range) {
  final List<String> parts = range.split('..');
  // Splitting yields more than one part only when `..` is present.
  final String endHex = parts.length > 1 ? parts[1] : range;
  return int.parse(endHex, radix: 16);
}
186
/// Formats [value] as an upper-case hexadecimal literal prefixed with `0x`,
/// left-padded with zeros to at least four digits (e.g. `0x000A`).
String toHex(int value) {
  final String digits = value.toRadixString(16).toUpperCase();
  return '0x${digits.padLeft(4, '0')}';
}
190
/// Throws an [Exception] if any entry of [data] overlaps the entry right
/// before it.
///
/// Only neighbouring entries are compared.
void verifyNoOverlappingRanges(List<PropertyTuple> data) {
  if (data.isEmpty) {
    return;
  }
  PropertyTuple previous = data.first;
  for (final PropertyTuple current in data.skip(1)) {
    if (current.isOverlapping(previous)) {
      throw Exception('Data contains overlapping ranges.');
    }
    previous = current;
  }
}
198
/// Returns the non-empty lines of [lines] that come before the first line
/// containing `=======`.
///
/// If no such separator line exists, every non-empty line is returned.
List<String> extractHeader(List<String> lines) {
  return lines
      .takeWhile((String line) => !line.contains('======='))
      .where((String line) => line.isNotEmpty)
      .toList();
}
211
212List<PropertyTuple> processLines(List<String> lines) {
213  return lines
214      .map(removeCommentFromLine)
215      .where((String line) => line.isNotEmpty)
216      .map(parseLineIntoPropertyTuple)
217      .toList();
218}
219
/// Returns [property] with every underscore removed, e.g. `Double_Quote`
/// becomes `DoubleQuote`.
String normalizePropertyName(String property) {
  final List<String> pieces = property.split('_');
  return pieces.join();
}
223
/// Returns the part of [line] that precedes the first `#`, or the whole
/// [line] when it contains no `#`.
///
/// Whitespace before the `#` is kept.
String removeCommentFromLine(String line) {
  final int commentStart = line.indexOf('#');
  if (commentStart < 0) {
    return line;
  }
  return line.substring(0, commentStart);
}
228
/// Parses a single `<range> ; <property>` line into a [PropertyTuple].
///
/// The range is either two hexadecimal values joined by `..` or a single
/// hexadecimal value, which is used as both start and end.
///
/// Examples:
///
/// 00C0..00D6    ; ALetter
/// 037F          ; ALetter
///
/// Would be parsed into:
///
/// ```dart
/// PropertyTuple(192, 214, 'ALetter');
/// PropertyTuple(895, 895, 'ALetter');
/// ```
PropertyTuple parseLineIntoPropertyTuple(String line) {
  final List<String> fields = line.split(';');
  final String range = fields[0].trim();
  final String property = fields[1].trim();

  // The range helpers already handle both the `a..b` and single-value forms.
  return PropertyTuple(
    getRangeStart(range),
    getRangeEnd(range),
    property,
  );
}
254