# © 2016 and later: Unicode, Inc. and others.
# License & terms of use: http://www.unicode.org/copyright.html
# Generated using tools/cldr/cldr-to-icu/build-icu-data.xml
#
# File: Han_Spacedhan.txt
# Generated from CLDR
#

# Only intended for internal use
# Make sure Han are normalized, including characters that contain them.
# The first set in the filter is computed with http://unicode.org/cldr/utility/list-unicodeset.jsp?a=[:tonfkd:/XXX/:]-[:ideographic:]-[:sc=han:]
# Where XXX is the resolved [:ideographic:][:sc=han:]. It needs updating with each Unicode release!
#
# NOTE(review): the supplementary-plane part of the first set is written with
# \U escapes so that it survives tools that drop non-BMP characters (a copy of
# this file had those characters stripped, leaving an unparsable " ---").
# The ranges below are the squared, tortoise-shell-bracketed and circled
# ideograph symbols of the Enclosed Ideographic Supplement block; confirm them
# against the output of the query above before release.
:: [[㆒-㆟㈠-㉇㊀-㊰㋀-㋋㍘-㍰㍻-㍿㏠-㏾ \U0001F210-\U0001F212 \U0001F214-\U0001F23B \U0001F240-\U0001F248 \U0001F250 \U0001F251][:ideographic:][:sc=han:]] nfkc;

# Fold fullwidth forms to their halfwidth counterparts before the rules run.
:: fullwidth-halfwidth;

# Ideographic full stop becomes an ASCII period.
。 → '.';

# Punctuation that ends a phrase (including closing and final-quote
# punctuation) and punctuation that begins one (opening and initial-quote
# punctuation). $initialPunct is only referenced inside a [...] set below,
# where its two property sets are unioned.
$terminalPunct = [\.\,\:\;\?\!.,:?!。、;[:Pe:][:Pf:]];
$initialPunct = [:Ps:][:Pi:];

# add space between any Han or terminal punctuation and letters, and
# between letters and Han or initial punct
# The empty {} matches the zero-width position between the before- and
# after-context, so each rule inserts a single space without consuming text.
[[:Ideographic:] $terminalPunct] {} [:Letter:] → ' ' ;
[:Letter:] [:Mark:]* {} [[:Ideographic:] $initialPunct] → ' ' ;

# remove spacing between ideographs and other letters
# These are reverse-direction (←) rules: the space inside { } is replaced by
# nothing when it sits between an ideograph and a letter, in either order.
← [:Ideographic:] { ' ' } [:Letter:] ;
← [:Letter:] [:Mark:]* { ' ' } [:Ideographic:] ;