1# -*- coding: utf-8 -*- 2# The table below is taken from 3# http://www.adobe.com/devnet/opentype/archives/aglfn.txt 4 5from __future__ import (print_function, division, absolute_import, 6 unicode_literals) 7from fontTools.misc.py23 import * 8import re 9 10 11_aglText = """\ 12# ----------------------------------------------------------- 13# Copyright 2003, 2005-2008, 2010 Adobe Systems Incorporated. 14# All rights reserved. 15# 16# Redistribution and use in source and binary forms, with or 17# without modification, are permitted provided that the 18# following conditions are met: 19# 20# Redistributions of source code must retain the above 21# copyright notice, this list of conditions and the following 22# disclaimer. 23# 24# Redistributions in binary form must reproduce the above 25# copyright notice, this list of conditions and the following 26# disclaimer in the documentation and/or other materials 27# provided with the distribution. 28# 29# Neither the name of Adobe Systems Incorporated nor the names 30# of its contributors may be used to endorse or promote 31# products derived from this software without specific prior 32# written permission. 33# 34# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND 35# CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, 36# INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF 37# MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 38# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR 39# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 40# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT 41# NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 42# LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 43# HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 44# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR 45# OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 46# SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 47# ----------------------------------------------------------- 48# Name: Adobe Glyph List For New Fonts 49# Table version: 1.7 50# Date: November 6, 2008 51# URL: http://sourceforge.net/adobe/aglfn/ 52# 53# Description: 54# 55# AGLFN (Adobe Glyph List For New Fonts) provides a list of base glyph 56# names that are recommended for new fonts, which are compatible with 57# the AGL (Adobe Glyph List) Specification, and which should be used 58# as described in Section 6 of that document. AGLFN comprises the set 59# of glyph names from AGL that map via the AGL Specification rules to 60# the semantically correct UV (Unicode Value). For example, "Asmall" 61# is omitted because AGL maps this glyph name to the PUA (Private Use 62# Area) value U+F761, rather than to the UV that maps from the glyph 63# name "A." Also omitted is "ffi," because AGL maps this to the 64# Alphabetic Presentation Forms value U+FB03, rather than decomposing 65# it into the following sequence of three UVs: U+0066, U+0066, and 66# U+0069. The name "arrowvertex" has been omitted because this glyph 67# now has a real UV, and AGL is now incorrect in mapping it to the PUA 68# value U+F8E6. If you do not find an appropriate name for your glyph 69# in this list, then please refer to Section 6 of the AGL 70# Specification. 71# 72# Format: three semicolon-delimited fields: 73# (1) Standard UV or CUS UV--four uppercase hexadecimal digits 74# (2) Glyph name--upper/lowercase letters and digits 75# (3) Character names: Unicode character names for standard UVs, and 76# descriptive names for CUS UVs--uppercase letters, hyphen, and 77# space 78# 79# The records are sorted by glyph name in increasing ASCII order, 80# entries with the same glyph name are sorted in decreasing priority 81# order, the UVs and Unicode character names are provided for 82# convenience, lines starting with "#" are comments, and blank lines 83# should be ignored. 84# 85# Revision History: 86# 87# 1.7 [6 November 2008] 88# - Reverted to the original 1.4 and earlier mappings for Delta, 89# Omega, and mu. 90# - Removed mappings for "afii" names. These should now be assigned 91# "uni" names. 92# - Removed mappings for "commaaccent" names. These should now be 93# assigned "uni" names. 94# 95# 1.6 [30 January 2006] 96# - Completed work intended in 1.5. 97# 98# 1.5 [23 November 2005] 99# - Removed duplicated block at end of file. 100# - Changed mappings: 101# 2206;Delta;INCREMENT changed to 0394;Delta;GREEK CAPITAL LETTER DELTA 102# 2126;Omega;OHM SIGN changed to 03A9;Omega;GREEK CAPITAL LETTER OMEGA 103# 03BC;mu;MICRO SIGN changed to 03BC;mu;GREEK SMALL LETTER MU 104# - Corrected statement above about why "ffi" is omitted. 105# 106# 1.4 [24 September 2003] 107# - Changed version to 1.4, to avoid confusion with the AGL 1.3. 108# - Fixed spelling errors in the header. 109# - Fully removed "arrowvertex," as it is mapped only to a PUA Unicode 110# value in some fonts. 111# 112# 1.1 [17 April 2003] 113# - Renamed [Tt]cedilla back to [Tt]commaaccent. 114# 115# 1.0 [31 January 2003] 116# - Original version. 117# - Derived from the AGLv1.2 by: 118# removing the PUA area codes; 119# removing duplicate Unicode mappings; and 120# renaming "tcommaaccent" to "tcedilla" and "Tcommaaccent" to "Tcedilla" 121# 1220041;A;LATIN CAPITAL LETTER A 12300C6;AE;LATIN CAPITAL LETTER AE 12401FC;AEacute;LATIN CAPITAL LETTER AE WITH ACUTE 12500C1;Aacute;LATIN CAPITAL LETTER A WITH ACUTE 1260102;Abreve;LATIN CAPITAL LETTER A WITH BREVE 12700C2;Acircumflex;LATIN CAPITAL LETTER A WITH CIRCUMFLEX 12800C4;Adieresis;LATIN CAPITAL LETTER A WITH DIAERESIS 12900C0;Agrave;LATIN CAPITAL LETTER A WITH GRAVE 1300391;Alpha;GREEK CAPITAL LETTER ALPHA 1310386;Alphatonos;GREEK CAPITAL LETTER ALPHA WITH TONOS 1320100;Amacron;LATIN CAPITAL LETTER A WITH MACRON 1330104;Aogonek;LATIN CAPITAL LETTER A WITH OGONEK 13400C5;Aring;LATIN CAPITAL LETTER A WITH RING ABOVE 13501FA;Aringacute;LATIN CAPITAL LETTER A WITH RING ABOVE AND ACUTE 13600C3;Atilde;LATIN CAPITAL LETTER A WITH TILDE 1370042;B;LATIN CAPITAL LETTER B 1380392;Beta;GREEK CAPITAL LETTER BETA 1390043;C;LATIN CAPITAL LETTER C 1400106;Cacute;LATIN CAPITAL LETTER C WITH ACUTE 141010C;Ccaron;LATIN CAPITAL LETTER C WITH CARON 14200C7;Ccedilla;LATIN CAPITAL LETTER C WITH CEDILLA 1430108;Ccircumflex;LATIN CAPITAL LETTER C WITH CIRCUMFLEX 144010A;Cdotaccent;LATIN CAPITAL LETTER C WITH DOT ABOVE 14503A7;Chi;GREEK CAPITAL LETTER CHI 1460044;D;LATIN CAPITAL LETTER D 147010E;Dcaron;LATIN CAPITAL LETTER D WITH CARON 1480110;Dcroat;LATIN CAPITAL LETTER D WITH STROKE 1492206;Delta;INCREMENT 1500045;E;LATIN CAPITAL LETTER E 15100C9;Eacute;LATIN CAPITAL LETTER E WITH ACUTE 1520114;Ebreve;LATIN CAPITAL LETTER E WITH BREVE 153011A;Ecaron;LATIN CAPITAL LETTER E WITH CARON 15400CA;Ecircumflex;LATIN CAPITAL LETTER E WITH CIRCUMFLEX 15500CB;Edieresis;LATIN CAPITAL LETTER E WITH DIAERESIS 1560116;Edotaccent;LATIN CAPITAL LETTER E WITH DOT ABOVE 15700C8;Egrave;LATIN CAPITAL LETTER E WITH GRAVE 1580112;Emacron;LATIN CAPITAL LETTER E WITH MACRON 159014A;Eng;LATIN CAPITAL LETTER ENG 1600118;Eogonek;LATIN CAPITAL LETTER E WITH OGONEK 1610395;Epsilon;GREEK CAPITAL LETTER EPSILON 1620388;Epsilontonos;GREEK CAPITAL LETTER EPSILON WITH TONOS 1630397;Eta;GREEK CAPITAL LETTER ETA 1640389;Etatonos;GREEK CAPITAL LETTER ETA WITH TONOS 16500D0;Eth;LATIN CAPITAL LETTER ETH 16620AC;Euro;EURO SIGN 1670046;F;LATIN CAPITAL LETTER F 1680047;G;LATIN CAPITAL LETTER G 1690393;Gamma;GREEK CAPITAL LETTER GAMMA 170011E;Gbreve;LATIN CAPITAL LETTER G WITH BREVE 17101E6;Gcaron;LATIN CAPITAL LETTER G WITH CARON 172011C;Gcircumflex;LATIN CAPITAL LETTER G WITH CIRCUMFLEX 1730120;Gdotaccent;LATIN CAPITAL LETTER G WITH DOT ABOVE 1740048;H;LATIN CAPITAL LETTER H 17525CF;H18533;BLACK CIRCLE 17625AA;H18543;BLACK SMALL SQUARE 17725AB;H18551;WHITE SMALL SQUARE 17825A1;H22073;WHITE SQUARE 1790126;Hbar;LATIN CAPITAL LETTER H WITH STROKE 1800124;Hcircumflex;LATIN CAPITAL LETTER H WITH CIRCUMFLEX 1810049;I;LATIN CAPITAL LETTER I 1820132;IJ;LATIN CAPITAL LIGATURE IJ 18300CD;Iacute;LATIN CAPITAL LETTER I WITH ACUTE 184012C;Ibreve;LATIN CAPITAL LETTER I WITH BREVE 18500CE;Icircumflex;LATIN CAPITAL LETTER I WITH CIRCUMFLEX 18600CF;Idieresis;LATIN CAPITAL LETTER I WITH DIAERESIS 1870130;Idotaccent;LATIN CAPITAL LETTER I WITH DOT ABOVE 1882111;Ifraktur;BLACK-LETTER CAPITAL I 18900CC;Igrave;LATIN CAPITAL LETTER I WITH GRAVE 190012A;Imacron;LATIN CAPITAL LETTER I WITH MACRON 191012E;Iogonek;LATIN CAPITAL LETTER I WITH OGONEK 1920399;Iota;GREEK CAPITAL LETTER IOTA 19303AA;Iotadieresis;GREEK CAPITAL LETTER IOTA WITH DIALYTIKA 194038A;Iotatonos;GREEK CAPITAL LETTER IOTA WITH TONOS 1950128;Itilde;LATIN CAPITAL LETTER I WITH TILDE 196004A;J;LATIN CAPITAL LETTER J 1970134;Jcircumflex;LATIN CAPITAL LETTER J WITH CIRCUMFLEX 198004B;K;LATIN CAPITAL LETTER K 199039A;Kappa;GREEK CAPITAL LETTER KAPPA 200004C;L;LATIN CAPITAL LETTER L 2010139;Lacute;LATIN CAPITAL LETTER L WITH ACUTE 202039B;Lambda;GREEK CAPITAL LETTER LAMDA 203013D;Lcaron;LATIN CAPITAL LETTER L WITH CARON 204013F;Ldot;LATIN CAPITAL LETTER L WITH MIDDLE DOT 2050141;Lslash;LATIN CAPITAL LETTER L WITH STROKE 206004D;M;LATIN CAPITAL LETTER M 207039C;Mu;GREEK CAPITAL LETTER MU 208004E;N;LATIN CAPITAL LETTER N 2090143;Nacute;LATIN CAPITAL LETTER N WITH ACUTE 2100147;Ncaron;LATIN CAPITAL LETTER N WITH CARON 21100D1;Ntilde;LATIN CAPITAL LETTER N WITH TILDE 212039D;Nu;GREEK CAPITAL LETTER NU 213004F;O;LATIN CAPITAL LETTER O 2140152;OE;LATIN CAPITAL LIGATURE OE 21500D3;Oacute;LATIN CAPITAL LETTER O WITH ACUTE 216014E;Obreve;LATIN CAPITAL LETTER O WITH BREVE 21700D4;Ocircumflex;LATIN CAPITAL LETTER O WITH CIRCUMFLEX 21800D6;Odieresis;LATIN CAPITAL LETTER O WITH DIAERESIS 21900D2;Ograve;LATIN CAPITAL LETTER O WITH GRAVE 22001A0;Ohorn;LATIN CAPITAL LETTER O WITH HORN 2210150;Ohungarumlaut;LATIN CAPITAL LETTER O WITH DOUBLE ACUTE 222014C;Omacron;LATIN CAPITAL LETTER O WITH MACRON 2232126;Omega;OHM SIGN 224038F;Omegatonos;GREEK CAPITAL LETTER OMEGA WITH TONOS 225039F;Omicron;GREEK CAPITAL LETTER OMICRON 226038C;Omicrontonos;GREEK CAPITAL LETTER OMICRON WITH TONOS 22700D8;Oslash;LATIN CAPITAL LETTER O WITH STROKE 22801FE;Oslashacute;LATIN CAPITAL LETTER O WITH STROKE AND ACUTE 22900D5;Otilde;LATIN CAPITAL LETTER O WITH TILDE 2300050;P;LATIN CAPITAL LETTER P 23103A6;Phi;GREEK CAPITAL LETTER PHI 23203A0;Pi;GREEK CAPITAL LETTER PI 23303A8;Psi;GREEK CAPITAL LETTER PSI 2340051;Q;LATIN CAPITAL LETTER Q 2350052;R;LATIN CAPITAL LETTER R 2360154;Racute;LATIN CAPITAL LETTER R WITH ACUTE 2370158;Rcaron;LATIN CAPITAL LETTER R WITH CARON 238211C;Rfraktur;BLACK-LETTER CAPITAL R 23903A1;Rho;GREEK CAPITAL LETTER RHO 2400053;S;LATIN CAPITAL LETTER S 241250C;SF010000;BOX DRAWINGS LIGHT DOWN AND RIGHT 2422514;SF020000;BOX DRAWINGS LIGHT UP AND RIGHT 2432510;SF030000;BOX DRAWINGS LIGHT DOWN AND LEFT 2442518;SF040000;BOX DRAWINGS LIGHT UP AND LEFT 245253C;SF050000;BOX DRAWINGS LIGHT VERTICAL AND HORIZONTAL 246252C;SF060000;BOX DRAWINGS LIGHT DOWN AND HORIZONTAL 2472534;SF070000;BOX DRAWINGS LIGHT UP AND HORIZONTAL 248251C;SF080000;BOX DRAWINGS LIGHT VERTICAL AND RIGHT 2492524;SF090000;BOX DRAWINGS LIGHT VERTICAL AND LEFT 2502500;SF100000;BOX DRAWINGS LIGHT HORIZONTAL 2512502;SF110000;BOX DRAWINGS LIGHT VERTICAL 2522561;SF190000;BOX DRAWINGS VERTICAL SINGLE AND LEFT DOUBLE 2532562;SF200000;BOX DRAWINGS VERTICAL DOUBLE AND LEFT SINGLE 2542556;SF210000;BOX DRAWINGS DOWN DOUBLE AND LEFT SINGLE 2552555;SF220000;BOX DRAWINGS DOWN SINGLE AND LEFT DOUBLE 2562563;SF230000;BOX DRAWINGS DOUBLE VERTICAL AND LEFT 2572551;SF240000;BOX DRAWINGS DOUBLE VERTICAL 2582557;SF250000;BOX DRAWINGS DOUBLE DOWN AND LEFT 259255D;SF260000;BOX DRAWINGS DOUBLE UP AND LEFT 260255C;SF270000;BOX DRAWINGS UP DOUBLE AND LEFT SINGLE 261255B;SF280000;BOX DRAWINGS UP SINGLE AND LEFT DOUBLE 262255E;SF360000;BOX DRAWINGS VERTICAL SINGLE AND RIGHT DOUBLE 263255F;SF370000;BOX DRAWINGS VERTICAL DOUBLE AND RIGHT SINGLE 264255A;SF380000;BOX DRAWINGS DOUBLE UP AND RIGHT 2652554;SF390000;BOX DRAWINGS DOUBLE DOWN AND RIGHT 2662569;SF400000;BOX DRAWINGS DOUBLE UP AND HORIZONTAL 2672566;SF410000;BOX DRAWINGS DOUBLE DOWN AND HORIZONTAL 2682560;SF420000;BOX DRAWINGS DOUBLE VERTICAL AND RIGHT 2692550;SF430000;BOX DRAWINGS DOUBLE HORIZONTAL 270256C;SF440000;BOX DRAWINGS DOUBLE VERTICAL AND HORIZONTAL 2712567;SF450000;BOX DRAWINGS UP SINGLE AND HORIZONTAL DOUBLE 2722568;SF460000;BOX DRAWINGS UP DOUBLE AND HORIZONTAL SINGLE 2732564;SF470000;BOX DRAWINGS DOWN SINGLE AND HORIZONTAL DOUBLE 2742565;SF480000;BOX DRAWINGS DOWN DOUBLE AND HORIZONTAL SINGLE 2752559;SF490000;BOX DRAWINGS UP DOUBLE AND RIGHT SINGLE 2762558;SF500000;BOX DRAWINGS UP SINGLE AND RIGHT DOUBLE 2772552;SF510000;BOX DRAWINGS DOWN SINGLE AND RIGHT DOUBLE 2782553;SF520000;BOX DRAWINGS DOWN DOUBLE AND RIGHT SINGLE 279256B;SF530000;BOX DRAWINGS VERTICAL DOUBLE AND HORIZONTAL SINGLE 280256A;SF540000;BOX DRAWINGS VERTICAL SINGLE AND HORIZONTAL DOUBLE 281015A;Sacute;LATIN CAPITAL LETTER S WITH ACUTE 2820160;Scaron;LATIN CAPITAL LETTER S WITH CARON 283015E;Scedilla;LATIN CAPITAL LETTER S WITH CEDILLA 284015C;Scircumflex;LATIN CAPITAL LETTER S WITH CIRCUMFLEX 28503A3;Sigma;GREEK CAPITAL LETTER SIGMA 2860054;T;LATIN CAPITAL LETTER T 28703A4;Tau;GREEK CAPITAL LETTER TAU 2880166;Tbar;LATIN CAPITAL LETTER T WITH STROKE 2890164;Tcaron;LATIN CAPITAL LETTER T WITH CARON 2900398;Theta;GREEK CAPITAL LETTER THETA 29100DE;Thorn;LATIN CAPITAL LETTER THORN 2920055;U;LATIN CAPITAL LETTER U 29300DA;Uacute;LATIN CAPITAL LETTER U WITH ACUTE 294016C;Ubreve;LATIN CAPITAL LETTER U WITH BREVE 29500DB;Ucircumflex;LATIN CAPITAL LETTER U WITH CIRCUMFLEX 29600DC;Udieresis;LATIN CAPITAL LETTER U WITH DIAERESIS 29700D9;Ugrave;LATIN CAPITAL LETTER U WITH GRAVE 29801AF;Uhorn;LATIN CAPITAL LETTER U WITH HORN 2990170;Uhungarumlaut;LATIN CAPITAL LETTER U WITH DOUBLE ACUTE 300016A;Umacron;LATIN CAPITAL LETTER U WITH MACRON 3010172;Uogonek;LATIN CAPITAL LETTER U WITH OGONEK 30203A5;Upsilon;GREEK CAPITAL LETTER UPSILON 30303D2;Upsilon1;GREEK UPSILON WITH HOOK SYMBOL 30403AB;Upsilondieresis;GREEK CAPITAL LETTER UPSILON WITH DIALYTIKA 305038E;Upsilontonos;GREEK CAPITAL LETTER UPSILON WITH TONOS 306016E;Uring;LATIN CAPITAL LETTER U WITH RING ABOVE 3070168;Utilde;LATIN CAPITAL LETTER U WITH TILDE 3080056;V;LATIN CAPITAL LETTER V 3090057;W;LATIN CAPITAL LETTER W 3101E82;Wacute;LATIN CAPITAL LETTER W WITH ACUTE 3110174;Wcircumflex;LATIN CAPITAL LETTER W WITH CIRCUMFLEX 3121E84;Wdieresis;LATIN CAPITAL LETTER W WITH DIAERESIS 3131E80;Wgrave;LATIN CAPITAL LETTER W WITH GRAVE 3140058;X;LATIN CAPITAL LETTER X 315039E;Xi;GREEK CAPITAL LETTER XI 3160059;Y;LATIN CAPITAL LETTER Y 31700DD;Yacute;LATIN CAPITAL LETTER Y WITH ACUTE 3180176;Ycircumflex;LATIN CAPITAL LETTER Y WITH CIRCUMFLEX 3190178;Ydieresis;LATIN CAPITAL LETTER Y WITH DIAERESIS 3201EF2;Ygrave;LATIN CAPITAL LETTER Y WITH GRAVE 321005A;Z;LATIN CAPITAL LETTER Z 3220179;Zacute;LATIN CAPITAL LETTER Z WITH ACUTE 323017D;Zcaron;LATIN CAPITAL LETTER Z WITH CARON 324017B;Zdotaccent;LATIN CAPITAL LETTER Z WITH DOT ABOVE 3250396;Zeta;GREEK CAPITAL LETTER ZETA 3260061;a;LATIN SMALL LETTER A 32700E1;aacute;LATIN SMALL LETTER A WITH ACUTE 3280103;abreve;LATIN SMALL LETTER A WITH BREVE 32900E2;acircumflex;LATIN SMALL LETTER A WITH CIRCUMFLEX 33000B4;acute;ACUTE ACCENT 3310301;acutecomb;COMBINING ACUTE ACCENT 33200E4;adieresis;LATIN SMALL LETTER A WITH DIAERESIS 33300E6;ae;LATIN SMALL LETTER AE 33401FD;aeacute;LATIN SMALL LETTER AE WITH ACUTE 33500E0;agrave;LATIN SMALL LETTER A WITH GRAVE 3362135;aleph;ALEF SYMBOL 33703B1;alpha;GREEK SMALL LETTER ALPHA 33803AC;alphatonos;GREEK SMALL LETTER ALPHA WITH TONOS 3390101;amacron;LATIN SMALL LETTER A WITH MACRON 3400026;ampersand;AMPERSAND 3412220;angle;ANGLE 3422329;angleleft;LEFT-POINTING ANGLE BRACKET 343232A;angleright;RIGHT-POINTING ANGLE BRACKET 3440387;anoteleia;GREEK ANO TELEIA 3450105;aogonek;LATIN SMALL LETTER A WITH OGONEK 3462248;approxequal;ALMOST EQUAL TO 34700E5;aring;LATIN SMALL LETTER A WITH RING ABOVE 34801FB;aringacute;LATIN SMALL LETTER A WITH RING ABOVE AND ACUTE 3492194;arrowboth;LEFT RIGHT ARROW 35021D4;arrowdblboth;LEFT RIGHT DOUBLE ARROW 35121D3;arrowdbldown;DOWNWARDS DOUBLE ARROW 35221D0;arrowdblleft;LEFTWARDS DOUBLE ARROW 35321D2;arrowdblright;RIGHTWARDS DOUBLE ARROW 35421D1;arrowdblup;UPWARDS DOUBLE ARROW 3552193;arrowdown;DOWNWARDS ARROW 3562190;arrowleft;LEFTWARDS ARROW 3572192;arrowright;RIGHTWARDS ARROW 3582191;arrowup;UPWARDS ARROW 3592195;arrowupdn;UP DOWN ARROW 36021A8;arrowupdnbse;UP DOWN ARROW WITH BASE 361005E;asciicircum;CIRCUMFLEX ACCENT 362007E;asciitilde;TILDE 363002A;asterisk;ASTERISK 3642217;asteriskmath;ASTERISK OPERATOR 3650040;at;COMMERCIAL AT 36600E3;atilde;LATIN SMALL LETTER A WITH TILDE 3670062;b;LATIN SMALL LETTER B 368005C;backslash;REVERSE SOLIDUS 369007C;bar;VERTICAL LINE 37003B2;beta;GREEK SMALL LETTER BETA 3712588;block;FULL BLOCK 372007B;braceleft;LEFT CURLY BRACKET 373007D;braceright;RIGHT CURLY BRACKET 374005B;bracketleft;LEFT SQUARE BRACKET 375005D;bracketright;RIGHT SQUARE BRACKET 37602D8;breve;BREVE 37700A6;brokenbar;BROKEN BAR 3782022;bullet;BULLET 3790063;c;LATIN SMALL LETTER C 3800107;cacute;LATIN SMALL LETTER C WITH ACUTE 38102C7;caron;CARON 38221B5;carriagereturn;DOWNWARDS ARROW WITH CORNER LEFTWARDS 383010D;ccaron;LATIN SMALL LETTER C WITH CARON 38400E7;ccedilla;LATIN SMALL LETTER C WITH CEDILLA 3850109;ccircumflex;LATIN SMALL LETTER C WITH CIRCUMFLEX 386010B;cdotaccent;LATIN SMALL LETTER C WITH DOT ABOVE 38700B8;cedilla;CEDILLA 38800A2;cent;CENT SIGN 38903C7;chi;GREEK SMALL LETTER CHI 39025CB;circle;WHITE CIRCLE 3912297;circlemultiply;CIRCLED TIMES 3922295;circleplus;CIRCLED PLUS 39302C6;circumflex;MODIFIER LETTER CIRCUMFLEX ACCENT 3942663;club;BLACK CLUB SUIT 395003A;colon;COLON 39620A1;colonmonetary;COLON SIGN 397002C;comma;COMMA 3982245;congruent;APPROXIMATELY EQUAL TO 39900A9;copyright;COPYRIGHT SIGN 40000A4;currency;CURRENCY SIGN 4010064;d;LATIN SMALL LETTER D 4022020;dagger;DAGGER 4032021;daggerdbl;DOUBLE DAGGER 404010F;dcaron;LATIN SMALL LETTER D WITH CARON 4050111;dcroat;LATIN SMALL LETTER D WITH STROKE 40600B0;degree;DEGREE SIGN 40703B4;delta;GREEK SMALL LETTER DELTA 4082666;diamond;BLACK DIAMOND SUIT 40900A8;dieresis;DIAERESIS 4100385;dieresistonos;GREEK DIALYTIKA TONOS 41100F7;divide;DIVISION SIGN 4122593;dkshade;DARK SHADE 4132584;dnblock;LOWER HALF BLOCK 4140024;dollar;DOLLAR SIGN 41520AB;dong;DONG SIGN 41602D9;dotaccent;DOT ABOVE 4170323;dotbelowcomb;COMBINING DOT BELOW 4180131;dotlessi;LATIN SMALL LETTER DOTLESS I 41922C5;dotmath;DOT OPERATOR 4200065;e;LATIN SMALL LETTER E 42100E9;eacute;LATIN SMALL LETTER E WITH ACUTE 4220115;ebreve;LATIN SMALL LETTER E WITH BREVE 423011B;ecaron;LATIN SMALL LETTER E WITH CARON 42400EA;ecircumflex;LATIN SMALL LETTER E WITH CIRCUMFLEX 42500EB;edieresis;LATIN SMALL LETTER E WITH DIAERESIS 4260117;edotaccent;LATIN SMALL LETTER E WITH DOT ABOVE 42700E8;egrave;LATIN SMALL LETTER E WITH GRAVE 4280038;eight;DIGIT EIGHT 4292208;element;ELEMENT OF 4302026;ellipsis;HORIZONTAL ELLIPSIS 4310113;emacron;LATIN SMALL LETTER E WITH MACRON 4322014;emdash;EM DASH 4332205;emptyset;EMPTY SET 4342013;endash;EN DASH 435014B;eng;LATIN SMALL LETTER ENG 4360119;eogonek;LATIN SMALL LETTER E WITH OGONEK 43703B5;epsilon;GREEK SMALL LETTER EPSILON 43803AD;epsilontonos;GREEK SMALL LETTER EPSILON WITH TONOS 439003D;equal;EQUALS SIGN 4402261;equivalence;IDENTICAL TO 441212E;estimated;ESTIMATED SYMBOL 44203B7;eta;GREEK SMALL LETTER ETA 44303AE;etatonos;GREEK SMALL LETTER ETA WITH TONOS 44400F0;eth;LATIN SMALL LETTER ETH 4450021;exclam;EXCLAMATION MARK 446203C;exclamdbl;DOUBLE EXCLAMATION MARK 44700A1;exclamdown;INVERTED EXCLAMATION MARK 4482203;existential;THERE EXISTS 4490066;f;LATIN SMALL LETTER F 4502640;female;FEMALE SIGN 4512012;figuredash;FIGURE DASH 45225A0;filledbox;BLACK SQUARE 45325AC;filledrect;BLACK RECTANGLE 4540035;five;DIGIT FIVE 455215D;fiveeighths;VULGAR FRACTION FIVE EIGHTHS 4560192;florin;LATIN SMALL LETTER F WITH HOOK 4570034;four;DIGIT FOUR 4582044;fraction;FRACTION SLASH 45920A3;franc;FRENCH FRANC SIGN 4600067;g;LATIN SMALL LETTER G 46103B3;gamma;GREEK SMALL LETTER GAMMA 462011F;gbreve;LATIN SMALL LETTER G WITH BREVE 46301E7;gcaron;LATIN SMALL LETTER G WITH CARON 464011D;gcircumflex;LATIN SMALL LETTER G WITH CIRCUMFLEX 4650121;gdotaccent;LATIN SMALL LETTER G WITH DOT ABOVE 46600DF;germandbls;LATIN SMALL LETTER SHARP S 4672207;gradient;NABLA 4680060;grave;GRAVE ACCENT 4690300;gravecomb;COMBINING GRAVE ACCENT 470003E;greater;GREATER-THAN SIGN 4712265;greaterequal;GREATER-THAN OR EQUAL TO 47200AB;guillemotleft;LEFT-POINTING DOUBLE ANGLE QUOTATION MARK 47300BB;guillemotright;RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK 4742039;guilsinglleft;SINGLE LEFT-POINTING ANGLE QUOTATION MARK 475203A;guilsinglright;SINGLE RIGHT-POINTING ANGLE QUOTATION MARK 4760068;h;LATIN SMALL LETTER H 4770127;hbar;LATIN SMALL LETTER H WITH STROKE 4780125;hcircumflex;LATIN SMALL LETTER H WITH CIRCUMFLEX 4792665;heart;BLACK HEART SUIT 4800309;hookabovecomb;COMBINING HOOK ABOVE 4812302;house;HOUSE 48202DD;hungarumlaut;DOUBLE ACUTE ACCENT 483002D;hyphen;HYPHEN-MINUS 4840069;i;LATIN SMALL LETTER I 48500ED;iacute;LATIN SMALL LETTER I WITH ACUTE 486012D;ibreve;LATIN SMALL LETTER I WITH BREVE 48700EE;icircumflex;LATIN SMALL LETTER I WITH CIRCUMFLEX 48800EF;idieresis;LATIN SMALL LETTER I WITH DIAERESIS 48900EC;igrave;LATIN SMALL LETTER I WITH GRAVE 4900133;ij;LATIN SMALL LIGATURE IJ 491012B;imacron;LATIN SMALL LETTER I WITH MACRON 492221E;infinity;INFINITY 493222B;integral;INTEGRAL 4942321;integralbt;BOTTOM HALF INTEGRAL 4952320;integraltp;TOP HALF INTEGRAL 4962229;intersection;INTERSECTION 49725D8;invbullet;INVERSE BULLET 49825D9;invcircle;INVERSE WHITE CIRCLE 499263B;invsmileface;BLACK SMILING FACE 500012F;iogonek;LATIN SMALL LETTER I WITH OGONEK 50103B9;iota;GREEK SMALL LETTER IOTA 50203CA;iotadieresis;GREEK SMALL LETTER IOTA WITH DIALYTIKA 5030390;iotadieresistonos;GREEK SMALL LETTER IOTA WITH DIALYTIKA AND TONOS 50403AF;iotatonos;GREEK SMALL LETTER IOTA WITH TONOS 5050129;itilde;LATIN SMALL LETTER I WITH TILDE 506006A;j;LATIN SMALL LETTER J 5070135;jcircumflex;LATIN SMALL LETTER J WITH CIRCUMFLEX 508006B;k;LATIN SMALL LETTER K 50903BA;kappa;GREEK SMALL LETTER KAPPA 5100138;kgreenlandic;LATIN SMALL LETTER KRA 511006C;l;LATIN SMALL LETTER L 512013A;lacute;LATIN SMALL LETTER L WITH ACUTE 51303BB;lambda;GREEK SMALL LETTER LAMDA 514013E;lcaron;LATIN SMALL LETTER L WITH CARON 5150140;ldot;LATIN SMALL LETTER L WITH MIDDLE DOT 516003C;less;LESS-THAN SIGN 5172264;lessequal;LESS-THAN OR EQUAL TO 518258C;lfblock;LEFT HALF BLOCK 51920A4;lira;LIRA SIGN 5202227;logicaland;LOGICAL AND 52100AC;logicalnot;NOT SIGN 5222228;logicalor;LOGICAL OR 523017F;longs;LATIN SMALL LETTER LONG S 52425CA;lozenge;LOZENGE 5250142;lslash;LATIN SMALL LETTER L WITH STROKE 5262591;ltshade;LIGHT SHADE 527006D;m;LATIN SMALL LETTER M 52800AF;macron;MACRON 5292642;male;MALE SIGN 5302212;minus;MINUS SIGN 5312032;minute;PRIME 53200B5;mu;MICRO SIGN 53300D7;multiply;MULTIPLICATION SIGN 534266A;musicalnote;EIGHTH NOTE 535266B;musicalnotedbl;BEAMED EIGHTH NOTES 536006E;n;LATIN SMALL LETTER N 5370144;nacute;LATIN SMALL LETTER N WITH ACUTE 5380149;napostrophe;LATIN SMALL LETTER N PRECEDED BY APOSTROPHE 5390148;ncaron;LATIN SMALL LETTER N WITH CARON 5400039;nine;DIGIT NINE 5412209;notelement;NOT AN ELEMENT OF 5422260;notequal;NOT EQUAL TO 5432284;notsubset;NOT A SUBSET OF 54400F1;ntilde;LATIN SMALL LETTER N WITH TILDE 54503BD;nu;GREEK SMALL LETTER NU 5460023;numbersign;NUMBER SIGN 547006F;o;LATIN SMALL LETTER O 54800F3;oacute;LATIN SMALL LETTER O WITH ACUTE 549014F;obreve;LATIN SMALL LETTER O WITH BREVE 55000F4;ocircumflex;LATIN SMALL LETTER O WITH CIRCUMFLEX 55100F6;odieresis;LATIN SMALL LETTER O WITH DIAERESIS 5520153;oe;LATIN SMALL LIGATURE OE 55302DB;ogonek;OGONEK 55400F2;ograve;LATIN SMALL LETTER O WITH GRAVE 55501A1;ohorn;LATIN SMALL LETTER O WITH HORN 5560151;ohungarumlaut;LATIN SMALL LETTER O WITH DOUBLE ACUTE 557014D;omacron;LATIN SMALL LETTER O WITH MACRON 55803C9;omega;GREEK SMALL LETTER OMEGA 55903D6;omega1;GREEK PI SYMBOL 56003CE;omegatonos;GREEK SMALL LETTER OMEGA WITH TONOS 56103BF;omicron;GREEK SMALL LETTER OMICRON 56203CC;omicrontonos;GREEK SMALL LETTER OMICRON WITH TONOS 5630031;one;DIGIT ONE 5642024;onedotenleader;ONE DOT LEADER 565215B;oneeighth;VULGAR FRACTION ONE EIGHTH 56600BD;onehalf;VULGAR FRACTION ONE HALF 56700BC;onequarter;VULGAR FRACTION ONE QUARTER 5682153;onethird;VULGAR FRACTION ONE THIRD 56925E6;openbullet;WHITE BULLET 57000AA;ordfeminine;FEMININE ORDINAL INDICATOR 57100BA;ordmasculine;MASCULINE ORDINAL INDICATOR 572221F;orthogonal;RIGHT ANGLE 57300F8;oslash;LATIN SMALL LETTER O WITH STROKE 57401FF;oslashacute;LATIN SMALL LETTER O WITH STROKE AND ACUTE 57500F5;otilde;LATIN SMALL LETTER O WITH TILDE 5760070;p;LATIN SMALL LETTER P 57700B6;paragraph;PILCROW SIGN 5780028;parenleft;LEFT PARENTHESIS 5790029;parenright;RIGHT PARENTHESIS 5802202;partialdiff;PARTIAL DIFFERENTIAL 5810025;percent;PERCENT SIGN 582002E;period;FULL STOP 58300B7;periodcentered;MIDDLE DOT 58422A5;perpendicular;UP TACK 5852030;perthousand;PER MILLE SIGN 58620A7;peseta;PESETA SIGN 58703C6;phi;GREEK SMALL LETTER PHI 58803D5;phi1;GREEK PHI SYMBOL 58903C0;pi;GREEK SMALL LETTER PI 590002B;plus;PLUS SIGN 59100B1;plusminus;PLUS-MINUS SIGN 592211E;prescription;PRESCRIPTION TAKE 593220F;product;N-ARY PRODUCT 5942282;propersubset;SUBSET OF 5952283;propersuperset;SUPERSET OF 596221D;proportional;PROPORTIONAL TO 59703C8;psi;GREEK SMALL LETTER PSI 5980071;q;LATIN SMALL LETTER Q 599003F;question;QUESTION MARK 60000BF;questiondown;INVERTED QUESTION MARK 6010022;quotedbl;QUOTATION MARK 602201E;quotedblbase;DOUBLE LOW-9 QUOTATION MARK 603201C;quotedblleft;LEFT DOUBLE QUOTATION MARK 604201D;quotedblright;RIGHT DOUBLE QUOTATION MARK 6052018;quoteleft;LEFT SINGLE QUOTATION MARK 606201B;quotereversed;SINGLE HIGH-REVERSED-9 QUOTATION MARK 6072019;quoteright;RIGHT SINGLE QUOTATION MARK 608201A;quotesinglbase;SINGLE LOW-9 QUOTATION MARK 6090027;quotesingle;APOSTROPHE 6100072;r;LATIN SMALL LETTER R 6110155;racute;LATIN SMALL LETTER R WITH ACUTE 612221A;radical;SQUARE ROOT 6130159;rcaron;LATIN SMALL LETTER R WITH CARON 6142286;reflexsubset;SUBSET OF OR EQUAL TO 6152287;reflexsuperset;SUPERSET OF OR EQUAL TO 61600AE;registered;REGISTERED SIGN 6172310;revlogicalnot;REVERSED NOT SIGN 61803C1;rho;GREEK SMALL LETTER RHO 61902DA;ring;RING ABOVE 6202590;rtblock;RIGHT HALF BLOCK 6210073;s;LATIN SMALL LETTER S 622015B;sacute;LATIN SMALL LETTER S WITH ACUTE 6230161;scaron;LATIN SMALL LETTER S WITH CARON 624015F;scedilla;LATIN SMALL LETTER S WITH CEDILLA 625015D;scircumflex;LATIN SMALL LETTER S WITH CIRCUMFLEX 6262033;second;DOUBLE PRIME 62700A7;section;SECTION SIGN 628003B;semicolon;SEMICOLON 6290037;seven;DIGIT SEVEN 630215E;seveneighths;VULGAR FRACTION SEVEN EIGHTHS 6312592;shade;MEDIUM SHADE 63203C3;sigma;GREEK SMALL LETTER SIGMA 63303C2;sigma1;GREEK SMALL LETTER FINAL SIGMA 634223C;similar;TILDE OPERATOR 6350036;six;DIGIT SIX 636002F;slash;SOLIDUS 637263A;smileface;WHITE SMILING FACE 6380020;space;SPACE 6392660;spade;BLACK SPADE SUIT 64000A3;sterling;POUND SIGN 641220B;suchthat;CONTAINS AS MEMBER 6422211;summation;N-ARY SUMMATION 643263C;sun;WHITE SUN WITH RAYS 6440074;t;LATIN SMALL LETTER T 64503C4;tau;GREEK SMALL LETTER TAU 6460167;tbar;LATIN SMALL LETTER T WITH STROKE 6470165;tcaron;LATIN SMALL LETTER T WITH CARON 6482234;therefore;THEREFORE 64903B8;theta;GREEK SMALL LETTER THETA 65003D1;theta1;GREEK THETA SYMBOL 65100FE;thorn;LATIN SMALL LETTER THORN 6520033;three;DIGIT THREE 653215C;threeeighths;VULGAR FRACTION THREE EIGHTHS 65400BE;threequarters;VULGAR FRACTION THREE QUARTERS 65502DC;tilde;SMALL TILDE 6560303;tildecomb;COMBINING TILDE 6570384;tonos;GREEK TONOS 6582122;trademark;TRADE MARK SIGN 65925BC;triagdn;BLACK DOWN-POINTING TRIANGLE 66025C4;triaglf;BLACK LEFT-POINTING POINTER 66125BA;triagrt;BLACK RIGHT-POINTING POINTER 66225B2;triagup;BLACK UP-POINTING TRIANGLE 6630032;two;DIGIT TWO 6642025;twodotenleader;TWO DOT LEADER 6652154;twothirds;VULGAR FRACTION TWO THIRDS 6660075;u;LATIN SMALL LETTER U 66700FA;uacute;LATIN SMALL LETTER U WITH ACUTE 668016D;ubreve;LATIN SMALL LETTER U WITH BREVE 66900FB;ucircumflex;LATIN SMALL LETTER U WITH CIRCUMFLEX 67000FC;udieresis;LATIN SMALL LETTER U WITH DIAERESIS 67100F9;ugrave;LATIN SMALL LETTER U WITH GRAVE 67201B0;uhorn;LATIN SMALL LETTER U WITH HORN 6730171;uhungarumlaut;LATIN SMALL LETTER U WITH DOUBLE ACUTE 674016B;umacron;LATIN SMALL LETTER U WITH MACRON 675005F;underscore;LOW LINE 6762017;underscoredbl;DOUBLE LOW LINE 677222A;union;UNION 6782200;universal;FOR ALL 6790173;uogonek;LATIN SMALL LETTER U WITH OGONEK 6802580;upblock;UPPER HALF BLOCK 68103C5;upsilon;GREEK SMALL LETTER UPSILON 68203CB;upsilondieresis;GREEK SMALL LETTER UPSILON WITH DIALYTIKA 68303B0;upsilondieresistonos;GREEK SMALL LETTER UPSILON WITH DIALYTIKA AND TONOS 68403CD;upsilontonos;GREEK SMALL LETTER UPSILON WITH TONOS 685016F;uring;LATIN SMALL LETTER U WITH RING ABOVE 6860169;utilde;LATIN SMALL LETTER U WITH TILDE 6870076;v;LATIN SMALL LETTER V 6880077;w;LATIN SMALL LETTER W 6891E83;wacute;LATIN SMALL LETTER W WITH ACUTE 6900175;wcircumflex;LATIN SMALL LETTER W WITH CIRCUMFLEX 6911E85;wdieresis;LATIN SMALL LETTER W WITH DIAERESIS 6922118;weierstrass;SCRIPT CAPITAL P 6931E81;wgrave;LATIN SMALL LETTER W WITH GRAVE 6940078;x;LATIN SMALL LETTER X 69503BE;xi;GREEK SMALL LETTER XI 6960079;y;LATIN SMALL LETTER Y 69700FD;yacute;LATIN SMALL LETTER Y WITH ACUTE 6980177;ycircumflex;LATIN SMALL LETTER Y WITH CIRCUMFLEX 69900FF;ydieresis;LATIN SMALL LETTER Y WITH DIAERESIS 70000A5;yen;YEN SIGN 7011EF3;ygrave;LATIN SMALL LETTER Y WITH GRAVE 702007A;z;LATIN SMALL LETTER Z 703017A;zacute;LATIN SMALL LETTER Z WITH ACUTE 704017E;zcaron;LATIN SMALL LETTER Z WITH CARON 705017C;zdotaccent;LATIN SMALL LETTER Z WITH DOT ABOVE 7060030;zero;DIGIT ZERO 70703B6;zeta;GREEK SMALL LETTER ZETA 708#END 709""" 710 711 712class AGLError(Exception): 713 pass 714 715AGL2UV = {} 716UV2AGL = {} 717 718def _builddicts(): 719 import re 720 721 lines = _aglText.splitlines() 722 723 parseAGL_RE = re.compile("([0-9A-F]{4});([A-Za-z_0-9.]+);.*?$") 724 725 for line in lines: 726 if not line or line[:1] == '#': 727 continue 728 m = parseAGL_RE.match(line) 729 if not m: 730 raise AGLError("syntax error in glyphlist.txt: %s" % repr(line[:20])) 731 unicode = m.group(1) 732 assert len(unicode) == 4 733 unicode = int(unicode, 16) 734 glyphName = tostr(m.group(2)) 735 if glyphName in AGL2UV: 736 # the above table contains identical duplicates 737 assert AGL2UV[glyphName] == unicode 738 else: 739 AGL2UV[glyphName] = unicode 740 UV2AGL[unicode] = glyphName 741 742_builddicts() 743 744 745def toUnicode(glyph, isZapfDingbats=False): 746 """Convert glyph names to Unicode, such as 'longs_t.oldstyle' --> u'ſt' 747 748 If isZapfDingbats is True, the implementation recognizes additional 749 glyph names (as required by the AGL specification). 750 """ 751 # https://github.com/adobe-type-tools/agl-specification#2-the-mapping 752 # 753 # 1. Drop all the characters from the glyph name starting with 754 # the first occurrence of a period (U+002E; FULL STOP), if any. 755 glyph = glyph.split(".", 1)[0] 756 757 # 2. Split the remaining string into a sequence of components, 758 # using underscore (U+005F; LOW LINE) as the delimiter. 759 components = glyph.split("_") 760 761 # 3. Map each component to a character string according to the 762 # procedure below, and concatenate those strings; the result 763 # is the character string to which the glyph name is mapped. 764 result = [_glyphComponentToUnicode(c, isZapfDingbats) 765 for c in components] 766 return "".join(result) 767 768 769def _glyphComponentToUnicode(component, isZapfDingbats): 770 # If the font is Zapf Dingbats (PostScript FontName: ZapfDingbats), 771 # and the component is in the ITC Zapf Dingbats Glyph List, then 772 # map it to the corresponding character in that list. 773 dingbat = _zapfDingbatsToUnicode(component) if isZapfDingbats else None 774 if dingbat: 775 return dingbat 776 777 # Otherwise, if the component is in AGL, then map it 778 # to the corresponding character in that list. 779 # 780 # TODO: We currently use the AGLFN (Adobe glyph list for new fonts), 781 # although the spec actually mandates the legacy AGL which is 782 # a superset of the AGLFN. 783 # https://github.com/fonttools/fonttools/issues/775 784 uchar = AGL2UV.get(component) 785 if uchar: 786 return unichr(uchar) 787 788 # Otherwise, if the component is of the form "uni" (U+0075, 789 # U+006E, and U+0069) followed by a sequence of uppercase 790 # hexadecimal digits (0–9 and A–F, meaning U+0030 through 791 # U+0039 and U+0041 through U+0046), if the length of that 792 # sequence is a multiple of four, and if each group of four 793 # digits represents a value in the ranges 0000 through D7FF 794 # or E000 through FFFF, then interpret each as a Unicode scalar 795 # value and map the component to the string made of those 796 # scalar values. Note that the range and digit-length 797 # restrictions mean that the "uni" glyph name prefix can be 798 # used only with UVs in the Basic Multilingual Plane (BMP). 799 uni = _uniToUnicode(component) 800 if uni: 801 return uni 802 803 # Otherwise, if the component is of the form "u" (U+0075) 804 # followed by a sequence of four to six uppercase hexadecimal 805 # digits (0–9 and A–F, meaning U+0030 through U+0039 and 806 # U+0041 through U+0046), and those digits represents a value 807 # in the ranges 0000 through D7FF or E000 through 10FFFF, then 808 # interpret it as a Unicode scalar value and map the component 809 # to the string made of this scalar value. 810 uni = _uToUnicode(component) 811 if uni: 812 return uni 813 814 # Otherwise, map the component to an empty string. 815 return '' 816 817 818# https://github.com/adobe-type-tools/agl-aglfn/blob/master/zapfdingbats.txt 819_AGL_ZAPF_DINGBATS = ( 820 " ✁✂✄☎✆✝✞✟✠✡☛☞✌✍✎✏✑✒✓✔✕✖✗✘✙✚✛✜✢✣✤✥✦✧★✩✪✫✬✭✮✯✰✱✲✳✴✵✶✷✸✹✺✻✼✽✾✿❀" 821 "❁❂❃❄❅❆❇❈❉❊❋●❍■❏❑▲▼◆❖ ◗❘❙❚❯❱❲❳❨❩❬❭❪❫❴❵❛❜❝❞❡❢❣❤✐❥❦❧♠♥♦♣ ✉✈✇" 822 "①②③④⑤⑥⑦⑧⑨⑩❶❷❸❹❺❻❼❽❾❿➀➁➂➃➄➅➆➇➈➉➊➋➌➍➎➏➐➑➒➓➔→➣↔" 823 "↕➙➛➜➝➞➟➠➡➢➤➥➦➧➨➩➫➭➯➲➳➵➸➺➻➼➽➾➚➪➶➹➘➴➷➬➮➱✃❐❒❮❰") 824 825 826def _zapfDingbatsToUnicode(glyph): 827 """Helper for toUnicode().""" 828 if len(glyph) < 2 or glyph[0] != 'a': 829 return None 830 try: 831 gid = int(glyph[1:]) 832 except ValueError: 833 return None 834 if gid < 0 or gid >= len(_AGL_ZAPF_DINGBATS): 835 return None 836 uchar = _AGL_ZAPF_DINGBATS[gid] 837 return uchar if uchar != ' ' else None 838 839 840_re_uni = re.compile("^uni([0-9A-F]+)$") 841 842 843def _uniToUnicode(component): 844 """Helper for toUnicode() to handle "uniABCD" components.""" 845 match = _re_uni.match(component) 846 if match is None: 847 return None 848 digits = match.group(1) 849 if len(digits) % 4 != 0: 850 return None 851 chars = [int(digits[i : i + 4], 16) 852 for i in range(0, len(digits), 4)] 853 if any(c >= 0xD800 and c <= 0xDFFF for c in chars): 854 # The AGL specification explicitly excluded surrogate pairs. 855 return None 856 return ''.join([unichr(c) for c in chars]) 857 858 859_re_u = re.compile("^u([0-9A-F]{4,6})$") 860 861 862def _uToUnicode(component): 863 """Helper for toUnicode() to handle "u1ABCD" components.""" 864 match = _re_u.match(component) 865 if match is None: 866 return None 867 digits = match.group(1) 868 try: 869 value = int(digits, 16) 870 except ValueError: 871 return None 872 if ((value >= 0x0000 and value <= 0xD7FF) or 873 (value >= 0xE000 and value <= 0x10FFFF)): 874 return unichr(value) 875 return None 876