• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1#!/usr/bin/perl
2# Copyright (C) 2016 and later: Unicode, Inc. and others.
3# License & terms of use: http://www.unicode.org/copyright.html
4# Copyright (c) 2001-2015 International Business Machines
5# Corporation and others. All Rights Reserved.
6
7####################################################################################
8# filterRFC3454.pl:
9# This tool filters the RFC-3454 txt file for StringPrep tables and creates a table
10# to be used in NamePrepProfile
11#
12# Author: Ram Viswanadha
13#
14####################################################################################
15
16use File::Find;
17use File::Basename;
18use IO::File;
19use Cwd;
20use File::Copy;
21use Getopt::Long;
22use File::Path;
23use File::Copy;
24use Time::localtime;
25
# Header banners written at the top of the generated file.  They are package
# globals because main() and the createCaseMap* subs read them by name.

# ICU copyright banner; the %d is filled in by main() through printf.
$icu_copyright =
      "#####################################################################\n"
    . "# Copyright (c) %d, International Business Machines Corporation and\n"
    . "# others. All Rights Reserved.\n"
    . "#####################################################################\n"
    . "\n";

# Attribution for the RFC text the tables are extracted from.
$copyright =
      "###################\n"
    . "# This file was generated from RFC 3454 (http://www.ietf.org/rfc/rfc3454.txt)\n"
    . "# Copyright (C) The Internet Society (2002).  All Rights Reserved. \n"
    . "###################\n"
    . "\n";

# Records the command line.  This is built before GetOptions() runs in main(),
# because GetOptions() removes the options it recognises from @ARGV.
$warning =
      "###################\n"
    . "# WARNING: This table is generated by filterRFC3454.pl tool with\n"
    . "# options: @ARGV \n"
    . "###################\n"
    . "\n";

# Run the program.
main();
31
32#---------------------------------------------------------------------
33# The main program
34
# main: parse the command line, write the output-file header, then copy the
# requested tables from the RFC text into the output file.
#
# Reads the package globals $icu_copyright, $copyright and $warning.
# Calls usage(1) when the options are invalid or a required one is missing,
# and dies when --B2 and --B3 are combined or a file cannot be opened.
sub main {
    my ($sourceDir, $destDir, $srcFileName, $destFileName);
    my ($a1, $b1, $b2, $b3);
    my ($c11, $c12, $c21, $c22, $c3, $c4, $c5, $c6, $c7, $c8, $c9);
    my ($writeISCSIProhibitedExtra, $writeXMPPNodeProhibitedExtra);
    my ($writeSASLMap, $writeLDAPMap, $norm, $checkBidi);

    my $parsed = GetOptions(
        "--sourcedir=s"     => \$sourceDir,
        "--destdir=s"       => \$destDir,
        "--src-filename=s"  => \$srcFileName,
        "--dest-filename=s" => \$destFileName,
        "--A1"              => \$a1,
        "--B1"              => \$b1,
        "--B2"              => \$b2,
        "--B3"              => \$b3,
        "--C11"             => \$c11,
        "--C12"             => \$c12,
        "--C21"             => \$c21,
        "--C22"             => \$c22,
        "--C3"              => \$c3,
        "--C4"              => \$c4,
        "--C5"              => \$c5,
        "--C6"              => \$c6,
        "--C7"              => \$c7,
        "--C8"              => \$c8,
        "--C9"              => \$c9,
        "--iscsi"           => \$writeISCSIProhibitedExtra,
        "--xmpp-node"       => \$writeXMPPNodeProhibitedExtra,
        "--sasl"            => \$writeSASLMap,
        "--ldap"            => \$writeLDAPMap,
        "--normalize"       => \$norm,
        "--check-bidi"      => \$checkBidi,
    );

    # GetOptions returns false after reporting an unknown or malformed option;
    # treat that, and any missing required option, as a usage error.
    usage(1) unless $parsed;
    usage(1) unless defined($sourceDir) && defined($destDir)
                 && defined($srcFileName) && defined($destFileName);

    # Reject the conflicting combination before any file is touched, so a bad
    # command line does not leave a half-written output file behind.
    if (defined($b2) && defined($b3)) {
        die "ERROR: --B2 and --B3 are both specified\!\n";
    }

    my $infile = $sourceDir."/".$srcFileName;
    my $inFH = IO::File->new($infile, "r")
        or die "could not open the file $infile for reading: $! \n";
    my $outfile = $destDir."/".$destFileName;

    # Start from a fresh file; every table writer below re-opens it in
    # append mode.
    unlink($outfile);
    my $outFH = IO::File->new($outfile, "a")
        or die "could not open the file $outfile for writing: $! \n";

    printf {$outFH} $icu_copyright, localtime->year() + 1900;
    print {$outFH} $copyright;
    print {$outFH} $warning;
    print {$outFH} "\@normalize;;\n"  if defined $norm;
    print {$outFH} "\@check-bidi;;\n" if defined $checkBidi;
    print {$outFH} "\n";
    close($outFH)
        or die "could not close the file $outfile: $! \n";

    # One row per RFC 3454 table: pattern for the "Start Table" line, whether
    # the table was requested, and the sub that copies it.
    # NOTE(review): table A.1 is written whether or not --A1 was given, exactly
    # as before; confirm whether --A1 was meant to gate it.
    my @tables = (
        [ qr/A\.1/,     1,             \&createUnassignedTable ],
        [ qr/B\.1/,     defined($b1),  \&createMapToNothing    ],
        [ qr/B\.2/,     defined($b2),  \&createCaseMapNorm     ],
        [ qr/B\.3/,     defined($b3),  \&createCaseMapNoNorm   ],
        [ qr/C\.1\.1/,  defined($c11), \&createProhibitedTable ],
        [ qr/C\.1\.2/,  defined($c12), \&createProhibitedTable ],
        [ qr/C\.2\.1/,  defined($c21), \&createProhibitedTable ],
        [ qr/C\.2\.2/,  defined($c22), \&createProhibitedTable ],
        [ qr/C\.3/,     defined($c3),  \&createProhibitedTable ],
        [ qr/C\.4/,     defined($c4),  \&createProhibitedTable ],
        [ qr/C\.5/,     defined($c5),  \&createProhibitedTable ],
        [ qr/C\.6/,     defined($c6),  \&createProhibitedTable ],
        [ qr/C\.7/,     defined($c7),  \&createProhibitedTable ],
        [ qr/C\.8/,     defined($c8),  \&createProhibitedTable ],
        [ qr/C\.9/,     defined($c9),  \&createProhibitedTable ],
    );

    while (defined(my $line = <$inFH>)) {
        next unless $line =~ /Start\sTable/;
        for my $entry (@tables) {
            my ($pattern, $wanted, $handler) = @$entry;
            next unless $line =~ $pattern;
            # The handler consumes input up to the matching "End Table" line.
            # Only createProhibitedTable uses the third argument.
            $handler->($inFH, $outfile, $line) if $wanted;
            last;
        }
    }

    # Profile-specific additions that do not come from the RFC 3454 text.
    create_iSCSIExtraProhibitedTable($inFH, $outfile)
        if defined $writeISCSIProhibitedExtra;
    create_XMPPNodeExtraProhibitedTable($inFH, $outfile)
        if defined $writeXMPPNodeProhibitedExtra;
    create_SASLMapTable($inFH, $outfile) if defined $writeSASLMap;
    create_LDAPMapTable($inFH, $outfile) if defined $writeLDAPMap;

    close($inFH);
}
156
157#-----------------------------------------------------------------------
# readPrint: copy one table from the RFC text to the output file.
#
#   $inFH    - input handle, positioned just after a "Start Table" line
#   $outFH   - output handle
#   $comment - header text written (followed by a newline) before the entries
#   $table   - table id; /A/ rewrites entries as UNASSIGNED, /B\.1/ and
#              /B\.[23]/ rewrite the description as MAP, /C/ rewrites entries
#              as PROHIBITED
#
# Reads until a line containing "End Table", then writes the code-point total
# and returns.  If the input ends first, the sub returns without writing a
# total.
sub readPrint {
    my ($inFH, $outFH, $comment, $table) = @_;
    my $count = 0;

    print {$outFH} $comment."\n";
    while (defined(my $line = <$inFH>)) {
        # Skip page headers/footers, form feeds and blank lines that the RFC
        # pagination drops into the middle of a table.
        next if $line =~ /Hoffman\s\&\sBlanchet/;
        next if $line =~ /RFC\s3454/;
        next if $line =~ /\f/;
        next if $line eq "\n";

        if ($line =~ /End\sTable/) {
            print {$outFH} "\n# Total code points $count\n\n";
            return;
        }

        # Debug echo, controlled by the package global $print.
        print STDOUT $line if defined $print && $print == 1;

        $line =~ s/^\s+//;
        # Turn "XXXX-YYYY" into "XXXX..YYYY".  Only a leading code-point range
        # is rewritten, so a hyphen in the descriptive text is left alone.
        $line =~ s/^([0-9A-Fa-f]+)-([0-9A-Fa-f]+)/$1..$2/;
        # Make sure the code field is terminated by a semicolon.
        $line =~ s/$/;/ unless $line =~ /\;/;

        if ($table =~ /A/) {
            my ($code) = split /;/, $line;
            $line = $code."; ; UNASSIGNED\n";
        }
        elsif ($table =~ /B\.1/) {
            $line =~ s/Map to nothing/MAP/;
        }
        elsif ($table =~ /B\.[23]/) {
            $line =~ s/Case map/MAP/;
            $line =~ s/Additional folding/MAP/;
        }
        elsif ($table =~ /C/) {
            my ($code) = split /;/, $line;
            $line = $code."; ; PROHIBITED\n";
        }

        # A range contributes every code point it spans; anything else is one.
        my ($field) = split /;/, $line;
        if (defined $field && $field =~ /^([0-9A-Fa-f]+)\.\.([0-9A-Fa-f]+)/) {
            my $start = atoi($1);
            my $end   = atoi($2);
            $count += $end - $start + 1 if $end >= $start;
        }
        else {
            $count++;
        }
        print {$outFH} $line;
    }
}
209#-----------------------------------------------------------------------
# atoi: convert a string of hexadecimal digits (e.g. "024F") to its numeric
# value.  The previous digit-by-digit loop added each character numerically,
# so the letters A-F counted as 0 and values came out wrong.
# Whitespace is ignored; an empty or undefined argument yields 0.
sub atoi {
    my ($digits) = @_;
    return 0 unless defined $digits;
    $digits =~ s/\s+//g;
    return 0 unless length $digits;
    return hex($digits);
}
217#-----------------------------------------------------------------------
# createUnassignedTable: append RFC 3454 table A.1 to $outfile.
#   $inFH    - input handle positioned just after the table's start line
#   $outfile - path of the output file, opened here in append mode
# Dies if the output file cannot be opened or closed.
sub createUnassignedTable {
    my ($inFH, $outfile) = @_;
    my $outFH = IO::File->new($outfile, "a")
        or die "could not open the file $outfile for writing: $! \n";
    my $comment = "# This table contains code points from Table A.1 from RFC 3454\n";
    readPrint($inFH, $outFH, $comment, "A");
    # Buffered write errors surface at close, so check it.
    close($outFH)
        or die "could not close the file $outfile: $! \n";
}
226#-----------------------------------------------------------------------
# createMapToNothing: append RFC 3454 table B.1 to $outfile.
#   $inFH    - input handle positioned just after the table's start line
#   $outfile - path of the output file, opened here in append mode
# Dies if the output file cannot be opened or closed.
sub createMapToNothing {
    my ($inFH, $outfile) = @_;
    my $outFH = IO::File->new($outfile, "a")
        or die "could not open the file $outfile for writing: $! \n";
    my $comment = "# This table contains code points from Table B.1 from RFC 3454\n";
    readPrint($inFH, $outFH, $comment, "B.1");
    # Buffered write errors surface at close, so check it.
    close($outFH)
        or die "could not close the file $outfile: $! \n";
}
235#-----------------------------------------------------------------------
# createCaseMapNorm: append RFC 3454 table B.2 to $outfile, preceded by the
# package-global $warning banner.
#   $inFH    - input handle positioned just after the table's start line
#   $outfile - path of the output file, opened here in append mode
# Dies if the output file cannot be opened or closed.
sub createCaseMapNorm {
    my ($inFH, $outfile) = @_;
    my $outFH = IO::File->new($outfile, "a")
        or die "could not open the file $outfile for writing: $! \n";
    my $comment = $warning."# This table contains code points from Table B.2 from RFC 3454\n";
    readPrint($inFH, $outFH, $comment, "B.2");
    # Buffered write errors surface at close, so check it.
    close($outFH)
        or die "could not close the file $outfile: $! \n";
}
244#-----------------------------------------------------------------------
# createCaseMapNoNorm: append RFC 3454 table B.3 to $outfile, preceded by the
# package-global $warning banner.
#   $inFH    - input handle positioned just after the table's start line
#   $outfile - path of the output file, opened here in append mode
# Dies if the output file cannot be opened or closed.
sub createCaseMapNoNorm {
    my ($inFH, $outfile) = @_;
    my $outFH = IO::File->new($outfile, "a")
        or die "could not open the file $outfile for writing: $! \n";
    my $comment = $warning."# This table contains code points from Table B.3 from RFC 3454\n";
    readPrint($inFH, $outFH, $comment, "B.3");
    # Buffered write errors surface at close, so check it.
    close($outFH)
        or die "could not close the file $outfile: $! \n";
}
253#-----------------------------------------------------------------------
# createProhibitedTable: append one of the RFC 3454 C.* tables to $outfile.
#   $inFH    - input handle positioned just after the table's start line
#   $outfile - path of the output file, opened here in append mode
#   $line    - the "Start Table" line; with "Start" and the dashes removed it
#              becomes the table's header comment
# Dies if the output file cannot be opened or closed.
sub createProhibitedTable {
    my ($inFH, $outfile, $line) = @_;

    # Work on a copy so the caller's line is never altered.
    my $title = $line;
    $title =~ s/Start//;
    $title =~ s/-//g;
    my $comment = "# code points from $title";

    my $outFH = IO::File->new($outfile, "a")
        or die "could not open the file $outfile for writing: $! \n";
    readPrint($inFH, $outFH, $comment, "C");
    # Buffered write errors surface at close, so check it.
    close($outFH)
        or die "could not close the file $outfile: $! \n";
}

265
266#-----------------------------------------------------------------------
# create_iSCSIExtraProhibitedTable: append the additional prohibited code
# points of the iSCSI profile (rfc3722.txt) to $outfile.
#   $inFH    - accepted so the call matches the other table writers; unused
#   $outfile - path of the output file, opened here in append mode
# Dies if the output file cannot be opened or closed.
sub create_iSCSIExtraProhibitedTable {
    my ($inFH, $outfile) = @_;

    my $outFH = IO::File->new($outfile, "a")
        or die "could not open the file $outfile for writing: $! \n";

    print {$outFH} "# Additional prohibitions from iSCSI profile (rfc3722.txt)\n\n";
    for my $entry (qw(0021..002C 002F 003B..0040 005B..0060 007B..007E 3002)) {
        print {$outFH} "$entry; ; PROHIBITED\n";
    }
    # 12 + 1 + 6 + 6 + 4 + 1 code points.
    print {$outFH} "\n# Total code points 30\n";

    # Buffered write errors surface at close, so check it.
    close($outFH)
        or die "could not close the file $outfile: $! \n";
}
283#-----------------------------------------------------------------------
# create_XMPPNodeExtraProhibitedTable: append the additional prohibited code
# points of the XMPP Nodeprep profile (rfc3920.txt) to $outfile.
#   $inFH    - accepted so the call matches the other table writers; unused
#   $outfile - path of the output file, opened here in append mode
# Dies if the output file cannot be opened or closed.
sub create_XMPPNodeExtraProhibitedTable {
    my ($inFH, $outfile) = @_;

    my $outFH = IO::File->new($outfile, "a")
        or die "could not open the file $outfile for writing: $! \n";

    my @prohibited = qw(0022 0026 0027 002F 003A 003C 003E 0040);

    print {$outFH} "# Additional prohibitions from XMPP Nodeprep profile (rfc3920.txt)\n\n";
    print {$outFH} "$_; ; PROHIBITED\n" for @prohibited;
    print {$outFH} "\n# Total code points ".scalar(@prohibited)."\n";

    # Buffered write errors surface at close, so check it.
    close($outFH)
        or die "could not close the file $outfile: $! \n";
}
302#-----------------------------------------------------------------------
# create_SASLMapTable: append the mapping table of the SASL profile
# (rfc4013.txt) to $outfile.
#   $inFH    - accepted so the call matches the other table writers; unused
#   $outfile - path of the output file, opened here in append mode
# Dies if the output file cannot be opened or closed.
sub create_SASLMapTable {
    my ($inFH, $outfile) = @_;

    # Non-ASCII space characters [C.1.2], mapped to SPACE (U+0020).
    my @to_space = (
        0x00A0, 0x1680, 0x2000 .. 0x200B, 0x202F, 0x205F, 0x3000,
    );
    # The "commonly mapped to nothing" characters, mapped to nothing.
    # U+200B is not in this list because it is mapped to SPACE above.
    my @to_nothing = (
        0x00AD, 0x034F, 0x1806, 0x180B .. 0x180D,
        0x200C, 0x200D, 0x2060, 0xFE00 .. 0xFE0F, 0xFEFF,
    );

    my $outFH = IO::File->new($outfile, "a")
        or die "could not open the file $outfile for writing: $! \n";

    print {$outFH} "# Map table for SASL profile (rfc4013.txt)\n\n";
    printf {$outFH} "%04X; 0020; MAP\n", $_ for @to_space;
    printf {$outFH} "%04X; ; MAP\n",     $_ for @to_nothing;
    # 17 + 26 = 43 code points.
    print {$outFH} "\n# Total code points ".(@to_space + @to_nothing)."\n";

    # Buffered write errors surface at close, so check it.
    close($outFH)
        or die "could not close the file $outfile: $! \n";
}
359#-----------------------------------------------------------------------
# create_LDAPMapTable: append the mapping table of the LDAP profile
# (rfc4518.txt) to $outfile.
#   $inFH    - accepted so the call matches the other table writers; unused
#   $outfile - path of the output file, opened here in append mode
# Dies if the output file cannot be opened or closed.
sub create_LDAPMapTable {
    my ($inFH, $outfile) = @_;

    # Each group is [ mapping target, code points... ], written in this order.
    # An empty target means "mapped to nothing".
    my @groups = (
        # SOFT HYPHEN (U+00AD), MONGOLIAN TODO SOFT HYPHEN (U+1806),
        # COMBINING GRAPHEME JOINER (U+034F), the VARIATION SELECTORs
        # (U+180B-180D, FE00-FE0F) and the OBJECT REPLACEMENT CHARACTER
        # (U+FFFC) are mapped to nothing.
        [ '',     0x00AD, 0x034F, 0x1806, 0x180B .. 0x180D,
                  0xFE00 .. 0xFE0F, 0xFFFC ],

        # CHARACTER TABULATION (U+0009), LINE FEED (U+000A), LINE TABULATION
        # (U+000B), FORM FEED (U+000C), CARRIAGE RETURN (U+000D) and
        # NEXT LINE (U+0085) are mapped to SPACE (U+0020).
        [ '0020', 0x0009 .. 0x000D, 0x0085 ],

        # All other control code points (e.g., Cc) and code points with a
        # control function (e.g., Cf) are mapped to nothing.
        [ '',     0x0000 .. 0x0008, 0x000E .. 0x001F, 0x007F .. 0x0084,
                  0x0086 .. 0x009F, 0x06DD, 0x070F, 0x180E,
                  0x200C .. 0x200F, 0x202A .. 0x202E, 0x2060 .. 0x2063,
                  0x206A .. 0x206F, 0xFEFF, 0xFFF9 .. 0xFFFB,
                  0x1D173 .. 0x1D17A, 0xE0001, 0xE0020 .. 0xE007F ],

        # ZERO WIDTH SPACE (U+200B) is mapped to nothing.
        [ '',     0x200B ],

        # All other code points with a Separator property (Zs, Zl or Zp) are
        # mapped to SPACE (U+0020).
        [ '0020', 0x00A0, 0x1680, 0x2000 .. 0x200A, 0x2028 .. 0x2029,
                  0x202F, 0x205F, 0x3000 ],
    );

    my $outFH = IO::File->new($outfile, "a")
        or die "could not open the file $outfile for writing: $! \n";

    print {$outFH} "# Map table for LDAP profile (rfc4518.txt)\n\n";

    my $total = 0;
    for my $group (@groups) {
        my ($target, @code_points) = @$group;
        # %04X pads BMP code points to four digits and leaves the five-digit
        # supplementary ones as they are.
        printf {$outFH} "%04X; %s; MAP\n", $_, $target for @code_points;
        $total += @code_points;
    }

    # 23 + 6 + 190 + 1 + 18 = 238 code points.
    print {$outFH} "\n# Total code points $total\n";

    # Buffered write errors surface at close, so check it.
    close($outFH)
        or die "could not close the file $outfile: $! \n";
}
635#-----------------------------------------------------------------------
# usage: print the help text and exit.
#   $status - optional exit status, default 0.  With a non-zero status the
#             text goes to STDERR so a failed invocation can be detected;
#             otherwise it goes to STDOUT.
# Never returns.
sub usage {
    my ($status) = @_;
    $status = 0 unless defined $status;
    my $fh = $status ? \*STDERR : \*STDOUT;
    print {$fh} <<"END";
Usage:
filterRFC3454.pl
Options:
        --sourcedir=<directory>
        --destdir=<directory>
        --src-filename=<name of RFC file>
        --dest-filename=<name of destination file>
        --A1             Generate data for table A.1
        --B1             Generate data for table B.1
        --B2             Generate data for table B.2
        --B3             Generate data for table B.3
        --C11            Generate data for table C.1.1
        --C12            Generate data for table C.1.2
        --C21            Generate data for table C.2.1
        --C22            Generate data for table C.2.2
        --C3             Generate data for table C.3
        --C4             Generate data for table C.4
        --C5             Generate data for table C.5
        --C6             Generate data for table C.6
        --C7             Generate data for table C.7
        --C8             Generate data for table C.8
        --C9             Generate data for table C.9
        --iscsi          Generate data for iSCSI extra prohibited table
        --xmpp-node      Generate data for XMPP extra prohibited table
        --sasl           Generate data for SASL map table
        --ldap           Generate data for LDAP map table
        --normalize      Embed the normalization directive in the output file
        --check-bidi     Embed the check bidi directove in the output file

Note, --B2 and --B3 are mutually exclusive.

e.g.: filterRFC3454.pl --sourcedir=. --destdir=./output --src-filename=rfc3454.txt  --dest-filename=NamePrepProfile.txt --A1 --B1 --B2 --C12 --C22 --C3 --C4 --C5 --C6 --C7 --C8 --C9 --normalize --check-bidi

filterRFC3454.pl filters the RFC file and creates String prep table files.
The RFC text can be downloaded from ftp://ftp.rfc-editor.org/in-notes/rfc3454.txt

END
    exit($status);
}
677
678
679