#!/usr/bin/perl
# Copyright (C) 2016 and later: Unicode, Inc. and others.
# License & terms of use: http://www.unicode.org/copyright.html
# Copyright (c) 2001-2015 International Business Machines
# Corporation and others. All Rights Reserved.

####################################################################################
# filterRFC3454.pl:
# This tool filters the RFC-3454 txt file for StringPrep tables and creates a table
# to be used in NamePrepProfile
#
# Author: Ram Viswanadha
#
####################################################################################

use strict;
use warnings;

use File::Find;
use File::Basename;
use IO::File;
use Cwd;
use File::Copy;
use Getopt::Long;
use File::Path;
use File::Copy;
use Time::localtime;

# Header templates written at the top of the generated file.
# $icu_copyright is a printf format: %d receives the current year.
my $icu_copyright = "#####################################################################\n# Copyright (c) %d, International Business Machines Corporation and\n# others. All Rights Reserved.\n#####################################################################\n\n";
my $copyright = "###################\n# This file was generated from RFC 3454 (http://www.ietf.org/rfc/rfc3454.txt)\n# Copyright (C) The Internet Society (2002). All Rights Reserved. \n###################\n\n";
# NOTE: @ARGV is interpolated here, at load time, i.e. before GetOptions()
# removes the options from it. Do not move this assignment below main().
my $warning = "###################\n# WARNING: This table is generated by filterRFC3454.pl tool with\n# options: @ARGV \n###################\n\n";

#---------------------------------------------------------------------
# The main program
#
# Parses the command line, writes the file header to
# <destdir>/<dest-filename>, then scans <sourcedir>/<src-filename> for
# "Start Table X" markers and appends every requested table, followed by
# the requested profile-specific extra tables.
sub main {
    my %opt;
    GetOptions(
        'sourcedir=s'     => \$opt{'sourcedir'},
        'destdir=s'       => \$opt{'destdir'},
        'src-filename=s'  => \$opt{'src-filename'},
        'dest-filename=s' => \$opt{'dest-filename'},
        'A1'              => \$opt{'A1'},
        'B1'              => \$opt{'B1'},
        'B2'              => \$opt{'B2'},
        'B3'              => \$opt{'B3'},
        'C11'             => \$opt{'C11'},
        'C12'             => \$opt{'C12'},
        'C21'             => \$opt{'C21'},
        'C22'             => \$opt{'C22'},
        'C3'              => \$opt{'C3'},
        'C4'              => \$opt{'C4'},
        'C5'              => \$opt{'C5'},
        'C6'              => \$opt{'C6'},
        'C7'              => \$opt{'C7'},
        'C8'              => \$opt{'C8'},
        'C9'              => \$opt{'C9'},
        'iscsi'           => \$opt{'iscsi'},
        'xmpp-node'       => \$opt{'xmpp-node'},
        'sasl'            => \$opt{'sasl'},
        'ldap'            => \$opt{'ldap'},
        'normalize'       => \$opt{'normalize'},
        'check-bidi'      => \$opt{'check-bidi'},
    );

    for my $required (qw(sourcedir destdir src-filename dest-filename)) {
        usage() unless defined $opt{$required};
    }

    # Validate before touching any file so that a bad command line does not
    # leave a header-only output file behind.
    if (defined $opt{'B2'} && defined $opt{'B3'}) {
        die "ERROR: --B2 and --B3 are both specified!\n";
    }

    my $infile = $opt{'sourcedir'} . "/" . $opt{'src-filename'};
    my $in_fh  = IO::File->new($infile, "r")
        or die "could not open the file $infile for reading: $! \n";
    my $outfile = $opt{'destdir'} . "/" . $opt{'dest-filename'};

    # Start from a fresh file; every table writer below re-opens it in
    # append mode.
    unlink($outfile);
    my $out_fh = _open_for_append($outfile);

    printf {$out_fh} $icu_copyright, localtime()->year() + 1900;
    print {$out_fh} $copyright;
    print {$out_fh} $warning;

    if (defined $opt{'normalize'}) {
        print {$out_fh} "\@normalize;;\n";
    }
    if (defined $opt{'check-bidi'}) {
        print {$out_fh} "\@check-bidi;;\n";
    }
    print {$out_fh} "\n";
    _close_output($out_fh, $outfile);

    # [ marker pattern, option that enables the table (undef = always), writer ]
    # NOTE(review): table A.1 has always been emitted regardless of --A1;
    # that behaviour is kept here. Confirm whether --A1 should gate it.
    my @table_handlers = (
        [ qr/A\.1/,    undef, \&createUnassignedTable ],
        [ qr/B\.1/,    'B1',  \&createMapToNothing ],
        [ qr/B\.2/,    'B2',  \&createCaseMapNorm ],
        [ qr/B\.3/,    'B3',  \&createCaseMapNoNorm ],
        [ qr/C\.1\.1/, 'C11', \&createProhibitedTable ],
        [ qr/C\.1\.2/, 'C12', \&createProhibitedTable ],
        [ qr/C\.2\.1/, 'C21', \&createProhibitedTable ],
        [ qr/C\.2\.2/, 'C22', \&createProhibitedTable ],
        [ qr/C\.3/,    'C3',  \&createProhibitedTable ],
        [ qr/C\.4/,    'C4',  \&createProhibitedTable ],
        [ qr/C\.5/,    'C5',  \&createProhibitedTable ],
        [ qr/C\.6/,    'C6',  \&createProhibitedTable ],
        [ qr/C\.7/,    'C7',  \&createProhibitedTable ],
        [ qr/C\.8/,    'C8',  \&createProhibitedTable ],
        [ qr/C\.9/,    'C9',  \&createProhibitedTable ],
    );

    while (defined(my $line = <$in_fh>)) {
        next unless $line =~ /Start\sTable/;
        for my $handler (@table_handlers) {
            my ($pattern, $opt_key, $create) = @{$handler};
            next unless $line =~ $pattern;
            next if defined $opt_key && !defined $opt{$opt_key};
            # The writer consumes the input up to the matching "End Table".
            $create->($in_fh, $outfile, $line);
            last;
        }
    }

    if (defined $opt{'iscsi'}) {
        create_iSCSIExtraProhibitedTable($in_fh, $outfile);
    }
    if (defined $opt{'xmpp-node'}) {
        create_XMPPNodeExtraProhibitedTable($in_fh, $outfile);
    }
    if (defined $opt{'sasl'}) {
        create_SASLMapTable($in_fh, $outfile);
    }
    if (defined $opt{'ldap'}) {
        create_LDAPMapTable($in_fh, $outfile);
    }
    close($in_fh);
    return;
}

#-----------------------------------------------------------------------
# Opens $outfile in append mode and returns the handle; dies on failure.
sub _open_for_append {
    my ($outfile) = @_;
    my $out_fh = IO::File->new($outfile, "a")
        or die "could not open the file $outfile for writing: $! \n";
    return $out_fh;
}

#-----------------------------------------------------------------------
# Closes a write handle; buffered write errors surface here, so check it.
sub _close_output {
    my ($out_fh, $outfile) = @_;
    close($out_fh)
        or die "could not close the file $outfile after writing: $! \n";
    return;
}

#-----------------------------------------------------------------------
# Copies one RFC table from $in_fh (positioned just after its
# "Start Table" line) to $out_fh, rewriting each entry for the table kind:
#   $table matching /A/       -> "CODE; ; UNASSIGNED"
#   $table matching /B\.1/    -> "Map to nothing" replaced by "MAP"
#   $table matching /B\.[23]/ -> "Case map"/"Additional folding" replaced by "MAP"
#   $table matching /C/       -> "CODE; ; PROHIBITED"
# Ranges written "XXXX-YYYY" in the RFC are emitted as "XXXX..YYYY".
# $comment (plus a newline) is written first; when "End Table" is reached a
# "# Total code points N" trailer is written and the sub returns. If the
# input ends before "End Table", no trailer is written.
sub readPrint {
    my ($in_fh, $out_fh, $comment, $table) = @_;
    my $count = 0;
    print {$out_fh} $comment . "\n";

    while (defined(my $line = <$in_fh>)) {
        next if $line =~ /Hoffman\s\&\sBlanchet/;    # ignore page heading
        next if $line =~ /RFC\s3454/;                # ignore page heading
        next if $line =~ /\f/;                       # ignore form feed
        next if $line =~ /^\s*$/;                    # ignore blank lines (incl. CRLF)

        if ($line =~ /End\sTable/) {
            print {$out_fh} "\n# Total code points $count\n\n";
            return;
        }

        $line =~ s/-/../;       # first hyphen is the range separator
        $line =~ s/^\s+//;
        # Entries without any field separator get one appended (before the
        # trailing newline) so that the split below always works.
        $line =~ s/$/;/ unless $line =~ /;/;

        if ($table =~ /A/) {
            my ($code) = split /;/, $line;
            $line = $code . "; ; UNASSIGNED\n";
        }
        elsif ($table =~ /B\.1/) {
            $line =~ s/Map to nothing/MAP/;
        }
        elsif ($table =~ /B\.[23]/) {
            $line =~ s/Case map/MAP/;
            $line =~ s/Additional folding/MAP/;
        }
        elsif ($table =~ /C/) {
            my ($code) = split /;/, $line;
            $line = $code . "; ; PROHIBITED\n";
        }

        if ($line =~ /\.\./) {
            my ($code) = split /;/, $line;
            my ($start_str, $end_str) = split /\.\./, $code;
            my $span = atoi($end_str) - atoi($start_str) + 1;
            $count += $span if $span > 0;
        }
        else {
            $count++;
        }
        print {$out_fh} $line;
    }
    return;
}

#-----------------------------------------------------------------------
# Converts a hexadecimal code point string (e.g. "009F") to its integer
# value. Surrounding whitespace is ignored; undef, empty or non-hex input
# yields 0.
# (The previous digit-by-digit implementation treated A-F as 0, which made
# the "Total code points" trailers wrong for most ranges.)
sub atoi {
    my ($hex_str) = @_;
    return 0 unless defined $hex_str;
    $hex_str =~ s/\s+//g;
    return 0 unless $hex_str =~ /\A[0-9A-Fa-f]+\z/;
    return hex $hex_str;
}

#-----------------------------------------------------------------------
# Appends one RFC table to $outfile: opens the file, lets readPrint copy
# the table from $in_fh, and closes the file again.
sub _append_table {
    my ($in_fh, $outfile, $comment, $table) = @_;
    my $out_fh = _open_for_append($outfile);
    readPrint($in_fh, $out_fh, $comment, $table);
    _close_output($out_fh, $outfile);
    return;
}

#-----------------------------------------------------------------------
# Appends table A.1 (unassigned code points) to $outfile.
sub createUnassignedTable {
    my ($in_fh, $outfile) = @_;
    my $comment = "# This table contains code points from Table A.1 from RFC 3454\n";
    _append_table($in_fh, $outfile, $comment, "A");
    return;
}

#-----------------------------------------------------------------------
# Appends table B.1 (commonly mapped to nothing) to $outfile.
sub createMapToNothing {
    my ($in_fh, $outfile) = @_;
    my $comment = "# This table contains code points from Table B.1 from RFC 3454\n";
    _append_table($in_fh, $outfile, $comment, "B.1");
    return;
}

#-----------------------------------------------------------------------
# Appends table B.2 (case mapping used with normalization) to $outfile,
# preceded by the generated-file warning.
sub createCaseMapNorm {
    my ($in_fh, $outfile) = @_;
    my $comment = $warning . "# This table contains code points from Table B.2 from RFC 3454\n";
    _append_table($in_fh, $outfile, $comment, "B.2");
    return;
}

#-----------------------------------------------------------------------
# Appends table B.3 (case mapping used without normalization) to $outfile,
# preceded by the generated-file warning.
sub createCaseMapNoNorm {
    my ($in_fh, $outfile) = @_;
    my $comment = $warning . "# This table contains code points from Table B.3 from RFC 3454\n";
    _append_table($in_fh, $outfile, $comment, "B.3");
    return;
}

#-----------------------------------------------------------------------
# Appends one of the C.* prohibited tables to $outfile. $line is the
# "Start Table ..." marker line; with "Start" and the dashes removed it
# becomes the table's heading comment.
sub createProhibitedTable {
    my ($in_fh, $outfile, $line) = @_;
    $line =~ s/Start//;
    $line =~ s/-//g;
    my $comment = "# code points from $line";
    _append_table($in_fh, $outfile, $comment, "C");
    return;
}

#-----------------------------------------------------------------------
# Writes one "CODE; TARGET; MAP" line per code point to $out_fh and
# returns the number of lines written. Each item is either a single code
# point or an array ref [first, last] giving an inclusive range. $target
# is the mapping result as text ("" maps to nothing, "0020" maps to SPACE).
sub _print_map_entries {
    my ($out_fh, $target, @items) = @_;
    my $written = 0;
    for my $item (@items) {
        my ($first, $last) = ref $item ? @{$item} : ($item, $item);
        for my $code_point ($first .. $last) {
            printf {$out_fh} "%04X; %s; MAP\n", $code_point, $target;
            $written++;
        }
    }
    return $written;
}

#-----------------------------------------------------------------------
# Appends the extra prohibitions of the iSCSI profile to $outfile.
# The first argument (input handle) is accepted for call compatibility
# and is not used.
sub create_iSCSIExtraProhibitedTable {
    my (undef, $outfile) = @_;
    my $comment = "# Additional prohibitions from iSCSI profile (rfc3722.txt)\n\n";

    my $out_fh = _open_for_append($outfile);
    print {$out_fh} $comment;
    print {$out_fh} "0021..002C; ; PROHIBITED\n";
    print {$out_fh} "002F; ; PROHIBITED\n";
    print {$out_fh} "003B..0040; ; PROHIBITED\n";
    print {$out_fh} "005B..0060; ; PROHIBITED\n";
    print {$out_fh} "007B..007E; ; PROHIBITED\n";
    print {$out_fh} "3002; ; PROHIBITED\n";
    print {$out_fh} "\n# Total code points 30\n";
    _close_output($out_fh, $outfile);
    return;
}

#-----------------------------------------------------------------------
# Appends the extra prohibitions of the XMPP Nodeprep profile to $outfile.
# The first argument (input handle) is accepted for call compatibility
# and is not used.
sub create_XMPPNodeExtraProhibitedTable {
    my (undef, $outfile) = @_;
    my $comment = "# Additional prohibitions from XMPP Nodeprep profile (rfc3920.txt)\n\n";

    my $out_fh = _open_for_append($outfile);
    print {$out_fh} $comment;
    for my $code (qw(0022 0026 0027 002F 003A 003C 003E 0040)) {
        print {$out_fh} "$code; ; PROHIBITED\n";
    }
    print {$out_fh} "\n# Total code points 8\n";
    _close_output($out_fh, $outfile);
    return;
}

#-----------------------------------------------------------------------
# Appends the mapping table of the SASL profile to $outfile.
# The first argument (input handle) is accepted for call compatibility
# and is not used.
sub create_SASLMapTable {
    my (undef, $outfile) = @_;
    my $comment = "# Map table for SASL profile (rfc4013.txt)\n\n";

    my $out_fh = _open_for_append($outfile);
    print {$out_fh} $comment;
    my $total = 0;

    # non-ASCII space characters [C.1.2] to SPACE
    $total += _print_map_entries($out_fh, "0020",
        0x00A0, 0x1680, [ 0x2000, 0x200B ], 0x202F, 0x205F, 0x3000);

    # commonly mapped to nothing characters except U+200B to nothing
    $total += _print_map_entries($out_fh, "",
        0x00AD, 0x034F, 0x1806, [ 0x180B, 0x180D ],
        0x200C, 0x200D, 0x2060, [ 0xFE00, 0xFE0F ], 0xFEFF);

    print {$out_fh} "\n# Total code points $total\n";    # 43
    _close_output($out_fh, $outfile);
    return;
}

#-----------------------------------------------------------------------
# Appends the mapping table of the LDAP profile to $outfile.
# The first argument (input handle) is accepted for call compatibility
# and is not used.
sub create_LDAPMapTable {
    my (undef, $outfile) = @_;
    my $comment = "# Map table for LDAP profile (rfc4518.txt)\n\n";

    my $out_fh = _open_for_append($outfile);
    print {$out_fh} $comment;
    my $total = 0;

    # SOFT HYPHEN (U+00AD) and MONGOLIAN TODO SOFT HYPHEN (U+1806) code
    # points are mapped to nothing. COMBINING GRAPHEME JOINER (U+034F) and
    # VARIATION SELECTORs (U+180B-180D, FE00-FE0F) code points are also
    # mapped to nothing. The OBJECT REPLACEMENT CHARACTER (U+FFFC) is
    # mapped to nothing.
    # (The range was previously written "FF00-FE0F" in this comment; the
    # code points emitted are FE00-FE0F.)
    $total += _print_map_entries($out_fh, "",
        0x00AD, 0x034F, 0x1806, [ 0x180B, 0x180D ], [ 0xFE00, 0xFE0F ], 0xFFFC);

    # CHARACTER TABULATION (U+0009), LINE FEED (LF) (U+000A), LINE
    # TABULATION (U+000B), FORM FEED (FF) (U+000C), CARRIAGE RETURN (CR)
    # (U+000D), and NEXT LINE (NEL) (U+0085) are mapped to SPACE (U+0020).
    $total += _print_map_entries($out_fh, "0020", [ 0x0009, 0x000D ], 0x0085);

    # All other control code (e.g., Cc) points or code points with a
    # control function (e.g., Cf) are mapped to nothing. The following is
    # a complete list of these code points: U+0000-0008, 000E-001F, 007F-
    # 0084, 0086-009F, 06DD, 070F, 180E, 200C-200F, 202A-202E, 2060-2063,
    # 206A-206F, FEFF, FFF9-FFFB, 1D173-1D17A, E0001, E0020-E007F.
    $total += _print_map_entries($out_fh, "",
        [ 0x0000, 0x0008 ], [ 0x000E, 0x001F ], [ 0x007F, 0x0084 ],
        [ 0x0086, 0x009F ], 0x06DD, 0x070F, 0x180E,
        [ 0x200C, 0x200F ], [ 0x202A, 0x202E ], [ 0x2060, 0x2063 ],
        [ 0x206A, 0x206F ], 0xFEFF, [ 0xFFF9, 0xFFFB ],
        [ 0x1D173, 0x1D17A ], 0xE0001, [ 0xE0020, 0xE007F ]);

    # ZERO WIDTH SPACE (U+200B) is mapped to nothing. All other code
    # points with Separator (space, line, or paragraph) property (e.g., Zs,
    # Zl, or Zp) are mapped to SPACE (U+0020). The following is a complete
    # list of these code points: U+0020, 00A0, 1680, 2000-200A, 2028-2029,
    # 202F, 205F, 3000.
    $total += _print_map_entries($out_fh, "", 0x200B);
    $total += _print_map_entries($out_fh, "0020",
        0x00A0, 0x1680, [ 0x2000, 0x200A ], 0x2028, 0x2029,
        0x202F, 0x205F, 0x3000);

    print {$out_fh} "\n# Total code points $total\n";    # 238
    _close_output($out_fh, $outfile);
    return;
}

#-----------------------------------------------------------------------
# Prints the command-line help and exits (status 0, as before).
sub usage {
    print <<'END';
Usage:
filterRFC3454.pl
Options:
        --sourcedir=<directory>
        --destdir=<directory>
        --src-filename=<name of RFC file>
        --dest-filename=<name of destination file>
        --A1            Generate data for table A.1
        --B1            Generate data for table B.1
        --B2            Generate data for table B.2
        --B3            Generate data for table B.3
        --C11           Generate data for table C.1.1
        --C12           Generate data for table C.1.2
        --C21           Generate data for table C.2.1
        --C22           Generate data for table C.2.2
        --C3            Generate data for table C.3
        --C4            Generate data for table C.4
        --C5            Generate data for table C.5
        --C6            Generate data for table C.6
        --C7            Generate data for table C.7
        --C8            Generate data for table C.8
        --C9            Generate data for table C.9
        --iscsi         Generate data for iSCSI extra prohibited table
        --xmpp-node     Generate data for XMPP extra prohibited table
        --sasl          Generate data for SASL map table
        --ldap          Generate data for LDAP map table
        --normalize     Embed the normalization directive in the output file
        --check-bidi    Embed the check bidi directive in the output file

Note, --B2 and --B3 are mutually exclusive.

e.g.: filterRFC3454.pl --sourcedir=. --destdir=./output --src-filename=rfc3454.txt --dest-filename=NamePrepProfile.txt --A1 --B1 --B2 --C12 --C22 --C3 --C4 --C5 --C6 --C7 --C8 --C9 --normalize --check-bidi

filterRFC3454.pl filters the RFC file and creates String prep table files.
The RFC text can be downloaded from ftp://ftp.rfc-editor.org/in-notes/rfc3454.txt

END
    exit(0);
}

# Run the program when executed as a script; stay passive when the file is
# loaded with require/do (e.g. from a test).
main() unless caller;

1;