#!/usr/bin/perl -w
#
# This is JavaScriptCore's variant of the PCRE library. While this library
# started out as a copy of PCRE, many of the features of PCRE have been
# removed. This library now supports only the regular expression features
# required by the JavaScript language specification, and has only the functions
# needed by JavaScriptCore and the rest of WebKit.
#
# Originally written by Philip Hazel
# Copyright (c) 1997-2006 University of Cambridge
# Copyright (C) 2002, 2004, 2006, 2007, 2008, 2009 Apple Inc. All rights reserved.
#
# -----------------------------------------------------------------------------
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are met:
#
# * Redistributions of source code must retain the above copyright notice,
# this list of conditions and the following disclaimer.
#
# * Redistributions in binary form must reproduce the above copyright
# notice, this list of conditions and the following disclaimer in the
# documentation and/or other materials provided with the distribution.
#
# * Neither the name of the University of Cambridge nor the names of its
# contributors may be used to endorse or promote products derived from
# this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
# POSSIBILITY OF SUCH DAMAGE.
# -----------------------------------------------------------------------------

# This is a freestanding support program to generate a file containing
# character tables. The tables are built according to the default C
# locale.
#
# Usage: dftables [--preprocessor=program] output-file
#
# The table offsets and bit values are not hard-coded here: they are taken
# from the macros in pcre_internal.h (expected next to this script) by
# running that header through the C preprocessor; see readHeaderValues().

use strict;
use warnings;

use File::Basename;
use File::Spec;
use File::Temp qw(tempfile);
use Getopt::Long;

sub readHeaderValues();

# Filled in by readHeaderValues() with the numeric value of each
# pcre_internal.h macro listed there.
my %pcre_internal;

if (scalar(@ARGV) < 1) {
    print STDERR "Usage: ", basename($0), " [--preprocessor=program] output-file\n";
    exit 1;
}

my $preprocessor;
GetOptions('preprocessor=s' => \$preprocessor);
$preprocessor ||= "cpp";

my $outputFile = $ARGV[0];
die('Must specify output file.') unless defined($outputFile);

readHeaderValues();

# Verify that the header lays the four tables out back to back in the order
# this script emits them. This is done before the output file is opened so
# that a mismatch never leaves a truncated file behind.
my @layoutChecks = (
    [ $pcre_internal{lcc_offset} == 0,
      "lcc_offset != 0" ],
    [ $pcre_internal{fcc_offset} == 128,
      "fcc_offset != 128" ],
    [ $pcre_internal{cbits_offset} == $pcre_internal{fcc_offset} + 128,
      "cbits_offset != fcc_offset + 128" ],
    [ $pcre_internal{ctypes_offset} == $pcre_internal{cbits_offset} + $pcre_internal{cbit_length},
      "ctypes_offset != cbits_offset + cbit_length" ],
    [ $pcre_internal{tables_length} == $pcre_internal{ctypes_offset} + 128,
      "tables_length != ctypes_offset + 128" ],
);
for my $check (@layoutChecks) {
    my ($ok, $message) = @{$check};
    die $message unless $ok;
}

open(my $out, ">", $outputFile) or die "Cannot open '$outputFile' for writing: $!";
binmode($out);

print {$out}
    "/*************************************************\n" .
    "* Perl-Compatible Regular Expressions *\n" .
    "*************************************************/\n\n" .
    "/* This file is automatically written by the dftables auxiliary \n" .
    "program. If you edit it by hand, you might like to edit the Makefile to \n" .
    "prevent its ever being regenerated.\n\n";
printf {$out}
    "This file contains the default tables for characters with codes less than\n" .
    "128 (ASCII characters). These tables are used when no external tables are\n" .
    "passed to PCRE. */\n\n" .
    "const unsigned char jsc_pcre_default_tables[%d] = {\n\n" .
    "/* This table is a lower casing table. */\n\n", $pcre_internal{tables_length};

# Table 1: each ASCII code mapped to its lower-case equivalent.
my @lowerCased = map { ord(lc(chr($_))) } 0 .. 127;
print {$out} formatByteTable(\@lowerCased);

print {$out} "/* This table is a case flipping table. */\n\n";

# Table 2: each ASCII code mapped to the same letter in the other case.
my @caseFlipped = map {
    my $c = chr($_);
    $c =~ /[[:lower:]]/ ? ord(uc($c)) : ord(lc($c));
} 0 .. 127;
print {$out} formatByteTable(\@caseFlipped);

print {$out}
    "/* This table contains bit maps for various character classes.\n" .
    "Each map is 32 bytes long and the bits run from the least\n" .
    "significant end of each byte. The classes are: space, digit, word. */\n\n";

# Table 3: one bit map per class. Within a map, character code N is bit
# (N & 7) of byte (N >> 3), counted from the map's starting offset.
my @cbit_table = (0) x $pcre_internal{cbit_length};
my $setClassBit = sub {
    my ($mapOffset, $code) = @_;
    $cbit_table[$mapOffset + ($code >> 3)] |= 1 << ($code & 7);
};

for my $code (ord('0') .. ord('9')) {
    $setClassBit->($pcre_internal{cbit_digit}, $code);
}
$setClassBit->($pcre_internal{cbit_word}, ord('_'));
for my $code (0 .. 127) {
    my $c = chr($code);
    $setClassBit->($pcre_internal{cbit_word}, $code) if $c =~ /[[:alnum:]]/;
    $setClassBit->($pcre_internal{cbit_space}, $code) if $c =~ /[[:space:]]/;
}

# Emit exactly cbit_length bytes, with a blank line between 32-byte maps.
my @cbitBytes = @cbit_table[0 .. $pcre_internal{cbit_length} - 1];
print {$out} formatByteTable(\@cbitBytes, 32);

printf {$out}
    "/* This table identifies various classes of character by individual bits:\n" .
    " 0x%02x white space character\n" .
    " 0x%02x hexadecimal digit\n" .
    " 0x%02x alphanumeric or '_'\n*/\n\n",
    $pcre_internal{ctype_space}, $pcre_internal{ctype_xdigit}, $pcre_internal{ctype_word};

# Table 4: per-character class flags, eight per row, each row followed by a
# comment naming the first and last character of the row.
print {$out} " ";
for my $i (0 .. 127) {
    my $c = chr($i);

    my $flags = 0;
    $flags += $pcre_internal{ctype_space}  if $c =~ /[[:space:]]/;
    $flags += $pcre_internal{ctype_xdigit} if $c =~ /[[:xdigit:]]/;
    $flags += $pcre_internal{ctype_word}   if $c =~ /[[:alnum:]_]/;

    printf {$out} "0x%02X", $flags;
    # The final entry closes the C array initializer instead of adding a comma.
    print {$out} ($i != 127 ? ", " : "};");

    next unless ($i & 7) == 7;

    # End of a row: printable characters are shown literally, others by code.
    my $rowStart = $i - 7;
    print {$out} " /* ";
    if (chr($rowStart) =~ /[[:print:]]/) {
        printf {$out} " %c -", $rowStart;
    } else {
        printf {$out} "%3d-", $rowStart;
    }
    if ($c =~ m/[[:print:]]/) {
        printf {$out} " %c ", $i;
    } else {
        printf {$out} "%3d", $i;
    }
    print {$out} " */\n";
    print {$out} " " if $i != 127;
}

print {$out} "\n\n/* End of chartables.c */\n";

# Buffered write errors only surface at close, so check it.
close($out) or die "Error writing '$outputFile': $!";

exit 0;

# Formats a list of byte values as rows of eight "0xNN" entries.
#
# Parameters:
#   $values         - array reference holding the byte values to print.
#   $blankLineEvery - optional; when given, an extra blank line is inserted
#                     before every entry whose index is a non-zero multiple
#                     of this number.
#
# Returns the formatted text. Every entry except the last is followed by
# ", " (so rows end in a trailing space), and the text ends with ",\n\n"
# because another table always follows in the generated array.
sub formatByteTable
{
    my ($values, $blankLineEvery) = @_;

    my $lastIndex = $#{$values};
    my $text = " ";
    for my $i (0 .. $lastIndex) {
        if ($i != 0 && ($i & 7) == 0) {
            $text .= "\n" if $blankLineEvery && ($i % $blankLineEvery) == 0;
            $text .= "\n ";
        }
        $text .= sprintf("0x%02X", $values->[$i]);
        $text .= ", " if $i != $lastIndex;
    }
    return $text . ",\n\n";
}

# Populates %pcre_internal with the values of the table-layout macros from
# pcre_internal.h, which is looked up in the directory containing this script.
#
# The header is copied into a temporary file with "#define DFTABLES" in front
# of it and one Perl assignment per macro behind it. That file is run through
# $preprocessor, which expands the macro names inside the assignments, and
# the preprocessor output is then evaluated as Perl.
#
# Dies if the header cannot be read, the temporary file cannot be written,
# the preprocessor cannot be started or exits with a failure status, the
# evaluated output is not valid Perl, or any expected value is left undefined.
sub readHeaderValues()
{
    my @variables = qw(
        cbit_digit
        cbit_length
        cbit_space
        cbit_word
        cbits_offset
        ctype_space
        ctype_word
        ctype_xdigit
        ctypes_offset
        fcc_offset
        lcc_offset
        tables_length
    );

    my $headerPath = File::Spec->catfile(dirname($0), "pcre_internal.h");

    open(my $header, "<", $headerPath) or die "Cannot open '$headerPath': $!";
    my $headerText = do { local $/; <$header> };
    close($header);
    $headerText = "" unless defined($headerText);

    # UNLINK => 1 has File::Temp remove the file when the program exits,
    # including when it exits through die.
    my ($fh, $tempFile) = tempfile(
        basename($0) . "-XXXXXXXX",
        DIR => File::Spec->tmpdir(),
        SUFFIX => ".in",
        UNLINK => 1,
    );

    print {$fh} "#define DFTABLES\n\n", $headerText, "\n\n";
    for my $v (@variables) {
        print {$fh} "\$pcre_internal{\"$v\"} = $v;\n";
    }
    close($fh) or die "Error writing '$tempFile': $!";

    # The command is deliberately a single string run through the shell, so
    # --preprocessor may carry its own arguments (for example "gcc -E").
    open(my $cpp, "-|", "$preprocessor \"$tempFile\"")
        or die "Cannot run preprocessor '$preprocessor': $!";
    my $content = do { local $/; <$cpp> };
    unless (close($cpp)) {
        # For a pipe, close fails with $! == 0 when the child exited non-zero.
        die($! ? "Error reading from preprocessor '$preprocessor': $!"
               : "Preprocessor '$preprocessor' failed with exit status " . ($? >> 8));
    }
    $content = "" unless defined($content);

    # NOTE(review): this string-evals the preprocessor output. That is the
    # mechanism by which the values are obtained, but it means both the header
    # and the preprocessor command must be trusted.
    eval $content;
    die "$@" if $@;

    for my $v (@variables) {
        die "Preprocessor output did not define '$v' (from '$headerPath')"
            unless defined($pcre_internal{$v});
    }
}