• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1#!/usr/bin/perl -w
2#
3# This is JavaScriptCore's variant of the PCRE library. While this library
4# started out as a copy of PCRE, many of the features of PCRE have been
5# removed. This library now supports only the regular expression features
6# required by the JavaScript language specification, and has only the functions
7# needed by JavaScriptCore and the rest of WebKit.
8#
9#                  Originally written by Philip Hazel
10#            Copyright (c) 1997-2006 University of Cambridge
11#  Copyright (C) 2002, 2004, 2006, 2007, 2008, 2009 Apple Inc.  All rights reserved.
12#
13# -----------------------------------------------------------------------------
14# Redistribution and use in source and binary forms, with or without
15# modification, are permitted provided that the following conditions are met:
16#
17#     * Redistributions of source code must retain the above copyright notice,
18#       this list of conditions and the following disclaimer.
19#
20#     * Redistributions in binary form must reproduce the above copyright
21#       notice, this list of conditions and the following disclaimer in the
22#       documentation and/or other materials provided with the distribution.
23#
24#     * Neither the name of the University of Cambridge nor the names of its
25#       contributors may be used to endorse or promote products derived from
26#       this software without specific prior written permission.
27#
28# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
29# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
30# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
31# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
32# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
33# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
34# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
35# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
36# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
37# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
38# POSSIBILITY OF SUCH DAMAGE.
39# -----------------------------------------------------------------------------
40
41# This is a freestanding support program to generate a file containing
42# character tables. The tables are built according to the default C
43# locale.
44
45use strict;
46
47use File::Basename;
48use File::Spec;
49use File::Temp qw(tempfile);
50use Getopt::Long;
51
# Forward declaration: the sub is defined at the bottom of the file but is
# called from the top-level code below.
sub readHeaderValues();

# Values extracted from pcre_internal.h by readHeaderValues(), keyed by the
# name used in the header (cbit_digit, lcc_offset, tables_length, ...).
my %pcre_internal;

my $usage = "Usage: " . basename($0) . " [--preprocessor=program] output-file\n";

if (scalar(@ARGV) < 1) {
    print STDERR $usage;
    exit 1;
}

my $outputFile;
my $preprocessor;

# GetOptions returns false when it meets an unknown option or an option with
# a missing value (it has already printed a diagnostic to STDERR). Stop here
# rather than generating output from a command line we did not understand.
if (!GetOptions('preprocessor=s' => \$preprocessor)) {
    print STDERR $usage;
    exit 1;
}

# Fall back to the system C preprocessor when none was requested.
if (not $preprocessor) {
    $preprocessor = "cpp";
}

# GetOptions removes the options it recognised from @ARGV, so the first
# remaining argument is the output file.
$outputFile = $ARGV[0];
die('Must specify output file.') unless defined($outputFile);

# Fill %pcre_internal before any table is generated.
readHeaderValues();
72
# Create (or truncate) the output file; binmode keeps the newlines written
# below from being translated on platforms that would otherwise do so.
open(OUT, ">", $outputFile) or die "$!";
binmode(OUT);

# Banner comment at the top of the generated file. None of this text
# contains a format directive, so it is written verbatim.
print OUT
    "/*************************************************\n",
    "*      Perl-Compatible Regular Expressions       *\n",
    "*************************************************/\n\n",
    "/* This file is automatically written by the dftables auxiliary \n",
    "program. If you edit it by hand, you might like to edit the Makefile to \n",
    "prevent its ever being regenerated.\n\n";

# Close the banner comment, open the table definition (sized from the
# header's tables_length) and introduce the first sub-table.
print OUT
    "This file contains the default tables for characters with codes less than\n",
    "128 (ASCII characters). These tables are used when no external tables are\n",
    "passed to PCRE. */\n\n",
    sprintf("const unsigned char jsc_pcre_default_tables[%d] = {\n\n", $pcre_internal{tables_length}),
    "/* This table is a lower casing table. */\n\n";
89
# The lower-casing table is emitted first, so the header must place it at
# offset 0.
die "lcc_offset != 0" if $pcre_internal{lcc_offset} != 0;

# 128 entries, eight per output row: entry N is the code of lc(chr(N)).
my @lowerRows;
for my $rowStart (map { 8 * $_ } 0 .. 15) {
    my @cells = map { sprintf("0x%02X", ord(lc(chr($_)))) } $rowStart .. $rowStart + 7;
    push @lowerRows, join(", ", @cells);
}

# Rows are separated by ", " followed by a newline and a two-space indent;
# the trailing comma continues the C initializer into the next table.
print OUT "  ", join(", \n  ", @lowerRows), ",\n\n";
105
print OUT "/* This table is a case flipping table. */\n\n";

# The flipping table directly follows the 128-entry lower-casing table.
die "fcc_offset != 128" if $pcre_internal{fcc_offset} != 128;

# 128 entries, eight per output row: lower-case letters map to their
# upper-case code, every other character maps to the code of its lc() form.
my @flippedRows;
for my $rowStart (map { 8 * $_ } 0 .. 15) {
    my @cells;
    for my $code ($rowStart .. $rowStart + 7) {
        my $char = chr($code);
        my $flipped = ($char =~ /[[:lower:]]/) ? uc($char) : lc($char);
        push @cells, sprintf("0x%02X", ord($flipped));
    }
    push @flippedRows, join(", ", @cells);
}

# Same layout as the preceding table: ", " then newline and indent between
# rows, and a trailing comma to continue the initializer.
print OUT "  ", join(", \n  ", @flippedRows), ",\n\n";
124
print OUT
    "/* This table contains bit maps for various character classes.\n",
    "Each map is 32 bytes long and the bits run from the least\n",
    "significant end of each byte. The classes are: space, digit, word. */\n\n";

# The bit maps must start right after the 128-entry case flipping table.
die "cbits_offset != fcc_offset + 128"
    if $pcre_internal{cbits_offset} != $pcre_internal{fcc_offset} + 128;

# One byte per entry, all bits initially clear.
my @classBits = (0) x $pcre_internal{cbit_length};

# Sets the bit for character $code in the map that begins at $mapStart:
# byte index is code / 8 (truncated by the array subscript), bit is code & 7.
my $markCharacter = sub {
    my ($mapStart, $code) = @_;
    $classBits[$mapStart + $code / 8] |= 1 << ($code & 7);
};

# digit: '0'..'9'.  word: '_' plus every alphanumeric.  space: [[:space:]].
$markCharacter->($pcre_internal{cbit_digit}, $_) for ord('0') .. ord('9');
$markCharacter->($pcre_internal{cbit_word}, ord('_'));
for my $code (0 .. 127) {
    my $char = chr($code);
    $markCharacter->($pcre_internal{cbit_word}, $code) if $char =~ /[[:alnum:]]/;
    $markCharacter->($pcre_internal{cbit_space}, $code) if $char =~ /[[:space:]]/;
}

# Eight bytes per row, with an extra blank line before every 32nd byte.
my $entryCount = $pcre_internal{cbit_length};
my $bitmapText = "  ";
for my $index (0 .. $entryCount - 1) {
    if ($index != 0) {
        $bitmapText .= ", ";
        if ($index % 8 == 0) {
            $bitmapText .= "\n" if $index % 32 == 0;
            $bitmapText .= "\n  ";
        }
    }
    $bitmapText .= sprintf("0x%02X", $classBits[$index]);
}
$bitmapText .= ",\n\n";
print OUT $bitmapText;
163
# Explain the flag bits, using the actual values taken from the header.
printf OUT
    "/* This table identifies various classes of character by individual bits:\n" .
    "  0x%02x   white space character\n" .
    "  0x%02x   hexadecimal digit\n" .
    "  0x%02x   alphanumeric or '_'\n*/\n\n",
    @pcre_internal{qw(ctype_space ctype_xdigit ctype_word)};

# The ctypes table must start right after the class bit maps.
die "ctypes_offset != cbits_offset + cbit_length"
    if $pcre_internal{ctypes_offset} != $pcre_internal{cbits_offset} + $pcre_internal{cbit_length};

# 128 entries in 16 rows of eight; each row ends with a comment naming the
# first and last character it covers.
print OUT "  ";
for my $row (0 .. 15) {
    my $firstCode = $row * 8;
    my $lastCode = $firstCode + 7;

    for my $code ($firstCode .. $lastCode) {
        my $char = chr($code);

        # Sum the flag values of every class the character belongs to.
        my $flags = 0;
        $flags += $pcre_internal{ctype_space} if $char =~ /[[:space:]]/;
        $flags += $pcre_internal{ctype_xdigit} if $char =~ /[[:xdigit:]]/;
        $flags += $pcre_internal{ctype_word} if $char =~ /[[:alnum:]_]/;

        # The very last entry closes the C array instead of adding a comma.
        print OUT sprintf("0x%02X", $flags), ($code == 127 ? "};" : ", ");
    }

    # Printable characters are shown literally, others by decimal code.
    my $firstLabel = chr($firstCode) =~ /[[:print:]]/
        ? sprintf(" %c -", $firstCode)
        : sprintf("%3d-", $firstCode);
    my $lastLabel = chr($lastCode) =~ m/[[:print:]]/
        ? sprintf(" %c ", $lastCode)
        : sprintf("%3d", $lastCode);
    print OUT " /* ", $firstLabel, $lastLabel, " */\n";

    # Indent the next row, unless this was the final one.
    print OUT "  " if $lastCode != 127;
}
213
# The 128-entry ctypes table is the last one, so the total length declared
# in the header must end exactly there.
if ($pcre_internal{tables_length} != $pcre_internal{ctypes_offset} + 128) {
    die "tables_length != ctypes_offset + 128";
}

print OUT "\n\n/* End of chartables.c */\n";

# Output is buffered, so a failed write (for example a full disk) may only
# be reported when the handle is closed. Fail loudly instead of leaving a
# silently truncated file behind with a zero exit status.
close(OUT) or die "Failed to write $outputFile: $!";

exit 0;
223
# Fills the file-level %pcre_internal hash with the values of the names
# listed in @variables, as defined by pcre_internal.h (looked up in the same
# directory as this script).
#
# The header is copied into a temporary file behind "#define DFTABLES",
# followed by one Perl assignment per name. Running that file through
# $preprocessor substitutes the header's definition for each name, and the
# resulting text is evaluated as Perl.
#
# Takes no arguments and returns nothing useful. Dies if the header cannot
# be read, the temporary file cannot be written, the preprocessor cannot be
# run or exits with a non-zero status, or the preprocessed text fails to
# evaluate.
sub readHeaderValues()
{
    my @variables = qw(
        cbit_digit
        cbit_length
        cbit_space
        cbit_word
        cbits_offset
        ctype_space
        ctype_word
        ctype_xdigit
        ctypes_offset
        fcc_offset
        lcc_offset
        tables_length
    );

    # Slurp mode: every <...> read below returns the whole stream at once.
    local $/ = undef;

    my $headerPath = File::Spec->catfile(dirname($0), "pcre_internal.h");

    # UNLINK => 1 asks File::Temp to remove the file at program exit, which
    # covers the die paths below; the normal path removes it explicitly as
    # soon as the preprocessor has finished with it.
    my ($fh, $tempFile) = tempfile(
        basename($0) . "-XXXXXXXX",
        DIR => ($ENV{'TMPDIR'} || "/tmp"),
        SUFFIX => ".in",
        UNLINK => 1,
    );

    print {$fh} "#define DFTABLES\n\n";

    open(my $header, "<", $headerPath) or die "Cannot open $headerPath: $!";
    my $headerText = <$header>;
    close($header);
    print {$fh} $headerText if defined($headerText);

    print {$fh} "\n\n";

    # One assignment per name; the right-hand side is left bare so that the
    # preprocessor replaces it with the header's definition.
    for my $v (@variables) {
        print {$fh} "\$pcre_internal{\"$v\"} = $v;\n";
    }

    close($fh) or die "Cannot write $tempFile: $!";

    # $preprocessor may carry its own arguments, so the command is a single
    # string that goes through the shell rather than an argument list.
    open(my $cpp, "-|", "$preprocessor \"$tempFile\"")
        or die "Cannot run preprocessor '$preprocessor': $!";
    my $content = <$cpp>;

    # close() on a pipe also waits for the child and reports its status:
    # it returns false with $! == 0 when the only problem was a non-zero
    # exit. Capture everything before unlink() can disturb $!.
    my $cppSucceeded = close($cpp);
    my $closeErrno = $! + 0;
    my $closeError = "$!";
    my $exitStatus = $? >> 8;

    unlink($tempFile);

    if (!$cppSucceeded) {
        die $closeErrno
            ? "Error reading from preprocessor '$preprocessor': $closeError"
            : "Preprocessor '$preprocessor' exited with status $exitStatus";
    }
    die "Preprocessor '$preprocessor' produced no output" unless defined($content);

    # NOTE(review): string eval of preprocessor output. This is only safe
    # because the header and the preprocessor are trusted build inputs.
    eval $content;
    die "$@" if $@;
}
273