• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1#!/usr/bin/env python3
2# -*- coding: utf-8 -*-
3
4# This script reads Huffman Code table [1] and generates symbol table
5# and decoding tables in C language.  The resulting code is used in
6# lib/nghttp2_hd_huffman.h and lib/nghttp2_hd_huffman_data.c
7#
8# [1] https://httpwg.org/specs/rfc7541.html
9
10import re
11import sys
12from io import StringIO
13
14# From [1]
15HUFFMAN_CODE_TABLE = """\
16    (  0)  |11111111|11000                             1ff8  [13]
17    (  1)  |11111111|11111111|1011000                7fffd8  [23]
18    (  2)  |11111111|11111111|11111110|0010         fffffe2  [28]
19    (  3)  |11111111|11111111|11111110|0011         fffffe3  [28]
20    (  4)  |11111111|11111111|11111110|0100         fffffe4  [28]
21    (  5)  |11111111|11111111|11111110|0101         fffffe5  [28]
22    (  6)  |11111111|11111111|11111110|0110         fffffe6  [28]
23    (  7)  |11111111|11111111|11111110|0111         fffffe7  [28]
24    (  8)  |11111111|11111111|11111110|1000         fffffe8  [28]
25    (  9)  |11111111|11111111|11101010               ffffea  [24]
26    ( 10)  |11111111|11111111|11111111|111100      3ffffffc  [30]
27    ( 11)  |11111111|11111111|11111110|1001         fffffe9  [28]
28    ( 12)  |11111111|11111111|11111110|1010         fffffea  [28]
29    ( 13)  |11111111|11111111|11111111|111101      3ffffffd  [30]
30    ( 14)  |11111111|11111111|11111110|1011         fffffeb  [28]
31    ( 15)  |11111111|11111111|11111110|1100         fffffec  [28]
32    ( 16)  |11111111|11111111|11111110|1101         fffffed  [28]
33    ( 17)  |11111111|11111111|11111110|1110         fffffee  [28]
34    ( 18)  |11111111|11111111|11111110|1111         fffffef  [28]
35    ( 19)  |11111111|11111111|11111111|0000         ffffff0  [28]
36    ( 20)  |11111111|11111111|11111111|0001         ffffff1  [28]
37    ( 21)  |11111111|11111111|11111111|0010         ffffff2  [28]
38    ( 22)  |11111111|11111111|11111111|111110      3ffffffe  [30]
39    ( 23)  |11111111|11111111|11111111|0011         ffffff3  [28]
40    ( 24)  |11111111|11111111|11111111|0100         ffffff4  [28]
41    ( 25)  |11111111|11111111|11111111|0101         ffffff5  [28]
42    ( 26)  |11111111|11111111|11111111|0110         ffffff6  [28]
43    ( 27)  |11111111|11111111|11111111|0111         ffffff7  [28]
44    ( 28)  |11111111|11111111|11111111|1000         ffffff8  [28]
45    ( 29)  |11111111|11111111|11111111|1001         ffffff9  [28]
46    ( 30)  |11111111|11111111|11111111|1010         ffffffa  [28]
47    ( 31)  |11111111|11111111|11111111|1011         ffffffb  [28]
48' ' ( 32)  |010100                                       14  [ 6]
49'!' ( 33)  |11111110|00                                 3f8  [10]
50'"' ( 34)  |11111110|01                                 3f9  [10]
51'#' ( 35)  |11111111|1010                               ffa  [12]
52'$' ( 36)  |11111111|11001                             1ff9  [13]
53'%' ( 37)  |010101                                       15  [ 6]
54'&' ( 38)  |11111000                                     f8  [ 8]
55''' ( 39)  |11111111|010                                7fa  [11]
56'(' ( 40)  |11111110|10                                 3fa  [10]
57')' ( 41)  |11111110|11                                 3fb  [10]
58'*' ( 42)  |11111001                                     f9  [ 8]
59'+' ( 43)  |11111111|011                                7fb  [11]
60',' ( 44)  |11111010                                     fa  [ 8]
61'-' ( 45)  |010110                                       16  [ 6]
62'.' ( 46)  |010111                                       17  [ 6]
63'/' ( 47)  |011000                                       18  [ 6]
64'0' ( 48)  |00000                                         0  [ 5]
65'1' ( 49)  |00001                                         1  [ 5]
66'2' ( 50)  |00010                                         2  [ 5]
67'3' ( 51)  |011001                                       19  [ 6]
68'4' ( 52)  |011010                                       1a  [ 6]
69'5' ( 53)  |011011                                       1b  [ 6]
70'6' ( 54)  |011100                                       1c  [ 6]
71'7' ( 55)  |011101                                       1d  [ 6]
72'8' ( 56)  |011110                                       1e  [ 6]
73'9' ( 57)  |011111                                       1f  [ 6]
74':' ( 58)  |1011100                                      5c  [ 7]
75';' ( 59)  |11111011                                     fb  [ 8]
76'<' ( 60)  |11111111|1111100                           7ffc  [15]
77'=' ( 61)  |100000                                       20  [ 6]
78'>' ( 62)  |11111111|1011                               ffb  [12]
79'?' ( 63)  |11111111|00                                 3fc  [10]
80'@' ( 64)  |11111111|11010                             1ffa  [13]
81'A' ( 65)  |100001                                       21  [ 6]
82'B' ( 66)  |1011101                                      5d  [ 7]
83'C' ( 67)  |1011110                                      5e  [ 7]
84'D' ( 68)  |1011111                                      5f  [ 7]
85'E' ( 69)  |1100000                                      60  [ 7]
86'F' ( 70)  |1100001                                      61  [ 7]
87'G' ( 71)  |1100010                                      62  [ 7]
88'H' ( 72)  |1100011                                      63  [ 7]
89'I' ( 73)  |1100100                                      64  [ 7]
90'J' ( 74)  |1100101                                      65  [ 7]
91'K' ( 75)  |1100110                                      66  [ 7]
92'L' ( 76)  |1100111                                      67  [ 7]
93'M' ( 77)  |1101000                                      68  [ 7]
94'N' ( 78)  |1101001                                      69  [ 7]
95'O' ( 79)  |1101010                                      6a  [ 7]
96'P' ( 80)  |1101011                                      6b  [ 7]
97'Q' ( 81)  |1101100                                      6c  [ 7]
98'R' ( 82)  |1101101                                      6d  [ 7]
99'S' ( 83)  |1101110                                      6e  [ 7]
100'T' ( 84)  |1101111                                      6f  [ 7]
101'U' ( 85)  |1110000                                      70  [ 7]
102'V' ( 86)  |1110001                                      71  [ 7]
103'W' ( 87)  |1110010                                      72  [ 7]
104'X' ( 88)  |11111100                                     fc  [ 8]
105'Y' ( 89)  |1110011                                      73  [ 7]
106'Z' ( 90)  |11111101                                     fd  [ 8]
107'[' ( 91)  |11111111|11011                             1ffb  [13]
108'\' ( 92)  |11111111|11111110|000                     7fff0  [19]
109']' ( 93)  |11111111|11100                             1ffc  [13]
110'^' ( 94)  |11111111|111100                            3ffc  [14]
111'_' ( 95)  |100010                                       22  [ 6]
112'`' ( 96)  |11111111|1111101                           7ffd  [15]
113'a' ( 97)  |00011                                         3  [ 5]
114'b' ( 98)  |100011                                       23  [ 6]
115'c' ( 99)  |00100                                         4  [ 5]
116'd' (100)  |100100                                       24  [ 6]
117'e' (101)  |00101                                         5  [ 5]
118'f' (102)  |100101                                       25  [ 6]
119'g' (103)  |100110                                       26  [ 6]
120'h' (104)  |100111                                       27  [ 6]
121'i' (105)  |00110                                         6  [ 5]
122'j' (106)  |1110100                                      74  [ 7]
123'k' (107)  |1110101                                      75  [ 7]
124'l' (108)  |101000                                       28  [ 6]
125'm' (109)  |101001                                       29  [ 6]
126'n' (110)  |101010                                       2a  [ 6]
127'o' (111)  |00111                                         7  [ 5]
128'p' (112)  |101011                                       2b  [ 6]
129'q' (113)  |1110110                                      76  [ 7]
130'r' (114)  |101100                                       2c  [ 6]
131's' (115)  |01000                                         8  [ 5]
132't' (116)  |01001                                         9  [ 5]
133'u' (117)  |101101                                       2d  [ 6]
134'v' (118)  |1110111                                      77  [ 7]
135'w' (119)  |1111000                                      78  [ 7]
136'x' (120)  |1111001                                      79  [ 7]
137'y' (121)  |1111010                                      7a  [ 7]
138'z' (122)  |1111011                                      7b  [ 7]
139'{' (123)  |11111111|1111110                           7ffe  [15]
140'|' (124)  |11111111|100                                7fc  [11]
141'}' (125)  |11111111|111101                            3ffd  [14]
142'~' (126)  |11111111|11101                             1ffd  [13]
143    (127)  |11111111|11111111|11111111|1100         ffffffc  [28]
144    (128)  |11111111|11111110|0110                    fffe6  [20]
145    (129)  |11111111|11111111|010010                 3fffd2  [22]
146    (130)  |11111111|11111110|0111                    fffe7  [20]
147    (131)  |11111111|11111110|1000                    fffe8  [20]
148    (132)  |11111111|11111111|010011                 3fffd3  [22]
149    (133)  |11111111|11111111|010100                 3fffd4  [22]
150    (134)  |11111111|11111111|010101                 3fffd5  [22]
151    (135)  |11111111|11111111|1011001                7fffd9  [23]
152    (136)  |11111111|11111111|010110                 3fffd6  [22]
153    (137)  |11111111|11111111|1011010                7fffda  [23]
154    (138)  |11111111|11111111|1011011                7fffdb  [23]
155    (139)  |11111111|11111111|1011100                7fffdc  [23]
156    (140)  |11111111|11111111|1011101                7fffdd  [23]
157    (141)  |11111111|11111111|1011110                7fffde  [23]
158    (142)  |11111111|11111111|11101011               ffffeb  [24]
159    (143)  |11111111|11111111|1011111                7fffdf  [23]
160    (144)  |11111111|11111111|11101100               ffffec  [24]
161    (145)  |11111111|11111111|11101101               ffffed  [24]
162    (146)  |11111111|11111111|010111                 3fffd7  [22]
163    (147)  |11111111|11111111|1100000                7fffe0  [23]
164    (148)  |11111111|11111111|11101110               ffffee  [24]
165    (149)  |11111111|11111111|1100001                7fffe1  [23]
166    (150)  |11111111|11111111|1100010                7fffe2  [23]
167    (151)  |11111111|11111111|1100011                7fffe3  [23]
168    (152)  |11111111|11111111|1100100                7fffe4  [23]
169    (153)  |11111111|11111110|11100                  1fffdc  [21]
170    (154)  |11111111|11111111|011000                 3fffd8  [22]
171    (155)  |11111111|11111111|1100101                7fffe5  [23]
172    (156)  |11111111|11111111|011001                 3fffd9  [22]
173    (157)  |11111111|11111111|1100110                7fffe6  [23]
174    (158)  |11111111|11111111|1100111                7fffe7  [23]
175    (159)  |11111111|11111111|11101111               ffffef  [24]
176    (160)  |11111111|11111111|011010                 3fffda  [22]
177    (161)  |11111111|11111110|11101                  1fffdd  [21]
178    (162)  |11111111|11111110|1001                    fffe9  [20]
179    (163)  |11111111|11111111|011011                 3fffdb  [22]
180    (164)  |11111111|11111111|011100                 3fffdc  [22]
181    (165)  |11111111|11111111|1101000                7fffe8  [23]
182    (166)  |11111111|11111111|1101001                7fffe9  [23]
183    (167)  |11111111|11111110|11110                  1fffde  [21]
184    (168)  |11111111|11111111|1101010                7fffea  [23]
185    (169)  |11111111|11111111|011101                 3fffdd  [22]
186    (170)  |11111111|11111111|011110                 3fffde  [22]
187    (171)  |11111111|11111111|11110000               fffff0  [24]
188    (172)  |11111111|11111110|11111                  1fffdf  [21]
189    (173)  |11111111|11111111|011111                 3fffdf  [22]
190    (174)  |11111111|11111111|1101011                7fffeb  [23]
191    (175)  |11111111|11111111|1101100                7fffec  [23]
192    (176)  |11111111|11111111|00000                  1fffe0  [21]
193    (177)  |11111111|11111111|00001                  1fffe1  [21]
194    (178)  |11111111|11111111|100000                 3fffe0  [22]
195    (179)  |11111111|11111111|00010                  1fffe2  [21]
196    (180)  |11111111|11111111|1101101                7fffed  [23]
197    (181)  |11111111|11111111|100001                 3fffe1  [22]
198    (182)  |11111111|11111111|1101110                7fffee  [23]
199    (183)  |11111111|11111111|1101111                7fffef  [23]
200    (184)  |11111111|11111110|1010                    fffea  [20]
201    (185)  |11111111|11111111|100010                 3fffe2  [22]
202    (186)  |11111111|11111111|100011                 3fffe3  [22]
203    (187)  |11111111|11111111|100100                 3fffe4  [22]
204    (188)  |11111111|11111111|1110000                7ffff0  [23]
205    (189)  |11111111|11111111|100101                 3fffe5  [22]
206    (190)  |11111111|11111111|100110                 3fffe6  [22]
207    (191)  |11111111|11111111|1110001                7ffff1  [23]
208    (192)  |11111111|11111111|11111000|00           3ffffe0  [26]
209    (193)  |11111111|11111111|11111000|01           3ffffe1  [26]
210    (194)  |11111111|11111110|1011                    fffeb  [20]
211    (195)  |11111111|11111110|001                     7fff1  [19]
212    (196)  |11111111|11111111|100111                 3fffe7  [22]
213    (197)  |11111111|11111111|1110010                7ffff2  [23]
214    (198)  |11111111|11111111|101000                 3fffe8  [22]
215    (199)  |11111111|11111111|11110110|0            1ffffec  [25]
216    (200)  |11111111|11111111|11111000|10           3ffffe2  [26]
217    (201)  |11111111|11111111|11111000|11           3ffffe3  [26]
218    (202)  |11111111|11111111|11111001|00           3ffffe4  [26]
219    (203)  |11111111|11111111|11111011|110          7ffffde  [27]
220    (204)  |11111111|11111111|11111011|111          7ffffdf  [27]
221    (205)  |11111111|11111111|11111001|01           3ffffe5  [26]
222    (206)  |11111111|11111111|11110001               fffff1  [24]
223    (207)  |11111111|11111111|11110110|1            1ffffed  [25]
224    (208)  |11111111|11111110|010                     7fff2  [19]
225    (209)  |11111111|11111111|00011                  1fffe3  [21]
226    (210)  |11111111|11111111|11111001|10           3ffffe6  [26]
227    (211)  |11111111|11111111|11111100|000          7ffffe0  [27]
228    (212)  |11111111|11111111|11111100|001          7ffffe1  [27]
229    (213)  |11111111|11111111|11111001|11           3ffffe7  [26]
230    (214)  |11111111|11111111|11111100|010          7ffffe2  [27]
231    (215)  |11111111|11111111|11110010               fffff2  [24]
232    (216)  |11111111|11111111|00100                  1fffe4  [21]
233    (217)  |11111111|11111111|00101                  1fffe5  [21]
234    (218)  |11111111|11111111|11111010|00           3ffffe8  [26]
235    (219)  |11111111|11111111|11111010|01           3ffffe9  [26]
236    (220)  |11111111|11111111|11111111|1101         ffffffd  [28]
237    (221)  |11111111|11111111|11111100|011          7ffffe3  [27]
238    (222)  |11111111|11111111|11111100|100          7ffffe4  [27]
239    (223)  |11111111|11111111|11111100|101          7ffffe5  [27]
240    (224)  |11111111|11111110|1100                    fffec  [20]
241    (225)  |11111111|11111111|11110011               fffff3  [24]
242    (226)  |11111111|11111110|1101                    fffed  [20]
243    (227)  |11111111|11111111|00110                  1fffe6  [21]
244    (228)  |11111111|11111111|101001                 3fffe9  [22]
245    (229)  |11111111|11111111|00111                  1fffe7  [21]
246    (230)  |11111111|11111111|01000                  1fffe8  [21]
247    (231)  |11111111|11111111|1110011                7ffff3  [23]
248    (232)  |11111111|11111111|101010                 3fffea  [22]
249    (233)  |11111111|11111111|101011                 3fffeb  [22]
250    (234)  |11111111|11111111|11110111|0            1ffffee  [25]
251    (235)  |11111111|11111111|11110111|1            1ffffef  [25]
252    (236)  |11111111|11111111|11110100               fffff4  [24]
253    (237)  |11111111|11111111|11110101               fffff5  [24]
254    (238)  |11111111|11111111|11111010|10           3ffffea  [26]
255    (239)  |11111111|11111111|1110100                7ffff4  [23]
256    (240)  |11111111|11111111|11111010|11           3ffffeb  [26]
257    (241)  |11111111|11111111|11111100|110          7ffffe6  [27]
258    (242)  |11111111|11111111|11111011|00           3ffffec  [26]
259    (243)  |11111111|11111111|11111011|01           3ffffed  [26]
260    (244)  |11111111|11111111|11111100|111          7ffffe7  [27]
261    (245)  |11111111|11111111|11111101|000          7ffffe8  [27]
262    (246)  |11111111|11111111|11111101|001          7ffffe9  [27]
263    (247)  |11111111|11111111|11111101|010          7ffffea  [27]
264    (248)  |11111111|11111111|11111101|011          7ffffeb  [27]
265    (249)  |11111111|11111111|11111111|1110         ffffffe  [28]
266    (250)  |11111111|11111111|11111101|100          7ffffec  [27]
267    (251)  |11111111|11111111|11111101|101          7ffffed  [27]
268    (252)  |11111111|11111111|11111101|110          7ffffee  [27]
269    (253)  |11111111|11111111|11111101|111          7ffffef  [27]
270    (254)  |11111111|11111111|11111110|000          7fffff0  [27]
271    (255)  |11111111|11111111|11111011|10           3ffffee  [26]
272EOS (256)  |11111111|11111111|11111111|111111      3fffffff  [30]
273"""
274
275class Node:
276
277    def __init__(self, term = None):
278        self.term = term
279        self.left = None
280        self.right = None
281        self.trans = []
282        self.id = None
283        self.accept = False
284
285class Context:
286
287    def __init__(self):
288        self.next_id_ = 0
289        self.root = Node()
290
291    def next_id(self):
292        id = self.next_id_
293        self.next_id_ += 1
294        return id
295
296def _add(node, sym, bits):
297    if len(bits) == 0:
298        node.term = sym
299        return
300    else:
301        if bits[0] == '0':
302            if node.left is None:
303                node.left = Node()
304            child = node.left
305        else:
306            if node.right is None:
307                node.right = Node()
308            child = node.right
309        _add(child, sym, bits[1:])
310
311def huffman_tree_add(ctx, sym, bits):
312    _add(ctx.root, sym, bits)
313
314def _set_node_id(ctx, node, prefix):
315    if node.term is not None:
316        return
317    if len(prefix) <= 7 and [1] * len(prefix) == prefix:
318        node.accept = True
319    node.id = ctx.next_id()
320    _set_node_id(ctx, node.left, prefix + [0])
321    _set_node_id(ctx, node.right, prefix + [1])
322
323def huffman_tree_set_node_id(ctx):
324    _set_node_id(ctx, ctx.root, [])
325
326def _traverse(node, sym, start_node, root, left):
327    if left == 0:
328        if sym == 256:
329            sym = None
330            node = None
331        start_node.trans.append((node, sym))
332        return
333
334    if node.term is not None:
335        node = root
336
337    def go(node):
338        if node.term is not None:
339            assert sym is None
340            nsym = node.term
341        else:
342            nsym = sym
343
344        _traverse(node, nsym, start_node, root, left - 1)
345
346    go(node.left)
347    go(node.right)
348
349def _build_transition_table(ctx, node):
350    if node is None:
351        return
352    _traverse(node, None, node, ctx.root, 4)
353    _build_transition_table(ctx, node.left)
354    _build_transition_table(ctx, node.right)
355
356def huffman_tree_build_transition_table(ctx):
357    _build_transition_table(ctx, ctx.root)
358
359NGHTTP2_HUFF_ACCEPTED = 1 << 14
360NGHTTP2_HUFF_SYM = 1 << 15
361
362def _print_transition_table(node):
363    if node.term is not None:
364        return
365    print('/* {} */'.format(node.id))
366    print('{')
367    for nd, sym in node.trans:
368        flags = 0
369        if sym is None:
370            out = 0
371        else:
372            out = sym
373            flags |= NGHTTP2_HUFF_SYM
374        if nd is None:
375            id = 256
376        else:
377            id = nd.id
378            if id is None:
379                # if nd.id is None, it is a leaf node
380                id = 0
381                flags |= NGHTTP2_HUFF_ACCEPTED
382            elif nd.accept:
383                flags |= NGHTTP2_HUFF_ACCEPTED
384        print('  {{0x{:02x}, {}}},'.format(id | flags, out))
385    print('},')
386    _print_transition_table(node.left)
387    _print_transition_table(node.right)
388
389def huffman_tree_print_transition_table(ctx):
390    _print_transition_table(ctx.root)
391    print('/* 256 */')
392    print('{')
393    print('  {0x100, 0},')
394    print('  {0x100, 0},')
395    print('  {0x100, 0},')
396    print('  {0x100, 0},')
397    print('  {0x100, 0},')
398    print('  {0x100, 0},')
399    print('  {0x100, 0},')
400    print('  {0x100, 0},')
401    print('  {0x100, 0},')
402    print('  {0x100, 0},')
403    print('  {0x100, 0},')
404    print('  {0x100, 0},')
405    print('  {0x100, 0},')
406    print('  {0x100, 0},')
407    print('  {0x100, 0},')
408    print('  {0x100, 0},')
409    print('},')
410
411if __name__ == '__main__':
412    ctx = Context()
413    symbol_tbl = [(None, 0) for i in range(257)]
414
415    for line in StringIO(HUFFMAN_CODE_TABLE):
416        m = re.match(
417            r'.*\(\s*(\d+)\)\s+([|01]+)\s+(\S+)\s+\[\s*(\d+)\].*', line)
418        if m:
419            sym = int(m.group(1))
420            bits = re.sub(r'\|', '', m.group(2))
421            code = m.group(3)
422            nbits = int(m.group(4))
423            if len(code) > 8:
424                raise Error('Code is more than 4 bytes long')
425            assert(len(bits) == nbits)
426            symbol_tbl[sym] = (nbits, code)
427            huffman_tree_add(ctx, sym, bits)
428
429    huffman_tree_set_node_id(ctx)
430    huffman_tree_build_transition_table(ctx)
431
432    print('''\
433typedef struct {
434  uint32_t nbits;
435  uint32_t code;
436} nghttp2_huff_sym;
437''')
438
439    print('''\
440const nghttp2_huff_sym huff_sym_table[] = {''')
441    for i in range(257):
442        nbits = symbol_tbl[i][0]
443        k = int(symbol_tbl[i][1], 16)
444        k = k << (32 - nbits)
445        print('''\
446  {{ {}, 0x{}u }}{}\
447'''.format(symbol_tbl[i][0], hex(k)[2:], ',' if i < 256 else ''))
448    print('};')
449    print()
450
451    print('''\
452enum {{
453  NGHTTP2_HUFF_ACCEPTED = {},
454  NGHTTP2_HUFF_SYM = {},
455}} nghttp2_huff_decode_flag;
456'''.format(NGHTTP2_HUFF_ACCEPTED, NGHTTP2_HUFF_SYM))
457
458    print('''\
459typedef struct {
460  uint16_t fstate;
461  uint8_t sym;
462} nghttp2_huff_decode;
463''')
464
465    print('''\
466const nghttp2_huff_decode huff_decode_table[][16] = {''')
467    huffman_tree_print_transition_table(ctx)
468    print('};')
469