Qt
Internal/Contributor docs for the Qt SDK. Note: These are NOT official API docs; those are found at https://doc.qt.io/
Loading...
Searching...
No Matches
huffman.cpp
Go to the documentation of this file.
1// Copyright (C) 2016 The Qt Company Ltd.
2// SPDX-License-Identifier: LicenseRef-Qt-Commercial OR LGPL-3.0-only OR GPL-2.0-only OR GPL-3.0-only
3// Qt-Security score:critical reason:network-protocol
4
5#include "bitstreams_p.h"
6#include "huffman_p.h"
7
8#include <QtCore/qbytearray.h>
9
10#include <algorithm>
11#include <limits>
12
13QT_BEGIN_NAMESPACE
14
15namespace HPack
16{
17
18/*
19 The static Huffman code used here was extracted from:
20 https://http2.github.io/http2-spec/compression.html#huffman.code
21
22 This code was generated from statistics obtained on a large
23 sample of HTTP headers. It is a canonical Huffman code
24 with some tweaking to ensure that no symbol has a unique
25 code length. All codes were left-aligned - for implementation
26 convenience.
27
28 Using binary trees to implement decoding would be prohibitively
29 expensive (both memory and time-wise). Instead we use a table-based
30 approach and any given code itself works as an index into such table(s).
31 We have 256 possible byte values and code lengths in
32 a range [5, 26]. This would require a huge table (and most of entries
33 would be 'wasted', since we only have to encode 256 elements).
34 Instead we use multi-level tables. The first level table
35 is using 9-bit length index; some entries in this table are 'terminal',
36 some reference the next level table(s).
37
38 For example, bytes with values 48 and 49 (ASCII codes for '0' and '1')
39 both have code length 5, Huffman codes are: 00000 and 00001. They
40 both are placed in the 'root' table,
41 the 'root' table has index length == 9:
42 [00000 | 4 remaining bits]
43 ...
44 [00001 | 4 remaining bits]
45
46 All entries with indices between these two will 'point' to value 48
47 with bitLength == 5 so that bit stream (for example) 000001010 will be
48 decoded as: 48 + "put 1010 back into bitstream".
49
50 A good description can be found here:
51 http://commandlinefanatic.com/cgi-bin/showarticle.cgi?article=art007
52 or just google "Efficient Huffman Decoding".
53 Also see comments below about 'filling holes'.
54*/
55
56namespace
57{
58
59const CodeEntry staticHuffmanCodeTable[]
60{
61 { 0, 0xffc00000ul, 13}, // 11111111|11000
62 { 1, 0xffffb000ul, 23}, // 11111111|11111111|1011000
63 { 2, 0xfffffe20ul, 28}, // 11111111|11111111|11111110|0010
64 { 3, 0xfffffe30ul, 28}, // 11111111|11111111|11111110|0011
65 { 4, 0xfffffe40ul, 28}, // 11111111|11111111|11111110|0100
66 { 5, 0xfffffe50ul, 28}, // 11111111|11111111|11111110|0101
67 { 6, 0xfffffe60ul, 28}, // 11111111|11111111|11111110|0110
68 { 7, 0xfffffe70ul, 28}, // 11111111|11111111|11111110|0111
69 { 8, 0xfffffe80ul, 28}, // 11111111|11111111|11111110|1000
70 { 9, 0xffffea00ul, 24}, // 11111111|11111111|11101010
71 { 10, 0xfffffff0ul, 30}, // 11111111|11111111|11111111|111100
72 { 11, 0xfffffe90ul, 28}, // 11111111|11111111|11111110|1001
73 { 12, 0xfffffea0ul, 28}, // 11111111|11111111|11111110|1010
74 { 13, 0xfffffff4ul, 30}, // 11111111|11111111|11111111|111101
75 { 14, 0xfffffeb0ul, 28}, // 11111111|11111111|11111110|1011
76 { 15, 0xfffffec0ul, 28}, // 11111111|11111111|11111110|1100
77 { 16, 0xfffffed0ul, 28}, // 11111111|11111111|11111110|1101
78 { 17, 0xfffffee0ul, 28}, // 11111111|11111111|11111110|1110
79 { 18, 0xfffffef0ul, 28}, // 11111111|11111111|11111110|1111
80 { 19, 0xffffff00ul, 28}, // 11111111|11111111|11111111|0000
81 { 20, 0xffffff10ul, 28}, // 11111111|11111111|11111111|0001
82 { 21, 0xffffff20ul, 28}, // 11111111|11111111|11111111|0010
83 { 22, 0xfffffff8ul, 30}, // 11111111|11111111|11111111|111110
84 { 23, 0xffffff30ul, 28}, // 11111111|11111111|11111111|0011
85 { 24, 0xffffff40ul, 28}, // 11111111|11111111|11111111|0100
86 { 25, 0xffffff50ul, 28}, // 11111111|11111111|11111111|0101
87 { 26, 0xffffff60ul, 28}, // 11111111|11111111|11111111|0110
88 { 27, 0xffffff70ul, 28}, // 11111111|11111111|11111111|0111
89 { 28, 0xffffff80ul, 28}, // 11111111|11111111|11111111|1000
90 { 29, 0xffffff90ul, 28}, // 11111111|11111111|11111111|1001
91 { 30, 0xffffffa0ul, 28}, // 11111111|11111111|11111111|1010
92 { 31, 0xffffffb0ul, 28}, // 11111111|11111111|11111111|1011
93 { 32, 0x50000000ul, 6}, // ' ' 010100
94 { 33, 0xfe000000ul, 10}, // '!' 11111110|00
95 { 34, 0xfe400000ul, 10}, // '"' 11111110|01
96 { 35, 0xffa00000ul, 12}, // '#' 11111111|1010
97 { 36, 0xffc80000ul, 13}, // '$' 11111111|11001
98 { 37, 0x54000000ul, 6}, // '%' 010101
99 { 38, 0xf8000000ul, 8}, // '&' 11111000
100 { 39, 0xff400000ul, 11}, // ''' 11111111|010
101 { 40, 0xfe800000ul, 10}, // '(' 11111110|10
102 { 41, 0xfec00000ul, 10}, // ')' 11111110|11
103 { 42, 0xf9000000ul, 8}, // '*' 11111001
104 { 43, 0xff600000ul, 11}, // '+' 11111111|011
105 { 44, 0xfa000000ul, 8}, // ',' 11111010
106 { 45, 0x58000000ul, 6}, // '-' 010110
107 { 46, 0x5c000000ul, 6}, // '.' 010111
108 { 47, 0x60000000ul, 6}, // '/' 011000
109 { 48, 0x00000000ul, 5}, // '0' 00000
110 { 49, 0x08000000ul, 5}, // '1' 00001
111 { 50, 0x10000000ul, 5}, // '2' 00010
112 { 51, 0x64000000ul, 6}, // '3' 011001
113 { 52, 0x68000000ul, 6}, // '4' 011010
114 { 53, 0x6c000000ul, 6}, // '5' 011011
115 { 54, 0x70000000ul, 6}, // '6' 011100
116 { 55, 0x74000000ul, 6}, // '7' 011101
117 { 56, 0x78000000ul, 6}, // '8' 011110
118 { 57, 0x7c000000ul, 6}, // '9' 011111
119 { 58, 0xb8000000ul, 7}, // ':' 1011100
120 { 59, 0xfb000000ul, 8}, // ';' 11111011
121 { 60, 0xfff80000ul, 15}, // '<' 11111111|1111100
122 { 61, 0x80000000ul, 6}, // '=' 100000
123 { 62, 0xffb00000ul, 12}, // '>' 11111111|1011
124 { 63, 0xff000000ul, 10}, // '?' 11111111|00
125 { 64, 0xffd00000ul, 13}, // '@' 11111111|11010
126 { 65, 0x84000000ul, 6}, // 'A' 100001
127 { 66, 0xba000000ul, 7}, // 'B' 1011101
128 { 67, 0xbc000000ul, 7}, // 'C' 1011110
129 { 68, 0xbe000000ul, 7}, // 'D' 1011111
130 { 69, 0xc0000000ul, 7}, // 'E' 1100000
131 { 70, 0xc2000000ul, 7}, // 'F' 1100001
132 { 71, 0xc4000000ul, 7}, // 'G' 1100010
133 { 72, 0xc6000000ul, 7}, // 'H' 1100011
134 { 73, 0xc8000000ul, 7}, // 'I' 1100100
135 { 74, 0xca000000ul, 7}, // 'J' 1100101
136 { 75, 0xcc000000ul, 7}, // 'K' 1100110
137 { 76, 0xce000000ul, 7}, // 'L' 1100111
138 { 77, 0xd0000000ul, 7}, // 'M' 1101000
139 { 78, 0xd2000000ul, 7}, // 'N' 1101001
140 { 79, 0xd4000000ul, 7}, // 'O' 1101010
141 { 80, 0xd6000000ul, 7}, // 'P' 1101011
142 { 81, 0xd8000000ul, 7}, // 'Q' 1101100
143 { 82, 0xda000000ul, 7}, // 'R' 1101101
144 { 83, 0xdc000000ul, 7}, // 'S' 1101110
145 { 84, 0xde000000ul, 7}, // 'T' 1101111
146 { 85, 0xe0000000ul, 7}, // 'U' 1110000
147 { 86, 0xe2000000ul, 7}, // 'V' 1110001
148 { 87, 0xe4000000ul, 7}, // 'W' 1110010
149 { 88, 0xfc000000ul, 8}, // 'X' 11111100
150 { 89, 0xe6000000ul, 7}, // 'Y' 1110011
151 { 90, 0xfd000000ul, 8}, // 'Z' 11111101
152 { 91, 0xffd80000ul, 13}, // '[' 11111111|11011
153 { 92, 0xfffe0000ul, 19}, // '\' 11111111|11111110|000
154 { 93, 0xffe00000ul, 13}, // ']' 11111111|11100
155 { 94, 0xfff00000ul, 14}, // '^' 11111111|111100
156 { 95, 0x88000000ul, 6}, // '_' 100010
157 { 96, 0xfffa0000ul, 15}, // '`' 11111111|1111101
158 { 97, 0x18000000ul, 5}, // 'a' 00011
159 { 98, 0x8c000000ul, 6}, // 'b' 100011
160 { 99, 0x20000000ul, 5}, // 'c' 00100
161 {100, 0x90000000ul, 6}, // 'd' 100100
162 {101, 0x28000000ul, 5}, // 'e' 00101
163 {102, 0x94000000ul, 6}, // 'f' 100101
164 {103, 0x98000000ul, 6}, // 'g' 100110
165 {104, 0x9c000000ul, 6}, // 'h' 100111
166 {105, 0x30000000ul, 5}, // 'i' 00110
167 {106, 0xe8000000ul, 7}, // 'j' 1110100
168 {107, 0xea000000ul, 7}, // 'k' 1110101
169 {108, 0xa0000000ul, 6}, // 'l' 101000
170 {109, 0xa4000000ul, 6}, // 'm' 101001
171 {110, 0xa8000000ul, 6}, // 'n' 101010
172 {111, 0x38000000ul, 5}, // 'o' 00111
173 {112, 0xac000000ul, 6}, // 'p' 101011
174 {113, 0xec000000ul, 7}, // 'q' 1110110
175 {114, 0xb0000000ul, 6}, // 'r' 101100
176 {115, 0x40000000ul, 5}, // 's' 01000
177 {116, 0x48000000ul, 5}, // 't' 01001
178 {117, 0xb4000000ul, 6}, // 'u' 101101
179 {118, 0xee000000ul, 7}, // 'v' 1110111
180 {119, 0xf0000000ul, 7}, // 'w' 1111000
181 {120, 0xf2000000ul, 7}, // 'x' 1111001
182 {121, 0xf4000000ul, 7}, // 'y' 1111010
183 {122, 0xf6000000ul, 7}, // 'z' 1111011
184 {123, 0xfffc0000ul, 15}, // '{' 11111111|1111110
185 {124, 0xff800000ul, 11}, // '|' 11111111|100
186 {125, 0xfff40000ul, 14}, // '}' 11111111|111101
187 {126, 0xffe80000ul, 13}, // '~' 11111111|11101
188 {127, 0xffffffc0ul, 28}, // 11111111|11111111|11111111|1100
189 {128, 0xfffe6000ul, 20}, // 11111111|11111110|0110
190 {129, 0xffff4800ul, 22}, // 11111111|11111111|010010
191 {130, 0xfffe7000ul, 20}, // 11111111|11111110|0111
192 {131, 0xfffe8000ul, 20}, // 11111111|11111110|1000
193 {132, 0xffff4c00ul, 22}, // 11111111|11111111|010011
194 {133, 0xffff5000ul, 22}, // 11111111|11111111|010100
195 {134, 0xffff5400ul, 22}, // 11111111|11111111|010101
196 {135, 0xffffb200ul, 23}, // 11111111|11111111|1011001
197 {136, 0xffff5800ul, 22}, // 11111111|11111111|010110
198 {137, 0xffffb400ul, 23}, // 11111111|11111111|1011010
199 {138, 0xffffb600ul, 23}, // 11111111|11111111|1011011
200 {139, 0xffffb800ul, 23}, // 11111111|11111111|1011100
201 {140, 0xffffba00ul, 23}, // 11111111|11111111|1011101
202 {141, 0xffffbc00ul, 23}, // 11111111|11111111|1011110
203 {142, 0xffffeb00ul, 24}, // 11111111|11111111|11101011
204 {143, 0xffffbe00ul, 23}, // 11111111|11111111|1011111
205 {144, 0xffffec00ul, 24}, // 11111111|11111111|11101100
206 {145, 0xffffed00ul, 24}, // 11111111|11111111|11101101
207 {146, 0xffff5c00ul, 22}, // 11111111|11111111|010111
208 {147, 0xffffc000ul, 23}, // 11111111|11111111|1100000
209 {148, 0xffffee00ul, 24}, // 11111111|11111111|11101110
210 {149, 0xffffc200ul, 23}, // 11111111|11111111|1100001
211 {150, 0xffffc400ul, 23}, // 11111111|11111111|1100010
212 {151, 0xffffc600ul, 23}, // 11111111|11111111|1100011
213 {152, 0xffffc800ul, 23}, // 11111111|11111111|1100100
214 {153, 0xfffee000ul, 21}, // 11111111|11111110|11100
215 {154, 0xffff6000ul, 22}, // 11111111|11111111|011000
216 {155, 0xffffca00ul, 23}, // 11111111|11111111|1100101
217 {156, 0xffff6400ul, 22}, // 11111111|11111111|011001
218 {157, 0xffffcc00ul, 23}, // 11111111|11111111|1100110
219 {158, 0xffffce00ul, 23}, // 11111111|11111111|1100111
220 {159, 0xffffef00ul, 24}, // 11111111|11111111|11101111
221 {160, 0xffff6800ul, 22}, // 11111111|11111111|011010
222 {161, 0xfffee800ul, 21}, // 11111111|11111110|11101
223 {162, 0xfffe9000ul, 20}, // 11111111|11111110|1001
224 {163, 0xffff6c00ul, 22}, // 11111111|11111111|011011
225 {164, 0xffff7000ul, 22}, // 11111111|11111111|011100
226 {165, 0xffffd000ul, 23}, // 11111111|11111111|1101000
227 {166, 0xffffd200ul, 23}, // 11111111|11111111|1101001
228 {167, 0xfffef000ul, 21}, // 11111111|11111110|11110
229 {168, 0xffffd400ul, 23}, // 11111111|11111111|1101010
230 {169, 0xffff7400ul, 22}, // 11111111|11111111|011101
231 {170, 0xffff7800ul, 22}, // 11111111|11111111|011110
232 {171, 0xfffff000ul, 24}, // 11111111|11111111|11110000
233 {172, 0xfffef800ul, 21}, // 11111111|11111110|11111
234 {173, 0xffff7c00ul, 22}, // 11111111|11111111|011111
235 {174, 0xffffd600ul, 23}, // 11111111|11111111|1101011
236 {175, 0xffffd800ul, 23}, // 11111111|11111111|1101100
237 {176, 0xffff0000ul, 21}, // 11111111|11111111|00000
238 {177, 0xffff0800ul, 21}, // 11111111|11111111|00001
239 {178, 0xffff8000ul, 22}, // 11111111|11111111|100000
240 {179, 0xffff1000ul, 21}, // 11111111|11111111|00010
241 {180, 0xffffda00ul, 23}, // 11111111|11111111|1101101
242 {181, 0xffff8400ul, 22}, // 11111111|11111111|100001
243 {182, 0xffffdc00ul, 23}, // 11111111|11111111|1101110
244 {183, 0xffffde00ul, 23}, // 11111111|11111111|1101111
245 {184, 0xfffea000ul, 20}, // 11111111|11111110|1010
246 {185, 0xffff8800ul, 22}, // 11111111|11111111|100010
247 {186, 0xffff8c00ul, 22}, // 11111111|11111111|100011
248 {187, 0xffff9000ul, 22}, // 11111111|11111111|100100
249 {188, 0xffffe000ul, 23}, // 11111111|11111111|1110000
250 {189, 0xffff9400ul, 22}, // 11111111|11111111|100101
251 {190, 0xffff9800ul, 22}, // 11111111|11111111|100110
252 {191, 0xffffe200ul, 23}, // 11111111|11111111|1110001
253 {192, 0xfffff800ul, 26}, // 11111111|11111111|11111000|00
254 {193, 0xfffff840ul, 26}, // 11111111|11111111|11111000|01
255 {194, 0xfffeb000ul, 20}, // 11111111|11111110|1011
256 {195, 0xfffe2000ul, 19}, // 11111111|11111110|001
257 {196, 0xffff9c00ul, 22}, // 11111111|11111111|100111
258 {197, 0xffffe400ul, 23}, // 11111111|11111111|1110010
259 {198, 0xffffa000ul, 22}, // 11111111|11111111|101000
260 {199, 0xfffff600ul, 25}, // 11111111|11111111|11110110|0
261 {200, 0xfffff880ul, 26}, // 11111111|11111111|11111000|10
262 {201, 0xfffff8c0ul, 26}, // 11111111|11111111|11111000|11
263 {202, 0xfffff900ul, 26}, // 11111111|11111111|11111001|00
264 {203, 0xfffffbc0ul, 27}, // 11111111|11111111|11111011|110
265 {204, 0xfffffbe0ul, 27}, // 11111111|11111111|11111011|111
266 {205, 0xfffff940ul, 26}, // 11111111|11111111|11111001|01
267 {206, 0xfffff100ul, 24}, // 11111111|11111111|11110001
268 {207, 0xfffff680ul, 25}, // 11111111|11111111|11110110|1
269 {208, 0xfffe4000ul, 19}, // 11111111|11111110|010
270 {209, 0xffff1800ul, 21}, // 11111111|11111111|00011
271 {210, 0xfffff980ul, 26}, // 11111111|11111111|11111001|10
272 {211, 0xfffffc00ul, 27}, // 11111111|11111111|11111100|000
273 {212, 0xfffffc20ul, 27}, // 11111111|11111111|11111100|001
274 {213, 0xfffff9c0ul, 26}, // 11111111|11111111|11111001|11
275 {214, 0xfffffc40ul, 27}, // 11111111|11111111|11111100|010
276 {215, 0xfffff200ul, 24}, // 11111111|11111111|11110010
277 {216, 0xffff2000ul, 21}, // 11111111|11111111|00100
278 {217, 0xffff2800ul, 21}, // 11111111|11111111|00101
279 {218, 0xfffffa00ul, 26}, // 11111111|11111111|11111010|00
280 {219, 0xfffffa40ul, 26}, // 11111111|11111111|11111010|01
281 {220, 0xffffffd0ul, 28}, // 11111111|11111111|11111111|1101
282 {221, 0xfffffc60ul, 27}, // 11111111|11111111|11111100|011
283 {222, 0xfffffc80ul, 27}, // 11111111|11111111|11111100|100
284 {223, 0xfffffca0ul, 27}, // 11111111|11111111|11111100|101
285 {224, 0xfffec000ul, 20}, // 11111111|11111110|1100
286 {225, 0xfffff300ul, 24}, // 11111111|11111111|11110011
287 {226, 0xfffed000ul, 20}, // 11111111|11111110|1101
288 {227, 0xffff3000ul, 21}, // 11111111|11111111|00110
289 {228, 0xffffa400ul, 22}, // 11111111|11111111|101001
290 {229, 0xffff3800ul, 21}, // 11111111|11111111|00111
291 {230, 0xffff4000ul, 21}, // 11111111|11111111|01000
292 {231, 0xffffe600ul, 23}, // 11111111|11111111|1110011
293 {232, 0xffffa800ul, 22}, // 11111111|11111111|101010
294 {233, 0xffffac00ul, 22}, // 11111111|11111111|101011
295 {234, 0xfffff700ul, 25}, // 11111111|11111111|11110111|0
296 {235, 0xfffff780ul, 25}, // 11111111|11111111|11110111|1
297 {236, 0xfffff400ul, 24}, // 11111111|11111111|11110100
298 {237, 0xfffff500ul, 24}, // 11111111|11111111|11110101
299 {238, 0xfffffa80ul, 26}, // 11111111|11111111|11111010|10
300 {239, 0xffffe800ul, 23}, // 11111111|11111111|1110100
301 {240, 0xfffffac0ul, 26}, // 11111111|11111111|11111010|11
302 {241, 0xfffffcc0ul, 27}, // 11111111|11111111|11111100|110
303 {242, 0xfffffb00ul, 26}, // 11111111|11111111|11111011|00
304 {243, 0xfffffb40ul, 26}, // 11111111|11111111|11111011|01
305 {244, 0xfffffce0ul, 27}, // 11111111|11111111|11111100|111
306 {245, 0xfffffd00ul, 27}, // 11111111|11111111|11111101|000
307 {246, 0xfffffd20ul, 27}, // 11111111|11111111|11111101|001
308 {247, 0xfffffd40ul, 27}, // 11111111|11111111|11111101|010
309 {248, 0xfffffd60ul, 27}, // 11111111|11111111|11111101|011
310 {249, 0xffffffe0ul, 28}, // 11111111|11111111|11111111|1110
311 {250, 0xfffffd80ul, 27}, // 11111111|11111111|11111101|100
312 {251, 0xfffffda0ul, 27}, // 11111111|11111111|11111101|101
313 {252, 0xfffffdc0ul, 27}, // 11111111|11111111|11111101|110
314 {253, 0xfffffde0ul, 27}, // 11111111|11111111|11111101|111
315 {254, 0xfffffe00ul, 27}, // 11111111|11111111|11111110|000
316 {255, 0xfffffb80ul, 26}, // 11111111|11111111|11111011|10
317 {256, 0xfffffffcul, 30} // EOS 11111111|11111111|11111111|111111
318};
319
320void write_huffman_code(BitOStream &outputStream, CodeEntry code)
321{
322 // Append octet by octet.
323 auto bitLength = code.bitLength;
324 const auto hc = code.huffmanCode >> (32 - bitLength);
325
326 if (bitLength > 24) {
327 outputStream.writeBits(uchar(hc >> 24), bitLength - 24);
328 bitLength = 24;
329 }
330
331 if (bitLength > 16) {
332 outputStream.writeBits(uchar(hc >> 16), bitLength - 16);
333 bitLength = 16;
334 }
335
336 if (bitLength > 8) {
337 outputStream.writeBits(uchar(hc >> 8), bitLength - 8);
338 bitLength = 8;
339 }
340
341 outputStream.writeBits(uchar(hc), bitLength);
342}
343
344}
345
346// That's from HPACK's specs - we deal with octets.
347static_assert(std::numeric_limits<uchar>::digits == 8, "octets expected");
348
349quint64 huffman_encoded_bit_length(QByteArrayView inputData)
350{
351 quint64 bitLength = 0;
352 for (int i = 0, e = inputData.size(); i < e; ++i)
353 bitLength += staticHuffmanCodeTable[int(inputData[i])].bitLength;
354
355 return bitLength;
356}
357
358void huffman_encode_string(QByteArrayView inputData, BitOStream &outputStream)
359{
360 for (int i = 0, e = inputData.size(); i < e; ++i) {
361 const auto value = uchar(inputData[i]);
362 write_huffman_code(outputStream, staticHuffmanCodeTable[value]);
363 }
364
365 // Pad bits ...
366 if (outputStream.bitLength() % 8)
367 outputStream.writeBits(0xff, 8 - outputStream.bitLength() % 8);
368}
369
370static constexpr
371bool padding_is_valid(quint32 chunk, quint32 nBits)
372{
373 Q_ASSERT(nBits);
374
375 // HPACK, 5.2: "A padding strictly longer than 7 bits MUST be
376 // treated as a decoding error."
377 if (nBits > 7)
378 return false;
379 // HPACK, 5.2:
380 // "A padding not corresponding to the most significant bits
381 // of the code for the EOS symbol MUST be treated as a decoding error."
382 return (chunk >> (32 - nBits)) == quint32((1 << nBits) - 1);
383}
384
386 : minCodeLength()
387{
388 const auto nCodes = sizeof staticHuffmanCodeTable / sizeof staticHuffmanCodeTable[0];
389
390 std::vector<CodeEntry> symbols(staticHuffmanCodeTable, staticHuffmanCodeTable + nCodes);
391 // Now we sort: by bit length first (in the descending order) and by the symbol
392 // value (descending). Descending order: to make sure we do not create prefix tables with
393 // short 'indexLength' first and having longer codes that do not fit into such tables later.
394 std::sort(symbols.begin(), symbols.end(), [](CodeEntry code1, CodeEntry code2) {
395 if (code1.bitLength == code2.bitLength)
396 return code1.byteValue > code2.byteValue;
397 return code1.bitLength > code2.bitLength;
398 });
399
400 minCodeLength = symbols.back().bitLength; // The shortest one, currently it's 5.
401
402 // TODO: add a verification - Huffman codes
403 // within a given bit length range also
404 // should be in descending order.
405
406 // Initialize 'prefixTables' and 'tableData'.
407 addTable(0, quint32(BitConstants::rootPrefix)); // 'root' table.
408
409 for (const auto &s : symbols) {
410 quint32 tableIndex = 0;
411 while (true) {
412 Q_ASSERT(tableIndex < prefixTables.size());
413 // Note, by value - since prefixTables will be updated in between.
414 const auto table = prefixTables[tableIndex];
415 // We skip prefixed bits (if any) and use indexed bits only:
416 const auto entryIndex = s.huffmanCode << table.prefixLength >> (32 - table.indexLength);
417 // Again, by value.
418 PrefixTableEntry entry = tableEntry(table, entryIndex);
419 // How many bits were coded by previous tables and this table:
420 const auto codedLength = table.prefixLength + table.indexLength;
421 if (codedLength < s.bitLength) {
422 // We have to add a new prefix table ... (if it's not done yet).
423 if (!entry.bitLength) {
424 entry.nextTable = addTable(codedLength, std::min<quint32>(quint32(BitConstants::childPrefix),
425 s.bitLength - codedLength));
426 entry.bitLength = s.bitLength;
427 entry.byteValue = s.byteValue;
428 setTableEntry(table, entryIndex, entry);
429 }
430 tableIndex = entry.nextTable;
431 } else {
432 // We found the slot for our code (terminal entry):
433 entry.byteValue = s.byteValue;
434 entry.bitLength = s.bitLength;
435 // Refer to our own table as 'nextTable':
436 entry.nextTable = tableIndex;
437 setTableEntry(table, entryIndex, entry);
438 break;
439 }
440 }
441 }
442
443 // Now, we have a table(s) and have to fill 'holes' to
444 // 'fix' terminal entries.
445 for (const auto &table : prefixTables) {
446 const quint32 codedLength = table.prefixLength + table.indexLength;
447 for (quint32 j = 0; j < table.size();) {
448 const PrefixTableEntry &entry = tableEntry(table, j);
449 if (entry.bitLength && entry.bitLength < codedLength) {
450 const quint32 range = 1 << (codedLength - entry.bitLength);
451 for (quint32 k = 1; k < range; ++k)
452 setTableEntry(table, j + k, entry);
453 j += range;
454 } else {
455 ++j;
456 }
457 }
458 }
459}
460
461bool HuffmanDecoder::decodeStream(BitIStream &inputStream, QByteArray &outputBuffer)
462{
463 while (true) {
464 quint32 chunk = 0;
465 const quint32 readBits = inputStream.peekBits(inputStream.streamOffset(), 32, &chunk);
466 if (!readBits)
467 return !inputStream.hasMoreBits();
468
469 if (readBits < minCodeLength) {
470 inputStream.skipBits(readBits);
471 return padding_is_valid(chunk, readBits);
472 }
473
474 quint32 tableIndex = 0;
475 const PrefixTable *table = &prefixTables[tableIndex];
476 quint32 entryIndex = chunk >> (32 - table->indexLength);
477 PrefixTableEntry entry = tableEntry(*table, entryIndex);
478
479 while (true) {
480 if (entry.nextTable == tableIndex)
481 break;
482
483 tableIndex = entry.nextTable;
484 table = &prefixTables[tableIndex];
485 entryIndex = chunk << table->prefixLength >> (32 - table->indexLength);
486 entry = tableEntry(*table, entryIndex);
487 }
488
489 if (entry.bitLength > readBits) {
490 inputStream.skipBits(readBits);
491 return padding_is_valid(chunk, readBits);
492 }
493
494 if (!entry.bitLength || entry.byteValue == 256) {
495 //EOS (256) == compression error (HPACK).
496 inputStream.skipBits(readBits);
497 return false;
498 }
499
500 outputBuffer.append(entry.byteValue);
501 inputStream.skipBits(entry.bitLength);
502 }
503
504 return false;
505}
506
507quint32 HuffmanDecoder::addTable(quint32 prefix, quint32 index)
508{
509 PrefixTable newTable{prefix, index};
510 newTable.offset = quint32(tableData.size());
511 prefixTables.push_back(newTable);
512 // Add entries for this table:
513 tableData.resize(tableData.size() + newTable.size());
514
515 return quint32(prefixTables.size() - 1);
516}
517
518PrefixTableEntry HuffmanDecoder::tableEntry(PrefixTable table, quint32 index)
519{
520 Q_ASSERT(index < table.size());
521 return tableData[table.offset + index];
522}
523
524void HuffmanDecoder::setTableEntry(PrefixTable table, quint32 index,
525 PrefixTableEntry entry)
526{
527 Q_ASSERT(index < table.size());
528 tableData[table.offset + index] = entry;
529}
530
531bool huffman_decode_string(BitIStream &inputStream, QByteArray *outputBuffer)
532{
533 Q_ASSERT(outputBuffer);
534
535 static HuffmanDecoder decoder;
536 return decoder.decodeStream(inputStream, *outputBuffer);
537}
538
539}
540
541QT_END_NAMESPACE
bool decodeStream(BitIStream &inputStream, QByteArray &outputBuffer)
Definition huffman.cpp:461
quint64 huffman_encoded_bit_length(QByteArrayView inputData)
Definition huffman.cpp:349
static constexpr bool padding_is_valid(quint32 chunk, quint32 nBits)
Definition huffman.cpp:371
bool huffman_decode_string(BitIStream &inputStream, QByteArray *outputBuffer)
Definition huffman.cpp:531
void huffman_encode_string(QByteArrayView inputData, BitOStream &outputStream)
Definition huffman.cpp:358