Qt
Internal/Contributor docs for the Qt SDK. Note: These are NOT official API docs; those are found at https://doc.qt.io/
Loading...
Searching...
No Matches
strip_jp2_comments.py
Go to the documentation of this file.
1#!/usr/bin/env python3
2# Copyright 2023 The PDFium Authors
3# Use of this source code is governed by a BSD-style license that can be
4# found in the LICENSE file.
5"""Strips comments from a JP2 file.
6
7This is a simple filter script to strip comments from a JP2 file, in order to
8save a few bytes from the final file size.
9"""
10
11import struct
12import sys
13
# Size in bytes of a box header: a 4-byte big-endian length followed by a
# 4-byte type tag (the '>I4s' layout used when reading and writing boxes).
BOX_HEADER_SIZE = 8
# Type tag of the box that holds the codestream.
BOX_TAG_JP2C = b'jp2c'

# Size in bytes of a codestream marker: the start byte plus one tag byte.
MARKER_SIZE = 2
# Byte value that introduces a marker.
MARKER_START = 0xff
# Tag value that means the preceding start byte was not a real marker.
MARKER_TAG_IGNORE = 0x00
# Tag value of the comment marker that this script strips.
MARKER_TAG_COMMENT = 0x64
# Tag value treated as a possible fill byte; it is reparsed as a marker start.
MARKER_TAG_FILL = 0xff
22
23
def parse_box(buffer, offset):
    """Parses the next box in a JP2 file.

    Args:
      buffer: A buffer containing the JP2 file contents.
      offset: The starting offset into the buffer.

    Returns:
      A tuple (next_offset, tag) where next_offset is the ending offset, and tag
      is the type tag. The box contents will be buffer[offset + 8:next_offset].
      A box whose length field is 0 extends to the end of the buffer, so
      next_offset is len(buffer) in that case.

    Raises:
      ValueError: If the length field is between 1 and 7. A value of 1 signals
        an extended 64-bit length, which this parser does not support; 2-7 are
        smaller than the header itself and cannot describe a valid box.
      struct.error: If fewer than 8 bytes are available at offset.
    """
    length, tag = struct.unpack_from('>I4s', buffer, offset)

    if length == 0:
        # A zero length means "this box runs to the end of the file". Returning
        # offset + 0 here would make the caller loop forever on the same box.
        return len(buffer), tag

    if length < 8:
        # Advancing by less than a header would re-parse bytes from the middle
        # of this box as if they were the next header.
        raise ValueError('unsupported box length %d for box %r at offset %d' %
                         (length, tag, offset))

    return offset + length, tag
37
38
def parse_marker(buffer, offset):
    """Parses the next marker in a codestream.

    Args:
      buffer: A buffer containing the codestream.
      offset: The starting offset into the buffer.

    Returns:
      A tuple (next_offset, tag) where next_offset is the offset after the marker,
      and tag is the type tag. If no marker was found, next_offset will point to
      the end of the buffer, and tag will be None. A marker is always 2 bytes.
    """
    end = len(buffer)
    while True:
        # Search for start of marker.
        next_offset = buffer.find(MARKER_START, offset)
        if next_offset == -1:
            # Always return a tuple so callers can unpack the result.
            return end, None
        next_offset += 1

        # Parse marker.
        if next_offset == end:
            # The start byte is the last byte of the buffer, so there is no tag.
            return end, None

        tag = buffer[next_offset]
        if tag == MARKER_TAG_FILL:
            # Possible fill byte, reparse as start of marker. The search must
            # resume at this byte; otherwise the same start byte is found again
            # and the loop never terminates.
            offset = next_offset
            continue
        next_offset += 1

        if tag == MARKER_TAG_IGNORE:
            # Not a real marker. Resume the search after both bytes so the loop
            # makes progress.
            offset = next_offset
            continue

        return next_offset, tag
74
75
def rewrite_jp2c(buffer):
    """Rebuilds a "jp2c" box with comment markers stripped from its codestream.

    Args:
      buffer: The contents of the box, without its header.

    Returns:
      A bytearray holding the new box: a header carrying the new total length
      and the "jp2c" tag, followed by the filtered codestream.
    """
    # Reserve room for the box header; it is filled in once the size is known.
    output = bytearray(BOX_HEADER_SIZE)
    total = len(buffer)

    keep_from = 0  # Start of the pending run of bytes not yet copied out.
    cursor = 0
    while cursor < total:
        cursor, marker = parse_marker(buffer, cursor)
        if marker != MARKER_TAG_COMMENT:
            # Other markers stay part of the pending run.
            continue

        # Copy everything up to, but not including, the comment marker.
        comment_start = cursor - MARKER_SIZE
        output.extend(buffer[keep_from:comment_start])

        # Drop the comment by scanning ahead to the marker that follows it.
        # NOTE(review): the comment is skipped by searching for the next marker,
        # not by reading a length field -- confirm this is intended.
        cursor, marker = parse_marker(buffer, cursor)
        if marker is not None:
            # Step back so the following marker is parsed again and kept.
            cursor -= MARKER_SIZE
        keep_from = cursor

    # Copy whatever remains after the last comment.
    output.extend(buffer[keep_from:])

    struct.pack_into('>I4s', output, 0, len(output), BOX_TAG_JP2C)
    return output
103
104
def main(in_file, out_file):
    """Copies a JP2 file from in_file to out_file, rewriting "jp2c" boxes.

    Args:
      in_file: A binary file object to read the whole JP2 file from.
      out_file: A binary file object to write the filtered file to.
    """
    data = in_file.read()
    size = len(data)

    # Walk the file one box at a time.
    position = 0
    while position < size:
        box_end, box_tag = parse_box(data, position)
        if box_tag == BOX_TAG_JP2C:
            # The codestream box is rebuilt from its contents (header excluded).
            chunk = rewrite_jp2c(data[position + BOX_HEADER_SIZE:box_end])
        else:
            # Every other box is copied through, header included.
            chunk = data[position:box_end]
        out_file.write(chunk)
        position = box_end

    out_file.flush()
121
122
# When run as a script, filter standard input to standard output through their
# underlying binary buffers.
if __name__ == '__main__':
    main(sys.stdin.buffer, sys.stdout.buffer)
int main()
[0]
QByteArray bytearray
[3]