Qt
Internal/Contributor docs for the Qt SDK. Note: These are NOT official API docs; those are found at https://doc.qt.io/
Loading...
Searching...
No Matches
cfx_seekablestreamproxy.cpp
Go to the documentation of this file.
1// Copyright 2017 The PDFium Authors
2// Use of this source code is governed by a BSD-style license that can be
3// found in the LICENSE file.
4
5// Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com
6
7#include "core/fxcrt/cfx_seekablestreamproxy.h"
8
9#include <stdint.h>
10
11#include <algorithm>
12#include <limits>
13#include <utility>
14
15#include "build/build_config.h"
16#include "core/fxcrt/check.h"
17#include "core/fxcrt/check_op.h"
18#include "core/fxcrt/data_vector.h"
19#include "core/fxcrt/fx_extension.h"
20#include "core/fxcrt/fx_safe_types.h"
21#include "core/fxcrt/span.h"
22#include "core/fxcrt/span_util.h"
23
24namespace {
25
26// Returns {src bytes consumed, dst chars produced}.
27// Invalid sequences are silently not output.
28std::pair<size_t, size_t> UTF8Decode(pdfium::span<const uint8_t> pSrc,
29 pdfium::span<wchar_t> pDst) {
30 DCHECK(!pDst.empty());
31
32 uint32_t dwCode = 0;
33 int32_t iPending = 0;
34 size_t iSrcNum = 0;
35 size_t iDstNum = 0;
36 for (size_t iIndex = 0; iIndex < pSrc.size() && iDstNum < pDst.size();
37 ++iIndex) {
38 ++iSrcNum;
39 uint8_t byte = pSrc[iIndex];
40 if (byte < 0x80) {
41 iPending = 0;
42 pDst[iDstNum++] = byte;
43 } else if (byte < 0xc0) {
44 if (iPending < 1)
45 continue;
46
47 dwCode = dwCode << 6;
48 dwCode |= (byte & 0x3f);
49 --iPending;
50 if (iPending == 0)
51 pDst[iDstNum++] = dwCode;
52 } else if (byte < 0xe0) {
53 iPending = 1;
54 dwCode = (byte & 0x1f);
55 } else if (byte < 0xf0) {
56 iPending = 2;
57 dwCode = (byte & 0x0f);
58 } else if (byte < 0xf8) {
59 iPending = 3;
60 dwCode = (byte & 0x07);
61 } else if (byte < 0xfc) {
62 iPending = 4;
63 dwCode = (byte & 0x03);
64 } else if (byte < 0xfe) {
65 iPending = 5;
66 dwCode = (byte & 0x01);
67 }
68 }
69 return {iSrcNum, iDstNum};
70}
71
72void UTF16ToWChar(pdfium::span<wchar_t> buffer) {
73#if defined(WCHAR_T_IS_32_BIT)
74 auto src = fxcrt::reinterpret_span<uint16_t>(buffer);
75 // Perform self-intersecting copy in reverse order.
76 for (size_t i = buffer.size(); i > 0; --i) {
77 buffer[i - 1] = static_cast<wchar_t>(src[i - 1]);
78 }
79#endif // defined(WCHAR_T_IS_32_BIT)
80}
81
82void SwapByteOrder(pdfium::span<uint16_t> str) {
83 for (auto& wch : str) {
84 wch = (wch >> 8) | (wch << 8);
85 }
86}
87
88} // namespace
89
// Byte-order-mark signatures, expressed as the value obtained when the leading
// bytes of the stream are loaded into the low-order bytes of a uint32_t.
// Typed constants replace the former object-like macros; the names are kept so
// existing uses continue to compile unchanged.
constexpr uint32_t BOM_UTF8_MASK = 0x00FFFFFF;   // Keeps the first three bytes.
constexpr uint32_t BOM_UTF8 = 0x00BFBBEF;        // Bytes EF BB BF.
constexpr uint32_t BOM_UTF16_MASK = 0x0000FFFF;  // Keeps the first two bytes.
constexpr uint32_t BOM_UTF16_BE = 0x0000FFFE;    // Bytes FE FF.
constexpr uint32_t BOM_UTF16_LE = 0x0000FEFF;    // Bytes FF FE.
95
96CFX_SeekableStreamProxy::CFX_SeekableStreamProxy(
97 const RetainPtr<IFX_SeekableReadStream>& stream)
98 : m_pStream(stream) {
99 DCHECK(m_pStream);
100
101 Seek(From::Begin, 0);
102
103 uint32_t bom = 0;
104 ReadData(pdfium::byte_span_from_ref(bom).first<3>());
105
106 bom &= BOM_UTF8_MASK;
107 if (bom == BOM_UTF8) {
108 m_wBOMLength = 3;
109 m_wCodePage = FX_CodePage::kUTF8;
110 } else {
111 bom &= BOM_UTF16_MASK;
112 if (bom == BOM_UTF16_BE) {
113 m_wBOMLength = 2;
114 m_wCodePage = FX_CodePage::kUTF16BE;
115 } else if (bom == BOM_UTF16_LE) {
116 m_wBOMLength = 2;
117 m_wCodePage = FX_CodePage::kUTF16LE;
118 } else {
119 m_wBOMLength = 0;
120 m_wCodePage = FX_GetACP();
121 }
122 }
123
124 Seek(From::Begin, static_cast<FX_FILESIZE>(m_wBOMLength));
125}
126
// Out-of-line defaulted destructor; no explicit cleanup is performed here.
CFX_SeekableStreamProxy::~CFX_SeekableStreamProxy() = default;
128
// Returns the size reported by the wrapped stream.
FX_FILESIZE CFX_SeekableStreamProxy::GetSize() const {
  return m_pStream->GetSize();
}
132
// Returns the proxy's current read offset within the wrapped stream.
FX_FILESIZE CFX_SeekableStreamProxy::GetPosition() const {
  return m_iPosition;
}
136
137bool CFX_SeekableStreamProxy::IsEOF() const {
138 return m_iPosition >= GetSize();
139}
140
141void CFX_SeekableStreamProxy::Seek(From eSeek, FX_FILESIZE iOffset) {
142 switch (eSeek) {
143 case From::Begin:
144 m_iPosition = iOffset;
145 break;
146 case From::Current: {
147 FX_SAFE_FILESIZE new_pos = m_iPosition;
148 new_pos += iOffset;
149 m_iPosition =
150 new_pos.ValueOrDefault(std::numeric_limits<FX_FILESIZE>::max());
151 } break;
152 }
153 m_iPosition = std::clamp(m_iPosition, static_cast<FX_FILESIZE>(0), GetSize());
154}
155
156void CFX_SeekableStreamProxy::SetCodePage(FX_CodePage wCodePage) {
157 if (m_wBOMLength > 0)
158 return;
159 m_wCodePage = wCodePage;
160}
161
162size_t CFX_SeekableStreamProxy::ReadData(pdfium::span<uint8_t> buffer) {
163 DCHECK(!buffer.empty());
164 const size_t remaining = static_cast<size_t>(GetSize() - m_iPosition);
165 size_t read_size = std::min(buffer.size(), remaining);
166 if (read_size == 0) {
167 return 0;
168 }
169 if (!m_pStream->ReadBlockAtOffset(buffer.first(read_size), m_iPosition)) {
170 return 0;
171 }
172 FX_SAFE_FILESIZE new_pos = m_iPosition;
173 new_pos += read_size;
174 m_iPosition = new_pos.ValueOrDefault(m_iPosition);
175 return new_pos.IsValid() ? read_size : 0;
176}
177
178size_t CFX_SeekableStreamProxy::ReadBlock(pdfium::span<wchar_t> buffer) {
179 if (buffer.empty()) {
180 return 0;
181 }
182 if (m_wCodePage == FX_CodePage::kUTF16LE ||
183 m_wCodePage == FX_CodePage::kUTF16BE) {
184 size_t bytes_to_read = buffer.size() * sizeof(uint16_t);
185 size_t bytes_read =
186 ReadData(pdfium::as_writable_bytes(buffer).first(bytes_to_read));
187 size_t elements = bytes_read / sizeof(uint16_t);
188 if (m_wCodePage == FX_CodePage::kUTF16BE) {
189 SwapByteOrder(fxcrt::reinterpret_span<uint16_t>(buffer).first(elements));
190 }
191 UTF16ToWChar(buffer.first(elements));
192 return elements;
193 }
194 FX_FILESIZE pos = GetPosition();
195 size_t bytes_to_read =
196 std::min(buffer.size(), static_cast<size_t>(GetSize() - pos));
197 if (bytes_to_read == 0) {
198 return 0;
199 }
200 DataVector<uint8_t> byte_buf(bytes_to_read);
201 size_t bytes_read = ReadData(byte_buf);
202 if (m_wCodePage != FX_CodePage::kUTF8) {
203 return 0;
204 }
205 auto [src_bytes_consumed, dest_wchars_produced] =
206 UTF8Decode(pdfium::make_span(byte_buf).first(bytes_read), buffer);
207 Seek(From::Current, src_bytes_consumed - bytes_read);
208 return dest_wchars_produced;
209}
#define BOM_UTF8
#define BOM_UTF16_LE
#define BOM_UTF16_BE
#define BOM_UTF8_MASK
#define BOM_UTF16_MASK
#define DCHECK
Definition check.h:33
size_t ReadBlock(pdfium::span< wchar_t > buffer)
void SetCodePage(FX_CodePage wCodePage)
~CFX_SeekableStreamProxy() override
FX_CodePage FX_GetACP()
FX_CodePage
Definition fx_codepage.h:19
pdfium::CheckedNumeric< FX_FILESIZE > FX_SAFE_FILESIZE
#define FX_FILESIZE
Definition fx_types.h:19