Qt
Internal/Contributor docs for the Qt SDK. Note: These are NOT official API docs; those are found at https://doc.qt.io/
Loading...
Searching...
No Matches
cfx_seekablestreamproxy.cpp
Go to the documentation of this file.
1// Copyright 2017 The PDFium Authors
2// Use of this source code is governed by a BSD-style license that can be
3// found in the LICENSE file.
4
5// Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com
6
7#include "core/fxcrt/cfx_seekablestreamproxy.h"
8
9#include <stdint.h>
10
11#include <algorithm>
12#include <limits>
13#include <utility>
14
15#include "build/build_config.h"
16#include "core/fxcrt/data_vector.h"
17#include "core/fxcrt/fx_extension.h"
18#include "core/fxcrt/fx_safe_types.h"
19#include "third_party/base/check.h"
20#include "third_party/base/check_op.h"
21
22namespace {
23
24// Returns {src bytes consumed, dst chars produced}.
25// Invalid sequences are silently not output.
26std::pair<size_t, size_t> UTF8Decode(pdfium::span<const uint8_t> pSrc,
27 pdfium::span<wchar_t> pDst) {
28 DCHECK(!pDst.empty());
29
30 uint32_t dwCode = 0;
31 int32_t iPending = 0;
32 size_t iSrcNum = 0;
33 size_t iDstNum = 0;
34 for (size_t iIndex = 0; iIndex < pSrc.size() && iDstNum < pDst.size();
35 ++iIndex) {
36 ++iSrcNum;
37 uint8_t byte = pSrc[iIndex];
38 if (byte < 0x80) {
39 iPending = 0;
40 pDst[iDstNum++] = byte;
41 } else if (byte < 0xc0) {
42 if (iPending < 1)
43 continue;
44
45 dwCode = dwCode << 6;
46 dwCode |= (byte & 0x3f);
47 --iPending;
48 if (iPending == 0)
49 pDst[iDstNum++] = dwCode;
50 } else if (byte < 0xe0) {
51 iPending = 1;
52 dwCode = (byte & 0x1f);
53 } else if (byte < 0xf0) {
54 iPending = 2;
55 dwCode = (byte & 0x0f);
56 } else if (byte < 0xf8) {
57 iPending = 3;
58 dwCode = (byte & 0x07);
59 } else if (byte < 0xfc) {
60 iPending = 4;
61 dwCode = (byte & 0x03);
62 } else if (byte < 0xfe) {
63 iPending = 5;
64 dwCode = (byte & 0x01);
65 }
66 }
67 return {iSrcNum, iDstNum};
68}
69
#if defined(WCHAR_T_IS_32_BIT)
static_assert(sizeof(wchar_t) > 2, "wchar_t is too small");

// Expands, in place, the |iLength| 16-bit units stored at the start of
// |pBuffer| into |iLength| wchar_t values occupying the same buffer. Each
// unit is widened individually; no units are combined.
void UTF16ToWChar(void* pBuffer, size_t iLength) {
  DCHECK(pBuffer);
  DCHECK_GT(iLength, 0u);

  const uint16_t* narrow = static_cast<const uint16_t*>(pBuffer);
  wchar_t* wide = static_cast<wchar_t*>(pBuffer);

  // Source and destination overlap and the destination elements are larger,
  // so walk from the last element down to the first to avoid overwriting
  // units that have not been widened yet.
  size_t remaining = iLength;
  while (remaining > 0) {
    --remaining;
    wide[remaining] = static_cast<wchar_t>(narrow[remaining]);
  }
}
#endif  // defined(WCHAR_T_IS_32_BIT)
85
// Exchanges the high and low bytes of each of the |iLength| 16-bit units
// starting at |pStr|, in place.
void SwapByteOrder(uint16_t* pStr, size_t iLength) {
  for (size_t i = 0; i < iLength; ++i) {
    const uint16_t unit = pStr[i];
    pStr[i] = (unit << 8) | (unit >> 8);
  }
}
92
93} // namespace
94
// Byte-order-mark signatures and the masks used to isolate them. The
// constructor below compares these against a uint32_t filled from the first
// bytes of the stream; the values correspond to the byte sequences noted on
// each line when the first stream byte is the least-significant byte.
// Keeps the low three bytes (the length of the UTF-8 signature).
95#define BOM_UTF8_MASK 0x00FFFFFF
// Stream bytes EF BB BF.
96#define BOM_UTF8 0x00BFBBEF
// Keeps the low two bytes (the length of a UTF-16 signature).
97#define BOM_UTF16_MASK 0x0000FFFF
// Stream bytes FE FF.
98#define BOM_UTF16_BE 0x0000FFFE
// Stream bytes FF FE.
99#define BOM_UTF16_LE 0x0000FEFF
100
101CFX_SeekableStreamProxy::CFX_SeekableStreamProxy(
102 const RetainPtr<IFX_SeekableReadStream>& stream)
103 : m_pStream(stream) {
104 DCHECK(m_pStream);
105
106 Seek(From::Begin, 0);
107
108 uint32_t bom = 0;
109 ReadData(reinterpret_cast<uint8_t*>(&bom), 3);
110
111 bom &= BOM_UTF8_MASK;
112 if (bom == BOM_UTF8) {
113 m_wBOMLength = 3;
114 m_wCodePage = FX_CodePage::kUTF8;
115 } else {
116 bom &= BOM_UTF16_MASK;
117 if (bom == BOM_UTF16_BE) {
118 m_wBOMLength = 2;
119 m_wCodePage = FX_CodePage::kUTF16BE;
120 } else if (bom == BOM_UTF16_LE) {
121 m_wBOMLength = 2;
122 m_wCodePage = FX_CodePage::kUTF16LE;
123 } else {
124 m_wBOMLength = 0;
125 m_wCodePage = FX_GetACP();
126 }
127 }
128
129 Seek(From::Begin, static_cast<FX_FILESIZE>(m_wBOMLength));
130}
131
// Destructor: compiler-generated behavior, defined out of line in this file.
132CFX_SeekableStreamProxy::~CFX_SeekableStreamProxy() = default;
133
134FX_FILESIZE CFX_SeekableStreamProxy::GetSize() {
135 return m_pStream->GetSize();
136}
137
138FX_FILESIZE CFX_SeekableStreamProxy::GetPosition() {
139 return m_iPosition;
140}
141
142bool CFX_SeekableStreamProxy::IsEOF() {
143 return m_iPosition >= GetSize();
144}
145
146void CFX_SeekableStreamProxy::Seek(From eSeek, FX_FILESIZE iOffset) {
147 switch (eSeek) {
148 case From::Begin:
149 m_iPosition = iOffset;
150 break;
151 case From::Current: {
152 FX_SAFE_FILESIZE new_pos = m_iPosition;
153 new_pos += iOffset;
154 m_iPosition =
155 new_pos.ValueOrDefault(std::numeric_limits<FX_FILESIZE>::max());
156 } break;
157 }
158 m_iPosition = std::clamp(m_iPosition, static_cast<FX_FILESIZE>(0), GetSize());
159}
160
161void CFX_SeekableStreamProxy::SetCodePage(FX_CodePage wCodePage) {
162 if (m_wBOMLength > 0)
163 return;
164 m_wCodePage = wCodePage;
165}
166
167size_t CFX_SeekableStreamProxy::ReadData(uint8_t* pBuffer, size_t iBufferSize) {
168 DCHECK(pBuffer);
169 DCHECK(iBufferSize > 0);
170
171 iBufferSize =
172 std::min(iBufferSize, static_cast<size_t>(GetSize() - m_iPosition));
173 if (iBufferSize <= 0)
174 return 0;
175
176 if (!m_pStream->ReadBlockAtOffset({pBuffer, iBufferSize}, m_iPosition))
177 return 0;
178
179 FX_SAFE_FILESIZE new_pos = m_iPosition;
180 new_pos += iBufferSize;
181 m_iPosition = new_pos.ValueOrDefault(m_iPosition);
182 return new_pos.IsValid() ? iBufferSize : 0;
183}
184
185size_t CFX_SeekableStreamProxy::ReadBlock(wchar_t* pStr, size_t size) {
186 if (!pStr || size == 0)
187 return 0;
188
189 if (m_wCodePage == FX_CodePage::kUTF16LE ||
190 m_wCodePage == FX_CodePage::kUTF16BE) {
191 size_t iBytes = size * 2;
192 size_t iLen = ReadData(reinterpret_cast<uint8_t*>(pStr), iBytes);
193 size = iLen / 2;
194 if (m_wCodePage == FX_CodePage::kUTF16BE)
195 SwapByteOrder(reinterpret_cast<uint16_t*>(pStr), size);
196
197#if defined(WCHAR_T_IS_32_BIT)
198 if (size > 0)
199 UTF16ToWChar(pStr, size);
200#endif
201 return size;
202 }
203
204 FX_FILESIZE pos = GetPosition();
205 size_t iBytes = std::min(size, static_cast<size_t>(GetSize() - pos));
206 if (iBytes == 0)
207 return 0;
208
209 DataVector<uint8_t> buf(iBytes);
210 size_t iLen = ReadData(buf.data(), iBytes);
211 if (m_wCodePage != FX_CodePage::kUTF8)
212 return 0;
213
214 size_t iSrc;
215 std::tie(iSrc, size) = UTF8Decode({buf.data(), iLen}, {pStr, size});
216 Seek(From::Current, iSrc - iLen);
217 return size;
218}
#define BOM_UTF8
#define BOM_UTF16_LE
#define BOM_UTF16_BE
#define BOM_UTF8_MASK
#define BOM_UTF16_MASK
size_t ReadBlock(wchar_t *pStr, size_t size)
void SetCodePage(FX_CodePage wCodePage)
~CFX_SeekableStreamProxy() override
FX_CodePage FX_GetACP()
FX_CodePage
Definition fx_codepage.h:18
#define FX_FILESIZE
Definition fx_types.h:19