Qt
Internal/Contributor docs for the Qt SDK. Note: These are NOT official API docs; those are found at https://doc.qt.io/
Loading...
Searching...
No Matches
cpdf_textobject.cpp
Go to the documentation of this file.
1// Copyright 2016 The PDFium Authors
2// Use of this source code is governed by a BSD-style license that can be
3// found in the LICENSE file.
4
5// Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com
6
7#include "core/fpdfapi/page/cpdf_textobject.h"
8
9#include <algorithm>
10
11#include "core/fpdfapi/font/cpdf_cidfont.h"
12#include "core/fpdfapi/font/cpdf_font.h"
13#include "core/fxcrt/fx_coordinates.h"
14#include "third_party/base/check.h"
15#include "third_party/base/containers/span.h"
16
// True when |u| is not the space character (0x20) and is at most 0x28FF.
// The argument is parenthesized so that compound expressions (e.g. `a | b`)
// are evaluated as a unit before being compared.
#define ISLATINWORD(u) ((u) != 0x20 && (u) <= 0x28FF)
18
19namespace {
20
21bool IsVertWritingCIDFont(const CPDF_CIDFont* font) {
22 return font && font->IsVertWriting();
23}
24
25} // namespace
26
27CPDF_TextObject::Item::Item() = default;
28
29CPDF_TextObject::Item::Item(const Item& that) = default;
30
31CPDF_TextObject::Item::~Item() = default;
32
33CPDF_TextObject::CPDF_TextObject(int32_t content_stream)
34 : CPDF_PageObject(content_stream) {}
35
36CPDF_TextObject::CPDF_TextObject() : CPDF_TextObject(kNoContentStream) {}
37
38CPDF_TextObject::~CPDF_TextObject() = default;
39
40size_t CPDF_TextObject::CountItems() const {
41 return m_CharCodes.size();
42}
43
44CPDF_TextObject::Item CPDF_TextObject::GetItemInfo(size_t index) const {
45 DCHECK(index < m_CharCodes.size());
46
47 Item info;
48 info.m_CharCode = m_CharCodes[index];
49 info.m_Origin = CFX_PointF(index > 0 ? m_CharPos[index - 1] : 0, 0);
51 return info;
52
53 RetainPtr<CPDF_Font> pFont = GetFont();
54 const CPDF_CIDFont* pCIDFont = pFont->AsCIDFont();
55 if (!IsVertWritingCIDFont(pCIDFont))
56 return info;
57
58 uint16_t cid = pCIDFont->CIDFromCharCode(info.m_CharCode);
59 info.m_Origin = CFX_PointF(0, info.m_Origin.x);
60
61 CFX_Point16 vertical_origin = pCIDFont->GetVertOrigin(cid);
62 float fontsize = GetFontSize();
63 info.m_Origin.x -= fontsize * vertical_origin.x / 1000;
64 info.m_Origin.y -= fontsize * vertical_origin.y / 1000;
65 return info;
66}
67
68size_t CPDF_TextObject::CountChars() const {
69 size_t count = 0;
70 for (uint32_t charcode : m_CharCodes) {
71 if (charcode != CPDF_Font::kInvalidCharCode)
72 ++count;
73 }
74 return count;
75}
76
77uint32_t CPDF_TextObject::GetCharCode(size_t index) const {
78 size_t count = 0;
79 for (uint32_t code : m_CharCodes) {
80 if (code == CPDF_Font::kInvalidCharCode)
81 continue;
82 if (count++ != index)
83 continue;
84 return code;
85 }
87}
88
89CPDF_TextObject::Item CPDF_TextObject::GetCharInfo(size_t index) const {
90 size_t count = 0;
91 for (size_t i = 0; i < m_CharCodes.size(); ++i) {
92 uint32_t charcode = m_CharCodes[i];
93 if (charcode == CPDF_Font::kInvalidCharCode)
94 continue;
95 if (count++ == index)
96 return GetItemInfo(i);
97 }
98 return Item();
99}
100
101int CPDF_TextObject::CountWords() const {
102 RetainPtr<CPDF_Font> pFont = GetFont();
103 bool bInLatinWord = false;
104 int nWords = 0;
105 for (size_t i = 0, sz = CountChars(); i < sz; ++i) {
106 uint32_t charcode = GetCharCode(i);
107
108 WideString swUnicode = pFont->UnicodeFromCharCode(charcode);
109 uint16_t unicode = 0;
110 if (swUnicode.GetLength() > 0)
111 unicode = swUnicode[0];
112
113 bool bIsLatin = ISLATINWORD(unicode);
114 if (bIsLatin && bInLatinWord)
115 continue;
116
117 bInLatinWord = bIsLatin;
118 if (unicode != 0x20)
119 nWords++;
120 }
121
122 return nWords;
123}
124
125WideString CPDF_TextObject::GetWordString(int nWordIndex) const {
126 RetainPtr<CPDF_Font> pFont = GetFont();
127 WideString swRet;
128 int nWords = 0;
129 bool bInLatinWord = false;
130 for (size_t i = 0, sz = CountChars(); i < sz; ++i) {
131 uint32_t charcode = GetCharCode(i);
132
133 WideString swUnicode = pFont->UnicodeFromCharCode(charcode);
134 uint16_t unicode = 0;
135 if (swUnicode.GetLength() > 0)
136 unicode = swUnicode[0];
137
138 bool bIsLatin = ISLATINWORD(unicode);
139 if (!bIsLatin || !bInLatinWord) {
140 bInLatinWord = bIsLatin;
141 if (unicode != 0x20)
142 nWords++;
143 }
144 if (nWords - 1 == nWordIndex)
145 swRet += unicode;
146 }
147 return swRet;
148}
149
150std::unique_ptr<CPDF_TextObject> CPDF_TextObject::Clone() const {
151 auto obj = std::make_unique<CPDF_TextObject>();
152 obj->CopyData(this);
153 obj->m_CharCodes = m_CharCodes;
154 obj->m_CharPos = m_CharPos;
155 obj->m_Pos = m_Pos;
156 return obj;
157}
158
159CPDF_PageObject::Type CPDF_TextObject::GetType() const {
160 return Type::kText;
161}
162
163void CPDF_TextObject::Transform(const CFX_Matrix& matrix) {
165 SetDirty(true);
166}
167
168bool CPDF_TextObject::IsText() const {
169 return true;
170}
171
172CPDF_TextObject* CPDF_TextObject::AsText() {
173 return this;
174}
175
176const CPDF_TextObject* CPDF_TextObject::AsText() const {
177 return this;
178}
179
180CFX_Matrix CPDF_TextObject::GetTextMatrix() const {
181 pdfium::span<const float> pTextMatrix = text_state().GetMatrix();
182 return CFX_Matrix(pTextMatrix[0], pTextMatrix[2], pTextMatrix[1],
183 pTextMatrix[3], m_Pos.x, m_Pos.y);
184}
185
186void CPDF_TextObject::SetTextMatrix(const CFX_Matrix& matrix) {
187 pdfium::span<float> pTextMatrix = mutable_text_state().GetMutableMatrix();
188 pTextMatrix[0] = matrix.a;
189 pTextMatrix[1] = matrix.c;
190 pTextMatrix[2] = matrix.b;
191 pTextMatrix[3] = matrix.d;
192 m_Pos = CFX_PointF(matrix.e, matrix.f);
193 CalcPositionDataInternal(GetFont());
194}
195
196void CPDF_TextObject::SetSegments(const ByteString* pStrs,
197 const std::vector<float>& kernings,
198 size_t nSegs) {
199 CHECK(nSegs);
200 m_CharCodes.clear();
201 m_CharPos.clear();
202 RetainPtr<CPDF_Font> pFont = GetFont();
203 size_t nChars = nSegs - 1;
204 for (size_t i = 0; i < nSegs; ++i)
205 nChars += pFont->CountChar(pStrs[i].AsStringView());
206
207 CHECK(nChars);
208 m_CharCodes.resize(nChars);
209 m_CharPos.resize(nChars - 1);
210 size_t index = 0;
211 for (size_t i = 0; i < nSegs; ++i) {
212 ByteStringView segment = pStrs[i].AsStringView();
213 size_t offset = 0;
214 while (offset < segment.GetLength()) {
215 DCHECK(index < m_CharCodes.size());
216 m_CharCodes[index++] = pFont->GetNextChar(segment, &offset);
217 }
218 if (i != nSegs - 1) {
219 m_CharPos[index - 1] = kernings[i];
220 m_CharCodes[index++] = CPDF_Font::kInvalidCharCode;
221 }
222 }
223}
224
225void CPDF_TextObject::SetText(const ByteString& str) {
226 SetSegments(&str, std::vector<float>(), 1);
227 CalcPositionDataInternal(GetFont());
228 SetDirty(true);
229}
230
231float CPDF_TextObject::GetCharWidth(uint32_t charcode) const {
232 const float fontsize = GetFontSize() / 1000;
233 RetainPtr<CPDF_Font> pFont = GetFont();
234 const CPDF_CIDFont* pCIDFont = pFont->AsCIDFont();
235 if (!IsVertWritingCIDFont(pCIDFont))
236 return pFont->GetCharWidthF(charcode) * fontsize;
237
238 uint16_t cid = pCIDFont->CIDFromCharCode(charcode);
239 return pCIDFont->GetVertWidth(cid) * fontsize;
240}
241
242RetainPtr<CPDF_Font> CPDF_TextObject::GetFont() const {
243 return text_state().GetFont();
244}
245
246float CPDF_TextObject::GetFontSize() const {
247 return text_state().GetFontSize();
248}
249
250TextRenderingMode CPDF_TextObject::GetTextRenderMode() const {
251 return text_state().GetTextMode();
252}
253
254void CPDF_TextObject::SetTextRenderMode(TextRenderingMode mode) {
255 mutable_text_state().SetTextMode(mode);
256 SetDirty(true);
257}
258
259CFX_PointF CPDF_TextObject::CalcPositionData(float horz_scale) {
260 RetainPtr<CPDF_Font> pFont = GetFont();
261 const float curpos = CalcPositionDataInternal(pFont);
262 if (IsVertWritingCIDFont(pFont->AsCIDFont()))
263 return {0, curpos};
264 return {curpos * horz_scale, 0};
265}
266
267float CPDF_TextObject::CalcPositionDataInternal(
268 const RetainPtr<CPDF_Font>& pFont) {
269 float curpos = 0;
270 float min_x = 10000.0f;
271 float max_x = -10000.0f;
272 float min_y = 10000.0f;
273 float max_y = -10000.0f;
274 const CPDF_CIDFont* pCIDFont = pFont->AsCIDFont();
275 const bool bVertWriting = IsVertWritingCIDFont(pCIDFont);
276 const float fontsize = GetFontSize();
277
278 for (size_t i = 0; i < m_CharCodes.size(); ++i) {
279 const uint32_t charcode = m_CharCodes[i];
280 if (i > 0) {
281 if (charcode == CPDF_Font::kInvalidCharCode) {
282 curpos -= (m_CharPos[i - 1] * fontsize) / 1000;
283 continue;
284 }
285 m_CharPos[i - 1] = curpos;
286 }
287
288 FX_RECT char_rect = pFont->GetCharBBox(charcode);
289 float charwidth;
290 if (bVertWriting) {
291 uint16_t cid = pCIDFont->CIDFromCharCode(charcode);
292 CFX_Point16 vertical_origin = pCIDFont->GetVertOrigin(cid);
293 char_rect.Offset(-vertical_origin.x, -vertical_origin.y);
294 min_x = std::min(
295 min_x, static_cast<float>(std::min(char_rect.left, char_rect.right)));
296 max_x = std::max(
297 max_x, static_cast<float>(std::max(char_rect.left, char_rect.right)));
298 const float char_top = curpos + char_rect.top * fontsize / 1000;
299 const float char_bottom = curpos + char_rect.bottom * fontsize / 1000;
300 min_y = std::min(min_y, std::min(char_top, char_bottom));
301 max_y = std::max(max_y, std::max(char_top, char_bottom));
302 charwidth = pCIDFont->GetVertWidth(cid) * fontsize / 1000;
303 } else {
304 min_y = std::min(
305 min_y, static_cast<float>(std::min(char_rect.top, char_rect.bottom)));
306 max_y = std::max(
307 max_y, static_cast<float>(std::max(char_rect.top, char_rect.bottom)));
308 const float char_left = curpos + char_rect.left * fontsize / 1000;
309 const float char_right = curpos + char_rect.right * fontsize / 1000;
310 min_x = std::min(min_x, std::min(char_left, char_right));
311 max_x = std::max(max_x, std::max(char_left, char_right));
312 charwidth = pFont->GetCharWidthF(charcode) * fontsize / 1000;
313 }
314 curpos += charwidth;
315 if (charcode == ' ' && (!pCIDFont || pCIDFont->GetCharSize(' ') == 1))
316 curpos += text_state().GetWordSpace();
317
318 curpos += text_state().GetCharSpace();
319 }
320
321 if (bVertWriting) {
322 min_x = min_x * fontsize / 1000;
323 max_x = max_x * fontsize / 1000;
324 } else {
325 min_y = min_y * fontsize / 1000;
326 max_y = max_y * fontsize / 1000;
327 }
328
329 SetOriginalRect(CFX_FloatRect(min_x, min_y, max_x, max_y));
330 CFX_FloatRect rect = GetTextMatrix().TransformRect(GetOriginalRect());
331 if (TextRenderingModeIsStrokeMode(text_state().GetTextMode())) {
332 // TODO(crbug.com/pdfium/1840): Does the original rect need a similar
333 // adjustment?
334 const float half_width = graph_state().GetLineWidth() / 2;
335 rect.Inflate(half_width, half_width);
336 }
337 SetRect(rect);
338
339 return curpos;
340}
constexpr CFX_FloatRect(float l, float b, float r, float t)
void Inflate(float x, float y)
CFX_Matrix operator*(const CFX_Matrix &right) const
uint16_t CIDFromCharCode(uint32_t charcode) const
int16_t GetVertWidth(uint16_t cid) const
bool IsVertWriting() const override
int GetCharSize(uint32_t charcode) const
static constexpr uint32_t kInvalidCharCode
Definition cpdf_font.h:63
void SetRect(const CFX_FloatRect &rect)
const CFX_FloatRect & GetOriginalRect() const
void SetDirty(bool value)
CPDF_PageObject(int32_t content_stream)
void SetOriginalRect(const CFX_FloatRect &rect)
static constexpr int32_t kNoContentStream
CPDF_TextState & mutable_text_state()
const CPDF_TextState & text_state() const
const CFX_GraphState & graph_state() const
RetainPtr< CPDF_Font > GetFont() const
void SetText(const ByteString &str)
CPDF_TextObject * AsText() override
Item GetItemInfo(size_t index) const
void SetTextRenderMode(TextRenderingMode mode)
CPDF_TextObject(int32_t content_stream)
CFX_Matrix GetTextMatrix() const
WideString GetWordString(int nWordIndex) const
std::unique_ptr< CPDF_TextObject > Clone() const
Type GetType() const override
bool IsText() const override
uint32_t GetCharCode(size_t index) const
Item GetCharInfo(size_t index) const
float GetCharWidth(uint32_t charcode) const
float GetFontSize() const
TextRenderingMode GetTextRenderMode() const
~CPDF_TextObject() override
size_t CountChars() const
const CPDF_TextObject * AsText() const override
size_t CountItems() const
void Transform(const CFX_Matrix &matrix) override
CFX_PointF CalcPositionData(float horz_scale)
void SetSegments(const ByteString *pStrs, const std::vector< float > &kernings, size_t nSegs)
void SetTextMatrix(const CFX_Matrix &matrix)
#define ISLATINWORD(u)
TextRenderingMode
#define CHECK(cvref)
Item(const Item &that)
void Offset(int dx, int dy)
int32_t bottom
int32_t right
int32_t top
int32_t left