Qt
Internal/Contributor docs for the Qt SDK. Note: These are NOT official API docs; those are found at https://doc.qt.io/
Loading...
Searching...
No Matches
cpdf_textobject.cpp
Go to the documentation of this file.
1// Copyright 2016 The PDFium Authors
2// Use of this source code is governed by a BSD-style license that can be
3// found in the LICENSE file.
4
5// Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com
6
7#include "core/fpdfapi/page/cpdf_textobject.h"
8
9#include <algorithm>
10
11#include "core/fpdfapi/font/cpdf_cidfont.h"
12#include "core/fpdfapi/font/cpdf_font.h"
13#include "core/fxcrt/check.h"
14#include "core/fxcrt/fx_coordinates.h"
15#include "core/fxcrt/span.h"
16#include "core/fxcrt/span_util.h"
17
// Evaluates to true for any value other than 0x20 (space) that is at most
// 0x28FF. The parameter is parenthesized so that expression arguments expand
// with the intended precedence. Note that |u| is still evaluated twice.
#define ISLATINWORD(u) ((u) != 0x20 && (u) <= 0x28FF)
19
20namespace {
21
22bool IsVertWritingCIDFont(const CPDF_CIDFont* font) {
23 return font && font->IsVertWriting();
24}
25
26} // namespace
27
28CPDF_TextObject::Item::Item() = default;
29
30CPDF_TextObject::Item::Item(const Item& that) = default;
31
32CPDF_TextObject::Item::~Item() = default;
33
34CPDF_TextObject::CPDF_TextObject(int32_t content_stream)
35 : CPDF_PageObject(content_stream) {}
36
37CPDF_TextObject::CPDF_TextObject() : CPDF_TextObject(kNoContentStream) {}
38
39CPDF_TextObject::~CPDF_TextObject() = default;
40
41size_t CPDF_TextObject::CountItems() const {
42 return m_CharCodes.size();
43}
44
45CPDF_TextObject::Item CPDF_TextObject::GetItemInfo(size_t index) const {
46 DCHECK(index < m_CharCodes.size());
47
48 Item info;
49 info.m_CharCode = m_CharCodes[index];
50 info.m_Origin = CFX_PointF(index > 0 ? m_CharPos[index - 1] : 0, 0);
52 return info;
53
54 RetainPtr<CPDF_Font> pFont = GetFont();
55 const CPDF_CIDFont* pCIDFont = pFont->AsCIDFont();
56 if (!IsVertWritingCIDFont(pCIDFont))
57 return info;
58
59 uint16_t cid = pCIDFont->CIDFromCharCode(info.m_CharCode);
60 info.m_Origin = CFX_PointF(0, info.m_Origin.x);
61
62 CFX_Point16 vertical_origin = pCIDFont->GetVertOrigin(cid);
63 float fontsize = GetFontSize();
64 info.m_Origin.x -= fontsize * vertical_origin.x / 1000;
65 info.m_Origin.y -= fontsize * vertical_origin.y / 1000;
66 return info;
67}
68
69size_t CPDF_TextObject::CountChars() const {
70 size_t count = 0;
71 for (uint32_t charcode : m_CharCodes) {
72 if (charcode != CPDF_Font::kInvalidCharCode)
73 ++count;
74 }
75 return count;
76}
77
78uint32_t CPDF_TextObject::GetCharCode(size_t index) const {
79 size_t count = 0;
80 for (uint32_t code : m_CharCodes) {
81 if (code == CPDF_Font::kInvalidCharCode)
82 continue;
83 if (count++ != index)
84 continue;
85 return code;
86 }
88}
89
90CPDF_TextObject::Item CPDF_TextObject::GetCharInfo(size_t index) const {
91 size_t count = 0;
92 for (size_t i = 0; i < m_CharCodes.size(); ++i) {
93 uint32_t charcode = m_CharCodes[i];
94 if (charcode == CPDF_Font::kInvalidCharCode)
95 continue;
96 if (count++ == index)
97 return GetItemInfo(i);
98 }
99 return Item();
100}
101
102int CPDF_TextObject::CountWords() const {
103 RetainPtr<CPDF_Font> pFont = GetFont();
104 bool bInLatinWord = false;
105 int nWords = 0;
106 for (size_t i = 0, sz = CountChars(); i < sz; ++i) {
107 uint32_t charcode = GetCharCode(i);
108
109 WideString swUnicode = pFont->UnicodeFromCharCode(charcode);
110 uint16_t unicode = 0;
111 if (swUnicode.GetLength() > 0)
112 unicode = swUnicode[0];
113
114 bool bIsLatin = ISLATINWORD(unicode);
115 if (bIsLatin && bInLatinWord)
116 continue;
117
118 bInLatinWord = bIsLatin;
119 if (unicode != 0x20)
120 nWords++;
121 }
122
123 return nWords;
124}
125
126WideString CPDF_TextObject::GetWordString(int nWordIndex) const {
127 RetainPtr<CPDF_Font> pFont = GetFont();
128 WideString swRet;
129 int nWords = 0;
130 bool bInLatinWord = false;
131 for (size_t i = 0, sz = CountChars(); i < sz; ++i) {
132 uint32_t charcode = GetCharCode(i);
133
134 WideString swUnicode = pFont->UnicodeFromCharCode(charcode);
135 uint16_t unicode = 0;
136 if (swUnicode.GetLength() > 0)
137 unicode = swUnicode[0];
138
139 bool bIsLatin = ISLATINWORD(unicode);
140 if (!bIsLatin || !bInLatinWord) {
141 bInLatinWord = bIsLatin;
142 if (unicode != 0x20)
143 nWords++;
144 }
145 if (nWords - 1 == nWordIndex)
146 swRet += unicode;
147 }
148 return swRet;
149}
150
151std::unique_ptr<CPDF_TextObject> CPDF_TextObject::Clone() const {
152 auto obj = std::make_unique<CPDF_TextObject>();
153 obj->CopyData(this);
154 obj->m_CharCodes = m_CharCodes;
155 obj->m_CharPos = m_CharPos;
156 obj->m_Pos = m_Pos;
157 return obj;
158}
159
160CPDF_PageObject::Type CPDF_TextObject::GetType() const {
161 return Type::kText;
162}
163
164void CPDF_TextObject::Transform(const CFX_Matrix& matrix) {
166 SetDirty(true);
167}
168
169bool CPDF_TextObject::IsText() const {
170 return true;
171}
172
173CPDF_TextObject* CPDF_TextObject::AsText() {
174 return this;
175}
176
177const CPDF_TextObject* CPDF_TextObject::AsText() const {
178 return this;
179}
180
181CFX_Matrix CPDF_TextObject::GetTextMatrix() const {
182 pdfium::span<const float> pTextMatrix = text_state().GetMatrix();
183 return CFX_Matrix(pTextMatrix[0], pTextMatrix[2], pTextMatrix[1],
184 pTextMatrix[3], m_Pos.x, m_Pos.y);
185}
186
187void CPDF_TextObject::SetTextMatrix(const CFX_Matrix& matrix) {
188 pdfium::span<float> pTextMatrix = mutable_text_state().GetMutableMatrix();
189 pTextMatrix[0] = matrix.a;
190 pTextMatrix[1] = matrix.c;
191 pTextMatrix[2] = matrix.b;
192 pTextMatrix[3] = matrix.d;
193 m_Pos = CFX_PointF(matrix.e, matrix.f);
194 CalcPositionDataInternal(GetFont());
195}
196
197void CPDF_TextObject::SetSegments(pdfium::span<const ByteString> strings,
198 pdfium::span<const float> kernings) {
199 size_t nSegs = strings.size();
200 CHECK(nSegs);
201 m_CharCodes.clear();
202 m_CharPos.clear();
203 RetainPtr<CPDF_Font> pFont = GetFont();
204 size_t nChars = nSegs - 1;
205 for (const auto& str : strings) {
206 nChars += pFont->CountChar(str.AsStringView());
207 }
208 CHECK(nChars);
209 m_CharCodes.resize(nChars);
210 m_CharPos.resize(nChars - 1);
211 size_t index = 0;
212 for (size_t i = 0; i < nSegs; ++i) {
213 ByteStringView segment = strings[i].AsStringView();
214 size_t offset = 0;
215 while (offset < segment.GetLength()) {
216 DCHECK(index < m_CharCodes.size());
217 m_CharCodes[index++] = pFont->GetNextChar(segment, &offset);
218 }
219 if (i != nSegs - 1) {
220 m_CharPos[index - 1] = kernings[i];
221 m_CharCodes[index++] = CPDF_Font::kInvalidCharCode;
222 }
223 }
224}
225
226void CPDF_TextObject::SetText(const ByteString& str) {
227 SetSegments(pdfium::span_from_ref(str), pdfium::span<float>());
228 CalcPositionDataInternal(GetFont());
229 SetDirty(true);
230}
231
232float CPDF_TextObject::GetCharWidth(uint32_t charcode) const {
233 const float fontsize = GetFontSize() / 1000;
234 RetainPtr<CPDF_Font> pFont = GetFont();
235 const CPDF_CIDFont* pCIDFont = pFont->AsCIDFont();
236 if (!IsVertWritingCIDFont(pCIDFont))
237 return pFont->GetCharWidthF(charcode) * fontsize;
238
239 uint16_t cid = pCIDFont->CIDFromCharCode(charcode);
240 return pCIDFont->GetVertWidth(cid) * fontsize;
241}
242
243RetainPtr<CPDF_Font> CPDF_TextObject::GetFont() const {
244 return text_state().GetFont();
245}
246
247float CPDF_TextObject::GetFontSize() const {
248 return text_state().GetFontSize();
249}
250
251TextRenderingMode CPDF_TextObject::GetTextRenderMode() const {
252 return text_state().GetTextMode();
253}
254
255void CPDF_TextObject::SetTextRenderMode(TextRenderingMode mode) {
256 mutable_text_state().SetTextMode(mode);
257 SetDirty(true);
258}
259
260CFX_PointF CPDF_TextObject::CalcPositionData(float horz_scale) {
261 RetainPtr<CPDF_Font> pFont = GetFont();
262 const float curpos = CalcPositionDataInternal(pFont);
263 if (IsVertWritingCIDFont(pFont->AsCIDFont()))
264 return {0, curpos};
265 return {curpos * horz_scale, 0};
266}
267
268float CPDF_TextObject::CalcPositionDataInternal(
269 const RetainPtr<CPDF_Font>& pFont) {
270 float curpos = 0;
271 float min_x = 10000.0f;
272 float max_x = -10000.0f;
273 float min_y = 10000.0f;
274 float max_y = -10000.0f;
275 const CPDF_CIDFont* pCIDFont = pFont->AsCIDFont();
276 const bool bVertWriting = IsVertWritingCIDFont(pCIDFont);
277 const float fontsize = GetFontSize();
278
279 for (size_t i = 0; i < m_CharCodes.size(); ++i) {
280 const uint32_t charcode = m_CharCodes[i];
281 if (i > 0) {
282 if (charcode == CPDF_Font::kInvalidCharCode) {
283 curpos -= (m_CharPos[i - 1] * fontsize) / 1000;
284 continue;
285 }
286 m_CharPos[i - 1] = curpos;
287 }
288
289 FX_RECT char_rect = pFont->GetCharBBox(charcode);
290 float charwidth;
291 if (bVertWriting) {
292 uint16_t cid = pCIDFont->CIDFromCharCode(charcode);
293 CFX_Point16 vertical_origin = pCIDFont->GetVertOrigin(cid);
294 char_rect.Offset(-vertical_origin.x, -vertical_origin.y);
295 min_x = std::min({min_x, static_cast<float>(char_rect.left),
296 static_cast<float>(char_rect.right)});
297 max_x = std::max({max_x, static_cast<float>(char_rect.left),
298 static_cast<float>(char_rect.right)});
299 const float char_top = curpos + char_rect.top * fontsize / 1000;
300 const float char_bottom = curpos + char_rect.bottom * fontsize / 1000;
301 min_y = std::min({min_y, char_top, char_bottom});
302 max_y = std::max({max_y, char_top, char_bottom});
303 charwidth = pCIDFont->GetVertWidth(cid) * fontsize / 1000;
304 } else {
305 min_y = std::min({min_y, static_cast<float>(char_rect.top),
306 static_cast<float>(char_rect.bottom)});
307 max_y = std::max({max_y, static_cast<float>(char_rect.top),
308 static_cast<float>(char_rect.bottom)});
309 const float char_left = curpos + char_rect.left * fontsize / 1000;
310 const float char_right = curpos + char_rect.right * fontsize / 1000;
311 min_x = std::min({min_x, char_left, char_right});
312 max_x = std::max({max_x, char_left, char_right});
313 charwidth = pFont->GetCharWidthF(charcode) * fontsize / 1000;
314 }
315 curpos += charwidth;
316 if (charcode == ' ' && (!pCIDFont || pCIDFont->GetCharSize(' ') == 1))
317 curpos += text_state().GetWordSpace();
318
319 curpos += text_state().GetCharSpace();
320 }
321
322 if (bVertWriting) {
323 min_x = min_x * fontsize / 1000;
324 max_x = max_x * fontsize / 1000;
325 } else {
326 min_y = min_y * fontsize / 1000;
327 max_y = max_y * fontsize / 1000;
328 }
329
330 SetOriginalRect(CFX_FloatRect(min_x, min_y, max_x, max_y));
331 CFX_FloatRect rect = GetTextMatrix().TransformRect(GetOriginalRect());
332 if (TextRenderingModeIsStrokeMode(text_state().GetTextMode())) {
333 // TODO(crbug.com/pdfium/1840): Does the original rect need a similar
334 // adjustment?
335 const float half_width = graph_state().GetLineWidth() / 2;
336 rect.Inflate(half_width, half_width);
337 }
338 SetRect(rect);
339
340 return curpos;
341}
fxcrt::ByteString ByteString
Definition bytestring.h:180
#define DCHECK
Definition check.h:33
constexpr CFX_FloatRect(float l, float b, float r, float t)
void Inflate(float x, float y)
CFX_Matrix operator*(const CFX_Matrix &right) const
uint16_t CIDFromCharCode(uint32_t charcode) const
int16_t GetVertWidth(uint16_t cid) const
bool IsVertWriting() const override
int GetCharSize(uint32_t charcode) const
static constexpr uint32_t kInvalidCharCode
Definition cpdf_font.h:62
void SetRect(const CFX_FloatRect &rect)
const CFX_FloatRect & GetOriginalRect() const
void SetDirty(bool value)
CPDF_PageObject(int32_t content_stream)
void SetOriginalRect(const CFX_FloatRect &rect)
static constexpr int32_t kNoContentStream
CPDF_TextState & mutable_text_state()
const CPDF_TextState & text_state() const
const CFX_GraphState & graph_state() const
RetainPtr< CPDF_Font > GetFont() const
void SetSegments(pdfium::span< const ByteString > strings, pdfium::span< const float > kernings)
void SetText(const ByteString &str)
CPDF_TextObject * AsText() override
Item GetItemInfo(size_t index) const
void SetTextRenderMode(TextRenderingMode mode)
CPDF_TextObject(int32_t content_stream)
CFX_Matrix GetTextMatrix() const
WideString GetWordString(int nWordIndex) const
std::unique_ptr< CPDF_TextObject > Clone() const
Type GetType() const override
bool IsText() const override
uint32_t GetCharCode(size_t index) const
Item GetCharInfo(size_t index) const
float GetCharWidth(uint32_t charcode) const
float GetFontSize() const
TextRenderingMode GetTextRenderMode() const
~CPDF_TextObject() override
size_t CountChars() const
const CPDF_TextObject * AsText() const override
size_t CountItems() const
void Transform(const CFX_Matrix &matrix) override
CFX_PointF CalcPositionData(float horz_scale)
void SetTextMatrix(const CFX_Matrix &matrix)
#define ISLATINWORD(u)
TextRenderingMode
CFX_PTemplate< int16_t > CFX_Point16
CFX_PTemplate< float > CFX_PointF
#define CHECK(cvref)
fxcrt::ByteStringView ByteStringView
Item(const Item &that)
void Offset(int dx, int dy)
int32_t bottom
int32_t right
int32_t top
int32_t left
fxcrt::WideString WideString
Definition widestring.h:207