Qt
Internal/Contributor docs for the Qt SDK. Note: These are NOT official API docs; those are found at https://doc.qt.io/
Loading...
Searching...
No Matches
cpdf_textpage.h
Go to the documentation of this file.
1// Copyright 2016 The PDFium Authors
2// Use of this source code is governed by a BSD-style license that can be
3// found in the LICENSE file.
4
5// Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com
6
7#ifndef CORE_FPDFTEXT_CPDF_TEXTPAGE_H_
8#define CORE_FPDFTEXT_CPDF_TEXTPAGE_H_
9
10#include <stdint.h>
11
12#include <deque>
13#include <functional>
14#include <vector>
15
16#include "core/fpdfapi/page/cpdf_pageobjectholder.h"
17#include "core/fxcrt/data_vector.h"
18#include "core/fxcrt/fx_coordinates.h"
19#include "core/fxcrt/fx_memory_wrappers.h"
20#include "core/fxcrt/unowned_ptr.h"
21#include "core/fxcrt/widestring.h"
22#include "core/fxcrt/widetext_buffer.h"
23#include "third_party/abseil-cpp/absl/types/optional.h"
24
25class CPDF_FormObject;
26class CPDF_Page;
27class CPDF_TextObject;
28
30 int index;
31 int count;
32};
33
35
37 public:
38 enum class CharType : uint8_t {
39 kNormal,
42 kHyphen,
43 kPiece,
44 };
45
46 class CharInfo {
47 public:
51
52 int m_Index = 0;
53 uint32_t m_CharCode = 0;
54 wchar_t m_Unicode = 0;
56 CFX_PointF m_Origin;
58 UnownedPtr<const CPDF_TextObject> m_pTextObj;
60 };
61
62 CPDF_TextPage(const CPDF_Page* pPage, bool rtl);
64
65 int CharIndexFromTextIndex(int text_index) const;
66 int TextIndexFromCharIndex(int char_index) const;
67 size_t size() const { return m_CharList.size(); }
68 int CountChars() const;
69
70 // These methods CHECK() to make sure |index| is within bounds.
71 const CharInfo& GetCharInfo(size_t index) const;
72 float GetCharFontSize(size_t index) const;
73 CFX_FloatRect GetCharLooseBounds(size_t index) const;
74
75 std::vector<CFX_FloatRect> GetRectArray(int start, int count) const;
76 int GetIndexAtPos(const CFX_PointF& point, const CFX_SizeF& tolerance) const;
77 WideString GetTextByRect(const CFX_FloatRect& rect) const;
78 WideString GetTextByObject(const CPDF_TextObject* pTextObj) const;
79
80 // Returns a string with the text from |m_TextBuf| that is covered by the input
81 // range. |start| and |count| are in terms of |m_CharIndices|, so the
82 // range will be converted into appropriate indices.
83 WideString GetPageText(int start, int count) const;
84 WideString GetAllPageText() const { return GetPageText(0, CountChars()); }
85
86 int CountRects(int start, int nCount);
87 bool GetRect(int rectIndex, CFX_FloatRect* pRect) const;
88
89 private:
// Orientation value returned by GetTextObjectWritingMode() and
// FindTextlineFlowOrientation(). |m_TextlineDir| is initialized to kUnknown.
90 enum class TextOrientation {
91 kUnknown,
92 kHorizontal,
93 kVertical,
94 };
95
// Result type of ProcessInsertObject(). Judging by the enumerator names, it
// selects which character (if any) should be synthesized between text
// objects -- TODO confirm against the implementation file.
96 enum class GenerateCharacter {
97 kNone,
98 kSpace,
99 kLineBreak,
100 kHyphen,
101 };
102
// Result type of PreMarkedContent(). kPass is pinned to 0, so kDone and
// kDelay take the values 1 and 2.
103 enum class MarkedContentState { kPass = 0, kDone, kDelay };
104
// Pairs an UnownedPtr to a const CPDF_TextObject with a CFX_Matrix
// (|m_formMatrix|). Instances are stored in |mTextObjects| and passed to
// ProcessTextObject() / ProcessMarkedContent(). The constructors and the
// destructor are only declared here; their definitions are not in this header.
105 struct TransformedTextObject {
106 TransformedTextObject();
107 TransformedTextObject(const TransformedTextObject& that);
108 ~TransformedTextObject();
109
110 UnownedPtr<const CPDF_TextObject> m_pTextObj;
111 CFX_Matrix m_formMatrix;
112 };
113
114 void Init();
115 bool IsHyphen(wchar_t curChar) const;
116 void ProcessObject();
117 void ProcessFormObject(CPDF_FormObject* pFormObj,
118 const CFX_Matrix& formMatrix);
119 void ProcessTextObject(const TransformedTextObject& obj);
120 void ProcessTextObject(CPDF_TextObject* pTextObj,
121 const CFX_Matrix& formMatrix,
122 const CPDF_PageObjectHolder* pObjList,
123 CPDF_PageObjectHolder::const_iterator ObjPos);
124 GenerateCharacter ProcessInsertObject(const CPDF_TextObject* pObj,
125 const CFX_Matrix& formMatrix);
126 const CharInfo* GetPrevCharInfo() const;
127 absl::optional<CharInfo> GenerateCharInfo(wchar_t unicode);
128 bool IsSameAsPreTextObject(CPDF_TextObject* pTextObj,
129 const CPDF_PageObjectHolder* pObjList,
130 CPDF_PageObjectHolder::const_iterator iter) const;
131 bool IsSameTextObject(CPDF_TextObject* pTextObj1,
132 CPDF_TextObject* pTextObj2) const;
133 void CloseTempLine();
134 MarkedContentState PreMarkedContent(const CPDF_TextObject* pTextObj);
135 void ProcessMarkedContent(const TransformedTextObject& obj);
136 void FindPreviousTextObject();
137 void AddCharInfoByLRDirection(wchar_t wChar, const CharInfo& info);
138 void AddCharInfoByRLDirection(wchar_t wChar, const CharInfo& info);
139 TextOrientation GetTextObjectWritingMode(
140 const CPDF_TextObject* pTextObj) const;
141 TextOrientation FindTextlineFlowOrientation() const;
142 void AppendGeneratedCharacter(wchar_t unicode, const CFX_Matrix& formMatrix);
143 void SwapTempTextBuf(size_t iCharListStartAppend, size_t iBufStartAppend);
144 WideString GetTextByPredicate(
145 const std::function<bool(const CharInfo&)>& predicate) const;
146
147 UnownedPtr<const CPDF_Page> const m_pPage;
148 DataVector<TextPageCharSegment> m_CharIndices;
149 std::deque<CharInfo> m_CharList;
150 std::deque<CharInfo> m_TempCharList;
151 WideTextBuffer m_TextBuf;
152 WideTextBuffer m_TempTextBuf;
153 UnownedPtr<const CPDF_TextObject> m_pPrevTextObj;
154 CFX_Matrix m_PrevMatrix;
155 const bool m_rtl;
156 const CFX_Matrix m_DisplayMatrix;
157 std::vector<CFX_FloatRect> m_SelRects;
158 std::vector<TransformedTextObject> mTextObjects;
159 TextOrientation m_TextlineDir = TextOrientation::kUnknown;
160 CFX_FloatRect m_CurlineRect;
161};
162
163#endif // CORE_FPDFTEXT_CPDF_TEXTPAGE_H_
std::vector< CFX_FloatRect > GetRects(size_t index) const
WideString GetURL(size_t index) const
UnownedPtr< const CPDF_TextPage > const m_pTextPage
absl::optional< Range > GetTextRange(size_t index) const
CPDF_LinkExtract(const CPDF_TextPage *pTextPage)
absl::optional< Link > CheckWebLink(const WideString &str)
std::vector< Link > m_LinkArray
bool CheckMailLink(WideString *str)
size_t CountLinks() const
CharInfo(const CharInfo &)
UnownedPtr< const CPDF_TextObject > m_pTextObj
int CharIndexFromTextIndex(int text_index) const
WideString GetAllPageText() const
bool GetRect(int rectIndex, CFX_FloatRect *pRect) const
int CountChars() const
WideString GetTextByRect(const CFX_FloatRect &rect) const
const CharInfo & GetCharInfo(size_t index) const
CFX_FloatRect GetCharLooseBounds(size_t index) const
CPDF_TextPage(const CPDF_Page *pPage, bool rtl)
WideString GetTextByObject(const CPDF_TextObject *pTextObj) const
int GetIndexAtPos(const CFX_PointF &point, const CFX_SizeF &tolerance) const
float GetCharFontSize(size_t index) const
int CountRects(int start, int nCount)
int TextIndexFromCharIndex(int char_index) const
std::vector< CFX_FloatRect > GetRectArray(int start, int count) const
WideString GetPageText(int start, int count) const
size_t size() const
WideString & operator=(WideString &&that) noexcept
void TrimRight(wchar_t target)
CharType Back() const
Definition widestring.h:152
bool FXSYS_iswalnum(wchar_t c)
#define FX_DATA_PARTITION_EXCEPTION(T)
WideString operator+(const wchar_t *str1, const WideString &str2)
Definition widestring.h:281