Qt
Internal/Contributor docs for the Qt SDK. Note: These are NOT official API docs; those are found at https://doc.qt.io/
Loading...
Searching...
No Matches
cpdf_textpage.h
Go to the documentation of this file.
1// Copyright 2016 The PDFium Authors
2// Use of this source code is governed by a BSD-style license that can be
3// found in the LICENSE file.
4
5// Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com
6
7#ifndef CORE_FPDFTEXT_CPDF_TEXTPAGE_H_
8#define CORE_FPDFTEXT_CPDF_TEXTPAGE_H_
9
10#include <stdint.h>
11
12#include <deque>
13#include <functional>
14#include <optional>
15#include <vector>
16
17#include "core/fpdfapi/page/cpdf_pageobjectholder.h"
18#include "core/fxcrt/data_vector.h"
19#include "core/fxcrt/fx_coordinates.h"
20#include "core/fxcrt/fx_memory_wrappers.h"
21#include "core/fxcrt/unowned_ptr.h"
22#include "core/fxcrt/widestring.h"
23#include "core/fxcrt/widetext_buffer.h"
24
25class CPDF_FormObject;
26class CPDF_Page;
27class CPDF_TextObject;
28
30 int index;
31 int count;
32};
33
35
37 public:
45
61
// Constructs a text page for |pPage|.
// NOTE(review): |rtl| presumably initializes the const |m_rtl| member; the
// constructor body is not in this header - confirm in the implementation.
62 CPDF_TextPage(const CPDF_Page* pPage, bool rtl);
64
// Index conversion helpers.
// NOTE(review): presumably "text index" refers to a position in the extracted
// text and "char index" to a position in |m_CharList|; the value returned for
// an index that cannot be mapped is not visible here - confirm in the .cpp.
65 int CharIndexFromTextIndex(int text_index) const;
66 int TextIndexFromCharIndex(int char_index) const;
// Returns the number of entries currently held in |m_CharList|.
67 size_t size() const { return m_CharList.size(); }
// NOTE(review): how this count relates to size() cannot be determined from
// the header; note that it returns int rather than size_t.
68 int CountChars() const;
69
70 // These methods CHECK() to make sure |index| is within bounds.
// GetCharInfo() is declared as a const/non-const pair, each returning a
// reference; the other two accessors return their result by value.
71 const CharInfo& GetCharInfo(size_t index) const;
72 CharInfo& GetCharInfo(size_t index);
73 float GetCharFontSize(size_t index) const;
74 CFX_FloatRect GetCharLooseBounds(size_t index) const;
75
// Geometry-based queries. All four are const.
// NOTE(review): |start| and |count| here are plain ints; whether they use the
// same indexing as GetPageText() is not stated in this header - confirm.
76 std::vector<CFX_FloatRect> GetRectArray(int start, int count) const;
// NOTE(review): the value returned when no character lies within |tolerance|
// of |point| is not visible here - confirm in the implementation.
77 int GetIndexAtPos(const CFX_PointF& point, const CFX_SizeF& tolerance) const;
// NOTE(review): presumably both of these are built on the private
// GetTextByPredicate() helper - confirm in the implementation.
78 WideString GetTextByRect(const CFX_FloatRect& rect) const;
79 WideString GetTextByObject(const CPDF_TextObject* pTextObj) const;
80
81 // Returns a string with the text from |m_TextBuf| that is covered by the
82 // input range. |start| and |count| are in terms of |m_CharIndices|, so the
83 // range will be converted into appropriate indices.
84 WideString GetPageText(int start, int count) const;
86
// CountRects() is non-const while GetRect() is const.
// NOTE(review): presumably CountRects() fills |m_SelRects| for the given
// range and GetRect() then reads entry |rectIndex| from it - confirm.
87 int CountRects(int start, int nCount);
// Reports success through the bool return value and delivers the rectangle
// through the |pRect| out-parameter.
88 bool GetRect(int rectIndex, CFX_FloatRect* pRect) const;
89
90 private:
// Text orientation values. This is the return type of
// GetTextObjectWritingMode() and FindTextlineFlowOrientation(), and the type
// of |m_TextlineDir|, which is initialized to kUnknown.
91 enum class TextOrientation {
92 kUnknown,
93 kHorizontal,
94 kVertical,
95 };
96
// Return type of ProcessInsertObject().
// NOTE(review): presumably names which character, if any, should be
// synthesized ahead of a text object (kNone meaning nothing is inserted) -
// confirm against the implementation.
97 enum class GenerateCharacter {
98 kNone,
99 kSpace,
100 kLineBreak,
101 kHyphen,
102 };
103
// Return type of PreMarkedContent(). kPass is pinned to 0, so kDone and
// kDelay take the values 1 and 2.
// NOTE(review): what each state means for processing is not visible in this
// header - confirm in the implementation.
104 enum class MarkedContentState { kPass = 0, kDone, kDelay };
105
// Bundles a text object pointer with a matrix. Instances are stored in
// |mTextObjects| and are the argument type of ProcessMarkedContent() and of
// one ProcessTextObject() overload.
106 struct TransformedTextObject {
// Default constructor, copy constructor and destructor are declared here and
// defined out of line.
107 TransformedTextObject();
108 TransformedTextObject(const TransformedTextObject& that);
109 ~TransformedTextObject();
110
// Held through UnownedPtr, i.e. this struct does not own the text object.
111 UnownedPtr<CPDF_TextObject> m_pTextObj;
// NOTE(review): presumably the transform of the form the text object was
// found in - confirm against ProcessFormObject() in the implementation.
112 CFX_Matrix m_formMatrix;
113 };
114
// Private helpers. Their definitions are not part of this header, so the
// notes below are limited to what the signatures themselves show.
115 void Init();
116 bool IsHyphen(wchar_t curChar) const;
117 void ProcessObject();
118 void ProcessFormObject(CPDF_FormObject* pFormObj,
119 const CFX_Matrix& formMatrix);
// ProcessTextObject() has two overloads: one takes a bundled
// TransformedTextObject, the other takes the text object, a matrix, and a
// position within an object holder as separate arguments.
120 void ProcessTextObject(const TransformedTextObject& obj);
121 void ProcessTextObject(CPDF_TextObject* pTextObj,
122 const CFX_Matrix& formMatrix,
123 const CPDF_PageObjectHolder* pObjList,
124 CPDF_PageObjectHolder::const_iterator ObjPos);
125 GenerateCharacter ProcessInsertObject(const CPDF_TextObject* pObj,
126 const CFX_Matrix& formMatrix);
// Returns a pointer rather than a reference.
// NOTE(review): presumably null when there is no previous character - confirm.
127 const CharInfo* GetPrevCharInfo() const;
// Returns std::optional, so callers must handle the no-value case.
128 std::optional<CharInfo> GenerateCharInfo(wchar_t unicode);
129 bool IsSameAsPreTextObject(CPDF_TextObject* pTextObj,
130 const CPDF_PageObjectHolder* pObjList,
131 CPDF_PageObjectHolder::const_iterator iter) const;
132 bool IsSameTextObject(CPDF_TextObject* pTextObj1,
133 CPDF_TextObject* pTextObj2) const;
134 void CloseTempLine();
135 MarkedContentState PreMarkedContent(const CPDF_TextObject* pTextObj);
136 void ProcessMarkedContent(const TransformedTextObject& obj);
137 void FindPreviousTextObject();
// Direction-specific variants with identical parameter lists.
// NOTE(review): presumably selected according to |m_rtl| - confirm.
138 void AddCharInfoByLRDirection(wchar_t wChar, const CharInfo& info);
139 void AddCharInfoByRLDirection(wchar_t wChar, const CharInfo& info);
140 TextOrientation GetTextObjectWritingMode(
141 const CPDF_TextObject* pTextObj) const;
142 TextOrientation FindTextlineFlowOrientation() const;
143 void AppendGeneratedCharacter(wchar_t unicode, const CFX_Matrix& formMatrix);
144 void SwapTempTextBuf(size_t iCharListStartAppend, size_t iBufStartAppend);
// Takes a caller-supplied predicate over CharInfo and returns a WideString.
// NOTE(review): presumably the shared implementation behind GetTextByRect()
// and GetTextByObject() - confirm in the implementation.
145 WideString GetTextByPredicate(
146 const std::function<bool(const CharInfo&)>& predicate) const;
147
// Data members. |m_pPage|, |m_rtl| and |m_DisplayMatrix| are declared const,
// so they can only be set during construction.
148 UnownedPtr<const CPDF_Page> const m_pPage;
// GetPageText() interprets its |start| / |count| arguments in terms of this.
149 DataVector<TextPageCharSegment> m_CharIndices;
// size() reports the number of entries in this container.
150 std::deque<CharInfo> m_CharList;
151 std::deque<CharInfo> m_TempCharList;
// GetPageText() returns text taken from this buffer.
152 WideTextBuffer m_TextBuf;
153 WideTextBuffer m_TempTextBuf;
154 UnownedPtr<const CPDF_TextObject> m_pPrevTextObj;
155 CFX_Matrix m_PrevMatrix;
156 const bool m_rtl;
157 const CFX_Matrix m_DisplayMatrix;
158 std::vector<CFX_FloatRect> m_SelRects;
// NOTE(review): this is the only member without the `m_` prefix used by its
// siblings; renaming it would also require changing the implementation file,
// so it is left as is here.
159 std::vector<TransformedTextObject> mTextObjects;
// The only member with an in-class initializer.
160 TextOrientation m_TextlineDir = TextOrientation::kUnknown;
161 CFX_FloatRect m_CurlineRect;
162};
163
164#endif // CORE_FPDFTEXT_CPDF_TEXTPAGE_H_
std::vector< CFX_FloatRect > GetRects(size_t index) const
WideString GetURL(size_t index) const
UnownedPtr< const CPDF_TextPage > const m_pTextPage
CPDF_LinkExtract(const CPDF_TextPage *pTextPage)
std::optional< Range > GetTextRange(size_t index) const
std::vector< Link > m_LinkArray
bool CheckMailLink(WideString *str)
std::optional< Link > CheckWebLink(const WideString &str)
size_t CountLinks() const
UnownedPtr< CPDF_TextObject > m_pTextObj
CharInfo(const CharInfo &)
int CharIndexFromTextIndex(int text_index) const
WideString GetAllPageText() const
bool GetRect(int rectIndex, CFX_FloatRect *pRect) const
int CountChars() const
WideString GetTextByRect(const CFX_FloatRect &rect) const
const CharInfo & GetCharInfo(size_t index) const
CFX_FloatRect GetCharLooseBounds(size_t index) const
CharInfo & GetCharInfo(size_t index)
CPDF_TextPage(const CPDF_Page *pPage, bool rtl)
WideString GetTextByObject(const CPDF_TextObject *pTextObj) const
int GetIndexAtPos(const CFX_PointF &point, const CFX_SizeF &tolerance) const
float GetCharFontSize(size_t index) const
int CountRects(int start, int nCount)
int TextIndexFromCharIndex(int char_index) const
std::vector< CFX_FloatRect > GetRectArray(int start, int count) const
WideString GetPageText(int start, int count) const
size_t size() const
WideString & operator=(WideString &&that) noexcept
CFX_PTemplate< float > CFX_PointF
CFX_STemplate< float > CFX_SizeF
bool FXSYS_iswalnum(wchar_t c)
#define FX_DATA_PARTITION_EXCEPTION(T)
WideString operator+(const wchar_t *str1, const WideString &str2)
Definition widestring.h:168
fxcrt::WideString WideString
Definition widestring.h:207