Qt
Internal/Contributor docs for the Qt SDK. Note: These are NOT official API docs; those are found at https://doc.qt.io/
Loading...
Searching...
No Matches
cpdf_contentparser.cpp
Go to the documentation of this file.
1// Copyright 2016 The PDFium Authors
2// Use of this source code is governed by a BSD-style license that can be
3// found in the LICENSE file.
4
5// Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com
6
7#include "core/fpdfapi/page/cpdf_contentparser.h"
8
9#include <utility>
10
11#include "constants/page_object.h"
12#include "core/fpdfapi/font/cpdf_type3char.h"
13#include "core/fpdfapi/page/cpdf_allstates.h"
14#include "core/fpdfapi/page/cpdf_page.h"
15#include "core/fpdfapi/page/cpdf_pageobject.h"
16#include "core/fpdfapi/page/cpdf_path.h"
17#include "core/fpdfapi/parser/cpdf_array.h"
18#include "core/fpdfapi/parser/cpdf_dictionary.h"
19#include "core/fpdfapi/parser/cpdf_stream.h"
20#include "core/fpdfapi/parser/cpdf_stream_acc.h"
21#include "core/fxcrt/check.h"
22#include "core/fxcrt/check_op.h"
23#include "core/fxcrt/fixed_size_data_vector.h"
24#include "core/fxcrt/fx_safe_types.h"
25#include "core/fxcrt/pauseindicator_iface.h"
26#include "core/fxcrt/span_util.h"
27#include "core/fxcrt/stl_util.h"
28#include "core/fxge/cfx_fillrenderoptions.h"
29
31 : m_CurrentStage(Stage::kGetContent), m_pPageObjectHolder(pPage) {
32 DCHECK(pPage);
33 if (!pPage->GetDocument()) {
34 m_CurrentStage = Stage::kComplete;
35 return;
36 }
37
38 RetainPtr<CPDF_Object> pContent =
39 pPage->GetMutableDict()->GetMutableDirectObjectFor(
40 pdfium::page_object::kContents);
41 if (!pContent) {
42 HandlePageContentFailure();
43 return;
44 }
45
46 const CPDF_Stream* pStream = pContent->AsStream();
47 if (pStream) {
48 HandlePageContentStream(pStream);
49 return;
50 }
51
52 const CPDF_Array* pArray = pContent->AsArray();
53 if (pArray && HandlePageContentArray(pArray))
54 return;
55
56 HandlePageContentFailure();
57}
58
// Constructor overload that parses a single content stream |pStream| on
// behalf of |pPageObjectHolder|, starting directly in Stage::kParse.
// NOTE(review): listing line 59 (the line that opens this definition) is
// absent from this rendering; the parameter list below is what remains.
60 RetainPtr<const CPDF_Stream> pStream,
61 CPDF_PageObjectHolder* pPageObjectHolder,
62 const CPDF_AllStates* pGraphicStates,
63 const CFX_Matrix* pParentMatrix,
64 CPDF_Type3Char* pType3Char,
65 CPDF_Form::RecursionState* recursion_state)
66 : m_CurrentStage(Stage::kParse),
// NOTE(review): listing lines 67-68 are absent from this rendering;
// presumably the remaining member initializers and the opening brace --
// confirm against the original file.
69 DCHECK(m_pPageObjectHolder);
// Start from the holder dictionary's "Matrix" entry and, when graphic states
// are supplied, concatenate their current transformation matrix onto it.
70 CFX_Matrix form_matrix =
71 m_pPageObjectHolder->GetDict()->GetMatrixFor("Matrix");
72 if (pGraphicStates)
73 form_matrix.Concat(pGraphicStates->current_transformation_matrix());
74
// If the holder dictionary has a "BBox", build a rectangular clip path from
// it and transform both the path and the box by |form_matrix|, then by
// |pParentMatrix| when one is given.
75 RetainPtr<const CPDF_Array> pBBox =
76 m_pPageObjectHolder->GetDict()->GetArrayFor("BBox");
77 CFX_FloatRect form_bbox;
78 CPDF_Path ClipPath;
79 if (pBBox) {
80 form_bbox = pBBox->GetRect();
81 ClipPath.Emplace();
82 ClipPath.AppendFloatRect(form_bbox);
83 ClipPath.Transform(form_matrix);
84 if (pParentMatrix)
85 ClipPath.Transform(*pParentMatrix);
86
87 form_bbox = form_matrix.TransformRect(form_bbox);
88 if (pParentMatrix)
89 form_bbox = pParentMatrix->TransformRect(form_bbox);
90 }
91
// Create the stream content parser and seed its current states: both the
// current transformation matrix and the parent matrix are set to
// |form_matrix|.
92 RetainPtr<CPDF_Dictionary> pResources =
93 m_pPageObjectHolder->GetMutableDict()->GetMutableDictFor("Resources");
94 m_pParser = std::make_unique<CPDF_StreamContentParser>(
95 m_pPageObjectHolder->GetDocument(),
96 m_pPageObjectHolder->GetMutablePageResources(),
97 m_pPageObjectHolder->GetMutableResources(), pParentMatrix,
98 m_pPageObjectHolder, std::move(pResources), form_bbox, pGraphicStates,
99 recursion_state);
100 m_pParser->GetCurStates()->set_current_transformation_matrix(form_matrix);
101 m_pParser->GetCurStates()->set_parent_matrix(form_matrix);
// Only install the clip when a "BBox" was present (ClipPath was emplaced).
102 if (ClipPath.HasRef()) {
103 m_pParser->GetCurStates()->mutable_clip_path().AppendPathWithAutoMerge(
104 ClipPath, CFX_FillRenderOptions::FillType::kWinding);
105 }
// For a transparency group, reset stroke/fill alpha to 1.0 and clear the
// soft mask on the parser's general state.
106 if (m_pPageObjectHolder->GetTransparency().IsGroup()) {
107 CPDF_GeneralState& state =
108 m_pParser->GetCurStates()->mutable_general_state();
// NOTE(review): listing line 109 is absent from this rendering; presumably a
// blend-mode reset (the page's member index lists SetBlendType and
// BlendMode::kNormal) -- confirm against the original file.
110 state.SetStrokeAlpha(1.0f);
111 state.SetFillAlpha(1.0f);
112 state.SetSoftMask(nullptr);
113 }
// Load the filtered stream data up front and point m_Data at it.
114 m_pSingleStream = pdfium::MakeRetain<CPDF_StreamAcc>(std::move(pStream));
115 m_pSingleStream->LoadAllDataFiltered();
116 m_Data = m_pSingleStream->GetSpan();
117}
118
120
122 return m_pParser ? m_pParser->TakeAllCTMs() : CPDF_PageObjectHolder::CTMMap();
123}
124
125// Returning |true| means that there is more content to be processed and
126// Continue() should be called again. Returning |false| means that we've
127// completed the parse and Continue() is complete.
129 while (m_CurrentStage == Stage::kGetContent) {
130 m_CurrentStage = GetContent();
131 if (pPause && pPause->NeedToPauseNow())
132 return true;
133 }
134
135 if (m_CurrentStage == Stage::kPrepareContent)
136 m_CurrentStage = PrepareContent();
137
138 while (m_CurrentStage == Stage::kParse) {
139 m_CurrentStage = Parse();
140 if (pPause && pPause->NeedToPauseNow())
141 return true;
142 }
143
144 if (m_CurrentStage == Stage::kCheckClip)
145 m_CurrentStage = CheckClip();
146
147 DCHECK_EQ(m_CurrentStage, Stage::kComplete);
148 return false;
149}
150
151CPDF_ContentParser::Stage CPDF_ContentParser::GetContent() {
152 DCHECK_EQ(m_CurrentStage, Stage::kGetContent);
153 DCHECK(m_pPageObjectHolder->IsPage());
154 RetainPtr<const CPDF_Array> pContent =
155 m_pPageObjectHolder->GetDict()->GetArrayFor(
156 pdfium::page_object::kContents);
157 RetainPtr<const CPDF_Stream> pStreamObj = ToStream(
158 pContent ? pContent->GetDirectObjectAt(m_CurrentOffset) : nullptr);
159 m_StreamArray[m_CurrentOffset] =
160 pdfium::MakeRetain<CPDF_StreamAcc>(std::move(pStreamObj));
161 m_StreamArray[m_CurrentOffset]->LoadAllDataFiltered();
162 m_CurrentOffset++;
163
164 return m_CurrentOffset == m_nStreams ? Stage::kPrepareContent
165 : Stage::kGetContent;
166}
167
168CPDF_ContentParser::Stage CPDF_ContentParser::PrepareContent() {
169 m_CurrentOffset = 0;
170
171 if (m_StreamArray.empty()) {
172 m_Data = m_pSingleStream->GetSpan();
173 return Stage::kParse;
174 }
175
176 FX_SAFE_UINT32 safe_size = 0;
177 for (const auto& stream : m_StreamArray) {
178 m_StreamSegmentOffsets.push_back(safe_size.ValueOrDie());
179 safe_size += stream->GetSize();
180 safe_size += 1;
181 if (!safe_size.IsValid())
182 return Stage::kComplete;
183 }
184
185 const size_t buffer_size = safe_size.ValueOrDie();
186 auto buffer = FixedSizeDataVector<uint8_t>::TryZeroed(buffer_size);
187 if (buffer.empty()) {
188 m_Data.emplace<pdfium::raw_span<const uint8_t>>();
189 return Stage::kComplete;
190 }
191
192 auto data_span = buffer.span();
193 for (const auto& stream : m_StreamArray) {
194 data_span = fxcrt::spancpy(data_span, stream->GetSpan());
195 data_span.front() = ' ';
196 data_span = data_span.subspan(1);
197 }
198 m_StreamArray.clear();
199 m_Data = std::move(buffer);
200 return Stage::kParse;
201}
202
204 if (!m_pParser) {
205 m_RecursionState.parsed_set.clear();
206 m_pParser = std::make_unique<CPDF_StreamContentParser>(
207 m_pPageObjectHolder->GetDocument(),
208 m_pPageObjectHolder->GetMutablePageResources(), nullptr, nullptr,
209 m_pPageObjectHolder, m_pPageObjectHolder->GetMutableResources(),
210 m_pPageObjectHolder->GetBBox(), nullptr, &m_RecursionState);
211 m_pParser->GetCurStates()->mutable_color_state().SetDefault();
212 }
213 if (m_CurrentOffset >= GetData().size())
214 return Stage::kCheckClip;
215
216 if (m_StreamSegmentOffsets.empty())
217 m_StreamSegmentOffsets.push_back(0);
218
219 static constexpr uint32_t kParseStepLimit = 100;
220 m_CurrentOffset += m_pParser->Parse(GetData(), m_CurrentOffset,
221 kParseStepLimit, m_StreamSegmentOffsets);
222 return Stage::kParse;
223}
224
225CPDF_ContentParser::Stage CPDF_ContentParser::CheckClip() {
226 if (m_pType3Char) {
227 m_pType3Char->InitializeFromStreamData(m_pParser->IsColored(),
228 m_pParser->GetType3Data());
229 }
230
231 for (auto& pObj : *m_pPageObjectHolder) {
232 CPDF_ClipPath& clip_path = pObj->mutable_clip_path();
233 if (!clip_path.HasRef()) {
234 continue;
235 }
236 if (clip_path.GetPathCount() != 1) {
237 continue;
238 }
239 if (clip_path.GetTextCount() > 0) {
240 continue;
241 }
242
243 CPDF_Path path = clip_path.GetPath(0);
244 if (!path.IsRect() || pObj->IsShading()) {
245 continue;
246 }
247
248 CFX_PointF point0 = path.GetPoint(0);
249 CFX_PointF point2 = path.GetPoint(2);
250 CFX_FloatRect old_rect(point0.x, point0.y, point2.x, point2.y);
251 if (old_rect.Contains(pObj->GetRect()))
252 clip_path.SetNull();
253 }
254 return Stage::kComplete;
255}
256
257void CPDF_ContentParser::HandlePageContentStream(const CPDF_Stream* pStream) {
258 m_pSingleStream =
259 pdfium::MakeRetain<CPDF_StreamAcc>(pdfium::WrapRetain(pStream));
260 m_pSingleStream->LoadAllDataFiltered();
261 m_CurrentStage = Stage::kPrepareContent;
262}
263
264bool CPDF_ContentParser::HandlePageContentArray(const CPDF_Array* pArray) {
265 m_nStreams = fxcrt::CollectionSize<uint32_t>(*pArray);
266 if (m_nStreams == 0)
267 return false;
268
269 m_StreamArray.resize(m_nStreams);
270 return true;
271}
272
273void CPDF_ContentParser::HandlePageContentFailure() {
274 m_CurrentStage = Stage::kComplete;
275}
276
277pdfium::span<const uint8_t> CPDF_ContentParser::GetData() const {
278 if (is_owned()) {
279 return absl::get<FixedSizeDataVector<uint8_t>>(m_Data).span();
280 }
281 return absl::get<pdfium::raw_span<const uint8_t>>(m_Data);
282}
#define DCHECK
Definition check.h:33
#define DCHECK_EQ(x, y)
Definition check_op.h:17
CFX_FloatRect & operator=(const CFX_FloatRect &that)=default
CFX_FloatRect TransformRect(const CFX_FloatRect &rect) const
void Concat(const CFX_Matrix &right)
const CFX_Matrix & current_transformation_matrix() const
std::vector< RetainPtr< CPDF_Object > >::const_iterator const_iterator
Definition cpdf_array.h:29
CPDF_ContentParser(CPDF_Page *pPage)
bool Continue(PauseIndicatorIface *pPause)
CPDF_PageObjectHolder::CTMMap TakeAllCTMs()
CPDF_ContentParser(RetainPtr< const CPDF_Stream > pStream, CPDF_PageObjectHolder *pPageObjectHolder, const CPDF_AllStates *pGraphicStates, const CFX_Matrix *pParentMatrix, CPDF_Type3Char *pType3Char, CPDF_Form::RecursionState *recursion_state)
std::map< ByteString, RetainPtr< CPDF_Object >, std::less<> > DictMap
void SetBlendType(BlendMode type)
void SetFillAlpha(float alpha)
void SetStrokeAlpha(float alpha)
void SetSoftMask(RetainPtr< CPDF_Dictionary > pDict)
CPDF_Document * GetDocument() const override
Definition cpdf_page.cpp:51
bool HasRef() const
Definition cpdf_path.h:22
void AppendFloatRect(const CFX_FloatRect &rect)
Definition cpdf_path.cpp:49
void Emplace()
Definition cpdf_path.h:21
void Transform(const CFX_Matrix &matrix)
Definition cpdf_path.cpp:41
virtual bool NeedToPauseNow()=0
BlendMode
Definition fx_dib.h:119
@ kNormal
Definition fx_dib.h:120
pdfium::CheckedNumeric< uint32_t > FX_SAFE_UINT32