Qt
Internal/Contributor docs for the Qt SDK. Note: These are NOT official API docs; those are found at https://doc.qt.io/
Loading...
Searching...
No Matches
fpdf_flatten.cpp
Go to the documentation of this file.
1// Copyright 2014 The PDFium Authors
2// Use of this source code is governed by a BSD-style license that can be
3// found in the LICENSE file.
4
5// Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com
6
7#include "public/fpdf_flatten.h"
8
9#include <limits.h>
10
11#include <algorithm>
12#include <sstream>
13#include <utility>
14#include <vector>
15
16#include "constants/annotation_common.h"
17#include "constants/annotation_flags.h"
18#include "constants/font_encodings.h"
19#include "constants/page_object.h"
20#include "core/fpdfapi/edit/cpdf_contentstream_write_utils.h"
21#include "core/fpdfapi/page/cpdf_page.h"
22#include "core/fpdfapi/page/cpdf_pageobject.h"
23#include "core/fpdfapi/parser/cpdf_array.h"
24#include "core/fpdfapi/parser/cpdf_dictionary.h"
25#include "core/fpdfapi/parser/cpdf_document.h"
26#include "core/fpdfapi/parser/cpdf_name.h"
27#include "core/fpdfapi/parser/cpdf_number.h"
28#include "core/fpdfapi/parser/cpdf_reference.h"
29#include "core/fpdfapi/parser/cpdf_stream.h"
30#include "core/fpdfapi/parser/cpdf_stream_acc.h"
31#include "core/fpdfapi/parser/fpdf_parser_utility.h"
32#include "core/fpdfdoc/cpdf_annot.h"
33#include "core/fxcrt/fx_string_wrappers.h"
34#include "core/fxcrt/notreached.h"
35#include "fpdfsdk/cpdfsdk_helpers.h"
36
// Selects whether GetMinMaxValue() returns the largest or the smallest
// coordinate found.
enum FPDF_TYPE { MAX, MIN };

// Selects which rectangle edge GetMinMaxValue() inspects.
enum FPDF_VALUE { TOP, LEFT, RIGHT, BOTTOM };
39
40namespace {
41
42bool IsValidRect(const CFX_FloatRect& rect, const CFX_FloatRect& rcPage) {
43 constexpr float kMinSize = 0.000001f;
44 if (rect.IsEmpty() || rect.Width() < kMinSize || rect.Height() < kMinSize)
45 return false;
46
47 if (rcPage.IsEmpty())
48 return true;
49
50 constexpr float kMinBorderSize = 10.000001f;
51 return rect.left - rcPage.left >= -kMinBorderSize &&
52 rect.right - rcPage.right <= kMinBorderSize &&
53 rect.top - rcPage.top <= kMinBorderSize &&
54 rect.bottom - rcPage.bottom >= -kMinBorderSize;
55}
56
57void GetContentsRect(CPDF_Document* pDoc,
59 std::vector<CFX_FloatRect>* pRectArray) {
60 auto pPDFPage = pdfium::MakeRetain<CPDF_Page>(pDoc, pDict);
61 pPDFPage->ParseContent();
62
63 for (const auto& pPageObject : *pPDFPage) {
64 const CFX_FloatRect& rc = pPageObject->GetRect();
65 if (IsValidRect(rc, pDict->GetRectFor(pdfium::page_object::kMediaBox)))
66 pRectArray->push_back(rc);
67 }
68}
69
70void ParserStream(const CPDF_Dictionary* pPageDic,
71 CPDF_Dictionary* pStream,
72 std::vector<CFX_FloatRect>* pRectArray,
73 std::vector<CPDF_Dictionary*>* pObjectArray) {
74 if (!pStream)
75 return;
76 CFX_FloatRect rect;
77 if (pStream->KeyExist("Rect"))
78 rect = pStream->GetRectFor("Rect");
79 else if (pStream->KeyExist("BBox"))
80 rect = pStream->GetRectFor("BBox");
81
82 if (IsValidRect(rect, pPageDic->GetRectFor(pdfium::page_object::kMediaBox)))
83 pRectArray->push_back(rect);
84
85 pObjectArray->push_back(pStream);
86}
87
88int ParserAnnots(CPDF_Document* pSourceDoc,
89 RetainPtr<CPDF_Dictionary> pPageDic,
90 std::vector<CFX_FloatRect>* pRectArray,
91 std::vector<CPDF_Dictionary*>* pObjectArray,
92 int nUsage) {
93 if (!pSourceDoc)
94 return FLATTEN_FAIL;
95
96 GetContentsRect(pSourceDoc, pPageDic, pRectArray);
97 RetainPtr<const CPDF_Array> pAnnots = pPageDic->GetArrayFor("Annots");
98 if (!pAnnots)
100
101 CPDF_ArrayLocker locker(pAnnots);
102 for (const auto& pAnnot : locker) {
103 RetainPtr<CPDF_Dictionary> pAnnotDict =
104 ToDictionary(pAnnot->GetMutableDirect());
105 if (!pAnnotDict)
106 continue;
107
108 ByteString sSubtype =
109 pAnnotDict->GetByteStringFor(pdfium::annotation::kSubtype);
110 if (sSubtype == "Popup")
111 continue;
112
113 int nAnnotFlag = pAnnotDict->GetIntegerFor("F");
114 if (nAnnotFlag & pdfium::annotation_flags::kHidden)
115 continue;
116
117 bool bParseStream;
118 if (nUsage == FLAT_NORMALDISPLAY)
119 bParseStream = !(nAnnotFlag & pdfium::annotation_flags::kInvisible);
120 else
121 bParseStream = !!(nAnnotFlag & pdfium::annotation_flags::kPrint);
122 if (bParseStream)
123 ParserStream(pPageDic.Get(), pAnnotDict.Get(), pRectArray, pObjectArray);
124 }
125 return FLATTEN_SUCCESS;
126}
127
128float GetMinMaxValue(const std::vector<CFX_FloatRect>& array,
129 FPDF_TYPE type,
130 FPDF_VALUE value) {
131 if (array.empty())
132 return 0.0f;
133
134 size_t nRects = array.size();
135 std::vector<float> pArray(nRects);
136 switch (value) {
137 case LEFT:
138 for (size_t i = 0; i < nRects; i++)
139 pArray[i] = array[i].left;
140 break;
141 case TOP:
142 for (size_t i = 0; i < nRects; i++)
143 pArray[i] = array[i].top;
144 break;
145 case RIGHT:
146 for (size_t i = 0; i < nRects; i++)
147 pArray[i] = array[i].right;
148 break;
149 case BOTTOM:
150 for (size_t i = 0; i < nRects; i++)
151 pArray[i] = array[i].bottom;
152 break;
153 }
154
155 float fRet = pArray[0];
156 if (type == MAX) {
157 for (size_t i = 1; i < nRects; i++)
158 fRet = std::max(fRet, pArray[i]);
159 } else {
160 for (size_t i = 1; i < nRects; i++)
161 fRet = std::min(fRet, pArray[i]);
162 }
163 return fRet;
164}
165
166CFX_FloatRect CalculateRect(std::vector<CFX_FloatRect>* pRectArray) {
167 CFX_FloatRect rcRet;
168
169 rcRet.left = GetMinMaxValue(*pRectArray, MIN, LEFT);
170 rcRet.top = GetMinMaxValue(*pRectArray, MAX, TOP);
171 rcRet.right = GetMinMaxValue(*pRectArray, MAX, RIGHT);
172 rcRet.bottom = GetMinMaxValue(*pRectArray, MIN, BOTTOM);
173
174 return rcRet;
175}
176
177ByteString GenerateFlattenedContent(const ByteString& key) {
178 return "q 1 0 0 1 0 0 cm /" + key + " Do Q";
179}
180
181RetainPtr<CPDF_Reference> NewIndirectContentsStreamReference(
182 CPDF_Document* pDocument,
183 const ByteString& contents) {
184 auto pNewContents =
185 pDocument->NewIndirect<CPDF_Stream>(pDocument->New<CPDF_Dictionary>());
186 pNewContents->SetData(contents.unsigned_span());
187 return pNewContents->MakeReference(pDocument);
188}
189
190void SetPageContents(const ByteString& key,
191 CPDF_Dictionary* pPage,
192 CPDF_Document* pDocument) {
193 RetainPtr<CPDF_Array> pContentsArray =
194 pPage->GetMutableArrayFor(pdfium::page_object::kContents);
195 RetainPtr<CPDF_Stream> pContentsStream =
196 pPage->GetMutableStreamFor(pdfium::page_object::kContents);
197 if (!pContentsStream && !pContentsArray) {
198 if (!key.IsEmpty()) {
199 pPage->SetFor(pdfium::page_object::kContents,
200 NewIndirectContentsStreamReference(
201 pDocument, GenerateFlattenedContent(key)));
202 }
203 return;
204 }
205
207 if (pContentsArray) {
208 pContentsArray->InsertAt(
209 0, NewIndirectContentsStreamReference(pDocument, "q"));
210 pContentsArray->Append(NewIndirectContentsStreamReference(pDocument, "Q"));
211 } else {
212 ByteString sStream = "q\n";
213 {
214 auto pAcc = pdfium::MakeRetain<CPDF_StreamAcc>(pContentsStream);
215 pAcc->LoadAllDataFiltered();
216 sStream += ByteStringView(pAcc->GetSpan());
217 sStream += "\nQ";
218 }
219 pContentsStream->SetDataAndRemoveFilter(sStream.unsigned_span());
220 pContentsArray = pDocument->NewIndirect<CPDF_Array>();
221 pContentsArray->AppendNew<CPDF_Reference>(pDocument,
222 pContentsStream->GetObjNum());
223 pPage->SetNewFor<CPDF_Reference>(pdfium::page_object::kContents, pDocument,
224 pContentsArray->GetObjNum());
225 }
226 if (!key.IsEmpty()) {
227 pContentsArray->Append(NewIndirectContentsStreamReference(
228 pDocument, GenerateFlattenedContent(key)));
229 }
230}
231
232CFX_Matrix GetMatrix(const CFX_FloatRect& rcAnnot,
233 const CFX_FloatRect& rcStream,
234 const CFX_Matrix& matrix) {
235 if (rcStream.IsEmpty())
236 return CFX_Matrix();
237
238 CFX_FloatRect rcTransformed = matrix.TransformRect(rcStream);
239 rcTransformed.Normalize();
240
241 float a = rcAnnot.Width() / rcTransformed.Width();
242 float d = rcAnnot.Height() / rcTransformed.Height();
243
244 float e = rcAnnot.left - rcTransformed.left * a;
245 float f = rcAnnot.bottom - rcTransformed.bottom * d;
246 return CFX_Matrix(a, 0.0f, 0.0f, d, e, f);
247}
248
249bool IsValidBaseEncoding(ByteString base_encoding) {
250 // ISO 32000-1:2008 spec, table 114.
251 // ISO 32000-2:2020 spec, table 112.
252 //
253 // Since /BaseEncoding is optional, `base_encoding` can be empty.
254 return base_encoding.IsEmpty() ||
258}
259
260void SanitizeFont(RetainPtr<CPDF_Dictionary> font_dict) {
261 if (!font_dict) {
262 return;
263 }
264
265 RetainPtr<CPDF_Dictionary> encoding_dict =
266 font_dict->GetMutableDictFor("Encoding");
267 if (encoding_dict) {
268 if (!IsValidBaseEncoding(encoding_dict->GetNameFor("BaseEncoding"))) {
269 font_dict->RemoveFor("Encoding");
270 }
271 }
272}
273
274void SanitizeFontResources(RetainPtr<CPDF_Dictionary> font_resource_dict) {
275 if (!font_resource_dict) {
276 return;
277 }
278
279 CPDF_DictionaryLocker locker(font_resource_dict);
280 for (auto it : locker) {
281 SanitizeFont(ToDictionary(it.second->GetMutableDirect()));
282 }
283}
284
285void SanitizeResources(RetainPtr<CPDF_Dictionary> resources_dict) {
286 if (!resources_dict) {
287 return;
288 }
289
290 SanitizeFontResources(resources_dict->GetMutableDictFor("Font"));
291}
292
293} // namespace
294
295FPDF_EXPORT int FPDF_CALLCONV FPDFPage_Flatten(FPDF_PAGE page, int nFlag) {
296 CPDF_Page* pPage = CPDFPageFromFPDFPage(page);
297 if (!page)
298 return FLATTEN_FAIL;
299
300 CPDF_Document* pDocument = pPage->GetDocument();
301 RetainPtr<CPDF_Dictionary> pPageDict = pPage->GetMutableDict();
302 if (!pDocument)
303 return FLATTEN_FAIL;
304
305 std::vector<CPDF_Dictionary*> ObjectArray;
306 std::vector<CFX_FloatRect> RectArray;
307 int iRet =
308 ParserAnnots(pDocument, pPageDict, &RectArray, &ObjectArray, nFlag);
309 if (iRet == FLATTEN_NOTHINGTODO || iRet == FLATTEN_FAIL)
310 return iRet;
311
312 CFX_FloatRect rcMerger = CalculateRect(&RectArray);
313 CFX_FloatRect rcOriginalMB =
314 pPageDict->GetRectFor(pdfium::page_object::kMediaBox);
315 if (pPageDict->KeyExist(pdfium::page_object::kCropBox))
316 rcOriginalMB = pPageDict->GetRectFor(pdfium::page_object::kCropBox);
317
318 rcOriginalMB.Normalize();
319 if (rcOriginalMB.IsEmpty())
320 rcOriginalMB = CFX_FloatRect(0.0f, 0.0f, 612.0f, 792.0f);
321
322 CFX_FloatRect rcOriginalCB;
323 if (pPageDict->KeyExist(pdfium::page_object::kCropBox)) {
324 rcOriginalCB = pPageDict->GetRectFor(pdfium::page_object::kCropBox);
325 rcOriginalCB.Normalize();
326 }
327 if (rcOriginalCB.IsEmpty())
328 rcOriginalCB = rcOriginalMB;
329
330 rcMerger.left = std::max(rcMerger.left, rcOriginalMB.left);
331 rcMerger.right = std::min(rcMerger.right, rcOriginalMB.right);
332 rcMerger.bottom = std::max(rcMerger.bottom, rcOriginalMB.bottom);
333 rcMerger.top = std::min(rcMerger.top, rcOriginalMB.top);
334
335 pPageDict->SetRectFor(pdfium::page_object::kMediaBox, rcOriginalMB);
336 pPageDict->SetRectFor(pdfium::page_object::kCropBox, rcOriginalCB);
337
339 pPageDict->GetOrCreateDictFor(pdfium::page_object::kResources);
340 auto pNewXObject =
341 pDocument->NewIndirect<CPDF_Stream>(pDocument->New<CPDF_Dictionary>());
342 RetainPtr<CPDF_Dictionary> pPageXObject = pRes->GetOrCreateDictFor("XObject");
343
344 ByteString key;
345 if (!ObjectArray.empty()) {
346 int i = 0;
347 while (i < INT_MAX) {
348 ByteString sKey = ByteString::Format("FFT%d", i);
349 if (!pPageXObject->KeyExist(sKey)) {
350 key = std::move(sKey);
351 break;
352 }
353 ++i;
354 }
355 }
356
357 SetPageContents(key, pPageDict.Get(), pDocument);
358
359 RetainPtr<CPDF_Dictionary> pNewXORes;
360 if (!key.IsEmpty()) {
361 pPageXObject->SetNewFor<CPDF_Reference>(key, pDocument,
362 pNewXObject->GetObjNum());
363
364 RetainPtr<CPDF_Dictionary> pNewOXbjectDic = pNewXObject->GetMutableDict();
365 pNewXORes = pNewOXbjectDic->SetNewFor<CPDF_Dictionary>("Resources");
366 pNewOXbjectDic->SetNewFor<CPDF_Name>("Type", "XObject");
367 pNewOXbjectDic->SetNewFor<CPDF_Name>("Subtype", "Form");
368 pNewOXbjectDic->SetNewFor<CPDF_Number>("FormType", 1);
369 pNewOXbjectDic->SetRectFor("BBox", rcOriginalCB);
370 }
371
372 for (size_t i = 0; i < ObjectArray.size(); ++i) {
373 CPDF_Dictionary* pAnnotDict = ObjectArray[i];
374 if (!pAnnotDict)
375 continue;
376
378 rcAnnot.Normalize();
379
380 ByteString sAnnotState = pAnnotDict->GetByteStringFor("AS");
381 RetainPtr<CPDF_Dictionary> pAnnotAP =
382 pAnnotDict->GetMutableDictFor(pdfium::annotation::kAP);
383 if (!pAnnotAP)
384 continue;
385
386 RetainPtr<CPDF_Stream> original_ap_stream =
387 pAnnotAP->GetMutableStreamFor("N");
388 if (!original_ap_stream) {
389 RetainPtr<CPDF_Dictionary> original_ap_dict =
390 pAnnotAP->GetMutableDictFor("N");
391 if (!original_ap_dict) {
392 continue;
393 }
394
395 if (!sAnnotState.IsEmpty()) {
396 original_ap_stream = original_ap_dict->GetMutableStreamFor(sAnnotState);
397 } else {
398 if (original_ap_dict->size() > 0) {
399 CPDF_DictionaryLocker locker(original_ap_dict);
400 RetainPtr<CPDF_Object> pFirstObj = locker.begin()->second;
401 if (pFirstObj) {
402 if (pFirstObj->IsReference())
403 pFirstObj = pFirstObj->GetMutableDirect();
404 if (!pFirstObj->IsStream())
405 continue;
406 original_ap_stream.Reset(pFirstObj->AsMutableStream());
407 }
408 }
409 }
410 }
411 if (!original_ap_stream) {
412 continue;
413 }
414
415 RetainPtr<const CPDF_Dictionary> original_ap_stream_dict =
416 original_ap_stream->GetDict();
417 CFX_FloatRect rcStream;
418 if (original_ap_stream_dict->KeyExist("Rect")) {
419 rcStream = original_ap_stream_dict->GetRectFor("Rect");
420 } else if (original_ap_stream_dict->KeyExist("BBox")) {
421 rcStream = original_ap_stream_dict->GetRectFor("BBox");
422 }
423 rcStream.Normalize();
424
425 if (rcStream.IsEmpty())
426 continue;
427
428 RetainPtr<CPDF_Stream> ap_stream;
429 if (original_ap_stream->IsInline()) {
430 ap_stream = ToStream(original_ap_stream->Clone());
431 pDocument->AddIndirectObject(ap_stream);
432 } else {
433 ap_stream = original_ap_stream;
434 }
435
436 RetainPtr<CPDF_Dictionary> ap_stream_dict = ap_stream->GetMutableDict();
437 ap_stream_dict->SetNewFor<CPDF_Name>("Type", "XObject");
438 ap_stream_dict->SetNewFor<CPDF_Name>("Subtype", "Form");
439 SanitizeResources(ap_stream_dict->GetMutableDictFor("Resources"));
440
441 RetainPtr<CPDF_Dictionary> pXObject =
442 pNewXORes->GetOrCreateDictFor("XObject");
443 ByteString sFormName = ByteString::Format("F%d", i);
444 pXObject->SetNewFor<CPDF_Reference>(sFormName, pDocument,
445 ap_stream->GetObjNum());
446
447 ByteString sStream;
448 {
449 auto pAcc = pdfium::MakeRetain<CPDF_StreamAcc>(pNewXObject);
450 pAcc->LoadAllDataFiltered();
451 sStream = ByteString(ByteStringView(pAcc->GetSpan()));
452 }
453 CFX_Matrix matrix = original_ap_stream_dict->GetMatrixFor("Matrix");
454 CFX_Matrix m = GetMatrix(rcAnnot, rcStream, matrix);
455 m.b = 0;
456 m.c = 0;
457 fxcrt::ostringstream buf;
458 WriteMatrix(buf, m);
459 ByteString str(buf);
460 sStream += ByteString::Format("q %s cm /%s Do Q\n", str.c_str(),
461 sFormName.c_str());
462 pNewXObject->SetDataAndRemoveFilter(sStream.unsigned_span());
463 }
464 pPageDict->RemoveFor("Annots");
465 return FLATTEN_SUCCESS;
466}
fxcrt::ByteString ByteString
Definition bytestring.h:180
constexpr CFX_FloatRect(float l, float b, float r, float t)
float Width() const
bool IsEmpty() const
CFX_FloatRect & operator=(const CFX_FloatRect &that)=default
float Height() const
constexpr CFX_Matrix()=default
CFX_FloatRect TransformRect(const CFX_FloatRect &rect) const
constexpr CFX_Matrix(float a1, float b1, float c1, float d1, float e1, float f1)
std::vector< RetainPtr< CPDF_Object > >::const_iterator const_iterator
Definition cpdf_array.h:29
bool KeyExist(const ByteString &key) const
void ConvertToIndirectObjectFor(const ByteString &key, CPDF_IndirectObjectHolder *pHolder)
ByteString GetByteStringFor(const ByteString &key) const
std::map< ByteString, RetainPtr< CPDF_Object >, std::less<> > DictMap
CFX_FloatRect GetRectFor(const ByteString &key) const
uint32_t AddIndirectObject(RetainPtr< CPDF_Object > pObj)
CPDF_Document * GetDocument() const override
Definition cpdf_page.cpp:51
static ByteString Format(const char *pFormat,...)
bool operator==(const char *ptr) const
ByteString & operator+=(const char *str)
ByteString & operator=(ByteString &&that) noexcept
CPDF_Page * CPDFPageFromFPDFPage(FPDF_PAGE page)
FPDF_VALUE
@ LEFT
@ RIGHT
@ BOTTOM
@ TOP
FPDF_EXPORT int FPDF_CALLCONV FPDFPage_Flatten(FPDF_PAGE page, int nFlag)
FPDF_TYPE
@ MAX
@ MIN
#define FLATTEN_NOTHINGTODO
#define FLATTEN_SUCCESS
#define FLATTEN_FAIL
#define FLAT_NORMALDISPLAY
#define FPDF_CALLCONV
Definition fpdfview.h:229
#define FPDF_EXPORT
Definition fpdfview.h:223
ByteString operator+(const ByteString &str1, const char *str2)
Definition bytestring.h:155
ByteString operator+(const char *str1, const ByteString &str2)
Definition bytestring.h:158
const char kMacExpertEncoding[]
const char kMediaBox[]
const char kContents[]
const char kCropBox[]
fxcrt::ByteStringView ByteStringView