PoDoFo 1.2.0
Loading...
Searching...
No Matches
PdfDocument.h
1// SPDX-FileCopyrightText: 2006 Dominik Seichter <domseichter@web.de>
2// SPDX-FileCopyrightText: 2020 Francesco Pretto <ceztko@gmail.com>
3// SPDX-License-Identifier: LGPL-2.0-or-later OR MPL-2.0
4
5#ifndef PDF_DOCUMENT_H
6#define PDF_DOCUMENT_H
7
#include <cstddef>
#include <iterator>
#include <unordered_set>

#include "PdfTrailer.h"
#include "PdfCatalog.h"
#include "PdfIndirectObjectList.h"
#include "PdfAcroForm.h"
#include "PdfFontManager.h"
#include "PdfMetadata.h"
#include "PdfPageCollection.h"
#include "PdfNameTrees.h"
#include "PdfXObjectForm.h"
#include "PdfImage.h"
#include "PdfColorSpace.h"
#include "PdfPattern.h"
#include "PdfFunction.h"
#include "PdfInfo.h"
#include "PdfOutlines.h"
#include "PdfExtension.h"
24
25namespace PoDoFo {
26
27class PdfAction;
28class PdfExtGState;
29class PdfEncrypt;
30class PdfDocument;
31
32template <typename TField>
33class PdfDocumentFieldIterableBase final
34{
35 friend class PdfDocument;
36
37public:
38 PdfDocumentFieldIterableBase()
39 : m_doc(nullptr) { }
40
41private:
42 PdfDocumentFieldIterableBase(PdfDocument& doc)
43 : m_doc(&doc) { }
44
45public:
46 class Iterator final
47 {
48 friend class PdfDocumentFieldIterableBase;
49 public:
50 using difference_type = void;
51 using value_type = TField*;
52 using pointer = void;
53 using reference = void;
54 using iterator_category = std::forward_iterator_tag;
55 public:
56 Iterator();
57 private:
58 Iterator(PdfDocument& doc);
59 public:
60 Iterator(const Iterator&) = default;
61 Iterator& operator=(const Iterator&) = default;
62 bool operator==(const Iterator& rhs) const;
63 bool operator!=(const Iterator& rhs) const;
64 Iterator& operator++();
65 Iterator operator++(int);
66 value_type operator*() { return m_Field; }
67 value_type operator->() { return m_Field; }
68 private:
69 void increment();
70 void stepIntoPageOrForm(PdfPageCollection& pages);
71 bool stepIntoPageAnnot(PdfAnnotationCollection& annots);
72 void stepIntoFormField(PdfAcroForm& form);
73 private:
74 PdfDocument* m_doc;
75 unsigned m_pageIndex;
76 PdfAnnotationCollection::iterator m_pageAnnotIterator;
77 PdfAcroForm::iterator m_acroFormIterator;
78 value_type m_Field;
79 std::unordered_set<PdfReference> m_visitedObjs;
80 };
81
82public:
83 Iterator begin() const;
84 Iterator end() const;
85
86private:
87 PdfDocument* m_doc;
88};
89
90using PdfDocumentFieldIterable = PdfDocumentFieldIterableBase<PdfField>;
91using PdfDocumentConstFieldIterable = PdfDocumentFieldIterableBase<const PdfField>;
92
107class PODOFO_API PdfDocument
108{
109 friend class PdfMetadata;
110 friend class PdfXObjectForm;
111 friend class PdfPageCollection;
112 friend class PdfMemDocument;
113 friend class PdfStreamedDocument;
114
115public:
117 virtual ~PdfDocument();
118
123 PdfOutlines& GetOrCreateOutlines();
124
131 PdfNameTrees& GetOrCreateNames();
132
140 PdfAcroForm& GetOrCreateAcroForm(PdfAcroFormDefaulAppearance eDefaultAppearance = PdfAcroFormDefaulAppearance::ArialBlack);
141
142 void CollectGarbage();
143
145 std::unique_ptr<PdfImage> CreateImage();
146
147 std::unique_ptr<PdfXObjectForm> CreateXObjectForm(const Rect& rect);
148
149 std::unique_ptr<PdfDestination> CreateDestination();
150
151 std::unique_ptr<PdfColorSpace> CreateColorSpace(PdfColorSpaceFilterPtr filter);
152
153 std::unique_ptr<PdfFunction> CreateFunction(PdfFunctionDefinitionPtr definition);
154
155 std::unique_ptr<PdfUncolouredTilingPattern> CreateTilingPattern(std::shared_ptr<PdfUncolouredTilingPatternDefinition> definition);
156
157 std::unique_ptr<PdfColouredTilingPattern> CreateTilingPattern(std::shared_ptr<PdfColouredTilingPatternDefinition> definition);
158
159 std::unique_ptr<PdfShadingPattern> CreateShadingPattern(PdfShadingPatternDefinitionPtr definition);
160
161 std::unique_ptr<PdfShadingDictionary> CreateShadingDictionary(PdfShadingDefinitionPtr definition);
162
163 std::unique_ptr<PdfExtGState> CreateExtGState(PdfExtGStateDefinitionPtr definition);
164
165 template <typename Taction>
166 std::unique_ptr<Taction> CreateAction();
167
168 std::unique_ptr<PdfAction> CreateAction(PdfActionType type);
169
170 std::unique_ptr<PdfFileSpec> CreateFileSpec();
171
178 bool IsPrintAllowed() const;
179
186 bool IsEditAllowed() const;
187
194 bool IsCopyAllowed() const;
195
202 bool IsEditNotesAllowed() const;
203
210 bool IsFillAndSignAllowed() const;
211
218 bool IsAccessibilityAllowed() const;
219
226 bool IsDocAssemblyAllowed() const;
227
234 bool IsHighPrintAllowed() const;
235
238 void PushPdfExtension(const PdfExtension& extension);
239
245 bool HasPdfExtension(const std::string_view& ns, int64_t level) const;
246
250 void RemovePdfExtension(const std::string_view& ns, int64_t level);
251
254 std::vector<PdfExtension> GetPdfExtensions() const;
255
256 PdfAcroForm& MustGetAcroForm();
257
258 const PdfAcroForm& MustGetAcroForm() const;
259
260 PdfNameTrees& MustGetNames();
261
262 const PdfNameTrees& MustGetNames() const;
263
264 PdfOutlines& MustGetOutlines();
265
266 const PdfOutlines& MustGetOutlines() const;
267
271 PdfDocumentFieldIterable GetFieldsIterator();
272 PdfDocumentConstFieldIterable GetFieldsIterator() const;
273
275 void Reset();
276
277public:
285 virtual bool HasOwnerPermissions() const = 0;
286
287 virtual const PdfEncrypt* GetEncrypt() const = 0;
288
290 bool IsEncrypted() const;
291
292public:
297 PdfCatalog& GetCatalog() { return *m_Catalog; }
298
303 const PdfCatalog& GetCatalog() const { return *m_Catalog; }
304
307 PdfPageCollection& GetPages() { return *m_Pages; }
308
311 const PdfPageCollection& GetPages() const { return *m_Pages; }
312
317 PdfTrailer &GetTrailer() { return *m_Trailer; }
318
323 const PdfTrailer& GetTrailer() const { return *m_Trailer; }
324
330 const PdfInfo* GetInfo() const;
331
332 PdfMetadata& GetMetadata() { return m_Metadata; }
333
334 const PdfMetadata& GetMetadata() const { return m_Metadata; }
335
340 PdfIndirectObjectList& GetObjects() { return m_Objects; }
341
346 const PdfIndirectObjectList& GetObjects() const { return m_Objects; }
347
348 PdfAcroForm* GetAcroForm() { return m_AcroForm.get(); }
349
350 const PdfAcroForm* GetAcroForm() const { return m_AcroForm.get(); }
351
352 PdfNameTrees* GetNames() { return m_NameTrees.get(); }
353
354 const PdfNameTrees* GetNames() const { return m_NameTrees.get(); }
355
356 PdfOutlines* GetOutlines();
357
358 const PdfOutlines* GetOutlines() const;
359
360 PdfFontManager& GetFonts() { return m_FontManager; }
361
362protected:
368 void SetTrailer(std::unique_ptr<PdfObject> obj);
369
371 void Init();
372
373 virtual void reset();
374
376 void Clear();
377
378 virtual void clear();
379
382 virtual PdfVersion GetPdfVersion() const = 0;
383
387
388private:
391 PdfDocument(bool empty = false);
392
394
395 // Called by PdfPageCollection
396 void AppendDocumentPages(const PdfDocument& doc);
397 void InsertDocumentPageAt(unsigned atIndex, const PdfDocument& doc, unsigned pageIndex);
398 void AppendDocumentPages(const PdfDocument& doc, unsigned pageIndex, unsigned pageCount);
399
400 // Called by PdfXObjectForm
401 Rect FillXObjectFromPage(PdfXObjectForm& xobj, const PdfPage& page, bool useTrimBox);
402
403 PdfInfo& GetOrCreateInfo();
404
405 void createAction(PdfActionType type, std::unique_ptr<PdfAction>& action);
406
407private:
408 void append(const PdfDocument& doc, bool appendAll);
415 void fixObjectReferences(PdfObject& obj, int difference);
416
417 void deletePages(unsigned atIndex, unsigned pageCount);
418
419 void resetPrivate();
420
421 void lazyLoadOutlines();
422 void lazyLoadInfo();
423
424private:
425 PdfDocument& operator=(const PdfDocument&) = delete;
426
427private:
428 PdfIndirectObjectList m_Objects;
429 PdfMetadata m_Metadata;
430 PdfFontManager m_FontManager;
431 bool m_InfoLazyLoaded;
432 bool m_OutlinesLazyLoaded;
433 std::unique_ptr<PdfObject> m_TrailerObj;
434 std::unique_ptr<PdfTrailer> m_Trailer;
435 std::unique_ptr<PdfCatalog> m_Catalog;
436 std::unique_ptr<PdfInfo> m_Info;
437 std::unique_ptr<PdfPageCollection> m_Pages;
438 std::unique_ptr<PdfAcroForm> m_AcroForm;
439 std::unique_ptr<PdfOutlines> m_Outlines;
440 std::unique_ptr<PdfNameTrees> m_NameTrees;
441};
442
443template<typename TAction>
444std::unique_ptr<TAction> PdfDocument::CreateAction()
445{
446 std::unique_ptr<TAction> ret;
447 createAction(PdfAction::GetActionType<TAction>(), reinterpret_cast<std::unique_ptr<PdfAction>&>(ret));
448 return ret;
449}
450
451template<typename TField>
452typename PdfDocumentFieldIterableBase<TField>::Iterator PdfDocumentFieldIterableBase<TField>::begin() const
453{
454 if (m_doc == nullptr)
455 return Iterator();
456 else
457 return Iterator(*m_doc);
458}
459
460template<typename TField>
461typename PdfDocumentFieldIterableBase<TField>::Iterator PdfDocumentFieldIterableBase<TField>::end() const
462{
463 return Iterator();
464}
465
466template<typename TField>
467PdfDocumentFieldIterableBase<TField>::Iterator::Iterator()
468 : m_doc(nullptr), m_pageIndex(0), m_Field(nullptr)
469{
470}
471
472template<typename TField>
473PdfDocumentFieldIterableBase<TField>::Iterator::Iterator(PdfDocument& doc)
474 : m_doc(&doc), m_pageIndex(0), m_Field(nullptr)
475{
476 stepIntoPageOrForm(doc.GetPages());
477}
478
479template<typename TField>
480bool PdfDocumentFieldIterableBase<TField>::Iterator::operator==(const Iterator& rhs) const
481{
482 if (m_doc == nullptr && rhs.m_doc == nullptr)
483 return true;
484
485 return m_doc == rhs.m_doc && m_pageIndex == rhs.m_pageIndex && m_pageAnnotIterator == rhs.m_pageAnnotIterator && m_acroFormIterator == rhs.m_acroFormIterator;
486}
487
488template<typename TField>
489bool PdfDocumentFieldIterableBase<TField>::Iterator::operator!=(const Iterator& rhs) const
490{
491 if (m_doc == nullptr && rhs.m_doc == nullptr)
492 return false;
493
494 return m_doc != rhs.m_doc || m_pageIndex != rhs.m_pageIndex || m_pageAnnotIterator != rhs.m_pageAnnotIterator || m_acroFormIterator != rhs.m_acroFormIterator;
495}
496
497template<typename TField>
498typename PdfDocumentFieldIterableBase<TField>::Iterator& PdfDocumentFieldIterableBase<TField>::Iterator::operator++()
499{
500 increment();
501 return *this;
502}
503
504template<typename TField>
505typename PdfDocumentFieldIterableBase<TField>::Iterator PdfDocumentFieldIterableBase<TField>::Iterator::operator++(int)
506{
507 auto copy = *this;
508 increment();
509 return copy;
510}
511
512template<typename TField>
513void PdfDocumentFieldIterableBase<TField>::Iterator::increment()
514{
515 if (m_doc == nullptr)
516 return;
517
518 auto& pages = m_doc->GetPages();
519 if (m_pageIndex < pages.GetCount())
520 {
521 m_pageAnnotIterator++;
522 if (stepIntoPageAnnot(pages.GetPageAt(m_pageIndex).GetAnnotations()))
523 return;
524
525 m_pageIndex++;
526 stepIntoPageOrForm(pages);
527 }
528 else
529 {
530 m_acroFormIterator++;
531 stepIntoFormField(m_doc->MustGetAcroForm());
532 }
533}
534
535// Update the iterator for the current page index, or switch to form iteration
536template<typename TField>
537void PdfDocumentFieldIterableBase<TField>::Iterator::stepIntoPageOrForm(PdfPageCollection& pages)
538{
539 while (true)
540 {
541 if (m_pageIndex >= pages.GetCount())
542 break;
543
544 auto& annots = pages.GetPageAt(m_pageIndex).GetAnnotations();
545 m_pageAnnotIterator = annots.begin();
546 if (stepIntoPageAnnot(annots))
547 return;
548
549 m_pageIndex++;
550 }
551
552 auto form = m_doc->GetAcroForm();
553 if (form != nullptr)
554 {
555 m_acroFormIterator = form->begin();
556 stepIntoFormField(*form);
557 return;
558 }
559
560 // End of iteration
561 m_doc = nullptr;
562 m_Field = nullptr;
563 m_visitedObjs.clear();
564}
565
566// Verify the current page annotation iterator. It updates the current field
567// and returns true if a valid unvisited field is found, false otherwise
568template<typename TField>
569bool PdfDocumentFieldIterableBase<TField>::Iterator::stepIntoPageAnnot(PdfAnnotationCollection& annots)
570{
571 while (true)
572 {
573 if (m_pageAnnotIterator == annots.end())
574 break;
575
576 auto& annot = **m_pageAnnotIterator;
577 PdfField* field = nullptr;
578 if (annot.GetType() == PdfAnnotationType::Widget &&
579 (field = &static_cast<PdfAnnotationWidget&>(annot).GetField(),
580 m_visitedObjs.find(field->GetObject().GetIndirectReference()) == m_visitedObjs.end()))
581 {
582 m_Field = field;
583 m_visitedObjs.insert(field->GetObject().GetIndirectReference());
584 return true;
585 }
586
587 m_pageAnnotIterator++;
588 }
589
590 return false;
591}
592
593// Verify the current AcroForm field iterator. It updates the current field
594// if a valid unvisited leaf field is found, or it ends the iteration otherwise
595template<typename TField>
596void PdfDocumentFieldIterableBase<TField>::Iterator::stepIntoFormField(PdfAcroForm& form)
597{
598 while (true)
599 {
600 if (m_acroFormIterator == form.end())
601 break;
602
603 auto& field = **m_acroFormIterator;
604 if (field.GetChildren().GetCount() == 0
605 && m_visitedObjs.find(field.GetObject().GetIndirectReference()) == m_visitedObjs.end())
606 {
607 m_Field = &field;
608 m_visitedObjs.insert(field.GetObject().GetIndirectReference());
609 return;
610 }
611
612 m_acroFormIterator++;
613 }
614
615 // End of iteration
616 m_doc = nullptr;
617 m_Field = nullptr;
618 m_visitedObjs.clear();
619}
620
621};
622
623
624#endif // PDF_DOCUMENT_H
PdfDocument is the core interface for working with PDF documents.
Definition PdfDocument.h:108
PdfPageCollection & GetPages()
Get access to the page tree.
Definition PdfDocument.h:307
const PdfPageCollection & GetPages() const
Get access to the page tree.
Definition PdfDocument.h:311
virtual PdfVersion GetPdfVersion() const =0
Get the PDF version of the document.
const PdfTrailer & GetTrailer() const
Get access to the internal trailer dictionary or root object.
Definition PdfDocument.h:323
virtual bool HasOwnerPermissions() const =0
Checks if document has been opened with full owner privileges.
PdfTrailer & GetTrailer()
Get access to the internal trailer dictionary or root object.
Definition PdfDocument.h:317
virtual void SetPdfVersion(PdfVersion version)=0
Set the PDF version of the document.
PdfCatalog & GetCatalog()
Get access to the internal Catalog dictionary or root object.
Definition PdfDocument.h:297
PdfIndirectObjectList & GetObjects()
Get access to the internal vector of objects or root object.
Definition PdfDocument.h:340
const PdfIndirectObjectList & GetObjects() const
Get access to the internal vector of objects or root object.
Definition PdfDocument.h:346
const PdfCatalog & GetCatalog() const
Get access to the internal Catalog dictionary or root object.
Definition PdfDocument.h:303
A class that is used to encrypt a PDF file and set document permissions on the PDF file.
Definition PdfEncrypt.h:111
PdfExtension is a simple class that describes a vendor-specific extension to the official specificati...
Definition PdfExtension.h:15
This class assists PdfDocument with caching font information.
Definition PdfFontManager.h:50
A list of PdfObjects that constitutes the indirect object list of the document The PdfParser will rea...
Definition PdfIndirectObjectList.h:28
This class provides access to the documents info dictionary, which provides information about the PDF...
Definition PdfInfo.h:18
PdfMemDocument is the core class for reading and manipulating PDF files and writing them back to disk...
Definition PdfMemDocument.h:35
Interface to access names trees in the document.
Definition PdfNameTrees.h:18
This class represents a PDF indirect Object in memory.
Definition PdfObject.h:31
The main PDF outlines dictionary.
Definition PdfOutlines.h:175
Class for managing the tree of Pages in a PDF document. Don't use this class directly.
Definition PdfPageCollection.h:21
PdfPage is one page in the pdf document.
Definition PdfPage.h:133
PdfStreamedDocument is the preferred class for creating new PDF documents.
Definition PdfStreamedDocument.h:47
A normalized rectangle defined by position (left-bottom) and size.
Definition Rect.h:17
Convenient type for char array storage and/or buffer with std::string compatibility.
Definition basetypes.h:30
All classes, functions, types and enums of PoDoFo are members of this namespace.
Definition basetypes.h:13
std::shared_ptr< const PdfColorSpaceFilter > PdfColorSpaceFilterPtr
Convenience alias for a constant PdfColorSpaceFilter shared ptr.
Definition PdfColorSpaceFilter.h:71
std::shared_ptr< const PdfShadingDefinition > PdfShadingDefinitionPtr
Convenience alias for a constant PdfShadingDefinition shared ptr.
Definition PdfPatternDefinition.h:163
PdfAcroFormDefaulAppearance
Definition PdfAcroForm.h:15
std::shared_ptr< const PdfShadingPatternDefinition > PdfShadingPatternDefinitionPtr
Convenience alias for a constant PdfShadingPatternDefinition shared ptr.
Definition PdfPatternDefinition.h:391
std::shared_ptr< const PdfExtGStateDefinition > PdfExtGStateDefinitionPtr
Convenience alias for a constant PdfExtGStateDefinition shared ptr.
Definition PdfExtGStateDefinition.h:30
PdfActionType
The type of the action.
Definition PdfAction.h:25
PdfVersion
Enum to identify different versions of the PDF file format.
Definition PdfDeclarations.h:61
std::shared_ptr< const PdfFunctionDefinition > PdfFunctionDefinitionPtr
Convenience alias for a constant PdfFunction shared ptr.
Definition PdfFunctionDefinition.h:54