PoDoFo  1.0.0-dev
PdfDocument.h
1 
7 #ifndef PDF_DOCUMENT_H
8 #define PDF_DOCUMENT_H
9 
10 #include "PdfTrailer.h"
11 #include "PdfCatalog.h"
12 #include "PdfIndirectObjectList.h"
13 #include "PdfAcroForm.h"
14 #include "PdfFontManager.h"
15 #include "PdfMetadata.h"
16 #include "PdfPageCollection.h"
17 #include "PdfNameTrees.h"
18 #include "PdfXObjectForm.h"
19 #include "PdfImage.h"
20 #include "PdfColorSpace.h"
21 #include "PdfInfo.h"
22 #include "PdfOutlines.h"
23 
24 namespace PoDoFo {
25 
26 class PdfAction;
27 class PdfExtGState;
28 class PdfEncrypt;
29 class PdfDocument;
30 
31 template <typename TField>
32 class PdfDocumentFieldIterableBase final
33 {
34  friend class PdfDocument;
35 
36 public:
37  PdfDocumentFieldIterableBase()
38  : m_doc(nullptr) { }
39 
40 private:
41  PdfDocumentFieldIterableBase(PdfDocument& doc)
42  : m_doc(&doc) { }
43 
44 public:
45  class Iterator final
46  {
47  friend class PdfDocumentFieldIterableBase;
48  public:
49  using difference_type = void;
50  using value_type = TField*;
51  using pointer = void;
52  using reference = void;
53  using iterator_category = std::forward_iterator_tag;
54  public:
55  Iterator();
56  private:
57  Iterator(PdfDocument& doc);
58  public:
59  Iterator(const Iterator&) = default;
60  Iterator& operator=(const Iterator&) = default;
61  bool operator==(const Iterator& rhs) const;
62  bool operator!=(const Iterator& rhs) const;
63  Iterator& operator++();
64  Iterator operator++(int);
65  value_type operator*() { return m_Field; }
66  value_type operator->() { return m_Field; }
67  private:
68  void increment();
69  void stepIntoPageOrForm(PdfPageCollection& pages);
70  bool stepIntoPageAnnot(PdfAnnotationCollection& annots);
71  void stepIntoFormField(PdfAcroForm& form);
72  private:
73  PdfDocument* m_doc;
74  unsigned m_pageIndex;
75  PdfAnnotationCollection::iterator m_pageAnnotIterator;
76  PdfAcroForm::iterator m_acroFormIterator;
77  value_type m_Field;
78  std::unordered_set<PdfReference> m_visitedObjs;
79  };
80 
81 public:
82  Iterator begin() const;
83  Iterator end() const;
84 
85 private:
86  PdfDocument* m_doc;
87 };
88 
89 using PdfDocumentFieldIterable = PdfDocumentFieldIterableBase<PdfField>;
90 using PdfDocumentConstFieldIterable = PdfDocumentFieldIterableBase<const PdfField>;
91 
/**
 * PdfDocument is the core interface for working with PDF documents.
 *
 * The class cannot be instantiated directly: GetEncrypt(), GetPdfVersion()
 * and SetPdfVersion() are pure virtual and the constructors are protected.
 *
 * NOTE(review): the numbering of this listing skips from 430 to 432 in the
 * data member section; verify against the original header that no line
 * (doc comment or member declaration) is missing there.
 */
107 class PODOFO_API PdfDocument
108 {
109  friend class PdfMetadata;
110  friend class PdfXObjectForm;
111  friend class PdfPageCollection;
112 
113 public:
116  virtual ~PdfDocument();
117 
 // GetOrCreateXXX() accessors return a reference.
 // NOTE(review): presumably the entry is created when missing, as the
 // names suggest; confirm in the implementation
125  PdfOutlines& GetOrCreateOutlines();
126 
134  PdfNameTrees& GetOrCreateNames();
135 
 /**
  * \param eDefaultAppearance by default ArialBlack, which adds a default
  *   appearance with Arial embedded and black text if no other DA key
  *   is present
  */
144  PdfAcroForm& GetOrCreateAcroForm(PdfAcroFormDefaulAppearance eDefaultAppearance = PdfAcroFormDefaulAppearance::ArialBlack);
145 
146  void CollectGarbage();
147 
 // Factory methods: the created object is returned in a std::unique_ptr,
 // so ownership is handed to the caller
150  std::unique_ptr<PdfImage> CreateImage();
151 
152  std::unique_ptr<PdfXObjectForm> CreateXObjectForm(const Rect& rect);
153 
154  std::unique_ptr<PdfDestination> CreateDestination();
155 
156  std::unique_ptr<PdfColorSpace> CreateColorSpace(const PdfColorSpaceFilterPtr& filter);
157 
158  std::unique_ptr<PdfExtGState> CreateExtGState();
159 
 /** Create an action of the given concrete class, see the definition after the class */
160  template <typename Taction>
161  std::unique_ptr<Taction> CreateAction();
162 
163  std::unique_ptr<PdfAction> CreateAction(PdfActionType type);
164 
165  std::unique_ptr<PdfFileSpec> CreateFileSpec();
166 
 // Permission queries.
 // NOTE(review): presumably evaluated against the PdfEncrypt instance
 // returned by GetEncrypt(), which is the class used to set document
 // permissions; confirm in the implementation
174  bool IsPrintAllowed() const;
175 
183  bool IsEditAllowed() const;
184 
192  bool IsCopyAllowed() const;
193 
201  bool IsEditNotesAllowed() const;
202 
210  bool IsFillAndSignAllowed() const;
211 
219  bool IsAccessibilityAllowed() const;
220 
228  bool IsDocAssemblyAllowed() const;
229 
237  bool IsHighPrintAllowed() const;
238 
 // MustGetXXX() accessors return a reference instead of the pointer
 // returned by the matching GetXXX().
 // NOTE(review): presumably they fail when the entry is missing; confirm
239  PdfAcroForm& MustGetAcroForm();
240 
241  const PdfAcroForm& MustGetAcroForm() const;
242 
243  PdfNameTrees& MustGetNames();
244 
245  const PdfNameTrees& MustGetNames() const;
246 
247  PdfOutlines& MustGetOutlines();
248 
249  const PdfOutlines& MustGetOutlines() const;
250 
 /** Get an iterable over the fields of the document, usable in range-based for loops */
256  PdfDocumentFieldIterable GetFieldsIterator();
257  PdfDocumentConstFieldIterable GetFieldsIterator() const;
258 
261  void Reset();
262 
263 public:
 /** Get the encryption object of the document, to be provided by derived classes */
264  virtual const PdfEncrypt* GetEncrypt() const = 0;
265 
269  bool IsEncrypted() const;
270 
271 public:
 /** Get access to the internal Catalog dictionary or root object */
277  PdfCatalog& GetCatalog() { return *m_Catalog; }
278 
 /** Get access to the internal Catalog dictionary or root object */
284  const PdfCatalog& GetCatalog() const { return *m_Catalog; }
285 
 /** Get access to the page tree */
289  PdfPageCollection& GetPages() { return *m_Pages; }
290 
 /** Get access to the page tree */
294  const PdfPageCollection& GetPages() const { return *m_Pages; }
295 
 /** Get access to the internal trailer dictionary or root object */
301  PdfTrailer &GetTrailer() { return *m_Trailer; }
302 
 /** Get access to the internal trailer dictionary or root object */
308  const PdfTrailer& GetTrailer() const { return *m_Trailer; }
309 
 /**
  * Get access to the internal Info dictionary
  * \returns the pointer held by m_Info, null if it holds no object
  */
316  const PdfInfo* GetInfo() const { return m_Info.get(); }
317 
318  PdfMetadata& GetMetadata() { return m_Metadata; }
319 
320  const PdfMetadata& GetMetadata() const { return m_Metadata; }
321 
 /** Get access to the internal vector of objects or root object */
327  PdfIndirectObjectList& GetObjects() { return m_Objects; }
328 
 /** Get access to the internal vector of objects or root object */
334  const PdfIndirectObjectList& GetObjects() const { return m_Objects; }
335 
 // The following pointer accessors return the pointer held by the
 // corresponding member, which is null if it holds no object
336  PdfAcroForm* GetAcroForm() { return m_AcroForm.get(); }
337 
338  const PdfAcroForm* GetAcroForm() const { return m_AcroForm.get(); }
339 
340  PdfNameTrees* GetNames() { return m_NameTrees.get(); }
341 
342  const PdfNameTrees* GetNames() const { return m_NameTrees.get(); }
343 
344  PdfOutlines* GetOutlines();
345 
346  const PdfOutlines* GetOutlines() const;
347 
348  PdfFontManager& GetFonts() { return m_FontManager; }
349 
350 protected:
 // Construction and copy construction are reserved to derived classes
354  PdfDocument(bool empty = false);
355 
356  PdfDocument(const PdfDocument& doc);
357 
364  void SetTrailer(std::unique_ptr<PdfObject> obj);
365 
368  void Init();
369 
 // NOTE(review): reset()/clear() look like the overridable hooks of the
 // non-virtual Reset()/Clear(); confirm in the implementation
370  virtual void reset();
371 
374  void Clear();
375 
376  virtual void clear();
377 
 /** Get the PDF version of the document */
381  virtual PdfVersion GetPdfVersion() const = 0;
382 
 /** Set the PDF version of the document */
386  virtual void SetPdfVersion(PdfVersion version) = 0;
387 
388 private:
389  // Called by PdfPageCollection
390  void AppendDocumentPages(const PdfDocument& doc);
391  void InsertDocumentPageAt(unsigned atIndex, const PdfDocument& doc, unsigned pageIndex);
392  void AppendDocumentPages(const PdfDocument& doc, unsigned pageIndex, unsigned pageCount);
393 
394  // Called by PdfXObjectForm
395  Rect FillXObjectFromPage(PdfXObjectForm& xobj, const PdfPage& page, bool useTrimBox);
396 
397  PdfInfo& GetOrCreateInfo();
398 
 // Backend of the CreateAction() overloads: the created action is
 // returned through the "action" output parameter
399  void createAction(PdfActionType type, std::unique_ptr<PdfAction>& action);
400 
401 private:
402  void append(const PdfDocument& doc, bool appendAll);
410  void fixObjectReferences(PdfObject& obj, int difference);
411 
412  void deletePages(unsigned atIndex, unsigned pageCount);
413 
414  void resetPrivate();
415 
416  void initOutlines();
417 
418 private:
 // Copy assignment is disabled
419  PdfDocument& operator=(const PdfDocument&) = delete;
420 
421 private:
422  PdfIndirectObjectList m_Objects;
423  PdfMetadata m_Metadata;
424  PdfFontManager m_FontManager;
425  std::unique_ptr<PdfObject> m_TrailerObj;
426  std::unique_ptr<PdfTrailer> m_Trailer;
427  std::unique_ptr<PdfCatalog> m_Catalog;
428  std::unique_ptr<PdfInfo> m_Info;
429  std::unique_ptr<PdfPageCollection> m_Pages;
430  std::unique_ptr<PdfAcroForm> m_AcroForm;
432  std::unique_ptr<PdfNameTrees> m_NameTrees;
433 };
434 
435 template<typename TAction>
436 std::unique_ptr<TAction> PdfDocument::CreateAction()
437 {
438  std::unique_ptr<TAction> ret;
439  createAction(PdfAction::GetActionType<TAction>(), reinterpret_cast<std::unique_ptr<PdfAction>&>(ret));
440  return ret;
441 }
442 
443 template<typename TField>
444 typename PdfDocumentFieldIterableBase<TField>::Iterator PdfDocumentFieldIterableBase<TField>::begin() const
445 {
446  if (m_doc == nullptr)
447  return Iterator();
448  else
449  return Iterator(*m_doc);
450 }
451 
452 template<typename TField>
453 typename PdfDocumentFieldIterableBase<TField>::Iterator PdfDocumentFieldIterableBase<TField>::end() const
454 {
455  return Iterator();
456 }
457 
458 template<typename TField>
459 PdfDocumentFieldIterableBase<TField>::Iterator::Iterator()
460  : m_doc(nullptr), m_pageIndex(0), m_Field(nullptr)
461 {
462 }
463 
464 template<typename TField>
465 PdfDocumentFieldIterableBase<TField>::Iterator::Iterator(PdfDocument& doc)
466  : m_doc(&doc), m_pageIndex(0), m_Field(nullptr)
467 {
468  stepIntoPageOrForm(doc.GetPages());
469 }
470 
471 template<typename TField>
472 bool PdfDocumentFieldIterableBase<TField>::Iterator::operator==(const Iterator& rhs) const
473 {
474  if (m_doc == nullptr && rhs.m_doc == nullptr)
475  return true;
476 
477  return m_doc == rhs.m_doc && m_pageIndex == rhs.m_pageIndex && m_pageAnnotIterator == rhs.m_pageAnnotIterator && m_acroFormIterator == rhs.m_acroFormIterator;
478 }
479 
480 template<typename TField>
481 bool PdfDocumentFieldIterableBase<TField>::Iterator::operator!=(const Iterator& rhs) const
482 {
483  if (m_doc == nullptr && rhs.m_doc == nullptr)
484  return false;
485 
486  return m_doc != rhs.m_doc || m_pageIndex != rhs.m_pageIndex || m_pageAnnotIterator != rhs.m_pageAnnotIterator || m_acroFormIterator != rhs.m_acroFormIterator;
487 }
488 
489 template<typename TField>
490 typename PdfDocumentFieldIterableBase<TField>::Iterator& PdfDocumentFieldIterableBase<TField>::Iterator::operator++()
491 {
492  increment();
493  return *this;
494 }
495 
496 template<typename TField>
497 typename PdfDocumentFieldIterableBase<TField>::Iterator PdfDocumentFieldIterableBase<TField>::Iterator::operator++(int)
498 {
499  auto copy = *this;
500  increment();
501  return copy;
502 }
503 
504 template<typename TField>
505 void PdfDocumentFieldIterableBase<TField>::Iterator::increment()
506 {
507  if (m_doc == nullptr)
508  return;
509 
510  auto& pages = m_doc->GetPages();
511  if (m_pageIndex < pages.GetCount())
512  {
513  m_pageAnnotIterator++;
514  if (stepIntoPageAnnot(pages.GetPageAt(m_pageIndex).GetAnnotations()))
515  return;
516 
517  m_pageIndex++;
518  stepIntoPageOrForm(pages);
519  }
520  else
521  {
522  m_acroFormIterator++;
523  stepIntoFormField(m_doc->MustGetAcroForm());
524  }
525 }
526 
527 // Update the iterator for the current page index, or swith to form iteration
528 template<typename TField>
529 void PdfDocumentFieldIterableBase<TField>::Iterator::stepIntoPageOrForm(PdfPageCollection& pages)
530 {
531  while (true)
532  {
533  if (m_pageIndex >= pages.GetCount())
534  break;
535 
536  auto& annots = pages.GetPageAt(m_pageIndex).GetAnnotations();
537  m_pageAnnotIterator = annots.begin();
538  if (stepIntoPageAnnot(annots))
539  return;
540 
541  m_pageIndex++;
542  }
543 
544  auto form = m_doc->GetAcroForm();
545  if (form != nullptr)
546  {
547  m_acroFormIterator = form->begin();
548  stepIntoFormField(*form);
549  return;
550  }
551 
552  // End of iteration
553  m_doc = nullptr;
554  m_Field = nullptr;
555  m_visitedObjs.clear();
556 }
557 
558 // Verify the current page annotation iterator. It updates the current field
559 // and returns true if a valid unvisited field is found, false otherwise
560 template<typename TField>
561 bool PdfDocumentFieldIterableBase<TField>::Iterator::stepIntoPageAnnot(PdfAnnotationCollection& annots)
562 {
563  while (true)
564  {
565  if (m_pageAnnotIterator == annots.end())
566  break;
567 
568  auto& annot = **m_pageAnnotIterator;
569  PdfField* field = nullptr;
570  if (annot.GetType() == PdfAnnotationType::Widget &&
571  (field = &static_cast<PdfAnnotationWidget&>(annot).GetField(),
572  m_visitedObjs.find(field->GetObject().GetIndirectReference()) == m_visitedObjs.end()))
573  {
574  m_Field = field;
575  m_visitedObjs.insert(field->GetObject().GetIndirectReference());
576  return true;
577  }
578 
579  m_pageAnnotIterator++;
580  }
581 
582  return false;
583 }
584 
585 // Verify the current AcroForm field iterator. It updates the current field
586 // if a valid unvisited leaf field is found, or it ends the iteration otherwise
587 template<typename TField>
588 void PdfDocumentFieldIterableBase<TField>::Iterator::stepIntoFormField(PdfAcroForm& form)
589 {
590  while (true)
591  {
592  if (m_acroFormIterator == form.end())
593  break;
594 
595  auto& field = **m_acroFormIterator;
596  if (field.GetChildren().GetCount() == 0
597  && m_visitedObjs.find(field.GetObject().GetIndirectReference()) == m_visitedObjs.end())
598  {
599  m_Field = &field;
600  m_visitedObjs.insert(field.GetObject().GetIndirectReference());
601  return;
602  }
603 
604  m_acroFormIterator++;
605  }
606 
607  // End of iteration
608  m_doc = nullptr;
609  m_Field = nullptr;
610  m_visitedObjs.clear();
611 }
612 
613 };
614 
615 
616 #endif // PDF_DOCUMENT_H
PdfDocument is the core interface for working with PDF documents.
Definition: PdfDocument.h:108
const PdfTrailer & GetTrailer() const
Get access to the internal trailer dictionary or root object.
Definition: PdfDocument.h:308
const PdfCatalog & GetCatalog() const
Get access to the internal Catalog dictionary or root object.
Definition: PdfDocument.h:284
const PdfPageCollection & GetPages() const
Get access to the page tree.
Definition: PdfDocument.h:294
virtual PdfVersion GetPdfVersion() const =0
Get the PDF version of the document.
const PdfInfo * GetInfo() const
Get access to the internal Info dictionary. You can set the author, title etc.
Definition: PdfDocument.h:316
PdfCatalog & GetCatalog()
Get access to the internal Catalog dictionary or root object.
Definition: PdfDocument.h:277
virtual void SetPdfVersion(PdfVersion version)=0
Set the PDF version of the document.
PdfIndirectObjectList & GetObjects()
Get access to the internal vector of objects or root object.
Definition: PdfDocument.h:327
PdfPageCollection & GetPages()
Get access to the page tree.
Definition: PdfDocument.h:289
PdfTrailer & GetTrailer()
Get access to the internal trailer dictionary or root object.
Definition: PdfDocument.h:301
const PdfIndirectObjectList & GetObjects() const
Get access to the internal vector of objects or root object.
Definition: PdfDocument.h:334
A class that is used to encrypt a PDF file and set document permissions on the PDF file.
Definition: PdfEncrypt.h:122
This class assists PdfDocument with caching font information.
Definition: PdfFontManager.h:54
A list of PdfObjects that constitutes the indirect object list of the document The PdfParser will rea...
Definition: PdfIndirectObjectList.h:30
This class provides access to the document's info dictionary, which provides information about the PDF...
Definition: PdfInfo.h:21
Interface to access names trees in the document.
Definition: PdfNameTrees.h:22
This class represents a PDF indirect Object in memory.
Definition: PdfObject.h:35
The main PDF outlines dictionary.
Definition: PdfOutlines.h:206
Class for managing the tree of Pages in a PDF document. Don't use this class directly.
Definition: PdfPageCollection.h:24
PdfPage is one page in the pdf document.
Definition: PdfPage.h:127
A rectangle defined by position and size.
Definition: Rect.h:20
Alternative to std::optional that supports reference (but not pointer) types.
Definition: nullable.h:29
SPDX-FileCopyrightText: (C) 2022 Francesco Pretto ceztko@gmail.com SPDX-License-Identifier: LGPL-2....
Definition: basetypes.h:16
PdfActionType
The type of the action.
Definition: PdfAction.h:28
std::shared_ptr< const PdfColorSpaceFilter > PdfColorSpaceFilterPtr
Convenience alias for a constant PdfColorSpaceFilter shared ptr.
Definition: PdfColorSpaceFilter.h:77
PdfAcroFormDefaulAppearance
Definition: PdfAcroForm.h:17
@ ArialBlack
Add a default appearance with Arial embedded and black text if no other DA key is present.
PdfVersion
Enum to identify different versions of the PDF file format.
Definition: PdfDeclarations.h:71