PoDoFo  1.0.0-dev
PdfPage.h
1 
7 #ifndef PDF_PAGE_H
8 #define PDF_PAGE_H
9 
10 #include "PdfDeclarations.h"
11 
12 #include <podofo/auxiliary/Rect.h>
13 
14 #include "PdfAnnotationCollection.h"
15 #include "PdfCanvas.h"
16 #include "PdfContents.h"
17 #include "PdfField.h"
18 #include "PdfResources.h"
19 
20 namespace PoDoFo {
21 
22 class PdfDocument;
23 class InputStream;
24 class PdfPage;
25 
26 struct PODOFO_API PdfTextEntry final
27 {
28  std::string Text;
29  int Page = -1;
30  double X = -1;
31  double Y = -1;
32  double Length = -1;
33  nullable<Rect> BoundingBox;
34 };
35 
36 struct PODOFO_API PdfTextExtractParams final
37 {
38  nullable<Rect> ClipRect;
39  PdfTextExtractFlags Flags = PdfTextExtractFlags::None;
40 };
41 
42 template <typename TField>
43 class PdfPageFieldIterableBase final
44 {
45  friend class PdfPage;
46 
47 public:
48  PdfPageFieldIterableBase()
49  : m_page(nullptr) { }
50 
51 private:
52  PdfPageFieldIterableBase(PdfPage& page)
53  : m_page(&page) { }
54 
55 public:
56  class Iterator final
57  {
58  friend class PdfPageFieldIterableBase;
59  public:
60  using difference_type = void;
61  using value_type = TField*;
62  using pointer = void;
63  using reference = void;
64  using iterator_category = std::forward_iterator_tag;
65  public:
66  Iterator()
67  : m_Field(nullptr) { }
68  private:
69  void stepIntoPageAnnot();
70 
71  Iterator(PdfAnnotationCollection::iterator begin,
72  PdfAnnotationCollection::iterator end)
73  : m_annotsIterator(std::move(begin)), m_annotsEnd(std::move(end)), m_Field(nullptr)
74  {
75  stepIntoPageAnnot();
76  }
77 
78  public:
79  Iterator(const Iterator&) = default;
80  Iterator& operator=(const Iterator&) = default;
81  bool operator==(const Iterator& rhs) const
82  {
83  return m_annotsIterator == rhs.m_annotsIterator;
84  }
85  bool operator!=(const Iterator& rhs) const
86  {
87  return m_annotsIterator != rhs.m_annotsIterator;
88  }
89  Iterator& operator++()
90  {
91  m_annotsIterator++;
92  stepIntoPageAnnot();
93  return *this;
94  }
95  Iterator operator++(int)
96  {
97  auto copy = *this;
98  m_annotsIterator++;
99  stepIntoPageAnnot();
100  return copy;
101  }
102  value_type operator*() { return m_Field; }
103  value_type operator->() { return m_Field; }
104  private:
105  PdfAnnotationCollection::iterator m_annotsIterator;
106  PdfAnnotationCollection::iterator m_annotsEnd;
107  value_type m_Field;
108  std::unordered_set<PdfReference> m_visitedObjs;
109  };
110 
111 public:
112  Iterator begin() const;
113  Iterator end() const;
114 
115 private:
116  PdfPage* m_page;
117 };
118 
119 using PdfPageFieldIterable = PdfPageFieldIterableBase<PdfField>;
120 using PdfPageConstFieldIterable = PdfPageFieldIterableBase<const PdfField>;
121 
126 class PODOFO_API PdfPage final : public PdfDictionaryElement, public PdfCanvas
127 {
128  PODOFO_PRIVATE_FRIEND(class PdfPageTest);
129  friend class PdfPageCollection;
130  friend class PdfDocument;
131 
132 private:
137  PdfPage(PdfDocument& parent, const Rect& size);
138 
147  PdfPage(PdfObject& obj);
148  PdfPage(PdfObject& obj, std::vector<PdfObject*>&& parents);
149 
150 public:
151  void ExtractTextTo(std::vector<PdfTextEntry>& entries,
152  const PdfTextExtractParams& params) const;
153 
154  void ExtractTextTo(std::vector<PdfTextEntry>& entries,
155  const std::string_view& pattern = { },
156  const PdfTextExtractParams& params = { }) const;
157 
158  Rect GetRect() const;
159 
160  Rect GetRectRaw() const override;
161 
162  void SetRect(const Rect& rect);
163 
164  void SetRectRaw(const Rect& rect);
165 
166  bool HasRotation(double& teta) const override;
167 
171  void SetMediaBox(const Rect& rect, bool raw = false);
172 
176  void SetCropBox(const Rect& rect, bool raw = false);
177 
181  void SetTrimBox(const Rect& rect, bool raw = false);
182 
186  void SetBleedBox(const Rect& rect, bool raw = false);
187 
191  void SetArtBox(const Rect& rect, bool raw = false);
192 
198  unsigned GetPageNumber() const;
199 
207  static Rect CreateStandardPageSize(const PdfPageSize pageSize, bool landscape = false);
208 
212  Rect GetMediaBox(bool raw = false) const;
213 
217  Rect GetCropBox(bool raw = false) const;
218 
222  Rect GetTrimBox(bool raw = false) const;
223 
227  Rect GetBleedBox(bool raw = false) const;
228 
232  Rect GetArtBox(bool raw = false) const;
233 
237  unsigned GetRotation() const;
238 
242  double GetRotationRaw() const;
243 
248  void SetRotation(int rotation);
249 
252  bool MoveTo(unsigned index);
253 
254  template <typename TField>
255  TField& CreateField(const std::string_view& name, const Rect& rect, bool rawRect = false);
256 
257  PdfField& CreateField(const std::string_view& name, PdfFieldType fieldType, const Rect& rect, bool rawRect = false);
258 
263  PdfPageFieldIterable GetFieldsIterator();
264  PdfPageConstFieldIterable GetFieldsIterator() const;
265 
266 public:
267  unsigned GetIndex() const { return m_Index; }
268  PdfContents& GetOrCreateContents();
269  PdfResources& GetOrCreateResources() override;
270  inline const PdfContents* GetContents() const { return m_Contents.get(); }
271  inline PdfContents* GetContents() { return m_Contents.get(); }
272  const PdfContents& MustGetContents() const;
273  PdfContents& MustGetContents();
274  inline const PdfResources* GetResources() const { return m_Resources.get(); }
275  inline PdfResources* GetResources() { return m_Resources.get(); }
276  const PdfResources& MustGetResources() const;
277  PdfResources& MustGetResources();
278  inline PdfAnnotationCollection& GetAnnotations() { return m_Annotations; }
279  inline const PdfAnnotationCollection& GetAnnotations() const { return m_Annotations; }
280 
281 private:
282  // To be called by PdfPageCollection
283  void FlattenStructure();
284  void SetIndex(unsigned index) { m_Index = index; }
285 
286  void EnsureResourcesCreated() override;
287 
288  void CopyContentsTo(OutputStream& stream) const override;
289 
290  PdfObjectStream& GetOrCreateContentsStream(PdfStreamAppendFlags flags) override;
291 
292  PdfObjectStream& ResetContentsStream() override;
293 
294  PdfResources* getResources() override;
295 
296  PdfObject* getContentsObject() override;
297 
298  PdfDictionaryElement& getElement() override;
299 
300  PdfObject* findInheritableAttribute(const std::string_view& name) const;
301 
302  PdfObject* findInheritableAttribute(const std::string_view& name, bool& isShallow) const;
303 
310  void initNewPage(const Rect& size);
311 
312  void ensureContentsCreated();
313  void ensureResourcesCreated();
314 
319  Rect getPageBox(const std::string_view& inBox, bool isInheritable, bool raw) const;
320 
321  void setPageBox(const PdfName& inBox, const Rect& rect, bool raw);
322 
323  void loadRotation();
324 
325 private:
326  PdfElement& GetElement() = delete;
327  const PdfElement& GetElement() const = delete;
328  PdfObject* GetContentsObject() = delete;
329  const PdfObject* GetContentsObject() const = delete;
330 
331 private:
332  unsigned m_Index;
333  std::vector<PdfObject*> m_parents;
334  std::unique_ptr<PdfContents> m_Contents;
335  std::unique_ptr<PdfResources> m_Resources;
336  PdfAnnotationCollection m_Annotations;
337  int m_Rotation;
338 };
339 
340 template<typename TField>
341 TField& PdfPage::CreateField(const std::string_view& name, const Rect & rect, bool rawRect)
342 {
343  return static_cast<TField&>(CreateField(name, PdfField::GetFieldType<TField>(), rect, rawRect));
344 }
345 
346 template<typename TField>
347 typename PdfPageFieldIterableBase<TField>::Iterator PdfPageFieldIterableBase<TField>::begin() const
348 {
349  if (m_page == nullptr)
350  return Iterator();
351  else
352  return Iterator(m_page->GetAnnotations().begin(), m_page->GetAnnotations().end());
353 }
354 
355 template<typename TField>
356 typename PdfPageFieldIterableBase<TField>::Iterator PdfPageFieldIterableBase<TField>::end() const
357 {
358  if (m_page == nullptr)
359  return Iterator();
360  else
361  return Iterator(m_page->GetAnnotations().end(), m_page->GetAnnotations().end());
362 }
363 
364 template<typename TField>
365 void PdfPageFieldIterableBase<TField>::Iterator::stepIntoPageAnnot()
366 {
367  while (true)
368  {
369  if (m_annotsIterator == m_annotsEnd)
370  break;
371 
372  auto& annot = **m_annotsIterator;
373  PdfField* field = nullptr;
374  if (annot.GetType() == PdfAnnotationType::Widget &&
375  (field = &static_cast<PdfAnnotationWidget&>(annot).GetField(),
376  m_visitedObjs.find(field->GetObject().GetIndirectReference()) == m_visitedObjs.end()))
377  {
378  m_Field = field;
379  m_visitedObjs.insert(field->GetObject().GetIndirectReference());
380  return;
381  }
382 
383  m_annotsIterator++;
384  }
385 
386  m_Field = nullptr;
387  m_visitedObjs.clear();
388 }
389 
390 };
391 
392 #endif // PDF_PAGE_H
SPDX-FileCopyrightText: (C) 2005 Dominik Seichter domseichter@web.de SPDX-FileCopyrightText: (C) 2020...
An interface for writing blocks of data to a data source.
Definition: OutputStream.h:18
An interface that provides the necessary features for a painter to draw onto a PdfObject.
Definition: PdfCanvas.h:28
An interface that provides a wrapper around "PDF content" - the instructions that are used to draw on ...
Definition: PdfContents.h:20
PdfDocument is the core interface for working with PDF documents.
Definition: PdfDocument.h:108
PdfElement is a common base class for all elements in a PDF file.
Definition: PdfElement.h:30
This class represents a PdfName.
Definition: PdfName.h:24
A PDF stream can be appended to any PdfObject and can contain arbitrary data.
Definition: PdfObjectStream.h:87
This class represents a PDF indirect Object in memory.
Definition: PdfObject.h:35
Class for managing the tree of pages in a PDF document. Don't use this class directly.
Definition: PdfPageCollection.h:24
PdfPage is one page in the pdf document.
Definition: PdfPage.h:127
An interface that provides a wrapper around /Resources.
Definition: PdfResources.h:25
A rectangle defined by position and size.
Definition: Rect.h:20
SPDX-FileCopyrightText: (C) 2022 Francesco Pretto ceztko@gmail.com SPDX-License-Identifier: LGPL-2....
Definition: basetypes.h:16
PdfPageSize
Enum holding the supported page sizes by PoDoFo.
Definition: PdfDeclarations.h:479
PdfFieldType
The type of PDF field.
Definition: PdfDeclarations.h:614
PdfTextExtractFlags
Definition: PdfDeclarations.h:170