PoDoFo 1.0.0-dev
Loading...
Searching...
No Matches
PdfPage.h
1
7#ifndef PDF_PAGE_H
8#define PDF_PAGE_H
9
10#include "PdfDeclarations.h"
11
12#include <podofo/auxiliary/Rect.h>
13
14#include "PdfAnnotationCollection.h"
15#include "PdfCanvas.h"
16#include "PdfContents.h"
17#include "PdfField.h"
18#include "PdfResources.h"
19
20namespace PoDoFo {
21
// Forward declarations. PdfPage in particular must be declared before
// PdfPageFieldIterableBase below, which befriends it and stores a PdfPage pointer.
22class PdfDocument;
23class InputStream;
24class PdfPage;
25
26struct PODOFO_API PdfTextEntry final
27{
28 std::string Text;
29 int Page = -1;
30 double X = -1;
31 double Y = -1;
32 double Length = -1;
33 nullable<Rect> BoundingBox;
34};
35
36struct PODOFO_API PdfTextExtractParams final
37{
38 nullable<Rect> ClipRect;
39 PdfTextExtractFlags Flags = PdfTextExtractFlags::None;
40
41 std::function<bool(int read_cnt)> AbortCheck = nullptr;
42};
43
44template <typename TField>
45class PdfPageFieldIterableBase final
46{
47 friend class PdfPage;
48
49public:
50 PdfPageFieldIterableBase()
51 : m_page(nullptr) { }
52
53private:
54 PdfPageFieldIterableBase(PdfPage& page)
55 : m_page(&page) { }
56
57public:
58 class Iterator final
59 {
60 friend class PdfPageFieldIterableBase;
61 public:
62 using difference_type = void;
63 using value_type = TField*;
64 using pointer = void;
65 using reference = void;
66 using iterator_category = std::forward_iterator_tag;
67 public:
68 Iterator()
69 : m_Field(nullptr) { }
70 private:
71 void stepIntoPageAnnot();
72
73 Iterator(PdfAnnotationCollection::iterator begin,
74 PdfAnnotationCollection::iterator end)
75 : m_annotsIterator(std::move(begin)), m_annotsEnd(std::move(end)), m_Field(nullptr)
76 {
77 stepIntoPageAnnot();
78 }
79
80 public:
81 Iterator(const Iterator&) = default;
82 Iterator& operator=(const Iterator&) = default;
83 bool operator==(const Iterator& rhs) const
84 {
85 return m_annotsIterator == rhs.m_annotsIterator;
86 }
87 bool operator!=(const Iterator& rhs) const
88 {
89 return m_annotsIterator != rhs.m_annotsIterator;
90 }
91 Iterator& operator++()
92 {
93 m_annotsIterator++;
94 stepIntoPageAnnot();
95 return *this;
96 }
97 Iterator operator++(int)
98 {
99 auto copy = *this;
100 m_annotsIterator++;
101 stepIntoPageAnnot();
102 return copy;
103 }
104 value_type operator*() { return m_Field; }
105 value_type operator->() { return m_Field; }
106 private:
107 PdfAnnotationCollection::iterator m_annotsIterator;
108 PdfAnnotationCollection::iterator m_annotsEnd;
109 value_type m_Field;
110 std::unordered_set<PdfReference> m_visitedObjs;
111 };
112
113public:
114 Iterator begin() const;
115 Iterator end() const;
116
117private:
118 PdfPage* m_page;
119};
120
// Convenience aliases: the first iterable yields PdfField*, the second const PdfField*
121using PdfPageFieldIterable = PdfPageFieldIterableBase<PdfField>;
122using PdfPageConstFieldIterable = PdfPageFieldIterableBase<const PdfField>;
123
128class PODOFO_API PdfPage final : public PdfDictionaryElement, public PdfCanvas
129{
130 PODOFO_PRIVATE_FRIEND(class PdfPageTest);
131 friend class PdfPageCollection;
132 friend class PdfDocument;
133
134private:
139 PdfPage(PdfDocument& parent, const Rect& size);
140
150 PdfPage(PdfObject& obj, std::vector<PdfObject*>&& parents);
151
152public:
153 void ExtractTextTo(std::vector<PdfTextEntry>& entries,
154 const PdfTextExtractParams& params) const;
155
156 void ExtractTextTo(std::vector<PdfTextEntry>& entries,
157 const std::string_view& pattern = { },
158 const PdfTextExtractParams& params = { }) const;
159
163 Rect GetRect() const { return m_Rect; }
164
168 void SetRect(const Rect& rect);
169
170 Corners GetRectRaw() const override;
171
172 void SetRectRaw(const Corners& rect);
173
174 bool TryGetRotationRadians(double& teta) const override;
175
179 double GetRotationRadians() const;
180
184 void SetMediaBox(const Rect& rect);
185
189 void SetCropBox(const Rect& rect);
190
194 void SetTrimBox(const Rect& rect);
195
199 void SetBleedBox(const Rect& rect);
200
204 void SetArtBox(const Rect& rect);
205
211 unsigned GetPageNumber() const;
212
220 static Rect CreateStandardPageSize(const PdfPageSize pageSize, bool landscape = false);
221
225 Rect GetMediaBox() const;
226 Corners GetMediaBoxRaw() const;
227
231 Rect GetCropBox() const;
232 Corners GetCropBoxRaw() const;
233
237 Rect GetTrimBox() const;
238 Corners GetTrimBoxRaw() const;
239
243 Rect GetBleedBox() const;
244 Corners GetBleedBoxRaw() const;
245
249 Rect GetArtBox() const;
250 Corners GetArtBoxRaw() const;
251
255 unsigned GetRotation() const { return m_Rotation; }
256
261 bool TryGetRotationRaw(double& rotation) const;
262
267 void SetRotation(int rotation);
268
271 bool MoveTo(unsigned index);
272
273 template <typename TField>
274 TField& CreateField(const std::string_view& name, const Rect& rect);
275
276 PdfField& CreateField(const std::string_view& name, PdfFieldType fieldType, const Rect& rect);
277
282 PdfPageFieldIterable GetFieldsIterator();
283 PdfPageConstFieldIterable GetFieldsIterator() const;
284
285public:
286 unsigned GetIndex() const { return m_Index; }
287 PdfContents& GetOrCreateContents();
288 inline const PdfContents* GetContents() const { return m_Contents.get(); }
289 inline PdfContents* GetContents() { return m_Contents.get(); }
290 const PdfContents& MustGetContents() const;
291 PdfContents& MustGetContents();
292 const PdfResources& GetResources() const;
293 PdfResources& GetResources();
294 inline PdfAnnotationCollection& GetAnnotations() { return m_Annotations; }
295 inline const PdfAnnotationCollection& GetAnnotations() const { return m_Annotations; }
296
297private:
298 // To be called by PdfPageCollection
299 void FlattenStructure();
300 void SetIndex(unsigned index) { m_Index = index; }
301
302 void CopyContentsTo(OutputStream& stream) const override;
303
304 PdfObjectStream& GetOrCreateContentsStream(PdfStreamAppendFlags flags) override;
305
306 PdfObjectStream& ResetContentsStream() override;
307
308 PdfResources& GetOrCreateResources() override;
309
310 PdfResources* getResources() override;
311
312 PdfObject* getContentsObject() override;
313
314 PdfDictionaryElement& getElement() override;
315
316 PdfObject* findInheritableAttribute(const std::string_view& name) const;
317
318 PdfObject* findInheritableAttribute(const std::string_view& name, bool& isShallow) const;
319
320 void ensureContentsCreated();
321
326 Rect getPageBox(const std::string_view& inBox, bool isInheritable) const;
327
328 Corners getPageBoxRaw(const std::string_view& inBox, bool isInheritable) const;
329
330 void setPageBox(const PdfName& inBox, const Rect& rect);
331
332 void adjustRectToCurrentRotation(Rect& rect) const;
333
334private:
335 // Remove some PdfCanvas methods to maintain the class API surface clean
336 PdfElement& GetElement() = delete;
337 const PdfElement& GetElement() const = delete;
338 PdfObject* GetContentsObject() = delete;
339 const PdfObject* GetContentsObject() const = delete;
340
341private:
342 unsigned m_Index;
343 unsigned m_Rotation;
344 Rect m_Rect;
345 std::vector<PdfObject*> m_parents;
346 std::unique_ptr<PdfContents> m_Contents;
347 std::unique_ptr<PdfResources> m_Resources;
348 PdfAnnotationCollection m_Annotations;
349};
350
351template<typename TField>
352TField& PdfPage::CreateField(const std::string_view& name, const Rect & rect)
353{
354 return static_cast<TField&>(CreateField(name, PdfField::GetFieldType<TField>(), rect));
355}
356
357template<typename TField>
358typename PdfPageFieldIterableBase<TField>::Iterator PdfPageFieldIterableBase<TField>::begin() const
359{
360 if (m_page == nullptr)
361 return Iterator();
362 else
363 return Iterator(m_page->GetAnnotations().begin(), m_page->GetAnnotations().end());
364}
365
366template<typename TField>
367typename PdfPageFieldIterableBase<TField>::Iterator PdfPageFieldIterableBase<TField>::end() const
368{
369 if (m_page == nullptr)
370 return Iterator();
371 else
372 return Iterator(m_page->GetAnnotations().end(), m_page->GetAnnotations().end());
373}
374
375template<typename TField>
376void PdfPageFieldIterableBase<TField>::Iterator::stepIntoPageAnnot()
377{
378 while (true)
379 {
380 if (m_annotsIterator == m_annotsEnd)
381 break;
382
383 auto& annot = **m_annotsIterator;
384 PdfField* field = nullptr;
385 if (annot.GetType() == PdfAnnotationType::Widget &&
386 (field = &static_cast<PdfAnnotationWidget&>(annot).GetField(),
387 m_visitedObjs.find(field->GetObject().GetIndirectReference()) == m_visitedObjs.end()))
388 {
389 m_Field = field;
390 m_visitedObjs.insert(field->GetObject().GetIndirectReference());
391 return;
392 }
393
394 m_annotsIterator++;
395 }
396
397 m_Field = nullptr;
398 m_visitedObjs.clear();
399}
400
401};
402
403#endif // PDF_PAGE_H
SPDX-FileCopyrightText: (C) 2005 Dominik Seichter domseichter@web.de SPDX-FileCopyrightText: (C) 2020...
An unoriented rectangle defined by 2 points.
Definition Corners.h:20
An interface that provides the necessary features for a painter to draw onto a PdfObject.
Definition PdfCanvas.h:28
PdfDocument is the core interface for working with PDF documents.
Definition PdfDocument.h:111
This class represents a PDF indirect Object in memory.
Definition PdfObject.h:35
Class for managing the tree of Pages in a PDF document. Don't use this class directly.
Definition PdfPageCollection.h:24
PdfPage is one page in the pdf document.
Definition PdfPage.h:129
Rect GetRect() const
Get the rectangle of this page.
Definition PdfPage.h:163
unsigned GetRotation() const
Get the normalized page rotation (0, 90, 180 or 270)
Definition PdfPage.h:255
A normalized rectangle defined by position (left-bottom) and size.
Definition Rect.h:20
Convenient type for char array storage and/or buffer with std::string compatibility.
Definition basetypes.h:38
SPDX-FileCopyrightText: (C) 2022 Francesco Pretto ceztko@gmail.com SPDX-License-Identifier: LGPL-2....
Definition basetypes.h:16
PdfPageSize
Enum holding the supported page sizes by PoDoFo.
Definition PdfDeclarations.h:518
PdfFieldType
The type of PDF field.
Definition PdfDeclarations.h:650
PdfTextExtractFlags
Definition PdfDeclarations.h:199