12#include <podofo/auxiliary/Rect.h>
14#include "PdfAnnotationCollection.h"
16#include "PdfContents.h"
18#include "PdfResources.h"
26struct PODOFO_API PdfTextEntry final
33 nullable<Rect> BoundingBox;
40 unsigned ReadCount = 0;
43struct PODOFO_API PdfTextExtractParams
final
52template <
typename TField>
53class PdfPageFieldIterableBase final
58 PdfPageFieldIterableBase()
62 PdfPageFieldIterableBase(PdfPage& page)
68 friend class PdfPageFieldIterableBase;
70 using difference_type = void;
71 using value_type = TField*;
73 using reference = void;
74 using iterator_category = std::forward_iterator_tag;
77 : m_Field(nullptr) { }
79 void stepIntoPageAnnot();
81 Iterator(PdfAnnotationCollection::iterator begin,
82 PdfAnnotationCollection::iterator end)
83 : m_annotsIterator(std::move(begin)), m_annotsEnd(std::move(end)), m_Field(nullptr)
89 Iterator(
const Iterator&) =
default;
90 Iterator& operator=(
const Iterator&) =
default;
91 bool operator==(
const Iterator& rhs)
const
93 return m_annotsIterator == rhs.m_annotsIterator;
95 bool operator!=(
const Iterator& rhs)
const
97 return m_annotsIterator != rhs.m_annotsIterator;
99 Iterator& operator++()
105 Iterator operator++(
int)
112 value_type operator*() {
return m_Field; }
113 value_type operator->() {
return m_Field; }
115 PdfAnnotationCollection::iterator m_annotsIterator;
116 PdfAnnotationCollection::iterator m_annotsEnd;
118 std::unordered_set<PdfReference> m_visitedObjs;
122 Iterator begin()
const;
123 Iterator end()
const;
129using PdfPageFieldIterable = PdfPageFieldIterableBase<PdfField>;
130using PdfPageConstFieldIterable = PdfPageFieldIterableBase<const PdfField>;
161 void ExtractTextTo(std::vector<PdfTextEntry>&
entries,
162 const PdfTextExtractParams&
params)
const;
164 void ExtractTextTo(std::vector<PdfTextEntry>&
entries,
165 const std::string_view&
pattern = { },
166 const PdfTextExtractParams&
params = { })
const;
178 Corners GetRectRaw()
const override;
182 bool TryGetRotationRadians(
double&
teta)
const override;
187 double GetRotationRadians()
const;
219 unsigned GetPageNumber()
const;
233 Rect GetMediaBox()
const;
234 Corners GetMediaBoxRaw()
const;
239 Rect GetCropBox()
const;
245 Rect GetTrimBox()
const;
251 Rect GetBleedBox()
const;
252 Corners GetBleedBoxRaw()
const;
257 Rect GetArtBox()
const;
269 bool TryGetRotationRaw(
double&
rotation)
const;
279 bool MoveTo(
unsigned index);
281 template <
typename TField>
294 unsigned GetIndex()
const {
return m_Index; }
295 PdfContents& GetOrCreateContents();
296 inline const PdfContents* GetContents()
const {
return m_Contents.get(); }
297 inline PdfContents* GetContents() {
return m_Contents.get(); }
298 const PdfContents& MustGetContents()
const;
299 PdfContents& MustGetContents();
300 const PdfResources& GetResources()
const;
301 PdfResources& GetResources();
302 inline PdfAnnotationCollection& GetAnnotations() {
return m_Annotations; }
303 inline const PdfAnnotationCollection& GetAnnotations()
const {
return m_Annotations; }
307 void FlattenStructure();
308 void SetIndex(
unsigned index) { m_Index = index; }
310 void CopyContentsTo(OutputStream& stream)
const override;
312 PdfObjectStream& GetOrCreateContentsStream(PdfStreamAppendFlags flags)
override;
314 PdfObjectStream& ResetContentsStream()
override;
316 PdfResources& GetOrCreateResources()
override;
318 PdfResources* getResources()
override;
320 PdfObject* getContentsObject()
override;
322 PdfDictionaryElement& getElement()
override;
324 PdfObject* findInheritableAttribute(
const std::string_view& name)
const;
326 PdfObject* findInheritableAttribute(
const std::string_view& name,
bool& isShallow)
const;
328 void ensureContentsCreated();
334 Rect getPageBox(
const std::string_view& inBox,
bool isInheritable)
const;
336 Corners getPageBoxRaw(
const std::string_view& inBox,
bool isInheritable)
const;
338 void setPageBox(
const PdfName& inBox,
const Rect& rect);
340 void adjustRectToCurrentRotation(Rect& rect)
const;
344 PdfElement& GetElement() =
delete;
345 const PdfElement& GetElement()
const =
delete;
346 PdfObject* GetContentsObject() =
delete;
347 const PdfObject* GetContentsObject()
const =
delete;
353 std::vector<PdfObject*> m_parents;
354 std::unique_ptr<PdfContents> m_Contents;
355 std::unique_ptr<PdfResources> m_Resources;
356 PdfAnnotationCollection m_Annotations;
359template<
typename TField>
360TField& PdfPage::CreateField(
const std::string_view& name,
const Rect & rect)
362 return static_cast<TField&
>(CreateField(name, PdfField::GetFieldType<TField>(), rect));
365template<
typename TField>
366typename PdfPageFieldIterableBase<TField>::Iterator PdfPageFieldIterableBase<TField>::begin()
const
368 if (m_page ==
nullptr)
371 return Iterator(m_page->GetAnnotations().begin(), m_page->GetAnnotations().end());
374template<
typename TField>
375typename PdfPageFieldIterableBase<TField>::Iterator PdfPageFieldIterableBase<TField>::end()
const
377 if (m_page ==
nullptr)
380 return Iterator(m_page->GetAnnotations().end(), m_page->GetAnnotations().end());
383template<
typename TField>
384void PdfPageFieldIterableBase<TField>::Iterator::stepIntoPageAnnot()
388 if (m_annotsIterator == m_annotsEnd)
391 auto& annot = **m_annotsIterator;
392 PdfField* field =
nullptr;
393 if (annot.GetType() == PdfAnnotationType::Widget &&
394 (field = &
static_cast<PdfAnnotationWidget&
>(annot).GetField(),
395 m_visitedObjs.find(field->GetObject().GetIndirectReference()) == m_visitedObjs.end()))
398 m_visitedObjs.insert(field->GetObject().GetIndirectReference());
406 m_visitedObjs.clear();
SPDX-FileCopyrightText: (C) 2005 Dominik Seichter domseichter@web.de SPDX-FileCopyrightText: (C) 2020...
An unoriented rectangle defined by 2 points.
Definition Corners.h:20
An interface that provides the necessary features for a painter to draw onto a PdfObject.
Definition PdfCanvas.h:28
PdfDocument is the core interface for working with PDF documents.
Definition PdfDocument.h:111
This class represents a PDF indirect Object in memory.
Definition PdfObject.h:35
Class for managing the tree of Pages in a PDF document. Don't use this class directly.
Definition PdfPageCollection.h:24
PdfPage is one page in the pdf document.
Definition PdfPage.h:137
Rect GetRect() const
Get the rectangle of this page.
Definition PdfPage.h:171
unsigned GetRotation() const
Get the normalized page rotation (0, 90, 180 or 270)
Definition PdfPage.h:263
A normalized rectangle defined by position (left-bottom) and size.
Definition Rect.h:20
Convenient type for char array storage and/or buffer with std::string compatibility.
Definition basetypes.h:38
SPDX-FileCopyrightText: (C) 2022 Francesco Pretto ceztko@gmail.com SPDX-License-Identifier: LGPL-2....
Definition basetypes.h:16
PdfPageSize
Enum holding the supported page sizes by PoDoFo.
Definition PdfDeclarations.h:518
PdfFieldType
The type of PDF field.
Definition PdfDeclarations.h:650
PdfTextExtractFlags
Definition PdfDeclarations.h:199
A structure with status progress attributes of certain operations.
Definition PdfPage.h:39