PoDoFo 1.1.0
Loading...
Searching...
No Matches
PdfContentStreamReader.h
1
7#ifndef PDF_CONTENT_READER_H
8#define PDF_CONTENT_READER_H
9
10#include "PdfXObject.h"
11#include "PdfCanvas.h"
12#include "PdfData.h"
13#include "PdfDictionary.h"
14#include "PdfVariantStack.h"
15#include "PdfPostScriptTokenizer.h"
16
17namespace PoDoFo {
18
22{
23 Unknown = 0,
24 Operator,
26 ImageData,
27 DoXObject,
31};
32
41
43{
44 None = 0,
45 InvalidOperator = 1,
46 InvalidXObject = 2,
47};
48
// Content as read from content streams.
// Instances are filled in by PdfContentStreamReader (declared a friend below so it
// can write the private state); see PdfContentStreamReader::TryReadNext(PdfContent&).
51class PODOFO_API PdfContent final
52{
53 friend class PdfContentStreamReader;
54public:
55 PdfContent();
56public:
// Raw payload of one content entry. It is exposed directly through
// operator*() / operator->() further down.
57 struct Data
58 {
59 PdfVariantStack Stack;
60 PdfOperator Operator = PdfOperator::Unknown;
61 std::string_view Keyword;
62 PdfDictionary InlineImageDictionary;
63 charbuff InlineImageData;
64 const PdfName* Name = nullptr;
// NOTE(review): GetXObject() below returns
// `const std::shared_ptr<const PdfXObject>&`, but this member is
// `std::shared_ptr<PdfXObject>` (PdfContentStreamReader::Input::Form also uses the
// const-qualified pointee). If the getter simply returns this member, the
// reference would bind to a converted temporary and dangle - confirm against the
// out-of-line definition.
65 std::shared_ptr<PdfXObject> XObject;
66 };
67
// Accessors for the individual Data fields. Their definitions are not in this
// header.
// NOTE(review): given the private checkAccess(PdfContentType) helper, these
// presumably validate the content type before returning - confirm in the .cpp.
68 const PdfVariantStack& GetStack() const;
69 PdfOperator GetOperator() const;
70 const std::string_view& GetKeyword() const;
71 const PdfDictionary& GetInlineImageDictionary() const;
72 const charbuff& GetInlineImageData() const;
73 const std::shared_ptr<const PdfXObject>& GetXObject() const;
74
75 bool HasWarnings() const;
76 bool HasErrors() const;
77
// Trivial inline getters returning the private Type / Warnings / Errors members.
78 PdfContentType GetType() const { return Type; }
79 PdfContentWarnings GetWarnings() const { return Warnings; }
80 PdfContentErrors GetErrors() const { return Errors; }
81
// Unchecked access to content data.
84 const Data& operator*() const { return Data; }
85
// Unchecked and mutable access to content data.
88 Data& operator*() { return Data; }
89
// Unchecked access to content data.
92 const Data* operator->() const { return &Data; }
93
// Unchecked and mutable access to content data.
96 Data* operator->() { return &Data; }
97
98private:
// Defined out of line; takes the content type to check.
// NOTE(review): exact failure behaviour (throw vs. other) is not visible here.
99 void checkAccess(PdfContentType type) const;
100
101private:
102 PdfContentType Type;
103 bool ThrowOnWarnings;
104 PdfContentWarnings Warnings;
105 PdfContentErrors Errors;
// The member has the same name as the nested struct, so the elaborated type
// specifier `struct Data` is required to name the type in this declaration.
106 struct Data Data;
107};
108
110{
111 None = 0,
112 ThrowOnWarnings = 1,
115};
116
122
// Optional arguments for PdfContentStreamReader: the reader's public constructor
// accepts them as `nullable<const PdfContentReaderArgs&> args` and the reader stores
// a PdfContentReaderArgs in its m_args member.
123struct PODOFO_API PdfContentReaderArgs final
124{
// Reader behaviour flags; defaults to PdfContentReaderFlags::None.
125 PdfContentReaderFlags Flags = PdfContentReaderFlags::None;
// Custom handler for inline images. PdfInlineImageHandler is
// std::function<bool(const PdfDictionary& imageDict, InputStreamDevice& device)>;
// with no initializer here, a default-constructed args object holds an empty handler.
// NOTE(review): the meaning of the handler's bool result is not visible in this header.
126 PdfInlineImageHandler InlineImageHandler;
127};
128
132{
133public:
135
136 PdfContentStreamReader(std::shared_ptr<InputStreamDevice> device, nullable<const PdfContentReaderArgs&> args = { });
137
138private:
139 PdfContentStreamReader(std::shared_ptr<InputStreamDevice>&& device, const PdfCanvas* canvas,
141
142public:
143 bool TryReadNext(PdfContent& data);
144
145private:
146 void beforeReadReset(PdfContent& content);
147
148 void afterReadClear(PdfContent& content);
149
150 bool tryReadNextContent(PdfContent& content);
151
152 bool tryHandleOperator(PdfContent& content, bool& eof);
153
154 bool tryReadInlineImgDict(PdfContent& content);
155
156 bool tryReadInlineImgData(charbuff& data);
157
158 bool tryHandleXObject(PdfContent& content);
159
160 bool isCalledRecursively(const PdfObject* xobj);
161
162private:
163 struct Storage
164 {
166 std::string_view Keyword;
169 };
170
// Element type of the reader's m_inputs vector: one input source for the reader.
171 struct Input
172 {
// NOTE(review): presumably the Form XObject this input belongs to, if any - confirm.
173 std::shared_ptr<const PdfXObject> Form;
// Shared handle to the device for this input.
174 std::shared_ptr<InputStreamDevice> Device;
// Raw pointer to a PdfCanvas; no initializer is given here.
// NOTE(review): looks non-owning and possibly null - confirm.
175 const PdfCanvas* Canvas;
176 };
177
178private:
179 std::vector<Input> m_inputs;
180 PdfContentReaderArgs m_args;
181 std::shared_ptr<charbuff> m_buffer;
182 PdfPostScriptTokenizer m_tokenizer;
183 bool m_readingInlineImgData; // A state of reading inline image data
184
185 // Temp storage
186 Storage m_temp;
187};
188
189};
190
191ENABLE_BITMASK_OPERATORS(PoDoFo::PdfContentReaderFlags);
192ENABLE_BITMASK_OPERATORS(PoDoFo::PdfContentWarnings);
193ENABLE_BITMASK_OPERATORS(PoDoFo::PdfContentErrors);
194
195#endif // PDF_CONTENT_READER_H
This class represents an input device. It optionally supports peeking.
Definition InputDevice.h:22
An interface that provides the necessary features for a painter to draw onto a PdfObject.
Definition PdfCanvas.h:28
Reader class to read content streams.
Definition PdfContentStreamReader.h:132
Content as read from content streams.
Definition PdfContentStreamReader.h:52
Data & operator*()
Unchecked and mutable access to content data.
Definition PdfContentStreamReader.h:88
const Data * operator->() const
Unchecked access to content data.
Definition PdfContentStreamReader.h:92
Data * operator->()
Unchecked and mutable access to content data.
Definition PdfContentStreamReader.h:96
const Data & operator*() const
Unchecked access to content data.
Definition PdfContentStreamReader.h:84
The PDF dictionary data type of PoDoFo (inherits from PdfDataContainer, the base class for such repre...
Definition PdfDictionary.h:82
This class represents a PdfName.
Definition PdfName.h:24
This class represents a PDF indirect Object in memory.
Definition PdfObject.h:35
This class is a parser for general PostScript content in PDF documents.
Definition PdfPostScriptTokenizer.h:30
A variant data type which supports all data types supported by the PDF standard.
Definition PdfVariant.h:33
Convenient type for char array storage and/or buffer with std::string compatibility.
Definition basetypes.h:38
SPDX-FileCopyrightText: (C) 2022 Francesco Pretto ceztko@gmail.com SPDX-License-Identifier: LGPL-2....
Definition basetypes.h:16
PdfContentReaderFlags
Definition PdfContentStreamReader.h:110
@ SkipFollowFormXObjects
Don't follow Form XObject.
@ SkipHandleNonFormXObjects
Don't handle non Form XObjects (PdfImage, PdfXObjectPostScript). Doesn't influence traversing of Form...
PdfContentWarnings
Definition PdfContentStreamReader.h:34
@ MissingEndImage
Missing end inline image EI operator.
@ RecursiveXObject
Recursive XObject call detected. Applies to DoXObject.
@ InvalidImageDictionaryContent
Found invalid content while reading inline image dictionary. Applies to ImageDictionary.
@ SpuriousStackContent
The operand count for the operator is greater than necessary.
PdfContentType
Type of the content read from a content stream.
Definition PdfContentStreamReader.h:22
@ ImageData
Raw inline image data found between ID and EI tags (see PDF ref section 4.8.6)
@ UnexpectedKeyword
An unexpected keyword that can be a custom operator or invalid PostScript content
@ DoXObject
Issued when a Do operator is found and it is handled by the reader. NOTE: for Form XObjects BeginForm...
@ EndFormXObject
Issued when a Form XObject has just been followed.
@ ImageDictionary
Inline image dictionary.
@ BeginFormXObject
Issued when a Form XObject is being followed.
@ Operator
The token is a PDF operator.
PdfPostScriptTokenType
An enum describing the type of a read token.
Definition PdfPostScriptTokenizer.h:19
@ Keyword
The token is a PDF keyword.
@ Variant
The token is a PDF variant. A variant is usually a parameter to a keyword.
@ Name
Name datatype. Names are used as keys in dictionary to reference values.
@ None
Do not add a default appearance.
PdfContentErrors
Definition PdfContentStreamReader.h:43
@ InvalidOperator
Unknown operator or insufficient operand count. Applies to Operator.
@ InvalidXObject
Invalid or not found XObject.
std::function< bool(const PdfDictionary &imageDict, InputStreamDevice &device)> PdfInlineImageHandler
Custom handler for inline images.
Definition PdfContentStreamReader.h:121
PdfOperator
List of PDF stream content operators.
Definition PdfDeclarations.h:726