PoDoFo 1.2.0
Loading...
Searching...
No Matches
PdfContentStreamReader.h
1// SPDX-FileCopyrightText: 2022 Francesco Pretto <ceztko@gmail.com>
2// SPDX-License-Identifier: LGPL-2.0-or-later OR MPL-2.0
3
4#ifndef PDF_CONTENT_READER_H
5#define PDF_CONTENT_READER_H
6
7#include "PdfXObject.h"
8#include "PdfCanvas.h"
9#include "PdfData.h"
10#include "PdfDictionary.h"
11#include "PdfVariantStack.h"
12#include "PdfPostScriptTokenizer.h"
13
14namespace PoDoFo {
15
// PdfContentType: type of the content read from a content stream.
// NOTE(review): the enum header line and some enumerators (listing lines 22, 25-27)
// are not present in this extract; only the visible enumerators are documented here.
19{
20 Unknown = 0,
   // The token is a PDF operator.
21 Operator,
   // Raw inline image data found between ID and EI tags (see PDF ref section 4.8.6).
23 ImageData,
   // Issued when a Do operator is found and it is handled by the reader.
24 DoXObject,
28};
29
38
// PdfContentErrors: errors that can be reported for a content entry.
// Values are distinct bits (1, 2, 4); ENABLE_BITMASK_OPERATORS is applied to this
// enum at the end of the header, so several errors can presumably be combined.
40{
41 None = 0,
   // Unknown operator or insufficient operand count. Applies to Operator.
42 InvalidOperator = 1,
   // Invalid or not found XObject.
43 InvalidXObject = 2,
   // Token encountered in an invalid context (e.g., misplaced PostScript brace
   // delimiter '{' or '}').
44 UnexpectedToken = 4,
45};
46
// Content as read from content streams.
//
// Holds the type of the entry, the warnings/errors raised while reading it, and
// the payload (see the nested Data struct). PdfContentStreamReader is a friend
// and therefore has access to the private state below.
49class PODOFO_API PdfContent final
50{
51 friend class PdfContentStreamReader;
52public:
53 PdfContent();
54public:
   // Payload of a content entry. Which fields are meaningful presumably depends
   // on the content type - TODO confirm against the reader implementation.
55 struct Data
56 {
   // Variant stack; presumably the operands preceding the operator - confirm.
57 PdfVariantStack Stack;
58 PdfOperator Operator = PdfOperator::Unknown;
   // Non-owning view: the referenced characters are owned elsewhere.
59 std::string_view Keyword;
60 PdfDictionary InlineImageDictionary;
61 charbuff InlineImageData;
   // Non-owning pointer, null by default.
62 const PdfName* Name = nullptr;
   // NOTE(review): this is shared_ptr<PdfXObject>, while GetXObject() below
   // returns a reference to shared_ptr<const PdfXObject>. If the getter returns
   // this member directly, the reference would bind to a converted temporary -
   // verify the implementation (PdfContentStreamReader::Input::Form uses
   // shared_ptr<const PdfXObject>).
63 std::shared_ptr<PdfXObject> XObject;
64 };
65
   // Named accessors for the payload. The operators further below are documented
   // as "unchecked", so these presumably validate the content type via
   // checkAccess() first - TODO confirm in the implementation file.
66 const PdfVariantStack& GetStack() const;
67 PdfOperator GetOperator() const;
68 const std::string_view& GetKeyword() const;
69 const PdfDictionary& GetInlineImageDictionary() const;
70 const charbuff& GetInlineImageData() const;
71 const std::shared_ptr<const PdfXObject>& GetXObject() const;
72
73 bool HasWarnings() const;
74 bool HasErrors() const;
75
   // Plain accessors returning the stored type, warnings and errors.
76 PdfContentType GetType() const { return Type; }
77 PdfContentWarnings GetWarnings() const { return Warnings; }
78 PdfContentErrors GetErrors() const { return Errors; }
79
   // Unchecked access to content data.
82 const Data& operator*() const { return Data; }
83
   // Unchecked and mutable access to content data.
86 Data& operator*() { return Data; }
87
   // Unchecked access to content data.
90 const Data* operator->() const { return &Data; }
91
   // Unchecked and mutable access to content data.
94 Data* operator->() { return &Data; }
95
96private:
   // NOTE(review): presumably verifies that the stored Type matches `type`
   // before a named accessor returns - behaviour not visible in this header.
97 void checkAccess(PdfContentType type) const;
98
99private:
100 PdfContentType Type;
101 bool ThrowOnWarnings;
102 PdfContentWarnings Warnings;
103 PdfContentErrors Errors;
    // The elaborated specifier `struct Data` is required because the member name
    // `Data` hides the nested type name.
104 struct Data Data;
105};
106
// PdfContentReaderFlags: option flags for the content stream reader
// (ENABLE_BITMASK_OPERATORS is applied to this enum at the end of the header).
// NOTE(review): the enum header line and listing lines 111-113 are not present in
// this extract; the page's reference text also names SkipFollowFormXObjects,
// SkipFetchInlineImages and SkipHandleNonFormXObjects for this enum.
108{
109 None = 0,
    // NOTE(review): presumably makes the reader throw when a warning is raised -
    // confirm against the implementation.
110 ThrowOnWarnings = 1,
114};
115
121
// Optional arguments accepted by the public PdfContentStreamReader constructor.
122struct PODOFO_API PdfContentReaderArgs final
123{
    // Reader option flags; defaults to no flags set.
124 PdfContentReaderFlags Flags = PdfContentReaderFlags::None;
    // Custom handler for inline images. PdfInlineImageHandler is a
    // std::function<bool(const PdfDictionary& imageDict, InputStreamDevice& device)>;
    // default-constructed (empty) here.
125 PdfInlineImageHandler InlineImageHandler;
126};
127
// PdfContentStreamReader: reader class to read content streams.
// NOTE(review): the class header line and some declarations (e.g. listing lines
// 133 and 139) are not present in this extract.
131{
132public:
134
    // Constructs a reader over an input device, with optional reader arguments.
135 PdfContentStreamReader(std::shared_ptr<InputStreamDevice> device, nullable<const PdfContentReaderArgs&> args = { });
136
137private:
    // Private constructor that additionally takes a canvas.
    // NOTE(review): the remainder of this declaration (listing line 139) is
    // missing from this extract.
138 PdfContentStreamReader(std::shared_ptr<InputStreamDevice>&& device, const PdfCanvas* canvas,
140
141public:
    // Tries to read the next content entry into `data`.
    // NOTE(review): presumably returns false once no more content is available -
    // confirm against the implementation.
142 bool TryReadNext(PdfContent& data);
143
144private:
    // Internal helpers used while reading; their behaviour is defined in the
    // implementation file and is not visible from this header.
145 void beforeReadReset(PdfContent& content);
146
147 void afterReadClear(PdfContent& content);
148
149 bool tryReadNextContent(PdfContent& content);
150
151 bool tryHandleOperator(PdfContent& content, bool& eof);
152
153 bool tryReadInlineImgDict(PdfContent& content);
154
155 bool tryReadInlineImgData(charbuff& data, const PdfDictionary& imageDict, bool skipSaveImage);
156
157 bool tryHandleXObject(PdfContent& content);
158
    // NOTE(review): presumably backs the RecursiveXObject warning ("Recursive
    // XObject call detected") - confirm.
159 bool isCalledRecursively(const PdfObject* xobj);
160
161private:
    // Temporary storage type (see m_temp).
    // NOTE(review): listing lines 164 and 166-167 are missing from this extract.
162 struct Storage
163 {
165 std::string_view Keyword;
168 };
169
    // One input source: an optional Form XObject, the device to read from, and a
    // non-owning canvas pointer.
170 struct Input
171 {
172 std::shared_ptr<const PdfXObject> Form;
173 std::shared_ptr<InputStreamDevice> Device;
174 const PdfCanvas* Canvas;
175 };
176
177private:
    // NOTE(review): presumably a stack of inputs, pushed when a Form XObject is
    // followed - confirm.
178 std::vector<Input> m_inputs;
179 PdfContentReaderArgs m_args;
180 std::shared_ptr<charbuff> m_buffer;
181 PdfPostScriptTokenizer m_tokenizer;
182 bool m_readingInlineImgData; // A state of reading inline image data
183
184 // Temp storage
185 Storage m_temp;
186};
187
188};
189
// Opt the flag-style enums into bitmask operators. The macro is defined outside
// this header; presumably it provides the bitwise operators for each enum.
// Invoked after the PoDoFo namespace is closed, hence the qualified names.
190ENABLE_BITMASK_OPERATORS(PoDoFo::PdfContentReaderFlags);
191ENABLE_BITMASK_OPERATORS(PoDoFo::PdfContentWarnings);
192ENABLE_BITMASK_OPERATORS(PoDoFo::PdfContentErrors);
193
194#endif // PDF_CONTENT_READER_H
This class represents an input device. It optionally supports peeking.
Definition InputDevice.h:20
An interface that provides the necessary features for a painter to draw onto a PdfObject.
Definition PdfCanvas.h:26
Reader class to read content streams.
Definition PdfContentStreamReader.h:131
Content as read from content streams.
Definition PdfContentStreamReader.h:50
Data & operator*()
Unchecked and mutable access to content data.
Definition PdfContentStreamReader.h:86
const Data * operator->() const
Unchecked access to content data.
Definition PdfContentStreamReader.h:90
Data * operator->()
Unchecked and mutable access to content data.
Definition PdfContentStreamReader.h:94
const Data & operator*() const
Unchecked access to content data.
Definition PdfContentStreamReader.h:82
The PDF dictionary data type of PoDoFo (inherits from PdfDataContainer, the base class for such repre...
Definition PdfDictionary.h:80
This class represents a PdfName.
Definition PdfName.h:22
This class represents a PDF indirect Object in memory.
Definition PdfObject.h:33
This class is a parser for general PostScript content in PDF documents.
Definition PdfPostScriptTokenizer.h:27
A variant data type which supports all data types supported by the PDF standard.
Definition PdfVariant.h:31
Convenient type for char array storage and/or buffer with std::string compatibility.
Definition basetypes.h:35
All classes, functions, types and enums of PoDoFo are members of this namespace.
Definition basetypes.h:13
PdfContentReaderFlags
Definition PdfContentStreamReader.h:108
@ SkipFollowFormXObjects
Don't follow Form XObject.
@ SkipFetchInlineImages
Don't fetch inline images data, just skip it.
@ SkipHandleNonFormXObjects
Don't handle non Form XObjects (PdfImage, PdfXObjectPostScript). Doesn't influence traversing of Form...
PdfContentWarnings
Definition PdfContentStreamReader.h:31
@ MissingEndImage
Missing end inline image EI operator.
@ RecursiveXObject
Recursive XObject call detected. Applies to DoXObject.
@ InvalidImageDictionaryContent
Found invalid content while reading inline image dictionary. Applies to ImageDictionary.
@ SpuriousStackContent
The operand count for the operator is more than necessary.
PdfContentType
Type of the content read from a content stream.
Definition PdfContentStreamReader.h:19
@ ImageData
Raw inline image data found between ID and EI tags (see PDF ref section 4.8.6)
@ UnexpectedKeyword
An unexpected keyword that can be a custom operator or invalid PostScript content
@ DoXObject
Issued when a Do operator is found and it is handled by the reader. NOTE: for Form XObjects BeginForm...
@ EndFormXObject
Issued when a Form XObject has just been followed.
@ ImageDictionary
Inline image dictionary.
@ BeginFormXObject
Issued when a Form XObject is being followed.
@ Operator
The token is a PDF operator.
PdfPostScriptTokenType
An enum describing the type of a read token.
Definition PdfPostScriptTokenizer.h:16
@ Keyword
The token is a PDF keyword.
@ Variant
The token is a PDF variant. A variant is usually a parameter to a keyword.
@ Name
Name datatype. Names are used as keys in dictionary to reference values.
@ None
Do not add a default appearance.
PdfContentErrors
Definition PdfContentStreamReader.h:40
@ InvalidOperator
Unknown operator or insufficient operand count. Applies to Operator.
@ InvalidXObject
Invalid or not found XObject.
@ UnexpectedToken
Token encountered in an invalid context (e.g., misplaced PostScript brace delimiter '{' or '}')
std::function< bool(const PdfDictionary &imageDict, InputStreamDevice &device)> PdfInlineImageHandler
Custom handler for inline images.
Definition PdfContentStreamReader.h:120
PdfOperator
List of PDF stream content operators.
Definition PdfDeclarations.h:741