PoDoFo 1.0.0-dev
Loading...
Searching...
No Matches
PdfContentStreamReader.h
1
7#ifndef PDF_CONTENT_READER_H
8#define PDF_CONTENT_READER_H
9
10#include "PdfXObject.h"
11#include "PdfCanvas.h"
12#include "PdfData.h"
13#include "PdfDictionary.h"
14#include "PdfVariantStack.h"
15#include "PdfPostScriptTokenizer.h"
16
17namespace PoDoFo {
18
22{
23 Unknown = 0,
24 Operator,
26 ImageData,
27 DoXObject,
31};
32
43
// Content as read from content streams.
// An instance is passed by reference to PdfContentStreamReader::TryReadNext().
// NOTE(review): which fields are meaningful for which Type is not visible in
// this header listing -- confirm against the reader implementation.
46struct PODOFO_API PdfContent final
47{
    // Type of the content read from the content stream; defaults to Unknown.
48 PdfContentType Type = PdfContentType::Unknown;
    // Warning flags attached to this content (PdfContentWarnings is enabled as
    // a bitmask type at the end of this header); defaults to None.
49 PdfContentWarnings Warnings = PdfContentWarnings::None;
    // Stack of variants; presumably the operands read before the operator -- TODO confirm.
50 PdfVariantStack Stack;
    // PDF stream content operator; defaults to Unknown.
51 PdfOperator Operator = PdfOperator::Unknown;
    // Non-owning view of a keyword string.
    // NOTE(review): the lifetime of the viewed storage is not shown here -- verify
    // before keeping this view around.
52 std::string_view Keyword;
    // Inline image dictionary (presumably set for ImageDictionary content -- confirm).
53 PdfDictionary InlineImageDictionary;
    // Raw inline image data (presumably set for ImageData content, i.e. the bytes
    // found between the ID and EI tags -- confirm).
54 charbuff InlineImageData;
    // Non-owning pointer to a name, nullptr by default.
    // NOTE(review): looks like the XObject name used by a Do operator -- confirm.
55 const PdfName* Name = nullptr;
    // Shared, read-only XObject (presumably set for DoXObject / Form XObject
    // content -- confirm).
56 std::shared_ptr<const PdfXObject> XObject;
57};
58
60{
61 None = 0,
62 ThrowOnWarnings = 1,
65};
66
72
// Optional arguments for PdfContentStreamReader; the public constructor
// accepts them as a nullable const reference and the reader stores a copy
// in its m_args member.
73struct PODOFO_API PdfContentReaderArgs final
74{
    // Reader behavior flags (bitmask enum); defaults to None.
75 PdfContentReaderFlags Flags = PdfContentReaderFlags::None;
    // Custom handler for inline images: a
    // std::function<bool(const PdfDictionary& imageDict, InputStreamDevice& device)>.
    // NOTE(review): the meaning of the bool result and the behavior when the
    // handler is left empty are not shown in this header -- confirm.
76 PdfInlineImageHandler InlineImageHandler;
77};
78
82{
83public:
85
86 PdfContentStreamReader(const std::shared_ptr<InputStreamDevice>& device, nullable<const PdfContentReaderArgs&> args = { });
87
88private:
89 PdfContentStreamReader(const std::shared_ptr<InputStreamDevice>& device, const PdfCanvas* canvas,
91
92public:
93 bool TryReadNext(PdfContent& data);
94
95private:
96 void beforeReadReset(PdfContent& content);
97
98 void afterReadClear(PdfContent& content);
99
100 bool tryReadNextContent(PdfContent& content);
101
102 bool tryHandleOperator(PdfContent& content, bool& eof);
103
104 bool tryReadInlineImgDict(PdfContent& content);
105
106 bool tryReadInlineImgData(charbuff& data);
107
108 bool tryHandleXObject(PdfContent& content);
109
110 void handleWarnings();
111
112 bool isCalledRecursively(const PdfObject* xobj);
113
114private:
115 struct Storage
116 {
118 std::string_view Keyword;
121 };
122
    // One input entry of the reader; the reader keeps these in the
    // std::vector<Input> member m_inputs.
123 struct Input
124 {
    // Shared, read-only XObject associated with this input.
    // NOTE(review): presumably the Form XObject being followed, empty for the
    // top-level stream -- confirm.
125 std::shared_ptr<const PdfXObject> Form;
    // Shared input device the content is read from.
126 std::shared_ptr<InputStreamDevice> Device;
    // Non-owning pointer to the canvas for this input.
    // NOTE(review): may be null when reading from a bare device -- verify.
127 const PdfCanvas* Canvas;
128 };
129
130private:
131 std::vector<Input> m_inputs;
132 PdfContentReaderArgs m_args;
133 std::shared_ptr<charbuff> m_buffer;
134 PdfPostScriptTokenizer m_tokenizer;
135 bool m_readingInlineImgData; // A state of reading inline image data
136
137 // Temp storage
138 Storage m_temp;
139};
140
141};
142
143ENABLE_BITMASK_OPERATORS(PoDoFo::PdfContentReaderFlags);
144ENABLE_BITMASK_OPERATORS(PoDoFo::PdfContentWarnings);
145
146#endif // PDF_CONTENT_READER_H
This class represents an input device. It optionally supports peeking.
Definition InputDevice.h:22
An interface that provides the necessary features for a painter to draw onto a PdfObject.
Definition PdfCanvas.h:28
Reader class to read content streams.
Definition PdfContentStreamReader.h:82
The PDF dictionary data type of PoDoFo (inherits from PdfDataContainer, the base class for such repre...
Definition PdfDictionary.h:82
This class represents a PdfName.
Definition PdfName.h:24
This class represents a PDF indirect Object in memory.
Definition PdfObject.h:35
This class is a parser for general PostScript content in PDF documents.
Definition PdfPostScriptTokenizer.h:30
A variant data type which supports all data types supported by the PDF standard.
Definition PdfVariant.h:33
Convenient type for char array storage and/or buffer with std::string compatibility.
Definition basetypes.h:38
SPDX-FileCopyrightText: (C) 2022 Francesco Pretto ceztko@gmail.com SPDX-License-Identifier: LGPL-2....
Definition basetypes.h:16
PdfContentReaderFlags
Definition PdfContentStreamReader.h:60
@ SkipFollowFormXObjects
Don't follow Form XObject.
@ SkipHandleNonFormXObjects
Don't handle non Form XObjects (PdfImage, PdfXObjectPostScript). Doesn't influence traversing of Form...
PdfContentType
Type of the content read from a content stream.
Definition PdfContentStreamReader.h:22
@ ImageData
Raw inline image data found between ID and EI tags (see PDF ref section 4.8.6)
@ UnexpectedKeyword
An unexpected keyword that can be a custom operator or invalid PostScript content
@ DoXObject
Issued when a Do operator is found and it is handled by the reader. NOTE: for Form XObjects BeginForm...
@ EndFormXObject
Issued when a Form XObject has just been followed.
@ ImageDictionary
Inline image dictionary.
@ BeginFormXObject
Issued when a Form XObject is being followed.
@ Operator
The token is a PDF operator.
PdfContentWarnings
Definition PdfContentStreamReader.h:34
@ MissingEndImage
Missing end inline image EI operator.
@ RecursiveXObject
Recursive XObject call detected. Applies to DoXObject.
@ InvalidOperator
Unknown operator or insufficient operand count. Applies to Operator.
@ InvalidXObject
Invalid or not found XObject.
@ InvalidImageDictionaryContent
Found invalid content while reading inline image dictionary. Applies to ImageDictionary.
@ SpuriousStackContent
The operand count for the operator is higher than necessary.
PdfPostScriptTokenType
An enum describing the type of a read token.
Definition PdfPostScriptTokenizer.h:19
@ Keyword
The token is a PDF keyword.
@ Variant
The token is a PDF variant. A variant is usually a parameter to a keyword.
@ Name
Name datatype. Names are used as keys in dictionary to reference values.
@ None
Do not add a default appearance.
std::function< bool(const PdfDictionary &imageDict, InputStreamDevice &device)> PdfInlineImageHandler
Custom handler for inline images.
Definition PdfContentStreamReader.h:71
PdfOperator
List of PDF stream content operators.
Definition PdfDeclarations.h:726
Content as read from content streams.
Definition PdfContentStreamReader.h:47