PoDoFo  1.0.0-dev
PdfContentStreamReader.h
1 
7 #ifndef PDF_CONTENT_READER_H
8 #define PDF_CONTENT_READER_H
9 
10 #include "PdfXObject.h"
11 #include "PdfCanvas.h"
12 #include "PdfData.h"
13 #include "PdfDictionary.h"
14 #include "PdfVariantStack.h"
15 #include "PdfPostScriptTokenizer.h"
16 
17 namespace PoDoFo {
18 
// Type of the content read from a content stream.
// NOTE(review): this listing skips source lines 25 and 28-30, so further
// enumerators may be declared there; confirm against the actual header.
21 enum class PdfContentType
22 {
23  Unknown = 0,
24  Operator, // The token is a PDF operator.
26  ImageData, // Raw inline image data found between the ID and EI tags (see PDF ref section 4.8.6).
27  DoXObject, // Issued when a Do operator is found and it is handled by the reader.
31 };
32 
// NOTE(review): the line introducing this enum is missing from this listing;
// presumably it is `enum class PdfContentWarnings`, which is used as a bitmask
// type elsewhere in this file. Confirm against the actual header.
// NOTE(review): source lines 37 and 40 are skipped here and the values 2 and 16
// are unused, so two more enumerators are probably declared there.
34 {
35  None = 0,
36  InvalidOperator = 1, // Unknown operator or insufficient operand count. Applies to Operator.
38  InvalidXObject = 4, // Invalid or not found XObject.
39  RecursiveXObject = 8, // Recursive XObject call detected. Applies to DoXObject.
41  MissingEndImage = 32, // Missing end inline image EI operator.
42 };
43 
// Content as read from content streams.
// An instance is passed by non-const reference to PdfContentStreamReader::TryReadNext().
// NOTE(review): which members are meaningful for each PdfContentType is not
// visible in this header; confirm against the reader implementation.
46 struct PODOFO_API PdfContent final
47 {
48  PdfContentType Type = PdfContentType::Unknown; // Kind of content held; defaults to Unknown.
49  PdfContentWarnings Warnings = PdfContentWarnings::None; // Warning flags; defaults to None.
50  PdfVariantStack Stack; // Variant stack (presumably the operands preceding an operator - TODO confirm).
51  PdfOperator Operator = PdfOperator::Unknown; // PDF content stream operator; defaults to Unknown.
52  std::string_view Keyword; // Non-owning view of a keyword; the owner of the underlying storage is not visible here.
53  PdfDictionary InlineImageDictionary; // Inline image dictionary.
54  charbuff InlineImageData; // Raw inline image data buffer.
55  const PdfName* Name = nullptr; // Non-owning pointer to a name; null by default.
56  std::shared_ptr<const PdfXObject> XObject; // Shared, read-only XObject; empty by default.
57 };
58 
// NOTE(review): the line introducing this enum is missing from this listing;
// presumably it is `enum class PdfContentReaderFlags`, which is used as a bitmask
// type elsewhere in this file. Confirm against the actual header.
// NOTE(review): source lines 63-64 are skipped here, so further flags may be
// declared there.
60 {
61  None = 0, // No flags set.
62  ThrowOnWarnings = 1,
65 };
66 
// Custom handler for inline images: a callable receiving the inline image
// dictionary and the input stream device, and returning a bool.
// NOTE(review): the meaning of the returned bool is not visible in this header;
// confirm against the reader implementation.
71 using PdfInlineImageHandler = std::function<bool(const PdfDictionary& imageDict, InputStreamDevice& device)>;
72 
// Optional arguments accepted by the public PdfContentStreamReader constructor.
73 struct PODOFO_API PdfContentReaderArgs final
74 {
75  PdfContentReaderFlags Flags = PdfContentReaderFlags::None; // Reader flags; defaults to None.
76  PdfInlineImageHandler InlineImageHandler; // Custom inline image handler; default-constructed (empty) unless set.
77 };
78 
// Reader class to read content streams.
// The public reading entry point visible in this listing is TryReadNext().
// NOTE(review): this listing skips some source lines (e.g. 84, 90, 119), so
// declarations may be missing; confirm against the actual header.
81 class PODOFO_API PdfContentStreamReader final
82 {
83 public:
85 
 // Construct a reader over the given input stream device, with optional reader arguments.
86  PdfContentStreamReader(const std::shared_ptr<InputStreamDevice>& device, nullable<const PdfContentReaderArgs&> args = { });
87 
88 private:
 // NOTE(review): this private constructor declaration is cut off in this listing
 // (source line 90 is not shown); confirm the remaining parameters in the header.
89  PdfContentStreamReader(const std::shared_ptr<InputStreamDevice>& device, const PdfCanvas* canvas,
91 
92 public:
 // Try to read the next content item into `data`.
 // NOTE(review): presumably returns false when no more content is available - confirm.
93  bool TryReadNext(PdfContent& data);
94 
95 private:
 // Private helpers; their exact behaviour is defined in the implementation file.
96  void beforeReadReset(PdfContent& content);
97 
98  void afterReadClear(PdfContent& content);
99 
100  bool tryReadNextContent(PdfContent& content);
101 
 // `eof` is an output flag (passed by non-const reference).
102  bool tryHandleOperator(PdfContent& content, bool& eof);
103 
104  bool tryReadInlineImgDict(PdfContent& content);
105 
 // Receives the destination buffer for inline image data by reference.
106  bool tryReadInlineImgData(charbuff& data);
107 
108  bool tryHandleXObject(PdfContent& content);
109 
110  void handleWarnings();
111 
 // Takes the XObject's underlying PdfObject; presumably checks it against the inputs already being read - TODO confirm.
112  bool isCalledRecursively(const PdfObject* xobj);
113 
114 private:
 // Temporary values held between reads (see m_temp).
115  struct Storage
116  {
117  PdfPostScriptTokenType PsType; // Type of the read token.
118  std::string_view Keyword; // Non-owning view of a keyword.
120  PdfName Name;
121  };
122 
 // One input source of the reader.
123  struct Input
124  {
125  std::shared_ptr<const PdfXObject> Form; // Associated XObject (presumably the Form XObject being followed - TODO confirm).
126  std::shared_ptr<InputStreamDevice> Device; // Device the content is read from.
127  const PdfCanvas* Canvas; // Non-owning canvas pointer.
128  };
129 
130 private:
131  std::vector<Input> m_inputs; // Inputs (presumably a stack, one entry per nested Form XObject - TODO confirm).
132  PdfContentReaderArgs m_args; // Copy of the reader arguments.
133  std::shared_ptr<charbuff> m_buffer; // Shared character buffer (presumably used by the tokenizer - TODO confirm).
134  PdfPostScriptTokenizer m_tokenizer; // Parser for the PostScript content of the stream.
135  bool m_readingInlineImgData; // A state of reading inline image data
136 
137  // Temp storage
138  Storage m_temp;
139 };
140 
141 };
142 
// Enable bitmask operators for the two flag-style enums of this header.
// NOTE(review): ENABLE_BITMASK_OPERATORS is defined outside this file; the exact
// set of operators it provides is not visible here.
143 ENABLE_BITMASK_OPERATORS(PoDoFo::PdfContentReaderFlags);
144 ENABLE_BITMASK_OPERATORS(PoDoFo::PdfContentWarnings);
145 
146 #endif // PDF_CONTENT_READER_H
This class represents an input device. It optionally supports peeking.
Definition: InputDevice.h:22
An interface that provides the necessary features for a painter to draw onto a PdfObject.
Definition: PdfCanvas.h:28
Reader class to read content streams.
Definition: PdfContentStreamReader.h:82
The PDF dictionary data type of PoDoFo (inherits from PdfDataContainer, the base class for such repre...
Definition: PdfDictionary.h:82
This class represents a PdfName.
Definition: PdfName.h:24
This class represents a PDF indirect Object in memory.
Definition: PdfObject.h:35
This class is a parser for general PostScript content in PDF documents.
Definition: PdfPostScriptTokenizer.h:30
A variant data type which supports all data types supported by the PDF standard.
Definition: PdfVariant.h:33
Convenient type for char array storage and/or buffer with std::string compatibility.
Definition: basetypes.h:38
Alternative to std::optional that supports reference (but not pointer) types.
Definition: nullable.h:29
SPDX-FileCopyrightText: (C) 2022 Francesco Pretto ceztko@gmail.com SPDX-License-Identifier: LGPL-2....
Definition: basetypes.h:16
PdfContentReaderFlags
Definition: PdfContentStreamReader.h:60
@ SkipFollowFormXObjects
Don't follow Form XObject.
@ SkipHandleNonFormXObjects
Don't handle non Form XObjects (PdfImage, PdfXObjectPostScript). Doesn't influence traversing of Form...
PdfContentWarnings
Definition: PdfContentStreamReader.h:34
@ MissingEndImage
Missing end inline image EI operator.
@ RecursiveXObject
Recursive XObject call detected. Applies to DoXObject.
@ InvalidOperator
Unknown operator or insufficient operand count. Applies to Operator.
@ InvalidXObject
Invalid or not found XObject.
@ InvalidImageDictionaryContent
Found invalid content while reading inline image dictionary. Applies to ImageDictionary.
@ SpuriousStackContent
The operand count for the operator is more than necessary.
@ Name
Name datatype. Names are used as keys in dictionary to reference values.
PdfContentType
Type of the content read from a content stream.
Definition: PdfContentStreamReader.h:22
@ ImageData
Raw inline image data found between ID and EI tags (see PDF ref section 4.8.6)
@ UnexpectedKeyword
An unexpected keyword that can be a custom operator or invalid PostScript content
@ DoXObject
Issued when a Do operator is found and it is handled by the reader. NOTE: for Form XObjects BeginForm...
@ EndFormXObject
Issued when a Form XObject has just been followed.
@ ImageDictionary
Inline image dictionary.
@ BeginFormXObject
Issued when a Form XObject is being followed.
@ Operator
The token is a PDF operator.
PdfOperator
List of PDF stream content operators.
Definition: PdfDeclarations.h:690
@ None
Do not add a default appearance.
PdfPostScriptTokenType
An enum describing the type of a read token.
Definition: PdfPostScriptTokenizer.h:19
@ Keyword
The token is a PDF keyword.
@ Variant
The token is a PDF variant. A variant is usually a parameter to a keyword.
std::function< bool(const PdfDictionary &imageDict, InputStreamDevice &device)> PdfInlineImageHandler
Custom handler for inline images.
Definition: PdfContentStreamReader.h:71
Content as read from content streams.
Definition: PdfContentStreamReader.h:47