7 #ifndef PDF_TOKENIZER_H
8 #define PDF_TOKENIZER_H
11 #include <podofo/auxiliary/InputDevice.h>
12 #include "PdfStatefulEncrypt.h"
// Token categories reported through the `tokenType` argument of the
// tokenizer's read/peek methods.
// NOTE(review): only two enumerators are visible in this excerpt; the braces
// and the remaining enumerators lie outside this view.
20 enum class PdfTokenType
// Presumably the "<<" token -- TODO confirm against the implementation.
30 DoubleAngleBracketsLeft,
// Presumably the ">>" token -- TODO confirm against the implementation.
31 DoubleAngleBracketsRight,
// PostScript language level selector, used as the type of
// PdfTokenizerOptions::LanguageLevel (which defaults to L2).
// NOTE(review): the enumerator list is not visible in this excerpt.
37 enum class PdfPostScriptLanguageLevel
// Options accepted by the PdfTokenizer constructor. Both members carry
// in-class default initializers, so a default-constructed instance is usable.
43 struct PODOFO_API PdfTokenizerOptions final
// PostScript language level to use; defaults to L2.
45 PdfPostScriptLanguageLevel LanguageLevel = PdfPostScriptLanguageLevel::L2;
// Defaults to true. Presumably controls whether indirect references are
// recognized while reading variants -- TODO confirm against the implementation.
46 bool ReadReferences =
true;
// NOTE(review): presumably grants PdfParserObject friend access to the
// non-public members of this class; the macro is defined elsewhere -- confirm.
54 PODOFO_PRIVATE_FRIEND(
class PdfParserObject);
// Compile-time constant with value 4096.
// NOTE(review): presumably the default size, in bytes, of the tokenizer's
// working buffer -- confirm against the constructors.
57 static constexpr
unsigned BufferSize = 4096;
// Constructs a tokenizer from a caller-supplied buffer.
//   buffer  - shared character buffer (presumably retained in m_buffer and
//             used as working storage -- TODO confirm).
//   options - tokenizer options; defaults to a default-constructed
//             PdfTokenizerOptions.
61 PdfTokenizer(
const std::shared_ptr<charbuff>& buffer,
const PdfTokenizerOptions& options = { });
// Reads the next token from the current position of `device`, ignoring all
// comments.
//   device    - input device to read from.
//   token     - output: view of the token text.
//   tokenType - output: category of the token.
// Returns a bool; presumably false when no token could be read -- TODO confirm.
// NOTE(review): `token` is a non-owning view; it presumably refers to the
// tokenizer's buffer and may be invalidated by the next read -- confirm.
81 bool TryReadNextToken(
InputStreamDevice& device, std::string_view& token, PdfTokenType& tokenType);
// Peeks at the next token from the current position of `device`, ignoring all
// comments, without actually consuming it.
//   device    - input device to read from.
//   token     - output: view of the token text.
//   tokenType - output: category of the token.
// Returns a bool; presumably false when no token is available -- TODO confirm.
89 bool TryPeekNextToken(
InputStreamDevice& device, std::string_view& token, PdfTokenType& tokenType);
// Static predicate on a single character.
// NOTE(review): presumably true when `ch` belongs to the PDF white-space
// character class -- confirm against the implementation.
121 static bool IsWhitespace(
char ch);
// Static predicate on a single character.
// NOTE(review): presumably true when `ch` belongs to the PDF delimiter
// character class -- confirm against the implementation.
126 static bool IsDelimiter(
char ch);
// Static predicate on a single character that also reports a token type
// through the non-const `tokenType` reference.
// NOTE(review): presumably `tokenType` is only meaningful when the function
// returns true -- confirm against the implementation.
130 static bool IsTokenDelimiter(
char ch, PdfTokenType& tokenType);
// Static predicate on a single character.
// NOTE(review): presumably true when `ch` is a PDF regular character, i.e.
// neither white-space nor delimiter -- confirm against the implementation.
137 static bool IsRegular(
char ch);
// Static predicate on a single character.
// NOTE(review): the exact range treated as printable is not visible here --
// confirm against the implementation.
143 static bool IsPrintable(
char ch);
// Data-type classification returned by DetermineDataType() and accepted by
// tryReadDataType().
// NOTE(review): the enumerator list is not visible in this excerpt.
150 enum class PdfLiteralDataType
// Attempts to read the next variant from `device`, given a token that has
// already been obtained.
//   device    - input device to read from.
//   token     - text of the already-read token.
//   tokenType - category of that token.
//   variant   - output: the variant that was read.
//   encrypt   - encryption state passed by pointer; presumably may be null
//               when no decryption is needed -- TODO confirm.
// Returns a bool; presumably false when the token does not start a valid
// variant -- TODO confirm.
177 bool TryReadNextVariant(
InputStreamDevice& device,
const std::string_view& token, PdfTokenType tokenType,
PdfVariant& variant,
const PdfStatefulEncrypt* encrypt);
// Enqueues a token together with its type.
// NOTE(review): presumably stores an owned copy in m_tokenQueque (whose
// elements hold std::string) so that a later read returns it first -- confirm.
188 void EnqueueToken(
const std::string_view& token, PdfTokenType type);
// Classifies the given token and returns the result as a PdfLiteralDataType.
//   device    - input device; presumably consulted for look-ahead -- TODO confirm.
//   token     - text of the already-read token.
//   tokenType - category of that token.
//   variant   - non-const reference; presumably filled in directly for types
//               that can be decoded from the token alone -- TODO confirm.
237 PdfLiteralDataType DetermineDataType(
InputStreamDevice& device,
const std::string_view& token, PdfTokenType tokenType,
PdfVariant& variant);
// Attempts to read a value of the given data type from `device` into `variant`.
//   device   - input device to read from.
//   dataType - classification of the value to read.
//   variant  - output: the value that was read.
//   encrypt  - encryption state passed by pointer; presumably may be null
//              -- TODO confirm.
// Returns a bool; presumably false when the data type cannot be read
// -- TODO confirm.
240 bool tryReadDataType(
InputStreamDevice& device, PdfLiteralDataType dataType,
PdfVariant& variant,
const PdfStatefulEncrypt* encrypt);
// A queued token: an owned copy of the token text paired with its type.
243 using TokenizerPair = std::pair<std::string, PdfTokenType>;
// Double-ended queue of TokenizerPair entries. The "Queque" spelling is part
// of the identifier.
244 using TokenizerQueque = std::deque<TokenizerPair>;
// Shared character buffer (same type as the constructor's `buffer` argument).
247 std::shared_ptr<charbuff> m_buffer;
// Tokenizer options, held by value.
248 PdfTokenizerOptions m_options;
// Queue of (text, type) token pairs; presumably filled by EnqueueToken()
// -- TODO confirm.
249 TokenizerQueque m_tokenQueque;
SPDX-FileCopyrightText: (C) 2005 Dominik Seichter <domseichter@web.de> SPDX-FileCopyrightText: (C) 2020...
A simple tokenizer for PDF files and PDF content streams.
Definition: PdfTokenizer.h:53
bool TryReadNextToken(InputStreamDevice &device, std::string_view &token)
Reads the next token from the current file position ignoring all comments.
bool TryPeekNextToken(InputStreamDevice &device, std::string_view &token)
Tries to peek at the next token from the current file position, ignoring all comments, without actually consuming it.
void ReadNextVariant(InputStreamDevice &device, const std::string_view &token, PdfTokenType tokenType, PdfVariant &variant, const PdfStatefulEncrypt *encrypt)
Read the next variant from the current file position ignoring all comments.
A variant data type which supports all data types supported by the PDF standard.
Definition: PdfVariant.h:33
Convenient type for char array storage and/or buffer with std::string compatibility.
Definition: basetypes.h:38
SPDX-FileCopyrightText: (C) 2022 Francesco Pretto <ceztko@gmail.com> SPDX-License-Identifier: LGPL-2....
Definition: basetypes.h:16