PoDoFo 1.0.0-dev
Loading...
Searching...
No Matches
PdfEncodingMap.h
1
7#ifndef PDF_ENCODING_MAP_H
8#define PDF_ENCODING_MAP_H
9
10#include "PdfDeclarations.h"
11#include "PdfObject.h"
12#include "PdfCharCodeMap.h"
13#include "PdfCIDToGIDMap.h"
14
15namespace PoDoFo {
16
17class PdfIndirectObjectList;
18class PdfFont;
19class PdfFontMetrics;
20class PdfEncodingFactory;
21class PdfDifferenceMap;
22
// PdfEncodingMap: low level interface to convert between utf8 and encoded strings.
// Abstract base class: GetLimits(), tryGetCharCode() and tryGetCodePoints() are pure virtual.
// NOTE(review): this listing carries a line-number gutter and several lines are blank or
// cut off (e.g. 46, 162, 169, 197-199); the comments below describe only what is visible.
31class PODOFO_API PdfEncodingMap
32{
    // Classes granted access to the private/protected members declared below.
33 friend class PdfEncoding;
34 friend class PdfEncodingMapBase;
35 friend class PdfEncodingMapSimple;
36 friend class PdfDifferenceEncoding;
37 friend class PdfNullEncodingMap;
38 friend class PdfIdentityEncoding;
39 friend class PdfPredefinedToUnicodeCMap;
40 friend class PdfStringScanContext;
41 friend class PdfEncodingFactory;
42 PODOFO_PRIVATE_FRIEND(class PdfEncodingTest);
43
44private:
    // NOTE(review): the declaration for this line is missing from the listing;
    // presumably the private constructor that sets m_Type - confirm against the real header.
46
47public:
    // Public, non-virtual query API. The "Next" variants take an iterator pair over a
    // std::string_view; `it` is passed by non-const reference, so it can be advanced.
    // Each function reports success through its bool return and writes to the last argument.
50 bool TryGetNextCharCode(std::string_view::iterator& it,
51 const std::string_view::iterator& end, PdfCharCode& codeUnit) const;
52
    // Look up the char code unit for a single Unicode code point.
56 bool TryGetCharCode(char32_t codePoint, PdfCharCode& codeUnit) const;
57
    // Look up the char code unit for a sequence of code points (unicodeview).
63 bool TryGetCharCode(const unicodeview& codePoints, PdfCharCode& codeUnit) const;
64
    // Look up the char code unit for a CID given as a plain unsigned value.
68 bool TryGetCharCode(unsigned cid, PdfCharCode& codeUnit) const;
69
72 bool TryGetNextCID(std::string_view::iterator& it,
73 const std::string_view::iterator& end, PdfCID& cid) const;
74
77 bool TryGetNextCodePoints(std::string_view::iterator& it,
78 const std::string_view::iterator& end, CodePointSpan& codePoints) const;
79
    // Get the code points mapped to a code unit.
84 bool TryGetCodePoints(const PdfCharCode& codeUnit, CodePointSpan& codePoints) const;
85
    // Pure virtual: every concrete map must supply its encoding limits.
86 virtual const PdfEncodingLimits& GetLimits() const = 0;
87
    // Type of encoding, may be Simple or CMap. Returns the stored m_Type.
98 PdfEncodingMapType GetType() const { return m_Type; }
99
103 virtual PdfPredefinedEncodingType GetPredefinedEncodingType() const;
104
108 virtual bool HasLigaturesSupport() const;
109
110public:
    // Virtual destructor: the class is used polymorphically (see the shared_ptr aliases
    // to const PdfEncodingMap at the end of this header).
111 virtual ~PdfEncodingMap();
112
113protected:
    // Lower-case virtuals are the customization points for derived maps.
    // NOTE(review): presumably invoked by the public TryGet* wrappers above - the
    // implementation is not part of this header listing.
119 virtual bool tryGetNextCharCode(std::string_view::iterator& it,
120 const std::string_view::iterator& end, PdfCharCode& codeUnit) const;
121
    // Variant taking a span of code points; the parameter is named `ligature` here.
127 virtual bool tryGetCharCodeSpan(const unicodeview& ligature, PdfCharCode& codeUnit) const;
128
    // Pure virtual: try to get the char code unit from a Unicode code point.
132 virtual bool tryGetCharCode(char32_t codePoint, PdfCharCode& codeUnit) const = 0;
133
    // Pure virtual: get code points from a code unit. `cidId` is a pointer to const unsigned;
    // NOTE(review): whether nullptr is an accepted value is not visible here - confirm.
139 virtual bool tryGetCodePoints(const PdfCharCode& codeUnit, const unsigned* cidId, CodePointSpan& codePoints) const = 0;
140
145 virtual void getExportObject(PdfIndirectObjectList& objects, PdfName& name, PdfObject*& obj) const;
146
    // Returns the writing mode as a raw int (compare GetWModeSafe(), which returns PdfWModeKind).
151 virtual int GetWModeRaw() const;
152
153protected:
154 virtual void AppendCodeSpaceRange(OutputStream& stream, charbuff& temp) const;
155
    // NOTE(review): lines 162 and 169 are blank in this listing. Derived classes in this
    // header override AppendToUnicodeEntries() and AppendCIDMappingEntries(), so the base
    // declarations presumably belong here - confirm against the real header.
162
169
173 virtual PdfCIDToGIDMapConstPtr GetIntrinsicCIDToGIDMap(const PdfDictionary& fontDict, const PdfFontMetrics& metrics) const;
174
175private:
181 bool TryGetExportObject(PdfIndirectObjectList& objects, PdfName& name, PdfObject*& obj) const;
182
183 /* Overload of TryGetCodePoints that allows for a fast path to fetch code points from a full CID, if available
184 *
185 * To be called by PdfStringScanContext
186 */
187 bool TryGetCodePoints(const PdfCID& cid, CodePointSpan& codePoints) const;
188
195 bool TryGetCIDId(const PdfCharCode& codeUnit, unsigned& id) const;
196
    // NOTE(review): the following declaration is cut off in this listing (its
    // continuation on line 198 is missing); do not rely on the visible parameter list.
197 bool tryGetNextCodePoints(std::string_view::iterator& it, const std::string_view::iterator& end,
199
205 PdfWModeKind GetWModeSafe() const;
206
207private:
    // Encoding map type, exposed through GetType().
208 PdfEncodingMapType m_Type;
209};
210
// PdfEncodingMapBase: basic PdfEncodingMap implementation using a PdfCharCodeMap.
// The map is held through a std::shared_ptr (m_charMap).
214class PODOFO_API PdfEncodingMapBase : public PdfEncodingMap
215{
216 friend class PdfCMapEncoding;
217 PODOFO_PRIVATE_FRIEND(class PdfDynamicEncodingMap);
218
219protected:
    // Overrides of the PdfEncodingMap customization points.
220 bool tryGetNextCharCode(std::string_view::iterator& it,
221 const std::string_view::iterator& end, PdfCharCode& codeUnit) const override;
222
223 bool tryGetCharCodeSpan(const unicodeview& codePoints, PdfCharCode& codeUnit) const override;
224
225 bool tryGetCharCode(char32_t codePoint, PdfCharCode& codeUnit) const override;
226
227 bool tryGetCodePoints(const PdfCharCode& codeUnit, const unsigned* cidId, CodePointSpan& codePoints) const override;
228
229 void AppendCodeSpaceRange(OutputStream& stream, charbuff& temp) const override;
230
    // During a WriteToUnicodeCMap append "beginbfchar" and "beginbfrange" entries.
231 void AppendToUnicodeEntries(OutputStream& stream, charbuff& temp) const override;
232
    // During a PdfEncoding::ExportToFont() append "begincidchar" and/or "begincidrange" entries.
233 void AppendCIDMappingEntries(OutputStream& stream, const PdfFont& font, charbuff& temp) const override;
234
235public:
    // Read-only access to the underlying char code map. Dereferences m_charMap without a
    // null check, so it assumes the pointer is non-null - TODO confirm the constructor enforces this.
236 inline const PdfCharCodeMap& GetCharMap() const { return *m_charMap; }
237
238 const PdfEncodingLimits& GetLimits() const override;
239
240private:
    // Private constructor (reachable only from friends): takes the shared map by rvalue
    // reference together with the encoding map type.
242 PdfEncodingMapBase(std::shared_ptr<PdfCharCodeMap>&& map, PdfEncodingMapType type);
243
244private:
    // Shared handle to the char code map returned by GetCharMap().
245 std::shared_ptr<PdfCharCodeMap> m_charMap;
246};
247
// PdfEncodingMapSimple: PdfEncodingMap used by legacy encodings like PdfBuiltInEncoding
// or PdfDifferenceEncoding. Stores its own PdfEncodingLimits (m_Limits).
253class PODOFO_API PdfEncodingMapSimple : public PdfEncodingMap
254{
255 friend class PdfBuiltInEncoding;
256 friend class PdfDifferenceEncoding;
257 PODOFO_PRIVATE_FRIEND(class PdfFontBuiltinType1Encoding);
258
259private:
    // Private constructor (reachable only from friends) taking the encoding limits.
260 PdfEncodingMapSimple(const PdfEncodingLimits& limits);
261
262protected:
    // During a WriteToUnicodeCMap append "beginbfchar" and "beginbfrange" entries.
263 void AppendToUnicodeEntries(OutputStream& stream, charbuff& temp) const override;
264
    // During a PdfEncoding::ExportToFont() append "begincidchar" and/or "begincidrange" entries.
265 void AppendCIDMappingEntries(OutputStream& stream, const PdfFont& font, charbuff& temp) const override;
266
    // NOTE(review): GetLimits() is public in PdfEncodingMap but is overridden under
    // `protected:` here; it stays callable through a PdfEncodingMap reference. Confirm this is intended.
267 const PdfEncodingLimits& GetLimits() const override;
268
269 PdfCIDToGIDMapConstPtr GetIntrinsicCIDToGIDMap(const PdfDictionary& fontDict, const PdfFontMetrics& metrics) const override;
270
    // Virtual hook reporting a base encoding and a differences map through its two
    // reference-to-pointer out-parameters.
271 virtual void GetBaseEncoding(const PdfEncodingMap*& baseEncoding, const PdfDifferenceMap*& differences) const;
272
273private:
    // Encoding limits received by the constructor.
274 PdfEncodingLimits m_Limits;
275};
276
// Body of the class defined at PdfEncodingMap.h:282 - a common base class for built-in
// encodings which are known by name.
// NOTE(review): the class header line (281) is missing from this listing; presumably
// `class PODOFO_API PdfBuiltInEncoding : public PdfEncodingMapSimple` - confirm against the real header.
282{
283 friend class PdfFontMetricsFreetype;
284 friend class PdfPredefinedEncoding;
285 friend class PdfStandardEncoding;
286 friend class PdfSymbolEncoding;
287 friend class PdfZapfDingbatsEncoding;
288 PODOFO_PRIVATE_FRIEND(class AppleLatin1Encoding);
289
290private:
    // NOTE(review): the declaration for this line is missing from the listing
    // (presumably the private constructor) - confirm against the real header.
292
293public:
    // Get the name of this encoding. Returns a reference to the stored m_Name.
298 inline const PdfName& GetName() const { return m_Name; }
299
300protected:
301 bool tryGetCharCode(char32_t codePoint, PdfCharCode& codeUnit) const override;
302 bool tryGetCodePoints(const PdfCharCode& codeUnit, const unsigned* cidId, CodePointSpan& codePoints) const override;
303
    // Pure virtual: concrete encodings supply their to-Unicode table as a pointer to
    // const char32_t. NOTE(review): the generated docs describe it as a table of 256
    // values; the length is not expressed in this signature - confirm before indexing.
313 virtual const char32_t* GetToUnicodeTable() const = 0;
314
315private:
316 // To be called by PdfFontMetricsFreetype
317 void CreateUnicodeToGIDMap(const std::unordered_map<unsigned, unsigned>& codeToGidMap,
318 std::unordered_map<uint32_t, unsigned>& unicodeMap) const;
319
320private:
    // NOTE(review): presumably populates m_EncodingTable - the implementation is not shown here.
324 void initEncodingTable();
325
326private:
327 PdfName m_Name; // The name of the encoding
328 std::unordered_map<char32_t, char> m_EncodingTable; // The helper table for conversions into this encoding
329};
330
// PdfNullEncodingMap: dummy encoding map that will just throw exception.
// Declared `final`, so it cannot be derived from.
333class PODOFO_API PdfNullEncodingMap final : public PdfEncodingMap
334{
335 friend class PdfEncodingMapFactory;
336
337private:
    // NOTE(review): the declaration for this line is missing from the listing
    // (presumably the private default constructor) - confirm against the real header.
339
340public:
341 const PdfEncodingLimits& GetLimits() const override;
342
343protected:
    // Overrides of the pure virtual lookups required by PdfEncodingMap.
344 bool tryGetCharCode(char32_t codePoint, PdfCharCode& codeUnit) const override;
345
346 bool tryGetCodePoints(const PdfCharCode& codeUnit, const unsigned* cidId, CodePointSpan& codePoints) const override;
347
348 void AppendToUnicodeEntries(OutputStream& stream, charbuff& temp) const override;
349
350 void AppendCIDMappingEntries(OutputStream& stream, const PdfFont& font, charbuff& temp) const override;
351};
352
// Convenience alias for a const /Encoding map entry shared ptr.
355using PdfEncodingMapConstPtr = std::shared_ptr<const PdfEncodingMap>;
356
// Convenience alias for a shared ptr to a const PdfBuiltInEncoding.
359using PdfBuiltInEncodingConstPtr = std::shared_ptr<const PdfBuiltInEncoding>;
360
// Convenience alias for a const /ToUnicode CMap entry shared ptr.
// Same underlying type as PdfEncodingMapConstPtr; the separate name only documents intent.
363using PdfToUnicodeMapConstPtr = std::shared_ptr<const PdfEncodingMap>;
364}
365
366#endif // PDF_ENCODING_MAP_H
SPDX-FileCopyrightText: (C) 2005 Dominik Seichter domseichter@web.de SPDX-FileCopyrightText: (C) 2020...
A memory owning immutable block of code points, optimized for small segments as up to 3 elements can ...
Definition PdfEncodingCommon.h:141
An interface for writing blocks of data to a data source.
Definition OutputStream.h:18
A common base class for built-in encodings which are known by name.
Definition PdfEncodingMap.h:282
virtual const char32_t * GetToUnicodeTable() const =0
Gets a table of 256 short values which are the big endian Unicode code points that are assigned to th...
const PdfName & GetName() const
Get the name of this encoding.
Definition PdfEncodingMap.h:298
A bidirectional map from character code units to unspecified code points.
Definition PdfCharCodeMap.h:79
The PDF dictionary data type of PoDoFo (inherits from PdfDataContainer, the base class for such repre...
Definition PdfDictionary.h:82
PdfDifferenceEncoding is an encoding, which is based on either the fonts encoding or a predefined enc...
Definition PdfDifferenceEncoding.h:118
A helper class for PdfDifferenceEncoding that can be used to create a differences array.
Definition PdfDifferenceEncoding.h:29
This factory creates a PdfEncoding from an existing object in the PDF.
Definition PdfEncodingFactory.h:20
Basic PdfEncodingMap implementation using a PdfCharCodeMap.
Definition PdfEncodingMap.h:215
This factory creates a PdfEncodingMap.
Definition PdfEncodingMapFactory.h:18
PdfEncodingMap used by legacy encodings like PdfBuiltInEncoding or PdfDifferenceEncoding that can def...
Definition PdfEncodingMap.h:254
A PdfEncodingMap is a low level interface to convert between utf8 and encoded strings in and to deter...
Definition PdfEncodingMap.h:32
virtual bool tryGetCodePoints(const PdfCharCode &codeUnit, const unsigned *cidId, CodePointSpan &codePoints) const =0
Get code points from a code unit.
virtual void AppendToUnicodeEntries(OutputStream &stream, charbuff &temp) const =0
During a WriteToUnicodeCMap append "beginbfchar" and "beginbfrange" entries.
virtual void AppendCIDMappingEntries(OutputStream &stream, const PdfFont &font, charbuff &temp) const =0
During a PdfEncoding::ExportToFont() append "begincidchar" and/or "begincidrange" entries.
PdfEncodingMapType GetType() const
Type of encoding, may be Simple or CMap.
Definition PdfEncodingMap.h:98
virtual bool tryGetCharCode(char32_t codePoint, PdfCharCode &codeUnit) const =0
Try to get the char code unit from a Unicode code point.
A PdfEncoding is used in PdfFont to transform a text string into a representation so that it can be displa...
Definition PdfEncoding.h:56
This abstract class provides access to font metrics information.
Definition PdfFontMetrics.h:36
Before you can draw text on a PDF document, you have to create a font object first.
Definition PdfFont.h:45
PdfIdentityEncoding is a two-byte encoding which can be used with TrueType fonts to represent all cha...
Definition PdfIdentityEncoding.h:31
A list of PdfObjects that constitutes the indirect object list of the document. The PdfParser will rea...
Definition PdfIndirectObjectList.h:30
This class represents a PdfName.
Definition PdfName.h:24
Dummy encoding map that will just throw an exception.
Definition PdfEncodingMap.h:334
This class represents a PDF indirect Object in memory.
Definition PdfObject.h:35
A common base class for Pdf defined predefined encodings which are known by name.
Definition PdfPredefinedEncoding.h:27
Represents a predefined ToUnicode CMap as the ones described in ISO 32000-2:2020 "9....
Definition PdfPredefinedToUnicodeCMap.h:21
StandardEncoding.
Definition PdfPredefinedEncoding.h:118
A PDF string context to iteratively scan a string and collect both CID and unicode codepoints.
Definition PdfEncoding.h:25
Symbol Encoding.
Definition PdfPredefinedEncoding.h:135
ZapfDingbats encoding.
Definition PdfPredefinedEncoding.h:152
Convenient type for char array storage and/or buffer with std::string compatibility.
Definition basetypes.h:38
SPDX-FileCopyrightText: (C) 2022 Francesco Pretto ceztko@gmail.com SPDX-License-Identifier: LGPL-2....
Definition basetypes.h:16
cspan< char32_t > unicodeview
Unicode code point view.
Definition basetypes.h:27
std::shared_ptr< const PdfEncodingMap > PdfToUnicodeMapConstPtr
Convenience alias for a const /ToUnicode CMap entry shared ptr.
Definition PdfEncodingMap.h:363
std::shared_ptr< const PdfBuiltInEncoding > PdfBuiltInEncodingConstPtr
Convenience typedef for a const /Encoding map entry shared ptr.
Definition PdfEncodingMap.h:359
PdfEncodingMapType
Definition PdfDeclarations.h:125
PdfPredefinedEncodingType
Definition PdfDeclarations.h:132
std::shared_ptr< const PdfEncodingMap > PdfEncodingMapConstPtr
Convenience typedef for a const /Encoding map entry shared ptr.
Definition PdfEncodingMap.h:355
Represent a CID (Character ID) with full code unit information.
Definition PdfEncodingCommon.h:52
A character code unit.
Definition PdfEncodingCommon.h:20