PoDoFo 1.2.0
Loading...
Searching...
No Matches
PdfEncodingMap.h
1// SPDX-FileCopyrightText: 2007 Dominik Seichter <domseichter@web.de>
2// SPDX-FileCopyrightText: 2020 Francesco Pretto <ceztko@gmail.com>
3// SPDX-License-Identifier: LGPL-2.0-or-later OR MPL-2.0
4
5#ifndef PDF_ENCODING_MAP_H
6#define PDF_ENCODING_MAP_H
7
8#include "PdfDeclarations.h"
9#include "PdfObject.h"
10#include "PdfCharCodeMap.h"
11#include "PdfCIDToGIDMap.h"
12
13namespace PoDoFo {
14
15class PdfIndirectObjectList;
16class PdfFont;
17class PdfFontMetrics;
18class PdfEncodingFactory;
19class PdfDifferenceMap;
20
// Low-level interface for converting between UTF-8 and encoded strings
// (summary taken from this page's symbol index, where the sentence is cut off).
// NOTE(review): this is a rendered listing. The number fused to the start of
// each line is the header's own line number; gaps in the numbering apparently
// correspond to doc comments and declarations the renderer left out.
29class PODOFO_API PdfEncodingMap
30{
// Classes granted access to the private/protected members declared below.
31 friend class PdfEncoding;
32 friend class PdfEncodingMapBase;
33 friend class PdfEncodingMapSimple;
34 friend class PdfDifferenceEncoding;
35 friend class PdfNullEncodingMap;
36 friend class PdfIdentityEncoding;
37 friend class PdfPredefinedToUnicodeCMap;
38 friend class PdfStringScanContext;
39 friend class PdfEncodingFactory;
40 PODOFO_PRIVATE_FRIEND(class PdfEncodingTest);
41
42private:
44
45public:
// Public query API. Every method below returns bool; the result is presumably
// delivered through the trailing non-const reference parameter, with the bool
// signalling success. TODO confirm against the implementation file.
//
// The "Next" variants take the iterator `it` by non-const reference together
// with an `end` bound, so they are able to advance the caller's position.
48 bool TryGetNextCharCode(std::string_view::iterator& it,
49 const std::string_view::iterator& end, PdfCharCode& codeUnit) const;
50
// Overload taking a single code point.
54 bool TryGetCharCode(char32_t codePoint, PdfCharCode& codeUnit) const;
55
// Overload taking a view over several code points.
61 bool TryGetCharCode(const unicodeview& codePoints, PdfCharCode& codeUnit) const;
62
// Overload taking a CID given as a plain unsigned value.
66 bool TryGetCharCode(unsigned cid, PdfCharCode& codeUnit) const;
67
70 bool TryGetNextCID(std::string_view::iterator& it,
71 const std::string_view::iterator& end, PdfCID& cid) const;
72
75 bool TryGetNextCodePoints(std::string_view::iterator& it,
76 const std::string_view::iterator& end, CodePointSpan& codePoints) const;
77
82 bool TryGetCodePoints(const PdfCharCode& codeUnit, CodePointSpan& codePoints) const;
83
// Pure virtual: every concrete map has to supply its own limits object.
84 virtual const PdfEncodingLimits& GetLimits() const = 0;
85
// Type of encoding, may be Simple or CMap (per the symbol index).
// Implemented inline as a plain read of m_Type.
96 PdfEncodingMapType GetType() const { return m_Type; }
97
// Virtual but not pure, so a default implementation is expected to exist
// outside this listing.
101 virtual PdfPredefinedEncodingType GetPredefinedEncodingType() const;
102
// Virtual but not pure, as above.
106 virtual bool HasLigaturesSupport() const;
107
108public:
// Virtual destructor: the class declares pure virtuals and is used as a
// polymorphic base by the classes further down this header.
109 virtual ~PdfEncodingMap();
110
111protected:
// Protected virtual hooks. Those marked `= 0` must be implemented by
// subclasses; the others are overridable.
117 virtual bool tryGetNextCharCode(std::string_view::iterator& it,
118 const std::string_view::iterator& end, PdfCharCode& codeUnit) const;
119
125 virtual bool tryGetCharCodeSpan(const unicodeview& ligature, PdfCharCode& codeUnit) const;
126
// Try get char code unit from unicode code point (per the symbol index).
130 virtual bool tryGetCharCode(char32_t codePoint, PdfCharCode& codeUnit) const = 0;
131
// Get code points from a code unit (per the symbol index).
// cidId is passed as a pointer rather than a reference, so callers are able
// to pass null; presumably that means "no CID id available" - TODO confirm.
137 virtual bool tryGetCodePoints(const PdfCharCode& codeUnit, const unsigned* cidId, CodePointSpan& codePoints) const = 0;
138
143 virtual void getExportObject(PdfIndirectObjectList& objects, PdfName& name, PdfObject*& obj) const;
144
149 virtual int GetWModeRaw() const;
150
151protected:
152 virtual void AppendCodeSpaceRange(OutputStream& stream, charbuff& temp) const;
153
// NOTE(review): the symbol index on this page lists two pure virtuals for this
// class, AppendToUnicodeEntries(OutputStream&, const PdfFont&, charbuff&) and
// AppendCIDMappingEntries(OutputStream&, const PdfFont&, charbuff&), and the
// derived classes in this header override both. Their declarations do not
// appear in the listing; they presumably belong on the two bare-numbered
// lines below. Verify against the real header.
160
167
171 virtual PdfCIDToGIDMapConstPtr GetIntrinsicCIDToGIDMap(const PdfDictionary& fontDict, const PdfFontMetrics& metrics) const;
172
173private:
// Private helpers; reachable only from this class and the friends listed at
// the top.
179 bool TryGetExportObject(PdfIndirectObjectList& objects, PdfName& name, PdfObject*& obj) const;
180
181 /* Overload of TryGetCodePoints that allows for a fast path to fetch code points from a full CID, if available
182 *
183 * To be called by PdfStringScanContext
184 */
185 bool TryGetCodePoints(const PdfCID& cid, CodePointSpan& codePoints) const;
186
193 bool TryGetCIDId(const PdfCharCode& codeUnit, unsigned& id) const;
194
// NOTE(review): the declaration below is cut off in this listing - it ends on
// a comma and its continuation line (196) is absent. Restore the remaining
// parameters and the closing `) const;` from the real header before use.
195 bool tryGetNextCodePoints(std::string_view::iterator& it, const std::string_view::iterator& end,
197
203 PdfWModeKind GetWModeSafe() const;
204
205private:
// Encoding map type; the value handed back by GetType().
206 PdfEncodingMapType m_Type;
207};
208
// Basic PdfEncodingMap implementation using a PdfCharCodeMap (summary taken
// from this page's symbol index). The map is held through the shared_ptr
// member m_charMap.
212class PODOFO_API PdfEncodingMapBase : public PdfEncodingMap
213{
// The constructor is private, so these friends are presumably the classes
// that construct this type - TODO confirm.
214 friend class PdfCMapEncoding;
215 PODOFO_PRIVATE_FRIEND(class PdfDynamicEncodingMap);
216
217protected:
// Overrides of the protected conversion hooks declared by PdfEncodingMap.
218 bool tryGetNextCharCode(std::string_view::iterator& it,
219 const std::string_view::iterator& end, PdfCharCode& codeUnit) const override;
220
221 bool tryGetCharCodeSpan(const unicodeview& codePoints, PdfCharCode& codeUnit) const override;
222
223 bool tryGetCharCode(char32_t codePoint, PdfCharCode& codeUnit) const override;
224
225 bool tryGetCodePoints(const PdfCharCode& codeUnit, const unsigned* cidId, CodePointSpan& codePoints) const override;
226
// Overrides of the stream-writing hooks; each receives the output stream and a
// caller-provided scratch buffer `temp`.
227 void AppendCodeSpaceRange(OutputStream& stream, charbuff& temp) const override;
228
229 void AppendToUnicodeEntries(OutputStream& stream, const PdfFont& font, charbuff& temp) const override;
230
231 void AppendCIDMappingEntries(OutputStream& stream, const PdfFont& font, charbuff& temp) const override;
232
233public:
// Read-only access to the underlying char code map. Dereferences m_charMap
// directly, with no null check at this point.
234 inline const PdfCharCodeMap& GetCharMap() const { return *m_charMap; }
235
236 const PdfEncodingLimits& GetLimits() const override;
237
238private:
// Private constructor: takes the map as an rvalue shared_ptr plus the encoding
// map type. NOTE(review): `type` is presumably forwarded to the PdfEncodingMap
// base - confirm in the implementation file.
240 PdfEncodingMapBase(std::shared_ptr<PdfCharCodeMap>&& map, PdfEncodingMapType type);
241
242private:
// Shared ownership of the char code map exposed by GetCharMap().
243 std::shared_ptr<PdfCharCodeMap> m_charMap;
244};
245
// PdfEncodingMap used by legacy encodings like PdfBuiltInEncoding or
// PdfDifferenceEncoding (summary taken from this page's symbol index, where
// the rest of the sentence is cut off).
251class PODOFO_API PdfEncodingMapSimple : public PdfEncodingMap
252{
// The constructor is private, so these friends are presumably the classes
// that derive from or construct this type - TODO confirm.
253 friend class PdfBuiltInEncoding;
254 friend class PdfDifferenceEncoding;
255 PODOFO_PRIVATE_FRIEND(class PdfFontBuiltinType1Encoding);
256
257private:
// Private constructor taking the encoding limits by const reference.
// NOTE(review): presumably copied into m_Limits - confirm in the
// implementation file.
258 PdfEncodingMapSimple(const PdfEncodingLimits& limits);
259
260protected:
// Overrides of the stream-writing hooks; each receives the output stream and a
// caller-provided scratch buffer `temp`.
261 void AppendToUnicodeEntries(OutputStream& stream, const PdfFont& font, charbuff& temp) const override;
262
263 void AppendCIDMappingEntries(OutputStream& stream, const PdfFont& font, charbuff& temp) const override;
264
// NOTE(review): this override sits under `protected:` although the
// PdfEncodingMap declaration of GetLimits() is in a `public:` section.
// Calls made through a PdfEncodingMap reference are unaffected, but direct
// calls on a PdfEncodingMapSimple are restricted. Confirm this is intended.
265 const PdfEncodingLimits& GetLimits() const override;
266
267 PdfCIDToGIDMapConstPtr GetIntrinsicCIDToGIDMap(const PdfDictionary& fontDict, const PdfFontMetrics& metrics) const override;
268
// New virtual introduced by this class (no `override`). Both parameters are
// references to pointers-to-const, so the callee is able to set them.
269 virtual void GetBaseEncoding(const PdfEncodingMap*& baseEncoding, const PdfDifferenceMap*& differences) const;
270
271private:
// Encoding limits stored by value.
272 PdfEncodingLimits m_Limits;
273};
274
// NOTE(review): the class header line is missing from this listing; the body
// starts directly at the opening brace. The symbol index on this page places
// PdfBuiltInEncoding at this position and describes it as "a common base class
// for built-in encodings which are known by name". Restore the
// `class ... PdfBuiltInEncoding : ...` line from the real header.
280{
281 friend class PdfFontMetricsFreetype;
282 friend class PdfPredefinedEncoding;
283 friend class PdfStandardEncoding;
284 friend class PdfSymbolEncoding;
285 friend class PdfZapfDingbatsEncoding;
286 PODOFO_PRIVATE_FRIEND(class AppleLatin1Encoding);
287
288private:
// NOTE(review): nothing is shown for line 289; a private constructor
// presumably belongs here - verify against the real header.
290
291public:
// Get the name of this encoding (per the symbol index); returns m_Name by
// const reference.
296 inline const PdfName& GetName() const { return m_Name; }
297
298protected:
// Overrides of the two pure-virtual conversion hooks of PdfEncodingMap.
299 bool tryGetCharCode(char32_t codePoint, PdfCharCode& codeUnit) const override;
300 bool tryGetCodePoints(const PdfCharCode& codeUnit, const unsigned* cidId, CodePointSpan& codePoints) const override;
301
// Pure virtual: each concrete encoding supplies its code-to-Unicode table.
// The symbol index describes it as a table of 256 values holding the Unicode
// code points assigned to the encoding's codes (description cut off there).
// NOTE(review): the index text says "short values" while the declared element
// type is char32_t - treat the table size and layout as unconfirmed.
311 virtual const char32_t* GetToUnicodeTable() const = 0;
312
313private:
314 // To be called by PdfFontMetricsFreetype
315 void CreateUnicodeToGIDMap(const std::unordered_map<unsigned, unsigned>& codeToGidMap,
316 std::unordered_map<uint32_t, unsigned>& unicodeMap) const;
317
318private:
// Private helper with no parameters; judging by its name it fills
// m_EncodingTable - TODO confirm in the implementation file.
322 void initEncodingTable();
323
324private:
325 PdfName m_Name; // The name of the encoding
326 std::unordered_map<char32_t, char> m_EncodingTable; // The helper table for conversions into this encoding
327};
328
// Dummy encoding map that will just throw an exception (summary taken from
// this page's symbol index). Declared `final`, so it cannot be derived from.
331class PODOFO_API PdfNullEncodingMap final : public PdfEncodingMap
332{
333 friend class PdfEncodingMapFactory;
334
335private:
// NOTE(review): nothing is shown for line 336; a private constructor
// presumably belongs here, reachable by the PdfEncodingMapFactory friend -
// verify against the real header.
337
338public:
339 const PdfEncodingLimits& GetLimits() const override;
340
341protected:
// Overrides of the PdfEncodingMap hooks that are pure virtual in the base.
342 bool tryGetCharCode(char32_t codePoint, PdfCharCode& codeUnit) const override;
343
344 bool tryGetCodePoints(const PdfCharCode& codeUnit, const unsigned* cidId, CodePointSpan& codePoints) const override;
345
346 void AppendToUnicodeEntries(OutputStream& stream, const PdfFont& font, charbuff& temp) const override;
347
348 void AppendCIDMappingEntries(OutputStream& stream, const PdfFont& font, charbuff& temp) const override;
349};
350
// Convenience alias for a const /Encoding map entry shared ptr.
353using PdfEncodingMapConstPtr = std::shared_ptr<const PdfEncodingMap>;
354
// Convenience alias for a shared ptr to a const PdfBuiltInEncoding.
357using PdfBuiltInEncodingConstPtr = std::shared_ptr<const PdfBuiltInEncoding>;
358
// Convenience alias for a const /ToUnicode CMap entry shared ptr.
// Names the same type as PdfEncodingMapConstPtr; only the name differs.
361using PdfToUnicodeMapConstPtr = std::shared_ptr<const PdfEncodingMap>;
362}
363
364#endif // PDF_ENCODING_MAP_H
This file should be included as the FIRST file in every header of PoDoFo lib.
A memory owning immutable block of code points, optimized for small segments as up to 3 elements can ...
Definition PdfEncodingCommon.h:135
An interface for writing blocks of data to a data source.
Definition OutputStream.h:16
A common base class for built-in encodings which are known by name.
Definition PdfEncodingMap.h:280
virtual const char32_t * GetToUnicodeTable() const =0
Gets a table of 256 short values which are the big endian Unicode code points that are assigned to th...
const PdfName & GetName() const
Get the name of this encoding.
Definition PdfEncodingMap.h:296
A bidirectional map from character code units to unspecified code points.
Definition PdfCharCodeMap.h:76
The PDF dictionary data type of PoDoFo (inherits from PdfDataContainer, the base class for such repre...
Definition PdfDictionary.h:80
PdfDifferenceEncoding is an encoding, which is based on either the fonts encoding or a predefined enc...
Definition PdfDifferenceEncoding.h:113
A helper class for PdfDifferenceEncoding that can be used to create a differences array.
Definition PdfDifferenceEncoding.h:27
This factory creates a PdfEncoding from an existing object in the PDF.
Definition PdfEncodingFactory.h:17
Basic PdfEncodingMap implementation using a PdfCharCodeMap.
Definition PdfEncodingMap.h:213
This factory creates a PdfEncodingMap.
Definition PdfEncodingMapFactory.h:16
PdfEncodingMap used by legacy encodings like PdfBuiltInEncoding or PdfDifferenceEncoding that can def...
Definition PdfEncodingMap.h:252
A PdfEncodingMap is a low level interface to convert between utf8 and encoded strings in and to deter...
Definition PdfEncodingMap.h:30
virtual void AppendToUnicodeEntries(OutputStream &stream, const PdfFont &font, charbuff &temp) const =0
During a WriteToUnicodeCMap append "beginbfchar" and "beginbfrange" entries.
virtual bool tryGetCodePoints(const PdfCharCode &codeUnit, const unsigned *cidId, CodePointSpan &codePoints) const =0
Get code points from a code unit.
virtual void AppendCIDMappingEntries(OutputStream &stream, const PdfFont &font, charbuff &temp) const =0
During a PdfEncoding::ExportToFont() append "begincidchar" and/or "begincidrange" entries.
PdfEncodingMapType GetType() const
Type of encoding, may be Simple or CMap.
Definition PdfEncodingMap.h:96
virtual bool tryGetCharCode(char32_t codePoint, PdfCharCode &codeUnit) const =0
Try get char code unit from unicode code point.
A PdfEncoding is used in PdfFont to transform a text string into a representation so that it can be displa...
Definition PdfEncoding.h:55
This abstract class provides access to font metrics information.
Definition PdfFontMetrics.h:34
Before you can draw text on a PDF document, you have to create a font object first.
Definition PdfFont.h:43
PdfIdentityEncoding is a two-byte encoding which can be used with TrueType fonts to represent all cha...
Definition PdfIdentityEncoding.h:29
A list of PdfObjects that constitutes the indirect object list of the document. The PdfParser will rea...
Definition PdfIndirectObjectList.h:29
This class represents a PdfName.
Definition PdfName.h:22
Dummy encoding map that will just throw an exception.
Definition PdfEncodingMap.h:332
This class represents a PDF indirect Object in memory.
Definition PdfObject.h:33
A common base class for PDF-defined predefined encodings which are known by name.
Definition PdfPredefinedEncoding.h:25
Represents a predefined ToUnicode CMap as the ones described in ISO 32000-2:2020 "9....
Definition PdfPredefinedToUnicodeCMap.h:18
StandardEncoding.
Definition PdfPredefinedEncoding.h:112
A PDF string context to iteratively scan a string and collect both CID and unicode codepoints.
Definition PdfEncoding.h:22
Symbol Encoding.
Definition PdfPredefinedEncoding.h:129
ZapfDingbats encoding.
Definition PdfPredefinedEncoding.h:146
Convenient type for char array storage and/or buffer with std::string compatibility.
Definition basetypes.h:35
All classes, functions, types and enums of PoDoFo are members of this namespace.
Definition basetypes.h:13
cspan< char32_t > unicodeview
Unicode code point view.
Definition basetypes.h:24
std::shared_ptr< const PdfEncodingMap > PdfToUnicodeMapConstPtr
Convenience alias for a const /ToUnicode CMap entry shared ptr.
Definition PdfEncodingMap.h:361
std::shared_ptr< const PdfBuiltInEncoding > PdfBuiltInEncodingConstPtr
Convenience typedef for a const /Encoding map entry shared ptr.
Definition PdfEncodingMap.h:357
PdfEncodingMapType
Definition PdfDeclarations.h:123
PdfPredefinedEncodingType
Definition PdfDeclarations.h:130
std::shared_ptr< const PdfEncodingMap > PdfEncodingMapConstPtr
Convenience typedef for a const /Encoding map entry shared ptr.
Definition PdfEncodingMap.h:353
Represent a CID (Character ID) with full code unit information.
Definition PdfEncodingCommon.h:46
A character code unit.
Definition PdfEncodingCommon.h:17