PoDoFo 1.2.0
Loading...
Searching...
No Matches
PdfEncodingMap.h
1// SPDX-FileCopyrightText: 2007 Dominik Seichter <domseichter@web.de>
2// SPDX-FileCopyrightText: 2020 Francesco Pretto <ceztko@gmail.com>
3// SPDX-License-Identifier: LGPL-2.0-or-later OR MPL-2.0
4
5#ifndef PDF_ENCODING_MAP_H
6#define PDF_ENCODING_MAP_H
7
8#include "PdfDeclarations.h"
9#include "PdfObject.h"
10#include "PdfCharCodeMap.h"
11#include "PdfCIDToGIDMap.h"
12
13namespace PoDoFo {
14
15class PdfIndirectObjectList;
16class PdfFont;
17class PdfFontMetrics;
18class PdfEncodingFactory;
19class PdfDifferenceMap;
20
// PdfEncodingMap: low level interface to convert between utf8 and encoded
// strings (summary taken from the generated documentation index; the rest of
// that summary is truncated there).
//
// NOTE(review): this is a generated source listing. The embedded line numbers
// skip values (e.g. 41, 134-142, 166), so some declarations and presumably the
// original documentation comments are not visible here. Confirm every note
// below against the real header.
27class PODOFO_API PdfEncodingMap
28{
// Classes granted access to the private/protected members declared below.
29 friend class PdfEncoding;
30 friend class PdfEncodingMapBase;
31 friend class PdfEncodingMapSimple;
32 friend class PdfDifferenceEncoding;
33 friend class PdfNullEncodingMap;
34 friend class PdfIdentityEncoding;
35 friend class PdfPredefinedToUnicodeCMap;
36 friend class PdfStringScanContext;
37 friend class PdfEncodingFactory;
// PODOFO_PRIVATE_FRIEND is a macro defined elsewhere; presumably it grants
// friendship to the test class -- TODO confirm.
38 PODOFO_PRIVATE_FRIEND(class PdfEncodingTest);
39
40private:
// NOTE(review): line 41 is missing from the listing; a private constructor
// taking the PdfEncodingMapType stored in m_Type presumably lives here.
42
43public:
// Public, non-virtual entry points. Each returns bool and writes its result
// through the trailing reference parameter (presumably true on success --
// confirm in the implementation file).

// Reads the next char code from the [it, end) byte range into codeUnit.
// `it` is taken by non-const reference, so it may be advanced by the call.
45 bool TryGetNextCharCode(std::string_view::iterator& it,
46 const std::string_view::iterator& end, PdfCharCode& codeUnit) const;
47
// Looks up the char code for a single Unicode code point.
51 bool TryGetCharCode(char32_t codePoint, PdfCharCode& codeUnit) const;
52
// Looks up the char code for a sequence of code points (unicodeview is
// cspan<char32_t> per the generated documentation index).
57 bool TryGetCharCode(const unicodeview& codePoints, PdfCharCode& codeUnit) const;
58
// Looks up the char code for a CID given as a plain unsigned value.
60 bool TryGetCharCode(unsigned cid, PdfCharCode& codeUnit) const;
61
// Reads the next CID from the [it, end) byte range into cid.
63 bool TryGetNextCID(std::string_view::iterator& it,
64 const std::string_view::iterator& end, PdfCID& cid) const;
65
// Reads the next code points from the [it, end) byte range into codePoints.
67 bool TryGetNextCodePoints(std::string_view::iterator& it,
68 const std::string_view::iterator& end, CodePointSpan& codePoints) const;
69
// Looks up the code points mapped to a char code unit.
73 bool TryGetCodePoints(const PdfCharCode& codeUnit, CodePointSpan& codePoints) const;
74
// Pure virtual: every concrete map must supply its encoding limits.
75 virtual const PdfEncodingLimits& GetLimits() const = 0;
76
// Type of encoding, may be Simple or CMap. Returns the stored m_Type.
85 PdfEncodingMapType GetType() const { return m_Type; }
86
// Overridable queries with a base implementation defined out of line.
88 virtual PdfPredefinedEncodingType GetPredefinedEncodingType() const;
89
91 virtual bool HasLigaturesSupport() const;
92
93public:
// Virtual destructor: the class is used as a polymorphic base.
94 virtual ~PdfEncodingMap();
95
96protected:
// Virtual customization points. The lower-case names mirror the public
// TryGet* methods above; presumably the public methods dispatch to these --
// TODO confirm.
100 virtual bool tryGetNextCharCode(std::string_view::iterator& it,
101 const std::string_view::iterator& end, PdfCharCode& codeUnit) const;
102
106 virtual bool tryGetCharCodeSpan(const unicodeview& ligature, PdfCharCode& codeUnit) const;
107
// Try get char code unit from unicode code point. Pure virtual.
109 virtual bool tryGetCharCode(char32_t codePoint, PdfCharCode& codeUnit) const = 0;
110
// Get code points from a code unit. Pure virtual. cidId is passed as a
// pointer, so it is presumably optional (may be null) -- TODO confirm.
114 virtual bool tryGetCodePoints(const PdfCharCode& codeUnit, const unsigned* cidId, CodePointSpan& codePoints) const = 0;
115
119 virtual void getExportObject(PdfIndirectObjectList& objects, PdfName& name, PdfObject*& obj) const;
120
124 virtual int GetWModeRaw() const;
125
126protected:
127 virtual void AppendCodeSpaceRange(OutputStream& stream, charbuff& temp) const;
128
// NOTE(review): the generated documentation index lists two pure virtual
// members of this class that are not visible in the listing:
//   AppendToUnicodeEntries  - during a WriteToUnicodeCMap append
//                             "beginbfchar" and "beginbfrange" entries
//   AppendCIDMappingEntries - during a PdfEncoding::ExportToFont() append
//                             "begincidchar" and/or "begincidrange" entries
// Their declarations presumably sit on the missing lines around 134-142.
134
140
143 virtual PdfCIDToGIDMapConstPtr GetIntrinsicCIDToGIDMap(const PdfDictionary& fontDict, const PdfFontMetrics& metrics) const;
144
145private:
150 bool TryGetExportObject(PdfIndirectObjectList& objects, PdfName& name, PdfObject*& obj) const;
151
152 /* Overload of TryGetCodePoints that allows for a fast path to fetch code points from a full CID, if available
153 *
154 * To be called by PdfStringScanContext
155 */
156 bool TryGetCodePoints(const PdfCID& cid, CodePointSpan& codePoints) const;
157
163 bool TryGetCIDId(const PdfCharCode& codeUnit, unsigned& id) const;
164
// NOTE(review): this declaration is cut off; its continuation (line 166) is
// missing from the listing.
165 bool tryGetNextCodePoints(std::string_view::iterator& it, const std::string_view::iterator& end,
167
172 PdfWModeKind GetWModeSafe() const;
173
174private:
// Encoding map type reported by GetType().
175 PdfEncodingMapType m_Type;
176};
177
// PdfEncodingMapBase: basic PdfEncodingMap implementation using a
// PdfCharCodeMap (summary from the generated documentation index).
// The map is held through a std::shared_ptr (m_charMap).
179class PODOFO_API PdfEncodingMapBase : public PdfEncodingMap
180{
181 friend class PdfCMapEncoding;
// PODOFO_PRIVATE_FRIEND is a macro defined elsewhere; presumably it grants
// friendship to the named class -- TODO confirm.
182 PODOFO_PRIVATE_FRIEND(class PdfDynamicEncodingMap);
183
184protected:
// Overrides of the PdfEncodingMap virtual lookup hooks.
185 bool tryGetNextCharCode(std::string_view::iterator& it,
186 const std::string_view::iterator& end, PdfCharCode& codeUnit) const override;
187
188 bool tryGetCharCodeSpan(const unicodeview& codePoints, PdfCharCode& codeUnit) const override;
189
// Try get char code unit from unicode code point.
190 bool tryGetCharCode(char32_t codePoint, PdfCharCode& codeUnit) const override;
191
// Get code points from a code unit.
192 bool tryGetCodePoints(const PdfCharCode& codeUnit, const unsigned* cidId, CodePointSpan& codePoints) const override;
193
194 void AppendCodeSpaceRange(OutputStream& stream, charbuff& temp) const override;
195
// During a WriteToUnicodeCMap append "beginbfchar" and "beginbfrange" entries.
196 void AppendToUnicodeEntries(OutputStream& stream, const PdfFont& font, charbuff& temp) const override;
197
// During a PdfEncoding::ExportToFont() append "begincidchar" and/or
// "begincidrange" entries.
198 void AppendCIDMappingEntries(OutputStream& stream, const PdfFont& font, charbuff& temp) const override;
199
200public:
// Read-only access to the underlying char code map. Dereferences m_charMap
// without a null check -- assumes the constructor always stores a non-null
// pointer; TODO confirm.
201 inline const PdfCharCodeMap& GetCharMap() const { return *m_charMap; }
202
203 const PdfEncodingLimits& GetLimits() const override;
204
205private:
// Private constructor: only this class and its friends can create instances.
// Takes the shared map by rvalue reference plus the encoding map type.
207 PdfEncodingMapBase(std::shared_ptr<PdfCharCodeMap>&& map, PdfEncodingMapType type);
208
209private:
// Shared char code map exposed through GetCharMap().
210 std::shared_ptr<PdfCharCodeMap> m_charMap;
211};
212
// PdfEncodingMapSimple: PdfEncodingMap used by legacy encodings like
// PdfBuiltInEncoding or PdfDifferenceEncoding (summary from the generated
// documentation index; the rest of that summary is truncated there).
// Stores its own PdfEncodingLimits by value (m_Limits).
216class PODOFO_API PdfEncodingMapSimple : public PdfEncodingMap
217{
218 friend class PdfBuiltInEncoding;
219 friend class PdfDifferenceEncoding;
// PODOFO_PRIVATE_FRIEND is a macro defined elsewhere; presumably it grants
// friendship to the named class -- TODO confirm.
220 PODOFO_PRIVATE_FRIEND(class PdfFontBuiltinType1Encoding);
221
222private:
// Private constructor: only this class and its friends can create instances.
// NOTE(review): single-argument and not marked explicit -- confirm an
// implicit conversion from PdfEncodingLimits is intended.
223 PdfEncodingMapSimple(const PdfEncodingLimits& limits);
224
225protected:
// During a WriteToUnicodeCMap append "beginbfchar" and "beginbfrange" entries.
226 void AppendToUnicodeEntries(OutputStream& stream, const PdfFont& font, charbuff& temp) const override;
227
// During a PdfEncoding::ExportToFont() append "begincidchar" and/or
// "begincidrange" entries.
228 void AppendCIDMappingEntries(OutputStream& stream, const PdfFont& font, charbuff& temp) const override;
229
// NOTE(review): GetLimits() is public in PdfEncodingMap but is declared in
// the protected section here, so it is only publicly callable through a
// base-class reference -- confirm this is intentional.
230 const PdfEncodingLimits& GetLimits() const override;
231
232 PdfCIDToGIDMapConstPtr GetIntrinsicCIDToGIDMap(const PdfDictionary& fontDict, const PdfFontMetrics& metrics) const override;
233
// New virtual hook introduced by this class; reports results through the two
// reference-to-pointer out parameters.
234 virtual void GetBaseEncoding(const PdfEncodingMap*& baseEncoding, const PdfDifferenceMap*& differences) const;
235
236private:
// Encoding limits, presumably the value returned by GetLimits() -- confirm.
237 PdfEncodingLimits m_Limits;
238};
239
// Body of PdfBuiltInEncoding: a common base class for built-in encodings
// which are known by name (name and summary taken from the generated
// documentation index, which places the definition at line 243).
// NOTE(review): the class header line (with its base class list) is missing
// from this listing; confirm against the real header.
243{
244 friend class PdfFontMetricsFreetype;
245 friend class PdfPredefinedEncoding;
246 friend class PdfStandardEncoding;
247 friend class PdfSymbolEncoding;
248 friend class PdfZapfDingbatsEncoding;
// PODOFO_PRIVATE_FRIEND is a macro defined elsewhere; presumably it grants
// friendship to the named class -- TODO confirm.
249 PODOFO_PRIVATE_FRIEND(class AppleLatin1Encoding);
250
251private:
// NOTE(review): line 252 is missing from the listing; presumably the private
// constructor is declared there.
253
254public:
// Get the name of this encoding. Returns a reference to the stored m_Name.
258 inline const PdfName& GetName() const { return m_Name; }
259
260protected:
// Try get char code unit from unicode code point.
261 bool tryGetCharCode(char32_t codePoint, PdfCharCode& codeUnit) const override;
// Get code points from a code unit.
262 bool tryGetCodePoints(const PdfCharCode& codeUnit, const unsigned* cidId, CodePointSpan& codePoints) const override;
263
// Pure virtual: per the generated documentation index, gets a table of 256
// values holding the Unicode code points of the encoding (the description is
// truncated there).
// NOTE(review): that description says "short values" and "big endian", but
// the declared element type is char32_t -- confirm which is current.
272 virtual const char32_t* GetToUnicodeTable() const = 0;
273
274private:
275 // To be called by PdfFontMetricsFreetype
276 void CreateUnicodeToGIDMap(const std::unordered_map<unsigned, unsigned>& codeToGidMap,
277 std::unordered_map<uint32_t, unsigned>& unicodeMap) const;
278
279private:
// Non-const helper; presumably fills m_EncodingTable -- TODO confirm.
282 void initEncodingTable();
283
284private:
285 PdfName m_Name; // The name of the encoding
286 std::unordered_map<char32_t, char> m_EncodingTable; // The helper table for conversions into this encoding
287};
288
// PdfNullEncodingMap: dummy encoding map that will just throw exception
// (summary from the generated documentation index). Declared final, so it
// cannot be derived from.
290class PODOFO_API PdfNullEncodingMap final : public PdfEncodingMap
291{
292 friend class PdfEncodingMapFactory;
293
294private:
// NOTE(review): line 295 is missing from the listing; presumably the private
// constructor is declared there.
296
297public:
298 const PdfEncodingLimits& GetLimits() const override;
299
300protected:
// Overrides of the pure virtual PdfEncodingMap hooks.
// Try get char code unit from unicode code point.
301 bool tryGetCharCode(char32_t codePoint, PdfCharCode& codeUnit) const override;
302
// Get code points from a code unit.
303 bool tryGetCodePoints(const PdfCharCode& codeUnit, const unsigned* cidId, CodePointSpan& codePoints) const override;
304
// During a WriteToUnicodeCMap append "beginbfchar" and "beginbfrange" entries.
305 void AppendToUnicodeEntries(OutputStream& stream, const PdfFont& font, charbuff& temp) const override;
306
// During a PdfEncoding::ExportToFont() append "begincidchar" and/or
// "begincidrange" entries.
307 void AppendCIDMappingEntries(OutputStream& stream, const PdfFont& font, charbuff& temp) const override;
308};
309
// Convenience alias for a const /Encoding map entry shared ptr.
311using PdfEncodingMapConstPtr = std::shared_ptr<const PdfEncodingMap>;
312
// Convenience alias for a shared ptr to a const PdfBuiltInEncoding.
314using PdfBuiltInEncodingConstPtr = std::shared_ptr<const PdfBuiltInEncoding>;
315
// Convenience alias for a const /ToUnicode CMap entry shared ptr.
// Same underlying type as PdfEncodingMapConstPtr; the separate name only
// documents the intended role.
317using PdfToUnicodeMapConstPtr = std::shared_ptr<const PdfEncodingMap>;
318}
319
320#endif // PDF_ENCODING_MAP_H
This file should be included as the FIRST file in every header of PoDoFo lib.
A memory owning immutable block of code points, optimized for small segments as up to 3 elements can ...
Definition PdfEncodingCommon.h:119
An interface for writing blocks of data to a data source.
Definition OutputStream.h:15
A common base class for built-in encodings which are known by name.
Definition PdfEncodingMap.h:243
virtual const char32_t * GetToUnicodeTable() const =0
Gets a table of 256 short values which are the big endian Unicode code points that are assigned to th...
const PdfName & GetName() const
Get the name of this encoding.
Definition PdfEncodingMap.h:258
A bidirectional map from character code units to unspecified code points.
Definition PdfCharCodeMap.h:72
The PDF dictionary data type of PoDoFo (inherits from PdfDataContainer, the base class for such repre...
Definition PdfDictionary.h:77
PdfDifferenceEncoding is an encoding, which is based on either the fonts encoding or a predefined enc...
Definition PdfDifferenceEncoding.h:105
A helper class for PdfDifferenceEncoding that can be used to create a differences array.
Definition PdfDifferenceEncoding.h:26
This factory creates a PdfEncoding from an existing object in the PDF.
Definition PdfEncodingFactory.h:16
Basic PdfEncodingMap implementation using a PdfCharCodeMap.
Definition PdfEncodingMap.h:180
This factory creates a PdfEncodingMap.
Definition PdfEncodingMapFactory.h:15
PdfEncodingMap used by legacy encodings like PdfBuiltInEncoding or PdfDifferenceEncoding that can def...
Definition PdfEncodingMap.h:217
A PdfEncodingMap is a low level interface to convert between utf8 and encoded strings in and to deter...
Definition PdfEncodingMap.h:28
virtual void AppendToUnicodeEntries(OutputStream &stream, const PdfFont &font, charbuff &temp) const =0
During a WriteToUnicodeCMap append "beginbfchar" and "beginbfrange" entries.
virtual bool tryGetCodePoints(const PdfCharCode &codeUnit, const unsigned *cidId, CodePointSpan &codePoints) const =0
Get code points from a code unit.
virtual void AppendCIDMappingEntries(OutputStream &stream, const PdfFont &font, charbuff &temp) const =0
During a PdfEncoding::ExportToFont() append "begincidchar" and/or "begincidrange" entries.
PdfEncodingMapType GetType() const
Type of encoding, may be Simple or CMap.
Definition PdfEncodingMap.h:85
virtual bool tryGetCharCode(char32_t codePoint, PdfCharCode &codeUnit) const =0
Try to get the char code unit from a Unicode code point.
A PdfEncoding is in PdfFont to transform a text string into a representation so that it can be displa...
Definition PdfEncoding.h:51
This abstract class provides access to font metrics information.
Definition PdfFontMetrics.h:31
Before you can draw text on a PDF document, you have to create a font object first.
Definition PdfFont.h:42
PdfIdentityEncoding is a two-byte encoding which can be used with TrueType fonts to represent all cha...
Definition PdfIdentityEncoding.h:27
A list of PdfObjects that constitutes the indirect object list of the document. The PdfParser will rea...
Definition PdfIndirectObjectList.h:28
This class represents a PdfName.
Definition PdfName.h:21
Dummy encoding map that will just throw an exception.
Definition PdfEncodingMap.h:291
This class represents a PDF indirect Object in memory.
Definition PdfObject.h:31
A common base class for Pdf defined predefined encodings which are known by name.
Definition PdfPredefinedEncoding.h:23
Represents a predefined ToUnicode CMap as the ones described in ISO 32000-2:2020 "9....
Definition PdfPredefinedToUnicodeCMap.h:16
StandardEncoding.
Definition PdfPredefinedEncoding.h:101
A PDF string context to iteratively scan a string and collect both CID and unicode codepoints.
Definition PdfEncoding.h:21
Symbol Encoding.
Definition PdfPredefinedEncoding.h:116
ZapfDingbats encoding.
Definition PdfPredefinedEncoding.h:131
Convenient type for char array storage and/or buffer with std::string compatibility.
Definition basetypes.h:30
All classes, functions, types and enums of PoDoFo are members of this namespace.
Definition basetypes.h:13
cspan< char32_t > unicodeview
Unicode code point view.
Definition basetypes.h:21
std::shared_ptr< const PdfEncodingMap > PdfToUnicodeMapConstPtr
Convenience alias for a const /ToUnicode CMap entry shared ptr.
Definition PdfEncodingMap.h:317
std::shared_ptr< const PdfBuiltInEncoding > PdfBuiltInEncodingConstPtr
Convenience typedef for a const /Encoding map entry shared ptr.
Definition PdfEncodingMap.h:314
PdfEncodingMapType
Definition PdfDeclarations.h:114
PdfPredefinedEncodingType
Definition PdfDeclarations.h:121
std::shared_ptr< const PdfEncodingMap > PdfEncodingMapConstPtr
Convenience typedef for a const /Encoding map entry shared ptr.
Definition PdfEncodingMap.h:311
Represent a CID (Character ID) with full code unit information.
Definition PdfEncodingCommon.h:43
A character code unit.
Definition PdfEncodingCommon.h:16