PoDoFo 1.0.0-dev
Loading...
Searching...
No Matches
PdfEncodingMap.h
1
7#ifndef PDF_ENCODING_MAP_H
8#define PDF_ENCODING_MAP_H
9
10#include "PdfDeclarations.h"
11#include "PdfObject.h"
12#include "PdfCharCodeMap.h"
13
14namespace PoDoFo {
15
16class PdfIndirectObjectList;
17class PdfFont;
18
// PdfEncodingMap: abstract base class for encoding maps. GetLimits(),
// tryGetCharCode() and tryGetCodePoints() are pure virtual, so the class cannot
// be instantiated directly. Its brief describes it as a low level interface to
// convert between UTF-8 and encoded strings.
// NOTE(review): every line of this listing starts with a fused header line
// number, and the numbering has gaps (apparently where doc comments were) —
// a few declarations are missing too, see the notes below.
27class PODOFO_API PdfEncodingMap
28{
// Classes that are granted access to the non-public members of this class.
29 friend class PdfEncoding;
30 friend class PdfEncodingMapBase;
31 friend class PdfEncodingMapOneByte;
32 friend class PdfDifferenceEncoding;
33 friend class PdfNullEncodingMap;
34 friend class PdfIdentityEncoding;
35 friend class PdfPredefinedToUnicodeCMap;
36 friend class PdfStringScanContext;
37 PODOFO_PRIVATE_FRIEND(class PdfEncodingTest);
38
39private:
// NOTE(review): the declaration on header line 41 is absent from this listing
// (presumably the constructor that initializes m_Type — confirm in the header).
41
42public:
// Public lookup API. Each Try* method returns bool (presumably success/failure)
// and takes a non-const reference as its last parameter (presumably the result,
// written on success — confirm against the implementation).
// The "Next" variants take an iterator by non-const reference plus an end
// iterator; presumably they advance `it` past the consumed bytes — TODO confirm.
45 bool TryGetNextCharCode(std::string_view::iterator& it,
46 const std::string_view::iterator& end, PdfCharCode& codeUnit) const;
47
// Overload taking a single Unicode code point.
51 bool TryGetCharCode(char32_t codePoint, PdfCharCode& codeUnit) const;
52
// Overload taking a view over several code points.
58 bool TryGetCharCode(const unicodeview& codePoints, PdfCharCode& codeUnit) const;
59
// Overload taking a CID value.
63 bool TryGetCharCode(unsigned cid, PdfCharCode& codeUnit) const;
64
67 bool TryGetNextCID(std::string_view::iterator& it,
68 const std::string_view::iterator& end, PdfCID& cid) const;
69
72 bool TryGetNextCodePoints(std::string_view::iterator& it,
73 const std::string_view::iterator& end, CodePointSpan& codePoints) const;
74
79 bool TryGetCodePoints(const PdfCharCode& codeUnit, CodePointSpan& codePoints) const;
80
// Pure virtual: every concrete map must supply its PdfEncodingLimits.
81 virtual const PdfEncodingLimits& GetLimits() const = 0;
82
// Returns the stored m_Type. Per the member's brief, the type may be Simple or CMap.
93 PdfEncodingMapType GetType() const { return m_Type; }
94
// Virtual with a base implementation (not pure); derived classes may override.
98 virtual PdfPredefinedEncodingType GetPredefinedEncodingType() const;
99
// Virtual with a base implementation (not pure); derived classes may override.
103 virtual bool HasLigaturesSupport() const;
104
105public:
// Virtual destructor, so destroying a derived map through a PdfEncodingMap
// pointer is well defined.
106 virtual ~PdfEncodingMap();
107
108protected:
// Customization points for derived classes. Only tryGetCharCode() and
// tryGetCodePoints() are pure virtual in the declarations visible here.
114 virtual bool tryGetNextCharCode(std::string_view::iterator& it,
115 const std::string_view::iterator& end, PdfCharCode& codeUnit) const;
116
122 virtual bool tryGetCharCodeSpan(const unicodeview& ligature, PdfCharCode& codeUnit) const;
123
// Try to get the char code unit for a Unicode code point.
127 virtual bool tryGetCharCode(char32_t codePoint, PdfCharCode& codeUnit) const = 0;
128
// Get the code points for a code unit. cidId is passed by pointer
// (presumably optional/nullable — TODO confirm).
134 virtual bool tryGetCodePoints(const PdfCharCode& codeUnit, const unsigned* cidId, CodePointSpan& codePoints) const = 0;
135
140 virtual void getExportObject(PdfIndirectObjectList& objects, PdfName& name, PdfObject*& obj) const;
141
// Returns a plain int, whereas the private GetWModeSafe() returns PdfWModeKind.
146 virtual int GetWModeRaw() const;
147
// Static helpers writing to an OutputStream; u16tmp is a caller-supplied
// std::u16string (presumably scratch storage reused between calls — confirm).
148 static void AppendUTF16CodeTo(OutputStream& stream, char32_t codePoint, std::u16string& u16tmp);
149
150 static void AppendUTF16CodeTo(OutputStream& stream, const unicodeview& codePoints, std::u16string& u16tmp);
151
152protected:
153 virtual void AppendCodeSpaceRange(OutputStream& stream, charbuff& temp) const;
154
// NOTE(review): header lines 161 and 168 are empty in this listing. Derived
// classes in this file override AppendToUnicodeEntries() and
// AppendCIDMappingEntries(), so their pure virtual declarations presumably
// belong on these two lines — confirm against the real header.
161
168
169private:
175 bool TryGetExportObject(PdfIndirectObjectList& objects, PdfName& name, PdfObject*& obj) const;
176
177 /* Overload of TryGetCodePoints that allows for a fast path to fetch code points from a full CID, if available
178 *
179 * To be called by PdfStringScanContext
180 */
181 bool TryGetCodePoints(const PdfCID& cid, CodePointSpan& codePoints) const;
182
189 bool TryGetCIDId(const PdfCharCode& codeUnit, unsigned& id) const;
190
// NOTE(review): the next declaration is truncated in this listing — its
// remaining parameters and closing ") const;" (header lines 192-193) are
// missing. Restore them from the real header before compiling.
191 bool tryGetNextCodePoints(std::string_view::iterator& it, const std::string_view::iterator& end,
193
199 PdfWModeKind GetWModeSafe() const;
200
201private:
// Encoding map type, returned by GetType().
202 PdfEncodingMapType m_Type;
203};
204
// PdfEncodingMapBase: basic PdfEncodingMap implementation using a
// PdfCharCodeMap, which it holds through the shared pointer m_charMap.
208class PODOFO_API PdfEncodingMapBase : public PdfEncodingMap
209{
210 friend class PdfDynamicEncodingMap;
211
212protected:
// NOTE(review): the declaration on header lines 213-214 is absent from this
// listing (presumably a protected constructor — confirm in the real header).
214
215protected:
// Overrides of the PdfEncodingMap customization points.
216 bool tryGetNextCharCode(std::string_view::iterator& it,
217 const std::string_view::iterator& end, PdfCharCode& codeUnit) const override;
218
219 bool tryGetCharCodeSpan(const unicodeview& codePoints, PdfCharCode& codeUnit) const override;
220
221 bool tryGetCharCode(char32_t codePoint, PdfCharCode& codeUnit) const override;
222
223 bool tryGetCodePoints(const PdfCharCode& codeUnit, const unsigned* cidId, CodePointSpan& codePoints) const override;
224
225 void AppendCodeSpaceRange(OutputStream& stream, charbuff& temp) const override;
226
227 void AppendToUnicodeEntries(OutputStream& stream, charbuff& temp) const override;
228
229 void AppendCIDMappingEntries(OutputStream& stream, const PdfFont& font, charbuff& temp) const override;
230
231public:
// Returns a const reference to the map owned by m_charMap. The pointer is
// dereferenced without a null check, so m_charMap must be non-null here.
232 inline const PdfCharCodeMap& GetCharMap() const { return *m_charMap; }
233
234 const PdfEncodingLimits& GetLimits() const override;
235
236private:
// Private constructor: usable only from this class and its friends.
237 PdfEncodingMapBase(const std::shared_ptr<PdfCharCodeMap>& map, PdfEncodingMapType type);
238
239private:
// Shared ownership of the underlying char code map.
240 std::shared_ptr<PdfCharCodeMap> m_charMap;
241};
242
// PdfEncodingMapOneByte: PdfEncodingMap used by encodings like
// PdfBuiltInEncoding or PdfDifferenceEncoding. It stores its own
// PdfEncodingLimits in m_Limits. It does not override tryGetCharCode() or
// tryGetCodePoints(), so it remains abstract.
248class PODOFO_API PdfEncodingMapOneByte : public PdfEncodingMap
249{
250 friend class PdfBuiltInEncoding;
251 friend class PdfDifferenceEncoding;
252
253private:
// Private constructor: only the friend classes above can construct or derive
// from this class.
254 PdfEncodingMapOneByte(const PdfEncodingLimits& limits);
255
256protected:
257 void AppendToUnicodeEntries(OutputStream& stream, charbuff& temp) const override;
258
259 void AppendCIDMappingEntries(OutputStream& stream, const PdfFont& font, charbuff& temp) const override;
260
// NOTE(review): declared under protected here although GetLimits() is public
// in PdfEncodingMap; calls through a PdfEncodingMap reference still work.
261 const PdfEncodingLimits& GetLimits() const override;
262
263private:
// Limits of this encoding (presumably what GetLimits() returns — confirm in the .cpp).
264 PdfEncodingLimits m_Limits;
265};
266
// PdfBuiltInEncoding: a common base class for built-in encodings which are
// known by name.
// NOTE(review): the class head (header line 271) is missing from this listing;
// the constructor name and the overrides below indicate this is the body of
// PdfBuiltInEncoding. Restore the head from the real header before compiling.
272{
273 friend class PdfFontMetricsFreetype;
274 friend class PdfPredefinedEncoding;
275 friend class PdfStandardEncoding;
276 friend class PdfSymbolEncoding;
277 friend class PdfZapfDingbatsEncoding;
278 friend class AppleLatin1Encoding;
279
280private:
// Private constructor: only the friend classes above can construct or derive
// from this class.
281 PdfBuiltInEncoding(const PdfName& name);
282
283public:
// Get the name of this encoding (returns m_Name by const reference).
288 inline const PdfName& GetName() const { return m_Name; }
289
290protected:
291 bool tryGetCharCode(char32_t codePoint, PdfCharCode& codeUnit) const override;
292 bool tryGetCodePoints(const PdfCharCode& codeUnit, const unsigned* cidId, CodePointSpan& codePoints) const override;
293
// Pure virtual: each concrete encoding supplies its to-Unicode table.
// NOTE(review): the generated docs speak of a table of 256 "short" values,
// while the element type here is char32_t — confirm table size and layout.
303 virtual const char32_t* GetToUnicodeTable() const = 0;
304
305private:
306 // To be called by PdfFontMetricsFreetype
307 void CreateUnicodeToGIDMap(const std::unordered_map<unsigned, unsigned>& codeToGidMap,
308 std::unordered_map<uint32_t, unsigned>& unicodeMap) const;
309
310private:
// Presumably populates m_EncodingTable — TODO confirm in the .cpp.
314 void initEncodingTable();
315
316private:
317 PdfName m_Name; // The name of the encoding
318 std::unordered_map<char32_t, char> m_EncodingTable; // The helper table for conversions into this encoding
319};
320
// PdfNullEncodingMap: dummy encoding map that will just throw an exception
// (per its brief; which calls throw is not visible in this header). Declared
// final, so it cannot be derived from.
323class PODOFO_API PdfNullEncodingMap final : public PdfEncodingMap
324{
325 friend class PdfEncodingMapFactory;
326
327private:
// NOTE(review): the declaration on header lines 328-329 is absent from this
// listing (presumably a private constructor used by PdfEncodingMapFactory —
// confirm in the real header).
329
330public:
331 const PdfEncodingLimits& GetLimits() const override;
332
333protected:
// Overrides of the pure virtual PdfEncodingMap members.
334 bool tryGetCharCode(char32_t codePoint, PdfCharCode& codeUnit) const override;
335
336 bool tryGetCodePoints(const PdfCharCode& codeUnit, const unsigned* cidId, CodePointSpan& codePoints) const override;
337
338 void AppendToUnicodeEntries(OutputStream& stream, charbuff& temp) const override;
339
340 void AppendCIDMappingEntries(OutputStream& stream, const PdfFont& font, charbuff& temp) const override;
341};
342
// Convenience alias for a const /Encoding map entry shared pointer.
345using PdfEncodingMapConstPtr = std::shared_ptr<const PdfEncodingMap>;
346
// Convenience alias for a shared pointer to a const PdfBuiltInEncoding.
349using PdfBuiltInEncodingConstPtr = std::shared_ptr<const PdfBuiltInEncoding>;
350
// Convenience alias for a const /ToUnicode CMap entry shared pointer.
// Same underlying type as PdfEncodingMapConstPtr; the separate name only
// documents intent.
353using PdfToUnicodeMapConstPtr = std::shared_ptr<const PdfEncodingMap>;
354}
355
356#endif // PDF_ENCODING_MAP_H
SPDX-FileCopyrightText: (C) 2005 Dominik Seichter domseichter@web.de SPDX-FileCopyrightText: (C) 2020...
A memory owning immutable block of code points, optimized for small segments as up to 3 elements can ...
Definition PdfCharCodeMap.h:29
An interface for writing blocks of data to a data source.
Definition OutputStream.h:18
A common base class for built-in encodings which are known by name.
Definition PdfEncodingMap.h:272
virtual const char32_t * GetToUnicodeTable() const =0
Gets a table of 256 short values which are the big endian Unicode code points that are assigned to th...
const PdfName & GetName() const
Get the name of this encoding.
Definition PdfEncodingMap.h:288
A bidirectional map from character code units to unspecified code points.
Definition PdfCharCodeMap.h:132
PdfDifferenceEncoding is an encoding, which is based on either the fonts encoding or a predefined enc...
Definition PdfDifferenceEncoding.h:108
Basic PdfEncodingMap implementation using a PdfCharCodeMap.
Definition PdfEncodingMap.h:209
This factory creates a PdfEncodingMap.
Definition PdfEncodingMapFactory.h:18
PdfEncodingMap used by encodings like PdfBuiltInEncoding or PdfDifferenceEncoding that can define all...
Definition PdfEncodingMap.h:249
A PdfEncodingMap is a low level interface to convert between utf8 and encoded strings in and to deter...
Definition PdfEncodingMap.h:28
virtual bool tryGetCodePoints(const PdfCharCode &codeUnit, const unsigned *cidId, CodePointSpan &codePoints) const =0
Get code points from a code unit.
virtual void AppendToUnicodeEntries(OutputStream &stream, charbuff &temp) const =0
During a WriteToUnicodeCMap append "beginbfchar" and "beginbfrange" entries.
virtual void AppendCIDMappingEntries(OutputStream &stream, const PdfFont &font, charbuff &temp) const =0
During a PdfEncoding::ExportToFont() append "begincidchar" and/or "begincidrange" entries.
PdfEncodingMapType GetType() const
Type of encoding, may be Simple or CMap.
Definition PdfEncodingMap.h:93
virtual bool tryGetCharCode(char32_t codePoint, PdfCharCode &codeUnit) const =0
Try get char code unit from unicode code point.
A PdfEncoding is used in PdfFont to transform a text string into a representation so that it can be displa...
Definition PdfEncoding.h:55
Before you can draw text on a PDF document, you have to create a font object first.
Definition PdfFont.h:45
PdfIdentityEncoding is a two-byte encoding which can be used with TrueType fonts to represent all cha...
Definition PdfIdentityEncoding.h:31
A list of PdfObjects that constitutes the indirect object list of the document. The PdfParser will rea...
Definition PdfIndirectObjectList.h:30
This class represents a PdfName.
Definition PdfName.h:24
Dummy encoding map that will just throw exception.
Definition PdfEncodingMap.h:324
This class represents a PDF indirect Object in memory.
Definition PdfObject.h:35
A common base class for the predefined encodings defined by the PDF specification, which are known by name.
Definition PdfPredefinedEncoding.h:27
Represents a predefined ToUnicode CMap as the ones described in ISO 32000-2:2020 "9....
Definition PdfPredefinedToUnicodeCMap.h:21
StandardEncoding.
Definition PdfPredefinedEncoding.h:107
A PDF string context to iteratively scan a string and collect both CID and unicode codepoints.
Definition PdfEncoding.h:24
Symbol Encoding.
Definition PdfPredefinedEncoding.h:124
ZapfDingbats encoding.
Definition PdfPredefinedEncoding.h:141
Convenient type for char array storage and/or buffer with std::string compatibility.
Definition basetypes.h:38
SPDX-FileCopyrightText: (C) 2022 Francesco Pretto ceztko@gmail.com SPDX-License-Identifier: LGPL-2....
Definition basetypes.h:16
cspan< char32_t > unicodeview
Unicode code point view.
Definition basetypes.h:27
std::shared_ptr< const PdfEncodingMap > PdfToUnicodeMapConstPtr
Convenience alias for a const /ToUnicode CMap entry shared ptr.
Definition PdfEncodingMap.h:353
std::shared_ptr< const PdfBuiltInEncoding > PdfBuiltInEncodingConstPtr
Convenience typedef for a const /Encoding map entry shared ptr.
Definition PdfEncodingMap.h:349
PdfEncodingMapType
Definition PdfDeclarations.h:125
PdfPredefinedEncodingType
Definition PdfDeclarations.h:132
std::shared_ptr< const PdfEncodingMap > PdfEncodingMapConstPtr
Convenience typedef for a const /Encoding map entry shared ptr.
Definition PdfEncodingMap.h:345
Represent a CID (Character ID) with full code unit information.
Definition PdfEncodingCommon.h:52
A character code unit.
Definition PdfEncodingCommon.h:20