PoDoFo  1.0.0-dev
PdfEncodingMap.h
1 
7 #ifndef PDF_ENCODING_MAP_H
8 #define PDF_ENCODING_MAP_H
9 
10 #include "PdfDeclarations.h"
11 #include "PdfObject.h"
12 #include "PdfCharCodeMap.h"
13 
14 namespace PoDoFo {
15 
16 class PdfIndirectObjectList;
17 class PdfFont;
18 
/**
 * Low-level interface for converting between Unicode code points, character
 * code units (PdfCharCode) and CIDs (PdfCID).
 *
 * The class is abstract: GetLimits(), tryGetCharCode(), tryGetCodePoints(),
 * AppendToUnicodeEntries() and AppendCIDMappingEntries() are pure virtual.
 * The public TryGet* members are non-virtual; the lowercase protected
 * virtuals are presumably the hooks they dispatch to -- confirm in the
 * implementation file.
 *
 * All TryGet and tryGet members return bool, presumably true on success and
 * false when no mapping exists -- TODO confirm.
 */
27 class PODOFO_API PdfEncodingMap
28 {
    // Classes granted access to the private and protected members below.
29  friend class PdfEncoding;
30  friend class PdfEncodingMapBase;
31  friend class PdfEncodingMapOneByte;
32  friend class PdfNullEncodingMap;
33  friend class PdfIdentityEncoding;
34  friend class PdfPredefinedToUnicodeCMap;
35  friend class PdfStringScanContext;
36 
37 private:
    // NOTE(review): nothing is declared in this section in this listing
    // (original line 38 is absent). m_Type has no visible constructor or
    // initializer here -- confirm against the real header.
39 
40 public:
    // Try to read the next character code unit from [it, end) into codeUnit.
    // it is taken by non-const reference, so it is presumably advanced past
    // the consumed bytes -- TODO confirm.
43  bool TryGetNextCharCode(std::string_view::iterator& it,
44  const std::string_view::iterator& end, PdfCharCode& codeUnit) const;
45 
    // Try to get the char code unit for a single Unicode code point.
49  bool TryGetCharCode(char32_t codePoint, PdfCharCode& codeUnit) const;
50 
    // Try to get the char code unit for a sequence of Unicode code points
    // (the matching protected hook names this parameter ligature).
56  bool TryGetCharCode(const unicodeview& codePoints, PdfCharCode& codeUnit) const;
57 
    // Try to get the char code unit for a CID given as a plain unsigned value.
61  bool TryGetCharCode(unsigned cid, PdfCharCode& codeUnit) const;
62 
    // Try to read the next CID from [it, end) into cid.
    // Presumably advances it -- TODO confirm.
65  bool TryGetNextCID(std::string_view::iterator& it,
66  const std::string_view::iterator& end, PdfCID& cid) const;
67 
    // Try to read the next code unit from [it, end) and output its code
    // points into codePoints. Presumably advances it -- TODO confirm.
70  bool TryGetNextCodePoints(std::string_view::iterator& it,
71  const std::string_view::iterator& end, CodePointSpan& codePoints) const;
72 
    // Try to get the code points mapped to the given code unit.
77  bool TryGetCodePoints(const PdfCharCode& codeUnit, CodePointSpan& codePoints) const;
78 
    // Returns a reference to the PdfEncodingLimits of this map.
    // Pure virtual: every concrete subclass must provide it.
79  virtual const PdfEncodingLimits& GetLimits() const = 0;
80 
    // Type of encoding (returns the stored m_Type).
91  PdfEncodingMapType GetType() const { return m_Type; }
92 
    // Virtual; presumably reports which predefined encoding, if any, this
    // map corresponds to -- confirm against the implementation.
96  virtual PdfPredefinedEncodingType GetPredefinedEncodingType() const;
97 
    // Virtual; reports whether this map supports ligatures. Presumably
    // related to the unicodeview overload of TryGetCharCode -- confirm.
101  virtual bool HasLigaturesSupport() const;
102 
    // Try to obtain the export form of this map. Results are returned through
    // the name and obj output parameters; which of the two is set in which
    // case is not visible from this declaration.
108  bool TryGetExportObject(PdfIndirectObjectList& objects, PdfName& name, PdfObject*& obj) const;
109 
110 public:
    // Virtual destructor: the class is used as a polymorphic base.
111  virtual ~PdfEncodingMap();
112 
113 protected:
    // Overridable hook with the same parameters as TryGetNextCharCode.
119  virtual bool tryGetNextCharCode(std::string_view::iterator& it,
120  const std::string_view::iterator& end, PdfCharCode& codeUnit) const;
121 
    // Overridable hook: try to get the char code unit for a sequence of
    // code points (a ligature, per the parameter name).
127  virtual bool tryGetCharCodeSpan(const unicodeview& ligature, PdfCharCode& codeUnit) const;
128 
    // Try to get the char code unit from a Unicode code point. Pure virtual.
132  virtual bool tryGetCharCode(char32_t codePoint, PdfCharCode& codeUnit) const = 0;
133 
    // Get code points from a code unit. Pure virtual.
    // cidId is passed as a pointer, so it may presumably be null when no
    // CID identifier is available -- TODO confirm.
139  virtual bool tryGetCodePoints(const PdfCharCode& codeUnit, const unsigned* cidId, CodePointSpan& codePoints) const = 0;
140 
    // Overridable hook with the same parameters as TryGetExportObject,
    // but with no return value.
145  virtual void getExportObject(PdfIndirectObjectList& objects, PdfName& name, PdfObject*& obj) const;
146 
    // Virtual; returns a raw int value. Presumably the CMap writing mode
    // (WMode); see GetWModeSafe for the PdfWModeKind form -- confirm.
151  virtual int GetWModeRaw() const;
152 
    // Static helper taking a single code point, an output stream and a
    // std::u16string named u16tmp (presumably a reusable scratch buffer --
    // confirm). Judging by the name it appends UTF-16 output to stream.
153  static void AppendUTF16CodeTo(OutputStream& stream, char32_t codePoint, std::u16string& u16tmp);
154 
    // Same helper for a sequence of code points.
155  static void AppendUTF16CodeTo(OutputStream& stream, const unicodeview& codePoints, std::u16string& u16tmp);
156 
157 protected:
    // Virtual with a default implementation; judging by the name it appends
    // the code space range section to stream -- confirm.
158  virtual void AppendCodeSpaceRange(OutputStream& stream, charbuff& temp) const;
159 
    // During a WriteToUnicodeCMap, append beginbfchar and beginbfrange
    // entries. Pure virtual.
165  virtual void AppendToUnicodeEntries(OutputStream& stream, charbuff& temp) const = 0;
166 
    // During a PdfEncoding::ExportToFont(), append begincidchar and/or
    // begincidrange entries. Pure virtual.
172  virtual void AppendCIDMappingEntries(OutputStream& stream, const PdfFont& font, charbuff& temp) const = 0;
173 
174 private:
175  /* Overload of TryGetCodePoints that allows for a fast path to fetch code points from a full CID, if available
176  *
177  * To be called by PdfStringScanContext
178  */
179  bool TryGetCodePoints(const PdfCID& cid, CodePointSpan& codePoints) const;
180 
    // Try to get the CID identifier for a code unit; the result is written to id.
187  bool TryGetCIDId(const PdfCharCode& codeUnit, unsigned& id) const;
188 
    // Private variant of TryGetNextCodePoints that additionally outputs the
    // scanned code unit through codeUnit.
189  bool tryGetNextCodePoints(std::string_view::iterator& it, const std::string_view::iterator& end,
190  PdfCharCode& codeUnit, CodePointSpan& codePoints) const;
191 
    // Returns the writing mode as a PdfWModeKind. Presumably a validated
    // form of GetWModeRaw -- confirm.
197  PdfWModeKind GetWModeSafe() const;
198 
199 private:
    // Encoding map type returned by GetType().
200  PdfEncodingMapType m_Type;
201 };
202 
/**
 * Basic PdfEncodingMap implementation using a PdfCharCodeMap.
 *
 * Overrides every pure virtual of PdfEncodingMap, plus tryGetNextCharCode(),
 * tryGetCharCodeSpan() and AppendCodeSpaceRange(). The map is held through a
 * std::shared_ptr.
 */
206 class PODOFO_API PdfEncodingMapBase : public PdfEncodingMap
207 {
    // Needs access to the private constructor and/or m_charMap.
208  friend class PdfDynamicEncodingMap;
209 
210 protected:
    // NOTE(review): nothing is declared in this section in this listing
    // (original line 211 is absent); a protected constructor may have been
    // elided -- confirm against the real header.
212 
213 protected:
    // Overrides of the PdfEncodingMap lookup hooks.
214  bool tryGetNextCharCode(std::string_view::iterator& it,
215  const std::string_view::iterator& end, PdfCharCode& codeUnit) const override;
216 
217  bool tryGetCharCodeSpan(const unicodeview& codePoints, PdfCharCode& codeUnit) const override;
218 
219  bool tryGetCharCode(char32_t codePoint, PdfCharCode& codeUnit) const override;
220 
221  bool tryGetCodePoints(const PdfCharCode& codeUnit, const unsigned* cidId, CodePointSpan& codePoints) const override;
222 
    // Overrides of the PdfEncodingMap serialization hooks.
223  void AppendCodeSpaceRange(OutputStream& stream, charbuff& temp) const override;
224 
225  void AppendToUnicodeEntries(OutputStream& stream, charbuff& temp) const override;
226 
227  void AppendCIDMappingEntries(OutputStream& stream, const PdfFont& font, charbuff& temp) const override;
228 
229 public:
    // Read-only access to the underlying char code map.
    // Dereferences m_charMap without a null check, so it assumes the pointer
    // is never null -- TODO confirm the constructors guarantee this.
230  inline const PdfCharCodeMap& GetCharMap() const { return *m_charMap; }
231 
232  const PdfEncodingLimits& GetLimits() const override;
233 
234 private:
    // Private constructor taking the shared char code map and the map type;
    // reachable only from this class and its friends.
235  PdfEncodingMapBase(const std::shared_ptr<PdfCharCodeMap>& map, PdfEncodingMapType type);
236 
237 private:
    // Char code map exposed by GetCharMap().
238  std::shared_ptr<PdfCharCodeMap> m_charMap;
239 };
240 
/**
 * PdfEncodingMap used by encodings like PdfBuiltInEncoding or
 * PdfDifferenceEncoding (both are friends of this class).
 *
 * The class stores its own PdfEncodingLimits and overrides the serialization
 * hooks. It does not override tryGetCharCode() or tryGetCodePoints(), so it
 * remains abstract.
 */
246 class PODOFO_API PdfEncodingMapOneByte : public PdfEncodingMap
247 {
248  friend class PdfBuiltInEncoding;
249  friend class PdfDifferenceEncoding;
250 
251 private:
    // Private constructor: only this class and its friends can call it.
    // limits is presumably copied into m_Limits -- confirm.
252  PdfEncodingMapOneByte(const PdfEncodingLimits& limits);
253 
254 protected:
255  void AppendToUnicodeEntries(OutputStream& stream, charbuff& temp) const override;
256 
257  void AppendCIDMappingEntries(OutputStream& stream, const PdfFont& font, charbuff& temp) const override;
258 
    // NOTE(review): declared under protected here although GetLimits() is
    // public in PdfEncodingMap; it stays callable through a PdfEncodingMap
    // reference. Presumably returns m_Limits -- confirm.
259  const PdfEncodingLimits& GetLimits() const override;
260 
261 private:
    // Limits held by value for this map.
262  PdfEncodingLimits m_Limits;
263 };
264 
/**
 * A common base class for built-in encodings which are known by name.
 *
 * Implements the code point lookups left abstract by PdfEncodingMapOneByte.
 * It adds the pure virtual GetToUnicodeTable(), so the class itself is still
 * abstract.
 */
269 class PODOFO_API PdfBuiltInEncoding : public PdfEncodingMapOneByte
270 {
271  friend class PdfFontMetricsFreetype;
272  friend class PdfPredefinedEncoding;
273  friend class PdfStandardEncoding;
274  friend class PdfSymbolEncoding;
275  friend class PdfZapfDingbatsEncoding;
276  friend class AppleLatin1Encoding;
277 
278 private:
    // Private constructor taking the encoding name; reachable only from this
    // class and its friends. name is presumably stored in m_Name -- confirm.
279  PdfBuiltInEncoding(const PdfName& name);
280 
281 public:
    // Get the name of this encoding.
286  inline const PdfName& GetName() const { return m_Name; }
287 
288 protected:
289  bool tryGetCharCode(char32_t codePoint, PdfCharCode& codeUnit) const override;
290  bool tryGetCodePoints(const PdfCharCode& codeUnit, const unsigned* cidId, CodePointSpan& codePoints) const override;
291 
    // Returns the table of Unicode code points assigned to the codes of this
    // encoding. Pure virtual.
    // NOTE(review): the generated documentation speaks of 256 short values,
    // while the declared element type is char32_t -- confirm the table size
    // and element width against the implementations.
301  virtual const char32_t* GetToUnicodeTable() const = 0;
302 
303 private:
304  // To be called by PdfFontMetricsFreetype
    // Takes a code-to-GID map as input and fills unicodeMap, presumably with
    // a Unicode-to-GID mapping (per the method name) -- confirm.
305  void CreateUnicodeToGIDMap(const std::unordered_map<unsigned, unsigned>& codeToGidMap,
306  std::unordered_map<uint32_t, unsigned>& unicodeMap) const;
307 
308 private:
    // Presumably populates m_EncodingTable from GetToUnicodeTable(). When it
    // is invoked is not visible here -- confirm.
312  void initEncodingTable();
313 
314 private:
315  PdfName m_Name; // The name of the encoding
316  std::unordered_map<char32_t, char> m_EncodingTable; // The helper table for conversions into this encoding
317 };
318 
/**
 * Dummy encoding map that just throws an exception (per the generated
 * documentation for this class).
 *
 * The class is final and overrides all five pure virtuals of PdfEncodingMap.
 */
321 class PODOFO_API PdfNullEncodingMap final : public PdfEncodingMap
322 {
    // The factory is presumably the only creator of instances -- confirm.
323  friend class PdfEncodingMapFactory;
324 
325 private:
    // NOTE(review): nothing is declared in this section in this listing
    // (original line 326 is absent); a private constructor may have been
    // elided -- confirm against the real header.
327 
328 public:
329  const PdfEncodingLimits& GetLimits() const override;
330 
331 protected:
332  bool tryGetCharCode(char32_t codePoint, PdfCharCode& codeUnit) const override;
333 
334  bool tryGetCodePoints(const PdfCharCode& codeUnit, const unsigned* cidId, CodePointSpan& codePoints) const override;
335 
336  void AppendToUnicodeEntries(OutputStream& stream, charbuff& temp) const override;
337 
338  void AppendCIDMappingEntries(OutputStream& stream, const PdfFont& font, charbuff& temp) const override;
339 };
340 
// Convenience alias for a shared pointer to a const /Encoding map entry.
343 using PdfEncodingMapConstPtr = std::shared_ptr<const PdfEncodingMap>;
344 
// Convenience alias for a shared pointer to a const built-in encoding.
347 using PdfBuiltInEncodingConstPtr = std::shared_ptr<const PdfBuiltInEncoding>;
348 
// Convenience alias for a shared pointer to a const /ToUnicode CMap entry.
// This names the same type as PdfEncodingMapConstPtr; the separate name only
// documents intent.
351 using PdfToUnicodeMapConstPtr = std::shared_ptr<const PdfEncodingMap>;
352 }
353 
354 #endif // PDF_ENCODING_MAP_H
SPDX-FileCopyrightText: (C) 2005 Dominik Seichter domseichter@web.de SPDX-FileCopyrightText: (C) 2020...
A memory owning immutable block of code points, optimized for small segments as up to 3 elements can ...
Definition: PdfCharCodeMap.h:29
An interface for writing blocks of data to a data source.
Definition: OutputStream.h:18
A common base class for built-in encodings which are known by name.
Definition: PdfEncodingMap.h:270
virtual const char32_t * GetToUnicodeTable() const =0
Gets a table of 256 short values which are the big endian Unicode code points that are assigned to th...
const PdfName & GetName() const
Get the name of this encoding.
Definition: PdfEncodingMap.h:286
A bidirectional map from character code units to unspecified code points.
Definition: PdfCharCodeMap.h:109
PdfDifferenceEncoding is an encoding, which is based on either the fonts encoding or a predefined enc...
Definition: PdfDifferenceEncoding.h:108
Basic PdfEncodingMap implementation using a PdfCharCodeMap.
Definition: PdfEncodingMap.h:207
This factory creates a PdfEncodingMap.
Definition: PdfEncodingMapFactory.h:18
PdfEncodingMap used by encodings like PdfBuiltInEncoding or PdfDifferenceEncoding that can define all...
Definition: PdfEncodingMap.h:247
A PdfEncodingMap is a low level interface to convert between utf8 and encoded strings in and to deter...
Definition: PdfEncodingMap.h:28
virtual bool tryGetCodePoints(const PdfCharCode &codeUnit, const unsigned *cidId, CodePointSpan &codePoints) const =0
Get code points from a code unit.
virtual void AppendToUnicodeEntries(OutputStream &stream, charbuff &temp) const =0
During a WriteToUnicodeCMap append "beginbfchar" and "beginbfrange" entries.
virtual void AppendCIDMappingEntries(OutputStream &stream, const PdfFont &font, charbuff &temp) const =0
During a PdfEncoding::ExportToFont() append "begincidchar" and/or "begincidrange" entries.
PdfEncodingMapType GetType() const
Type of encoding, may be Simple or CMap.
Definition: PdfEncodingMap.h:91
virtual bool tryGetCharCode(char32_t codePoint, PdfCharCode &codeUnit) const =0
Try to get the char code unit from a Unicode code point.
A PdfEncoding is used in PdfFont to transform a text string into a representation so that it can be displa...
Definition: PdfEncoding.h:55
Before you can draw text on a PDF document, you have to create a font object first.
Definition: PdfFont.h:49
PdfIdentityEncoding is a two-byte encoding which can be used with TrueType fonts to represent all cha...
Definition: PdfIdentityEncoding.h:31
A list of PdfObjects that constitutes the indirect object list of the document. The PdfParser will rea...
Definition: PdfIndirectObjectList.h:30
This class represents a PdfName.
Definition: PdfName.h:24
Dummy encoding map that just throws an exception.
Definition: PdfEncodingMap.h:322
This class represents a PDF indirect Object in memory.
Definition: PdfObject.h:35
A common base class for PDF-defined predefined encodings which are known by name.
Definition: PdfPredefinedEncoding.h:27
Represents a predefined ToUnicode CMap as the ones described in ISO 32000-2:2020 "9....
Definition: PdfPredefinedToUnicodeCMap.h:21
StandardEncoding.
Definition: PdfPredefinedEncoding.h:107
A PDF string context to iteratively scan a string and collect both CID and unicode codepoints.
Definition: PdfEncoding.h:24
Symbol Encoding.
Definition: PdfPredefinedEncoding.h:124
ZapfDingbats encoding.
Definition: PdfPredefinedEncoding.h:141
Convenient type for char array storage and/or buffer with std::string compatibility.
Definition: basetypes.h:38
SPDX-FileCopyrightText: (C) 2022 Francesco Pretto ceztko@gmail.com SPDX-License-Identifier: LGPL-2....
Definition: basetypes.h:16
cspan< char32_t > unicodeview
Unicode code point view.
Definition: basetypes.h:27
std::shared_ptr< const PdfEncodingMap > PdfToUnicodeMapConstPtr
Convenience alias for a const /ToUnicode CMap entry shared ptr.
Definition: PdfEncodingMap.h:351
std::shared_ptr< const PdfBuiltInEncoding > PdfBuiltInEncodingConstPtr
Convenience alias for a const built-in /Encoding map entry shared ptr.
Definition: PdfEncodingMap.h:347
PdfEncodingMapType
Definition: PdfDeclarations.h:113
PdfPredefinedEncodingType
Definition: PdfDeclarations.h:120
std::shared_ptr< const PdfEncodingMap > PdfEncodingMapConstPtr
Convenience alias for a const /Encoding map entry shared ptr.
Definition: PdfEncodingMap.h:343
Represent a CID (Character ID) with full code unit information.
Definition: PdfEncodingCommon.h:48
A character code unit.
Definition: PdfEncodingCommon.h:20