PoDoFo  1.0.0-dev
PdfEncoding.h
1 
7 #ifndef PDF_ENCODING_H
8 #define PDF_ENCODING_H
9 
10 #include "PdfEncodingMap.h"
11 #include "PdfString.h"
12 #include "PdfObject.h"
13 
14 namespace PoDoFo
15 {
16  class PdfFont;
17  class PdfEncoding;
18  class PdfFontSimple;
19 
/** A PDF string context to iteratively scan a string and collect
 * both CID and unicode codepoints.
 *
 * The constructor is private and PdfEncoding is a friend, so instances
 * are created by PdfEncoding (see PdfEncoding::StartStringScan(), which
 * returns a PdfStringScanContext).
 */
23  class PODOFO_API PdfStringScanContext
24  {
25  friend class PdfEncoding;
26 
27  private:
    /** Build a scan context over \p encodedstr using \p encoding.
     * NOTE(review): only a std::string_view is received, so the context
     * presumably does not own the scanned bytes -- confirm that callers
     * keep the underlying buffer alive while scanning.
     */
28  PdfStringScanContext(const std::string_view& encodedstr, const PdfEncoding& encoding);
29 
30  public:
    /** \returns whether the end of the string has been reached
     * (presumably m_it == m_end -- confirm in the implementation)
     */
31  bool IsEndOfString() const;
32 
    /** Try to scan the next element of the string.
     * \param cid out-parameter for the scanned CID
     * \param utf8str out-parameter for UTF-8 text
     * \param codepoints out-parameter for unicode code points
     * \returns a success flag
     * NOTE(review): whether \p utf8str is appended to or overwritten, and
     * what the outputs hold when false is returned, is not visible in
     * this header -- confirm in the implementation.
     */
36  bool TryScan(PdfCID& cid, std::string& utf8str, CodePointSpan& codepoints);
37 
38  private:
    // Iterator pair over the encoded string
    // (presumably current position and end -- TODO confirm)
39  std::string_view::iterator m_it;
40  std::string_view::iterator m_end;
    // Raw pointers to the maps used while scanning
    // (presumably non-owning, taken from the PdfEncoding -- TODO confirm)
41  const PdfEncodingMap* m_encoding;
42  PdfEncodingLimits m_limits;
43  const PdfEncodingMap* m_toUnicode;
44  };
45 
/** A PdfEncoding is used in PdfFont to transform a text string into
 * an encoded representation.
 *
 * It holds two shared map pointers: the /Encoding map (m_Encoding) and
 * the /ToUnicode map (m_ToUnicode). The class is copyable (defaulted
 * copy constructor and copy assignment) and final.
 *
 * The classes declared as friends below have access to the private
 * constructors, factories and export helpers.
 */
54  class PODOFO_API PdfEncoding final
55  {
56  friend class PdfEncodingFactory;
57  friend class PdfFont;
58  friend class PdfFontCID;
59  friend class PdfFontCIDTrueType;
60  friend class PdfFontSimple;
61 
62  public:
    /** Default constructor
     * (presumably creates a null encoding, see IsNull() -- TODO confirm)
     */
66  PdfEncoding();
    /** Construct from an encoding map and an optional /ToUnicode map
     * \param encoding the /Encoding map
     * \param toUnicode the /ToUnicode map, defaults to nullptr
     */
67  PdfEncoding(const PdfEncodingMapConstPtr& encoding, const PdfToUnicodeMapConstPtr& toUnicode = nullptr);
68  PdfEncoding(const PdfEncoding&) = default;
69 
70  private:
    // Private constructors taking an explicit id; reachable only from
    // this class and the friend classes declared above
71  PdfEncoding(unsigned id, const PdfEncodingMapConstPtr& encoding,
72  const PdfEncodingMapConstPtr& toUnicode = nullptr);
73  PdfEncoding(unsigned id, const PdfEncodingLimits& limits, PdfFont* font,
74  const PdfEncodingMapConstPtr& encoding, const PdfEncodingMapConstPtr& toUnicode);
75 
    // Factory taking limits that were parsed elsewhere
    // (presumably stored in m_ParsedLimits -- TODO confirm)
76  static PdfEncoding Create(const PdfEncodingLimits& parsedLimits, const PdfEncodingMapConstPtr& encoding,
77  const PdfEncodingMapConstPtr& toUnicode);
78 
    /** Create a heap allocated encoding from an existing one and a font.
     * NOTE(review): "Schim" looks like a misspelling of "Shim"; renaming
     * would require updating all callers, so it is only flagged here.
     */
81  static std::unique_ptr<PdfEncoding> CreateSchim(const PdfEncoding& encoding, PdfFont& font);
82 
    /** Create a heap allocated encoding from shared char code maps
     * and a font.
     * \param cidMap char code map for the CID mapping
     * \param toUnicodeMap char code map for the unicode mapping
     * \param font the font the encoding is created for
     */
86  static std::unique_ptr<PdfEncoding> CreateDynamicEncoding(const std::shared_ptr<PdfCharCodeMap>& cidMap,
87  const std::shared_ptr<PdfCharCodeMap>& toUnicodeMap, PdfFont& font);
88 
89  public:
    /** Convert an encoded PDF string to UTF-8.
     * NOTE(review): presumably throws on failure, unlike the Try variant
     * below -- confirm in the implementation.
     */
93  std::string ConvertToUtf8(const PdfString& encodedStr) const;
94 
    /** Try to convert an encoded PDF string to UTF-8.
     * \param str out-parameter receiving the converted text
     * \returns a success flag
     */
98  bool TryConvertToUtf8(const PdfString& encodedStr, std::string& str) const;
99 
    /** Convert a string to its encoded byte representation.
     * NOTE(review): \p str is presumably expected to be UTF-8 -- confirm.
     */
103  charbuff ConvertToEncoded(const std::string_view& str) const;
104 
    // Non-throwing counterpart (presumably) of ConvertToEncoded();
    // the result is written to the out-parameter "encoded"
105  bool TryConvertToEncoded(const std::string_view& str, charbuff& encoded) const;
106 
    /** Convert an encoded PDF string to a list of CIDs. */
110  std::vector<PdfCID> ConvertToCIDs(const PdfString& encodedStr) const;
111 
    /** Try to convert an encoded PDF string to a list of CIDs.
     * \param cids out-parameter receiving the CIDs
     * \returns a success flag
     */
115  bool TryConvertToCIDs(const PdfString& encodedStr, std::vector<PdfCID>& cids) const;
116 
    /** Get a code point for the given character code unit.
     * NOTE(review): the value returned when no mapping exists is not
     * visible in this header -- confirm in the implementation.
     */
122  char32_t GetCodePoint(const PdfCharCode& codeUnit) const;
123 
    /** Overload of GetCodePoint() taking a bare character code.
     * NOTE(review): how the code unit size is determined for this
     * overload is not visible here -- confirm in the implementation.
     */
130  char32_t GetCodePoint(unsigned charCode) const;
131 
    // Start an iterative scan of the given encoded string.
    // NOTE(review): this method is non-const while the conversion
    // methods above are const -- confirm whether that is intended.
132  PdfStringScanContext StartStringScan(const PdfString& encodedStr);
133 
134  public:
    /** \returns a reference to the first char code
     * (presumably of the encoding limits -- TODO confirm)
     */
138  const PdfCharCode& GetFirstChar() const;
139 
    /** \returns a reference to the last char code
     * (presumably of the encoding limits -- TODO confirm)
     */
143  const PdfCharCode& GetLastChar() const;
144 
    /** Query for a null encoding
     * (presumably true for a default constructed one -- TODO confirm)
     */
147  bool IsNull() const;
148 
    /** Query whether the encoding has a CID mapping */
151  bool HasCIDMapping() const;
152 
    /** Query whether the encoding is a simple encoding
     * (exact criteria live in the implementation)
     */
156  bool IsSimpleEncoding() const;
157 
    /** Query whether the encoding carries parsed limits
     * (presumably refers to m_ParsedLimits -- TODO confirm)
     */
160  bool HasParsedLimits() const;
161 
    /** Query whether the encoding is a dynamic encoding
     * (presumably one built by CreateDynamicEncoding() -- TODO confirm)
     */
164  bool IsDynamicEncoding() const;
165 
    /** Return an Id to be used in hashed containers. */
169  unsigned GetId() const { return m_Id; }
170 
    /** \returns a reference to the limits of this encoding
     * NOTE(review): whether these are m_ParsedLimits or the limits of
     * the underlying map is not visible here -- confirm.
     */
175  const PdfEncodingLimits& GetLimits() const;
176 
177  bool HasValidToUnicodeMap() const;
178 
    /** \returns the /ToUnicode map by reference
     * NOTE(review): behaviour when no valid map is present is not visible
     * here (presumably throws) -- see the "Safe" variants below.
     */
181  const PdfEncodingMap& GetToUnicodeMap() const;
182 
    /** "Safe" accessor for the /ToUnicode map.
     * \param toUnicode out-parameter receiving a map pointer
     * \returns a flag; NOTE(review): presumably false means a fallback
     *   map was supplied instead of a valid /ToUnicode map -- confirm
     */
188  bool GetToUnicodeMapSafe(const PdfEncodingMap*& toUnicode) const;
189 
    /** "Safe" accessor for the /ToUnicode map returning a reference
     * (presumably falls back to another map when none is valid --
     * TODO confirm)
     */
195  const PdfEncodingMap& GetToUnicodeMapSafe() const;
196 
    // Dereferences m_Encoding without a null check.
    // NOTE(review): presumably must not be called on a null encoding --
    // confirm against IsNull().
197  inline const PdfEncodingMap& GetEncodingMap() const { return *m_Encoding; }
198 
    // Returns a copy of the shared pointer to the encoding map.
    // NOTE(review): the top-level const on a by-value return has no
    // effect for callers other than inhibiting moves from the temporary.
199  inline const PdfEncodingMapConstPtr GetEncodingMapPtr() const { return m_Encoding; }
200 
201  const PdfEncodingMapConstPtr GetToUnicodeMapPtr() const;
202 
203  public:
204  PdfEncoding& operator=(const PdfEncoding&) = default;
205 
206  private:
207  // These methods will be called by PdfFont
208  void ExportToFont(PdfFont& font) const;
209  bool TryGetCIDId(const PdfCharCode& codeUnit, unsigned& cid) const;
210 
    // Presumably the source of fresh values for m_Id -- TODO confirm
211  static unsigned GetNextId();
212 
213  private:
    // Internal helpers; the "try" ones report success through their bool
    // return and write results to their out-parameters
214  bool tryExportEncodingTo(PdfDictionary& dictionary, bool wantCidMapping) const;
215  bool tryConvertEncodedToUtf8(const std::string_view& encoded, std::string& str) const;
216  bool tryConvertEncodedToCIDs(const std::string_view& encoded, std::vector<PdfCID>& cids) const;
217  void writeCIDMapping(PdfObject& cmapObj, const PdfFont& font, const std::string_view& baseFont) const;
218  void writeToUnicodeCMap(PdfObject& cmapObj) const;
219  bool tryGetCharCode(PdfFont& font, unsigned gid, const unicodeview& codePoints, PdfCharCode& unit) const;
220 
221  private:
    // Identifier returned by GetId()
222  unsigned m_Id;
223  PdfEncodingLimits m_ParsedLimits;
    // Raw pointer to a font
    // (presumably non-owning and possibly null -- TODO confirm)
224  PdfFont* m_Font;
    // Shared /Encoding map, dereferenced by GetEncodingMap()
225  PdfEncodingMapConstPtr m_Encoding;
    // Shared /ToUnicode map
226  PdfEncodingMapConstPtr m_ToUnicode;
227  };
228 }
229 
230 #endif // PDF_ENCODING_H
A memory owning immutable block of code points, optimized for small segments as up to 3 elements can ...
Definition: PdfCharCodeMap.h:29
This factory creates a PdfEncoding from an existing object in the PDF.
Definition: PdfEncodingFactory.h:20
A PdfEncodingMap is a low level interface to convert between UTF-8 and encoded strings and to deter...
Definition: PdfEncodingMap.h:28
A PdfEncoding is used in PdfFont to transform a text string into a representation so that it can be displa...
Definition: PdfEncoding.h:55
unsigned GetId() const
Return an Id to be used in hashed containers.
Definition: PdfEncoding.h:169
A PdfFont that represents a CID-keyed font that has a TrueType/OpenType font backend (aka "CIDFontTyp...
Definition: PdfFontCIDTrueType.h:17
A PdfFont that represents a CID-keyed font.
Definition: PdfFontCID.h:17
This is a common base class for simple, non CID-keyed fonts like Type1, TrueType and Type3.
Definition: PdfFontSimple.h:20
Before you can draw text on a PDF document, you have to create a font object first.
Definition: PdfFont.h:49
A PDF string context to iteratively scan a string and collect both CID and unicode codepoints.
Definition: PdfEncoding.h:24
A string that can be written to a PDF document.
Definition: PdfString.h:24
Convenient type for char array storage and/or buffer with std::string compatibility.
Definition: basetypes.h:38
SPDX-FileCopyrightText: (C) 2022 Francesco Pretto ceztko@gmail.com SPDX-License-Identifier: LGPL-2....
Definition: basetypes.h:16
cspan< char32_t > unicodeview
Unicode code point view.
Definition: basetypes.h:27
std::shared_ptr< const PdfEncodingMap > PdfToUnicodeMapConstPtr
Convenience alias for a const /ToUnicode CMap entry shared ptr.
Definition: PdfEncodingMap.h:351
@ Create
Create a new file or truncate existing one for writing/reading.
std::shared_ptr< const PdfEncodingMap > PdfEncodingMapConstPtr
Convenience typedef for a const /Encoding map entry shared ptr.
Definition: PdfEncodingMap.h:343
Represent a CID (Character ID) with full code unit information.
Definition: PdfEncodingCommon.h:48
A character code unit.
Definition: PdfEncodingCommon.h:20