PoDoFo 1.2.0
Loading...
Searching...
No Matches
PdfEncoding.h
1// SPDX-FileCopyrightText: 2021 Francesco Pretto <ceztko@gmail.com>
2// SPDX-License-Identifier: LGPL-2.0-or-later OR MPL-2.0
3
4#ifndef PDF_ENCODING_H
5#define PDF_ENCODING_H
6
7#include "PdfEncodingMap.h"
8#include "PdfString.h"
9#include "PdfObject.h"
10#include "PdfCIDToGIDMap.h"
11
12namespace PoDoFo
13{
14 class PdfFont;
15 class PdfEncoding;
16 class PdfFontSimple;
17
// A PDF string context to iteratively scan a string and collect both CID
// and unicode codepoints.
//
// The only constructor is private and PdfEncoding is a friend, so instances
// are handed out by PdfEncoding rather than constructed directly by callers.
20 class PODOFO_API PdfStringScanContext
21 {
22 friend class PdfEncoding;
23
24 private:
// encodedstr is a non-owning std::string_view.
// NOTE(review): m_it/m_end presumably iterate over this view, in which case
// the underlying buffer must outlive the context -- confirm in the .cpp.
25 PdfStringScanContext(const std::string_view& encodedstr, const PdfEncoding& encoding);
26
27 public:
// Presumably true once the scan position has reached the end of the
// encoded string (m_it == m_end) -- TODO confirm against the implementation.
28 bool IsEndOfString() const;
29
// Scan step: results are written through the out-parameters (the CID, its
// UTF-8 text and its code points) and the bool reports success.
// NOTE(review): whether a failed scan still advances the position is not
// visible in this header.
32 bool TryScan(PdfCID& cid, std::string& utf8str, CodePointSpan& codepoints);
33
// Overload that additionally fills a vector of positions.
// NOTE(review): presumably offsets related to utf8str; the exact meaning is
// not visible here -- confirm before relying on it.
34 bool TryScan(PdfCID& cid, std::string& utf8str, std::vector<unsigned>& positions, CodePointSpan& codepoints);
35
36 private:
// Iterator pair over a std::string_view (current position / end, judging by
// the names).
37 std::string_view::iterator m_it;
38 std::string_view::iterator m_end;
// Raw pointers to encoding maps; presumably non-owning and borrowed from the
// PdfEncoding passed to the constructor -- TODO confirm.
39 const PdfEncodingMap* m_encoding;
// Stored by value.
40 PdfEncodingLimits m_limits;
41 const PdfEncodingMap* m_toUnicode;
42 };
43
// A PdfEncoding is used in PdfFont to transform a text string into a
// representation that can be displayed (the generated summary for this class
// is truncated in this listing).
//
// The class is final and copyable: copy construction and copy assignment are
// both defaulted, so a copy duplicates the members one by one. m_Encoding and
// m_ToUnicode are shared pointers to const maps, hence copies share the same
// map objects.
//
// Construction goes through the private constructor and the private static
// factories, which are reachable only from the friend classes listed below.
50 class PODOFO_API PdfEncoding final
51 {
52 friend class PdfEncodingFactory;
53 friend class PdfFont;
54 friend class PdfFontCID;
55 friend class PdfFontCIDTrueType;
56 friend class PdfFontSimple;
57
58 public:
// Member-wise copy (defaulted).
63 PdfEncoding(const PdfEncoding&) = default;
64
65 private:
// Private constructor taking the id, the object-loaded flag, the limits, a
// font pointer and (by rvalue reference) the CID to GID map.
// NOTE(review): the parameters declared on the original source line between
// these two are not present in this listing.
68 PdfEncoding(unsigned id, bool isObjectLoaded, const PdfEncodingLimits& limits, PdfFont* font,
70 PdfCIDToGIDMapConstPtr&& cidToGidMap);
71
// Factory returning a PdfEncoding by value from parsed limits and the three
// maps, each taken by rvalue reference.
73 static PdfEncoding Create(const PdfEncodingLimits& parsedLimits, PdfEncodingMapConstPtr&& encoding,
74 PdfEncodingMapConstPtr&& toUnicode, PdfCIDToGIDMapConstPtr&& cidToGidMap);
75
78
// Factory returning a heap-allocated encoding built from an existing
// encoding and a font.
// NOTE(review): "Schim" is presumably a misspelling of "Shim"; the name is
// part of the interface used by the friend classes, so it is left unchanged.
80 static std::unique_ptr<PdfEncoding> CreateSchim(const PdfEncoding& encoding, PdfFont& font);
81
// Factory returning a heap-allocated encoding built from two shared
// PdfCharCodeMap instances (CID map and to-unicode map) and a font.
84 static std::unique_ptr<PdfEncoding> CreateDynamicEncoding(std::shared_ptr<PdfCharCodeMap>&& cidMap,
85 std::shared_ptr<PdfCharCodeMap>&& toUnicodeMap, PdfFont& font);
86
87 public:
// Conversion API. Each operation comes as a pair: a value-returning form and
// a Try* form that writes into an out-parameter and returns a bool.
// NOTE(review): how the value-returning forms behave on unmappable input
// (throw vs. best effort) is not visible in this header -- confirm in the
// implementation before documenting it for users.

// Encoded PdfString -> UTF-8 text.
89 std::string ConvertToUtf8(const PdfString& encodedStr) const;
90
92 bool TryConvertToUtf8(const PdfString& encodedStr, std::string& str) const;
93
// Text (string_view) -> encoded bytes (charbuff).
95 charbuff ConvertToEncoded(const std::string_view& str) const;
96
97 bool TryConvertToEncoded(const std::string_view& str, charbuff& encoded) const;
98
// Encoded PdfString -> sequence of PdfCID.
100 std::vector<PdfCID> ConvertToCIDs(const PdfString& encodedStr) const;
101
103 bool TryConvertToCIDs(const PdfString& encodedStr, std::vector<PdfCID>& cids) const;
104
// Code point lookup for a full code unit.
// NOTE(review): the value returned when no mapping exists is not visible
// here.
109 char32_t GetCodePoint(const PdfCharCode& codeUnit) const;
110
// Same lookup keyed by a bare numeric char code.
116 char32_t GetCodePoint(unsigned charCode) const;
117
// Returns a PdfStringScanContext by value for iterative scanning.
// NOTE(review): unlike the conversion methods above this one is not
// const-qualified -- confirm whether that is intentional.
118 PdfStringScanContext StartStringScan(const PdfString& encodedStr);
119
120 public:
// First/last char code, returned by const reference.
// NOTE(review): presumably taken from the encoding limits -- confirm.
123 const PdfCharCode& GetFirstChar() const;
124
127 const PdfCharCode& GetLastChar() const;
128
// Boolean property queries; their exact definitions live in the
// implementation file.
130 bool IsNull() const;
131
133 bool HasCIDMapping() const;
134
137 bool IsSimpleEncoding() const;
138
140 bool HasParsedLimits() const;
141
143 bool IsDynamicEncoding() const;
144
// Return an Id to be used in hashed containers.
146 unsigned GetId() const { return m_Id; }
147
// True if the encoding is constructed from object loaded information.
149 bool IsObjectLoaded() const { return m_IsObjectLoaded; }
150
154 const PdfEncodingLimits& GetLimits() const;
155
156 bool HasValidToUnicodeMap() const;
157
// Reference-returning accessor for the to-unicode map.
// NOTE(review): behavior when no valid to-unicode map is present is not
// visible here; the *Safe variants below presumably exist for that case.
159 const PdfEncodingMap& GetToUnicodeMap() const;
160
// Writes a map pointer through the out-parameter and returns a bool.
// NOTE(review): presumably the bool tells whether a real to-unicode map was
// found as opposed to a fallback -- TODO confirm.
165 bool GetToUnicodeMapSafe(const PdfEncodingMap*& toUnicode) const;
166
171 const PdfEncodingMap& GetToUnicodeMapSafe() const;
172
// Dereferences m_Encoding without a null check.
// NOTE(review): confirm m_Encoding is never null for a constructed instance.
173 const PdfEncodingMap& GetEncodingMap() const { return *m_Encoding; }
174
// Returns a copy of the shared pointer held in m_Encoding.
175 PdfEncodingMapConstPtr GetEncodingMapPtr() const { return m_Encoding; }
176
177 PdfEncodingMapConstPtr GetToUnicodeMapPtr() const;
178
179 public:
// Member-wise copy assignment (defaulted).
180 PdfEncoding& operator=(const PdfEncoding&) = default;
181
182 private:
183 // These methods will be called by PdfFont
184 void ExportToFont(PdfFont& font, const PdfCIDSystemInfo& cidInfo) const;
185 void ExportToFont(PdfFont& font) const;
186 bool TryGetCIDId(const PdfCharCode& codeUnit, unsigned& cid) const;
// Raw pointer obtained from m_CIDToGIDMap.get(); ownership stays with this
// object's smart pointer.
187 const PdfCIDToGIDMap* GetCIDToGIDMap() const { return m_CIDToGIDMap.get(); }
188
189 static unsigned GetNextId();
190
191 private:
// Lower-case private helpers.
// NOTE(review): exportToFont takes the CID system info as a pointer, so the
// two ExportToFont overloads above presumably both forward to it -- confirm.
192 void exportToFont(PdfFont& font, const PdfCIDSystemInfo* cidInfo) const;
193 bool tryExportEncodingTo(PdfDictionary& dictionary, bool wantCidMapping) const;
194 bool tryConvertEncodedToUtf8(const std::string_view& encoded, std::string& str) const;
195 bool tryConvertEncodedToCIDs(const std::string_view& encoded, std::vector<PdfCID>& cids) const;
196 void writeCIDMapping(PdfObject& cmapObj, const PdfFont& font, const PdfCIDSystemInfo& info) const;
197 void writeToUnicodeCMap(PdfObject& cmapObj, const PdfFont& font) const;
198 bool tryGetCharCode(PdfFont& font, unsigned gid, const unicodeview& codePoints, PdfCharCode& unit) const;
199
200 private:
// Value returned by GetId().
201 unsigned m_Id;
// Value returned by IsObjectLoaded().
202 bool m_IsObjectLoaded;
203 PdfEncodingLimits m_ParsedLimits;
// Raw font pointer; presumably non-owning -- TODO confirm.
204 PdfFont* m_Font;
// Shared pointer to the const encoding map; dereferenced by GetEncodingMap().
205 PdfEncodingMapConstPtr m_Encoding;
206 PdfEncodingMapConstPtr m_ToUnicode;
// Smart pointer behind GetCIDToGIDMap().
207 PdfCIDToGIDMapConstPtr m_CIDToGIDMap;
208 };
209}
210
211#endif // PDF_ENCODING_H
A memory owning immutable block of code points, optimized for small segments as up to 3 elements can ...
Definition PdfEncodingCommon.h:119
This factory creates a PdfEncoding from an existing object in the PDF.
Definition PdfEncodingFactory.h:16
A PdfEncodingMap is a low level interface to convert between utf8 and encoded strings in and to deter...
Definition PdfEncodingMap.h:28
A PdfEncoding is used in PdfFont to transform a text string into a representation so that it can be displa...
Definition PdfEncoding.h:51
bool IsObjectLoaded() const
True if the encoding is constructed from object loaded information.
Definition PdfEncoding.h:149
unsigned GetId() const
Return an Id to be used in hashed containers.
Definition PdfEncoding.h:146
A PdfFont that represents a CID-keyed font that has a TrueType/OpenType font backend (aka "CIDFontTyp...
Definition PdfFontCIDTrueType.h:14
A PdfFont that represents a CID-keyed font.
Definition PdfFontCID.h:14
This is a common base class for simple, non CID-keyed fonts like Type1, TrueType and Type3.
Definition PdfFontSimple.h:17
Before you can draw text on a PDF document, you have to create a font object first.
Definition PdfFont.h:42
A PDF string context to iteratively scan a string and collect both CID and unicode codepoints.
Definition PdfEncoding.h:21
A string that can be written to a PDF document.
Definition PdfString.h:21
Convenient type for char array storage and/or buffer with std::string compatibility.
Definition basetypes.h:30
All classes, functions, types and enums of PoDoFo are members of this namespace.
Definition basetypes.h:13
std::shared_ptr< const PdfEncodingMap > PdfToUnicodeMapConstPtr
Convenience alias for a const /ToUnicode CMap entry shared ptr.
Definition PdfEncodingMap.h:317
@ Create
Create a new file or truncate existing one for writing/reading.
std::shared_ptr< const PdfEncodingMap > PdfEncodingMapConstPtr
Convenience typedef for a const /Encoding map entry shared ptr.
Definition PdfEncodingMap.h:311
Represent a CID (Character ID) with full code unit information.
Definition PdfEncodingCommon.h:43
A character code unit.
Definition PdfEncodingCommon.h:16