PoDoFo 1.0.0-dev
Loading...
Searching...
No Matches
PdfCharCodeMap.h
1
7#ifndef PDF_CHAR_CODE_MAP_H
8#define PDF_CHAR_CODE_MAP_H
9
10#include "PdfDeclarations.h"
11#include "PdfEncodingCommon.h"
12
13namespace PoDoFo
14{
23
// A memory-owning block of code points. The page summary describes it as
// immutable and optimized for small segments; the inline storage declared
// below holds up to 3 elements.
// NOTE(review): the embedded listing numbers skip 31-33, 36 and 43-46, so
// some members and/or doc comments of this class are not visible here.
28 class PODOFO_API CodePointSpan final
29 {
30 public:
// Construct from a view of code points.
34 CodePointSpan(const codepointview& view);
// Construct from a view plus one extra code point
// (presumably view followed by codepoint; confirm in the implementation).
35 CodePointSpan(const codepointview& view, codepoint codepoint);
// Writes the held code points into codePoints
// (append vs. replace semantics are not visible from this declaration).
37 void CopyTo(std::vector<codepoint>& codePoints) const;
// Returns a codepointview for the held code points.
38 codepointview view() const;
// Presumably the number of held code points; TODO confirm.
39 unsigned GetSize() const;
40 CodePointSpan& operator=(const CodePointSpan&);
// Implicit conversion to codepointview.
41 operator codepointview() const;
42
// Yields a single codepoint (presumably the first one; TODO confirm).
47 codepoint operator*() const;
48
49 private:
// Two alternative storage layouts overlaid in an anonymous union. Both
// begin with a uint32_t Size member.
50 union
51 {
// Inline layout: fixed array of 3 code points.
52 struct
53 {
54 uint32_t Size;
55 std::array<codepoint, 3> Data;
56 } m_Block;
57
// Heap layout: array owned through std::unique_ptr.
// NOTE(review): a union member with a non-trivial destructor requires
// the class to manage its lifetime explicitly; the special members doing
// so are presumably on the lines missing from this listing.
58 struct
59 {
60 uint32_t Size;
61 std::unique_ptr<codepoint[]> Data;
62 } m_Array;
63 };
64 };
65
66 // Map code units -> code point(s)
67 // pp. 474-475 of PdfReference 1.7 "The value of dstString can be a string of up to 512 bytes"
// Unordered (hash) map keyed by PdfCharCode; each value is the CodePointSpan
// for that code unit. The hash/equality support for PdfCharCode that
// std::unordered_map needs is not visible in this listing.
68 using CodeUnitMap = std::unordered_map<PdfCharCode, CodePointSpan>;
69
// One range mapping: a low source code unit, a size, and a low destination
// code point span.
70 struct PODOFO_API CodeUnitRange final
71 {
// Lowest source code unit of the range.
72 PdfCharCode SrcCodeLo;
// Size of the range; defaults to 0.
73 unsigned Size = 0;
// Destination code point(s) corresponding to SrcCodeLo.
74 CodePointSpan DstCodeLo;
75
76 CodeUnitRange();
77
78 CodeUnitRange(PdfCharCode srcCodeLo, unsigned size, CodePointSpan dstCodeLo);
79
// Highest source code unit of the range
// (presumably derived from SrcCodeLo and Size; confirm in the implementation).
80 PdfCharCode GetSrcCodeHi() const;
81 };
82
// Less-than comparator that orders CodeUnitRange values by SrcCodeLo only.
// It also compares a CodeUnitRange against a bare PdfCharCode in either order.
83 struct PODOFO_API CodeUnitRangeInequality
84 {
// Marks the comparator as transparent, which enables heterogeneous lookup in
// ordered containers (e.g. searching a set of CodeUnitRange by PdfCharCode).
85 using is_transparent = std::true_type;
86
// range < code: true when the range's low source code is below rhs.
87 bool operator()(const CodeUnitRange& lhs, const PdfCharCode& rhs) const
88 {
89 return lhs.SrcCodeLo < rhs;
90 }
// code < range: true when lhs is below the range's low source code.
91 bool operator()(const PdfCharCode& lhs, const CodeUnitRange& rhs) const
92 {
93 return lhs < rhs.SrcCodeLo;
94 }
// range < range: compares the two low source codes.
95 bool operator()(const CodeUnitRange& lhs, const CodeUnitRange& rhs) const
96 {
97 return lhs.SrcCodeLo < rhs.SrcCodeLo;
98 }
99 };
100
// Ordered set of range mappings, sorted by CodeUnitRangeInequality.
101 using CodeUnitRanges = std::set<CodeUnitRange, CodeUnitRangeInequality>;
102
// Represents a range in the "begincodespacerange" section.
// NOTE(review): listing line 111 is missing from this view.
109 struct PODOFO_API CodeSpaceRange final
110 {
112 CodeSpaceRange(unsigned codeLo, unsigned codeHi, unsigned char codeSpaceSize);
113
// Lower and upper code of the range, stored as plain unsigned values.
114 unsigned CodeLo;
115 unsigned CodeHi;
// Code space size for this range (presumably the byte length of the codes;
// TODO confirm against PdfCharCode).
116 unsigned char CodeSpaceSize;
117
// Range bounds as PdfCharCode values (presumably built from
// CodeLo/CodeHi together with CodeSpaceSize; confirm in the implementation).
118 PdfCharCode GetSrcCodeLo() const;
119 PdfCharCode GetSrcCodeHi() const;
120 };
121
// A bidirectional map from character code units to unspecified code points.
// Copying is disabled (deleted copy constructor and copy assignment below);
// move assignment is declared.
// NOTE(review): listing lines 136, 138 and 140 are missing from this view
// (likely public constructors/destructor), as are several doc comments.
131 class PODOFO_API PdfCharCodeMap final
132 {
133 PODOFO_PRIVATE_FRIEND(class PdfCMapEncodingFactory);
134
135 public:
137
139
141
142 private:
// Private constructor taking ready-made mappings, ranges and limits
// (presumably for the PdfCMapEncodingFactory friend declared above).
143 PdfCharCodeMap(CodeUnitMap&& mapping, CodeUnitRanges&& ranges, const PdfEncodingLimits& limits);
144
145 public:
// Add a direct mapping from one code unit to a sequence of code points.
150 void PushMapping(const PdfCharCode& codeUnit, const codepointview& codePoints);
151
// Add a direct mapping from one code unit to a single code point.
154 void PushMapping(const PdfCharCode& codeUnit, codepoint codePoint);
155
// Add a range mapping of the given size starting at srcCodeLo, with a
// single low destination code point.
160 void PushRange(const PdfCharCode& srcCodeLo, unsigned size, codepoint dstCodeLo);
161
// Same as above, with the low destination given as a code point sequence.
166 void PushRange(const PdfCharCode& srcCodeLo, unsigned size, const codepointview& dstCodeLo);
167
// Look up the code points for a code unit; the bool result presumably
// reports whether a mapping was found (result is written to codePoints).
170 bool TryGetCodePoints(const PdfCharCode& codeUnit, CodePointSpan& codePoints) const;
171
// Try to read the next char code from the [it, end) string range; "it" is
// taken by non-const reference, so it is presumably advanced. TODO confirm.
176 bool TryGetNextCharCode(std::string_view::iterator& it,
177 const std::string_view::iterator& end, PdfCharCode& code) const;
178
// Reverse lookup: code point sequence -> char code.
182 bool TryGetCharCode(const codepointview& codePoints, PdfCharCode& code) const;
183
// Reverse lookup: single code point -> char code.
186 bool TryGetCharCode(codepoint codePoint, PdfCharCode& code) const;
187
188 PdfCharCodeMap& operator=(PdfCharCodeMap&& map) noexcept;
189
// Returns the stored encoding limits (m_Limits).
190 const PdfEncodingLimits& GetLimits() const { return m_Limits; }
191
192 bool IsEmpty() const;
193
196 bool IsTrivialIdentity() const;
197
// Returns the code space ranges by value.
198 std::vector<CodeSpaceRange> GetCodeSpaceRanges() const;
199
200 public:
// Provides direct mappings.
203 const CodeUnitMap& GetMappings() const { return m_Mappings; }
204
// Provides range mappings.
207 const CodeUnitRanges& GetRanges() const { return m_Ranges; }
208
209 private:
210 void move(PdfCharCodeMap& map) noexcept;
211 void pushMapping(const PdfCharCode& codeUnit, const codepointview& codePoints);
212
213 // Map code point(s) -> code units
// Node of the reverse-lookup tree: Left/Right are raw child pointers, and
// Ligatures points to a further node (presumably the subtree for the next
// code point of a multi-code-point sequence; TODO confirm).
214 struct CodePointMapNode
215 {
216 codepoint CodePoint;
217 PdfCharCode CodeUnit;
218 CodePointMapNode* Ligatures;
219 CodePointMapNode* Left;
220 CodePointMapNode* Right;
221 };
222
223 private:
// Non-copyable.
224 PdfCharCodeMap(const PdfCharCodeMap&) = delete;
225 PdfCharCodeMap& operator=(const PdfCharCodeMap&) = delete;
226
227 private:
228 void updateLimits(const PdfCharCode& codeUnit);
229 void reviseCodePointMap();
230 bool tryFixNextRanges(const CodeUnitRanges::iterator& it, unsigned prevRangeCodeUpper);
// Static helpers operating on the CodePointMapNode tree.
231 static bool tryFindNextCharacterId(const CodePointMapNode* node, std::string_view::iterator &it,
232 const std::string_view::iterator& end, PdfCharCode& cid);
233 static const CodePointMapNode* findNode(const CodePointMapNode* node, codepoint codePoint);
234 static void deleteNode(CodePointMapNode* node);
235 static CodePointMapNode* findOrAddNode(CodePointMapNode*& node, codepoint codePoint);
236
237 private:
238 PdfEncodingLimits m_Limits;
// Direct code unit -> code point(s) mappings, exposed by GetMappings().
239 CodeUnitMap m_Mappings;
// Range mappings, exposed by GetRanges().
240 CodeUnitRanges m_Ranges;
// Presumably flags that the reverse code point map needs rebuilding
// (see reviseCodePointMap); TODO confirm.
241 bool m_MapDirty;
242 CodePointMapNode* m_codePointMapHead; // Head of a BST to lookup code points
243 };
244}
245
246#endif // PDF_CHAR_CODE_MAP_H
SPDX-FileCopyrightText: (C) 2005 Dominik Seichter domseichter@web.de SPDX-FileCopyrightText: (C) 2020...
A memory owning immutable block of code points, optimized for small segments as up to 3 elements can ...
Definition PdfCharCodeMap.h:29
A bidirectional map from character code units to unspecified code points.
Definition PdfCharCodeMap.h:132
const CodeUnitMap & GetMappings() const
Provides direct mappings.
Definition PdfCharCodeMap.h:203
const CodeUnitRanges & GetRanges() const
Provides range mappings.
Definition PdfCharCodeMap.h:207
Convenient type for char array storage and/or buffer with std::string compatibility.
Definition basetypes.h:38
SPDX-FileCopyrightText: (C) 2022 Francesco Pretto ceztko@gmail.com SPDX-License-Identifier: LGPL-2....
Definition basetypes.h:16
char32_t codepoint
A convenient typedef for an unspecified codepoint. The underlying type is conveniently char32_t so it...
Definition PdfCharCodeMap.h:21
Represents a range in the "begincodespacerange" section.
Definition PdfCharCodeMap.h:110
A character code unit.
Definition PdfEncodingCommon.h:20