MySQL 9.5.0
Source Code Documentation
collations_internal.h
Go to the documentation of this file.
1/* Copyright (c) 2020, 2025, Oracle and/or its affiliates.
2
3 This program is free software; you can redistribute it and/or modify
4 it under the terms of the GNU General Public License, version 2.0,
5 as published by the Free Software Foundation.
6
7 This program is designed to work with certain software (including
8 but not limited to OpenSSL) that is licensed under separate terms,
9 as designated in a particular file or component or in included license
10 documentation. The authors of MySQL hereby grant you an additional
11 permission to link the program and your derivative works with the
12 separately licensed software that they have either included with
13 the program or referenced in the documentation.
14
15 This program is distributed in the hope that it will be useful,
16 but WITHOUT ANY WARRANTY; without even the implied warranty of
17 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18 GNU General Public License, version 2.0, for more details.
19
20 You should have received a copy of the GNU General Public License
21 along with this program; if not, write to the Free Software
22 Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */
23
24#ifndef STRINGS_COLLATIONS_INTERNAL_H_
25#define STRINGS_COLLATIONS_INTERNAL_H_
26
27#include <cstddef>
28#include <functional>
29#include <mutex>
30#include <string>
31#include <string_view>
32#include <unordered_map>
33#include <utility>
34
36
37constexpr char MY_CHARSET_INDEX[]{"Index.xml"};
38
39typedef int myf;
40
41namespace mysql {
42
43namespace collation {
44class Name;
45} // namespace collation
46
47namespace collation_internals {
48
49using id_hash_map = std::unordered_map<unsigned, CHARSET_INFO *>;
50
52 using is_transparent = void;
53 [[nodiscard]] size_t operator()(std::string_view txt) const {
54 return std::hash<std::string_view>{}(txt);
55 }
56};
57
59 std::equal_to<>>;
60
61/**
62 Helper class: implementation of character set/collation library
63
64 @see mysql::collation_internals::entry.
65*/
66class Collations final {
67 public:
68 Collations(const Collations &) = delete;
69 Collations &operator=(const Collations &) = delete;
70
71 /**
72 Constructor
73
74 @param charset_dir Optional NUL-terminated path, ending in '/', to the directory
75 containing Index.xml
76 @param loader Optional user-specified hooks to the character
77 set/collation parser/initializer.
78 */
79 explicit Collations(const char *charset_dir,
80 MY_CHARSET_LOADER *loader = nullptr);
81
83
84 /**
85 Finds collation by its name
86
87 @note Forces collation parsing/initialization if not done yet.
88
89 @param name Collation name
90
91 @param flags Optional mysys-specific flags
92
93 @param [out] errmsg Optional buffer to return error message from
94 collation parser/initializer
95
96 @returns pointer to a collation object on success, nullptr if not found
97 */
99 MY_CHARSET_ERRMSG *errmsg = nullptr);
100
101 /**
102 Finds collation by its number
103
104 @note Forces collation parsing/initialization if not done yet.
105
106 @param id Collation id (hardcoded in library sources or
107 specified in Index.xml)
108
109 @param flags Optional mysys-specific flags
110
111 @param [out] errmsg Optional buffer to return error message from
112 collation parser/initializer
113
114 @returns pointer to a collation object on success, nullptr if not found
115 */
116 CHARSET_INFO *find_by_id(unsigned id, myf flags = 0,
117 MY_CHARSET_ERRMSG *errmsg = nullptr);
118
119 /**
120 Finds primary collation by its character set name
121
122 @note Forces collation parsing/initialization if not done yet.
123
124 @param cs_name Character set name
125
126 @param flags Optional mysys-specific flags
127
128 @param [out] errmsg Optional buffer to return error message from
129 collation parser/initializer
130
131 @returns pointer to a collation object on success, nullptr if not found
132 */
134 myf flags = 0,
135 MY_CHARSET_ERRMSG *errmsg = nullptr);
136
137 /**
138 Finds binary collation by its character set name
139
140 @note Forces collation parsing/initialization if not done yet.
141
142 @param cs_name Character set name
143
144 @param flags Optional mysys-specific flags
145
146 @param [out] errmsg Optional buffer to return error message from
147 collation parser/initializer
148
149 @returns pointer to a collation object on success, nullptr if not found
150 */
152 myf flags = 0,
153 MY_CHARSET_ERRMSG *errmsg = nullptr);
154
155 /**
156 Finds collation by its name and returns its id
157
158 @param name Collation name
159
160 @returns collation id
161 */
162 unsigned get_collation_id(const mysql::collation::Name &name) const;
163
164 /**
165 Finds character set by its name and returns an id of its primary collation
166
167 @param name Character set name
168
169 @returns primary collation id
170 */
171 unsigned get_primary_collation_id(const mysql::collation::Name &name) const;
172
173 /**
174 Finds character set by its name and returns an id of its default binary
175 collation
176
177 @param name Character set name
178
179 @returns default binary collation id
180 */
182 const mysql::collation::Name &name) const;
183
184 /**
185 If not done yet, force collation parsing/initialization under m_mutex lock
186
187 @param cs Pointer to collation object
188
189 @param flags Optional mysys-specific flags
190
191 @param [out] errmsg Optional buffer to return error message from
192 collation parser/initializer
193
194 @returns @p cs on success, otherwise nullptr
195 */
197 MY_CHARSET_ERRMSG *errmsg = nullptr);
198
199 /**
200 Like find_by_name but without initialization of return value
201
202 @param name Collation name
203
204 @returns Pointer to CHARSET_INFO object on success, nullptr if not found.
205 The resulting value can point to a half-initialized object.
206 Moreover, further initialization of that object or parsing
207 of its collation XML can fail.
208 */
210
211 /**
212 For registering compile-time collations
213
214 @param cs Collation object
215
216 @returns false on success, otherwise true.
217 */
219
220 /**
221 Calls @p f once per entry of m_all_by_collation_name; a collation object
222 stored under several keys may therefore be passed to @p f more than once.
223 @param f Callback receiving the CHARSET_INFO pointer of each map entry
224 */
225 void iterate(const std::function<void(const CHARSET_INFO *)> &f) {
226 for (const auto &[key, cs] : m_all_by_collation_name) {
227 f(cs);
228 }
229 }
230
231 protected:
232 /**
233 Internals of safe_init_when_necessary()
234
235 This function is similar to safe_init_when_necessary, but, unlike
236 safe_init_when_necessary(), it doesn't acquire locks.
237
238 @param cs Pointer to collation object
239
240 @param flags Optional mysys-specific flags
241
242 @param [out] errmsg Optional buffer to return error message from
243 collation parser/initializer
244
245 @returns @p cs on success, otherwise nullptr
246 */
248 MY_CHARSET_ERRMSG *errmsg);
249
250 /**
251 Optional '/'-terminated path to the directory containing Index.xml
252 */
253 const std::string m_charset_dir;
254
255 /**
256 Maps collation ids to CHARSET_INFO object pointers
257 */
259
260 /**
261 Maps normalized strings of all known character set names, collation names,
262 and their aliases to CHARSET_INFO object pointers
263
264 @note see old_conv and get_old_charset_by_name() for exclusions
265 @see old_conv(), get_old_charset_by_name()
266 */
268
269 /**
270 Maps normalized strings of character set names to CHARSET_INFO object
271 pointers
272
273 @note In MySQL, CHARSET_INFO object of character set is also an object
274 of its primary collation.
275 */
277
278 /**
279 Maps normalized strings of character set names to CHARSET_INFO objects
280 of preferred binary collations
281
282 @note utf8mb4 has two separate binary collations, so m_binary_by_cs_name
283 contains a reference to utf8mb4_bin only.
284 */
286
287 /**
288 False if m_loader references external MY_CHARSET_LOADER, otherwise true.
289 */
290 const bool m_owns_loader;
291
292 /**
293 Shared MY_CHARSET_LOADER implementation for use in collation parser and
294 initializer
295
296 By default references an instance of mysql::collation_internals::Loader.
297 */
299
300 private:
301 /**
302 Collation parser/initializer mutex
303
304 The library parses collations and initializes CHARSET_INFO objects in
305 depth on demand, so m_mutex is necessary to guarantee the safety of
306 concurrent find_...() function calls.
307 */
308 std::mutex m_mutex;
309};
310
311/**
312 Global entry point to character set/collation library internals
313*/
314extern Collations *entry;
315
316} // namespace collation_internals
317} // namespace mysql
318
319#endif // STRINGS_COLLATIONS_INTERNAL_H_
static Mysys_charset_loader * loader
Definition: charset.cc:197
User-specified callback interface for collation parser/initializer.
Definition: m_ctype.h:189
Normalizes character set/collation names.
Definition: collations.h:64
Helper class: implementation of character set/collation library.
Definition: collations_internal.h:66
unsigned get_default_binary_collation_id(const mysql::collation::Name &name) const
Finds character set by its name and returns an id of its default binary collation.
Definition: collations_internal.cc:689
MY_CHARSET_LOADER * m_loader
Shared MY_CHARSET_LOADER implementation for use in collation parser and initializer.
Definition: collations_internal.h:298
CHARSET_INFO * find_by_name_unsafe(const mysql::collation::Name &name)
Like find_by_name but without initialization of return value.
Definition: collations_internal.cc:763
CHARSET_INFO * find_primary(const mysql::collation::Name &cs_name, myf flags=0, MY_CHARSET_ERRMSG *errmsg=nullptr)
Finds primary collation by its character set name.
Definition: collations_internal.cc:660
sv_hash_map m_all_by_collation_name
Maps normalized strings of all known character set names, collation names, and their aliases to CHARS...
Definition: collations_internal.h:267
id_hash_map m_all_by_id
Maps collation ids to CHARSET_INFO object pointers.
Definition: collations_internal.h:258
void iterate(const std::function< void(const CHARSET_INFO *)> &f)
Iterate over all collation objects known to the library.
Definition: collations_internal.h:225
CHARSET_INFO * find_by_name(const mysql::collation::Name &name, myf flags=0, MY_CHARSET_ERRMSG *errmsg=nullptr)
Finds collation by its name.
Definition: collations_internal.cc:648
unsigned get_primary_collation_id(const mysql::collation::Name &name) const
Finds character set by its name and returns an id of its primary collation.
Definition: collations_internal.cc:682
Collations(const Collations &)=delete
const std::string m_charset_dir
Optional '/'-terminated path to the directory containing Index.xml.
Definition: collations_internal.h:253
~Collations()
Definition: collations_internal.cc:636
CHARSET_INFO * unsafe_init(CHARSET_INFO *cs, myf flags, MY_CHARSET_ERRMSG *errmsg)
Internals of safe_init_when_necessary()
Definition: collations_internal.cc:712
Collations & operator=(const Collations &)=delete
std::mutex m_mutex
Collation parser/initializer mutex.
Definition: collations_internal.h:308
sv_hash_map m_primary_by_cs_name
Maps normalized strings of character set names to CHARSET_INFO object pointers.
Definition: collations_internal.h:276
unsigned get_collation_id(const mysql::collation::Name &name) const
Finds collation by its name and returns its id.
Definition: collations_internal.cc:675
sv_hash_map m_binary_by_cs_name
Maps normalized strings of character set names to CHARSET_INFO objects of preferred binary collations...
Definition: collations_internal.h:285
bool add_internal_collation(CHARSET_INFO *cs)
For registering compile-time collations.
Definition: collations_internal.cc:734
CHARSET_INFO * find_default_binary(const mysql::collation::Name &cs_name, myf flags=0, MY_CHARSET_ERRMSG *errmsg=nullptr)
Finds binary collation by its character set name.
Definition: collations_internal.cc:667
CHARSET_INFO * find_by_id(unsigned id, myf flags=0, MY_CHARSET_ERRMSG *errmsg=nullptr)
Finds collation by its number.
Definition: collations_internal.cc:655
const bool m_owns_loader
False if m_loader references external MY_CHARSET_LOADER, otherwise true.
Definition: collations_internal.h:290
CHARSET_INFO * safe_init_when_necessary(CHARSET_INFO *cs, myf flags=0, MY_CHARSET_ERRMSG *errmsg=nullptr)
If not done yet, force collation parsing/initialization under m_mutex lock.
Definition: collations_internal.cc:696
int myf
Definition: collations_internal.h:39
constexpr char MY_CHARSET_INDEX[]
Definition: collations_internal.h:37
static int flags[50]
Definition: hp_test1.cc:40
A better implementation of the UNIX ctype(3) library.
int myf
Definition: my_inttypes.h:94
const char * collation
Definition: audit_api_message_emit.cc:184
Definition: commit_order_queue.h:34
std::unordered_map< unsigned, CHARSET_INFO * > id_hash_map
Definition: collations_internal.h:49
std::unordered_map< std::string, CHARSET_INFO *, string_hash, std::equal_to<> > sv_hash_map
Definition: collations_internal.h:59
Collations * entry
Global entry point to character set/collation library internals.
Definition: collations_internal.cc:43
Definition: instrumented_condition_variable.h:32
std::unordered_map< Key, Value, Hash, Key_equal, ut::allocator< std::pair< const Key, Value > > > unordered_map
Definition: ut0new.h:2904
Definition: m_ctype.h:421
Helper structure to return error messages from collation parser/initializer.
Definition: m_ctype.h:180
Definition: collations_internal.h:51
void is_transparent
Definition: collations_internal.h:52
size_t operator()(std::string_view txt) const
Definition: collations_internal.h:53