MySQL 9.3.0
Source Code Documentation
All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Modules Pages Concepts
collations_internal.h
Go to the documentation of this file.
1/* Copyright (c) 2020, 2025, Oracle and/or its affiliates.
2
3 This program is free software; you can redistribute it and/or modify
4 it under the terms of the GNU General Public License, version 2.0,
5 as published by the Free Software Foundation.
6
7 This program is designed to work with certain software (including
8 but not limited to OpenSSL) that is licensed under separate terms,
9 as designated in a particular file or component or in included license
10 documentation. The authors of MySQL hereby grant you an additional
11 permission to link the program and your derivative works with the
12 separately licensed software that they have either included with
13 the program or referenced in the documentation.
14
15 This program is distributed in the hope that it will be useful,
16 but WITHOUT ANY WARRANTY; without even the implied warranty of
17 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18 GNU General Public License, version 2.0, for more details.
19
20 You should have received a copy of the GNU General Public License
21 along with this program; if not, write to the Free Software
22 Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */
23
24#ifndef STRINGS_COLLATIONS_INTERNAL_H_
25#define STRINGS_COLLATIONS_INTERNAL_H_
26
27#include <functional>
28#include <mutex>
29#include <string>
30#include <string_view>
31#include <unordered_map>
32#include <utility>
33
35
/// File name of the character set index ("Index.xml").
constexpr char MY_CHARSET_INDEX[]{"Index.xml"};
37
/// Integer type of the "flags" arguments taken by the lookup functions below.
using myf = int;
39
40namespace mysql {
41
42namespace collation {
43class Name;
44} // namespace collation
45
46namespace collation_internals {
47
48using id_hash_map = std::unordered_map<unsigned, CHARSET_INFO *>;
49
/**
  Hash functor for string-keyed maps

  Takes a std::string_view, so a std::string or a C string hashes to the
  same value as an equal view.

  The is_transparent member type marks the functor as transparent; unordered
  containers require that (together with a transparent key-equality functor)
  before they offer heterogeneous lookup.
*/
struct string_hash {
  using is_transparent = void;
  [[nodiscard]] size_t operator()(std::string_view txt) const {
    return std::hash<std::string_view>{}(txt);
  }
};
56
58 std::equal_to<>>;
59
60/**
61 Helper class: implementation of character set/collation library
62
63 @see mysql::collation_internals::entry.
64*/
65class Collations final {
66 public:
67 Collations(const Collations &) = delete;
68 Collations &operator=(const Collations &) = delete;
69
70 /**
71 Constructor
72
73 @param charset_dir Optional "/\0"-terminated path to the directory
74 containing Index.xml
75 @param loader Optional user-specified hooks to the character
76 set/collation parser/initializer.
77 */
78 explicit Collations(const char *charset_dir,
79 MY_CHARSET_LOADER *loader = nullptr);
80
82
83 /**
84 Finds collation by its name
85
86 @note Forces collation parsing/initialization if not done yet.
87
88 @param name Collation name
89
90 @param flags Optional mysys-specific flags
91
92 @param [out] errmsg Optional buffer to return error message from
93 collation parser/initializer
94
95 @returns pointer to a collation object on success, nullptr if not found
96 */
98 MY_CHARSET_ERRMSG *errmsg = nullptr);
99
100 /**
101 Finds collation by its number
102
103 @note Forces collation parsing/initialization if not done yet.
104
105 @param id Collation id (hardcoded in library sources or
106 specified in Index.xml)
107
108 @param flags Optional mysys-specific flags
109
110 @param [out] errmsg Optional buffer to return error message from
111 collation parser/initializer
112
113 @returns pointer to a collation object on success, nullptr if not found
114 */
115 CHARSET_INFO *find_by_id(unsigned id, myf flags = 0,
116 MY_CHARSET_ERRMSG *errmsg = nullptr);
117
118 /**
119 Finds primary collation by its character set name
120
121 @note Forces collation parsing/initialization if not done yet.
122
123 @param cs_name Character set name
124
125 @param flags Optional mysys-specific flags
126
127 @param [out] errmsg Optional buffer to return error message from
128 collation parser/initializer
129
130 @returns pointer to a collation object on success, nullptr if not found
131 */
133 myf flags = 0,
134 MY_CHARSET_ERRMSG *errmsg = nullptr);
135
136 /**
137 Finds binary collation by its character set name
138
139 @note Forces collation parsing/initialization if not done yet.
140
141 @param cs_name Character set name
142
143 @param flags Optional mysys-specific flags
144
145 @param [out] errmsg Optional buffer to return error message from
146 collation parser/initializer
147
148 @returns pointer to a collation object on success, nullptr if not found
149 */
151 myf flags = 0,
152 MY_CHARSET_ERRMSG *errmsg = nullptr);
153
154 /**
155 Finds collation by its name and returns its id
156
157 @param name Collation name
158
159 @returns collation id
160 */
161 unsigned get_collation_id(const mysql::collation::Name &name) const;
162
163 /**
164 Finds character set by its name and returns an id of its primary collation
165
166 @param name Collation name
167
168 @returns primary collation id
169 */
170 unsigned get_primary_collation_id(const mysql::collation::Name &name) const;
171
172 /**
173 Finds character set by its name and returns an id of its default binary
174 collation
175
176 @param name Collation name
177
178 @returns default binary collation id
179 */
181 const mysql::collation::Name &name) const;
182
183 /**
184 If not done yet, force collation parsing/initialization under m_mutex lock
185
186 @param cs Pointer to collation object
187
188 @param flags Optional mysys-specific flags
189
190 @param [out] errmsg Optional buffer to return error message from
191 collation parser/initializer
192
193 @returns @p cs on success, otherwise nullptr
194 */
196 MY_CHARSET_ERRMSG *errmsg = nullptr);
197
198 /**
199 Like find_by_name but without initialization of return value
200
201 @param name Collation name
202
203 @returns Pointer to CHARSET_INFO object on success, nullptr if not found.
204 The resulting value can point to a half-initialized object.
205 Moreover, further initialization of that object or parsing
206 of its collation XML can fail.
207 */
209
210 /**
211 For registering compile-time collations
212
213 @param cs Collation object
214
215 @returns false on success, otherwise true.
216 */
218
219 /**
220 Iterate over all collation objects known to the library
221
222 @param f Closure to execute on each collation object known to the library
223 */
224 void iterate(const std::function<void(const CHARSET_INFO *)> &f) {
225 for (const auto &i : m_all_by_collation_name) {
226 f(i.second);
227 }
228 }
229
230 protected:
231 /**
232 Internals of safe_init_when_necessary()
233
234 This function is similar to safe_init_when_necessary, but, unlike
235 safe_init_when_necessary(), it doesn't acquire locks.
236
237 @param cs Pointer to collation object
238
239 @param flags Optional mysys-specific flags
240
241 @param [out] errmsg Optional buffer to return error message from
242 collation parser/initializer
243
244 @returns @p cs on success, otherwise nullptr
245 */
247 MY_CHARSET_ERRMSG *errmsg);
248
249 /**
250 Optional '/'-terminated path to the directory containing Index.xml
251 */
252 const std::string m_charset_dir;
253
254 /**
255 Maps collation ids to CHARSET_INFO object pointers
256 */
258
259 /**
260 Maps normalized strings of all known character set names, collation names,
261 and their aliases to CHARSET_INFO object pointers
262
263 @note see old_conv and get_old_charset_by_name() for exclusions
264 @see old_conv(), get_old_charset_by_name()
265 */
267
268 /**
269 Maps normalized strings of character set names to CHARSET_INFO object
270 pointers
271
272 @note In MySQL, CHARSET_INFO object of character set is also an object
273 of its primary collation.
274 */
276
277 /**
278 Maps normalized strings of character set names to CHARSET_INFO objects
279 of preferred binary collations
280
281 @note utf8mb4 has two separate binary collations, so m_binary_by_cs_name
282 contains a reference to utf8mb4_bin only.
283 */
285
286 /**
287 False if m_loader references external MY_CHARSET_LOADER, otherwise true.
288 */
289 const bool m_owns_loader;
290
291 /**
292 Shared MY_CHARSET_LOADER implementation for use in collation parser and
293 initializer
294
295 By default references an instance of mysql::collation_internals::Loader.
296 */
298
299 private:
300 /**
301 Collation parser/initializer mutex
302
303 The library parses collations and initializes CHARSET_INFO objects in
304 depth on demand, so m_mutex is necessary to guarantee a safety of
305 concurrent find_... function calls.
306 */
307 std::mutex m_mutex;
308};
309
310/**
311 Global entry point to character set/collation library internals
312*/
313extern Collations *entry;
314
315} // namespace collation_internals
316} // namespace mysql
317
318#endif // STRINGS_COLLATIONS_INTERNAL_H_
static Mysys_charset_loader * loader
Definition: charset.cc:185
User-specified callback interface for collation parser/initializer.
Definition: m_ctype.h:189
Normalizes character set/collation names.
Definition: collations.h:64
Helper class: implementation of character set/collation library.
Definition: collations_internal.h:65
unsigned get_default_binary_collation_id(const mysql::collation::Name &name) const
Finds character set by its name and returns an id of its default binary collation.
Definition: collations_internal.cc:689
MY_CHARSET_LOADER * m_loader
Shared MY_CHARSET_LOADER implementation for use in collation parser and initializer.
Definition: collations_internal.h:297
CHARSET_INFO * find_by_name_unsafe(const mysql::collation::Name &name)
Like find_by_name but without initialization of return value.
Definition: collations_internal.cc:763
CHARSET_INFO * find_primary(const mysql::collation::Name &cs_name, myf flags=0, MY_CHARSET_ERRMSG *errmsg=nullptr)
Finds primary collation by its character set name.
Definition: collations_internal.cc:660
sv_hash_map m_all_by_collation_name
Maps normalized strings of all known character set names, collation names, and their aliases to CHARS...
Definition: collations_internal.h:266
id_hash_map m_all_by_id
Maps collation ids to CHARSET_INFO object pointers.
Definition: collations_internal.h:257
void iterate(const std::function< void(const CHARSET_INFO *)> &f)
Iterate over all collation objects known to the library.
Definition: collations_internal.h:224
CHARSET_INFO * find_by_name(const mysql::collation::Name &name, myf flags=0, MY_CHARSET_ERRMSG *errmsg=nullptr)
Finds collation by its name.
Definition: collations_internal.cc:648
unsigned get_primary_collation_id(const mysql::collation::Name &name) const
Finds character set by its name and returns an id of its primary collation.
Definition: collations_internal.cc:682
Collations(const Collations &)=delete
const std::string m_charset_dir
Optional '/'-terminated path to the directory containing Index.xml.
Definition: collations_internal.h:252
~Collations()
Definition: collations_internal.cc:636
CHARSET_INFO * unsafe_init(CHARSET_INFO *cs, myf flags, MY_CHARSET_ERRMSG *errmsg)
Internals of safe_init_when_necessary()
Definition: collations_internal.cc:712
Collations & operator=(const Collations &)=delete
std::mutex m_mutex
Collation parser/initializer mutex.
Definition: collations_internal.h:307
sv_hash_map m_primary_by_cs_name
Maps normalized strings of character set names to CHARSET_INFO object pointers.
Definition: collations_internal.h:275
unsigned get_collation_id(const mysql::collation::Name &name) const
Finds collation by its name and returns its id.
Definition: collations_internal.cc:675
sv_hash_map m_binary_by_cs_name
Maps normalized strings of character set names to CHARSET_INFO objects of preferred binary collations...
Definition: collations_internal.h:284
bool add_internal_collation(CHARSET_INFO *cs)
For registering compile-time collations.
Definition: collations_internal.cc:734
CHARSET_INFO * find_default_binary(const mysql::collation::Name &cs_name, myf flags=0, MY_CHARSET_ERRMSG *errmsg=nullptr)
Finds binary collation by its character set name.
Definition: collations_internal.cc:667
CHARSET_INFO * find_by_id(unsigned id, myf flags=0, MY_CHARSET_ERRMSG *errmsg=nullptr)
Finds collation by its number.
Definition: collations_internal.cc:655
const bool m_owns_loader
False if m_loader references external MY_CHARSET_LOADER, otherwise true.
Definition: collations_internal.h:289
CHARSET_INFO * safe_init_when_necessary(CHARSET_INFO *cs, myf flags=0, MY_CHARSET_ERRMSG *errmsg=nullptr)
If not done yet, force collation parsing/initialization under m_mutex lock.
Definition: collations_internal.cc:696
int myf
Definition: collations_internal.h:38
constexpr char MY_CHARSET_INDEX[]
Definition: collations_internal.h:36
static int flags[50]
Definition: hp_test1.cc:40
A better implementation of the UNIX ctype(3) library.
int myf
Definition: my_inttypes.h:94
const char * collation
Definition: audit_api_message_emit.cc:184
Definition: commit_order_queue.h:34
std::unordered_map< unsigned, CHARSET_INFO * > id_hash_map
Definition: collations_internal.h:48
std::unordered_map< std::string, CHARSET_INFO *, string_hash, std::equal_to<> > sv_hash_map
Definition: collations_internal.h:58
Collations * entry
Global entry point to character set/collation library internals.
Definition: collations_internal.cc:43
Definition: instrumented_condition_variable.h:32
std::unordered_map< Key, Value, Hash, Key_equal, ut::allocator< std::pair< const Key, Value > > > unordered_map
Definition: ut0new.h:2900
Definition: m_ctype.h:421
Helper structure to return error messages from collation parser/initializer.
Definition: m_ctype.h:180
Definition: collations_internal.h:50
void is_transparent
Definition: collations_internal.h:51
size_t operator()(std::string_view txt) const
Definition: collations_internal.h:52