MySQL 8.2.0
Source Code Documentation
collations_internal.h
Go to the documentation of this file.
1/* Copyright (c) 2020, 2023, Oracle and/or its affiliates.
2
3 This program is free software; you can redistribute it and/or modify
4 it under the terms of the GNU General Public License, version 2.0,
5 as published by the Free Software Foundation.
6
7 This program is also distributed with certain software (including
8 but not limited to OpenSSL) that is licensed under separate terms,
9 as designated in a particular file or component or in included license
10 documentation. The authors of MySQL hereby grant you an additional
11 permission to link the program and your derivative works with the
12 separately licensed software that they have included with MySQL.
13
14 This program is distributed in the hope that it will be useful,
15 but WITHOUT ANY WARRANTY; without even the implied warranty of
16 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17 GNU General Public License, version 2.0, for more details.
18
19 You should have received a copy of the GNU General Public License
20 along with this program; if not, write to the Free Software
21 Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */
22
23#ifndef STRINGS_COLLATIONS_INTERNAL_H_
24#define STRINGS_COLLATIONS_INTERNAL_H_
25
26#include <cstddef>
27#include <functional>
28#include <mutex>
29#include <string>
30#include <unordered_map>
31#include <utility>
32
34
35constexpr char MY_CHARSET_INDEX[]{"Index.xml"};
36
37typedef int myf;
38
39namespace mysql {
40
41namespace collation {
42class Name;
43} // namespace collation
44
45namespace collation_internals {
46
47/**
48 Helper class: implementation of character set/collation library
49
50 @see mysql::collation_internals::entry.
51*/
52class Collations final {
53 public:
54 Collations(const Collations &) = delete;
55 Collations &operator=(const Collations &) = delete;
56
57 /**
58 Constructor
59
60 @param charset_dir Optional "/\0"-terminated path to the directory
61 containing Index.xml
62 @param loader Optional user-specified hooks to the character
63 set/collation parser/initializer.
64 */
65 explicit Collations(const char *charset_dir,
66 MY_CHARSET_LOADER *loader = nullptr);
67
69
70 /**
71 Finds collation by its name
72
73 @note Forces collation parsing/initialization if not done yet.
74
75 @param name Collation name
76
77 @param flags Optional mysys-specific flags
78
79 @param [out] errmsg Optional buffer to return error message from
80 collation parser/initializer
81
82 @returns pointer to a collation object on success, nullptr if not found
83 */
85 MY_CHARSET_ERRMSG *errmsg = nullptr);
86
87 /**
88 Finds collation by its number
89
90 @note Forces collation parsing/initialization if not done yet.
91
92 @param id Collation id (hardcoded in library sources or
93 specified in Index.xml)
94
95 @param flags Optional mysys-specific flags
96
97 @param [out] errmsg Optional buffer to return error message from
98 collation parser/initializer
99
100 @returns pointer to a collation object on success, nullptr if not found
101 */
102 CHARSET_INFO *find_by_id(unsigned id, myf flags = 0,
103 MY_CHARSET_ERRMSG *errmsg = nullptr);
104
105 /**
106 Finds primary collation by its character set name
107
108 @note Forces collation parsing/initialization if not done yet.
109
110 @param cs_name Character set name
111
112 @param flags Optional mysys-specific flags
113
114 @param [out] errmsg Optional buffer to return error message from
115 collation parser/initializer
116
117 @returns pointer to a collation object on success, nullptr if not found
118 */
120 MY_CHARSET_ERRMSG *errmsg = nullptr);
121
122 /**
123 Finds binary collation by its character set name
124
125 @note Forces collation parsing/initialization if not done yet.
126
127 @param cs_name Character set name
128
129 @param flags Optional mysys-specific flags
130
131 @param [out] errmsg Optional buffer to return error message from
132 collation parser/initializer
133
134 @returns pointer to a collation object on success, nullptr if not found
135 */
137 myf flags = 0,
138 MY_CHARSET_ERRMSG *errmsg = nullptr);
139
140 /**
141 Finds collation by its name and returns its id
142
143 @param name Collation name
144
145 @returns collation id
146 */
147 unsigned get_collation_id(const mysql::collation::Name &name) const;
148
149 /**
150 Finds character set by its name and returns an id of its primary collation
151
152 @param name Character set name
153
154 @returns primary collation id
155 */
156 unsigned get_primary_collation_id(const mysql::collation::Name &) const;
157
158 /**
159 Finds character set by its name and returns an id of its default binary
160 collation
161
162 @param name Character set name
163
164 @returns default binary collation id
165 */
167 const mysql::collation::Name &) const;
168
169 /**
170 If not done yet, force collation parsing/initialization under m_mutex lock
171
172 @param cs Pointer to collation object
173
174 @param flags Optional mysys-specific flags
175
176 @param [out] errmsg Optional buffer to return error message from
177 collation parser/initializer
178
179 @returns @p cs on success, otherwise nullptr
180 */
182 MY_CHARSET_ERRMSG *errmsg = nullptr);
183
184 /**
185 Like find_by_name but without initialization of return value
186
187 @param name Collation name
188
189 @returns Pointer to CHARSET_INFO object on success, nullptr if not found.
190 The resulting value can point to a half-initialized object.
191 Moreover, further initialization of that object or parsing
192 of its collation XML can fail.
193 */
195
196 /**
197 For registering compile-time collations
198
199 @param cs Collation object
200
201 @returns false on success, otherwise true.
202 */
204
205 /**
206 Iterate over all collation objects known to the library
207
208 @param f Closure to execute on each collation object known to the library
209 */
210 void iterate(const std::function<void(const CHARSET_INFO *)> &f) {
211 for (const auto &i : m_all_by_collation_name) {
212 f(i.second);
213 }
214 }
215
216 protected:
217 /**
218 Internals of safe_init_when_necessary()
219
220 This function is similar to safe_init_when_necessary, but, unlike
221 safe_init_when_necessary(), it doesn't acquire locks.
222
223 @param cs Pointer to collation object
224
225 @param flags Optional mysys-specific flags
226
227 @param [out] errmsg Optional buffer to return error message from
228 collation parser/initializer
229
230 @returns @p cs on success, otherwise nullptr
231 */
233 MY_CHARSET_ERRMSG *errmsg);
234
235 /**
236 Optional '/'-terminated path to the directory containing Index.xml
237 */
238 const std::string m_charset_dir;
239
240 /**
241 Common parametric type to map character set/collation names or their ids
242 to CHARSET_INFO object pointers
243
244 @tparam Key Name or id type (std::string or unsigned respectively)
245
246 TODO(gleb): it would be good to use mysql::collation::Name instead of
247 std::string for Key.
248 */
249 template <typename Key>
250 using Hash = std::unordered_map<Key, CHARSET_INFO *>;
251
252 /**
253 Maps collation ids to CHARSET_INFO object pointers
254 */
256
257 /**
258 Maps normalized strings of all known character set names, collation names,
259 and their aliases to CHARSET_INFO object pointers
260
261 @note See old_conv and get_old_charset_by_name() for exclusions
262 */
264
265 /**
266 Maps normalized strings of character set names to CHARSET_INFO object
267 pointers
268
269 @note In MySQL, CHARSET_INFO object of character set is also an object
270 of its primary collation.
271 */
273
274 /**
275 Maps normalized strings of character set names to CHARSET_INFO objects
276 of preferred binary collations
277
278 @note utf8mb4 has two separate binary collations, so m_binary_by_cs_name
279 contains a reference to utf8mb4_bin only.
280 */
282
283 /**
284 False if m_loader references external MY_CHARSET_LOADER, otherwise true.
285 */
286 const bool m_owns_loader;
287
288 /**
289 Shared MY_CHARSET_LOADER implementation for use in collation parser and
290 initializer
291
292 By default references an instance of mysql::collation_internals::Loader.
293 */
295
296 private:
297 /**
298 Collation parser/initializer mutex
299
300 The library parses collations and initializes CHARSET_INFO objects in
301 depth on demand, so m_mutex is necessary to guarantee a safety of
302 concurrent find_... function calls.
303 */
304 std::mutex m_mutex;
305};
306
307/**
308 Global entry point to character set/collation library internals
309*/
310extern Collations *entry;
311
312} // namespace collation_internals
313} // namespace mysql
314
315#endif // STRINGS_COLLATIONS_INTERNAL_H_
static Mysys_charset_loader * loader
Definition: charset.cc:184
User-specified callback interface for collation parser/initializer.
Definition: m_ctype.h:189
Normalizes character set/collation names.
Definition: collations.h:62
Helper class: implementation of character set/collation library.
Definition: collations_internal.h:52
Hash< std::string > m_all_by_collation_name
Maps normalized strings of all known character set names, collation names, and their aliases to CHARS...
Definition: collations_internal.h:263
MY_CHARSET_LOADER * m_loader
Shared MY_CHARSET_LOADER implementation for use in collation parser and initializer.
Definition: collations_internal.h:294
CHARSET_INFO * find_by_name_unsafe(const mysql::collation::Name &name)
Like find_by_name but without initialization of return value.
Definition: collations_internal.cc:773
Hash< std::string > m_binary_by_cs_name
Maps normalized strings of character set names to CHARSET_INFO objects of preferred binary collations...
Definition: collations_internal.h:281
void iterate(const std::function< void(const CHARSET_INFO *)> &f)
Iterate over all collation objects known to the library.
Definition: collations_internal.h:210
Hash< unsigned > m_all_by_id
Maps collation ids to CHARSET_INFO object pointers.
Definition: collations_internal.h:255
CHARSET_INFO * find_by_name(const mysql::collation::Name &, myf flags=0, MY_CHARSET_ERRMSG *errmsg=nullptr)
Finds collation by its name.
Definition: collations_internal.cc:665
Collations(const Collations &)=delete
const std::string m_charset_dir
Optional '/'-terminated path to the directory containing Index.xml.
Definition: collations_internal.h:238
~Collations()
Definition: collations_internal.cc:653
CHARSET_INFO * unsafe_init(CHARSET_INFO *cs, myf flags, MY_CHARSET_ERRMSG *errmsg)
Internals of safe_init_when_necessary()
Definition: collations_internal.cc:723
unsigned get_primary_collation_id(const mysql::collation::Name &) const
Finds character set by its name and returns an id of its primary collation.
Definition: collations_internal.cc:695
Collations & operator=(const Collations &)=delete
Hash< std::string > m_primary_by_cs_name
Maps normalized strings of character set names to CHARSET_INFO object pointers.
Definition: collations_internal.h:272
std::mutex m_mutex
Collation parser/initializer mutex.
Definition: collations_internal.h:304
unsigned get_collation_id(const mysql::collation::Name &name) const
Finds collation by its name and returns its id.
Definition: collations_internal.cc:689
std::unordered_map< Key, CHARSET_INFO * > Hash
Common parametric type to map character set/collation names or their ids to CHARSET_INFO object point...
Definition: collations_internal.h:250
CHARSET_INFO * find_default_binary(const mysql::collation::Name &, myf flags=0, MY_CHARSET_ERRMSG *errmsg=nullptr)
Finds binary collation by its character set name.
Definition: collations_internal.cc:682
bool add_internal_collation(CHARSET_INFO *cs)
For registering compile-time collations.
Definition: collations_internal.cc:745
CHARSET_INFO * find_by_id(unsigned id, myf flags=0, MY_CHARSET_ERRMSG *errmsg=nullptr)
Finds collation by its number.
Definition: collations_internal.cc:671
unsigned get_default_binary_collation_id(const mysql::collation::Name &) const
Finds character set by its name and returns an id of its default binary collation.
Definition: collations_internal.cc:701
CHARSET_INFO * find_primary(const mysql::collation::Name &, myf flags=0, MY_CHARSET_ERRMSG *errmsg=nullptr)
Finds primary collation by its character set name.
Definition: collations_internal.cc:676
const bool m_owns_loader
False if m_loader references external MY_CHARSET_LOADER, otherwise true.
Definition: collations_internal.h:286
CHARSET_INFO * safe_init_when_necessary(CHARSET_INFO *cs, myf flags=0, MY_CHARSET_ERRMSG *errmsg=nullptr)
If not done yet, force collation parsing/initialization under m_mutex lock.
Definition: collations_internal.cc:707
int myf
Definition: collations_internal.h:37
constexpr char MY_CHARSET_INDEX[]
Definition: collations_internal.h:35
static int flags[50]
Definition: hp_test1.cc:39
A better implementation of the UNIX ctype(3) library.
int myf
Definition: my_inttypes.h:93
const char * collation
Definition: audit_api_message_emit.cc:183
Definition: commit_order_queue.h:33
Collations * entry
Global entry point to character set/collation library internals.
Definition: collations_internal.cc:40
Definition: instrumented_condition_variable.h:31
case opt name
Definition: sslopt-case.h:32
Definition: m_ctype.h:422
Helper structure to return error messages from collation parser/initializer.
Definition: m_ctype.h:180