MySQL 8.4.0
Source Code Documentation
immutable_string.h
Go to the documentation of this file.
1#ifndef IMMUTABLE_STRING_H
2#define IMMUTABLE_STRING_H
3
4/* Copyright (c) 2020, 2024, Oracle and/or its affiliates.
5
6 This program is free software; you can redistribute it and/or modify
7 it under the terms of the GNU General Public License, version 2.0,
8 as published by the Free Software Foundation.
9
10 This program is designed to work with certain software (including
11 but not limited to OpenSSL) that is licensed under separate terms,
12 as designated in a particular file or component or in included license
13 documentation. The authors of MySQL hereby grant you an additional
14 permission to link the program and your derivative works with the
15 separately licensed software that they have either included with
16 the program or referenced in the documentation.
17
18 This program is distributed in the hope that it will be useful,
19 but WITHOUT ANY WARRANTY; without even the implied warranty of
20 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
21 GNU General Public License, version 2.0, for more details.
22
23 You should have received a copy of the GNU General Public License
24 along with this program; if not, write to the Free Software
25 Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */
26
27/**
28 * @file
29 *
30 * ImmutableString defines a storage format for strings that is designed to be
31 * as compact as possible, while still being reasonably fast to decode. There
32 * are two variants; one with length, and one with a “next” pointer that can
33 * point to another string. As the name implies, both are designed to be
34 * immutable, i.e., they are not designed to be changed (especially not in
35 * length) after being encoded. See the individual classes for more details.
36 */
37
38#include <assert.h>
39#include <stddef.h>
40#include <stdint.h>
41
42#include <limits>
43#include <string_view>
44
45#include "my_compiler.h"
46
48MY_COMPILER_GCC_DIAGNOSTIC_IGNORE("-Wsuggest-override")
49MY_COMPILER_CLANG_DIAGNOSTIC_IGNORE("-Wdeprecated-dynamic-exception-spec")
51#include <google/protobuf/io/coded_stream.h>
53
54#include "template_utils.h"
55
56/**
57 * The variant with length (ImmutableStringWithLength) stores the length as a
58 * Varint128 (similar to protobuf), immediately followed by the string itself.
59 * (There is no zero termination.) This saves space over using e.g. a fixed
60 * size_t as length, since most strings are short. This is used for keys in the
61 * hash join buffer, but would be applicable other places as well.
62 */
64 public:
66 explicit ImmutableStringWithLength(const char *encoded) : m_ptr(encoded) {}
67
68 inline std::string_view Decode() const;
69
70 /// Encode the given string as an ImmutableStringWithLength, and returns
71 /// a new object pointing to it. *dst must contain at least the number
72 /// of bytes returned by RequiredBytesForEncode.
73 ///
74 /// “dst” is moved to one byte past the end of the written stream.
75 static inline ImmutableStringWithLength Encode(const char *data,
76 size_t length, char **dst);
77
78 /// Calculates an upper bound on the space required for encoding a string
79 /// of the given length.
80 static inline size_t RequiredBytesForEncode(size_t length) {
81 static constexpr int kMaxVarintBytes = 10;
82 return kMaxVarintBytes + length;
83 }
84
85 /// Compares full contents (data/size).
86 inline bool operator==(ImmutableStringWithLength other) const;
87
88 private:
89 const char *m_ptr = nullptr;
90};
91
92// From protobuf.
93inline uint64_t ZigZagEncode64(int64_t n) {
94 // Note: the right-shift must be arithmetic
95 // Note: left shift must be unsigned because of overflow
96 return (static_cast<uint64_t>(n) << 1) ^ static_cast<uint64_t>(n >> 63);
97}
98
99// From protobuf.
100inline int64_t ZigZagDecode64(uint64_t n) {
101 // Note: Using unsigned types prevent undefined behavior
102 return static_cast<int64_t>((n >> 1) ^ (~(n & 1) + 1));
103}
104
105// Defined in sql/hash_join_buffer.cc, since that is the primary user
106// of ImmutableString functionality.
107std::pair<const char *, uint64_t> VarintParseSlow64(const char *p,
108 uint32_t res32);
109
110// From protobuf. Included here because protobuf 3.6 does not expose the
111// parse_context.h header to clients.
112inline const char *VarintParse64(const char *p, uint64_t *out) {
113 auto ptr = pointer_cast<const uint8_t *>(p);
114 uint32_t res = ptr[0];
115 if (!(res & 0x80)) {
116 *out = res;
117 return p + 1;
118 }
119 uint32_t x = ptr[1];
120 res += (x - 1) << 7;
121 if (!(x & 0x80)) {
122 *out = res;
123 return p + 2;
124 }
125 auto tmp = VarintParseSlow64(p, res);
126 *out = tmp.second;
127 return tmp.first;
128}
129
130std::string_view ImmutableStringWithLength::Decode() const {
131 uint64_t size;
132 const char *data = VarintParse64(m_ptr, &size);
133 return {data, static_cast<size_t>(size)};
134}
135
137 size_t length,
138 char **dst) {
139 using google::protobuf::io::CodedOutputStream;
140
141 const char *base = *dst;
142 uint8_t *ptr = CodedOutputStream::WriteVarint64ToArray(
143 length, pointer_cast<uint8_t *>(*dst));
144 if (length != 0) { // Avoid sending nullptr to memcpy().
145 memcpy(ptr, data, length);
146 }
147 *dst = pointer_cast<char *>(ptr + length);
148
149 return ImmutableStringWithLength(base);
150}
151
153 ImmutableStringWithLength other) const {
154 return Decode() == other.Decode();
155}
156
157/**
158 * LinkedImmutableString is designed for storing rows (values) in hash join. It
159 * does not need a length, since it is implicit from the contents; however,
160 * since there might be multiple values with the same key, we simulate a
161 * multimap by having a “next” pointer. (Normally, linked lists are a bad idea
162 * due to pointer chasing, but here, we're doing so much work for each value
163 * that the overhead disappears into the noise.)
164 *
165 * As the next pointer is usually be very close in memory to ourselves
166 * (nearly all rows are stored in the same MEM_ROOT), we don't need to store
167 * the entire pointer; instead, we store the difference between the start of
168 * this string and the next pointer, as a zigzag-encoded Varint128. As with
169 * the length in ImmutableStringWithLength, this typically saves 6–7 bytes
170 * for each value. The special value of 0 means that there is no next pointer
171 * (ie., it is nullptr), as that would otherwise be an infinite loop.
172 */
174 public:
175 struct Decoded;
176
177 /// NOTE: nullptr is a legal value for encoded, and signals the same thing
178 /// as nullptr would on a const char *.
179 explicit LinkedImmutableString(const char *encoded) : m_ptr(encoded) {}
180
181 inline Decoded Decode() const;
182
183 /// Encode the given string and “next” pointer as a header for
184 /// LinkedImmutableString, and returns a new object pointing to it.
185 /// Note that unlike ImmutableStringWithLength::Encode(), this only
186 /// encodes the header; since there is no explicitly stored length,
187 /// you must write the contents of the string yourself.
188 ///
189 /// *dst must contain at least the number of bytes returned by
190 /// RequiredBytesForEncode. It is moved to one byte past the end of the
191 /// written stream (which is the right place to store the string itself).
193 char **dst);
194
195 /// Calculates an upper bound on the space required for encoding a string
196 /// of the given length.
197 static inline size_t RequiredBytesForEncode(size_t length) {
198 static constexpr int kMaxVarintBytes = 10;
199 return kMaxVarintBytes + length;
200 }
201
202 inline bool operator==(std::nullptr_t) const { return m_ptr == nullptr; }
203 inline bool operator!=(std::nullptr_t) const { return m_ptr != nullptr; }
204
205 private:
206 const char *m_ptr;
207};
208
210 const char *data;
212};
213
216 uint64_t ptr_diff;
217 const char *ptr = VarintParse64(m_ptr, &ptr_diff);
218 decoded.data = ptr;
219 if (ptr_diff == 0) {
220 decoded.next = LinkedImmutableString{nullptr};
221 } else {
222 decoded.next = LinkedImmutableString(m_ptr + ZigZagDecode64(ptr_diff));
223 }
224 return decoded;
225}
226
228 LinkedImmutableString next, char **dst) {
229 using google::protobuf::io::CodedOutputStream;
230
231 const char *base = *dst;
232 uint8_t *ptr = pointer_cast<uint8_t *>(*dst);
233 if (next.m_ptr == nullptr) {
234 *ptr++ = 0;
235 } else {
236 ptr = CodedOutputStream::WriteVarint64ToArray(
237 ZigZagEncode64(next.m_ptr - base), ptr);
238 }
239 *dst = pointer_cast<char *>(ptr);
240 return LinkedImmutableString(base);
241}
242
243#endif // IMMUTABLE_STRING_H
The variant with length (ImmutableStringWithLength) stores the length as a Varint128 (similar to prot...
Definition: immutable_string.h:63
ImmutableStringWithLength(const char *encoded)
Definition: immutable_string.h:66
const char * m_ptr
Definition: immutable_string.h:89
bool operator==(ImmutableStringWithLength other) const
Compares full contents (data/size).
Definition: immutable_string.h:152
static ImmutableStringWithLength Encode(const char *data, size_t length, char **dst)
Encode the given string as an ImmutableStringWithLength, and returns a new object pointing to it.
Definition: immutable_string.h:136
ImmutableStringWithLength()=default
std::string_view Decode() const
Definition: immutable_string.h:130
static size_t RequiredBytesForEncode(size_t length)
Calculates an upper bound on the space required for encoding a string of the given length.
Definition: immutable_string.h:80
LinkedImmutableString is designed for storing rows (values) in hash join.
Definition: immutable_string.h:173
Decoded Decode() const
Definition: immutable_string.h:214
bool operator!=(std::nullptr_t) const
Definition: immutable_string.h:203
bool operator==(std::nullptr_t) const
Definition: immutable_string.h:202
const char * m_ptr
Definition: immutable_string.h:206
static size_t RequiredBytesForEncode(size_t length)
Calculates an upper bound on the space required for encoding a string of the given length.
Definition: immutable_string.h:197
LinkedImmutableString(const char *encoded)
NOTE: nullptr is a legal value for encoded, and signals the same thing as nullptr would on a const ch...
Definition: immutable_string.h:179
static LinkedImmutableString EncodeHeader(LinkedImmutableString next, char **dst)
Encode the given string and “next” pointer as a header for LinkedImmutableString, and returns a new o...
Definition: immutable_string.h:227
const char * p
Definition: ctype-mb.cc:1235
const char * VarintParse64(const char *p, uint64_t *out)
Definition: immutable_string.h:112
int64_t ZigZagDecode64(uint64_t n)
Definition: immutable_string.h:100
std::pair< const char *, uint64_t > VarintParseSlow64(const char *p, uint32_t res32)
Definition: hash_join_buffer.cc:351
uint64_t ZigZagEncode64(int64_t n)
Definition: immutable_string.h:93
Header for compiler-dependent features.
#define MY_COMPILER_GCC_DIAGNOSTIC_IGNORE(X)
Definition: my_compiler.h:250
#define MY_COMPILER_MSVC_DIAGNOSTIC_IGNORE(X)
Definition: my_compiler.h:255
#define MY_COMPILER_DIAGNOSTIC_PUSH()
save the compiler's diagnostic (enabled warnings, errors, ...) state
Definition: my_compiler.h:285
#define MY_COMPILER_DIAGNOSTIC_POP()
restore the compiler's diagnostic (enabled warnings, errors, ...) state
Definition: my_compiler.h:286
bool length(const dd::Spatial_reference_system *srs, const Geometry *g1, double *length, bool *null) noexcept
Computes the length of linestrings and multilinestrings.
Definition: length.cc:76
size_t size(const char *const c)
Definition: base64.h:46
MY_COMPILER_CLANG_DIAGNOSTIC_IGNORE("-Winconsistent-missing-destructor-override") extern "C"
Definition: protobuf_plugin.cc:32
Definition: immutable_string.h:209
const char * data
Definition: immutable_string.h:210
LinkedImmutableString next
Definition: immutable_string.h:211
int n
Definition: xcom_base.cc:509