MySQL 8.3.0
Source Code Documentation
immutable_string.h
Go to the documentation of this file.
1#ifndef IMMUTABLE_STRING_H
2#define IMMUTABLE_STRING_H
3
4/* Copyright (c) 2020, 2023, Oracle and/or its affiliates.
5
6 This program is free software; you can redistribute it and/or modify
7 it under the terms of the GNU General Public License, version 2.0,
8 as published by the Free Software Foundation.
9
10 This program is also distributed with certain software (including
11 but not limited to OpenSSL) that is licensed under separate terms,
12 as designated in a particular file or component or in included license
13 documentation. The authors of MySQL hereby grant you an additional
14 permission to link the program and your derivative works with the
15 separately licensed software that they have included with MySQL.
16
17 This program is distributed in the hope that it will be useful,
18 but WITHOUT ANY WARRANTY; without even the implied warranty of
19 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
20 GNU General Public License, version 2.0, for more details.
21
22 You should have received a copy of the GNU General Public License
23 along with this program; if not, write to the Free Software
24 Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */
25
26/**
27 * @file
28 *
29 * ImmutableString defines a storage format for strings that is designed to be
30 * as compact as possible, while still being reasonably fast to decode. There
31 * are two variants; one with length, and one with a “next” pointer that can
32 * point to another string. As the name implies, both are designed to be
33 * immutable, i.e., they are not designed to be changed (especially not in
34 * length) after being encoded. See the individual classes for more details.
35 */
36
37#include <assert.h>
38#include <stddef.h>
39#include <stdint.h>
40
41#include <limits>
42#include <string_view>
43
44#include "my_compiler.h"
45
47MY_COMPILER_GCC_DIAGNOSTIC_IGNORE("-Wsuggest-override")
48MY_COMPILER_CLANG_DIAGNOSTIC_IGNORE("-Wdeprecated-dynamic-exception-spec")
50#include <google/protobuf/io/coded_stream.h>
52
53#include "template_utils.h"
54
55/**
56 * The variant with length (ImmutableStringWithLength) stores the length as a
57 * Varint128 (similar to protobuf), immediately followed by the string itself.
58 * (There is no zero termination.) This saves space over using e.g. a fixed
59 * size_t as length, since most strings are short. This is used for keys in the
60 * hash join buffer, but would be applicable other places as well.
61 */
63 public:
65 explicit ImmutableStringWithLength(const char *encoded) : m_ptr(encoded) {}
66
67 inline std::string_view Decode() const;
68
69 /// Encode the given string as an ImmutableStringWithLength, and returns
70 /// a new object pointing to it. *dst must contain at least the number
71 /// of bytes returned by RequiredBytesForEncode.
72 ///
73 /// “dst” is moved to one byte past the end of the written stream.
74 static inline ImmutableStringWithLength Encode(const char *data,
75 size_t length, char **dst);
76
77 /// Calculates an upper bound on the space required for encoding a string
78 /// of the given length.
79 static inline size_t RequiredBytesForEncode(size_t length) {
80 static constexpr int kMaxVarintBytes = 10;
81 return kMaxVarintBytes + length;
82 }
83
84 /// Compares full contents (data/size).
85 inline bool operator==(ImmutableStringWithLength other) const;
86
87 private:
88 const char *m_ptr = nullptr;
89};
90
91// From protobuf.
92inline uint64_t ZigZagEncode64(int64_t n) {
93 // Note: the right-shift must be arithmetic
94 // Note: left shift must be unsigned because of overflow
95 return (static_cast<uint64_t>(n) << 1) ^ static_cast<uint64_t>(n >> 63);
96}
97
98// From protobuf.
99inline int64_t ZigZagDecode64(uint64_t n) {
100 // Note: Using unsigned types prevent undefined behavior
101 return static_cast<int64_t>((n >> 1) ^ (~(n & 1) + 1));
102}
103
104// Defined in sql/hash_join_buffer.cc, since that is the primary user
105// of ImmutableString functionality.
106std::pair<const char *, uint64_t> VarintParseSlow64(const char *p,
107 uint32_t res32);
108
109// From protobuf. Included here because protobuf 3.6 does not expose the
110// parse_context.h header to clients.
111inline const char *VarintParse64(const char *p, uint64_t *out) {
112 auto ptr = pointer_cast<const uint8_t *>(p);
113 uint32_t res = ptr[0];
114 if (!(res & 0x80)) {
115 *out = res;
116 return p + 1;
117 }
118 uint32_t x = ptr[1];
119 res += (x - 1) << 7;
120 if (!(x & 0x80)) {
121 *out = res;
122 return p + 2;
123 }
124 auto tmp = VarintParseSlow64(p, res);
125 *out = tmp.second;
126 return tmp.first;
127}
128
129std::string_view ImmutableStringWithLength::Decode() const {
130 uint64_t size;
131 const char *data = VarintParse64(m_ptr, &size);
132 return {data, static_cast<size_t>(size)};
133}
134
136 size_t length,
137 char **dst) {
138 using google::protobuf::io::CodedOutputStream;
139
140 const char *base = *dst;
141 uint8_t *ptr = CodedOutputStream::WriteVarint64ToArray(
142 length, pointer_cast<uint8_t *>(*dst));
143 if (length != 0) { // Avoid sending nullptr to memcpy().
144 memcpy(ptr, data, length);
145 }
146 *dst = pointer_cast<char *>(ptr + length);
147
148 return ImmutableStringWithLength(base);
149}
150
152 ImmutableStringWithLength other) const {
153 return Decode() == other.Decode();
154}
155
156/**
157 * LinkedImmutableString is designed for storing rows (values) in hash join. It
158 * does not need a length, since it is implicit from the contents; however,
159 * since there might be multiple values with the same key, we simulate a
160 * multimap by having a “next” pointer. (Normally, linked lists are a bad idea
161 * due to pointer chasing, but here, we're doing so much work for each value
162 * that the overhead disappears into the noise.)
163 *
164 * As the next pointer is usually be very close in memory to ourselves
165 * (nearly all rows are stored in the same MEM_ROOT), we don't need to store
166 * the entire pointer; instead, we store the difference between the start of
167 * this string and the next pointer, as a zigzag-encoded Varint128. As with
168 * the length in ImmutableStringWithLength, this typically saves 6–7 bytes
169 * for each value. The special value of 0 means that there is no next pointer
170 * (ie., it is nullptr), as that would otherwise be an infinite loop.
171 */
173 public:
174 struct Decoded;
175
176 /// NOTE: nullptr is a legal value for encoded, and signals the same thing
177 /// as nullptr would on a const char *.
178 explicit LinkedImmutableString(const char *encoded) : m_ptr(encoded) {}
179
180 inline Decoded Decode() const;
181
182 /// Encode the given string and “next” pointer as a header for
183 /// LinkedImmutableString, and returns a new object pointing to it.
184 /// Note that unlike ImmutableStringWithLength::Encode(), this only
185 /// encodes the header; since there is no explicitly stored length,
186 /// you must write the contents of the string yourself.
187 ///
188 /// *dst must contain at least the number of bytes returned by
189 /// RequiredBytesForEncode. It is moved to one byte past the end of the
190 /// written stream (which is the right place to store the string itself).
192 char **dst);
193
194 /// Calculates an upper bound on the space required for encoding a string
195 /// of the given length.
196 static inline size_t RequiredBytesForEncode(size_t length) {
197 static constexpr int kMaxVarintBytes = 10;
198 return kMaxVarintBytes + length;
199 }
200
201 inline bool operator==(std::nullptr_t) const { return m_ptr == nullptr; }
202 inline bool operator!=(std::nullptr_t) const { return m_ptr != nullptr; }
203
204 private:
205 const char *m_ptr;
206};
207
209 const char *data;
211};
212
215 uint64_t ptr_diff;
216 const char *ptr = VarintParse64(m_ptr, &ptr_diff);
217 decoded.data = ptr;
218 if (ptr_diff == 0) {
219 decoded.next = LinkedImmutableString{nullptr};
220 } else {
221 decoded.next = LinkedImmutableString(m_ptr + ZigZagDecode64(ptr_diff));
222 }
223 return decoded;
224}
225
227 LinkedImmutableString next, char **dst) {
228 using google::protobuf::io::CodedOutputStream;
229
230 const char *base = *dst;
231 uint8_t *ptr = pointer_cast<uint8_t *>(*dst);
232 if (next.m_ptr == nullptr) {
233 *ptr++ = 0;
234 } else {
235 ptr = CodedOutputStream::WriteVarint64ToArray(
236 ZigZagEncode64(next.m_ptr - base), ptr);
237 }
238 *dst = pointer_cast<char *>(ptr);
239 return LinkedImmutableString(base);
240}
241
242#endif // IMMUTABLE_STRING_H
The variant with length (ImmutableStringWithLength) stores the length as a Varint128 (similar to prot...
Definition: immutable_string.h:62
ImmutableStringWithLength(const char *encoded)
Definition: immutable_string.h:65
const char * m_ptr
Definition: immutable_string.h:88
bool operator==(ImmutableStringWithLength other) const
Compares full contents (data/size).
Definition: immutable_string.h:151
static ImmutableStringWithLength Encode(const char *data, size_t length, char **dst)
Encode the given string as an ImmutableStringWithLength, and returns a new object pointing to it.
Definition: immutable_string.h:135
ImmutableStringWithLength()=default
std::string_view Decode() const
Definition: immutable_string.h:129
static size_t RequiredBytesForEncode(size_t length)
Calculates an upper bound on the space required for encoding a string of the given length.
Definition: immutable_string.h:79
LinkedImmutableString is designed for storing rows (values) in hash join.
Definition: immutable_string.h:172
Decoded Decode() const
Definition: immutable_string.h:213
bool operator!=(std::nullptr_t) const
Definition: immutable_string.h:202
bool operator==(std::nullptr_t) const
Definition: immutable_string.h:201
const char * m_ptr
Definition: immutable_string.h:205
static size_t RequiredBytesForEncode(size_t length)
Calculates an upper bound on the space required for encoding a string of the given length.
Definition: immutable_string.h:196
LinkedImmutableString(const char *encoded)
NOTE: nullptr is a legal value for encoded, and signals the same thing as nullptr would on a const ch...
Definition: immutable_string.h:178
static LinkedImmutableString EncodeHeader(LinkedImmutableString next, char **dst)
Encode the given string and “next” pointer as a header for LinkedImmutableString, and returns a new o...
Definition: immutable_string.h:226
const char * p
Definition: ctype-mb.cc:1234
const char * VarintParse64(const char *p, uint64_t *out)
Definition: immutable_string.h:111
int64_t ZigZagDecode64(uint64_t n)
Definition: immutable_string.h:99
std::pair< const char *, uint64_t > VarintParseSlow64(const char *p, uint32_t res32)
Definition: hash_join_buffer.cc:269
uint64_t ZigZagEncode64(int64_t n)
Definition: immutable_string.h:92
Header for compiler-dependent features.
#define MY_COMPILER_GCC_DIAGNOSTIC_IGNORE(X)
Definition: my_compiler.h:249
#define MY_COMPILER_MSVC_DIAGNOSTIC_IGNORE(X)
Definition: my_compiler.h:254
#define MY_COMPILER_DIAGNOSTIC_PUSH()
save the compiler's diagnostic (enabled warnings, errors, ...) state
Definition: my_compiler.h:284
#define MY_COMPILER_DIAGNOSTIC_POP()
restore the compiler's diagnostic (enabled warnings, errors, ...) state
Definition: my_compiler.h:285
bool length(const dd::Spatial_reference_system *srs, const Geometry *g1, double *length, bool *null) noexcept
Computes the length of linestrings and multilinestrings.
Definition: length.cc:75
MY_COMPILER_CLANG_DIAGNOSTIC_IGNORE("-Winconsistent-missing-destructor-override") extern "C"
Definition: protobuf_plugin.cc:31
Definition: immutable_string.h:208
const char * data
Definition: immutable_string.h:209
LinkedImmutableString next
Definition: immutable_string.h:210
int n
Definition: xcom_base.cc:508