MySQL  8.0.27
Source Code Documentation
immutable_string.h
Go to the documentation of this file.
1 #ifndef IMMUTABLE_STRING_H
2 #define IMMUTABLE_STRING_H
3 
4 /* Copyright (c) 2020, 2021, Oracle and/or its affiliates.
5 
6  This program is free software; you can redistribute it and/or modify
7  it under the terms of the GNU General Public License, version 2.0,
8  as published by the Free Software Foundation.
9 
10  This program is also distributed with certain software (including
11  but not limited to OpenSSL) that is licensed under separate terms,
12  as designated in a particular file or component or in included license
13  documentation. The authors of MySQL hereby grant you an additional
14  permission to link the program and your derivative works with the
15  separately licensed software that they have included with MySQL.
16 
17  This program is distributed in the hope that it will be useful,
18  but WITHOUT ANY WARRANTY; without even the implied warranty of
19  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
20  GNU General Public License, version 2.0, for more details.
21 
22  You should have received a copy of the GNU General Public License
23  along with this program; if not, write to the Free Software
24  Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */
25 
26 /**
27  * @file
28  *
29  * ImmutableString defines a storage format for strings that is designed to be
30  * as compact as possible, while still being reasonably fast to decode. There
31  * are two variants; one with length, and one with a “next” pointer that can
32  * point to another string. As the name implies, both are designed to be
33  * immutable, i.e., they are not designed to be changed (especially not in
34  * length) after being encoded. See the individual classes for more details.
35  */
36 
37 #include <assert.h>
38 #include <stddef.h>
39 #include <stdint.h>
40 
41 #include <limits>
42 #include <string_view>
43 
44 #include "my_compiler.h"
45 
47 MY_COMPILER_GCC_DIAGNOSTIC_IGNORE("-Wsuggest-override")
48 MY_COMPILER_CLANG_DIAGNOSTIC_IGNORE("-Wdeprecated-dynamic-exception-spec")
50 #include <google/protobuf/io/coded_stream.h>
52 
53 #include "template_utils.h"
54 
55 /**
56  * The variant with length (ImmutableStringWithLength) stores the length as a
57  * Varint128 (similar to protobuf), immediately followed by the string itself.
58  * (There is no zero termination.) This saves space over using e.g. a fixed
59  * size_t as length, since most strings are short. This is used for keys in the
60  * hash join buffer, but would be applicable other places as well.
61  */
63  public:
65  explicit ImmutableStringWithLength(const char *encoded) : m_ptr(encoded) {}
66 
67  inline std::string_view Decode() const;
68 
69  /// Encode the given string as an ImmutableStringWithLength, and returns
70  /// a new object pointing to it. *dst must contain at least the number
71  /// of bytes returned by RequiredBytesForEncode.
72  ///
73  /// “dst” is moved to one byte past the end of the written stream.
74  static inline ImmutableStringWithLength Encode(const char *data,
75  size_t length, char **dst);
76 
77  /// Calculates an upper bound on the space required for encoding a string
78  /// of the given length.
79  static inline size_t RequiredBytesForEncode(size_t length) {
80  static constexpr int kMaxVarintBytes = 10;
81  return kMaxVarintBytes + length;
82  }
83 
84  /// Compares full contents (data/size).
85  inline bool operator==(ImmutableStringWithLength other) const;
86 
87  private:
88  const char *m_ptr = nullptr;
89 };
90 
91 // From protobuf.
92 inline uint64_t ZigZagEncode64(int64_t n) {
93  // Note: the right-shift must be arithmetic
94  // Note: left shift must be unsigned because of overflow
95  return (static_cast<uint64_t>(n) << 1) ^ static_cast<uint64_t>(n >> 63);
96 }
97 
98 // From protobuf.
99 inline int64_t ZigZagDecode64(uint64_t n) {
100  // Note: Using unsigned types prevent undefined behavior
101  return static_cast<int64_t>((n >> 1) ^ (~(n & 1) + 1));
102 }
103 
104 // Defined in sql/hash_join_buffer.cc, since that is the primary user
105 // of ImmutableString functionality.
106 std::pair<const char *, uint64_t> VarintParseSlow64(const char *p,
107  uint32_t res32);
108 
109 // From protobuf. Included here because protobuf 3.6 does not expose the
110 // parse_context.h header to clients.
111 inline const char *VarintParse64(const char *p, uint64_t *out) {
112  auto ptr = pointer_cast<const uint8_t *>(p);
113  uint32_t res = ptr[0];
114  if (!(res & 0x80)) {
115  *out = res;
116  return p + 1;
117  }
118  uint32_t x = ptr[1];
119  res += (x - 1) << 7;
120  if (!(x & 0x80)) {
121  *out = res;
122  return p + 2;
123  }
124  auto tmp = VarintParseSlow64(p, res);
125  *out = tmp.second;
126  return tmp.first;
127 }
128 
129 std::string_view ImmutableStringWithLength::Decode() const {
130  uint64_t size;
131  const char *data = VarintParse64(m_ptr, &size);
132  return {data, static_cast<size_t>(size)};
133 }
134 
136  size_t length,
137  char **dst) {
138  using google::protobuf::io::CodedOutputStream;
139 
140  const char *base = *dst;
141  uint8_t *ptr = CodedOutputStream::WriteVarint64ToArray(
142  length, pointer_cast<uint8_t *>(*dst));
143  if (length != 0) { // Avoid sending nullptr to memcpy().
144  memcpy(ptr, data, length);
145  }
146  *dst = pointer_cast<char *>(ptr + length);
147 
148  return ImmutableStringWithLength(base);
149 }
150 
152  ImmutableStringWithLength other) const {
153  return Decode() == other.Decode();
154 }
155 
156 /**
157  * LinkedImmutableString is designed for storing rows (values) in hash join. It
158  * does not need a length, since it is implicit from the contents; however,
159  * since there might be multiple values with the same key, we simulate a
160  * multimap by having a “next” pointer. (Normally, linked lists are a bad idea
161  * due to pointer chasing, but here, we're doing so much work for each value
162  * that the overhead disappears into the noise.)
163  *
164  * As the next pointer is usually be very close in memory to ourselves
165  * (nearly all rows are stored in the same MEM_ROOT), we don't need to store
166  * the entire pointer; instead, we store the difference between the start of
167  * this string and the next pointer, as a zigzag-encoded Varint128. As with
168  * the length in ImmutableStringWithLength, this typically saves 6–7 bytes
169  * for each value. The special value of 0 means that there is no next pointer
170  * (ie., it is nullptr), as that would otherwise be an infinite loop.
171  */
173  public:
174  struct Decoded;
175 
176  /// NOTE: nullptr is a legal value for encoded, and signals the same thing
177  /// as nullptr would on a const char *.
178  explicit LinkedImmutableString(const char *encoded) : m_ptr(encoded) {}
179 
180  inline Decoded Decode() const;
181 
182  /// Encode the given string and “next” pointer as a header for
183  /// LinkedImmutableString, and returns a new object pointing to it.
184  /// Note that unlike ImmutableStringWithLength::Encode(), this only
185  /// encodes the header; since there is no explicitly stored length,
186  /// you must write the contents of the string yourself.
187  ///
188  /// *dst must contain at least the number of bytes returned by
189  /// RequiredBytesForEncode. It is moved to one byte past the end of the
190  /// written stream (which is the right place to store the string itself).
192  char **dst);
193 
194  /// Calculates an upper bound on the space required for encoding a string
195  /// of the given length.
196  static inline size_t RequiredBytesForEncode(size_t length) {
197  static constexpr int kMaxVarintBytes = 10;
198  return kMaxVarintBytes + length;
199  }
200 
201  inline bool operator==(std::nullptr_t) const { return m_ptr == nullptr; }
202  inline bool operator!=(std::nullptr_t) const { return m_ptr != nullptr; }
203 
204  private:
205  const char *m_ptr;
206 };
207 
209  const char *data;
211 };
212 
215  uint64_t ptr_diff;
216  const char *ptr = VarintParse64(m_ptr, &ptr_diff);
217  decoded.data = ptr;
218  if (ptr_diff == 0) {
219  decoded.next = LinkedImmutableString{nullptr};
220  } else {
221  decoded.next = LinkedImmutableString(m_ptr + ZigZagDecode64(ptr_diff));
222  }
223  return decoded;
224 }
225 
227  LinkedImmutableString next, char **dst) {
228  using google::protobuf::io::CodedOutputStream;
229 
230  const char *base = *dst;
231  uint8_t *ptr = pointer_cast<uint8_t *>(*dst);
232  if (next.m_ptr == nullptr) {
233  *ptr++ = 0;
234  } else {
235  ptr = CodedOutputStream::WriteVarint64ToArray(
236  ZigZagEncode64(next.m_ptr - base), ptr);
237  }
238  *dst = pointer_cast<char *>(ptr);
239  return LinkedImmutableString(base);
240 }
241 
242 #endif // IMMUTABLE_STRING_H
The variant with length (ImmutableStringWithLength) stores the length as a Varint128 (similar to prot...
Definition: immutable_string.h:62
ImmutableStringWithLength(const char *encoded)
Definition: immutable_string.h:65
const char * m_ptr
Definition: immutable_string.h:88
bool operator==(ImmutableStringWithLength other) const
Compares full contents (data/size).
Definition: immutable_string.h:151
static ImmutableStringWithLength Encode(const char *data, size_t length, char **dst)
Encode the given string as an ImmutableStringWithLength, and returns a new object pointing to it.
Definition: immutable_string.h:135
ImmutableStringWithLength()=default
std::string_view Decode() const
Definition: immutable_string.h:129
static size_t RequiredBytesForEncode(size_t length)
Calculates an upper bound on the space required for encoding a string of the given length.
Definition: immutable_string.h:79
LinkedImmutableString is designed for storing rows (values) in hash join.
Definition: immutable_string.h:172
Decoded Decode() const
Definition: immutable_string.h:213
bool operator!=(std::nullptr_t) const
Definition: immutable_string.h:202
bool operator==(std::nullptr_t) const
Definition: immutable_string.h:201
const char * m_ptr
Definition: immutable_string.h:205
static size_t RequiredBytesForEncode(size_t length)
Calculates an upper bound on the space required for encoding a string of the given length.
Definition: immutable_string.h:196
LinkedImmutableString(const char *encoded)
NOTE: nullptr is a legal value for encoded, and signals the same thing as nullptr would on a const ch...
Definition: immutable_string.h:178
static LinkedImmutableString EncodeHeader(LinkedImmutableString next, char **dst)
Encode the given string and “next” pointer as a header for LinkedImmutableString, and returns a new o...
Definition: immutable_string.h:226
const char * p
Definition: ctype-mb.cc:1236
const char * VarintParse64(const char *p, uint64_t *out)
Definition: immutable_string.h:111
std::pair< const char *, uint64_t > VarintParseSlow64(const char *p, uint32_t res32)
Definition: hash_join_buffer.cc:263
int64_t ZigZagDecode64(uint64_t n)
Definition: immutable_string.h:99
uint64_t ZigZagEncode64(int64_t n)
Definition: immutable_string.h:92
Header for compiler-dependent features.
#define MY_COMPILER_GCC_DIAGNOSTIC_IGNORE(X)
Definition: my_compiler.h:260
#define MY_COMPILER_MSVC_DIAGNOSTIC_IGNORE(X)
Definition: my_compiler.h:265
#define MY_COMPILER_DIAGNOSTIC_PUSH()
save the compiler's diagnostic (enabled warnings, errors, ...) state
Definition: my_compiler.h:295
#define MY_COMPILER_DIAGNOSTIC_POP()
restore the compiler's diagnostic (enabled warnings, errors, ...) state
Definition: my_compiler.h:296
bool length(const dd::Spatial_reference_system *srs, const Geometry *g1, double *length, bool *null) noexcept
Computes the length of linestrings and multilinestrings.
Definition: length.cc:75
MY_COMPILER_CLANG_DIAGNOSTIC_IGNORE("-Winconsistent-missing-destructor-override") extern "C"
Definition: protobuf_plugin.cc:31
Definition: immutable_string.h:208
const char * data
Definition: immutable_string.h:209
LinkedImmutableString next
Definition: immutable_string.h:210
int n
Definition: xcom_base.cc:505