Skip to content

Commit 6d5085c

Browse files
author
Tor Didriksen
committed
Bug#37586193 Use std::string_view for charset/collation lookup
We have several instances of std::unordered_map<std::string, CHARSET_INFO*> for looking up collations and character sets. With a slight rewrite (a transparent hash functor plus std::equal_to<> as the key comparator), lookups can take a std::string_view rather than a std::string. This is more lightweight, as it avoids constructing a temporary std::string (with its memory allocation and deallocation) for each lookup. Name::m_normalized is also converted from a raw pointer to std::string in order to simplify the code. Change-Id: Ica799aa4db35dc605bc8208ffc9a1c254ff55bbe
1 parent 517b56d commit 6d5085c

File tree

7 files changed

+80
-99
lines changed

7 files changed

+80
-99
lines changed

include/mysql/strings/collations.h

+11-16
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,7 @@
2626

2727
#include <cstddef>
2828
#include <string>
29+
#include <string_view>
2930

3031
#include "mysql/strings/api.h"
3132

@@ -75,28 +76,22 @@ class MYSQL_STRINGS_EXPORT Name {
7576
@note throws std::bad_alloc
7677
*/
7778
Name(const char *name, size_t size);
79+
~Name() = default;
7880

79-
/**
80-
Constructor
81-
82-
@note throws std::bad_alloc
83-
*/
84-
Name(const Name &);
85-
86-
Name(Name &&) noexcept;
87-
88-
~Name();
89-
90-
Name &operator=(const Name &);
91-
Name &operator=(Name &&) noexcept;
81+
// These must be explicitly defaulted for clang on Windows due to
82+
// __declspec(dllexport).
83+
Name(const Name &) = default;
84+
Name(Name &&) = default;
85+
Name &operator=(const Name &) = default;
86+
Name &operator=(Name &&) = default;
9287

9388
/**
94-
Returns normalized name as std::string
89+
Returns normalized name as std::string_view.
9590
*/
96-
std::string operator()() const { return m_normalized; }
91+
std::string_view to_string_view() const { return m_normalized; }
9792

9893
private:
99-
const char *m_normalized{nullptr};
94+
std::string m_normalized;
10095
};
10196

10297
/**

mysys/charset.cc

+4-3
Original file line numberDiff line numberDiff line change
@@ -294,7 +294,8 @@ CHARSET_INFO *my_collation_get_by_name(const char *collation_name, myf flags,
294294
if (cs == nullptr && (flags & MY_WME)) {
295295
char index_file[FN_REFLEN + sizeof(MY_CHARSET_INDEX)];
296296
my_stpcpy(get_charsets_dir(index_file), MY_CHARSET_INDEX);
297-
my_error(EE_UNKNOWN_COLLATION, MYF(0), name().c_str(), index_file);
297+
my_error(EE_UNKNOWN_COLLATION, MYF(0),
298+
std::string(name.to_string_view()).c_str(), index_file);
298299
}
299300
return cs;
300301
}
@@ -325,7 +326,7 @@ CHARSET_INFO *my_charset_get_by_name(const char *cs_name, uint cs_flags,
325326
CHARSET_INFO *cs = nullptr;
326327
if (cs_flags & MY_CS_PRIMARY) {
327328
cs = entry()->find_primary(name, flags, errmsg);
328-
if (cs == nullptr && name() == "utf8") {
329+
if (cs == nullptr && name.to_string_view() == "utf8") {
329330
// The parser does get_charset_by_csname().
330331
// Also needed for e.g. SET character_set_client= 'utf8'.
331332
// Also needed by the lexer for: "select _utf8 0xD0B0D0B1D0B2;"
@@ -334,7 +335,7 @@ CHARSET_INFO *my_charset_get_by_name(const char *cs_name, uint cs_flags,
334335
}
335336
} else if (cs_flags & MY_CS_BINSORT) {
336337
cs = entry()->find_default_binary(name, flags, errmsg);
337-
if (cs == nullptr && name() == "utf8") {
338+
if (cs == nullptr && name.to_string_view() == "utf8") {
338339
cs = entry()->find_default_binary(mysql::collation::Name("utf8mb3"),
339340
flags, errmsg);
340341
}

strings/CMakeLists.txt

+4
Original file line numberDiff line numberDiff line change
@@ -128,6 +128,10 @@ ELSEIF(WIN32)
128128
PUBLIC MYSQL_LIBSTRINGS_DLL
129129
PRIVATE MYSQL_LIBSTRINGS_EXPORT
130130
)
131+
IF(WIN32_VS)
132+
# Suppress MSVC warning C4251: m_normalized ... needs to have dll-interface to be used by client ...
133+
TARGET_COMPILE_OPTIONS(strings_shared PUBLIC /wd4251)
134+
ENDIF()
131135
ELSE()
132136
ADD_SHARED_LIBRARY(strings_shared
133137
$<TARGET_OBJECTS:strings_objlib>

strings/collations.cc

+4-41
Original file line numberDiff line numberDiff line change
@@ -49,53 +49,16 @@ mysql::collation::Name::Name(const char *name, size_t size) {
4949
}
5050

5151
// TODO(gleb): fail instead of truncating too long names?
52-
size_t const truncated_size = std::min(size, MY_CS_BUFFER_SIZE);
53-
char *normalized = new char[truncated_size + 1];
52+
const size_t truncated_size = std::min(size, MY_CS_BUFFER_SIZE);
53+
m_normalized.reserve(truncated_size);
5454

5555
for (size_t i = 0; i < truncated_size; i++) {
5656
// TODO(gleb): use ASCII instead of Latin1?
57-
normalized[i] = static_cast<char>(
58-
my_charset_latin1.to_lower[static_cast<uint8_t>(name[i])]);
57+
m_normalized.push_back(static_cast<char>(
58+
my_charset_latin1.to_lower[static_cast<uint8_t>(name[i])]));
5959
}
60-
normalized[truncated_size] = '\0';
61-
m_normalized = normalized;
6260
}
6361

64-
mysql::collation::Name::Name(const mysql::collation::Name &name) {
65-
size_t const size = strlen(name.m_normalized);
66-
char *normalized = new char[size + 1];
67-
memcpy(normalized, name.m_normalized, size + 1);
68-
m_normalized = normalized;
69-
}
70-
71-
mysql::collation::Name::Name(mysql::collation::Name &&name) noexcept
72-
: m_normalized(name.m_normalized) {
73-
name.m_normalized = nullptr;
74-
}
75-
76-
mysql::collation::Name::~Name() { delete[] m_normalized; }
77-
78-
/// @cond Doxygen_is_confused
79-
mysql::collation::Name &mysql::collation::Name::Name::operator=(
80-
const Name &name) {
81-
if (this == &name) {
82-
return *this;
83-
}
84-
this->~Name();
85-
new (this) Name(name);
86-
return *this;
87-
}
88-
89-
mysql::collation::Name &mysql::collation::Name::Name::operator=(
90-
Name &&name) noexcept {
91-
if (this != &name) {
92-
this->~Name();
93-
new (this) Name(std::move(name));
94-
}
95-
return *this;
96-
}
97-
/// @endcond
98-
9962
void mysql::collation::initialize(const char *charset_dir,
10063
MY_CHARSET_LOADER *loader) {
10164
assert(mysql::collation_internals::entry == nullptr);

strings/collations_internal.cc

+36-20
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,7 @@
2929
#include <cstdlib>
3030
#include <memory>
3131
#include <new>
32+
#include <string_view>
3233

3334
#include "mysql/my_loglevel.h"
3435
#include "mysql/strings/collations.h"
@@ -545,28 +546,36 @@ class Charset_loader : public MY_CHARSET_LOADER {
545546
void *read_file(const char *, size_t *) override { return nullptr; }
546547
};
547548

548-
template <typename Key>
549-
using Hash = std::unordered_map<Key, CHARSET_INFO *>;
549+
using id_hash_map = mysql::collation_internals::id_hash_map;
550+
using sv_hash_map = mysql::collation_internals::sv_hash_map;
550551

551-
template <typename Key>
552-
CHARSET_INFO *find_in_hash(const Hash<Key> &hash, Key key) {
552+
CHARSET_INFO *find_in_hash(const sv_hash_map &hash, std::string_view key) {
553+
auto it = hash.find((key));
554+
return it == hash.end() ? nullptr : it->second;
555+
}
556+
557+
CHARSET_INFO *find_in_hash(const id_hash_map &hash, unsigned key) {
553558
auto it = hash.find(key);
554559
return it == hash.end() ? nullptr : it->second;
555560
}
556561

557-
CHARSET_INFO *find_collation_in_hash(const Hash<std::string> &hash,
558-
const std::string &key) {
562+
CHARSET_INFO *find_collation_in_hash(const sv_hash_map &hash,
563+
std::string_view key) {
559564
return find_in_hash(hash, key);
560565
}
561566

562-
CHARSET_INFO *find_cs_in_hash(const Hash<std::string> &hash,
563-
const mysql::collation::Name &key) {
564-
auto it = hash.find(key());
567+
CHARSET_INFO *find_cs_in_hash(const sv_hash_map &hash, std::string_view key) {
568+
auto it = hash.find(key);
565569
return it == hash.end() ? nullptr : it->second;
566570
}
567571

568-
template <typename Key>
569-
bool add_to_hash(Hash<Key> *hash, Key key, CHARSET_INFO *cs) {
572+
bool add_to_hash(id_hash_map *hash, unsigned key, CHARSET_INFO *cs) {
573+
// return !hash->insert({key, cs}).second;
574+
(*hash)[key] = cs;
575+
return false;
576+
}
577+
578+
bool add_to_hash(sv_hash_map *hash, std::string key, CHARSET_INFO *cs) {
570579
// return !hash->insert({key, cs}).second;
571580
(*hash)[key] = cs;
572581
return false;
@@ -639,7 +648,8 @@ Collations::~Collations() {
639648
CHARSET_INFO *Collations::find_by_name(const mysql::collation::Name &name,
640649
myf flags, MY_CHARSET_ERRMSG *errmsg) {
641650
return safe_init_when_necessary(
642-
find_collation_in_hash(m_all_by_collation_name, name()), flags, errmsg);
651+
find_collation_in_hash(m_all_by_collation_name, name.to_string_view()),
652+
flags, errmsg);
643653
}
644654

645655
CHARSET_INFO *Collations::find_by_id(unsigned id, myf flags,
@@ -650,31 +660,36 @@ CHARSET_INFO *Collations::find_by_id(unsigned id, myf flags,
650660
CHARSET_INFO *Collations::find_primary(const mysql::collation::Name &cs_name,
651661
myf flags, MY_CHARSET_ERRMSG *errmsg) {
652662
return safe_init_when_necessary(
653-
find_cs_in_hash(m_primary_by_cs_name, cs_name), flags, errmsg);
663+
find_cs_in_hash(m_primary_by_cs_name, cs_name.to_string_view()), flags,
664+
errmsg);
654665
}
655666

656667
CHARSET_INFO *Collations::find_default_binary(
657668
const mysql::collation::Name &cs_name, myf flags,
658669
MY_CHARSET_ERRMSG *errmsg) {
659-
return safe_init_when_necessary(find_cs_in_hash(m_binary_by_cs_name, cs_name),
660-
flags, errmsg);
670+
return safe_init_when_necessary(
671+
find_cs_in_hash(m_binary_by_cs_name, cs_name.to_string_view()), flags,
672+
errmsg);
661673
}
662674

663675
unsigned Collations::get_collation_id(
664676
const mysql::collation::Name &name) const {
665-
CHARSET_INFO *cs = find_collation_in_hash(m_all_by_collation_name, name());
677+
CHARSET_INFO *cs =
678+
find_collation_in_hash(m_all_by_collation_name, name.to_string_view());
666679
return cs ? cs->number : 0;
667680
}
668681

669682
unsigned Collations::get_primary_collation_id(
670683
const mysql::collation::Name &name) const {
671-
CHARSET_INFO *cs = find_cs_in_hash(m_primary_by_cs_name, name);
684+
CHARSET_INFO *cs =
685+
find_cs_in_hash(m_primary_by_cs_name, name.to_string_view());
672686
return cs ? cs->number : 0;
673687
}
674688

675689
unsigned Collations::get_default_binary_collation_id(
676690
const mysql::collation::Name &name) const {
677-
CHARSET_INFO *cs = find_cs_in_hash(m_binary_by_cs_name, name);
691+
CHARSET_INFO *cs =
692+
find_cs_in_hash(m_binary_by_cs_name, name.to_string_view());
678693
return cs ? cs->number : 0;
679694
}
680695

@@ -719,7 +734,8 @@ CHARSET_INFO *Collations::unsafe_init(CHARSET_INFO *cs,
719734
bool Collations::add_internal_collation(CHARSET_INFO *cs) {
720735
assert(cs->number != 0);
721736

722-
std::string const normalized_name{mysql::collation::Name{cs->m_coll_name}()};
737+
const std::string normalized_name{
738+
mysql::collation::Name{cs->m_coll_name}.to_string_view()};
723739

724740
if (add_to_hash(&m_all_by_collation_name, normalized_name, cs) ||
725741
add_to_hash(&m_all_by_id, cs->number, cs)) {
@@ -746,7 +762,7 @@ bool Collations::add_internal_collation(CHARSET_INFO *cs) {
746762

747763
CHARSET_INFO *Collations::find_by_name_unsafe(
748764
const mysql::collation::Name &name) {
749-
return find_collation_in_hash(m_all_by_collation_name, name());
765+
return find_collation_in_hash(m_all_by_collation_name, name.to_string_view());
750766
}
751767

752768
} // namespace mysql::collation_internals

strings/collations_internal.h

+17-16
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,7 @@
2727
#include <functional>
2828
#include <mutex>
2929
#include <string>
30+
#include <string_view>
3031
#include <unordered_map>
3132
#include <utility>
3233

@@ -44,6 +45,18 @@ class Name;
4445

4546
namespace collation_internals {
4647

48+
using id_hash_map = std::unordered_map<unsigned, CHARSET_INFO *>;
49+
50+
struct string_hash {
51+
using is_transparent = void;
52+
[[nodiscard]] size_t operator()(std::string_view txt) const {
53+
return std::hash<std::string_view>{}(txt);
54+
}
55+
};
56+
57+
using sv_hash_map = std::unordered_map<std::string, CHARSET_INFO *, string_hash,
58+
std::equal_to<>>;
59+
4760
/**
4861
Helper class: implementation of character set/collation library
4962
@@ -238,22 +251,10 @@ class Collations final {
238251
*/
239252
const std::string m_charset_dir;
240253

241-
/**
242-
Common parametric type to map character set/collation names or their ids
243-
to CHARSET_INFO object pointers
244-
245-
@tparam Key Name or id type (std::string or unsigned respectively)
246-
247-
TODO(gleb): it would be good to use mysql::collation::Name instead of
248-
std::string for Key.
249-
*/
250-
template <typename Key>
251-
using Hash = std::unordered_map<Key, CHARSET_INFO *>;
252-
253254
/**
254255
Maps collation ids to CHARSET_INFO object pointers
255256
*/
256-
Hash<unsigned> m_all_by_id;
257+
id_hash_map m_all_by_id;
257258

258259
/**
259260
Maps normalized strings of all known character set names, collation names,
@@ -262,7 +263,7 @@ class Collations final {
262263
@note see old_conv and get_old_charset_by_name() for exclusions
263264
@see old_conv(), get_old_charset_by_name()
264265
*/
265-
Hash<std::string> m_all_by_collation_name;
266+
sv_hash_map m_all_by_collation_name;
266267

267268
/**
268269
Maps normalized strings of character set names to CHARSET_INFO object
@@ -271,7 +272,7 @@ class Collations final {
271272
@note In MySQL, CHARSET_INFO object of character set is also an object
272273
of its primary collation.
273274
*/
274-
Hash<std::string> m_primary_by_cs_name;
275+
sv_hash_map m_primary_by_cs_name;
275276

276277
/**
277278
Maps normalized strings of character set names to CHARSET_INFO objects
@@ -280,7 +281,7 @@ class Collations final {
280281
@note utf8mb4 has two separate binary collations, so m_binary_by_cs_name
281282
contains a reference to utf8mb4_bin only.
282283
*/
283-
Hash<std::string> m_binary_by_cs_name;
284+
sv_hash_map m_binary_by_cs_name;
284285

285286
/**
286287
False if m_loader references external MY_CHARSET_LOADER, otherwise true.

strings/ctype.cc

+4-3
Original file line numberDiff line numberDiff line change
@@ -564,8 +564,9 @@ static int cs_value(MY_XML_PARSER *st, const char *attr, size_t len) {
564564
// Replace "utf8_" with "utf8mb3_" for external character sets.
565565
// Convert to lowercase first.
566566
mysql::collation::Name normalized_name(attr, len);
567-
if (normalized_name().starts_with("utf8_")) {
568-
std::string collation_name_string = normalized_name();
567+
if (normalized_name.to_string_view().starts_with("utf8_")) {
568+
std::string collation_name_string =
569+
std::string(normalized_name.to_string_view());
569570
// insert "mb3" to get "utf8mb3_xxx"
570571
collation_name_string.insert(4, "mb3");
571572
i->cs.m_coll_name =
@@ -579,7 +580,7 @@ static int cs_value(MY_XML_PARSER *st, const char *attr, size_t len) {
579580
// Replace "utf8" with "utf8mb3" for external character sets.
580581
// Convert to lowercase first.
581582
mysql::collation::Name normalized_name(attr, len);
582-
if (normalized_name() == "utf8") {
583+
if (normalized_name.to_string_view() == "utf8") {
583584
i->cs.csname =
584585
mstr(i->csname, STRING_WITH_LEN("utf8mb3"), MY_CS_NAME_SIZE - 1);
585586
} else {

0 commit comments

Comments
 (0)