Skip to content

Commit 4a126bf

Browse files
committed
Removing some duplicate code in THD::convert_string() & friends
1. Adding THD::convert_string(LEX_CSTRING *to,...) as a wrapper for convert_string(LEX_STRING *to,...), as LEX_CSTRING is now frequently used for conversion purposes. This reduced duplicate code in the TEXT_STRING_sys, TEXT_STRING_literal, TEXT_STRING_filesystem grammar rules in *.yy 2. Adding yet another THD::convert_string() with an extra parameter "bool simple_copy_is_possible". This reduced the repeated code in the mentioned grammar rules in *.yy even further. 3. Deriving Lex_ident_cli_st from Lex_string_with_metadata_st, as they have very similar functionality. Moving m_quote from Lex_ident_cli_st to Lex_string_with_metadata_st, as m_quote will be used later to optimize string literals anyway (e.g. to avoid redundant copying at the tokenizer stage). Adjusting Lex_input_stream::get_text() accordingly. 4. Moving the remainder of the code in the TEXT_STRING_sys, TEXT_STRING_literal, TEXT_STRING_filesystem grammar rules into new methods in THD: - make_text_string_sys() - make_text_string_connection() - make_text_string_filesystem() and changing *.yy to use these new methods. This reduced the amount of similar code in sql_yacc.yy and sql_yacc_ora.yy. 5. Removing duplicate code in Lex_input_stream::body_utf8_append_ident() by reusing THD::make_text_string_sys(). Thanks to #3 and #4. 6. Making THD members charset_is_system_charset, charset_is_collation_connection, charset_is_character_set_filesystem private, as they are not needed externally any more.
1 parent af68252 commit 4a126bf

File tree

5 files changed

+101
-130
lines changed

5 files changed

+101
-130
lines changed

sql/sql_class.h

Lines changed: 40 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3130,8 +3130,10 @@ class THD :public Statement,
31303130
is set if a statement accesses a temporary table created through
31313131
CREATE TEMPORARY TABLE.
31323132
*/
3133-
bool charset_is_system_charset, charset_is_collation_connection;
3133+
private:
3134+
bool charset_is_system_charset, charset_is_collation_connection;
31343135
bool charset_is_character_set_filesystem;
3136+
public:
31353137
bool enable_slow_log; /* Enable slow log for current statement */
31363138
bool abort_on_warning;
31373139
bool got_warning; /* Set on call to push_warning() */
@@ -3706,6 +3708,25 @@ class THD :public Statement,
37063708
bool convert_string(LEX_STRING *to, CHARSET_INFO *to_cs,
37073709
const char *from, size_t from_length,
37083710
CHARSET_INFO *from_cs);
3711+
bool convert_string(LEX_CSTRING *to, CHARSET_INFO *to_cs,
3712+
const char *from, size_t from_length,
3713+
CHARSET_INFO *from_cs)
3714+
{
3715+
LEX_STRING tmp;
3716+
bool rc= convert_string(&tmp, to_cs, from, from_length, from_cs);
3717+
to->str= tmp.str;
3718+
to->length= tmp.length;
3719+
return rc;
3720+
}
3721+
bool convert_string(LEX_CSTRING *to, CHARSET_INFO *tocs,
3722+
const LEX_CSTRING *from, CHARSET_INFO *fromcs,
3723+
bool simple_copy_is_possible)
3724+
{
3725+
if (!simple_copy_is_possible)
3726+
return unlikely(convert_string(to, tocs, from->str, from->length, fromcs));
3727+
*to= *from;
3728+
return false;
3729+
}
37093730
/*
37103731
Convert a string between character sets.
37113732
Uses my_convert_fix(), which uses an mb_wc .. wc_mb loop internally.
@@ -3767,6 +3788,24 @@ class THD :public Statement,
37673788
Item_basic_constant *make_string_literal_nchar(const Lex_string_with_metadata_st &str);
37683789
Item_basic_constant *make_string_literal_charset(const Lex_string_with_metadata_st &str,
37693790
CHARSET_INFO *cs);
3791+
bool make_text_string_sys(LEX_CSTRING *to,
3792+
const Lex_string_with_metadata_st *from)
3793+
{
3794+
return convert_string(to, system_charset_info,
3795+
from, charset(), charset_is_system_charset);
3796+
}
3797+
bool make_text_string_connection(LEX_CSTRING *to,
3798+
const Lex_string_with_metadata_st *from)
3799+
{
3800+
return convert_string(to, variables.collation_connection,
3801+
from, charset(), charset_is_collation_connection);
3802+
}
3803+
bool make_text_string_filesystem(LEX_CSTRING *to,
3804+
const Lex_string_with_metadata_st *from)
3805+
{
3806+
return convert_string(to, variables.character_set_filesystem,
3807+
from, charset(), charset_is_character_set_filesystem);
3808+
}
37703809
void add_changed_table(TABLE *table);
37713810
void add_changed_table(const char *key, size_t key_length);
37723811
CHANGED_TABLE_LIST * changed_table_dup(const char *key, size_t key_length);

sql/sql_lex.cc

Lines changed: 15 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -416,32 +416,18 @@ void Lex_input_stream::body_utf8_append(const char *ptr)
416416
operation.
417417
*/
418418

419-
void Lex_input_stream::body_utf8_append_ident(THD *thd,
420-
const LEX_CSTRING *txt,
421-
const char *end_ptr)
419+
void
420+
Lex_input_stream::body_utf8_append_ident(THD *thd,
421+
const Lex_string_with_metadata_st *txt,
422+
const char *end_ptr)
422423
{
423424
if (!m_cpp_utf8_processed_ptr)
424425
return;
425426

426427
LEX_CSTRING utf_txt;
427-
CHARSET_INFO *txt_cs= thd->charset();
428-
429-
if (!my_charset_same(txt_cs, &my_charset_utf8_general_ci))
430-
{
431-
LEX_STRING to;
432-
thd->convert_string(&to,
433-
&my_charset_utf8_general_ci,
434-
txt->str, (uint) txt->length,
435-
txt_cs);
436-
utf_txt.str= to.str;
437-
utf_txt.length= to.length;
438-
439-
}
440-
else
441-
utf_txt= *txt;
428+
thd->make_text_string_sys(&utf_txt, txt); // QQ: check return value?
442429

443430
/* NOTE: utf_txt.length is in bytes, not in symbols. */
444-
445431
memcpy(m_body_utf8_ptr, utf_txt.str, utf_txt.length);
446432
m_body_utf8_ptr += utf_txt.length;
447433
*m_body_utf8_ptr= 0;
@@ -1043,13 +1029,13 @@ bool Lex_input_stream::get_text(Lex_string_with_metadata_st *dst, uint sep,
10431029
uchar c;
10441030
uint found_escape=0;
10451031
CHARSET_INFO *cs= m_thd->charset();
1032+
bool is_8bit= false;
10461033

1047-
dst->set_8bit(false);
10481034
while (! eof())
10491035
{
10501036
c= yyGet();
10511037
if (c & 0x80)
1052-
dst->set_8bit(true);
1038+
is_8bit= true;
10531039
#ifdef USE_MB
10541040
{
10551041
int l;
@@ -1093,23 +1079,24 @@ bool Lex_input_stream::get_text(Lex_string_with_metadata_st *dst, uint sep,
10931079

10941080
if (!(to= (char*) m_thd->alloc((uint) (end - str) + 1)))
10951081
{
1096-
dst->str= ""; // Sql_alloc has set error flag
1097-
dst->length= 0;
1098-
return true;
1082+
dst->set(&empty_clex_str, 0, '\0');
1083+
return true; // Sql_alloc has set error flag
10991084
}
1100-
dst->str= to;
11011085

11021086
m_cpp_text_start= m_cpp_tok_start + pre_skip;
11031087
m_cpp_text_end= get_cpp_ptr() - post_skip;
11041088

11051089
if (!found_escape)
11061090
{
1107-
memcpy(to, str, dst->length= (end - str));
1108-
to[dst->length]= 0;
1091+
size_t len= (end - str);
1092+
memcpy(to, str, len);
1093+
to[len]= '\0';
1094+
dst->set(to, len, is_8bit, '\0');
11091095
}
11101096
else
11111097
{
1112-
dst->length= unescape(cs, to, str, end, sep);
1098+
size_t len= unescape(cs, to, str, end, sep);
1099+
dst->set(to, len, is_8bit, '\0');
11131100
}
11141101
return false;
11151102
}

sql/sql_lex.h

Lines changed: 33 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -37,22 +37,47 @@
3737

3838

3939
/**
40-
A string with metadata.
40+
A string with metadata. Usually points to a string in the client
41+
character set, but unlike Lex_ident_cli_st (see below) it does not
42+
necessarily point to a query fragment. It can also point to memory
43+
of other kinds (e.g. an additional THD allocated memory buffer
44+
not overlapping with the current query text).
45+
4146
We'll add more flags here eventually, to know if the string has, e.g.:
4247
- multi-byte characters
4348
- bad byte sequences
4449
- backslash escapes: 'a\nb'
45-
- separator escapes: 'a''b'
4650
and reuse the original query fragments instead of making the string
4751
copy too early, in Lex_input_stream::get_text().
4852
This will allow us to avoid unnecessary copying, as well as
4953
create more optimal Item types in sql_yacc.yy
5054
*/
5155
struct Lex_string_with_metadata_st: public LEX_CSTRING
5256
{
57+
private:
5358
bool m_is_8bit; // True if the string has 8bit characters
59+
char m_quote; // Quote character, or 0 if not quoted
5460
public:
5561
void set_8bit(bool is_8bit) { m_is_8bit= is_8bit; }
62+
void set_metadata(bool is_8bit, char quote)
63+
{
64+
m_is_8bit= is_8bit;
65+
m_quote= quote;
66+
}
67+
void set(const char *s, size_t len, bool is_8bit, char quote)
68+
{
69+
str= s;
70+
length= len;
71+
set_metadata(is_8bit, quote);
72+
}
73+
void set(const LEX_CSTRING *s, bool is_8bit, char quote)
74+
{
75+
((LEX_CSTRING &)*this)= *s;
76+
set_metadata(is_8bit, quote);
77+
}
78+
bool is_8bit() const { return m_is_8bit; }
79+
bool is_quoted() const { return m_quote != '\0'; }
80+
char quote() const { return m_quote; }
5681
// Get string repertoire by the 8-bit flag and the character set
5782
uint repertoire(CHARSET_INFO *cs) const
5883
{
@@ -71,44 +96,27 @@ struct Lex_string_with_metadata_st: public LEX_CSTRING
7196
Used to store identifiers in the client character set.
7297
Points to a query fragment.
7398
*/
74-
struct Lex_ident_cli_st: public LEX_CSTRING
99+
struct Lex_ident_cli_st: public Lex_string_with_metadata_st
75100
{
76-
private:
77-
bool m_is_8bit;
78-
char m_quote;
79101
public:
80102
void set_keyword(const char *s, size_t len)
81103
{
82-
str= s;
83-
length= len;
84-
m_is_8bit= false;
85-
m_quote= '\0';
104+
set(s, len, false, '\0');
86105
}
87106
void set_ident(const char *s, size_t len, bool is_8bit)
88107
{
89-
str= s;
90-
length= len;
91-
m_is_8bit= is_8bit;
92-
m_quote= '\0';
108+
set(s, len, is_8bit, '\0');
93109
}
94110
void set_ident_quoted(const char *s, size_t len, bool is_8bit, char quote)
95111
{
96-
str= s;
97-
length= len;
98-
m_is_8bit= is_8bit;
99-
m_quote= quote;
112+
set(s, len, is_8bit, quote);
100113
}
101114
void set_unquoted(const LEX_CSTRING *s, bool is_8bit)
102115
{
103-
((LEX_CSTRING &)*this)= *s;
104-
m_is_8bit= is_8bit;
105-
m_quote= '\0';
116+
set(s, is_8bit, '\0');
106117
}
107118
const char *pos() const { return str - is_quoted(); }
108119
const char *end() const { return str + length + is_quoted(); }
109-
bool is_quoted() const { return m_quote != '\0'; }
110-
bool is_8bit() const { return m_is_8bit; }
111-
char quote() const { return m_quote; }
112120
};
113121

114122

@@ -2453,7 +2461,7 @@ class Lex_input_stream
24532461
void body_utf8_append(const char *ptr);
24542462
void body_utf8_append(const char *ptr, const char *end_ptr);
24552463
void body_utf8_append_ident(THD *thd,
2456-
const LEX_CSTRING *txt,
2464+
const Lex_string_with_metadata_st *txt,
24572465
const char *end_ptr);
24582466
void body_utf8_append_escape(THD *thd,
24592467
const LEX_CSTRING *txt,

sql/sql_yacc.yy

Lines changed: 7 additions & 38 deletions
Original file line numberDiff line numberDiff line change
@@ -15170,57 +15170,26 @@ IDENT_sys:
1517015170
TEXT_STRING_sys:
1517115171
TEXT_STRING
1517215172
{
15173-
if (thd->charset_is_system_charset)
15174-
$$= $1;
15175-
else
15176-
{
15177-
LEX_STRING to;
15178-
if (unlikely(thd->convert_string(&to, system_charset_info,
15179-
$1.str, $1.length,
15180-
thd->charset())))
15181-
MYSQL_YYABORT;
15182-
$$.str= to.str;
15183-
$$.length= to.length;
15184-
}
15173+
if (thd->make_text_string_sys(&$$, &$1))
15174+
MYSQL_YYABORT;
1518515175
}
1518615176
;
1518715177

1518815178
TEXT_STRING_literal:
1518915179
TEXT_STRING
1519015180
{
15191-
if (thd->charset_is_collation_connection)
15192-
$$= $1;
15193-
else
15194-
{
15195-
LEX_STRING to;
15196-
if (unlikely(thd->convert_string(&to,
15197-
thd->variables.collation_connection,
15198-
$1.str, $1.length,
15199-
thd->charset())))
15200-
MYSQL_YYABORT;
15201-
$$.str= to.str;
15202-
$$.length= to.length;
15203-
}
15181+
if (thd->make_text_string_connection(&$$, &$1))
15182+
MYSQL_YYABORT;
1520415183
}
1520515184
;
1520615185

1520715186
TEXT_STRING_filesystem:
1520815187
TEXT_STRING
1520915188
{
15210-
if (thd->charset_is_character_set_filesystem)
15211-
$$= $1;
15212-
else
15213-
{
15214-
LEX_STRING to;
15215-
if (unlikely(thd->convert_string(&to,
15216-
thd->variables.character_set_filesystem,
15217-
$1.str, $1.length,
15218-
thd->charset())))
15219-
MYSQL_YYABORT;
15220-
$$.str= to.str;
15221-
$$.length= to.length;
15222-
}
15189+
if (thd->make_text_string_filesystem(&$$, &$1))
15190+
MYSQL_YYABORT;
1522315191
}
15192+
;
1522415193

1522515194
ident_table_alias:
1522615195
IDENT_sys

sql/sql_yacc_ora.yy

Lines changed: 6 additions & 38 deletions
Original file line numberDiff line numberDiff line change
@@ -14919,56 +14919,24 @@ IDENT_sys:
1491914919
TEXT_STRING_sys:
1492014920
TEXT_STRING
1492114921
{
14922-
if (thd->charset_is_system_charset)
14923-
$$= $1;
14924-
else
14925-
{
14926-
LEX_STRING to;
14927-
if (unlikely(thd->convert_string(&to, system_charset_info,
14928-
$1.str, $1.length,
14929-
thd->charset())))
14930-
MYSQL_YYABORT;
14931-
$$.str= to.str;
14932-
$$.length= to.length;
14933-
}
14922+
if (thd->make_text_string_sys(&$$, &$1))
14923+
MYSQL_YYABORT;
1493414924
}
1493514925
;
1493614926

1493714927
TEXT_STRING_literal:
1493814928
TEXT_STRING
1493914929
{
14940-
if (thd->charset_is_collation_connection)
14941-
$$= $1;
14942-
else
14943-
{
14944-
LEX_STRING to;
14945-
if (unlikely(thd->convert_string(&to,
14946-
thd->variables.collation_connection,
14947-
$1.str, $1.length,
14948-
thd->charset())))
14949-
MYSQL_YYABORT;
14950-
$$.str= to.str;
14951-
$$.length= to.length;
14952-
}
14930+
if (thd->make_text_string_connection(&$$, &$1))
14931+
MYSQL_YYABORT;
1495314932
}
1495414933
;
1495514934

1495614935
TEXT_STRING_filesystem:
1495714936
TEXT_STRING
1495814937
{
14959-
if (thd->charset_is_character_set_filesystem)
14960-
$$= $1;
14961-
else
14962-
{
14963-
LEX_STRING to;
14964-
if (unlikely(thd->convert_string(&to,
14965-
thd->variables.character_set_filesystem,
14966-
$1.str, $1.length,
14967-
thd->charset())))
14968-
MYSQL_YYABORT;
14969-
$$.str= to.str;
14970-
$$.length= to.length;
14971-
}
14938+
if (thd->make_text_string_filesystem(&$$, &$1))
14939+
MYSQL_YYABORT;
1497214940
}
1497314941
;
1497414942

0 commit comments

Comments
 (0)