18 #include <drizzled/charset.h>
19 #include <drizzled/error.h>
20 #include <drizzled/internal/m_string.h>
21 #include <drizzled/configmake.h>
/* Every buffer handed out by cs_alloc() is recorded here so that the cleanup
   loop further down in this file can delete[] each one. */
33 static vector<unsigned char*> memory_vector;
/* utf8mb4 UCA collations defined outside this file; each one is registered
   by init_compiled_charsets(). */
35 extern charset_info_st my_charset_utf8mb4_icelandic_uca_ci;
36 extern charset_info_st my_charset_utf8mb4_latvian_uca_ci;
37 extern charset_info_st my_charset_utf8mb4_romanian_uca_ci;
38 extern charset_info_st my_charset_utf8mb4_slovenian_uca_ci;
39 extern charset_info_st my_charset_utf8mb4_polish_uca_ci;
40 extern charset_info_st my_charset_utf8mb4_estonian_uca_ci;
41 extern charset_info_st my_charset_utf8mb4_spanish_uca_ci;
42 extern charset_info_st my_charset_utf8mb4_swedish_uca_ci;
43 extern charset_info_st my_charset_utf8mb4_turkish_uca_ci;
44 extern charset_info_st my_charset_utf8mb4_czech_uca_ci;
45 extern charset_info_st my_charset_utf8mb4_danish_uca_ci;
46 extern charset_info_st my_charset_utf8mb4_lithuanian_uca_ci;
47 extern charset_info_st my_charset_utf8mb4_slovak_uca_ci;
48 extern charset_info_st my_charset_utf8mb4_spanish2_uca_ci;
49 extern charset_info_st my_charset_utf8mb4_roman_uca_ci;
50 extern charset_info_st my_charset_utf8mb4_persian_uca_ci;
51 extern charset_info_st my_charset_utf8mb4_esperanto_uca_ci;
52 extern charset_info_st my_charset_utf8mb4_hungarian_uca_ci;
53 extern charset_info_st my_charset_utf8mb4_sinhala_uca_ci;
65 bool my_charset_same(
const charset_info_st *cs1,
const charset_info_st *cs2)
67 return cs1 == cs2 || not strcmp(cs1->csname, cs2->csname);
/*
  Look up a collation by its name.

  Walks all_charsets, stopping one slot short of the end of the array, and
  compares the name of every registered entry with `name` through
  my_charset_utf8_general_ci.strcasecmp().

  NOTE(review): the statements that return the match and the not-found value
  are not visible in this excerpt. Callers in this file treat 0 as
  "not found" -- confirm against the full source.
*/
70 static uint get_collation_number_internal(
const char *name)
72 for (charset_info_st **cs= all_charsets; cs < all_charsets + array_elements(all_charsets) - 1; cs++)
/* Skip empty slots and entries without a name before comparing. */
74 if (cs[0] && cs[0]->name && not my_charset_utf8_general_ci.strcasecmp(cs[0]->name, name))
82 static unsigned char* cs_alloc(
size_t size)
84 memory_vector.push_back(
new unsigned char[size]);
85 return memory_vector.back();
/*
  Build the two 256-entry lexer tables of a collation.

  Both tables are obtained from cs_alloc() and stored in cs->state_map and
  cs->ident_map. state_map holds one MY_LEX_* class per byte value;
  ident_map holds a flag per byte value derived from state_map.

  NOTE(review): the first test of the classification chain (the condition
  guarding the initial MY_LEX_IDENT assignment) and the keyword before the
  final MY_LEX_CHAR assignment are not visible in this excerpt.
*/
88 static void init_state_maps(charset_info_st *cs)
90 cs->state_map= cs_alloc(256);
91 cs->ident_map= cs_alloc(256);
93 unsigned char *state_map= cs->state_map;
94 unsigned char *ident_map= cs->ident_map;
/* Default class for every byte value, decided by the charset predicates. */
97 for (
int i= 0; i < 256; i++)
100 state_map[i]= MY_LEX_IDENT;
101 else if (cs->isdigit(i))
102 state_map[i]= MY_LEX_NUMBER_IDENT;
103 else if (my_mbcharlen(cs, i) > 1)
104 state_map[i]= MY_LEX_IDENT;
105 else if (cs->isspace(i))
106 state_map[i]= MY_LEX_SKIP;
108 state_map[i]= MY_LEX_CHAR;
/* Fixed overrides for individual bytes; these replace whatever class the
   loop above assigned. */
110 state_map[
'_']=state_map[
'$']= MY_LEX_IDENT;
111 state_map[
'\'']= MY_LEX_STRING;
112 state_map[
'.']= MY_LEX_REAL_OR_POINT;
113 state_map[
'>']=state_map[
'=']=state_map[
'!']= MY_LEX_CMP_OP;
114 state_map[
'<']= MY_LEX_LONG_CMP_OP;
115 state_map[
'&']=state_map[
'|']= MY_LEX_BOOL;
116 state_map[
'#']= MY_LEX_COMMENT;
117 state_map[
';']= MY_LEX_SEMICOLON;
118 state_map[
':']= MY_LEX_SET_VAR;
119 state_map[0]= MY_LEX_EOL;
120 state_map[
'\\']= MY_LEX_ESCAPE;
121 state_map[
'/']= MY_LEX_LONG_COMMENT;
122 state_map[
'*']= MY_LEX_END_LONG_COMMENT;
123 state_map[
'@']= MY_LEX_USER_END;
124 state_map[
'`']= MY_LEX_USER_VARIABLE_DELIMITER;
125 state_map[
'"']= MY_LEX_STRING_OR_DELIMITER;
/* ident_map[i] is true exactly when state_map[i] is MY_LEX_IDENT or
   MY_LEX_NUMBER_IDENT at this point. */
130 for (
int i= 0; i < 256; i++)
132 ident_map[i]= state_map[i] == MY_LEX_IDENT || state_map[i] == MY_LEX_NUMBER_IDENT;
/* Assigned after ident_map has been derived, so the overrides below change
   state_map only. */
136 state_map[
'x']= state_map[
'X']= MY_LEX_IDENT_OR_HEX;
137 state_map[
'b']= state_map[
'B']= MY_LEX_IDENT_OR_BIN;
/* Set to true at the end of init_available_charsets() and reset to false by
   the cleanup code that releases memory_vector. */
140 static bool charset_initialized=
false;
/* Collation that get_charset() returns directly when it is asked for this
   collation's own number. */
143 const DRIZZLED_API charset_info_st *default_charset_info = &my_charset_utf8_general_ci;
145 static void add_compiled_collation(charset_info_st * cs)
147 all_charsets[cs->number]= cs;
148 cs->state|= MY_CS_AVAILABLE;
151 static void init_compiled_charsets()
153 add_compiled_collation(&my_charset_bin);
155 add_compiled_collation(&my_charset_utf8mb4_general_ci);
156 add_compiled_collation(&my_charset_utf8mb4_bin);
157 add_compiled_collation(&my_charset_utf8mb4_unicode_ci);
158 add_compiled_collation(&my_charset_utf8mb4_icelandic_uca_ci);
159 add_compiled_collation(&my_charset_utf8mb4_latvian_uca_ci);
160 add_compiled_collation(&my_charset_utf8mb4_romanian_uca_ci);
161 add_compiled_collation(&my_charset_utf8mb4_slovenian_uca_ci);
162 add_compiled_collation(&my_charset_utf8mb4_polish_uca_ci);
163 add_compiled_collation(&my_charset_utf8mb4_estonian_uca_ci);
164 add_compiled_collation(&my_charset_utf8mb4_spanish_uca_ci);
165 add_compiled_collation(&my_charset_utf8mb4_swedish_uca_ci);
166 add_compiled_collation(&my_charset_utf8mb4_turkish_uca_ci);
167 add_compiled_collation(&my_charset_utf8mb4_czech_uca_ci);
168 add_compiled_collation(&my_charset_utf8mb4_danish_uca_ci);
169 add_compiled_collation(&my_charset_utf8mb4_lithuanian_uca_ci);
170 add_compiled_collation(&my_charset_utf8mb4_slovak_uca_ci);
171 add_compiled_collation(&my_charset_utf8mb4_spanish2_uca_ci);
172 add_compiled_collation(&my_charset_utf8mb4_roman_uca_ci);
173 add_compiled_collation(&my_charset_utf8mb4_persian_uca_ci);
174 add_compiled_collation(&my_charset_utf8mb4_esperanto_uca_ci);
175 add_compiled_collation(&my_charset_utf8mb4_hungarian_uca_ci);
176 add_compiled_collation(&my_charset_utf8mb4_sinhala_uca_ci);
/*
  Populate all_charsets.

  Zeroes the table, registers the compiled-in collations, builds the lexer
  state maps for every registered entry that has a ctype table and finally
  sets charset_initialized.

  NOTE(review): the statement guarded by the charset_initialized test and
  the loop increment are not visible in this excerpt. Presumably the guard
  returns early once initialization has already run -- confirm.
*/
179 static void init_available_charsets()
185 if (charset_initialized)
187 memset(&all_charsets, 0,
sizeof(all_charsets));
188 init_compiled_charsets();
/* The walk stops one slot short of the end of all_charsets. */
191 for (charset_info_st**cs= all_charsets;
192 cs < all_charsets+array_elements(all_charsets)-1;
195 if (*cs && cs[0]->ctype)
196 init_state_maps(*cs);
199 charset_initialized=
true;
/*
  NOTE(review): the header of the enclosing function is not visible in this
  excerpt. The visible statements clear charset_initialized and then
  delete[] every buffer recorded in memory_vector, removing entries from
  the back until the vector is empty.
*/
204 charset_initialized=
false;
206 while (not memory_vector.empty())
208 delete[] memory_vector.back();
209 memory_vector.pop_back();
213 uint32_t get_collation_number(
const char *name)
215 init_available_charsets();
216 return get_collation_number_internal(name);
/*
  Find a collation by the name of its character set.

  Returns the number of the first entry in all_charsets whose csname matches
  charset_name (compared through my_charset_utf8_general_ci.strcasecmp())
  and whose state has at least one of the bits in cs_flags set.

  NOTE(review): the fall-through return after the loop is not visible in
  this excerpt. Callers in this file treat 0 as "not found" -- confirm.
*/
219 uint32_t get_charset_number(
const char *charset_name, uint32_t cs_flags)
221 init_available_charsets();
/* The walk stops one slot short of the end of all_charsets. */
223 for (charset_info_st** cs= all_charsets; cs < all_charsets + array_elements(all_charsets) - 1; cs++)
225 if (cs[0] && cs[0]->csname && (cs[0]->state & cs_flags) && not my_charset_utf8_general_ci.strcasecmp(cs[0]->csname, charset_name))
226 return cs[0]->number;
231 const char *get_charset_name(uint32_t charset_number)
233 init_available_charsets();
234 const charset_info_st* cs= all_charsets[charset_number];
235 return cs && cs->number == charset_number && cs->name ? cs->name :
"?";
/*
  Look up all_charsets[cs_number] and run its one-time collation setup.

  NOTE(review): several statements of this function (any null check, the
  early returns and the final return) are missing from this excerpt, so the
  control flow below is incomplete. What is visible:
    - the assert requires MY_CS_COMPILED or MY_CS_LOADED to be set;
    - entries without MY_CS_AVAILABLE are handled specially (not shown);
    - entries without MY_CS_READY call the collation init hook, when one
      exists, with cs_alloc as allocator, and MY_CS_READY is or-ed into
      the state.
  No range check on cs_number is visible here.
*/
238 static const charset_info_st *get_internal_charset(uint32_t cs_number)
240 charset_info_st* cs= all_charsets[cs_number];
247 assert(not (not (cs->state & MY_CS_COMPILED) && not (cs->state & MY_CS_LOADED)));
248 if (not (cs->state & MY_CS_AVAILABLE))
250 if (not (cs->state & MY_CS_READY))
252 if (cs->coll->init && cs->coll->init(*cs, cs_alloc))
254 cs->state|= MY_CS_READY;
/*
  Return the collation registered under cs_number.

  A request for the number of default_charset_info is answered directly,
  without calling init_available_charsets(). Otherwise the table is
  initialized, zero and numbers at or beyond array_elements(all_charsets)-1
  are rejected, and the lookup is delegated to get_internal_charset().

  NOTE(review): the statement executed for rejected numbers is not visible
  in this excerpt; presumably it returns NULL -- confirm.
*/
259 const charset_info_st *get_charset(uint32_t cs_number)
261 if (cs_number == default_charset_info->number)
262 return default_charset_info;
264 init_available_charsets();
266 if (!cs_number || cs_number >= array_elements(all_charsets)-1)
269 return get_internal_charset(cs_number);
272 const charset_info_st *get_charset_by_name(
const char *cs_name)
274 init_available_charsets();
275 uint32_t cs_number= get_collation_number(cs_name);
276 return cs_number ? get_internal_charset(cs_number) : NULL;
279 const charset_info_st *get_charset_by_csname(
const char *cs_name, uint32_t cs_flags)
281 init_available_charsets();
282 uint32_t cs_number= get_charset_number(cs_name, cs_flags);
283 return cs_number ? get_internal_charset(cs_number) : NULL;
/*
  Copy `length` bytes from `from` into `to`, processing them one source
  position at a time.

  The write limit is to + to_length - 1 when to_length is non-zero, and
  to + 2*length when to_length is zero. The return value is UINT32_MAX when
  the overflow flag was raised, otherwise the number of bytes written
  (to - to_start).

  NOTE(review): most of the loop body, including the escaping itself and
  the places where the overflow flag is raised, is not visible in this
  excerpt -- consult the full source before relying on this description.
*/
312 size_t escape_quotes_for_drizzle(
const charset_info_st *charset_info,
313 char *to,
size_t to_length,
314 const char *from,
size_t length)
316 const char *to_start= to;
317 const char *end, *to_end=to_start + (to_length ? to_length-1 : 2*length);
318 bool overflow=
false;
319 bool use_mb_flag= use_mb(charset_info);
320 for (end= from + length; from < end; from++)
/* Multi-byte sequences are detected first and checked against the write
   limit as a unit. */
323 if (use_mb_flag && (tmp_length= my_ismbchar(charset_info, from, end)))
325 if (to + tmp_length > to_end)
361 return overflow ? UINT32_MAX : to - to_start;
TODO: Rename this file - func.h is stupid.
Visibility Control Macros.