SDL  2.0
SDL_iconv.c
Go to the documentation of this file.
1 /*
2  Simple DirectMedia Layer
3  Copyright (C) 1997-2018 Sam Lantinga <slouken@libsdl.org>
4 
5  This software is provided 'as-is', without any express or implied
6  warranty. In no event will the authors be held liable for any damages
7  arising from the use of this software.
8 
9  Permission is granted to anyone to use this software for any purpose,
10  including commercial applications, and to alter it and redistribute it
11  freely, subject to the following restrictions:
12 
13  1. The origin of this software must not be misrepresented; you must not
14  claim that you wrote the original software. If you use this software
15  in a product, an acknowledgment in the product documentation would be
16  appreciated but is not required.
17  2. Altered source versions must be plainly marked as such, and must not be
18  misrepresented as being the original software.
19  3. This notice may not be removed or altered from any source distribution.
20 */
21 
22 #if defined(__clang_analyzer__) && !defined(SDL_DISABLE_ANALYZE_MACROS)
23 #define SDL_DISABLE_ANALYZE_MACROS 1
24 #endif
25 
26 #include "../SDL_internal.h"
27 
28 /* This file contains portable iconv functions for SDL */
29 
30 #include "SDL_stdinc.h"
31 #include "SDL_endian.h"
32 
33 #if defined(HAVE_ICONV) && defined(HAVE_ICONV_H)
34 #include <iconv.h>
35 
36 /* Depending on which standard the iconv() was implemented with,
37  iconv() may or may not use const char ** for the inbuf param.
38  If we get this wrong, it's just a warning, so no big deal.
39 */
40 #if defined(_XGP6) || defined(__APPLE__) || \
41  defined(__EMSCRIPTEN__) || \
42  (defined(__GLIBC__) && ((__GLIBC__ > 2) || (__GLIBC__ == 2 && __GLIBC_MINOR__ >= 2)) || \
43  (defined(_NEWLIB_VERSION)))
44 #define ICONV_INBUF_NONCONST
45 #endif
46 
47 #include <errno.h>
48 
49 SDL_COMPILE_TIME_ASSERT(iconv_t, sizeof (iconv_t) <= sizeof (SDL_iconv_t));
50 
51 SDL_iconv_t
52 SDL_iconv_open(const char *tocode, const char *fromcode)
53 {
54  return (SDL_iconv_t) ((size_t) iconv_open(tocode, fromcode));
55 }
56 
57 int
58 SDL_iconv_close(SDL_iconv_t cd)
59 {
60  return iconv_close((iconv_t) ((size_t) cd));
61 }
62 
63 size_t
64 SDL_iconv(SDL_iconv_t cd,
65  const char **inbuf, size_t * inbytesleft,
66  char **outbuf, size_t * outbytesleft)
67 {
68  size_t retCode;
69 #ifdef ICONV_INBUF_NONCONST
70  retCode = iconv((iconv_t) ((size_t) cd), (char **) inbuf, inbytesleft, outbuf, outbytesleft);
71 #else
72  retCode = iconv((iconv_t) ((size_t) cd), inbuf, inbytesleft, outbuf, outbytesleft);
73 #endif
74  if (retCode == (size_t) - 1) {
75  switch (errno) {
76  case E2BIG:
77  return SDL_ICONV_E2BIG;
78  case EILSEQ:
79  return SDL_ICONV_EILSEQ;
80  case EINVAL:
81  return SDL_ICONV_EINVAL;
82  default:
83  return SDL_ICONV_ERROR;
84  }
85  }
86  return retCode;
87 }
88 
89 #else
90 
91 /* Lots of useful information on Unicode at:
92  http://www.cl.cam.ac.uk/~mgk25/unicode.html
93 */
94 
95 #define UNICODE_BOM 0xFEFF
96 
97 #define UNKNOWN_ASCII '?'
98 #define UNKNOWN_UNICODE 0xFFFD
99 
/* Encodings understood by the built-in converter.
 * ENCODING_UNKNOWN must stay first: it is the "no match" marker used while
 * looking up encoding names.  The generic UTF16/UTF32 entries are resolved
 * to a concrete byte order on the first conversion call.
 */
enum
{
    ENCODING_UNKNOWN,
    ENCODING_ASCII,
    ENCODING_LATIN1,
    ENCODING_UTF8,
    ENCODING_UTF16,             /* Needs byte order marker */
    ENCODING_UTF16BE,
    ENCODING_UTF16LE,
    ENCODING_UTF32,             /* Needs byte order marker */
    ENCODING_UTF32BE,
    ENCODING_UTF32LE,
    ENCODING_UCS2BE,
    ENCODING_UCS2LE,
    ENCODING_UCS4BE,
    ENCODING_UCS4LE,
};
117 #if SDL_BYTEORDER == SDL_BIG_ENDIAN
118 #define ENCODING_UTF16NATIVE ENCODING_UTF16BE
119 #define ENCODING_UTF32NATIVE ENCODING_UTF32BE
120 #define ENCODING_UCS2NATIVE ENCODING_UCS2BE
121 #define ENCODING_UCS4NATIVE ENCODING_UCS4BE
122 #else
123 #define ENCODING_UTF16NATIVE ENCODING_UTF16LE
124 #define ENCODING_UTF32NATIVE ENCODING_UTF32LE
125 #define ENCODING_UCS2NATIVE ENCODING_UCS2LE
126 #define ENCODING_UCS4NATIVE ENCODING_UCS4LE
127 #endif
128 
/* Conversion descriptor for the built-in converter: one ENCODING_* value for
 * each side of the conversion.
 * NOTE(review): the tag is assumed to match the forward declaration behind
 * the SDL_iconv_t pointer typedef in SDL_stdinc.h -- confirm.
 */
struct _SDL_iconv_t
{
    int src_fmt;                /* ENCODING_* of the input bytes */
    int dst_fmt;                /* ENCODING_* of the output bytes */
};
134 
135 static struct
136 {
137  const char *name;
138  int format;
139 } encodings[] = {
140 /* *INDENT-OFF* */
141  { "ASCII", ENCODING_ASCII },
142  { "US-ASCII", ENCODING_ASCII },
143  { "8859-1", ENCODING_LATIN1 },
144  { "ISO-8859-1", ENCODING_LATIN1 },
145  { "UTF8", ENCODING_UTF8 },
146  { "UTF-8", ENCODING_UTF8 },
147  { "UTF16", ENCODING_UTF16 },
148  { "UTF-16", ENCODING_UTF16 },
149  { "UTF16BE", ENCODING_UTF16BE },
150  { "UTF-16BE", ENCODING_UTF16BE },
151  { "UTF16LE", ENCODING_UTF16LE },
152  { "UTF-16LE", ENCODING_UTF16LE },
153  { "UTF32", ENCODING_UTF32 },
154  { "UTF-32", ENCODING_UTF32 },
155  { "UTF32BE", ENCODING_UTF32BE },
156  { "UTF-32BE", ENCODING_UTF32BE },
157  { "UTF32LE", ENCODING_UTF32LE },
158  { "UTF-32LE", ENCODING_UTF32LE },
159  { "UCS2", ENCODING_UCS2BE },
160  { "UCS-2", ENCODING_UCS2BE },
161  { "UCS-2LE", ENCODING_UCS2LE },
162  { "UCS-2BE", ENCODING_UCS2BE },
163  { "UCS-2-INTERNAL", ENCODING_UCS2NATIVE },
164  { "UCS4", ENCODING_UCS4BE },
165  { "UCS-4", ENCODING_UCS4BE },
166  { "UCS-4LE", ENCODING_UCS4LE },
167  { "UCS-4BE", ENCODING_UCS4BE },
168  { "UCS-4-INTERNAL", ENCODING_UCS4NATIVE },
169 /* *INDENT-ON* */
170 };
171 
/* Derive the character-set name of the current locale from the environment.
 *
 * The first variable that is set among LC_ALL, LC_CTYPE, LC_MESSAGES and
 * LANG is used; a missing, empty or "C" value yields "ASCII".  Anything up
 * to and including the first '.' is dropped and the copy placed in `buffer`
 * (at most `bufsize` bytes, via SDL_strlcpy) is cut at the first '@', so
 * "en_US.UTF-8@blah" becomes "UTF-8".  Returns `buffer`.
 */
static const char *
getlocale(char *buffer, size_t bufsize)
{
    static const char *const env_names[] = {
        "LC_ALL", "LC_CTYPE", "LC_MESSAGES", "LANG"
    };
    const char *locale = NULL;
    char *mark;
    size_t k;

    /* Take the first variable that exists, in priority order */
    for (k = 0; locale == NULL && k < sizeof(env_names) / sizeof(env_names[0]); ++k) {
        locale = SDL_getenv(env_names[k]);
    }
    if (locale == NULL || *locale == '\0' || SDL_strcmp(locale, "C") == 0) {
        locale = "ASCII";
    }

    /* Skip the "language_TERRITORY." prefix, if there is one */
    mark = SDL_strchr(locale, '.');
    if (mark) {
        locale = mark + 1;
    }

    SDL_strlcpy(buffer, locale, bufsize);

    /* Drop a trailing "@modifier" from the copy */
    mark = SDL_strchr(buffer, '@');
    if (mark) {
        *mark = '\0';
    }

    return buffer;
}
206 
207 SDL_iconv_t
208 SDL_iconv_open(const char *tocode, const char *fromcode)
209 {
212  int i;
213  char fromcode_buffer[64];
214  char tocode_buffer[64];
215 
216  if (!fromcode || !*fromcode) {
217  fromcode = getlocale(fromcode_buffer, sizeof(fromcode_buffer));
218  }
219  if (!tocode || !*tocode) {
220  tocode = getlocale(tocode_buffer, sizeof(tocode_buffer));
221  }
222  for (i = 0; i < SDL_arraysize(encodings); ++i) {
223  if (SDL_strcasecmp(fromcode, encodings[i].name) == 0) {
224  src_fmt = encodings[i].format;
225  if (dst_fmt != ENCODING_UNKNOWN) {
226  break;
227  }
228  }
229  if (SDL_strcasecmp(tocode, encodings[i].name) == 0) {
230  dst_fmt = encodings[i].format;
231  if (src_fmt != ENCODING_UNKNOWN) {
232  break;
233  }
234  }
235  }
236  if (src_fmt != ENCODING_UNKNOWN && dst_fmt != ENCODING_UNKNOWN) {
237  SDL_iconv_t cd = (SDL_iconv_t) SDL_malloc(sizeof(*cd));
238  if (cd) {
239  cd->src_fmt = src_fmt;
240  cd->dst_fmt = dst_fmt;
241  return cd;
242  }
243  }
244  return (SDL_iconv_t) - 1;
245 }
246 
247 size_t
248 SDL_iconv(SDL_iconv_t cd,
249  const char **inbuf, size_t * inbytesleft,
250  char **outbuf, size_t * outbytesleft)
251 {
252  /* For simplicity, we'll convert everything to and from UCS-4 */
253  const char *src;
254  char *dst;
255  size_t srclen, dstlen;
256  Uint32 ch = 0;
257  size_t total;
258 
259  if (!inbuf || !*inbuf) {
260  /* Reset the context */
261  return 0;
262  }
263  if (!outbuf || !*outbuf || !outbytesleft || !*outbytesleft) {
264  return SDL_ICONV_E2BIG;
265  }
266  src = *inbuf;
267  srclen = (inbytesleft ? *inbytesleft : 0);
268  dst = *outbuf;
269  dstlen = *outbytesleft;
270 
271  switch (cd->src_fmt) {
272  case ENCODING_UTF16:
273  /* Scan for a byte order marker */
274  {
275  Uint8 *p = (Uint8 *) src;
276  size_t n = srclen / 2;
277  while (n) {
278  if (p[0] == 0xFF && p[1] == 0xFE) {
279  cd->src_fmt = ENCODING_UTF16BE;
280  break;
281  } else if (p[0] == 0xFE && p[1] == 0xFF) {
282  cd->src_fmt = ENCODING_UTF16LE;
283  break;
284  }
285  p += 2;
286  --n;
287  }
288  if (n == 0) {
289  /* We can't tell, default to host order */
290  cd->src_fmt = ENCODING_UTF16NATIVE;
291  }
292  }
293  break;
294  case ENCODING_UTF32:
295  /* Scan for a byte order marker */
296  {
297  Uint8 *p = (Uint8 *) src;
298  size_t n = srclen / 4;
299  while (n) {
300  if (p[0] == 0xFF && p[1] == 0xFE &&
301  p[2] == 0x00 && p[3] == 0x00) {
302  cd->src_fmt = ENCODING_UTF32BE;
303  break;
304  } else if (p[0] == 0x00 && p[1] == 0x00 &&
305  p[2] == 0xFE && p[3] == 0xFF) {
306  cd->src_fmt = ENCODING_UTF32LE;
307  break;
308  }
309  p += 4;
310  --n;
311  }
312  if (n == 0) {
313  /* We can't tell, default to host order */
314  cd->src_fmt = ENCODING_UTF32NATIVE;
315  }
316  }
317  break;
318  }
319 
320  switch (cd->dst_fmt) {
321  case ENCODING_UTF16:
322  /* Default to host order, need to add byte order marker */
323  if (dstlen < 2) {
324  return SDL_ICONV_E2BIG;
325  }
326  *(Uint16 *) dst = UNICODE_BOM;
327  dst += 2;
328  dstlen -= 2;
329  cd->dst_fmt = ENCODING_UTF16NATIVE;
330  break;
331  case ENCODING_UTF32:
332  /* Default to host order, need to add byte order marker */
333  if (dstlen < 4) {
334  return SDL_ICONV_E2BIG;
335  }
336  *(Uint32 *) dst = UNICODE_BOM;
337  dst += 4;
338  dstlen -= 4;
339  cd->dst_fmt = ENCODING_UTF32NATIVE;
340  break;
341  }
342 
343  total = 0;
344  while (srclen > 0) {
345  /* Decode a character */
346  switch (cd->src_fmt) {
347  case ENCODING_ASCII:
348  {
349  Uint8 *p = (Uint8 *) src;
350  ch = (Uint32) (p[0] & 0x7F);
351  ++src;
352  --srclen;
353  }
354  break;
355  case ENCODING_LATIN1:
356  {
357  Uint8 *p = (Uint8 *) src;
358  ch = (Uint32) p[0];
359  ++src;
360  --srclen;
361  }
362  break;
363  case ENCODING_UTF8: /* RFC 3629 */
364  {
365  Uint8 *p = (Uint8 *) src;
366  size_t left = 0;
367  SDL_bool overlong = SDL_FALSE;
368  if (p[0] >= 0xFC) {
369  if ((p[0] & 0xFE) != 0xFC) {
370  /* Skip illegal sequences
371  return SDL_ICONV_EILSEQ;
372  */
373  ch = UNKNOWN_UNICODE;
374  } else {
375  if (p[0] == 0xFC && srclen > 1 && (p[1] & 0xFC) == 0x80) {
376  overlong = SDL_TRUE;
377  }
378  ch = (Uint32) (p[0] & 0x01);
379  left = 5;
380  }
381  } else if (p[0] >= 0xF8) {
382  if ((p[0] & 0xFC) != 0xF8) {
383  /* Skip illegal sequences
384  return SDL_ICONV_EILSEQ;
385  */
386  ch = UNKNOWN_UNICODE;
387  } else {
388  if (p[0] == 0xF8 && srclen > 1 && (p[1] & 0xF8) == 0x80) {
389  overlong = SDL_TRUE;
390  }
391  ch = (Uint32) (p[0] & 0x03);
392  left = 4;
393  }
394  } else if (p[0] >= 0xF0) {
395  if ((p[0] & 0xF8) != 0xF0) {
396  /* Skip illegal sequences
397  return SDL_ICONV_EILSEQ;
398  */
399  ch = UNKNOWN_UNICODE;
400  } else {
401  if (p[0] == 0xF0 && srclen > 1 && (p[1] & 0xF0) == 0x80) {
402  overlong = SDL_TRUE;
403  }
404  ch = (Uint32) (p[0] & 0x07);
405  left = 3;
406  }
407  } else if (p[0] >= 0xE0) {
408  if ((p[0] & 0xF0) != 0xE0) {
409  /* Skip illegal sequences
410  return SDL_ICONV_EILSEQ;
411  */
412  ch = UNKNOWN_UNICODE;
413  } else {
414  if (p[0] == 0xE0 && srclen > 1 && (p[1] & 0xE0) == 0x80) {
415  overlong = SDL_TRUE;
416  }
417  ch = (Uint32) (p[0] & 0x0F);
418  left = 2;
419  }
420  } else if (p[0] >= 0xC0) {
421  if ((p[0] & 0xE0) != 0xC0) {
422  /* Skip illegal sequences
423  return SDL_ICONV_EILSEQ;
424  */
425  ch = UNKNOWN_UNICODE;
426  } else {
427  if ((p[0] & 0xDE) == 0xC0) {
428  overlong = SDL_TRUE;
429  }
430  ch = (Uint32) (p[0] & 0x1F);
431  left = 1;
432  }
433  } else {
434  if ((p[0] & 0x80) != 0x00) {
435  /* Skip illegal sequences
436  return SDL_ICONV_EILSEQ;
437  */
438  ch = UNKNOWN_UNICODE;
439  } else {
440  ch = (Uint32) p[0];
441  }
442  }
443  ++src;
444  --srclen;
445  if (srclen < left) {
446  return SDL_ICONV_EINVAL;
447  }
448  while (left--) {
449  ++p;
450  if ((p[0] & 0xC0) != 0x80) {
451  /* Skip illegal sequences
452  return SDL_ICONV_EILSEQ;
453  */
454  ch = UNKNOWN_UNICODE;
455  break;
456  }
457  ch <<= 6;
458  ch |= (p[0] & 0x3F);
459  ++src;
460  --srclen;
461  }
462  if (overlong) {
463  /* Potential security risk
464  return SDL_ICONV_EILSEQ;
465  */
466  ch = UNKNOWN_UNICODE;
467  }
468  if ((ch >= 0xD800 && ch <= 0xDFFF) ||
469  (ch == 0xFFFE || ch == 0xFFFF) || ch > 0x10FFFF) {
470  /* Skip illegal sequences
471  return SDL_ICONV_EILSEQ;
472  */
473  ch = UNKNOWN_UNICODE;
474  }
475  }
476  break;
477  case ENCODING_UTF16BE: /* RFC 2781 */
478  {
479  Uint8 *p = (Uint8 *) src;
480  Uint16 W1, W2;
481  if (srclen < 2) {
482  return SDL_ICONV_EINVAL;
483  }
484  W1 = ((Uint16) p[0] << 8) | (Uint16) p[1];
485  src += 2;
486  srclen -= 2;
487  if (W1 < 0xD800 || W1 > 0xDFFF) {
488  ch = (Uint32) W1;
489  break;
490  }
491  if (W1 > 0xDBFF) {
492  /* Skip illegal sequences
493  return SDL_ICONV_EILSEQ;
494  */
495  ch = UNKNOWN_UNICODE;
496  break;
497  }
498  if (srclen < 2) {
499  return SDL_ICONV_EINVAL;
500  }
501  p = (Uint8 *) src;
502  W2 = ((Uint16) p[0] << 8) | (Uint16) p[1];
503  src += 2;
504  srclen -= 2;
505  if (W2 < 0xDC00 || W2 > 0xDFFF) {
506  /* Skip illegal sequences
507  return SDL_ICONV_EILSEQ;
508  */
509  ch = UNKNOWN_UNICODE;
510  break;
511  }
512  ch = (((Uint32) (W1 & 0x3FF) << 10) |
513  (Uint32) (W2 & 0x3FF)) + 0x10000;
514  }
515  break;
516  case ENCODING_UTF16LE: /* RFC 2781 */
517  {
518  Uint8 *p = (Uint8 *) src;
519  Uint16 W1, W2;
520  if (srclen < 2) {
521  return SDL_ICONV_EINVAL;
522  }
523  W1 = ((Uint16) p[1] << 8) | (Uint16) p[0];
524  src += 2;
525  srclen -= 2;
526  if (W1 < 0xD800 || W1 > 0xDFFF) {
527  ch = (Uint32) W1;
528  break;
529  }
530  if (W1 > 0xDBFF) {
531  /* Skip illegal sequences
532  return SDL_ICONV_EILSEQ;
533  */
534  ch = UNKNOWN_UNICODE;
535  break;
536  }
537  if (srclen < 2) {
538  return SDL_ICONV_EINVAL;
539  }
540  p = (Uint8 *) src;
541  W2 = ((Uint16) p[1] << 8) | (Uint16) p[0];
542  src += 2;
543  srclen -= 2;
544  if (W2 < 0xDC00 || W2 > 0xDFFF) {
545  /* Skip illegal sequences
546  return SDL_ICONV_EILSEQ;
547  */
548  ch = UNKNOWN_UNICODE;
549  break;
550  }
551  ch = (((Uint32) (W1 & 0x3FF) << 10) |
552  (Uint32) (W2 & 0x3FF)) + 0x10000;
553  }
554  break;
555  case ENCODING_UCS2LE:
556  {
557  Uint8 *p = (Uint8 *) src;
558  if (srclen < 2) {
559  return SDL_ICONV_EINVAL;
560  }
561  ch = ((Uint32) p[1] << 8) | (Uint32) p[0];
562  src += 2;
563  srclen -= 2;
564  }
565  break;
566  case ENCODING_UCS2BE:
567  {
568  Uint8 *p = (Uint8 *) src;
569  if (srclen < 2) {
570  return SDL_ICONV_EINVAL;
571  }
572  ch = ((Uint32) p[0] << 8) | (Uint32) p[1];
573  src += 2;
574  srclen -= 2;
575  }
576  break;
577  case ENCODING_UCS4BE:
578  case ENCODING_UTF32BE:
579  {
580  Uint8 *p = (Uint8 *) src;
581  if (srclen < 4) {
582  return SDL_ICONV_EINVAL;
583  }
584  ch = ((Uint32) p[0] << 24) |
585  ((Uint32) p[1] << 16) |
586  ((Uint32) p[2] << 8) | (Uint32) p[3];
587  src += 4;
588  srclen -= 4;
589  }
590  break;
591  case ENCODING_UCS4LE:
592  case ENCODING_UTF32LE:
593  {
594  Uint8 *p = (Uint8 *) src;
595  if (srclen < 4) {
596  return SDL_ICONV_EINVAL;
597  }
598  ch = ((Uint32) p[3] << 24) |
599  ((Uint32) p[2] << 16) |
600  ((Uint32) p[1] << 8) | (Uint32) p[0];
601  src += 4;
602  srclen -= 4;
603  }
604  break;
605  }
606 
607  /* Encode a character */
608  switch (cd->dst_fmt) {
609  case ENCODING_ASCII:
610  {
611  Uint8 *p = (Uint8 *) dst;
612  if (dstlen < 1) {
613  return SDL_ICONV_E2BIG;
614  }
615  if (ch > 0x7F) {
616  *p = UNKNOWN_ASCII;
617  } else {
618  *p = (Uint8) ch;
619  }
620  ++dst;
621  --dstlen;
622  }
623  break;
624  case ENCODING_LATIN1:
625  {
626  Uint8 *p = (Uint8 *) dst;
627  if (dstlen < 1) {
628  return SDL_ICONV_E2BIG;
629  }
630  if (ch > 0xFF) {
631  *p = UNKNOWN_ASCII;
632  } else {
633  *p = (Uint8) ch;
634  }
635  ++dst;
636  --dstlen;
637  }
638  break;
639  case ENCODING_UTF8: /* RFC 3629 */
640  {
641  Uint8 *p = (Uint8 *) dst;
642  if (ch > 0x10FFFF) {
643  ch = UNKNOWN_UNICODE;
644  }
645  if (ch <= 0x7F) {
646  if (dstlen < 1) {
647  return SDL_ICONV_E2BIG;
648  }
649  *p = (Uint8) ch;
650  ++dst;
651  --dstlen;
652  } else if (ch <= 0x7FF) {
653  if (dstlen < 2) {
654  return SDL_ICONV_E2BIG;
655  }
656  p[0] = 0xC0 | (Uint8) ((ch >> 6) & 0x1F);
657  p[1] = 0x80 | (Uint8) (ch & 0x3F);
658  dst += 2;
659  dstlen -= 2;
660  } else if (ch <= 0xFFFF) {
661  if (dstlen < 3) {
662  return SDL_ICONV_E2BIG;
663  }
664  p[0] = 0xE0 | (Uint8) ((ch >> 12) & 0x0F);
665  p[1] = 0x80 | (Uint8) ((ch >> 6) & 0x3F);
666  p[2] = 0x80 | (Uint8) (ch & 0x3F);
667  dst += 3;
668  dstlen -= 3;
669  } else if (ch <= 0x1FFFFF) {
670  if (dstlen < 4) {
671  return SDL_ICONV_E2BIG;
672  }
673  p[0] = 0xF0 | (Uint8) ((ch >> 18) & 0x07);
674  p[1] = 0x80 | (Uint8) ((ch >> 12) & 0x3F);
675  p[2] = 0x80 | (Uint8) ((ch >> 6) & 0x3F);
676  p[3] = 0x80 | (Uint8) (ch & 0x3F);
677  dst += 4;
678  dstlen -= 4;
679  } else if (ch <= 0x3FFFFFF) {
680  if (dstlen < 5) {
681  return SDL_ICONV_E2BIG;
682  }
683  p[0] = 0xF8 | (Uint8) ((ch >> 24) & 0x03);
684  p[1] = 0x80 | (Uint8) ((ch >> 18) & 0x3F);
685  p[2] = 0x80 | (Uint8) ((ch >> 12) & 0x3F);
686  p[3] = 0x80 | (Uint8) ((ch >> 6) & 0x3F);
687  p[4] = 0x80 | (Uint8) (ch & 0x3F);
688  dst += 5;
689  dstlen -= 5;
690  } else {
691  if (dstlen < 6) {
692  return SDL_ICONV_E2BIG;
693  }
694  p[0] = 0xFC | (Uint8) ((ch >> 30) & 0x01);
695  p[1] = 0x80 | (Uint8) ((ch >> 24) & 0x3F);
696  p[2] = 0x80 | (Uint8) ((ch >> 18) & 0x3F);
697  p[3] = 0x80 | (Uint8) ((ch >> 12) & 0x3F);
698  p[4] = 0x80 | (Uint8) ((ch >> 6) & 0x3F);
699  p[5] = 0x80 | (Uint8) (ch & 0x3F);
700  dst += 6;
701  dstlen -= 6;
702  }
703  }
704  break;
705  case ENCODING_UTF16BE: /* RFC 2781 */
706  {
707  Uint8 *p = (Uint8 *) dst;
708  if (ch > 0x10FFFF) {
709  ch = UNKNOWN_UNICODE;
710  }
711  if (ch < 0x10000) {
712  if (dstlen < 2) {
713  return SDL_ICONV_E2BIG;
714  }
715  p[0] = (Uint8) (ch >> 8);
716  p[1] = (Uint8) ch;
717  dst += 2;
718  dstlen -= 2;
719  } else {
720  Uint16 W1, W2;
721  if (dstlen < 4) {
722  return SDL_ICONV_E2BIG;
723  }
724  ch = ch - 0x10000;
725  W1 = 0xD800 | (Uint16) ((ch >> 10) & 0x3FF);
726  W2 = 0xDC00 | (Uint16) (ch & 0x3FF);
727  p[0] = (Uint8) (W1 >> 8);
728  p[1] = (Uint8) W1;
729  p[2] = (Uint8) (W2 >> 8);
730  p[3] = (Uint8) W2;
731  dst += 4;
732  dstlen -= 4;
733  }
734  }
735  break;
736  case ENCODING_UTF16LE: /* RFC 2781 */
737  {
738  Uint8 *p = (Uint8 *) dst;
739  if (ch > 0x10FFFF) {
740  ch = UNKNOWN_UNICODE;
741  }
742  if (ch < 0x10000) {
743  if (dstlen < 2) {
744  return SDL_ICONV_E2BIG;
745  }
746  p[1] = (Uint8) (ch >> 8);
747  p[0] = (Uint8) ch;
748  dst += 2;
749  dstlen -= 2;
750  } else {
751  Uint16 W1, W2;
752  if (dstlen < 4) {
753  return SDL_ICONV_E2BIG;
754  }
755  ch = ch - 0x10000;
756  W1 = 0xD800 | (Uint16) ((ch >> 10) & 0x3FF);
757  W2 = 0xDC00 | (Uint16) (ch & 0x3FF);
758  p[1] = (Uint8) (W1 >> 8);
759  p[0] = (Uint8) W1;
760  p[3] = (Uint8) (W2 >> 8);
761  p[2] = (Uint8) W2;
762  dst += 4;
763  dstlen -= 4;
764  }
765  }
766  break;
767  case ENCODING_UCS2BE:
768  {
769  Uint8 *p = (Uint8 *) dst;
770  if (ch > 0xFFFF) {
771  ch = UNKNOWN_UNICODE;
772  }
773  if (dstlen < 2) {
774  return SDL_ICONV_E2BIG;
775  }
776  p[0] = (Uint8) (ch >> 8);
777  p[1] = (Uint8) ch;
778  dst += 2;
779  dstlen -= 2;
780  }
781  break;
782  case ENCODING_UCS2LE:
783  {
784  Uint8 *p = (Uint8 *) dst;
785  if (ch > 0xFFFF) {
786  ch = UNKNOWN_UNICODE;
787  }
788  if (dstlen < 2) {
789  return SDL_ICONV_E2BIG;
790  }
791  p[1] = (Uint8) (ch >> 8);
792  p[0] = (Uint8) ch;
793  dst += 2;
794  dstlen -= 2;
795  }
796  break;
797  case ENCODING_UTF32BE:
798  if (ch > 0x10FFFF) {
799  ch = UNKNOWN_UNICODE;
800  }
801  /* fallthrough */
802  case ENCODING_UCS4BE:
803  if (ch > 0x7FFFFFFF) {
804  ch = UNKNOWN_UNICODE;
805  }
806  {
807  Uint8 *p = (Uint8 *) dst;
808  if (dstlen < 4) {
809  return SDL_ICONV_E2BIG;
810  }
811  p[0] = (Uint8) (ch >> 24);
812  p[1] = (Uint8) (ch >> 16);
813  p[2] = (Uint8) (ch >> 8);
814  p[3] = (Uint8) ch;
815  dst += 4;
816  dstlen -= 4;
817  }
818  break;
819  case ENCODING_UTF32LE:
820  if (ch > 0x10FFFF) {
821  ch = UNKNOWN_UNICODE;
822  }
823  /* fallthrough */
824  case ENCODING_UCS4LE:
825  if (ch > 0x7FFFFFFF) {
826  ch = UNKNOWN_UNICODE;
827  }
828  {
829  Uint8 *p = (Uint8 *) dst;
830  if (dstlen < 4) {
831  return SDL_ICONV_E2BIG;
832  }
833  p[3] = (Uint8) (ch >> 24);
834  p[2] = (Uint8) (ch >> 16);
835  p[1] = (Uint8) (ch >> 8);
836  p[0] = (Uint8) ch;
837  dst += 4;
838  dstlen -= 4;
839  }
840  break;
841  }
842 
843  /* Update state */
844  *inbuf = src;
845  *inbytesleft = srclen;
846  *outbuf = dst;
847  *outbytesleft = dstlen;
848  ++total;
849  }
850  return total;
851 }
852 
853 int
854 SDL_iconv_close(SDL_iconv_t cd)
855 {
856  if (cd != (SDL_iconv_t)-1) {
857  SDL_free(cd);
858  }
859  return 0;
860 }
861 
862 #endif /* !HAVE_ICONV */
863 
864 char *
865 SDL_iconv_string(const char *tocode, const char *fromcode, const char *inbuf,
866  size_t inbytesleft)
867 {
868  SDL_iconv_t cd;
869  char *string;
870  size_t stringsize;
871  char *outbuf;
872  size_t outbytesleft;
873  size_t retCode = 0;
874 
875  cd = SDL_iconv_open(tocode, fromcode);
876  if (cd == (SDL_iconv_t) - 1) {
877  /* See if we can recover here (fixes iconv on Solaris 11) */
878  if (!tocode || !*tocode) {
879  tocode = "UTF-8";
880  }
881  if (!fromcode || !*fromcode) {
882  fromcode = "UTF-8";
883  }
884  cd = SDL_iconv_open(tocode, fromcode);
885  }
886  if (cd == (SDL_iconv_t) - 1) {
887  return NULL;
888  }
889 
890  stringsize = inbytesleft > 4 ? inbytesleft : 4;
891  string = SDL_malloc(stringsize);
892  if (!string) {
893  SDL_iconv_close(cd);
894  return NULL;
895  }
896  outbuf = string;
897  outbytesleft = stringsize;
898  SDL_memset(outbuf, 0, 4);
899 
900  while (inbytesleft > 0) {
901  retCode = SDL_iconv(cd, &inbuf, &inbytesleft, &outbuf, &outbytesleft);
902  switch (retCode) {
903  case SDL_ICONV_E2BIG:
904  {
905  char *oldstring = string;
906  stringsize *= 2;
907  string = SDL_realloc(string, stringsize);
908  if (!string) {
909  SDL_iconv_close(cd);
910  return NULL;
911  }
912  outbuf = string + (outbuf - oldstring);
913  outbytesleft = stringsize - (outbuf - string);
914  SDL_memset(outbuf, 0, 4);
915  }
916  break;
917  case SDL_ICONV_EILSEQ:
918  /* Try skipping some input data - not perfect, but... */
919  ++inbuf;
920  --inbytesleft;
921  break;
922  case SDL_ICONV_EINVAL:
923  case SDL_ICONV_ERROR:
924  /* We can't continue... */
925  inbytesleft = 0;
926  break;
927  }
928  }
929  SDL_iconv_close(cd);
930 
931  return string;
932 }
933 
934 /* vi: set ts=4 sw=4 expandtab: */
#define SDL_strlcpy
int SDL_iconv_close(SDL_iconv_t cd)
Definition: SDL_iconv.c:854
static struct @31 encodings[]
GLsizei const GLchar *const * string
GLenum GLenum dst
#define SDL_ICONV_EILSEQ
Definition: SDL_stdinc.h:541
#define UNKNOWN_UNICODE
Definition: SDL_iconv.c:98
GLfloat GLfloat p
SDL_iconv_t SDL_iconv_open(const char *tocode, const char *fromcode)
Definition: SDL_iconv.c:208
const char * name
Definition: SDL_iconv.c:137
uint32_t Uint32
Definition: SDL_stdinc.h:181
#define SDL_realloc
#define SDL_strcasecmp
GLenum src
GLenum GLuint GLsizei bufsize
GLuint const GLchar * name
#define SDL_strchr
unsigned int size_t
#define UNICODE_BOM
Definition: SDL_iconv.c:95
uint8_t Uint8
Definition: SDL_stdinc.h:157
#define SDL_free
char * SDL_iconv_string(const char *tocode, const char *fromcode, const char *inbuf, size_t inbytesleft)
Definition: SDL_iconv.c:865
#define SDL_ICONV_E2BIG
Definition: SDL_stdinc.h:540
#define ENCODING_UTF32NATIVE
Definition: SDL_iconv.c:119
#define SDL_ICONV_EINVAL
Definition: SDL_stdinc.h:542
#define SDL_getenv
return Display return Display Bool Bool int int int return Display XEvent Bool(*) XPointer return Display return Display Drawable _Xconst char unsigned int unsigned int return Display Pixmap Pixmap XColor XColor unsigned int unsigned int return Display _Xconst char char int char return Display Visual unsigned int int int char unsigned int unsigned int in i)
Definition: SDL_x11sym.h:50
#define NULL
Definition: begin_code.h:164
SDL_bool
Definition: SDL_stdinc.h:139
GLuint buffer
#define ENCODING_UCS2NATIVE
Definition: SDL_iconv.c:120
#define ENCODING_UCS4NATIVE
Definition: SDL_iconv.c:121
#define UNKNOWN_ASCII
Definition: SDL_iconv.c:97
GLdouble n
uint16_t Uint16
Definition: SDL_stdinc.h:169
#define SDL_ICONV_ERROR
Definition: SDL_stdinc.h:539
#define SDL_arraysize(array)
Definition: SDL_stdinc.h:93
#define SDL_malloc
#define SDL_strcmp
size_t SDL_iconv(SDL_iconv_t cd, const char **inbuf, size_t *inbytesleft, char **outbuf, size_t *outbytesleft)
Definition: SDL_iconv.c:248
#define ENCODING_UTF16NATIVE
Definition: SDL_iconv.c:118
int format
Definition: SDL_iconv.c:138
#define SDL_memset
GLint left
static const char * getlocale(char *buffer, size_t bufsize)
Definition: SDL_iconv.c:173
#define SDL_COMPILE_TIME_ASSERT(name, x)
Definition: SDL_stdinc.h:290