SDL  2.0
SDL_iconv.c
Go to the documentation of this file.
1 /*
2  Simple DirectMedia Layer
3  Copyright (C) 1997-2016 Sam Lantinga <slouken@libsdl.org>
4 
5  This software is provided 'as-is', without any express or implied
6  warranty. In no event will the authors be held liable for any damages
7  arising from the use of this software.
8 
9  Permission is granted to anyone to use this software for any purpose,
10  including commercial applications, and to alter it and redistribute it
11  freely, subject to the following restrictions:
12 
13  1. The origin of this software must not be misrepresented; you must not
14  claim that you wrote the original software. If you use this software
15  in a product, an acknowledgment in the product documentation would be
16  appreciated but is not required.
17  2. Altered source versions must be plainly marked as such, and must not be
18  misrepresented as being the original software.
19  3. This notice may not be removed or altered from any source distribution.
20 */
21 
22 #if defined(__clang_analyzer__) && !defined(SDL_DISABLE_ANALYZE_MACROS)
23 #define SDL_DISABLE_ANALYZE_MACROS 1
24 #endif
25 
26 #include "../SDL_internal.h"
27 
28 /* This file contains portable iconv functions for SDL */
29 
30 #include "SDL_stdinc.h"
31 #include "SDL_endian.h"
32 
33 #if defined(HAVE_ICONV) && defined(HAVE_ICONV_H)
34 #include <iconv.h>
35 
36 /* Depending on which standard the iconv() was implemented with,
37  iconv() may or may not use const char ** for the inbuf param.
38  If we get this wrong, it's just a warning, so no big deal.
39 */
40 #if defined(_XGP6) || defined(__APPLE__) || \
41  (defined(__GLIBC__) && ((__GLIBC__ > 2) || (__GLIBC__ == 2 && __GLIBC_MINOR__ >= 2)) || \
42  (defined(_NEWLIB_VERSION)))
43 #define ICONV_INBUF_NONCONST
44 #endif
45 
46 #include <errno.h>
47 
48 SDL_COMPILE_TIME_ASSERT(iconv_t, sizeof (iconv_t) <= sizeof (SDL_iconv_t));
49 
50 SDL_iconv_t
51 SDL_iconv_open(const char *tocode, const char *fromcode)
52 {
53  return (SDL_iconv_t) ((size_t) iconv_open(tocode, fromcode));
54 }
55 
56 int
57 SDL_iconv_close(SDL_iconv_t cd)
58 {
59  return iconv_close((iconv_t) ((size_t) cd));
60 }
61 
62 size_t
63 SDL_iconv(SDL_iconv_t cd,
64  const char **inbuf, size_t * inbytesleft,
65  char **outbuf, size_t * outbytesleft)
66 {
67  size_t retCode;
68 #ifdef ICONV_INBUF_NONCONST
69  retCode = iconv((iconv_t) ((size_t) cd), (char **) inbuf, inbytesleft, outbuf, outbytesleft);
70 #else
71  retCode = iconv((iconv_t) ((size_t) cd), inbuf, inbytesleft, outbuf, outbytesleft);
72 #endif
73  if (retCode == (size_t) - 1) {
74  switch (errno) {
75  case E2BIG:
76  return SDL_ICONV_E2BIG;
77  case EILSEQ:
78  return SDL_ICONV_EILSEQ;
79  case EINVAL:
80  return SDL_ICONV_EINVAL;
81  default:
82  return SDL_ICONV_ERROR;
83  }
84  }
85  return retCode;
86 }
87 
88 #else
89 
90 /* Lots of useful information on Unicode at:
91  http://www.cl.cam.ac.uk/~mgk25/unicode.html
92 */
93 
94 #define UNICODE_BOM 0xFEFF
95 
96 #define UNKNOWN_ASCII '?'
97 #define UNKNOWN_UNICODE 0xFFFD
98 
/* Identifiers for every text encoding the built-in converter understands.
   ENCODING_UNKNOWN is the "no match" value used while resolving names in
   SDL_iconv_open().  ENCODING_UTF16 and ENCODING_UTF32 are placeholders
   that SDL_iconv() replaces with a concrete BE/LE variant on first use. */
enum
{
    ENCODING_UNKNOWN,
    ENCODING_ASCII,
    ENCODING_LATIN1,
    ENCODING_UTF8,
    ENCODING_UTF16,             /* Needs byte order marker */
    ENCODING_UTF16BE,
    ENCODING_UTF16LE,
    ENCODING_UTF32,             /* Needs byte order marker */
    ENCODING_UTF32BE,
    ENCODING_UTF32LE,
    ENCODING_UCS2BE,
    ENCODING_UCS2LE,
    ENCODING_UCS4BE,
    ENCODING_UCS4LE
};
116 #if SDL_BYTEORDER == SDL_BIG_ENDIAN
117 #define ENCODING_UTF16NATIVE ENCODING_UTF16BE
118 #define ENCODING_UTF32NATIVE ENCODING_UTF32BE
119 #define ENCODING_UCS2NATIVE ENCODING_UCS2BE
120 #define ENCODING_UCS4NATIVE ENCODING_UCS4BE
121 #else
122 #define ENCODING_UTF16NATIVE ENCODING_UTF16LE
123 #define ENCODING_UTF32NATIVE ENCODING_UTF32LE
124 #define ENCODING_UCS2NATIVE ENCODING_UCS2LE
125 #define ENCODING_UCS4NATIVE ENCODING_UCS4LE
126 #endif
127 
/* Conversion descriptor used by the built-in converter.  SDL_iconv_open()
   allocates one of these and SDL_iconv() reads (and, for the BOM-dependent
   UTF-16/UTF-32 placeholders, rewrites) the two format fields. */
struct _SDL_iconv_t
{
    int src_fmt;                /* ENCODING_* value of the input text */
    int dst_fmt;                /* ENCODING_* value of the output text */
};
133 
134 static struct
135 {
136  const char *name;
137  int format;
138 } encodings[] = {
139 /* *INDENT-OFF* */
140  { "ASCII", ENCODING_ASCII },
141  { "US-ASCII", ENCODING_ASCII },
142  { "8859-1", ENCODING_LATIN1 },
143  { "ISO-8859-1", ENCODING_LATIN1 },
144  { "UTF8", ENCODING_UTF8 },
145  { "UTF-8", ENCODING_UTF8 },
146  { "UTF16", ENCODING_UTF16 },
147  { "UTF-16", ENCODING_UTF16 },
148  { "UTF16BE", ENCODING_UTF16BE },
149  { "UTF-16BE", ENCODING_UTF16BE },
150  { "UTF16LE", ENCODING_UTF16LE },
151  { "UTF-16LE", ENCODING_UTF16LE },
152  { "UTF32", ENCODING_UTF32 },
153  { "UTF-32", ENCODING_UTF32 },
154  { "UTF32BE", ENCODING_UTF32BE },
155  { "UTF-32BE", ENCODING_UTF32BE },
156  { "UTF32LE", ENCODING_UTF32LE },
157  { "UTF-32LE", ENCODING_UTF32LE },
158  { "UCS2", ENCODING_UCS2BE },
159  { "UCS-2", ENCODING_UCS2BE },
160  { "UCS-2LE", ENCODING_UCS2LE },
161  { "UCS-2BE", ENCODING_UCS2BE },
162  { "UCS-2-INTERNAL", ENCODING_UCS2NATIVE },
163  { "UCS4", ENCODING_UCS4BE },
164  { "UCS-4", ENCODING_UCS4BE },
165  { "UCS-4LE", ENCODING_UCS4LE },
166  { "UCS-4BE", ENCODING_UCS4BE },
167  { "UCS-4-INTERNAL", ENCODING_UCS4NATIVE },
168 /* *INDENT-ON* */
169 };
170 
/* Derive a character-set name from the locale environment variables.

   LC_ALL, LC_CTYPE, LC_MESSAGES and LANG are consulted in that order and
   the first one that is set to a non-empty value wins (an empty value is
   treated as unset, so it no longer hides the variables after it).  If
   none is usable, or the value is "C" or "POSIX", "ASCII" is used.

   A value such as "en_US.UTF-8@blah" is reduced to "UTF-8": everything up
   to and including the first '.' is dropped and anything from '@' on is
   cut off.  A value without a '.' is copied unchanged.

   buffer/bufsize: caller-provided storage for the result.
   Returns buffer. */
static const char *
getlocale(char *buffer, size_t bufsize)
{
    static const char *const variables[] = {
        "LC_ALL", "LC_CTYPE", "LC_MESSAGES", "LANG"
    };
    const char *lang = NULL;
    const char *dot;
    char *modifier;
    size_t i;

    for (i = 0; i < SDL_arraysize(variables); ++i) {
        const char *value = SDL_getenv(variables[i]);
        if (value && *value) {
            lang = value;
            break;
        }
    }
    if (!lang || SDL_strcmp(lang, "C") == 0 || SDL_strcmp(lang, "POSIX") == 0) {
        lang = "ASCII";
    }

    /* We need to trim down strings like "en_US.UTF-8@blah" to "UTF-8" */
    dot = SDL_strchr(lang, '.');
    if (dot != NULL) {
        lang = dot + 1;
    }

    SDL_strlcpy(buffer, lang, bufsize);
    modifier = SDL_strchr(buffer, '@');
    if (modifier != NULL) {
        *modifier = '\0';       /* chop end of string. */
    }

    return buffer;
}
205 
206 SDL_iconv_t
207 SDL_iconv_open(const char *tocode, const char *fromcode)
208 {
211  int i;
212  char fromcode_buffer[64];
213  char tocode_buffer[64];
214 
215  if (!fromcode || !*fromcode) {
216  fromcode = getlocale(fromcode_buffer, sizeof(fromcode_buffer));
217  }
218  if (!tocode || !*tocode) {
219  tocode = getlocale(tocode_buffer, sizeof(tocode_buffer));
220  }
221  for (i = 0; i < SDL_arraysize(encodings); ++i) {
222  if (SDL_strcasecmp(fromcode, encodings[i].name) == 0) {
223  src_fmt = encodings[i].format;
224  if (dst_fmt != ENCODING_UNKNOWN) {
225  break;
226  }
227  }
228  if (SDL_strcasecmp(tocode, encodings[i].name) == 0) {
229  dst_fmt = encodings[i].format;
230  if (src_fmt != ENCODING_UNKNOWN) {
231  break;
232  }
233  }
234  }
235  if (src_fmt != ENCODING_UNKNOWN && dst_fmt != ENCODING_UNKNOWN) {
236  SDL_iconv_t cd = (SDL_iconv_t) SDL_malloc(sizeof(*cd));
237  if (cd) {
238  cd->src_fmt = src_fmt;
239  cd->dst_fmt = dst_fmt;
240  return cd;
241  }
242  }
243  return (SDL_iconv_t) - 1;
244 }
245 
246 size_t
247 SDL_iconv(SDL_iconv_t cd,
248  const char **inbuf, size_t * inbytesleft,
249  char **outbuf, size_t * outbytesleft)
250 {
251  /* For simplicity, we'll convert everything to and from UCS-4 */
252  const char *src;
253  char *dst;
254  size_t srclen, dstlen;
255  Uint32 ch = 0;
256  size_t total;
257 
258  if (!inbuf || !*inbuf) {
259  /* Reset the context */
260  return 0;
261  }
262  if (!outbuf || !*outbuf || !outbytesleft || !*outbytesleft) {
263  return SDL_ICONV_E2BIG;
264  }
265  src = *inbuf;
266  srclen = (inbytesleft ? *inbytesleft : 0);
267  dst = *outbuf;
268  dstlen = *outbytesleft;
269 
270  switch (cd->src_fmt) {
271  case ENCODING_UTF16:
272  /* Scan for a byte order marker */
273  {
274  Uint8 *p = (Uint8 *) src;
275  size_t n = srclen / 2;
276  while (n) {
277  if (p[0] == 0xFF && p[1] == 0xFE) {
278  cd->src_fmt = ENCODING_UTF16BE;
279  break;
280  } else if (p[0] == 0xFE && p[1] == 0xFF) {
281  cd->src_fmt = ENCODING_UTF16LE;
282  break;
283  }
284  p += 2;
285  --n;
286  }
287  if (n == 0) {
288  /* We can't tell, default to host order */
289  cd->src_fmt = ENCODING_UTF16NATIVE;
290  }
291  }
292  break;
293  case ENCODING_UTF32:
294  /* Scan for a byte order marker */
295  {
296  Uint8 *p = (Uint8 *) src;
297  size_t n = srclen / 4;
298  while (n) {
299  if (p[0] == 0xFF && p[1] == 0xFE &&
300  p[2] == 0x00 && p[3] == 0x00) {
301  cd->src_fmt = ENCODING_UTF32BE;
302  break;
303  } else if (p[0] == 0x00 && p[1] == 0x00 &&
304  p[2] == 0xFE && p[3] == 0xFF) {
305  cd->src_fmt = ENCODING_UTF32LE;
306  break;
307  }
308  p += 4;
309  --n;
310  }
311  if (n == 0) {
312  /* We can't tell, default to host order */
313  cd->src_fmt = ENCODING_UTF32NATIVE;
314  }
315  }
316  break;
317  }
318 
319  switch (cd->dst_fmt) {
320  case ENCODING_UTF16:
321  /* Default to host order, need to add byte order marker */
322  if (dstlen < 2) {
323  return SDL_ICONV_E2BIG;
324  }
325  *(Uint16 *) dst = UNICODE_BOM;
326  dst += 2;
327  dstlen -= 2;
328  cd->dst_fmt = ENCODING_UTF16NATIVE;
329  break;
330  case ENCODING_UTF32:
331  /* Default to host order, need to add byte order marker */
332  if (dstlen < 4) {
333  return SDL_ICONV_E2BIG;
334  }
335  *(Uint32 *) dst = UNICODE_BOM;
336  dst += 4;
337  dstlen -= 4;
338  cd->dst_fmt = ENCODING_UTF32NATIVE;
339  break;
340  }
341 
342  total = 0;
343  while (srclen > 0) {
344  /* Decode a character */
345  switch (cd->src_fmt) {
346  case ENCODING_ASCII:
347  {
348  Uint8 *p = (Uint8 *) src;
349  ch = (Uint32) (p[0] & 0x7F);
350  ++src;
351  --srclen;
352  }
353  break;
354  case ENCODING_LATIN1:
355  {
356  Uint8 *p = (Uint8 *) src;
357  ch = (Uint32) p[0];
358  ++src;
359  --srclen;
360  }
361  break;
362  case ENCODING_UTF8: /* RFC 3629 */
363  {
364  Uint8 *p = (Uint8 *) src;
365  size_t left = 0;
366  SDL_bool overlong = SDL_FALSE;
367  if (p[0] >= 0xFC) {
368  if ((p[0] & 0xFE) != 0xFC) {
369  /* Skip illegal sequences
370  return SDL_ICONV_EILSEQ;
371  */
372  ch = UNKNOWN_UNICODE;
373  } else {
374  if (p[0] == 0xFC && srclen > 1 && (p[1] & 0xFC) == 0x80) {
375  overlong = SDL_TRUE;
376  }
377  ch = (Uint32) (p[0] & 0x01);
378  left = 5;
379  }
380  } else if (p[0] >= 0xF8) {
381  if ((p[0] & 0xFC) != 0xF8) {
382  /* Skip illegal sequences
383  return SDL_ICONV_EILSEQ;
384  */
385  ch = UNKNOWN_UNICODE;
386  } else {
387  if (p[0] == 0xF8 && srclen > 1 && (p[1] & 0xF8) == 0x80) {
388  overlong = SDL_TRUE;
389  }
390  ch = (Uint32) (p[0] & 0x03);
391  left = 4;
392  }
393  } else if (p[0] >= 0xF0) {
394  if ((p[0] & 0xF8) != 0xF0) {
395  /* Skip illegal sequences
396  return SDL_ICONV_EILSEQ;
397  */
398  ch = UNKNOWN_UNICODE;
399  } else {
400  if (p[0] == 0xF0 && srclen > 1 && (p[1] & 0xF0) == 0x80) {
401  overlong = SDL_TRUE;
402  }
403  ch = (Uint32) (p[0] & 0x07);
404  left = 3;
405  }
406  } else if (p[0] >= 0xE0) {
407  if ((p[0] & 0xF0) != 0xE0) {
408  /* Skip illegal sequences
409  return SDL_ICONV_EILSEQ;
410  */
411  ch = UNKNOWN_UNICODE;
412  } else {
413  if (p[0] == 0xE0 && srclen > 1 && (p[1] & 0xE0) == 0x80) {
414  overlong = SDL_TRUE;
415  }
416  ch = (Uint32) (p[0] & 0x0F);
417  left = 2;
418  }
419  } else if (p[0] >= 0xC0) {
420  if ((p[0] & 0xE0) != 0xC0) {
421  /* Skip illegal sequences
422  return SDL_ICONV_EILSEQ;
423  */
424  ch = UNKNOWN_UNICODE;
425  } else {
426  if ((p[0] & 0xDE) == 0xC0) {
427  overlong = SDL_TRUE;
428  }
429  ch = (Uint32) (p[0] & 0x1F);
430  left = 1;
431  }
432  } else {
433  if ((p[0] & 0x80) != 0x00) {
434  /* Skip illegal sequences
435  return SDL_ICONV_EILSEQ;
436  */
437  ch = UNKNOWN_UNICODE;
438  } else {
439  ch = (Uint32) p[0];
440  }
441  }
442  ++src;
443  --srclen;
444  if (srclen < left) {
445  return SDL_ICONV_EINVAL;
446  }
447  while (left--) {
448  ++p;
449  if ((p[0] & 0xC0) != 0x80) {
450  /* Skip illegal sequences
451  return SDL_ICONV_EILSEQ;
452  */
453  ch = UNKNOWN_UNICODE;
454  break;
455  }
456  ch <<= 6;
457  ch |= (p[0] & 0x3F);
458  ++src;
459  --srclen;
460  }
461  if (overlong) {
462  /* Potential security risk
463  return SDL_ICONV_EILSEQ;
464  */
465  ch = UNKNOWN_UNICODE;
466  }
467  if ((ch >= 0xD800 && ch <= 0xDFFF) ||
468  (ch == 0xFFFE || ch == 0xFFFF) || ch > 0x10FFFF) {
469  /* Skip illegal sequences
470  return SDL_ICONV_EILSEQ;
471  */
472  ch = UNKNOWN_UNICODE;
473  }
474  }
475  break;
476  case ENCODING_UTF16BE: /* RFC 2781 */
477  {
478  Uint8 *p = (Uint8 *) src;
479  Uint16 W1, W2;
480  if (srclen < 2) {
481  return SDL_ICONV_EINVAL;
482  }
483  W1 = ((Uint16) p[0] << 8) | (Uint16) p[1];
484  src += 2;
485  srclen -= 2;
486  if (W1 < 0xD800 || W1 > 0xDFFF) {
487  ch = (Uint32) W1;
488  break;
489  }
490  if (W1 > 0xDBFF) {
491  /* Skip illegal sequences
492  return SDL_ICONV_EILSEQ;
493  */
494  ch = UNKNOWN_UNICODE;
495  break;
496  }
497  if (srclen < 2) {
498  return SDL_ICONV_EINVAL;
499  }
500  p = (Uint8 *) src;
501  W2 = ((Uint16) p[0] << 8) | (Uint16) p[1];
502  src += 2;
503  srclen -= 2;
504  if (W2 < 0xDC00 || W2 > 0xDFFF) {
505  /* Skip illegal sequences
506  return SDL_ICONV_EILSEQ;
507  */
508  ch = UNKNOWN_UNICODE;
509  break;
510  }
511  ch = (((Uint32) (W1 & 0x3FF) << 10) |
512  (Uint32) (W2 & 0x3FF)) + 0x10000;
513  }
514  break;
515  case ENCODING_UTF16LE: /* RFC 2781 */
516  {
517  Uint8 *p = (Uint8 *) src;
518  Uint16 W1, W2;
519  if (srclen < 2) {
520  return SDL_ICONV_EINVAL;
521  }
522  W1 = ((Uint16) p[1] << 8) | (Uint16) p[0];
523  src += 2;
524  srclen -= 2;
525  if (W1 < 0xD800 || W1 > 0xDFFF) {
526  ch = (Uint32) W1;
527  break;
528  }
529  if (W1 > 0xDBFF) {
530  /* Skip illegal sequences
531  return SDL_ICONV_EILSEQ;
532  */
533  ch = UNKNOWN_UNICODE;
534  break;
535  }
536  if (srclen < 2) {
537  return SDL_ICONV_EINVAL;
538  }
539  p = (Uint8 *) src;
540  W2 = ((Uint16) p[1] << 8) | (Uint16) p[0];
541  src += 2;
542  srclen -= 2;
543  if (W2 < 0xDC00 || W2 > 0xDFFF) {
544  /* Skip illegal sequences
545  return SDL_ICONV_EILSEQ;
546  */
547  ch = UNKNOWN_UNICODE;
548  break;
549  }
550  ch = (((Uint32) (W1 & 0x3FF) << 10) |
551  (Uint32) (W2 & 0x3FF)) + 0x10000;
552  }
553  break;
554  case ENCODING_UCS2LE:
555  {
556  Uint8 *p = (Uint8 *) src;
557  if (srclen < 2) {
558  return SDL_ICONV_EINVAL;
559  }
560  ch = ((Uint32) p[1] << 8) | (Uint32) p[0];
561  src += 2;
562  srclen -= 2;
563  }
564  break;
565  case ENCODING_UCS2BE:
566  {
567  Uint8 *p = (Uint8 *) src;
568  if (srclen < 2) {
569  return SDL_ICONV_EINVAL;
570  }
571  ch = ((Uint32) p[0] << 8) | (Uint32) p[1];
572  src += 2;
573  srclen -= 2;
574  }
575  break;
576  case ENCODING_UCS4BE:
577  case ENCODING_UTF32BE:
578  {
579  Uint8 *p = (Uint8 *) src;
580  if (srclen < 4) {
581  return SDL_ICONV_EINVAL;
582  }
583  ch = ((Uint32) p[0] << 24) |
584  ((Uint32) p[1] << 16) |
585  ((Uint32) p[2] << 8) | (Uint32) p[3];
586  src += 4;
587  srclen -= 4;
588  }
589  break;
590  case ENCODING_UCS4LE:
591  case ENCODING_UTF32LE:
592  {
593  Uint8 *p = (Uint8 *) src;
594  if (srclen < 4) {
595  return SDL_ICONV_EINVAL;
596  }
597  ch = ((Uint32) p[3] << 24) |
598  ((Uint32) p[2] << 16) |
599  ((Uint32) p[1] << 8) | (Uint32) p[0];
600  src += 4;
601  srclen -= 4;
602  }
603  break;
604  }
605 
606  /* Encode a character */
607  switch (cd->dst_fmt) {
608  case ENCODING_ASCII:
609  {
610  Uint8 *p = (Uint8 *) dst;
611  if (dstlen < 1) {
612  return SDL_ICONV_E2BIG;
613  }
614  if (ch > 0x7F) {
615  *p = UNKNOWN_ASCII;
616  } else {
617  *p = (Uint8) ch;
618  }
619  ++dst;
620  --dstlen;
621  }
622  break;
623  case ENCODING_LATIN1:
624  {
625  Uint8 *p = (Uint8 *) dst;
626  if (dstlen < 1) {
627  return SDL_ICONV_E2BIG;
628  }
629  if (ch > 0xFF) {
630  *p = UNKNOWN_ASCII;
631  } else {
632  *p = (Uint8) ch;
633  }
634  ++dst;
635  --dstlen;
636  }
637  break;
638  case ENCODING_UTF8: /* RFC 3629 */
639  {
640  Uint8 *p = (Uint8 *) dst;
641  if (ch > 0x10FFFF) {
642  ch = UNKNOWN_UNICODE;
643  }
644  if (ch <= 0x7F) {
645  if (dstlen < 1) {
646  return SDL_ICONV_E2BIG;
647  }
648  *p = (Uint8) ch;
649  ++dst;
650  --dstlen;
651  } else if (ch <= 0x7FF) {
652  if (dstlen < 2) {
653  return SDL_ICONV_E2BIG;
654  }
655  p[0] = 0xC0 | (Uint8) ((ch >> 6) & 0x1F);
656  p[1] = 0x80 | (Uint8) (ch & 0x3F);
657  dst += 2;
658  dstlen -= 2;
659  } else if (ch <= 0xFFFF) {
660  if (dstlen < 3) {
661  return SDL_ICONV_E2BIG;
662  }
663  p[0] = 0xE0 | (Uint8) ((ch >> 12) & 0x0F);
664  p[1] = 0x80 | (Uint8) ((ch >> 6) & 0x3F);
665  p[2] = 0x80 | (Uint8) (ch & 0x3F);
666  dst += 3;
667  dstlen -= 3;
668  } else if (ch <= 0x1FFFFF) {
669  if (dstlen < 4) {
670  return SDL_ICONV_E2BIG;
671  }
672  p[0] = 0xF0 | (Uint8) ((ch >> 18) & 0x07);
673  p[1] = 0x80 | (Uint8) ((ch >> 12) & 0x3F);
674  p[2] = 0x80 | (Uint8) ((ch >> 6) & 0x3F);
675  p[3] = 0x80 | (Uint8) (ch & 0x3F);
676  dst += 4;
677  dstlen -= 4;
678  } else if (ch <= 0x3FFFFFF) {
679  if (dstlen < 5) {
680  return SDL_ICONV_E2BIG;
681  }
682  p[0] = 0xF8 | (Uint8) ((ch >> 24) & 0x03);
683  p[1] = 0x80 | (Uint8) ((ch >> 18) & 0x3F);
684  p[2] = 0x80 | (Uint8) ((ch >> 12) & 0x3F);
685  p[3] = 0x80 | (Uint8) ((ch >> 6) & 0x3F);
686  p[4] = 0x80 | (Uint8) (ch & 0x3F);
687  dst += 5;
688  dstlen -= 5;
689  } else {
690  if (dstlen < 6) {
691  return SDL_ICONV_E2BIG;
692  }
693  p[0] = 0xFC | (Uint8) ((ch >> 30) & 0x01);
694  p[1] = 0x80 | (Uint8) ((ch >> 24) & 0x3F);
695  p[2] = 0x80 | (Uint8) ((ch >> 18) & 0x3F);
696  p[3] = 0x80 | (Uint8) ((ch >> 12) & 0x3F);
697  p[4] = 0x80 | (Uint8) ((ch >> 6) & 0x3F);
698  p[5] = 0x80 | (Uint8) (ch & 0x3F);
699  dst += 6;
700  dstlen -= 6;
701  }
702  }
703  break;
704  case ENCODING_UTF16BE: /* RFC 2781 */
705  {
706  Uint8 *p = (Uint8 *) dst;
707  if (ch > 0x10FFFF) {
708  ch = UNKNOWN_UNICODE;
709  }
710  if (ch < 0x10000) {
711  if (dstlen < 2) {
712  return SDL_ICONV_E2BIG;
713  }
714  p[0] = (Uint8) (ch >> 8);
715  p[1] = (Uint8) ch;
716  dst += 2;
717  dstlen -= 2;
718  } else {
719  Uint16 W1, W2;
720  if (dstlen < 4) {
721  return SDL_ICONV_E2BIG;
722  }
723  ch = ch - 0x10000;
724  W1 = 0xD800 | (Uint16) ((ch >> 10) & 0x3FF);
725  W2 = 0xDC00 | (Uint16) (ch & 0x3FF);
726  p[0] = (Uint8) (W1 >> 8);
727  p[1] = (Uint8) W1;
728  p[2] = (Uint8) (W2 >> 8);
729  p[3] = (Uint8) W2;
730  dst += 4;
731  dstlen -= 4;
732  }
733  }
734  break;
735  case ENCODING_UTF16LE: /* RFC 2781 */
736  {
737  Uint8 *p = (Uint8 *) dst;
738  if (ch > 0x10FFFF) {
739  ch = UNKNOWN_UNICODE;
740  }
741  if (ch < 0x10000) {
742  if (dstlen < 2) {
743  return SDL_ICONV_E2BIG;
744  }
745  p[1] = (Uint8) (ch >> 8);
746  p[0] = (Uint8) ch;
747  dst += 2;
748  dstlen -= 2;
749  } else {
750  Uint16 W1, W2;
751  if (dstlen < 4) {
752  return SDL_ICONV_E2BIG;
753  }
754  ch = ch - 0x10000;
755  W1 = 0xD800 | (Uint16) ((ch >> 10) & 0x3FF);
756  W2 = 0xDC00 | (Uint16) (ch & 0x3FF);
757  p[1] = (Uint8) (W1 >> 8);
758  p[0] = (Uint8) W1;
759  p[3] = (Uint8) (W2 >> 8);
760  p[2] = (Uint8) W2;
761  dst += 4;
762  dstlen -= 4;
763  }
764  }
765  break;
766  case ENCODING_UCS2BE:
767  {
768  Uint8 *p = (Uint8 *) dst;
769  if (ch > 0xFFFF) {
770  ch = UNKNOWN_UNICODE;
771  }
772  if (dstlen < 2) {
773  return SDL_ICONV_E2BIG;
774  }
775  p[0] = (Uint8) (ch >> 8);
776  p[1] = (Uint8) ch;
777  dst += 2;
778  dstlen -= 2;
779  }
780  break;
781  case ENCODING_UCS2LE:
782  {
783  Uint8 *p = (Uint8 *) dst;
784  if (ch > 0xFFFF) {
785  ch = UNKNOWN_UNICODE;
786  }
787  if (dstlen < 2) {
788  return SDL_ICONV_E2BIG;
789  }
790  p[1] = (Uint8) (ch >> 8);
791  p[0] = (Uint8) ch;
792  dst += 2;
793  dstlen -= 2;
794  }
795  break;
796  case ENCODING_UTF32BE:
797  if (ch > 0x10FFFF) {
798  ch = UNKNOWN_UNICODE;
799  }
800  case ENCODING_UCS4BE:
801  if (ch > 0x7FFFFFFF) {
802  ch = UNKNOWN_UNICODE;
803  }
804  {
805  Uint8 *p = (Uint8 *) dst;
806  if (dstlen < 4) {
807  return SDL_ICONV_E2BIG;
808  }
809  p[0] = (Uint8) (ch >> 24);
810  p[1] = (Uint8) (ch >> 16);
811  p[2] = (Uint8) (ch >> 8);
812  p[3] = (Uint8) ch;
813  dst += 4;
814  dstlen -= 4;
815  }
816  break;
817  case ENCODING_UTF32LE:
818  if (ch > 0x10FFFF) {
819  ch = UNKNOWN_UNICODE;
820  }
821  case ENCODING_UCS4LE:
822  if (ch > 0x7FFFFFFF) {
823  ch = UNKNOWN_UNICODE;
824  }
825  {
826  Uint8 *p = (Uint8 *) dst;
827  if (dstlen < 4) {
828  return SDL_ICONV_E2BIG;
829  }
830  p[3] = (Uint8) (ch >> 24);
831  p[2] = (Uint8) (ch >> 16);
832  p[1] = (Uint8) (ch >> 8);
833  p[0] = (Uint8) ch;
834  dst += 4;
835  dstlen -= 4;
836  }
837  break;
838  }
839 
840  /* Update state */
841  *inbuf = src;
842  *inbytesleft = srclen;
843  *outbuf = dst;
844  *outbytesleft = dstlen;
845  ++total;
846  }
847  return total;
848 }
849 
850 int
851 SDL_iconv_close(SDL_iconv_t cd)
852 {
853  if (cd != (SDL_iconv_t)-1) {
854  SDL_free(cd);
855  }
856  return 0;
857 }
858 
859 #endif /* !HAVE_ICONV */
860 
861 char *
862 SDL_iconv_string(const char *tocode, const char *fromcode, const char *inbuf,
863  size_t inbytesleft)
864 {
865  SDL_iconv_t cd;
866  char *string;
867  size_t stringsize;
868  char *outbuf;
869  size_t outbytesleft;
870  size_t retCode = 0;
871 
872  cd = SDL_iconv_open(tocode, fromcode);
873  if (cd == (SDL_iconv_t) - 1) {
874  /* See if we can recover here (fixes iconv on Solaris 11) */
875  if (!tocode || !*tocode) {
876  tocode = "UTF-8";
877  }
878  if (!fromcode || !*fromcode) {
879  fromcode = "UTF-8";
880  }
881  cd = SDL_iconv_open(tocode, fromcode);
882  }
883  if (cd == (SDL_iconv_t) - 1) {
884  return NULL;
885  }
886 
887  stringsize = inbytesleft > 4 ? inbytesleft : 4;
888  string = SDL_malloc(stringsize);
889  if (!string) {
890  SDL_iconv_close(cd);
891  return NULL;
892  }
893  outbuf = string;
894  outbytesleft = stringsize;
895  SDL_memset(outbuf, 0, 4);
896 
897  while (inbytesleft > 0) {
898  retCode = SDL_iconv(cd, &inbuf, &inbytesleft, &outbuf, &outbytesleft);
899  switch (retCode) {
900  case SDL_ICONV_E2BIG:
901  {
902  char *oldstring = string;
903  stringsize *= 2;
904  string = SDL_realloc(string, stringsize);
905  if (!string) {
906  SDL_iconv_close(cd);
907  return NULL;
908  }
909  outbuf = string + (outbuf - oldstring);
910  outbytesleft = stringsize - (outbuf - string);
911  SDL_memset(outbuf, 0, 4);
912  }
913  break;
914  case SDL_ICONV_EILSEQ:
915  /* Try skipping some input data - not perfect, but... */
916  ++inbuf;
917  --inbytesleft;
918  break;
919  case SDL_ICONV_EINVAL:
920  case SDL_ICONV_ERROR:
921  /* We can't continue... */
922  inbytesleft = 0;
923  break;
924  }
925  }
926  SDL_iconv_close(cd);
927 
928  return string;
929 }
930 
931 /* vi: set ts=4 sw=4 expandtab: */
#define SDL_strlcpy
GLenum GLenum dst
static struct @26 encodings[]
int SDL_iconv_close(SDL_iconv_t cd)
Definition: SDL_iconv.c:851
GLdouble n
GLsizei const GLchar *const * string
#define SDL_ICONV_EILSEQ
Definition: SDL_stdinc.h:467
#define UNKNOWN_UNICODE
Definition: SDL_iconv.c:97
GLfloat GLfloat p
SDL_iconv_t SDL_iconv_open(const char *tocode, const char *fromcode)
Definition: SDL_iconv.c:207
const char * name
Definition: SDL_iconv.c:136
GLuint const GLchar * name
uint32_t Uint32
An unsigned 32-bit integer type.
Definition: SDL_stdinc.h:159
#define SDL_realloc
#define SDL_strcasecmp
GLenum GLuint GLsizei bufsize
#define SDL_strchr
unsigned int size_t
#define UNICODE_BOM
Definition: SDL_iconv.c:94
GLint left
uint8_t Uint8
An unsigned 8-bit integer type.
Definition: SDL_stdinc.h:143
void SDL_free(void *mem)
char * SDL_iconv_string(const char *tocode, const char *fromcode, const char *inbuf, size_t inbytesleft)
Definition: SDL_iconv.c:862
#define SDL_ICONV_E2BIG
Definition: SDL_stdinc.h:466
#define ENCODING_UTF32NATIVE
Definition: SDL_iconv.c:118
#define SDL_ICONV_EINVAL
Definition: SDL_stdinc.h:468
#define SDL_getenv
return Display return Display Bool Bool int int int return Display XEvent Bool(*) XPointer return Display return Display Drawable _Xconst char unsigned int unsigned int return Display Pixmap Pixmap XColor XColor unsigned int unsigned int return Display _Xconst char char int char return Display Visual unsigned int int int char unsigned int unsigned int in i)
Definition: SDL_x11sym.h:50
#define NULL
Definition: begin_code.h:143
SDL_bool
Definition: SDL_stdinc.h:130
#define ENCODING_UCS2NATIVE
Definition: SDL_iconv.c:119
#define ENCODING_UCS4NATIVE
Definition: SDL_iconv.c:120
SDL_COMPILE_TIME_ASSERT(size, CountTo >0)
#define UNKNOWN_ASCII
Definition: SDL_iconv.c:96
GLuint buffer
uint16_t Uint16
An unsigned 16-bit integer type.
Definition: SDL_stdinc.h:151
#define SDL_ICONV_ERROR
Definition: SDL_stdinc.h:465
#define SDL_arraysize(array)
Definition: SDL_stdinc.h:90
#define SDL_malloc
#define SDL_strcmp
size_t SDL_iconv(SDL_iconv_t cd, const char **inbuf, size_t *inbytesleft, char **outbuf, size_t *outbytesleft)
Definition: SDL_iconv.c:247
#define ENCODING_UTF16NATIVE
Definition: SDL_iconv.c:117
int format
Definition: SDL_iconv.c:137
GLenum src
#define SDL_memset
static const char * getlocale(char *buffer, size_t bufsize)
Definition: SDL_iconv.c:172