/*
 * Header opening: include guard (__UTF_OLD_H__), followed by the
 * "#ifndef U_HIDE_DEPRECATED_API" and "#ifdef U_USE_UTF_DEPRECATES"
 * conditionals and the UTextOffset typedef, an alias for int32_t.
 *
 * NOTE(review): the bare integers in front of each directive (145, 146, 148,
 * 156, 164) look like line numbers carried over from a rendered source
 * listing, and the gaps between them suggest lines are missing from this
 * excerpt. The #endif directives that would close these conditionals are not
 * visible here -- confirm against the real header before relying on the
 * nesting.
 */
145 #ifndef __UTF_OLD_H__ 146 #define __UTF_OLD_H__ 148 #ifndef U_HIDE_DEPRECATED_API 156 #ifdef U_USE_UTF_DEPRECATES 164 typedef int32_t UTextOffset;
/*
 * UTF-8 / UTF-16 / UTF-32 handling macros (excerpt).
 *
 * What the visible text defines:
 *  - Error sentinels UTF8_ERROR_VALUE_1 (0x15), UTF8_ERROR_VALUE_2 (0x9f) and
 *    UTF_ERROR_VALUE (0xffff), with the predicates UTF_IS_ERROR, UTF_IS_VALID,
 *    UTF_IS_SURROGATE, UTF_IS_UNICODE_NONCHAR and UTF_IS_UNICODE_CHAR.
 *  - UTF8_*: byte classification (SINGLE / LEAD / TRAIL), length computation,
 *    and get / next / append / forward / back / set-start / set-limit macros
 *    in _UNSAFE and _SAFE flavours. UTF8_FWD_1_SAFE, UTF8_FWD_N_SAFE,
 *    UTF8_SET_CHAR_START_SAFE, UTF8_BACK_1_SAFE, UTF8_BACK_N_SAFE and
 *    UTF8_SET_CHAR_LIMIT_SAFE are plain aliases for the corresponding U8_*
 *    macros.
 *  - UTF16_*: surrogate classification, pair composition through
 *    UTF16_GET_PAIR_VALUE / UTF_SURROGATE_OFFSET, and the same operation set;
 *    the FWD / BACK / SET_CHAR_START / SET_CHAR_LIMIT _SAFE variants are
 *    aliases for U16_* macros.
 *  - UTF32_*: constant-valued predicates (IS_SINGLE is 1, IS_LEAD and IS_TRAIL
 *    are 0, CHAR_LENGTH and MAX_CHAR_LENGTH are 1) and the same operation set.
 *  - UTF_*: unprefixed names that expand to the UTF16_* or U16_* macro of the
 *    same operation.
 *
 * Names used but not defined in the visible text: utf8_countTrailBytes,
 * utf8_nextCharSafeBody, utf8_appendCharSafeBody, utf8_prevCharSafeBody,
 * UChar, UChar32 and every U8_* / U16_* macro.
 *
 * NOTE(review): the integers in front of each directive or continuation line
 * (196, 203, ...) look like line numbers from a rendered source listing.
 * Gaps in that numbering (e.g. 242 -> 244, 367 -> 369, 412 -> 415) indicate
 * lines missing from this excerpt, so many macro bodies below are incomplete
 * as shown (opening "{ \" without the closing brace, missing statements).
 * Treat this text as a reference excerpt, not as compilable source.
 *
 * NOTE(review): UTF8_CHAR_LENGTH appears twice (a variant ending in "3 : 4"
 * and a variant going up to 6); the conditional that selects between them is
 * not visible here.
 *
 * NOTE(review): UTF8_COUNT_TRAIL_BYTES applies "(uint8_t)leadByte" without
 * parenthesizing the argument, so for an expression argument the cast would
 * bind to its first operand only. The visible caller passes "(s)[i]", which
 * is unaffected.
 *
 * NOTE(review): UTF16_IS_SINGLE expands to "!UTF_IS_SURROGATE(uchar)" with no
 * enclosing parentheses.
 *
 * NOTE(review): UTF32_SET_CHAR_LIMIT_SAFE takes (s, i, length) while the
 * UTF8_ and UTF16_ macros of the same name take (s, start, i, length).
 *
 * NOTE(review): in UTF16_APPEND_CHAR_SAFE the c<=0xffff branch stores to
 * (s)[(i)++] with no visible comparison of i against length; only the
 * supplementary branch shows an "(i)+1<(length)" test. Confirm against the
 * complete definition.
 */
196 #define UTF8_ERROR_VALUE_1 0x15 203 #define UTF8_ERROR_VALUE_2 0x9f 211 #define UTF_ERROR_VALUE 0xffff 219 #define UTF_IS_ERROR(c) \ 220 (((c)&0xfffe)==0xfffe || (c)==UTF8_ERROR_VALUE_1 || (c)==UTF8_ERROR_VALUE_2) 227 #define UTF_IS_VALID(c) \ 228 (UTF_IS_UNICODE_CHAR(c) && \ 229 (c)!=UTF8_ERROR_VALUE_1 && (c)!=UTF8_ERROR_VALUE_2) 235 #define UTF_IS_SURROGATE(uchar) (((uchar)&0xfffff800)==0xd800) 242 #define UTF_IS_UNICODE_NONCHAR(c) \ 244 ((uint32_t)(c)<=0xfdef || ((c)&0xfffe)==0xfffe) && \ 245 (uint32_t)(c)<=0x10ffff) 262 #define UTF_IS_UNICODE_CHAR(c) \ 263 ((uint32_t)(c)<0xd800 || \ 264 ((uint32_t)(c)>0xdfff && \ 265 (uint32_t)(c)<=0x10ffff && \ 266 !UTF_IS_UNICODE_NONCHAR(c))) 283 #elif defined(U_STATIC_IMPLEMENTATION) || defined(U_COMMON_IMPLEMENTATION) 293 #define UTF8_COUNT_TRAIL_BYTES(leadByte) (utf8_countTrailBytes[(uint8_t)leadByte]) 299 #define UTF8_MASK_LEAD_BYTE(leadByte, countTrailBytes) ((leadByte)&=(1<<(6-(countTrailBytes)))-1) 302 #define UTF8_IS_SINGLE(uchar) (((uchar)&0x80)==0) 304 #define UTF8_IS_LEAD(uchar) ((uint8_t)((uchar)-0xc0)<0x3e) 306 #define UTF8_IS_TRAIL(uchar) (((uchar)&0xc0)==0x80) 309 #define UTF8_NEED_MULTIPLE_UCHAR(c) ((uint32_t)(c)>0x7f) 325 # define UTF8_CHAR_LENGTH(c) \ 326 ((uint32_t)(c)<=0x7f ? 1 : \ 327 ((uint32_t)(c)<=0x7ff ? 2 : \ 328 ((uint32_t)((c)-0x10000)>0xfffff ? 3 : 4) \ 332 # define UTF8_CHAR_LENGTH(c) \ 333 ((uint32_t)(c)<=0x7f ? 1 : \ 334 ((uint32_t)(c)<=0x7ff ? 2 : \ 335 ((uint32_t)(c)<=0xffff ? 3 : \ 336 ((uint32_t)(c)<=0x10ffff ? 4 : \ 337 ((uint32_t)(c)<=0x3ffffff ? 5 : \ 338 ((uint32_t)(c)<=0x7fffffff ? 
6 : 3) \ 347 #define UTF8_MAX_CHAR_LENGTH 4 350 #define UTF8_ARRAY_SIZE(size) ((5*(size))/2) 353 #define UTF8_GET_CHAR_UNSAFE(s, i, c) { \ 354 int32_t _utf8_get_char_unsafe_index=(int32_t)(i); \ 355 UTF8_SET_CHAR_START_UNSAFE(s, _utf8_get_char_unsafe_index); \ 356 UTF8_NEXT_CHAR_UNSAFE(s, _utf8_get_char_unsafe_index, c); \ 360 #define UTF8_GET_CHAR_SAFE(s, start, i, length, c, strict) { \ 361 int32_t _utf8_get_char_safe_index=(int32_t)(i); \ 362 UTF8_SET_CHAR_START_SAFE(s, start, _utf8_get_char_safe_index); \ 363 UTF8_NEXT_CHAR_SAFE(s, _utf8_get_char_safe_index, length, c, strict); \ 367 #define UTF8_NEXT_CHAR_UNSAFE(s, i, c) { \ 369 if((uint8_t)((c)-0xc0)<0x35) { \ 370 uint8_t __count=UTF8_COUNT_TRAIL_BYTES(c); \ 371 UTF8_MASK_LEAD_BYTE(c, __count); \ 375 (c)=((c)<<6)|((s)[(i)++]&0x3f); \ 377 (c)=((c)<<6)|((s)[(i)++]&0x3f); \ 379 (c)=((c)<<6)|((s)[(i)++]&0x3f); \ 387 #define UTF8_APPEND_CHAR_UNSAFE(s, i, c) { \ 388 if((uint32_t)(c)<=0x7f) { \ 389 (s)[(i)++]=(uint8_t)(c); \ 391 if((uint32_t)(c)<=0x7ff) { \ 392 (s)[(i)++]=(uint8_t)(((c)>>6)|0xc0); \ 394 if((uint32_t)(c)<=0xffff) { \ 395 (s)[(i)++]=(uint8_t)(((c)>>12)|0xe0); \ 397 (s)[(i)++]=(uint8_t)(((c)>>18)|0xf0); \ 398 (s)[(i)++]=(uint8_t)((((c)>>12)&0x3f)|0x80); \ 400 (s)[(i)++]=(uint8_t)((((c)>>6)&0x3f)|0x80); \ 402 (s)[(i)++]=(uint8_t)(((c)&0x3f)|0x80); \ 407 #define UTF8_FWD_1_UNSAFE(s, i) { \ 408 (i)+=1+UTF8_COUNT_TRAIL_BYTES((s)[i]); \ 412 #define UTF8_FWD_N_UNSAFE(s, i, n) { \ 415 UTF8_FWD_1_UNSAFE(s, i); \ 421 #define UTF8_SET_CHAR_START_UNSAFE(s, i) { \ 422 while(UTF8_IS_TRAIL((s)[i])) { --(i); } \ 426 #define UTF8_NEXT_CHAR_SAFE(s, i, length, c, strict) { \ 429 if(UTF8_IS_LEAD(c)) { \ 430 (c)=utf8_nextCharSafeBody(s, &(i), (int32_t)(length), c, strict); \ 432 (c)=UTF8_ERROR_VALUE_1; \ 438 #define UTF8_APPEND_CHAR_SAFE(s, i, length, c) { \ 439 if((uint32_t)(c)<=0x7f) { \ 440 (s)[(i)++]=(uint8_t)(c); \ 442 (i)=utf8_appendCharSafeBody(s, (int32_t)(i), (int32_t)(length), c, NULL); \ 447 #define 
UTF8_FWD_1_SAFE(s, i, length) U8_FWD_1(s, i, length) 450 #define UTF8_FWD_N_SAFE(s, i, length, n) U8_FWD_N(s, i, length, n) 453 #define UTF8_SET_CHAR_START_SAFE(s, start, i) U8_SET_CP_START(s, start, i) 456 #define UTF8_PREV_CHAR_UNSAFE(s, i, c) { \ 458 if(UTF8_IS_TRAIL(c)) { \ 459 uint8_t __b, __count=1, __shift=6; \ 466 UTF8_MASK_LEAD_BYTE(__b, __count); \ 467 (c)|=(UChar32)__b<<__shift; \ 470 (c)|=(UChar32)(__b&0x3f)<<__shift; \ 479 #define UTF8_BACK_1_UNSAFE(s, i) { \ 480 while(UTF8_IS_TRAIL((s)[--(i)])) {} \ 484 #define UTF8_BACK_N_UNSAFE(s, i, n) { \ 487 UTF8_BACK_1_UNSAFE(s, i); \ 493 #define UTF8_SET_CHAR_LIMIT_UNSAFE(s, i) { \ 494 UTF8_BACK_1_UNSAFE(s, i); \ 495 UTF8_FWD_1_UNSAFE(s, i); \ 499 #define UTF8_PREV_CHAR_SAFE(s, start, i, c, strict) { \ 503 (c)=utf8_prevCharSafeBody(s, start, &(i), c, strict); \ 505 (c)=UTF8_ERROR_VALUE_1; \ 511 #define UTF8_BACK_1_SAFE(s, start, i) U8_BACK_1(s, start, i) 514 #define UTF8_BACK_N_SAFE(s, start, i, n) U8_BACK_N(s, start, i, n) 517 #define UTF8_SET_CHAR_LIMIT_SAFE(s, start, i, length) U8_SET_CP_LIMIT(s, start, i, length) 522 #define UTF_IS_FIRST_SURROGATE(uchar) (((uchar)&0xfffffc00)==0xd800) 525 #define UTF_IS_SECOND_SURROGATE(uchar) (((uchar)&0xfffffc00)==0xdc00) 528 #define UTF_IS_SURROGATE_FIRST(c) (((c)&0x400)==0) 531 #define UTF_SURROGATE_OFFSET ((0xd800<<10UL)+0xdc00-0x10000) 534 #define UTF16_GET_PAIR_VALUE(first, second) \ 535 (((first)<<10UL)+(second)-UTF_SURROGATE_OFFSET) 538 #define UTF_FIRST_SURROGATE(supplementary) (UChar)(((supplementary)>>10)+0xd7c0) 541 #define UTF_SECOND_SURROGATE(supplementary) (UChar)(((supplementary)&0x3ff)|0xdc00) 544 #define UTF16_LEAD(supplementary) UTF_FIRST_SURROGATE(supplementary) 547 #define UTF16_TRAIL(supplementary) UTF_SECOND_SURROGATE(supplementary) 550 #define UTF16_IS_SINGLE(uchar) !UTF_IS_SURROGATE(uchar) 553 #define UTF16_IS_LEAD(uchar) UTF_IS_FIRST_SURROGATE(uchar) 556 #define UTF16_IS_TRAIL(uchar) UTF_IS_SECOND_SURROGATE(uchar) 559 #define 
UTF16_NEED_MULTIPLE_UCHAR(c) ((uint32_t)(c)>0xffff) 562 #define UTF16_CHAR_LENGTH(c) ((uint32_t)(c)<=0xffff ? 1 : 2) 565 #define UTF16_MAX_CHAR_LENGTH 2 568 #define UTF16_ARRAY_SIZE(size) (size) 581 #define UTF16_GET_CHAR_UNSAFE(s, i, c) { \ 583 if(UTF_IS_SURROGATE(c)) { \ 584 if(UTF_IS_SURROGATE_FIRST(c)) { \ 585 (c)=UTF16_GET_PAIR_VALUE((c), (s)[(i)+1]); \ 587 (c)=UTF16_GET_PAIR_VALUE((s)[(i)-1], (c)); \ 593 #define UTF16_GET_CHAR_SAFE(s, start, i, length, c, strict) { \ 595 if(UTF_IS_SURROGATE(c)) { \ 597 if(UTF_IS_SURROGATE_FIRST(c)) { \ 598 if((i)+1<(length) && UTF_IS_SECOND_SURROGATE(__c2=(s)[(i)+1])) { \ 599 (c)=UTF16_GET_PAIR_VALUE((c), __c2); \ 603 (c)=UTF_ERROR_VALUE; \ 606 if((i)-1>=(start) && UTF_IS_FIRST_SURROGATE(__c2=(s)[(i)-1])) { \ 607 (c)=UTF16_GET_PAIR_VALUE(__c2, (c)); \ 611 (c)=UTF_ERROR_VALUE; \ 614 } else if((strict) && !UTF_IS_UNICODE_CHAR(c)) { \ 615 (c)=UTF_ERROR_VALUE; \ 620 #define UTF16_NEXT_CHAR_UNSAFE(s, i, c) { \ 622 if(UTF_IS_FIRST_SURROGATE(c)) { \ 623 (c)=UTF16_GET_PAIR_VALUE((c), (s)[(i)++]); \ 628 #define UTF16_APPEND_CHAR_UNSAFE(s, i, c) { \ 629 if((uint32_t)(c)<=0xffff) { \ 630 (s)[(i)++]=(uint16_t)(c); \ 632 (s)[(i)++]=(uint16_t)(((c)>>10)+0xd7c0); \ 633 (s)[(i)++]=(uint16_t)(((c)&0x3ff)|0xdc00); \ 638 #define UTF16_FWD_1_UNSAFE(s, i) { \ 639 if(UTF_IS_FIRST_SURROGATE((s)[(i)++])) { \ 645 #define UTF16_FWD_N_UNSAFE(s, i, n) { \ 648 UTF16_FWD_1_UNSAFE(s, i); \ 654 #define UTF16_SET_CHAR_START_UNSAFE(s, i) { \ 655 if(UTF_IS_SECOND_SURROGATE((s)[i])) { \ 661 #define UTF16_NEXT_CHAR_SAFE(s, i, length, c, strict) { \ 663 if(UTF_IS_FIRST_SURROGATE(c)) { \ 665 if((i)<(length) && UTF_IS_SECOND_SURROGATE(__c2=(s)[(i)])) { \ 667 (c)=UTF16_GET_PAIR_VALUE((c), __c2); \ 671 (c)=UTF_ERROR_VALUE; \ 673 } else if((strict) && !UTF_IS_UNICODE_CHAR(c)) { \ 675 (c)=UTF_ERROR_VALUE; \ 680 #define UTF16_APPEND_CHAR_SAFE(s, i, length, c) { \ 681 if((uint32_t)(c)<=0xffff) { \ 682 (s)[(i)++]=(uint16_t)(c); \ 683 } else if((uint32_t)(c)<=0x10ffff) { \ 
684 if((i)+1<(length)) { \ 685 (s)[(i)++]=(uint16_t)(((c)>>10)+0xd7c0); \ 686 (s)[(i)++]=(uint16_t)(((c)&0x3ff)|0xdc00); \ 688 (s)[(i)++]=UTF_ERROR_VALUE; \ 691 (s)[(i)++]=UTF_ERROR_VALUE; \ 696 #define UTF16_FWD_1_SAFE(s, i, length) U16_FWD_1(s, i, length) 699 #define UTF16_FWD_N_SAFE(s, i, length, n) U16_FWD_N(s, i, length, n) 702 #define UTF16_SET_CHAR_START_SAFE(s, start, i) U16_SET_CP_START(s, start, i) 705 #define UTF16_PREV_CHAR_UNSAFE(s, i, c) { \ 707 if(UTF_IS_SECOND_SURROGATE(c)) { \ 708 (c)=UTF16_GET_PAIR_VALUE((s)[--(i)], (c)); \ 713 #define UTF16_BACK_1_UNSAFE(s, i) { \ 714 if(UTF_IS_SECOND_SURROGATE((s)[--(i)])) { \ 720 #define UTF16_BACK_N_UNSAFE(s, i, n) { \ 723 UTF16_BACK_1_UNSAFE(s, i); \ 729 #define UTF16_SET_CHAR_LIMIT_UNSAFE(s, i) { \ 730 if(UTF_IS_FIRST_SURROGATE((s)[(i)-1])) { \ 736 #define UTF16_PREV_CHAR_SAFE(s, start, i, c, strict) { \ 738 if(UTF_IS_SECOND_SURROGATE(c)) { \ 740 if((i)>(start) && UTF_IS_FIRST_SURROGATE(__c2=(s)[(i)-1])) { \ 742 (c)=UTF16_GET_PAIR_VALUE(__c2, (c)); \ 746 (c)=UTF_ERROR_VALUE; \ 748 } else if((strict) && !UTF_IS_UNICODE_CHAR(c)) { \ 750 (c)=UTF_ERROR_VALUE; \ 755 #define UTF16_BACK_1_SAFE(s, start, i) U16_BACK_1(s, start, i) 758 #define UTF16_BACK_N_SAFE(s, start, i, n) U16_BACK_N(s, start, i, n) 761 #define UTF16_SET_CHAR_LIMIT_SAFE(s, start, i, length) U16_SET_CP_LIMIT(s, start, i, length) 781 #define UTF32_IS_SAFE(c, strict) \ 783 (uint32_t)(c)<=0x10ffff : \ 784 UTF_IS_UNICODE_CHAR(c)) 797 #define UTF32_IS_SINGLE(uchar) 1 799 #define UTF32_IS_LEAD(uchar) 0 801 #define UTF32_IS_TRAIL(uchar) 0 806 #define UTF32_NEED_MULTIPLE_UCHAR(c) 0 808 #define UTF32_CHAR_LENGTH(c) 1 810 #define UTF32_MAX_CHAR_LENGTH 1 815 #define UTF32_ARRAY_SIZE(size) (size) 818 #define UTF32_GET_CHAR_UNSAFE(s, i, c) { \ 823 #define UTF32_GET_CHAR_SAFE(s, start, i, length, c, strict) { \ 825 if(!UTF32_IS_SAFE(c, strict)) { \ 826 (c)=UTF_ERROR_VALUE; \ 833 #define UTF32_NEXT_CHAR_UNSAFE(s, i, c) { \ 838 #define UTF32_APPEND_CHAR_UNSAFE(s, 
i, c) { \ 843 #define UTF32_FWD_1_UNSAFE(s, i) { \ 848 #define UTF32_FWD_N_UNSAFE(s, i, n) { \ 853 #define UTF32_SET_CHAR_START_UNSAFE(s, i) { \ 857 #define UTF32_NEXT_CHAR_SAFE(s, i, length, c, strict) { \ 859 if(!UTF32_IS_SAFE(c, strict)) { \ 860 (c)=UTF_ERROR_VALUE; \ 865 #define UTF32_APPEND_CHAR_SAFE(s, i, length, c) { \ 866 if((uint32_t)(c)<=0x10ffff) { \ 874 #define UTF32_FWD_1_SAFE(s, i, length) { \ 879 #define UTF32_FWD_N_SAFE(s, i, length, n) { \ 880 if(((i)+=(n))>(length)) { \ 886 #define UTF32_SET_CHAR_START_SAFE(s, start, i) { \ 892 #define UTF32_PREV_CHAR_UNSAFE(s, i, c) { \ 897 #define UTF32_BACK_1_UNSAFE(s, i) { \ 902 #define UTF32_BACK_N_UNSAFE(s, i, n) { \ 907 #define UTF32_SET_CHAR_LIMIT_UNSAFE(s, i) { \ 911 #define UTF32_PREV_CHAR_SAFE(s, start, i, c, strict) { \ 913 if(!UTF32_IS_SAFE(c, strict)) { \ 914 (c)=UTF_ERROR_VALUE; \ 919 #define UTF32_BACK_1_SAFE(s, start, i) { \ 924 #define UTF32_BACK_N_SAFE(s, start, i, n) { \ 932 #define UTF32_SET_CHAR_LIMIT_SAFE(s, i, length) { \ 942 #define UTF_ARRAY_SIZE(size) UTF16_ARRAY_SIZE(size) 945 #define UTF_GET_CHAR_UNSAFE(s, i, c) UTF16_GET_CHAR_UNSAFE(s, i, c) 948 #define UTF_GET_CHAR_SAFE(s, start, i, length, c, strict) UTF16_GET_CHAR_SAFE(s, start, i, length, c, strict) 952 #define UTF_NEXT_CHAR_UNSAFE(s, i, c) UTF16_NEXT_CHAR_UNSAFE(s, i, c) 955 #define UTF_NEXT_CHAR_SAFE(s, i, length, c, strict) UTF16_NEXT_CHAR_SAFE(s, i, length, c, strict) 959 #define UTF_APPEND_CHAR_UNSAFE(s, i, c) UTF16_APPEND_CHAR_UNSAFE(s, i, c) 962 #define UTF_APPEND_CHAR_SAFE(s, i, length, c) UTF16_APPEND_CHAR_SAFE(s, i, length, c) 966 #define UTF_FWD_1_UNSAFE(s, i) UTF16_FWD_1_UNSAFE(s, i) 969 #define UTF_FWD_1_SAFE(s, i, length) UTF16_FWD_1_SAFE(s, i, length) 973 #define UTF_FWD_N_UNSAFE(s, i, n) UTF16_FWD_N_UNSAFE(s, i, n) 976 #define UTF_FWD_N_SAFE(s, i, length, n) UTF16_FWD_N_SAFE(s, i, length, n) 980 #define UTF_SET_CHAR_START_UNSAFE(s, i) UTF16_SET_CHAR_START_UNSAFE(s, i) 983 #define UTF_SET_CHAR_START_SAFE(s, start, 
i) UTF16_SET_CHAR_START_SAFE(s, start, i) 987 #define UTF_PREV_CHAR_UNSAFE(s, i, c) UTF16_PREV_CHAR_UNSAFE(s, i, c) 990 #define UTF_PREV_CHAR_SAFE(s, start, i, c, strict) UTF16_PREV_CHAR_SAFE(s, start, i, c, strict) 994 #define UTF_BACK_1_UNSAFE(s, i) UTF16_BACK_1_UNSAFE(s, i) 997 #define UTF_BACK_1_SAFE(s, start, i) UTF16_BACK_1_SAFE(s, start, i) 1001 #define UTF_BACK_N_UNSAFE(s, i, n) UTF16_BACK_N_UNSAFE(s, i, n) 1004 #define UTF_BACK_N_SAFE(s, start, i, n) UTF16_BACK_N_SAFE(s, start, i, n) 1008 #define UTF_SET_CHAR_LIMIT_UNSAFE(s, i) UTF16_SET_CHAR_LIMIT_UNSAFE(s, i) 1011 #define UTF_SET_CHAR_LIMIT_SAFE(s, start, i, length) UTF16_SET_CHAR_LIMIT_SAFE(s, start, i, length) 1020 #define UTF_IS_SINGLE(uchar) U16_IS_SINGLE(uchar) 1027 #define UTF_IS_LEAD(uchar) U16_IS_LEAD(uchar) 1034 #define UTF_IS_TRAIL(uchar) U16_IS_TRAIL(uchar) 1041 #define UTF_NEED_MULTIPLE_UCHAR(c) UTF16_NEED_MULTIPLE_UCHAR(c) 1048 #define UTF_CHAR_LENGTH(c) U16_LENGTH(c) 1055 #define UTF_MAX_CHAR_LENGTH U16_MAX_LENGTH 1066 #define UTF_GET_CHAR(s, start, i, length, c) U16_GET(s, start, i, length, c) 1079 #define UTF_NEXT_CHAR(s, i, length, c) U16_NEXT(s, i, length, c) 1092 #define UTF_APPEND_CHAR(s, i, length, c) UTF16_APPEND_CHAR_SAFE(s, i, length, c) 1103 #define UTF_FWD_1(s, i, length) U16_FWD_1(s, i, length) 1114 #define UTF_FWD_N(s, i, length, n) U16_FWD_N(s, i, length, n) 1130 #define UTF_SET_CHAR_START(s, start, i) U16_SET_CP_START(s, start, i) 1143 #define UTF_PREV_CHAR(s, start, i, c) U16_PREV(s, start, i, c) 1156 #define UTF_BACK_1(s, start, i) U16_BACK_1(s, start, i) 1169 #define UTF_BACK_N(s, start, i, n) U16_BACK_N(s, start, i, n) 1185 #define UTF_SET_CHAR_LIMIT(s, start, i, length) U16_SET_CP_LIMIT(s, start, i, length)
C API: 8-bit Unicode handling macros.
U_CFUNC U_IMPORT const uint8_t utf8_countTrailBytes[]
Internal array giving the number of trail bytes for any given byte used in lead-byte position.
C API: Code point macros.
#define U_CFUNC
This is used in the declaration of a library-private ICU C function.
C API: 16-bit Unicode handling macros.