31# pragma warning(disable : 4255)
35# pragma warning(disable : 4668)
38# pragma warning(disable : 4820)
41#if defined(__cplusplus)
43# define utf8_cplusplus _MSVC_LANG
45# define utf8_cplusplus __cplusplus
56#if defined(_MSC_VER) && (_MSC_VER < 1920)
64# pragma clang diagnostic push
65# pragma clang diagnostic ignored "-Wold-style-cast"
66# pragma clang diagnostic ignored "-Wcast-qual"
68# if __has_warning("-Wunsafe-buffer-usage")
69# pragma clang diagnostic ignored "-Wunsafe-buffer-usage"
78# define UTF8_ATTRIBUTE(a) __attribute((a))
80# define UTF8_ATTRIBUTE(a) __attribute__((a))
86# define utf8_restrict __restrict
87# define utf8_weak __inline
88#elif defined(__clang__) || defined(__GNUC__)
89# define utf8_nonnull UTF8_ATTRIBUTE(nonnull)
90# define utf8_pure UTF8_ATTRIBUTE(pure)
91# define utf8_restrict __restrict__
92# define utf8_weak UTF8_ATTRIBUTE(weak)
93#elif defined(__TINYC__)
94# define utf8_nonnull UTF8_ATTRIBUTE(nonnull)
95# define utf8_pure UTF8_ATTRIBUTE(pure)
97# define utf8_weak UTF8_ATTRIBUTE(weak)
98#elif defined(__IAR_SYSTEMS_ICC__)
100# define utf8_pure UTF8_ATTRIBUTE(pure)
101# define utf8_restrict __restrict
102# define utf8_weak UTF8_ATTRIBUTE(weak)
104# error Non clang, non gcc, non MSVC, non tcc, non iar compiler found!
108# define utf8_null NULL
113#if defined(utf8_cplusplus) && utf8_cplusplus >= 201402L && \
114 (!defined(_MSC_VER) || (defined(_MSC_VER) && _MSC_VER >= 1910))
115# define utf8_constexpr14 constexpr
116# define utf8_constexpr14_impl constexpr
119# define utf8_constexpr14 utf8_weak
120# define utf8_constexpr14_impl
123#if defined(utf8_cplusplus) && utf8_cplusplus >= 202002L && \
124 defined(__cpp_char8_t)
321 utf8_int32_t src1_lwr_cp = 0, src2_lwr_cp = 0, src1_upr_cp = 0,
322 src2_upr_cp = 0, src1_orig_cp = 0, src2_orig_cp = 0;
337 if ((0 == src1_orig_cp) && (0 == src2_orig_cp)) {
339 }
else if ((src1_lwr_cp == src2_lwr_cp) || (src1_upr_cp == src2_upr_cp)) {
345 return src1_lwr_cp - src2_lwr_cp;
358 while (
'\0' != *src) {
375 while (
'\0' != *src) {
411 while ((
'\0' != *src1) || (
'\0' != *src2)) {
414 }
else if (*src1 > *src2) {
435 while (
'\0' != *src) {
449 while (
'\0' != *src) {
457 if ((0x80 != (0xc0 & *r)) && (0 < offset)) {
460 if (*r == src[offset]) {
471 }
while (0x80 == (0xc0 & *r));
489 }
while ((0x80 == (0xc0 & *src)));
509 if (alloc_func_ptr) {
510 n = alloc_func_ptr(user_data, bytes);
512#if !defined(UTF8_NO_STD_MALLOC)
526 while (
'\0' != src[bytes]) {
527 n[bytes] = src[bytes];
547 while ((
size_t)(str - t) <
n &&
'\0' != *str) {
548 if (0xf0 == (0xf8 & *str)) {
551 }
else if (0xe0 == (0xf0 & *str)) {
554 }
else if (0xc0 == (0xe0 & *str)) {
567 if ((
size_t)(str - t) >
n) {
575 utf8_int32_t src1_lwr_cp = 0, src2_lwr_cp = 0, src1_upr_cp = 0,
576 src2_upr_cp = 0, src1_orig_cp = 0, src2_orig_cp = 0;
588 if ((1 ==
n) && ((0xc0 == (0xe0 & *s1)) || (0xc0 == (0xe0 & *s2)))) {
599 if ((2 >=
n) && ((0xe0 == (0xf0 & *s1)) || (0xe0 == (0xf0 & *s2)))) {
610 if ((3 >=
n) && ((0xf0 == (0xf8 & *s1)) || (0xf0 == (0xf8 & *s2)))) {
632 if ((0 == src1_orig_cp) && (0 == src2_orig_cp)) {
634 }
else if ((src1_lwr_cp == src2_lwr_cp) || (src1_upr_cp == src2_upr_cp)) {
640 return src1_lwr_cp - src2_lwr_cp;
658 while ((
'\0' != *src) && (0 !=
n--)) {
670 while ((0 !=
n--) && ((
'\0' != *src1) || (
'\0' != *src2))) {
673 }
else if (*src1 > *src2) {
688 size_t index = 0, check_index = 0;
696 for (index = 0; index <
n; index++) {
697 d[index] = src[index];
698 if (
'\0' == src[index]) {
703 for (check_index = index - 1;
704 check_index > 0 && 0x80 == (0xc0 & d[check_index]); check_index--) {
708 if (check_index < index &&
710 (index - check_index) ==
n)) {
715 for (; index <
n; index++) {
733 while (
'\0' != src[bytes] && bytes <
n) {
741 if (alloc_func_ptr) {
742 c = alloc_func_ptr(user_data, bytes + 1);
744#if !defined(UTF8_NO_STD_MALLOC)
759 while (
'\0' != src[bytes] && bytes <
n) {
760 c[bytes] = src[bytes];
777 while (
'\0' != *src) {
781 }
else if (0 == ((
int)0xffffff80 & chr)) {
785 }
else if (0 == ((
int)0xfffff800 & chr)) {
790 }
else if (0 == ((
int)0xffff0000 & chr)) {
809 while (
'\0' != *src) {
812 while ((src[offset] == c[offset]) && (
'\0' != src[offset])) {
816 if (
'\0' == c[offset]) {
832 }
while (0x80 == (0xc0 & *src));
843 while (
'\0' != *str) {
851 if ((0x80 != (0xc0 & *a)) && (0 < offset)) {
854 if (*a == str[offset]) {
865 }
while (0x80 == (0xc0 & *a));
883 }
while ((0x80 == (0xc0 & *str)));
899 while (size <
n &&
'\0' != str[size]) {
909 while (
'\0' != *src) {
917 if ((0x80 != (0xc0 & *a)) && (0 < offset)) {
926 if (*a == src[offset]) {
934 }
while (0x80 == (0xc0 & *a));
965 if (
'\0' == *needle) {
969 while (
'\0' != *haystack) {
973 while (*haystack == *
n && (*haystack !=
'\0' && *
n !=
'\0')) {
998 if (
'\0' == *needle) {
1011 while ((0 != h_cp) && (0 != n_cp)) {
1047 size_t consumed = 0;
1049 while ((
void)(consumed = (
size_t)(str - t)), consumed <
n &&
'\0' != *str) {
1050 const size_t remaining =
n - consumed;
1052 if (0xf0 == (0xf8 & *str)) {
1054 if (remaining < 4) {
1060 if ((0x80 != (0xc0 & str[1])) || (0x80 != (0xc0 & str[2])) ||
1061 (0x80 != (0xc0 & str[3]))) {
1066 if ((remaining != 4) && (0x80 == (0xc0 & str[4]))) {
1073 if ((0 == (0x07 & str[0])) && (0 == (0x30 & str[1]))) {
1079 }
else if (0xe0 == (0xf0 & *str)) {
1081 if (remaining < 3) {
1087 if ((0x80 != (0xc0 & str[1])) || (0x80 != (0xc0 & str[2]))) {
1092 if ((remaining != 3) && (0x80 == (0xc0 & str[3]))) {
1099 if ((0 == (0x0f & str[0])) && (0 == (0x20 & str[1]))) {
1105 }
else if (0xc0 == (0xe0 & *str)) {
1107 if (remaining < 2) {
1113 if (0x80 != (0xc0 & str[1])) {
1118 if ((remaining != 2) && (0x80 == (0xc0 & str[2]))) {
1125 if (0 == (0x1e & str[0])) {
1131 }
else if (0x00 == (0x80 & *str)) {
1149 if (replacement > 0x7f) {
1153 while (
'\0' != *read) {
1154 if (0xf0 == (0xf8 & *read)) {
1157 if ((0x80 != (0xc0 & read[1])) || (0x80 != (0xc0 & read[2])) ||
1158 (0x80 != (0xc0 & read[3]))) {
1167 }
else if (0xe0 == (0xf0 & *read)) {
1170 if ((0x80 != (0xc0 & read[1])) || (0x80 != (0xc0 & read[2]))) {
1179 }
else if (0xc0 == (0xe0 & *read)) {
1182 if (0x80 != (0xc0 & read[1])) {
1191 }
else if (0x00 == (0x80 & *read)) {
1211 if (0xf0 == (0xf8 & str[0])) {
1213 *out_codepoint = ((0x07 & str[0]) << 18) | ((0x3f & str[1]) << 12) |
1214 ((0x3f & str[2]) << 6) | (0x3f & str[3]);
1216 }
else if (0xe0 == (0xf0 & str[0])) {
1219 ((0x0f & str[0]) << 12) | ((0x3f & str[1]) << 6) | (0x3f & str[2]);
1221 }
else if (0xc0 == (0xe0 & str[0])) {
1223 *out_codepoint = ((0x1f & str[0]) << 6) | (0x3f & str[1]);
1227 *out_codepoint = str[0];
1235 if (0xf0 == (0xf8 & str[0])) {
1238 }
else if (0xe0 == (0xf0 & str[0])) {
1241 }
else if (0xc0 == (0xe0 & str[0])) {
1349 if (((0x0041 <= cp) && (0x005a >= cp)) ||
1350 ((0x00c0 <= cp) && (0x00d6 >= cp)) ||
1351 ((0x00d8 <= cp) && (0x00de >= cp)) ||
1352 ((0x0391 <= cp) && (0x03a1 >= cp)) ||
1353 ((0x03a3 <= cp) && (0x03ab >= cp)) ||
1354 ((0x0410 <= cp) && (0x042f >= cp))) {
1356 }
else if ((0x0400 <= cp) && (0x040f >= cp)) {
1358 }
else if (((0x0100 <= cp) && (0x012f >= cp)) ||
1359 ((0x0132 <= cp) && (0x0137 >= cp)) ||
1360 ((0x014a <= cp) && (0x0177 >= cp)) ||
1361 ((0x0182 <= cp) && (0x0185 >= cp)) ||
1362 ((0x01a0 <= cp) && (0x01a5 >= cp)) ||
1363 ((0x01de <= cp) && (0x01ef >= cp)) ||
1364 ((0x01f8 <= cp) && (0x021f >= cp)) ||
1365 ((0x0222 <= cp) && (0x0233 >= cp)) ||
1366 ((0x0246 <= cp) && (0x024f >= cp)) ||
1367 ((0x03d8 <= cp) && (0x03ef >= cp)) ||
1368 ((0x0460 <= cp) && (0x0481 >= cp)) ||
1369 ((0x048a <= cp) && (0x04ff >= cp))) {
1371 }
else if (((0x0139 <= cp) && (0x0148 >= cp)) ||
1372 ((0x0179 <= cp) && (0x017e >= cp)) ||
1373 ((0x01af <= cp) && (0x01b0 >= cp)) ||
1374 ((0x01b3 <= cp) && (0x01b6 >= cp)) ||
1375 ((0x01cd <= cp) && (0x01dc >= cp))) {
1512 if (((0x0061 <= cp) && (0x007a >= cp)) ||
1513 ((0x00e0 <= cp) && (0x00f6 >= cp)) ||
1514 ((0x00f8 <= cp) && (0x00fe >= cp)) ||
1515 ((0x03b1 <= cp) && (0x03c1 >= cp)) ||
1516 ((0x03c3 <= cp) && (0x03cb >= cp)) ||
1517 ((0x0430 <= cp) && (0x044f >= cp))) {
1519 }
else if ((0x0450 <= cp) && (0x045f >= cp)) {
1521 }
else if (((0x0100 <= cp) && (0x012f >= cp)) ||
1522 ((0x0132 <= cp) && (0x0137 >= cp)) ||
1523 ((0x014a <= cp) && (0x0177 >= cp)) ||
1524 ((0x0182 <= cp) && (0x0185 >= cp)) ||
1525 ((0x01a0 <= cp) && (0x01a5 >= cp)) ||
1526 ((0x01de <= cp) && (0x01ef >= cp)) ||
1527 ((0x01f8 <= cp) && (0x021f >= cp)) ||
1528 ((0x0222 <= cp) && (0x0233 >= cp)) ||
1529 ((0x0246 <= cp) && (0x024f >= cp)) ||
1530 ((0x03d8 <= cp) && (0x03ef >= cp)) ||
1531 ((0x0460 <= cp) && (0x0481 >= cp)) ||
1532 ((0x048a <= cp) && (0x04ff >= cp))) {
1534 }
else if (((0x0139 <= cp) && (0x0148 >= cp)) ||
1535 ((0x0179 <= cp) && (0x017e >= cp)) ||
1536 ((0x01af <= cp) && (0x01b0 >= cp)) ||
1537 ((0x01b3 <= cp) && (0x01b6 >= cp)) ||
1538 ((0x01cd <= cp) && (0x01dc >= cp))) {
1679 if (0xf0 == (0xf8 & s[0])) {
1681 *out_codepoint = ((0x07 & s[0]) << 18) | ((0x3f & s[1]) << 12) |
1682 ((0x3f & s[2]) << 6) | (0x3f & s[3]);
1683 }
else if (0xe0 == (0xf0 & s[0])) {
1686 ((0x0f & s[0]) << 12) | ((0x3f & s[1]) << 6) | (0x3f & s[2]);
1687 }
else if (0xc0 == (0xe0 & s[0])) {
1689 *out_codepoint = ((0x1f & s[0]) << 6) | (0x3f & s[1]);
1692 *out_codepoint = s[0];
1697 }
while ((0 != (0x80 & s[0])) && (0x80 == (0xc0 & s[0])));
1703#undef utf8_constexpr14
1706#ifdef utf8_cplusplus
1710#if defined(__clang__)
1711# pragma clang diagnostic pop
utf8_constexpr14 utf8_nonnull utf8_pure utf8_int8_t * utf8chr(const utf8_int8_t *src, utf8_int32_t chr)
utf8_nonnull utf8_weak utf8_int8_t * utf8catcodepoint(utf8_int8_t *str, utf8_int32_t chr, size_t n)
utf8_weak utf8_int8_t * utf8dup(const utf8_int8_t *src)
utf8_constexpr14 utf8_nonnull utf8_pure size_t utf8len(const utf8_int8_t *str)
utf8_constexpr14 utf8_nonnull size_t utf8codepointcalcsize(const utf8_int8_t *str)
utf8_nonnull utf8_weak utf8_int8_t * utf8ncat(utf8_int8_t *utf8_restrict dst, const utf8_int8_t *utf8_restrict src, size_t n)
utf8_constexpr14 utf8_nonnull utf8_pure utf8_int8_t * utf8valid(const utf8_int8_t *str)
utf8_constexpr14 utf8_int32_t utf8uprcodepoint(utf8_int32_t cp)
utf8_constexpr14 utf8_nonnull utf8_pure int utf8ncmp(const utf8_int8_t *src1, const utf8_int8_t *src2, size_t n)
utf8_constexpr14 size_t utf8codepointsize(utf8_int32_t chr)
utf8_nonnull utf8_weak void utf8lwr(utf8_int8_t *utf8_restrict str)
utf8_nonnull utf8_weak void utf8upr(utf8_int8_t *utf8_restrict str)
utf8_constexpr14_impl utf8_int8_t * utf8fry(const utf8_int8_t *str)
utf8_constexpr14 utf8_nonnull utf8_pure size_t utf8size_lazy(const utf8_int8_t *str)
utf8_constexpr14 utf8_nonnull utf8_pure size_t utf8size(const utf8_int8_t *str)
utf8_nonnull utf8_weak utf8_int8_t * utf8cat(utf8_int8_t *utf8_restrict dst, const utf8_int8_t *utf8_restrict src)
utf8_constexpr14 utf8_nonnull utf8_pure size_t utf8cspn(const utf8_int8_t *src, const utf8_int8_t *reject)
utf8_constexpr14 utf8_nonnull utf8_int8_t * utf8codepoint(const utf8_int8_t *utf8_restrict str, utf8_int32_t *utf8_restrict out_codepoint)
utf8_constexpr14 int utf8isupper(utf8_int32_t chr)
utf8_constexpr14 utf8_nonnull utf8_int8_t * utf8rcodepoint(const utf8_int8_t *utf8_restrict str, utf8_int32_t *utf8_restrict out_codepoint)
utf8_constexpr14 int utf8islower(utf8_int32_t chr)
utf8_constexpr14 utf8_nonnull utf8_pure int utf8ncasecmp(const utf8_int8_t *src1, const utf8_int8_t *src2, size_t n)
utf8_constexpr14 utf8_nonnull utf8_pure utf8_int8_t * utf8str(const utf8_int8_t *haystack, const utf8_int8_t *needle)
utf8_constexpr14 utf8_nonnull utf8_pure int utf8casecmp(const utf8_int8_t *src1, const utf8_int8_t *src2)
utf8_weak utf8_int8_t * utf8dup_ex(const utf8_int8_t *src, utf8_int8_t *(*alloc_func_ptr)(utf8_int8_t *, size_t), utf8_int8_t *user_data)
utf8_weak utf8_int8_t * utf8ndup(const utf8_int8_t *src, size_t n)
utf8_nonnull utf8_weak int utf8makevalid(utf8_int8_t *str, const utf8_int32_t replacement)
utf8_weak utf8_int8_t * utf8ndup_ex(const utf8_int8_t *src, size_t n, utf8_int8_t *(*alloc_func_ptr)(utf8_int8_t *, size_t), utf8_int8_t *user_data)
utf8_nonnull utf8_weak utf8_int8_t * utf8ncpy(utf8_int8_t *utf8_restrict dst, const utf8_int8_t *utf8_restrict src, size_t n)
utf8_constexpr14 utf8_nonnull utf8_pure utf8_int8_t * utf8rchr(const utf8_int8_t *src, int chr)
utf8_constexpr14 utf8_int32_t utf8lwrcodepoint(utf8_int32_t cp)
#define utf8_constexpr14_impl
utf8_constexpr14 utf8_nonnull utf8_pure int utf8cmp(const utf8_int8_t *src1, const utf8_int8_t *src2)
utf8_constexpr14 utf8_nonnull utf8_pure utf8_int8_t * utf8casestr(const utf8_int8_t *haystack, const utf8_int8_t *needle)
utf8_nonnull utf8_weak utf8_int8_t * utf8cpy(utf8_int8_t *utf8_restrict dst, const utf8_int8_t *utf8_restrict src)
utf8_constexpr14_impl int utf8coll(const utf8_int8_t *src1, const utf8_int8_t *src2)
utf8_constexpr14 utf8_nonnull utf8_pure utf8_int8_t * utf8pbrk(const utf8_int8_t *str, const utf8_int8_t *accept)
utf8_constexpr14 utf8_nonnull utf8_pure size_t utf8spn(const utf8_int8_t *src, const utf8_int8_t *accept)
utf8_constexpr14 utf8_nonnull utf8_pure utf8_int8_t * utf8nvalid(const utf8_int8_t *str, size_t n)
utf8_constexpr14 utf8_nonnull utf8_pure size_t utf8nlen(const utf8_int8_t *str, size_t n)
utf8_constexpr14 utf8_nonnull utf8_pure size_t utf8nsize_lazy(const utf8_int8_t *str, size_t n)