#include <locale.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <wchar.h>

// TODO - move to header
size_t json_compute_unicode(const char *input);
char *json_instance_unicode(const char *input);
void json_write_unicode(const char *input, FILE *out);
int json_write_unicode_to_buffer(const char *input, char *buffer, size_t buffer_size);
void json_write_filename_bytes(const char *input, size_t length, FILE *out);

/* Scratch space for one generated escape: "\uXXXX" is 6 chars plus the NUL. */
enum { JSON_ESCAPE_BUF = 8 };

/*
 * Decode the next character of [*cursor, end) and produce the bytes that
 * represent it inside a JSON string.
 *
 * On return *chunk points at those bytes (either a string literal, `scratch`,
 * or the original input for pass-through text), *cursor has been advanced past
 * the consumed input, and the chunk length is returned. The caller guarantees
 * *cursor < end.
 *
 * This is the single source of truth for the escaping rules, so the size
 * computation and both writers can never disagree about the output length.
 */
static size_t json_next_chunk(const char **cursor, const char *end, mbstate_t *st,
                              char *scratch, const char **chunk)
{
    const char *p = *cursor;
    wchar_t wc = 0;
    /* Bound the decoder by the remaining input so it never looks past the NUL. */
    size_t len = mbrtowc(&wc, p, (size_t)(end - p), st);

    if (len == (size_t)-1 || len == (size_t)-2 || len == 0) {
        /*
         * Undecodable (or incomplete) sequence: escape this single byte as
         * \u00XX, reset the shift state and resynchronise on the next byte.
         * len == 0 is treated the same way so the loop always makes progress.
         */
        int n = snprintf(scratch, JSON_ESCAPE_BUF, "\\u00%02X",
                         (unsigned)(unsigned char)*p);
        memset(st, 0, sizeof *st);
        *cursor = p + 1;
        *chunk = scratch;
        return n > 0 ? (size_t)n : 0;
    }

    *cursor = p + len;

    const char *two_char = NULL;
    switch (wc) {
    case L'"':  two_char = "\\\""; break;
    case L'\\': two_char = "\\\\"; break;
    case L'\b': two_char = "\\b";  break;
    case L'\f': two_char = "\\f";  break;
    case L'\n': two_char = "\\n";  break;
    case L'\r': two_char = "\\r";  break;
    case L'\t': two_char = "\\t";  break;
    default:    break;
    }
    if (two_char != NULL) {
        *chunk = two_char;
        return 2;
    }

    /* Unsigned compare so a (theoretical) negative wchar_t is not mistaken
     * for a control character. */
    if ((unsigned long)wc < 0x20UL) {
        /* Remaining control characters: \u0000..\u001F */
        int n = snprintf(scratch, JSON_ESCAPE_BUF, "\\u00%02X", (unsigned)wc);
        *chunk = scratch;
        return n > 0 ? (size_t)n : 0;
    }

    /* Everything else is copied through as the original multibyte bytes. */
    *chunk = p;
    return len;
}

/*
 * Copy as much of `chunk` as fits into `buffer` at offset `written`, always
 * leaving room for the terminating NUL. Does nothing once the buffer is full.
 */
static void json_buffer_append(char *buffer, size_t buffer_size, size_t written,
                               const char *chunk, size_t len)
{
    if (buffer == NULL || buffer_size == 0 || written >= buffer_size - 1) {
        return;
    }
    size_t room = buffer_size - 1 - written;
    memcpy(buffer + written, chunk, len < room ? len : room);
}

/*
 * Return a newly allocated, NUL-terminated JSON string literal (including the
 * surrounding double quotes) for `input`.
 *
 * The caller owns the result and must free() it. Returns NULL when `input`
 * is NULL or the allocation fails.
 */
char *json_instance_unicode(const char *input)
{
    if (input == NULL) {
        return NULL;
    }

    size_t req = json_compute_unicode(input) + 1; // +1 for the terminating NUL
    char *result = calloc(1, req);
    if (result == NULL) {
        return NULL;
    }

    json_write_unicode_to_buffer(input, result, req);
    return result;
}

/*
 * Return the number of bytes json_write_unicode()/json_write_unicode_to_buffer()
 * produce for `input`, including both quotes but excluding the NUL.
 *
 * `input` must be a non-NULL, NUL-terminated string. Note that this sets the
 * process-wide LC_CTYPE locale from the environment.
 */
size_t json_compute_unicode(const char *input)
{
    setlocale(LC_CTYPE, ""); // Enable locale-aware decoding

    const char *p = input;
    const char *end = input + strlen(input);
    mbstate_t st;
    memset(&st, 0, sizeof st);

    size_t total = 2; // opening and closing quote
    char scratch[JSON_ESCAPE_BUF];
    while (p < end) {
        const char *chunk;
        total += json_next_chunk(&p, end, &st, scratch, &chunk);
    }
    return total;
}

/*
 * Write `input` as a quoted JSON string into `buffer` (capacity `buffer_size`).
 *
 * Follows snprintf semantics: at most buffer_size - 1 bytes are stored, the
 * result is always NUL-terminated when buffer_size > 0, and the return value
 * is the length the complete output would have (excluding the NUL), so a
 * return value >= buffer_size means the output was truncated. The count is
 * converted to int to keep the existing interface.
 *
 * `input` must be a non-NULL, NUL-terminated string. Note that this sets the
 * process-wide LC_CTYPE locale from the environment.
 */
int json_write_unicode_to_buffer(const char *input, char *buffer, size_t buffer_size)
{
    setlocale(LC_CTYPE, ""); // Enable locale-aware decoding

    const char *p = input;
    const char *end = input + strlen(input);
    mbstate_t st;
    memset(&st, 0, sizeof st);

    size_t written = 0;
    char scratch[JSON_ESCAPE_BUF];

    // Open quote
    json_buffer_append(buffer, buffer_size, written, "\"", 1);
    written += 1;

    while (p < end) {
        const char *chunk;
        size_t len = json_next_chunk(&p, end, &st, scratch, &chunk);
        json_buffer_append(buffer, buffer_size, written, chunk, len);
        written += len;
    }

    // Close quote
    json_buffer_append(buffer, buffer_size, written, "\"", 1);
    written += 1;

    if (buffer != NULL && buffer_size > 0) {
        buffer[written < buffer_size ? written : buffer_size - 1] = '\0';
    }
    return (int)written;
}

/*
 * Write `input` to `out` as a quoted JSON string.
 *
 * `input` must be a non-NULL, NUL-terminated string. Stream write errors are
 * not reported by this function; check ferror(out) if that matters. Note that
 * this sets the process-wide LC_CTYPE locale from the environment.
 */
void json_write_unicode(const char *input, FILE *out)
{
    setlocale(LC_CTYPE, ""); // Enable locale-aware decoding

    const char *p = input;
    const char *end = input + strlen(input);
    mbstate_t st;
    memset(&st, 0, sizeof st);

    char scratch[JSON_ESCAPE_BUF];

    fputc('"', out); // Open quote
    while (p < end) {
        const char *chunk;
        size_t len = json_next_chunk(&p, end, &st, scratch, &chunk);
        fwrite(chunk, 1, len, out);
    }
    fputc('"', out); // Close quote
}

/*
 * Write `length` raw bytes of `input` to `out` with JSON escaping applied
 * byte-by-byte. No surrounding quotes are written.
 *
 * Bytes >= 0x80 are emitted as \uDC80..\uDCFF (surrogateescape encoding).
 * Control bytes below 0x20 without a short escape are emitted as \u00XX:
 * the surrogateescape range only covers 0x80..0xFF, so mapping them to
 * \uDCxx would produce code points that do not round-trip.
 */
void json_write_filename_bytes(const char *input, size_t length, FILE *out)
{
    for (size_t i = 0; i < length; i++) {
        unsigned char c = (unsigned char)input[i];
        switch (c) {
        case '"':  fputs("\\\"", out); break;
        case '\\': fputs("\\\\", out); break;
        case '\b': fputs("\\b", out);  break;
        case '\f': fputs("\\f", out);  break;
        case '\n': fputs("\\n", out);  break;
        case '\r': fputs("\\r", out);  break;
        case '\t': fputs("\\t", out);  break;
        default:
            if (c >= 0x80) {
                // surrogateescape encoding
                fprintf(out, "\\uDC%02X", (unsigned)c);
            } else if (c < 0x20) {
                // plain control character
                fprintf(out, "\\u00%02X", (unsigned)c);
            } else {
                fputc(c, out);
            }
            break;
        }
    }
}