178 lines
4.9 KiB
C
178 lines
4.9 KiB
C
#include <stdio.h>
|
|
#include <stdlib.h>
|
|
#include <string.h>
|
|
#include <wchar.h>
|
|
#include <locale.h>
|
|
|
|
|
|
//TODO - move to header
|
|
size_t json_compute_unicode(const char* input);
|
|
char* json_instance_unicode(const char* input);
|
|
void json_write_unicode(const char* input, FILE *out);
|
|
int json_write_unicode_to_buffer(const char* input, char* buffer, size_t buffer_size);
|
|
|
|
/**
 * Build a heap-allocated JSON string literal (quoted and escaped) from `input`.
 *
 * The buffer is sized with json_compute_unicode() plus one byte for the
 * terminating NUL and then filled by json_write_unicode_to_buffer().
 *
 * @param input NUL-terminated multibyte string to encode; may be NULL.
 * @return Newly allocated string that the caller must free(), or NULL when
 *         `input` is NULL, the allocation fails, or the writer reports more
 *         bytes than the computed size (the literal would be truncated).
 */
char* json_instance_unicode(const char* input) {
    if (input == NULL) {
        return NULL;
    }

    size_t req = json_compute_unicode(input) + 1; // +1 for the terminating NUL
    char *result = calloc(1, req);
    if (result == NULL) {
        return NULL;
    }

    // The writer returns the number of bytes of the escaped literal; if that
    // does not fit in `req` (including the NUL) the result is not a complete
    // JSON string, so report failure instead of handing it out.
    int written = json_write_unicode_to_buffer(input, result, req);
    if (written < 0 || (size_t)written >= req) {
        free(result);
        return NULL;
    }

    return result;
}
|
|
|
|
|
|
/**
 * Compute the length, in bytes, of the JSON string literal produced for
 * `input`, including the two surrounding quotes and excluding the
 * terminating NUL.
 *
 * Per decoded character the count is:
 *   - 2 for `"`, `\`, and the short escapes \b \f \n \r \t
 *   - 6 for other characters below 0x20 (written as \u00XX)
 *   - 6 for a byte that mbrtowc() cannot decode (written as \u00XX)
 *   - the multibyte length for everything else (copied through unchanged)
 *
 * Side effect: calls setlocale(LC_CTYPE, ""), so decoding follows the
 * environment's locale and the process-wide LC_CTYPE setting is changed.
 *
 * @param input NUL-terminated multibyte string; must not be NULL.
 * @return Number of bytes of the escaped literal.
 */
size_t json_compute_unicode(const char* input) {
    setlocale(LC_CTYPE, ""); // Enable locale-aware decoding

    size_t result = 2; // opening and closing quote

    const char *p = input;
    mbstate_t st;
    memset(&st, 0, sizeof(st));

    while (*p) {
        wchar_t wc;
        size_t len = mbrtowc(&wc, p, MB_CUR_MAX, &st);

        if (len == (size_t)-1 || len == (size_t)-2) {
            // Undecodable byte: the writers emit "\u00XX", which is 6 bytes.
            result += 6;
            p++;
            memset(&st, 0, sizeof(st)); // state is unspecified after an error
            continue;
        }

        if (len == 0) {
            // Decoded a null wide character; stop rather than loop forever.
            break;
        }

        switch (wc) {
        case L'"':
        case L'\\':
        case L'\b':
        case L'\f':
        case L'\n':
        case L'\r':
        case L'\t':
            result += 2; // backslash plus one character
            break;
        default:
            if (wc < 0x20) {
                result += 6; // remaining control characters: \u0000..\u001F
            } else {
                result += len; // multibyte sequence is copied verbatim
            }
            break;
        }

        p += len;
    }

    return result;
}
|
|
|
|
/*
 * Append `n` bytes from `src` at logical offset `*written`, copying only what
 * fits while keeping one byte of `buffer` free for the terminating NUL.
 * `*written` always advances by `n`, so it tracks the untruncated length.
 */
static void json_buffer_append(char *buffer, size_t buffer_size, size_t *written,
                               const char *src, size_t n) {
    if (buffer != NULL && *written < buffer_size) {
        size_t room = buffer_size - *written - 1;
        size_t copy = n < room ? n : room;
        memcpy(buffer + *written, src, copy);
    }
    *written += n;
}

/**
 * Write `input` as a JSON string literal (quoted and escaped) into `buffer`.
 *
 * `"`, `\` and \b \f \n \r \t use their two-character escapes, other
 * characters below 0x20 and bytes mbrtowc() cannot decode are written as
 * \u00XX, and everything else is copied through as its original bytes.
 *
 * Output never exceeds `buffer_size` bytes and is NUL-terminated whenever
 * `buffer_size` is non-zero; excess output is dropped.
 *
 * Side effect: calls setlocale(LC_CTYPE, ""), so decoding follows the
 * environment's locale and the process-wide LC_CTYPE setting is changed.
 *
 * @param input       NUL-terminated multibyte string; must not be NULL.
 * @param buffer      Destination buffer.
 * @param buffer_size Capacity of `buffer` in bytes, including the NUL.
 * @return Length of the complete escaped literal, excluding the NUL. A value
 *         >= `buffer_size` means the output was truncated.
 */
int json_write_unicode_to_buffer(const char* input, char* buffer, size_t buffer_size) {
    setlocale(LC_CTYPE, ""); // Enable locale-aware decoding

    const char *p = input;
    mbstate_t st;
    memset(&st, 0, sizeof(st));
    size_t written = 0;
    char esc[16]; // scratch space for "\u00XX" escapes

    // Open quote
    json_buffer_append(buffer, buffer_size, &written, "\"", 1);

    while (*p) {
        wchar_t wc;
        size_t len = mbrtowc(&wc, p, MB_CUR_MAX, &st);

        if (len == (size_t)-1 || len == (size_t)-2) {
            // Undecodable byte: fall back to a \u00XX escape of the raw byte
            int n = snprintf(esc, sizeof esc, "\\u00%02X",
                             (unsigned int)(unsigned char)*p);
            if (n > 0) {
                json_buffer_append(buffer, buffer_size, &written, esc, (size_t)n);
            }
            p++;
            memset(&st, 0, sizeof(st)); // state is unspecified after an error
            continue;
        }

        if (len == 0) {
            // Decoded a null wide character; stop rather than loop forever.
            break;
        }

        const char *short_escape = NULL;
        switch (wc) {
        case L'"':  short_escape = "\\\""; break;
        case L'\\': short_escape = "\\\\"; break;
        case L'\b': short_escape = "\\b";  break;
        case L'\f': short_escape = "\\f";  break;
        case L'\n': short_escape = "\\n";  break;
        case L'\r': short_escape = "\\r";  break;
        case L'\t': short_escape = "\\t";  break;
        default:    break;
        }

        if (short_escape != NULL) {
            json_buffer_append(buffer, buffer_size, &written, short_escape, 2);
        } else if (wc < 0x20) {
            // Control characters: output as \u0000..\u001F
            int n = snprintf(esc, sizeof esc, "\\u00%02X", (unsigned int)wc);
            if (n > 0) {
                json_buffer_append(buffer, buffer_size, &written, esc, (size_t)n);
            }
        } else {
            // Copy the multibyte sequence through unchanged
            json_buffer_append(buffer, buffer_size, &written, p, len);
        }

        p += len;
    }

    // Close quote
    json_buffer_append(buffer, buffer_size, &written, "\"", 1);

    if (buffer != NULL && buffer_size > 0) {
        buffer[written < buffer_size ? written : buffer_size - 1] = '\0';
    }

    return (int)written;
}
|
|
|
|
|
|
/**
 * Write `input` to `out` as a JSON string literal (quoted and escaped).
 *
 * `"`, `\` and \b \f \n \r \t use their two-character escapes, other
 * characters below 0x20 and bytes mbrtowc() cannot decode are written as
 * \u00XX, and everything else is copied through as its original bytes.
 *
 * Side effect: calls setlocale(LC_CTYPE, ""), so decoding follows the
 * environment's locale and the process-wide LC_CTYPE setting is changed.
 *
 * @param input NUL-terminated multibyte string; must not be NULL.
 * @param out   Stream to write to; write errors are not reported.
 */
void json_write_unicode(const char* input, FILE *out) {
    setlocale(LC_CTYPE, ""); // Enable locale-aware decoding

    const char *p = input;
    mbstate_t st;
    memset(&st, 0, sizeof(st));

    fputc('"', out); // Open quote

    while (*p) {
        wchar_t wc;
        size_t len = mbrtowc(&wc, p, MB_CUR_MAX, &st);

        if (len == (size_t)-1 || len == (size_t)-2) {
            // Undecodable byte: fall back to a \u00XX escape of the raw byte
            fprintf(out, "\\u00%02X", (unsigned int)(unsigned char)*p);
            p++;
            memset(&st, 0, sizeof(st)); // state is unspecified after an error
            continue;
        }

        if (len == 0) {
            // Decoded a null wide character; stop rather than loop forever.
            break;
        }

        switch (wc) {
        case L'"':  fputs("\\\"", out); break;
        case L'\\': fputs("\\\\", out); break;
        case L'\b': fputs("\\b", out);  break;
        case L'\f': fputs("\\f", out);  break;
        case L'\n': fputs("\\n", out);  break;
        case L'\r': fputs("\\r", out);  break;
        case L'\t': fputs("\\t", out);  break;
        default:
            if (wc < 0x20) {
                // Control characters: output as \u0000..\u001F.
                // %X expects unsigned int, so convert the wchar_t explicitly.
                fprintf(out, "\\u00%02X", (unsigned int)wc);
            } else {
                // Copy the multibyte sequence through unchanged
                fwrite(p, 1, len, out);
            }
            break;
        }

        p += len;
    }

    fputc('"', out); // Close quote
}
|
|
|
|
|
|
|
|
|
|
/**
 * Write `length` raw bytes of `input` to `out`, escaped for use inside a
 * JSON string. No surrounding quotes are written.
 *
 * Each byte is handled independently (no multibyte decoding):
 *   - `"`, `\` and \b \f \n \r \t use their two-character escapes
 *   - other control bytes below 0x20 are written as \u00XX
 *   - bytes >= 0x80 are written as \uDCXX (surrogateescape: U+DC80..U+DCFF)
 *   - everything else is written unchanged
 *
 * @param input  Byte buffer to encode; need not be NUL-terminated.
 * @param length Number of bytes to read from `input`.
 * @param out    Stream to write to; write errors are not reported.
 */
void json_write_filename_bytes(const char *input, size_t length, FILE *out) {
    for (size_t i = 0; i < length; i++) {
        unsigned char c = (unsigned char)input[i];

        switch (c) {
        case '"':  fputs("\\\"", out); break;
        case '\\': fputs("\\\\", out); break;
        case '\b': fputs("\\b", out);  break;
        case '\f': fputs("\\f", out);  break;
        case '\n': fputs("\\n", out);  break;
        case '\r': fputs("\\r", out);  break;
        case '\t': fputs("\\t", out);  break;

        default:
            if (c >= 0x80) {
                // surrogateescape encoding: byte 0xNN -> lone surrogate U+DCNN
                fprintf(out, "\\uDC%02X", (unsigned int)c);
            } else if (c < 0x20) {
                // Control bytes are ordinary code points, not surrogates
                fprintf(out, "\\u00%02X", (unsigned int)c);
            } else {
                fputc(c, out);
            }
            break;
        }
    }
}
|