Initial commit
This commit is contained in:
177
json-writer.c
Normal file
177
json-writer.c
Normal file
@@ -0,0 +1,177 @@
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
#include <wchar.h>
|
||||
#include <locale.h>
|
||||
|
||||
|
||||
// TODO: move these declarations into a header.

/* Size in bytes of the quoted, escaped form of `input` (no terminator counted). */
size_t json_compute_unicode(const char *input);

/* Newly allocated, NUL-terminated quoted/escaped copy of `input`. */
char *json_instance_unicode(const char *input);

/* Writes the quoted, escaped form of `input` to the stream `out`. */
void json_write_unicode(const char *input, FILE *out);

/* Writes the quoted, escaped form of `input` into `buffer` (capacity `buffer_size`). */
int json_write_unicode_to_buffer(const char *input, char *buffer, size_t buffer_size);
|
||||
|
||||
/**
 * Build a heap-allocated, NUL-terminated JSON string literal from `input`.
 *
 * The buffer is sized with json_compute_unicode() and filled with
 * json_write_unicode_to_buffer().
 *
 * @param input  NUL-terminated multibyte string to encode.
 * @return Buffer obtained from calloc() that the caller must free(), or NULL
 *         when `input` is NULL or the allocation fails.
 */
char* json_instance_unicode(const char* input) {
    if (input == NULL) {
        return NULL;
    }

    size_t req = json_compute_unicode(input) + 1; // +1 for the NUL terminator
    char *result = calloc(1, req);
    if (result == NULL) {
        return NULL; // out of memory: never hand a NULL buffer to the writer
    }

    // A mismatch between `req` and the written length is not treated as an
    // error here; the writer's return value is deliberately ignored for now.
    json_write_unicode_to_buffer(input, result, req);
    return result;
}
|
||||
|
||||
|
||||
/**
 * Compute how many bytes the quoted, escaped JSON form of `input` occupies,
 * not counting a NUL terminator.
 *
 * The accounting mirrors the writers in this file: two bytes for the
 * surrounding quotes, two bytes for each short escape (\" \\ \b \f \n \r \t),
 * six bytes for each \u00XX escape (other control characters and bytes that
 * cannot be decoded), and the raw byte length for everything else.
 *
 * Side effect: calls setlocale(LC_CTYPE, "") so decoding follows the
 * environment's locale.
 *
 * @param input  NUL-terminated multibyte string; must not be NULL.
 * @return Required size in bytes, excluding the terminator.
 */
size_t json_compute_unicode(const char* input) {
    setlocale(LC_CTYPE, ""); // Enable locale-aware decoding

    size_t result = 2; // opening and closing quote

    const char *p = input;
    mbstate_t st;
    memset(&st, 0, sizeof(st));

    while (*p) {
        wchar_t wc;
        size_t len = mbrtowc(&wc, p, MB_CUR_MAX, &st);

        if (len == (size_t)-1 || len == (size_t)-2) {
            // Undecodable byte: the writers emit it as \u00XX, which is
            // six bytes (not four), so reserve six here as well.
            result += 6;
            p++;
            memset(&st, 0, sizeof(st));
            continue;
        }
        if (len == 0) {
            // Decoder reported a terminator; stop instead of spinning on
            // a zero-length advance.
            break;
        }

        switch (wc) {
        case L'"':
        case L'\\':
        case L'\b':
        case L'\f':
        case L'\n':
        case L'\r':
        case L'\t':
            result += 2; // two-character escape
            break;
        default:
            if (wc < 0x20) {
                result += 6; // control character written as \u00XX
            } else {
                result += len; // bytes are copied through unchanged
            }
            break;
        }

        p += len;
    }

    return result;
}
|
||||
|
||||
/* Append `n` bytes from `src` at logical offset `*written`. Only the part
 * that fits below the terminator slot is copied; `*written` always advances
 * by the full `n` so the caller learns the size the output really needs. */
static void json_buf_append(char *buffer, size_t buffer_size, size_t *written,
                            const char *src, size_t n) {
    if (buffer != NULL && buffer_size > 0 && *written < buffer_size - 1) {
        size_t room = buffer_size - 1 - *written;
        size_t take = n < room ? n : room;
        memcpy(buffer + *written, src, take);
        buffer[*written + take] = '\0';
    }
    *written += n;
}

/* Append `code` formatted as a \u00XX escape. */
static void json_buf_append_hex(char *buffer, size_t buffer_size,
                                size_t *written, unsigned int code) {
    char esc[16]; // "\u00" plus at most 8 hex digits plus NUL always fits
    int n = snprintf(esc, sizeof esc, "\\u00%02X", code);
    if (n > 0) {
        json_buf_append(buffer, buffer_size, written, esc, (size_t)n);
    }
}

/**
 * Write `input` as a quoted, escaped JSON string into `buffer`.
 *
 * Escaping rules: \" \\ \b \f \n \r \t use their two-character escapes,
 * other characters below 0x20 and bytes that cannot be decoded are written
 * as \u00XX, and every other multibyte character is copied through as-is.
 *
 * Never writes more than `buffer_size` bytes. When `buffer_size` is non-zero
 * the result is always NUL-terminated, truncated if necessary. The return
 * value follows snprintf(): the number of bytes the complete output needs,
 * excluding the terminator, so a return >= `buffer_size` signals truncation.
 * `buffer` may be NULL when `buffer_size` is 0 (size query).
 *
 * Side effect: calls setlocale(LC_CTYPE, "") so decoding follows the
 * environment's locale.
 *
 * @param input        NUL-terminated multibyte string; must not be NULL.
 * @param buffer       Destination buffer.
 * @param buffer_size  Capacity of `buffer` in bytes.
 * @return Length of the complete encoded string, excluding the terminator.
 */
int json_write_unicode_to_buffer(const char* input, char* buffer, size_t buffer_size) {
    setlocale(LC_CTYPE, ""); // Enable locale-aware decoding

    const char *p = input;
    mbstate_t st;
    memset(&st, 0, sizeof(st));
    size_t written = 0;

    if (buffer != NULL && buffer_size > 0) {
        buffer[0] = '\0'; // guarantee termination even if nothing fits
    }

    // Open quote
    json_buf_append(buffer, buffer_size, &written, "\"", 1);

    while (*p) {
        wchar_t wc;
        size_t len = mbrtowc(&wc, p, MB_CUR_MAX, &st);

        if (len == (size_t)-1 || len == (size_t)-2) {
            // Undecodable byte: fall back to a hex escape of the raw byte
            json_buf_append_hex(buffer, buffer_size, &written,
                                (unsigned int)(unsigned char)*p);
            p++;
            memset(&st, 0, sizeof(st));
            continue;
        }
        if (len == 0) {
            break; // decoder reported a terminator; avoid a zero-length spin
        }

        const char *esc = NULL;
        switch (wc) {
        case L'"':  esc = "\\\""; break;
        case L'\\': esc = "\\\\"; break;
        case L'\b': esc = "\\b";  break;
        case L'\f': esc = "\\f";  break;
        case L'\n': esc = "\\n";  break;
        case L'\r': esc = "\\r";  break;
        case L'\t': esc = "\\t";  break;
        default: break;
        }

        if (esc != NULL) {
            json_buf_append(buffer, buffer_size, &written, esc, 2);
        } else if (wc < 0x20) {
            // Control characters: output as \u0000..\u001F
            json_buf_append_hex(buffer, buffer_size, &written, (unsigned int)wc);
        } else {
            // Copy the character's bytes through unchanged
            json_buf_append(buffer, buffer_size, &written, p, len);
        }

        p += len;
    }

    // Close quote
    json_buf_append(buffer, buffer_size, &written, "\"", 1);
    return (int)written;
}
|
||||
|
||||
|
||||
/**
 * Write `input` to `out` as a quoted, escaped JSON string.
 *
 * Escaping rules: \" \\ \b \f \n \r \t use their two-character escapes,
 * other characters below 0x20 and bytes that cannot be decoded are written
 * as \u00XX, and every other multibyte character is copied through as-is.
 *
 * Side effect: calls setlocale(LC_CTYPE, "") so decoding follows the
 * environment's locale. Stream write errors are not reported.
 *
 * @param input  NUL-terminated multibyte string; must not be NULL.
 * @param out    Stream to write to.
 */
void json_write_unicode(const char* input, FILE *out) {
    setlocale(LC_CTYPE, ""); // Enable locale-aware decoding

    const char *p = input;
    mbstate_t st;
    memset(&st, 0, sizeof(st));

    fputc('"', out); // Open quote

    while (*p) {
        wchar_t wc;
        size_t len = mbrtowc(&wc, p, MB_CUR_MAX, &st);

        if (len == (size_t)-1 || len == (size_t)-2) {
            // Undecodable byte: fall back to a hex escape of the raw byte
            fprintf(out, "\\u00%02X", (unsigned int)(unsigned char)*p);
            p++;
            memset(&st, 0, sizeof(st));
            continue;
        }
        if (len == 0) {
            break; // decoder reported a terminator; avoid a zero-length spin
        }

        switch (wc) {
        case L'"':  fputs("\\\"", out); break;
        case L'\\': fputs("\\\\", out); break;
        case L'\b': fputs("\\b", out);  break;
        case L'\f': fputs("\\f", out);  break;
        case L'\n': fputs("\\n", out);  break;
        case L'\r': fputs("\\r", out);  break;
        case L'\t': fputs("\\t", out);  break;
        default:
            if (wc < 0x20) {
                // Control characters: output as \u0000..\u001F. %X expects
                // an unsigned int, so convert the wchar_t explicitly.
                fprintf(out, "\\u00%02X", (unsigned int)wc);
            } else {
                // Copy the character's bytes through unchanged
                fwrite(p, 1, len, out);
            }
            break;
        }

        p += len;
    }

    fputc('"', out); // Close quote
}
|
||||
|
||||
|
||||
|
||||
|
||||
/**
 * Write `length` raw bytes of `input` to `out` as the body of a JSON string.
 * No surrounding quotes are emitted.
 *
 * - " \ and the \b \f \n \r \t controls use their two-character escapes.
 * - Remaining control bytes below 0x20 are written as \u00XX.
 * - Bytes 0x80..0xFF are written as \uDC80..\uDCFF (surrogateescape), so the
 *   original byte can be recovered from the low surrogate.
 * - All other bytes are written unchanged.
 *
 * @param input   Bytes to encode; may contain embedded NULs.
 * @param length  Number of bytes to read from `input`.
 * @param out     Stream to write to.
 */
void json_write_filename_bytes(const char *input, size_t length, FILE *out) {
    for (size_t i = 0; i < length; i++) {
        unsigned char c = (unsigned char)input[i];

        switch (c) {
        case '"':  fputs("\\\"", out); break;
        case '\\': fputs("\\\\", out); break;
        case '\b': fputs("\\b", out);  break;
        case '\f': fputs("\\f", out);  break;
        case '\n': fputs("\\n", out);  break;
        case '\r': fputs("\\r", out);  break;
        case '\t': fputs("\\t", out);  break;

        default:
            if (c < 0x20) {
                // ASCII control byte: a plain \u00XX escape. The
                // surrogateescape range only covers 0x80..0xFF, so a
                // \uDCxx form here would not map back to this byte.
                fprintf(out, "\\u00%02X", (unsigned int)c);
            } else if (c >= 0x80) {
                // surrogateescape encoding of a non-ASCII byte
                fprintf(out, "\\uDC%02X", (unsigned int)c);
            } else {
                fputc(c, out);
            }
            break;
        }
    }
}
|
||||
Reference in New Issue
Block a user