gout

A static git page generator
git clone https://git.bracken.jp/gout.git
Log | Files | Refs | README | LICENSE

commit 6d1e33e1549fcdcdbb03c956cd566e35aaaef2bd
parent c4080dcdb82c89b83170f36df9ada4d604f19407
Author: Chris Bracken <chris@bracken.jp>
Date:   Thu, 19 Feb 2026 13:39:05 +0900

format: filter out control chars in print_xml_encoded

Don't print characters in the control char range. Since we're comparing
char to a range (>= 0x20) we need to cast to unsigned so that chars with
the high bit set aren't treated as < 0 and missed.

Adds some tests.

Diffstat:
Msrc/format.c | 10++++++++--
Msrc/format_tests.c | 49+++++++++++++++++++++++++++++++++++++++++++++++++
2 files changed, 57 insertions(+), 2 deletions(-)

diff --git a/src/format.c b/src/format.c @@ -90,7 +90,7 @@ void print_xml_encoded_len(FILE* out, bool output_crlf) { size_t len = (str_len >= 0) ? (size_t)str_len : strlen(str); for (size_t i = 0; i < len && str[i] != '\0'; i++) { - char c = str[i]; + unsigned char c = (unsigned char)str[i]; switch (c) { case '<': fprintf(out, "&lt;"); @@ -107,6 +107,9 @@ void print_xml_encoded_len(FILE* out, case '"': fprintf(out, "&quot;"); break; + case '\t': + fprintf(out, "\t"); + break; case '\r': case '\n': if (output_crlf) { @@ -114,7 +117,10 @@ void print_xml_encoded_len(FILE* out, } break; default: - fprintf(out, "%c", c); + if (c >= 0x20) { + fprintf(out, "%c", c); + } + break; } } } diff --git a/src/format_tests.c b/src/format_tests.c @@ -256,6 +256,55 @@ UTEST(print_xml_encoded_len, ZeroLength) { free(buf); } +UTEST(print_xml_encoded_len, ControlCharacters) { + char* buf = NULL; + size_t size = 0; + FILE* out = open_memstream(&buf, &size); + ASSERT_NE(NULL, out); + + /* Tab (\t), LF (\n), CR (\r) should be preserved. + * Other control characters like \x01 (SOH) should be filtered. */ + const char* test_str = "a\t\n\r\x01" "b"; + print_xml_encoded_len(out, test_str, -1, true); + fclose(out); + + EXPECT_STREQ("a\t\n\rb", buf); + + free(buf); +} + +UTEST(print_xml_encoded_len, MultiByteUtf8) { + char* buf = NULL; + size_t size = 0; + FILE* out = open_memstream(&buf, &size); + ASSERT_NE(NULL, out); + + /* UTF-8 'é' is 0xC3 0xA9. These should be preserved as-is. */ + const char* test_str = "caf\xC3\xA9"; + print_xml_encoded_len(out, test_str, -1, true); + fclose(out); + + EXPECT_STREQ("caf\xC3\xA9", buf); + + free(buf); +} + +UTEST(print_xml_encoded_len, EmbeddedNull) { + char* buf = NULL; + size_t size = 0; + FILE* out = open_memstream(&buf, &size); + ASSERT_NE(NULL, out); + + /* The function should stop at \0 even if str_len is larger. 
*/ + const char* test_str = "abc\0def"; + print_xml_encoded_len(out, test_str, 10, true); + fclose(out); + + EXPECT_STREQ("abc", buf); + + free(buf); +} + UTEST(print_gopher_text_len, BasicWithLf) { char* buf = NULL; size_t size = 0;