commit 6d1e33e1549fcdcdbb03c956cd566e35aaaef2bd
parent c4080dcdb82c89b83170f36df9ada4d604f19407
Author: Chris Bracken <chris@bracken.jp>
Date: Thu, 19 Feb 2026 13:39:05 +0900
format: filter out control chars in print_xml_encoded
Don't print characters in the control char range. Since we're comparing
char to a range (>= 0x20), we need to cast to unsigned so that chars with
the high bit set aren't treated as < 0 and filtered out by mistake.
Adds some tests.
Diffstat:
2 files changed, 57 insertions(+), 2 deletions(-)
diff --git a/src/format.c b/src/format.c
@@ -90,7 +90,7 @@ void print_xml_encoded_len(FILE* out,
bool output_crlf) {
size_t len = (str_len >= 0) ? (size_t)str_len : strlen(str);
for (size_t i = 0; i < len && str[i] != '\0'; i++) {
- char c = str[i];
+ unsigned char c = (unsigned char)str[i];
switch (c) {
case '<':
fprintf(out, "&lt;");
@@ -107,6 +107,9 @@ void print_xml_encoded_len(FILE* out,
case '"':
fprintf(out, "&quot;");
break;
+ case '\t':
+ fprintf(out, "\t");
+ break;
case '\r':
case '\n':
if (output_crlf) {
@@ -114,7 +117,10 @@ void print_xml_encoded_len(FILE* out,
}
break;
default:
- fprintf(out, "%c", c);
+ if (c >= 0x20) {
+ fprintf(out, "%c", c);
+ }
+ break;
}
}
}
diff --git a/src/format_tests.c b/src/format_tests.c
@@ -256,6 +256,55 @@ UTEST(print_xml_encoded_len, ZeroLength) {
free(buf);
}
+UTEST(print_xml_encoded_len, ControlCharacters) {
+ char* buf = NULL;
+ size_t size = 0;
+ FILE* out = open_memstream(&buf, &size);
+ ASSERT_NE(NULL, out);
+
+ /* Tab (\t), LF (\n), CR (\r) should be preserved.
+ * Other control characters like \x01 (SOH) should be filtered. */
+ const char* test_str = "a\t\n\r\x01" "b";
+ print_xml_encoded_len(out, test_str, -1, true);
+ fclose(out);
+
+ EXPECT_STREQ("a\t\n\rb", buf);
+
+ free(buf);
+}
+
+UTEST(print_xml_encoded_len, MultiByteUtf8) {
+ char* buf = NULL;
+ size_t size = 0;
+ FILE* out = open_memstream(&buf, &size);
+ ASSERT_NE(NULL, out);
+
+ /* UTF-8 'é' is 0xC3 0xA9. These should be preserved as-is. */
+ const char* test_str = "caf\xC3\xA9";
+ print_xml_encoded_len(out, test_str, -1, true);
+ fclose(out);
+
+ EXPECT_STREQ("caf\xC3\xA9", buf);
+
+ free(buf);
+}
+
+UTEST(print_xml_encoded_len, EmbeddedNull) {
+ char* buf = NULL;
+ size_t size = 0;
+ FILE* out = open_memstream(&buf, &size);
+ ASSERT_NE(NULL, out);
+
+ /* The function should stop at \0 even if str_len is larger. */
+ const char* test_str = "abc\0def";
+ print_xml_encoded_len(out, test_str, 10, true);
+ fclose(out);
+
+ EXPECT_STREQ("abc", buf);
+
+ free(buf);
+}
+
UTEST(print_gopher_text_len, BasicWithLf) {
char* buf = NULL;
size_t size = 0;