format.c (8237B)
1 #include "format.h" 2 3 #include <assert.h> 4 #include <err.h> 5 #include <stdbool.h> 6 #include <stdlib.h> 7 #include <string.h> 8 #include <time.h> 9 #include <wchar.h> 10 11 static const char* kUtf8Ellipsis = "\xE2\x80\xA6"; 12 13 static bool is_unicode_modifier(wchar_t wc) { 14 return wc == 0x200D || (wc >= 0xFE00 && wc <= 0xFE0F); 15 } 16 17 static bool print_time_formatted(FILE* out, 18 time_t time, 19 int timezone_offset, 20 const char* format) { 21 assert(out != NULL); 22 assert(format != NULL); 23 // Reject any offset > 24 hours. 24 if (timezone_offset < -1440 || timezone_offset > 1440) { 25 warnx("invalid timezone offset: %d", timezone_offset); 26 return false; 27 } 28 time_t local_time = time + (timezone_offset * 60); 29 struct tm tm_buf; 30 struct tm* time_in = gmtime_r(&local_time, &tm_buf); 31 if (!time_in) { 32 return false; 33 } 34 35 char formatted_time[32]; 36 if (!strftime(formatted_time, sizeof(formatted_time), format, time_in)) { 37 err(1, "strftime"); 38 } 39 fprintf(out, "%s", formatted_time); 40 return true; 41 } 42 43 void print_time(FILE* out, time_t time, int timezone_offset) { 44 if (!print_time_formatted(out, time, timezone_offset, 45 "%a, %e %b %Y %H:%M:%S")) { 46 return; 47 } 48 49 char timezone_sign = timezone_offset < 0 ? '-' : '+'; 50 int abs_offset = abs(timezone_offset); 51 int timezone_hours = abs_offset / 60; 52 int timezone_mins = abs_offset % 60; 53 fprintf(out, " %c%02d%02d", timezone_sign, timezone_hours, timezone_mins); 54 } 55 56 void print_time_z(FILE* out, time_t time) { 57 print_time_formatted(out, time, 0, "%Y-%m-%dT%H:%M:%SZ"); 58 } 59 60 /* TODO: add timezone_offset to print_time_short. */ 61 void print_time_short(FILE* out, time_t time) { 62 print_time_formatted(out, time, 0, "%Y-%m-%d %H:%M"); 63 } 64 65 void print_percent_encoded(FILE* out, const char* str) { 66 assert(out != NULL); 67 assert(str != NULL); 68 static const char* hex_chars = "0123456789ABCDEF"; 69 70 size_t str_len = strlen(str); 71 for (size_t i = 0; i < str_len; i++) { 72 unsigned char uc = str[i]; 73 // NOTE: do not encode '/' for paths or ",-." 74 if (uc < ',' || uc >= 127 || (uc >= ':' && uc <= '@') || uc == '[' || 75 uc == ']') { 76 fprintf(out, "%%%c%c", hex_chars[(uc >> 4) & 0x0f], hex_chars[uc & 0x0f]); 77 } else { 78 fprintf(out, "%c", uc); 79 } 80 } 81 } 82 83 void print_xml_encoded(FILE* out, const char* str) { 84 assert(out != NULL); 85 assert(str != NULL); 86 print_xml_encoded_len(out, str, -1, true); 87 } 88 89 void print_xml_encoded_len(FILE* out, 90 const char* str, 91 ssize_t str_len, 92 bool output_crlf) { 93 assert(out != NULL); 94 assert(str != NULL); 95 96 size_t len = (str_len >= 0) ? (size_t)str_len : strlen(str); 97 for (size_t i = 0; i < len && str[i] != '\0'; i++) { 98 unsigned char c = (unsigned char)str[i]; 99 switch (c) { 100 case '<': 101 fprintf(out, "<"); 102 break; 103 case '>': 104 fprintf(out, ">"); 105 break; 106 case '\'': 107 fprintf(out, "'"); 108 break; 109 case '&': 110 fprintf(out, "&"); 111 break; 112 case '"': 113 fprintf(out, """); 114 break; 115 case '\t': 116 fprintf(out, "\t"); 117 break; 118 case '\r': 119 case '\n': 120 if (output_crlf) { 121 fprintf(out, "%c", c); 122 } 123 break; 124 default: 125 if (c >= 0x20) { 126 fprintf(out, "%c", c); 127 } 128 break; 129 } 130 } 131 } 132 133 void print_gopher_text(FILE* out, const char* str, bool output_lf) { 134 assert(out != NULL); 135 assert(str != NULL); 136 print_gopher_text_len(out, str, -1, output_lf); 137 } 138 139 void print_gopher_text_len(FILE* out, 140 const char* str, 141 ssize_t str_len, 142 bool output_lf) { 143 assert(out != NULL); 144 assert(str != NULL); 145 146 size_t len = (str_len >= 0) ? (size_t)str_len : strlen(str); 147 bool start_of_line = true; 148 for (size_t i = 0; i < len && str[i] != '\0'; i++) { 149 char c = str[i]; 150 if (output_lf && start_of_line && c == '[') { 151 fprintf(out, "[|"); 152 } else if (c == '\t') { 153 fprintf(out, " "); 154 } else if (c == '\r' || (!output_lf && c == '\n')) { 155 /* Ignore. */ 156 } else { 157 fprintf(out, "%c", c); 158 } 159 start_of_line = (c == '\n'); 160 } 161 } 162 163 void print_gopher_link(FILE* out, const char* str) { 164 assert(out != NULL); 165 assert(str != NULL); 166 167 for (size_t i = 0; str[i] != '\0'; i++) { 168 char c = str[i]; 169 if (c == '|') { 170 fprintf(out, "\\|"); 171 } else if (c == '\t') { 172 fprintf(out, " "); 173 } else if (c == '\r' || c == '\n') { 174 // Ignore. 175 } else { 176 fprintf(out, "%c", c); 177 } 178 } 179 } 180 181 static void print_padded_internal(FILE* out, 182 const char* str, 183 size_t width, 184 char pad_char, 185 bool gopher_markup) { 186 assert(out != NULL); 187 assert(str != NULL); 188 189 if (width == 0) { 190 return; 191 } 192 193 size_t display_width = 0; 194 size_t last_char_width = 0; 195 mbstate_t state; 196 memset(&state, 0, sizeof(state)); 197 198 const char* ptr = str; 199 size_t len = strlen(str); 200 const char* end = str + len; 201 202 while (ptr < end) { 203 wchar_t wc; 204 size_t bytes = mbrtowc(&wc, ptr, end - ptr, &state); 205 if (bytes == 0) { 206 break; 207 } 208 209 if (bytes == (size_t)-1 || bytes == (size_t)-2) { 210 // Invalid (-1) or incomplete (-2) UTF-8. Consume 1 byte. 211 if (display_width == width - 1 && ptr + 1 < end) { 212 fprintf(out, "%s", kUtf8Ellipsis); 213 display_width++; 214 break; 215 } else if (display_width < width) { 216 fprintf(out, "%c", *ptr); 217 display_width++; 218 last_char_width = 1; 219 } 220 ptr++; 221 memset(&state, 0, sizeof(state)); 222 continue; 223 } 224 225 int w = wcwidth(wc); 226 size_t char_width = (w < 0) ? 0 : w; 227 228 // Gopher-specific adjustments for character width. 229 if (gopher_markup) { 230 if (wc == L'|') { 231 char_width = 1; 232 } else if (wc == L'\t') { 233 // Tab expansion width is handled in the printing logic below. 234 char_width = 0; 235 const char* tptr = ptr + bytes; 236 for (size_t i = 0; i < 8 && display_width + char_width < width; i++) { 237 char_width++; 238 // If this isn't the end of the string, we might need an ellipsis. 239 if (display_width + char_width == width && tptr < end) { 240 break; 241 } 242 } 243 } else if (wc == L'\r' || wc == L'\n') { 244 char_width = 0; 245 } 246 } 247 248 if (display_width + char_width > width || 249 (ptr + bytes < end && display_width + char_width == width && 250 char_width > 0)) { 251 if (display_width < width) { 252 fprintf(out, "%s", kUtf8Ellipsis); 253 display_width++; 254 } 255 break; 256 } 257 258 if (gopher_markup && wc == L'|') { 259 fprintf(out, "\\|"); 260 display_width++; 261 last_char_width = 1; 262 } else if (gopher_markup && wc == L'\t') { 263 for (size_t i = 0; i < 8 && display_width < width; i++) { 264 fprintf(out, " "); 265 display_width++; 266 } 267 last_char_width = 1; 268 } else if (gopher_markup && (wc == L'\r' || wc == L'\n')) { 269 last_char_width = 0; 270 } else { 271 // Hack: handle zero-width joiner and variation selector. 272 if (is_unicode_modifier(wc)) { 273 display_width -= last_char_width; 274 char_width = 0; 275 } 276 277 fprintf(out, "%.*s", (int)bytes, ptr); 278 display_width += char_width; 279 last_char_width = char_width; 280 } 281 282 ptr += bytes; 283 } 284 285 // Pad remainder. 286 if (pad_char != '\0') { 287 while (display_width < width) { 288 fprintf(out, "%c", pad_char); 289 display_width++; 290 } 291 } 292 } 293 294 void print_gopher_link_padded(FILE* out, 295 const char* str, 296 size_t width, 297 char pad_char) { 298 print_padded_internal(out, str, width, pad_char, true); 299 } 300 301 void print_utf8_padded(FILE* out, 302 const char* str, 303 size_t width, 304 char pad_char) { 305 print_padded_internal(out, str, width, pad_char, false); 306 }