format.c (8216B)
1 #include "format.h" 2 3 #include <assert.h> 4 #include <err.h> 5 #include <stdlib.h> 6 #include <string.h> 7 #include <time.h> 8 #include <wchar.h> 9 10 static const char* kUtf8Ellipsis = "\xE2\x80\xA6"; 11 12 static bool is_unicode_modifier(wchar_t wc) { 13 return wc == 0x200D || (wc >= 0xFE00 && wc <= 0xFE0F); 14 } 15 16 static bool print_time_formatted(FILE* out, 17 time_t time, 18 int timezone_offset, 19 const char* format) { 20 assert(out != NULL); 21 assert(format != NULL); 22 // Reject any offset > 24 hours. 23 if (timezone_offset < -1440 || timezone_offset > 1440) { 24 warnx("invalid timezone offset: %d", timezone_offset); 25 return false; 26 } 27 time_t local_time = time + (timezone_offset * 60); 28 struct tm tm_buf; 29 struct tm* time_in = gmtime_r(&local_time, &tm_buf); 30 if (!time_in) { 31 return false; 32 } 33 34 char formatted_time[32]; 35 if (!strftime(formatted_time, sizeof(formatted_time), format, time_in)) { 36 err(1, "strftime"); 37 } 38 fprintf(out, "%s", formatted_time); 39 return true; 40 } 41 42 void print_time(FILE* out, time_t time, int timezone_offset) { 43 if (!print_time_formatted(out, time, timezone_offset, 44 "%a, %e %b %Y %H:%M:%S")) { 45 return; 46 } 47 48 char timezone_sign = timezone_offset < 0 ? '-' : '+'; 49 int abs_offset = abs(timezone_offset); 50 int timezone_hours = abs_offset / 60; 51 int timezone_mins = abs_offset % 60; 52 fprintf(out, " %c%02d%02d", timezone_sign, timezone_hours, timezone_mins); 53 } 54 55 void print_time_z(FILE* out, time_t time) { 56 print_time_formatted(out, time, 0, "%Y-%m-%dT%H:%M:%SZ"); 57 } 58 59 /* TODO: add timezone_offset to print_time_short. */ 60 void print_time_short(FILE* out, time_t time) { 61 print_time_formatted(out, time, 0, "%Y-%m-%d %H:%M"); 62 } 63 64 void print_percent_encoded(FILE* out, const char* str) { 65 assert(out != NULL); 66 assert(str != NULL); 67 static const char* hex_chars = "0123456789ABCDEF"; 68 69 size_t str_len = strlen(str); 70 for (size_t i = 0; i < str_len; i++) { 71 unsigned char uc = str[i]; 72 // NOTE: do not encode '/' for paths or ",-." 73 if (uc < ',' || uc >= 127 || (uc >= ':' && uc <= '@') || uc == '[' || 74 uc == ']') { 75 fprintf(out, "%%%c%c", hex_chars[(uc >> 4) & 0x0f], hex_chars[uc & 0x0f]); 76 } else { 77 fprintf(out, "%c", uc); 78 } 79 } 80 } 81 82 void print_xml_encoded(FILE* out, const char* str) { 83 assert(out != NULL); 84 assert(str != NULL); 85 print_xml_encoded_len(out, str, -1, true); 86 } 87 88 void print_xml_encoded_len(FILE* out, 89 const char* str, 90 ssize_t str_len, 91 bool output_crlf) { 92 assert(out != NULL); 93 assert(str != NULL); 94 95 size_t len = (str_len >= 0) ? (size_t)str_len : strlen(str); 96 for (size_t i = 0; i < len && str[i] != '\0'; i++) { 97 unsigned char c = (unsigned char)str[i]; 98 switch (c) { 99 case '<': 100 fprintf(out, "<"); 101 break; 102 case '>': 103 fprintf(out, ">"); 104 break; 105 case '\'': 106 fprintf(out, "'"); 107 break; 108 case '&': 109 fprintf(out, "&"); 110 break; 111 case '"': 112 fprintf(out, """); 113 break; 114 case '\t': 115 fprintf(out, "\t"); 116 break; 117 case '\r': 118 case '\n': 119 if (output_crlf) { 120 fprintf(out, "%c", c); 121 } 122 break; 123 default: 124 if (c >= 0x20) { 125 fprintf(out, "%c", c); 126 } 127 break; 128 } 129 } 130 } 131 132 void print_gopher_text(FILE* out, const char* str, bool output_lf) { 133 assert(out != NULL); 134 assert(str != NULL); 135 print_gopher_text_len(out, str, -1, output_lf); 136 } 137 138 void print_gopher_text_len(FILE* out, 139 const char* str, 140 ssize_t str_len, 141 bool output_lf) { 142 assert(out != NULL); 143 assert(str != NULL); 144 145 size_t len = (str_len >= 0) ? (size_t)str_len : strlen(str); 146 bool start_of_line = true; 147 for (size_t i = 0; i < len && str[i] != '\0'; i++) { 148 char c = str[i]; 149 if (output_lf && start_of_line && c == '[') { 150 fprintf(out, "[|"); 151 } else if (c == '\t') { 152 fprintf(out, " "); 153 } else if (c == '\r' || (!output_lf && c == '\n')) { 154 /* Ignore. */ 155 } else { 156 fprintf(out, "%c", c); 157 } 158 start_of_line = (c == '\n'); 159 } 160 } 161 162 void print_gopher_link(FILE* out, const char* str) { 163 assert(out != NULL); 164 assert(str != NULL); 165 166 for (size_t i = 0; str[i] != '\0'; i++) { 167 char c = str[i]; 168 if (c == '|') { 169 fprintf(out, "\\|"); 170 } else if (c == '\t') { 171 fprintf(out, " "); 172 } else if (c == '\r' || c == '\n') { 173 // Ignore. 174 } else { 175 fprintf(out, "%c", c); 176 } 177 } 178 } 179 180 static void print_padded_internal(FILE* out, 181 const char* str, 182 size_t width, 183 char pad_char, 184 bool gopher_markup) { 185 assert(out != NULL); 186 assert(str != NULL); 187 188 if (width == 0) { 189 return; 190 } 191 192 size_t display_width = 0; 193 size_t last_char_width = 0; 194 mbstate_t state; 195 memset(&state, 0, sizeof(state)); 196 197 const char* ptr = str; 198 size_t len = strlen(str); 199 const char* end = str + len; 200 201 while (ptr < end) { 202 wchar_t wc; 203 size_t bytes = mbrtowc(&wc, ptr, end - ptr, &state); 204 if (bytes == 0) { 205 break; 206 } 207 208 if (bytes == (size_t)-1 || bytes == (size_t)-2) { 209 // Invalid (-1) or incomplete (-2) UTF-8. Consume 1 byte. 210 if (display_width == width - 1 && ptr + 1 < end) { 211 fprintf(out, "%s", kUtf8Ellipsis); 212 display_width++; 213 break; 214 } else if (display_width < width) { 215 fprintf(out, "%c", *ptr); 216 display_width++; 217 last_char_width = 1; 218 } 219 ptr++; 220 memset(&state, 0, sizeof(state)); 221 continue; 222 } 223 224 int w = wcwidth(wc); 225 size_t char_width = (w < 0) ? 0 : w; 226 227 // Gopher-specific adjustments for character width. 228 if (gopher_markup) { 229 if (wc == L'|') { 230 char_width = 1; 231 } else if (wc == L'\t') { 232 // Tab expansion width is handled in the printing logic below. 233 char_width = 0; 234 const char* tptr = ptr + bytes; 235 for (size_t i = 0; i < 8 && display_width + char_width < width; i++) { 236 char_width++; 237 // If this isn't the end of the string, we might need an ellipsis. 238 if (display_width + char_width == width && tptr < end) { 239 break; 240 } 241 } 242 } else if (wc == L'\r' || wc == L'\n') { 243 char_width = 0; 244 } 245 } 246 247 if (display_width + char_width > width || 248 (ptr + bytes < end && display_width + char_width == width && 249 char_width > 0)) { 250 if (display_width < width) { 251 fprintf(out, "%s", kUtf8Ellipsis); 252 display_width++; 253 } 254 break; 255 } 256 257 if (gopher_markup && wc == L'|') { 258 fprintf(out, "\\|"); 259 display_width++; 260 last_char_width = 1; 261 } else if (gopher_markup && wc == L'\t') { 262 for (size_t i = 0; i < 8 && display_width < width; i++) { 263 fprintf(out, " "); 264 display_width++; 265 } 266 last_char_width = 1; 267 } else if (gopher_markup && (wc == L'\r' || wc == L'\n')) { 268 last_char_width = 0; 269 } else { 270 // Hack: handle zero-width joiner and variation selector. 271 if (is_unicode_modifier(wc)) { 272 display_width -= last_char_width; 273 char_width = 0; 274 } 275 276 fprintf(out, "%.*s", (int)bytes, ptr); 277 display_width += char_width; 278 last_char_width = char_width; 279 } 280 281 ptr += bytes; 282 } 283 284 // Pad remainder. 285 if (pad_char != '\0') { 286 while (display_width < width) { 287 fprintf(out, "%c", pad_char); 288 display_width++; 289 } 290 } 291 } 292 293 void print_gopher_link_padded(FILE* out, 294 const char* str, 295 size_t width, 296 char pad_char) { 297 print_padded_internal(out, str, width, pad_char, true); 298 } 299 300 void print_utf8_padded(FILE* out, 301 const char* str, 302 size_t width, 303 char pad_char) { 304 print_padded_internal(out, str, width, pad_char, false); 305 }