gout

A static git page generator
git clone https://git.bracken.jp/gout.git
Log | Files | Refs | README | LICENSE

format.c (8216B)


      1 #include "format.h"
      2 
      3 #include <assert.h>
      4 #include <err.h>
      5 #include <stdlib.h>
      6 #include <string.h>
      7 #include <time.h>
      8 #include <wchar.h>
      9 
// UTF-8 encoding of U+2026 HORIZONTAL ELLIPSIS; written in place of text that
// is cut off when padded output is truncated.
static const char* kUtf8Ellipsis = "\xE2\x80\xA6";
     11 
     12 static bool is_unicode_modifier(wchar_t wc) {
     13   return wc == 0x200D || (wc >= 0xFE00 && wc <= 0xFE0F);
     14 }
     15 
     16 static bool print_time_formatted(FILE* out,
     17                                  time_t time,
     18                                  int timezone_offset,
     19                                  const char* format) {
     20   assert(out != NULL);
     21   assert(format != NULL);
     22   // Reject any offset > 24 hours.
     23   if (timezone_offset < -1440 || timezone_offset > 1440) {
     24     warnx("invalid timezone offset: %d", timezone_offset);
     25     return false;
     26   }
     27   time_t local_time = time + (timezone_offset * 60);
     28   struct tm tm_buf;
     29   struct tm* time_in = gmtime_r(&local_time, &tm_buf);
     30   if (!time_in) {
     31     return false;
     32   }
     33 
     34   char formatted_time[32];
     35   if (!strftime(formatted_time, sizeof(formatted_time), format, time_in)) {
     36     err(1, "strftime");
     37   }
     38   fprintf(out, "%s", formatted_time);
     39   return true;
     40 }
     41 
     42 void print_time(FILE* out, time_t time, int timezone_offset) {
     43   if (!print_time_formatted(out, time, timezone_offset,
     44                             "%a, %e %b %Y %H:%M:%S")) {
     45     return;
     46   }
     47 
     48   char timezone_sign = timezone_offset < 0 ? '-' : '+';
     49   int abs_offset = abs(timezone_offset);
     50   int timezone_hours = abs_offset / 60;
     51   int timezone_mins = abs_offset % 60;
     52   fprintf(out, " %c%02d%02d", timezone_sign, timezone_hours, timezone_mins);
     53 }
     54 
     55 void print_time_z(FILE* out, time_t time) {
     56   print_time_formatted(out, time, 0, "%Y-%m-%dT%H:%M:%SZ");
     57 }
     58 
     59 /* TODO: add timezone_offset to print_time_short. */
     60 void print_time_short(FILE* out, time_t time) {
     61   print_time_formatted(out, time, 0, "%Y-%m-%d %H:%M");
     62 }
     63 
     64 void print_percent_encoded(FILE* out, const char* str) {
     65   assert(out != NULL);
     66   assert(str != NULL);
     67   static const char* hex_chars = "0123456789ABCDEF";
     68 
     69   size_t str_len = strlen(str);
     70   for (size_t i = 0; i < str_len; i++) {
     71     unsigned char uc = str[i];
     72     // NOTE: do not encode '/' for paths or ",-."
     73     if (uc < ',' || uc >= 127 || (uc >= ':' && uc <= '@') || uc == '[' ||
     74         uc == ']') {
     75       fprintf(out, "%%%c%c", hex_chars[(uc >> 4) & 0x0f], hex_chars[uc & 0x0f]);
     76     } else {
     77       fprintf(out, "%c", uc);
     78     }
     79   }
     80 }
     81 
     82 void print_xml_encoded(FILE* out, const char* str) {
     83   assert(out != NULL);
     84   assert(str != NULL);
     85   print_xml_encoded_len(out, str, -1, true);
     86 }
     87 
     88 void print_xml_encoded_len(FILE* out,
     89                            const char* str,
     90                            ssize_t str_len,
     91                            bool output_crlf) {
     92   assert(out != NULL);
     93   assert(str != NULL);
     94 
     95   size_t len = (str_len >= 0) ? (size_t)str_len : strlen(str);
     96   for (size_t i = 0; i < len && str[i] != '\0'; i++) {
     97     unsigned char c = (unsigned char)str[i];
     98     switch (c) {
     99       case '<':
    100         fprintf(out, "&lt;");
    101         break;
    102       case '>':
    103         fprintf(out, "&gt;");
    104         break;
    105       case '\'':
    106         fprintf(out, "&#39;");
    107         break;
    108       case '&':
    109         fprintf(out, "&amp;");
    110         break;
    111       case '"':
    112         fprintf(out, "&quot;");
    113         break;
    114       case '\t':
    115         fprintf(out, "\t");
    116         break;
    117       case '\r':
    118       case '\n':
    119         if (output_crlf) {
    120           fprintf(out, "%c", c);
    121         }
    122         break;
    123       default:
    124         if (c >= 0x20) {
    125           fprintf(out, "%c", c);
    126         }
    127         break;
    128     }
    129   }
    130 }
    131 
    132 void print_gopher_text(FILE* out, const char* str, bool output_lf) {
    133   assert(out != NULL);
    134   assert(str != NULL);
    135   print_gopher_text_len(out, str, -1, output_lf);
    136 }
    137 
    138 void print_gopher_text_len(FILE* out,
    139                            const char* str,
    140                            ssize_t str_len,
    141                            bool output_lf) {
    142   assert(out != NULL);
    143   assert(str != NULL);
    144 
    145   size_t len = (str_len >= 0) ? (size_t)str_len : strlen(str);
    146   bool start_of_line = true;
    147   for (size_t i = 0; i < len && str[i] != '\0'; i++) {
    148     char c = str[i];
    149     if (output_lf && start_of_line && c == '[') {
    150       fprintf(out, "[|");
    151     } else if (c == '\t') {
    152       fprintf(out, "        ");
    153     } else if (c == '\r' || (!output_lf && c == '\n')) {
    154       /* Ignore. */
    155     } else {
    156       fprintf(out, "%c", c);
    157     }
    158     start_of_line = (c == '\n');
    159   }
    160 }
    161 
    162 void print_gopher_link(FILE* out, const char* str) {
    163   assert(out != NULL);
    164   assert(str != NULL);
    165 
    166   for (size_t i = 0; str[i] != '\0'; i++) {
    167     char c = str[i];
    168     if (c == '|') {
    169       fprintf(out, "\\|");
    170     } else if (c == '\t') {
    171       fprintf(out, "        ");
    172     } else if (c == '\r' || c == '\n') {
    173       // Ignore.
    174     } else {
    175       fprintf(out, "%c", c);
    176     }
    177   }
    178 }
    179 
// Writes `str` to `out` so that it occupies at most `width` display columns.
// Text that does not fit is cut and ends in a one-column ellipsis; if
// `pad_char` is not '\0', any remaining columns are filled with `pad_char`.
// Nothing is written when `width` is 0.
//
// Characters are decoded with mbrtowc() and measured with wcwidth(); bytes
// that fail to decode are written raw and counted as one column each.
// NOTE(review): the UTF-8 handling relies on the process running with a UTF-8
// LC_CTYPE locale -- confirm that the program sets one before calling this.
//
// When `gopher_markup` is true, '|' is written as "\|" (counted as one
// column), a tab is expanded to at most 8 spaces, and CR/LF are dropped.
static void print_padded_internal(FILE* out,
                                  const char* str,
                                  size_t width,
                                  char pad_char,
                                  bool gopher_markup) {
  assert(out != NULL);
  assert(str != NULL);

  if (width == 0) {
    return;
  }

  // Columns consumed so far, and the columns taken by the most recently
  // written character (needed by the modifier hack further down).
  size_t display_width = 0;
  size_t last_char_width = 0;
  mbstate_t state;
  memset(&state, 0, sizeof(state));

  const char* ptr = str;
  size_t len = strlen(str);
  const char* end = str + len;

  while (ptr < end) {
    wchar_t wc;
    size_t bytes = mbrtowc(&wc, ptr, end - ptr, &state);
    // A return of 0 means a NUL wide character was decoded: stop.
    if (bytes == 0) {
      break;
    }

    if (bytes == (size_t)-1 || bytes == (size_t)-2) {
      // Invalid (-1) or incomplete (-2) UTF-8. Consume 1 byte.
      if (display_width == width - 1 && ptr + 1 < end) {
        // Only one column is left and more input follows: spend it on the
        // ellipsis rather than on the raw byte.
        fprintf(out, "%s", kUtf8Ellipsis);
        display_width++;
        break;
      } else if (display_width < width) {
        fprintf(out, "%c", *ptr);
        display_width++;
        last_char_width = 1;
      }
      ptr++;
      // Start decoding afresh at the next byte.
      memset(&state, 0, sizeof(state));
      continue;
    }

    // wcwidth() returns a negative value for characters it cannot measure;
    // those are treated as taking no columns.
    int w = wcwidth(wc);
    size_t char_width = (w < 0) ? 0 : w;

    // Gopher-specific adjustments for character width.
    if (gopher_markup) {
      if (wc == L'|') {
        char_width = 1;
      } else if (wc == L'\t') {
        // Work out how many columns (at most 8, and never past `width`) the
        // tab may occupy; the spaces themselves are written in the printing
        // step below.
        char_width = 0;
        const char* tptr = ptr + bytes;
        for (size_t i = 0; i < 8 && display_width + char_width < width; i++) {
          char_width++;
          // If this isn't the end of the string, we might need an ellipsis.
          if (display_width + char_width == width && tptr < end) {
            break;
          }
        }
      } else if (wc == L'\r' || wc == L'\n') {
        char_width = 0;
      }
    }

    // Truncate when this character would overflow `width`, or when it would
    // exactly fill it while more input still follows: the final column is
    // reserved for the ellipsis in that case.
    if (display_width + char_width > width ||
        (ptr + bytes < end && display_width + char_width == width &&
         char_width > 0)) {
      if (display_width < width) {
        fprintf(out, "%s", kUtf8Ellipsis);
        display_width++;
      }
      break;
    }

    if (gopher_markup && wc == L'|') {
      // Two bytes are written, but the escaped bar counts as one column.
      fprintf(out, "\\|");
      display_width++;
      last_char_width = 1;
    } else if (gopher_markup && wc == L'\t') {
      for (size_t i = 0; i < 8 && display_width < width; i++) {
        fprintf(out, " ");
        display_width++;
      }
      last_char_width = 1;
    } else if (gopher_markup && (wc == L'\r' || wc == L'\n')) {
      // Dropped from the output entirely.
      last_char_width = 0;
    } else {
      // Hack: handle zero-width joiner and variation selector.
      // The modifier itself takes no columns, and the columns of the character
      // written just before it are subtracted from the running total again.
      if (is_unicode_modifier(wc)) {
        display_width -= last_char_width;
        char_width = 0;
      }

      // Copy the character's original bytes through unchanged.
      fprintf(out, "%.*s", (int)bytes, ptr);
      display_width += char_width;
      last_char_width = char_width;
    }

    ptr += bytes;
  }

  // Pad remainder.
  if (pad_char != '\0') {
    while (display_width < width) {
      fprintf(out, "%c", pad_char);
      display_width++;
    }
  }
}
    292 
    293 void print_gopher_link_padded(FILE* out,
    294                               const char* str,
    295                               size_t width,
    296                               char pad_char) {
    297   print_padded_internal(out, str, width, pad_char, true);
    298 }
    299 
    300 void print_utf8_padded(FILE* out,
    301                        const char* str,
    302                        size_t width,
    303                        char pad_char) {
    304   print_padded_internal(out, str, width, pad_char, false);
    305 }