format.c - gout - A static git page generator

format.c (8237B)
      1 #include "format.h"
      2 
      3 #include <assert.h>
      4 #include <err.h>
      5 #include <stdbool.h>
      6 #include <stdlib.h>
      7 #include <string.h>
      8 #include <time.h>
      9 #include <wchar.h>
     10 
     11 static const char* kUtf8Ellipsis = "\xE2\x80\xA6";
     12 
     13 static bool is_unicode_modifier(wchar_t wc) {
     14   return wc == 0x200D || (wc >= 0xFE00 && wc <= 0xFE0F);
     15 }
     16 
     17 static bool print_time_formatted(FILE* out,
     18                                  time_t time,
     19                                  int timezone_offset,
     20                                  const char* format) {
     21   assert(out != NULL);
     22   assert(format != NULL);
     23   // Reject any offset > 24 hours.
     24   if (timezone_offset < -1440 || timezone_offset > 1440) {
     25     warnx("invalid timezone offset: %d", timezone_offset);
     26     return false;
     27   }
     28   time_t local_time = time + (timezone_offset * 60);
     29   struct tm tm_buf;
     30   struct tm* time_in = gmtime_r(&local_time, &tm_buf);
     31   if (!time_in) {
     32     return false;
     33   }
     34 
     35   char formatted_time[32];
     36   if (!strftime(formatted_time, sizeof(formatted_time), format, time_in)) {
     37     err(1, "strftime");
     38   }
     39   fprintf(out, "%s", formatted_time);
     40   return true;
     41 }
     42 
     43 void print_time(FILE* out, time_t time, int timezone_offset) {
     44   if (!print_time_formatted(out, time, timezone_offset,
     45                             "%a, %e %b %Y %H:%M:%S")) {
     46     return;
     47   }
     48 
     49   char timezone_sign = timezone_offset < 0 ? '-' : '+';
     50   int abs_offset = abs(timezone_offset);
     51   int timezone_hours = abs_offset / 60;
     52   int timezone_mins = abs_offset % 60;
     53   fprintf(out, " %c%02d%02d", timezone_sign, timezone_hours, timezone_mins);
     54 }
     55 
     56 void print_time_z(FILE* out, time_t time) {
     57   print_time_formatted(out, time, 0, "%Y-%m-%dT%H:%M:%SZ");
     58 }
     59 
     60 /* TODO: add timezone_offset to print_time_short. */
     61 void print_time_short(FILE* out, time_t time) {
     62   print_time_formatted(out, time, 0, "%Y-%m-%d %H:%M");
     63 }
     64 
     65 void print_percent_encoded(FILE* out, const char* str) {
     66   assert(out != NULL);
     67   assert(str != NULL);
     68   static const char* hex_chars = "0123456789ABCDEF";
     69 
     70   size_t str_len = strlen(str);
     71   for (size_t i = 0; i < str_len; i++) {
     72     unsigned char uc = str[i];
     73     // NOTE: do not encode '/' for paths or ",-."
     74     if (uc < ',' || uc >= 127 || (uc >= ':' && uc <= '@') || uc == '[' ||
     75         uc == ']') {
     76       fprintf(out, "%%%c%c", hex_chars[(uc >> 4) & 0x0f], hex_chars[uc & 0x0f]);
     77     } else {
     78       fprintf(out, "%c", uc);
     79     }
     80   }
     81 }
     82 
     83 void print_xml_encoded(FILE* out, const char* str) {
     84   assert(out != NULL);
     85   assert(str != NULL);
     86   print_xml_encoded_len(out, str, -1, true);
     87 }
     88 
     89 void print_xml_encoded_len(FILE* out,
     90                            const char* str,
     91                            ssize_t str_len,
     92                            bool output_crlf) {
     93   assert(out != NULL);
     94   assert(str != NULL);
     95 
     96   size_t len = (str_len >= 0) ? (size_t)str_len : strlen(str);
     97   for (size_t i = 0; i < len && str[i] != '\0'; i++) {
     98     unsigned char c = (unsigned char)str[i];
     99     switch (c) {
    100       case '<':
    101         fprintf(out, "&lt;");
    102         break;
    103       case '>':
    104         fprintf(out, "&gt;");
    105         break;
    106       case '\'':
    107         fprintf(out, "&#39;");
    108         break;
    109       case '&':
    110         fprintf(out, "&amp;");
    111         break;
    112       case '"':
    113         fprintf(out, "&quot;");
    114         break;
    115       case '\t':
    116         fprintf(out, "\t");
    117         break;
    118       case '\r':
    119       case '\n':
    120         if (output_crlf) {
    121           fprintf(out, "%c", c);
    122         }
    123         break;
    124       default:
    125         if (c >= 0x20) {
    126           fprintf(out, "%c", c);
    127         }
    128         break;
    129     }
    130   }
    131 }
    132 
    133 void print_gopher_text(FILE* out, const char* str, bool output_lf) {
    134   assert(out != NULL);
    135   assert(str != NULL);
    136   print_gopher_text_len(out, str, -1, output_lf);
    137 }
    138 
    139 void print_gopher_text_len(FILE* out,
    140                            const char* str,
    141                            ssize_t str_len,
    142                            bool output_lf) {
    143   assert(out != NULL);
    144   assert(str != NULL);
    145 
    146   size_t len = (str_len >= 0) ? (size_t)str_len : strlen(str);
    147   bool start_of_line = true;
    148   for (size_t i = 0; i < len && str[i] != '\0'; i++) {
    149     char c = str[i];
    150     if (output_lf && start_of_line && c == '[') {
    151       fprintf(out, "[|");
    152     } else if (c == '\t') {
    153       fprintf(out, "        ");
    154     } else if (c == '\r' || (!output_lf && c == '\n')) {
    155       /* Ignore. */
    156     } else {
    157       fprintf(out, "%c", c);
    158     }
    159     start_of_line = (c == '\n');
    160   }
    161 }
    162 
    163 void print_gopher_link(FILE* out, const char* str) {
    164   assert(out != NULL);
    165   assert(str != NULL);
    166 
    167   for (size_t i = 0; str[i] != '\0'; i++) {
    168     char c = str[i];
    169     if (c == '|') {
    170       fprintf(out, "\\|");
    171     } else if (c == '\t') {
    172       fprintf(out, "        ");
    173     } else if (c == '\r' || c == '\n') {
    174       // Ignore.
    175     } else {
    176       fprintf(out, "%c", c);
    177     }
    178   }
    179 }
    180 
// Writes `str` to `out` so that it occupies at most `width` display columns.
//
// The string is decoded one multibyte character at a time with mbrtowc() and
// each character's column count is taken from wcwidth(). When the string does
// not fit, output stops early and kUtf8Ellipsis is written in the last
// available column. If `pad_char` is not '\0', the remaining columns up to
// `width` are filled with it. Nothing is written when `width` is 0.
//
// When `gopher_markup` is true:
//   - '|' is written as "\|" and counted as one column,
//   - a tab is written as up to eight spaces,
//   - '\r' and '\n' are dropped.
static void print_padded_internal(FILE* out,
                                  const char* str,
                                  size_t width,
                                  char pad_char,
                                  bool gopher_markup) {
  assert(out != NULL);
  assert(str != NULL);

  // Also guards the `width - 1` computation below against wrapping around.
  if (width == 0) {
    return;
  }

  // Columns emitted so far, and the column count recorded for the most
  // recently emitted character (consulted by the modifier hack below).
  size_t display_width = 0;
  size_t last_char_width = 0;
  mbstate_t state;
  memset(&state, 0, sizeof(state));

  const char* ptr = str;
  size_t len = strlen(str);
  const char* end = str + len;

  while (ptr < end) {
    wchar_t wc;
    size_t bytes = mbrtowc(&wc, ptr, end - ptr, &state);
    // mbrtowc() returns 0 when it decodes a NUL character.
    if (bytes == 0) {
      break;
    }

    if (bytes == (size_t)-1 || bytes == (size_t)-2) {
      // Invalid (-1) or incomplete (-2) UTF-8. Consume 1 byte.
      if (display_width == width - 1 && ptr + 1 < end) {
        // Only one column is left and more input follows: truncate here.
        fprintf(out, "%s", kUtf8Ellipsis);
        display_width++;
        break;
      } else if (display_width < width) {
        // Pass the raw byte through and count it as one column.
        fprintf(out, "%c", *ptr);
        display_width++;
        last_char_width = 1;
      }
      ptr++;
      // Reset the conversion state so decoding restarts cleanly at the next
      // byte.
      memset(&state, 0, sizeof(state));
      continue;
    }

    // wcwidth() returns a negative value for characters it cannot measure;
    // those are counted as zero columns.
    int w = wcwidth(wc);
    size_t char_width = (w < 0) ? 0 : w;

    // Gopher-specific adjustments for character width.
    if (gopher_markup) {
      if (wc == L'|') {
        char_width = 1;
      } else if (wc == L'\t') {
        // Tab expansion width is handled in the printing logic below.
        // Here the tab's width is counted up to 8 columns, capped at the
        // columns that remain.
        char_width = 0;
        const char* tptr = ptr + bytes;
        for (size_t i = 0; i < 8 && display_width + char_width < width; i++) {
          char_width++;
          // If this isn't the end of the string, we might need an ellipsis.
          if (display_width + char_width == width && tptr < end) {
            break;
          }
        }
      } else if (wc == L'\r' || wc == L'\n') {
        char_width = 0;
      }
    }

    // Truncate when this character would overflow `width`, or when it would
    // exactly fill `width` while more input follows (the last column is then
    // used for the ellipsis instead).
    if (display_width + char_width > width ||
        (ptr + bytes < end && display_width + char_width == width &&
         char_width > 0)) {
      if (display_width < width) {
        fprintf(out, "%s", kUtf8Ellipsis);
        display_width++;
      }
      break;
    }

    if (gopher_markup && wc == L'|') {
      // Escaped pipe: two bytes of output, one display column.
      fprintf(out, "\\|");
      display_width++;
      last_char_width = 1;
    } else if (gopher_markup && wc == L'\t') {
      // Expand the tab to at most eight spaces without exceeding `width`.
      for (size_t i = 0; i < 8 && display_width < width; i++) {
        fprintf(out, " ");
        display_width++;
      }
      last_char_width = 1;
    } else if (gopher_markup && (wc == L'\r' || wc == L'\n')) {
      // Line breaks are dropped from the output.
      last_char_width = 0;
    } else {
      // Hack: handle zero-width joiner and variation selector.
      // The previous character's columns are subtracted again and the
      // modifier itself is counted as zero columns.
      if (is_unicode_modifier(wc)) {
        display_width -= last_char_width;
        char_width = 0;
      }

      // Copy the character's original bytes through unchanged.
      fprintf(out, "%.*s", (int)bytes, ptr);
      display_width += char_width;
      last_char_width = char_width;
    }

    ptr += bytes;
  }

  // Pad remainder.
  if (pad_char != '\0') {
    while (display_width < width) {
      fprintf(out, "%c", pad_char);
      display_width++;
    }
  }
}
    293 
    294 void print_gopher_link_padded(FILE* out,
    295                               const char* str,
    296                               size_t width,
    297                               char pad_char) {
    298   print_padded_internal(out, str, width, pad_char, true);
    299 }
    300 
    301 void print_utf8_padded(FILE* out,
    302                        const char* str,
    303                        size_t width,
    304                        char pad_char) {
    305   print_padded_internal(out, str, width, pad_char, false);
    306 }
	gout A static git page generator
	git clone https://git.bracken.jp/gout.git
	Log \| Files \| Refs \| README \| LICENSE