agate

Simple Gemini server for static files
git clone https://github.com/mbrubeck/agate.git
Log | Files | Refs | README

metadata.rs (9487B)


      1 use configparser::ini::Ini;
      2 use glob::{glob_with, MatchOptions};
      3 use std::collections::BTreeMap;
      4 use std::path::{Path, PathBuf};
      5 use std::time::SystemTime;
      6 
      /// File name of the sidecar file that holds the per-file metadata.
      const SIDECAR_FILENAME: &str = ".meta";
      8 
      9 /// A struct to store a string of metadata for each file retrieved from
     10 /// sidecar files with the name given by `SIDECAR_FILENAME`.
     11 ///
     12 /// These sidecar file's lines should have the format
     13 /// ```text
     14 /// <filename>:<metadata>
     15 /// ```
     16 /// where `<filename>` is only a filename (not a path) of a file that resides
     17 /// in the same directory and `<metadata>` is the metadata to be stored.
     18 /// Lines that start with optional whitespace and `#` are ignored, as are lines
     19 /// that do not fit the basic format.
     20 /// Both parts are stripped of any leading and/or trailing whitespace.
     21 pub(crate) struct FileOptions {
     22     /// Stores the paths of the side files and when they were last read.
     23     /// By comparing this to the last write time, we can know if the file
     24     /// has changed.
     25     databases_read: BTreeMap<PathBuf, SystemTime>,
     26     /// Stores the metadata for each file
     27     file_meta: BTreeMap<PathBuf, PresetMeta>,
     28     /// The default value to return
     29     default: PresetMeta,
     30 }
     31 
     /// The different kinds of metadata a line in the sidecar file can assign to
     /// a file.
     #[derive(Clone, Debug, PartialEq, Eq)]
     pub(crate) enum PresetMeta {
         /// A value that starts with a semicolon in the sidecar file, or an
         /// empty value (to overwrite the default language command line flag).
         /// ```text
         /// index.gmi: ;lang=en-GB
         /// ```
         /// The content is interpreted as MIME parameters and is appended to what
         /// agate guesses as the MIME type if the respective file can be found.
         Parameters(String),
         /// A value that is neither a `Parameters` value nor a `FullHeader` value.
         /// ```text
         /// strange.file: text/plain; lang=ee
         /// ```
         /// Agate will send the complete value as the MIME type of the request if
         /// the respective file can be found (i.e. a `20` status code).
         FullMime(String),
         /// A value that starts with a digit between 1 and 6 inclusive followed by
         /// another digit and a space (U+0020). In the categories defined by the
         /// Gemini specification you can pick a defined or non-defined status code.
         /// ```text
         /// gone.gmi: 52 This file is no longer available.
         /// ```
         /// The first field is the two-digit status code, the second field is
         /// everything after the separator. Another whitespace character in place
         /// of the space is accepted with a warning and treated as a space.
         ///
         /// Agate will send this header line, CR, LF, and nothing else. Agate will
         /// not try to access the requested file.
         FullHeader(u8, String),
     }
     61 
     62 impl FileOptions {
     63     pub(crate) fn new(default: PresetMeta) -> Self {
     64         Self {
     65             databases_read: BTreeMap::new(),
     66             file_meta: BTreeMap::new(),
     67             default,
     68         }
     69     }
     70 
     71     /// Checks wether the database for the directory of the specified file is
     72     /// still up to date and re-reads it if outdated or not yet read.
     73     fn update(&mut self, file: &Path) {
     74         let mut db = if super::ARGS.central_config {
     75             super::ARGS.content_dir.clone()
     76         } else {
     77             file.parent().expect("no parent directory").to_path_buf()
     78         };
     79         db.push(SIDECAR_FILENAME);
     80 
     81         let should_read = if let Ok(metadata) = db.metadata() {
     82             if !metadata.is_file() {
     83                 // it exists, but it is a directory
     84                 false
     85             } else if let (Ok(modified), Some(last_read)) =
     86                 (metadata.modified(), self.databases_read.get(&db))
     87             {
     88                 // check that it was last modified before the read
     89                 // if the times are the same, we might have read the old file
     90                 &modified >= last_read
     91             } else {
     92                 // either the filesystem does not support last modified
     93                 // metadata, so we have to read it again every time; or the
     94                 // file exists but was not read before, so we have to read it
     95                 true
     96             }
     97         } else {
     98             // the file probably does not exist
     99             false
    100         };
    101 
    102         if should_read {
    103             self.read_database(&db);
    104         }
    105     }
    106 
    107     /// (Re)reads a specified sidecar file.
    108     /// This function will allways try to read the file, even if it is current.
    109     fn read_database(&mut self, db: &Path) {
    110         log::debug!("reading database {:?}", db);
    111 
    112         let mut ini = Ini::new_cs();
    113         ini.set_default_section("mime");
    114         ini.set_comment_symbols(&['#']);
    115         let map = ini
    116             .load(db.to_str().expect("config path not UTF-8"))
    117             .and_then(|mut sections| {
    118                 sections
    119                     .remove("mime")
    120                     .ok_or_else(|| "no \"mime\" or default section".to_string())
    121             });
    122         self.databases_read
    123             .insert(db.to_path_buf(), SystemTime::now());
    124         let files = match map {
    125             Ok(section) => section,
    126             Err(err) => {
    127                 log::error!("invalid config file {:?}: {}", db, err);
    128                 return;
    129             }
    130         };
    131 
    132         for (rel_path, header) in files {
    133             // treat unassigned keys as if they had an empty value
    134             let header = header.unwrap_or_default();
    135 
    136             // generate workspace-relative path
    137             let mut path = db.to_path_buf();
    138             path.pop();
    139             path.push(rel_path);
    140 
    141             // parse the preset
    142             let preset = if header.is_empty() || header.starts_with(';') {
    143                 PresetMeta::Parameters(header.to_string())
    144             } else if matches!(header.chars().next(), Some('1'..='6')) {
    145                 if header.len() < 3
    146                     || !header.chars().nth(1).unwrap().is_ascii_digit()
    147                     || !header.chars().nth(2).unwrap().is_whitespace()
    148                 {
    149                     log::error!("Line for {:?} starts like a full header line, but it is incorrect; ignoring it.", path);
    150                     return;
    151                 }
    152                 let separator = header.chars().nth(2).unwrap();
    153                 if separator != ' ' {
    154                     // the Gemini specification says that the third
    155                     // character has to be a space, so correct any
    156                     // other whitespace to it (e.g. tabs)
    157                     log::warn!("Full Header line for {:?} has an invalid character, treating {:?} as a space.", path, separator);
    158                 }
    159                 let status = header
    160                     .chars()
    161                     .take(2)
    162                     .collect::<String>()
    163                     .parse::<u8>()
    164                     // unwrap since we alread checked it's a number
    165                     .unwrap();
    166                 // not taking a slice here because the separator
    167                 // might be a whitespace wider than a byte
    168                 let meta = header.chars().skip(3).collect::<String>();
    169                 PresetMeta::FullHeader(status, meta)
    170             } else {
    171                 // must be a MIME type, but without status code
    172                 PresetMeta::FullMime(header.to_string())
    173             };
    174 
    175             let glob_options = MatchOptions {
    176                 case_sensitive: true,
    177                 // so there is a difference between "*" and "**".
    178                 require_literal_separator: true,
    179                 // security measure because entries for .hidden files
    180                 // would result in them being exposed.
    181                 require_literal_leading_dot: !crate::ARGS.serve_secret,
    182             };
    183 
    184             // process filename as glob
    185             let paths = if let Some(path) = path.to_str() {
    186                 match glob_with(path, glob_options) {
    187                     Ok(paths) => paths.collect::<Vec<_>>(),
    188                     Err(err) => {
    189                         log::error!("incorrect glob pattern in {:?}: {}", path, err);
    190                         continue;
    191                     }
    192                 }
    193             } else {
    194                 log::error!("path is not UTF-8: {:?}", path);
    195                 continue;
    196             };
    197 
    198             if paths.is_empty() {
    199                 // probably an entry for a nonexistent file, glob only works for existing files
    200                 self.file_meta.insert(path, preset);
    201             } else {
    202                 for glob_result in paths {
    203                     match glob_result {
    204                         Ok(path) if path.is_dir() => { /* ignore */ }
    205                         Ok(path) => {
    206                             self.file_meta.insert(path, preset.clone());
    207                         }
    208                         Err(err) => {
    209                             log::warn!("could not process glob path: {}", err);
    210                             continue;
    211                         }
    212                     };
    213                 }
    214             }
    215         }
    216     }
    217 
    218     /// Get the metadata for the specified file. This might need to (re)load a
    219     /// single sidecar file.
    220     /// The file path should consistenly be either absolute or relative to the
    221     /// working/content directory. If inconsistent file paths are used, this can
    222     /// lead to loading and storing sidecar files multiple times.
    223     pub fn get(&mut self, file: &Path) -> PresetMeta {
    224         self.update(file);
    225 
    226         self.file_meta.get(file).unwrap_or(&self.default).clone()
    227     }
    228 
    229     /// Returns true if a configuration exists in a configuration file.
    230     /// Returns false if no or only the default value exists.
    231     pub fn exists(&mut self, file: &Path) -> bool {
    232         self.update(file);
    233 
    234         self.file_meta.contains_key(file)
    235     }
    236 }