metadata.rs (9487B)
1 use configparser::ini::Ini; 2 use glob::{glob_with, MatchOptions}; 3 use std::collections::BTreeMap; 4 use std::path::{Path, PathBuf}; 5 use std::time::SystemTime; 6 7 static SIDECAR_FILENAME: &str = ".meta"; 8 9 /// A struct to store a string of metadata for each file retrieved from 10 /// sidecar files with the name given by `SIDECAR_FILENAME`. 11 /// 12 /// These sidecar file's lines should have the format 13 /// ```text 14 /// <filename>:<metadata> 15 /// ``` 16 /// where `<filename>` is only a filename (not a path) of a file that resides 17 /// in the same directory and `<metadata>` is the metadata to be stored. 18 /// Lines that start with optional whitespace and `#` are ignored, as are lines 19 /// that do not fit the basic format. 20 /// Both parts are stripped of any leading and/or trailing whitespace. 21 pub(crate) struct FileOptions { 22 /// Stores the paths of the side files and when they were last read. 23 /// By comparing this to the last write time, we can know if the file 24 /// has changed. 25 databases_read: BTreeMap<PathBuf, SystemTime>, 26 /// Stores the metadata for each file 27 file_meta: BTreeMap<PathBuf, PresetMeta>, 28 /// The default value to return 29 default: PresetMeta, 30 } 31 32 /// A struct to store the different alternatives that a line in the sidecar 33 /// file can have. 34 #[derive(Clone, Debug)] 35 pub(crate) enum PresetMeta { 36 /// A line that starts with a semicolon in the sidecar file, or an 37 /// empty line (to overwrite the default language command line flag). 38 /// ```text 39 /// index.gmi: ;lang=en-GB 40 /// ``` 41 /// The content is interpreted as MIME parameters and are appended to what 42 /// agate guesses as the MIME type if the respective file can be found. 43 Parameters(String), 44 /// A line that is neither a `Parameters` line nor a `FullHeader` line. 45 /// ```text 46 /// strange.file: text/plain; lang=ee 47 /// ``` 48 /// Agate will send the complete line as the MIME type of the request if 49 /// the respective file can be found (i.e. a `20` status code). 50 FullMime(String), 51 /// A line that starts with a digit between 1 and 6 inclusive followed by 52 /// another digit and a space (U+0020). In the categories defined by the 53 /// Gemini specification you can pick a defined or non-defined status code. 54 /// ```text 55 /// gone.gmi: 52 This file is no longer available. 56 /// ``` 57 /// Agate will send this header line, CR, LF, and nothing else. Agate will 58 /// not try to access the requested file. 59 FullHeader(u8, String), 60 } 61 62 impl FileOptions { 63 pub(crate) fn new(default: PresetMeta) -> Self { 64 Self { 65 databases_read: BTreeMap::new(), 66 file_meta: BTreeMap::new(), 67 default, 68 } 69 } 70 71 /// Checks wether the database for the directory of the specified file is 72 /// still up to date and re-reads it if outdated or not yet read. 73 fn update(&mut self, file: &Path) { 74 let mut db = if super::ARGS.central_config { 75 super::ARGS.content_dir.clone() 76 } else { 77 file.parent().expect("no parent directory").to_path_buf() 78 }; 79 db.push(SIDECAR_FILENAME); 80 81 let should_read = if let Ok(metadata) = db.metadata() { 82 if !metadata.is_file() { 83 // it exists, but it is a directory 84 false 85 } else if let (Ok(modified), Some(last_read)) = 86 (metadata.modified(), self.databases_read.get(&db)) 87 { 88 // check that it was last modified before the read 89 // if the times are the same, we might have read the old file 90 &modified >= last_read 91 } else { 92 // either the filesystem does not support last modified 93 // metadata, so we have to read it again every time; or the 94 // file exists but was not read before, so we have to read it 95 true 96 } 97 } else { 98 // the file probably does not exist 99 false 100 }; 101 102 if should_read { 103 self.read_database(&db); 104 } 105 } 106 107 /// (Re)reads a specified sidecar file. 108 /// This function will allways try to read the file, even if it is current. 109 fn read_database(&mut self, db: &Path) { 110 log::debug!("reading database {:?}", db); 111 112 let mut ini = Ini::new_cs(); 113 ini.set_default_section("mime"); 114 ini.set_comment_symbols(&['#']); 115 let map = ini 116 .load(db.to_str().expect("config path not UTF-8")) 117 .and_then(|mut sections| { 118 sections 119 .remove("mime") 120 .ok_or_else(|| "no \"mime\" or default section".to_string()) 121 }); 122 self.databases_read 123 .insert(db.to_path_buf(), SystemTime::now()); 124 let files = match map { 125 Ok(section) => section, 126 Err(err) => { 127 log::error!("invalid config file {:?}: {}", db, err); 128 return; 129 } 130 }; 131 132 for (rel_path, header) in files { 133 // treat unassigned keys as if they had an empty value 134 let header = header.unwrap_or_default(); 135 136 // generate workspace-relative path 137 let mut path = db.to_path_buf(); 138 path.pop(); 139 path.push(rel_path); 140 141 // parse the preset 142 let preset = if header.is_empty() || header.starts_with(';') { 143 PresetMeta::Parameters(header.to_string()) 144 } else if matches!(header.chars().next(), Some('1'..='6')) { 145 if header.len() < 3 146 || !header.chars().nth(1).unwrap().is_ascii_digit() 147 || !header.chars().nth(2).unwrap().is_whitespace() 148 { 149 log::error!("Line for {:?} starts like a full header line, but it is incorrect; ignoring it.", path); 150 return; 151 } 152 let separator = header.chars().nth(2).unwrap(); 153 if separator != ' ' { 154 // the Gemini specification says that the third 155 // character has to be a space, so correct any 156 // other whitespace to it (e.g. tabs) 157 log::warn!("Full Header line for {:?} has an invalid character, treating {:?} as a space.", path, separator); 158 } 159 let status = header 160 .chars() 161 .take(2) 162 .collect::<String>() 163 .parse::<u8>() 164 // unwrap since we alread checked it's a number 165 .unwrap(); 166 // not taking a slice here because the separator 167 // might be a whitespace wider than a byte 168 let meta = header.chars().skip(3).collect::<String>(); 169 PresetMeta::FullHeader(status, meta) 170 } else { 171 // must be a MIME type, but without status code 172 PresetMeta::FullMime(header.to_string()) 173 }; 174 175 let glob_options = MatchOptions { 176 case_sensitive: true, 177 // so there is a difference between "*" and "**". 178 require_literal_separator: true, 179 // security measure because entries for .hidden files 180 // would result in them being exposed. 181 require_literal_leading_dot: !crate::ARGS.serve_secret, 182 }; 183 184 // process filename as glob 185 let paths = if let Some(path) = path.to_str() { 186 match glob_with(path, glob_options) { 187 Ok(paths) => paths.collect::<Vec<_>>(), 188 Err(err) => { 189 log::error!("incorrect glob pattern in {:?}: {}", path, err); 190 continue; 191 } 192 } 193 } else { 194 log::error!("path is not UTF-8: {:?}", path); 195 continue; 196 }; 197 198 if paths.is_empty() { 199 // probably an entry for a nonexistent file, glob only works for existing files 200 self.file_meta.insert(path, preset); 201 } else { 202 for glob_result in paths { 203 match glob_result { 204 Ok(path) if path.is_dir() => { /* ignore */ } 205 Ok(path) => { 206 self.file_meta.insert(path, preset.clone()); 207 } 208 Err(err) => { 209 log::warn!("could not process glob path: {}", err); 210 continue; 211 } 212 }; 213 } 214 } 215 } 216 } 217 218 /// Get the metadata for the specified file. This might need to (re)load a 219 /// single sidecar file. 220 /// The file path should consistenly be either absolute or relative to the 221 /// working/content directory. If inconsistent file paths are used, this can 222 /// lead to loading and storing sidecar files multiple times. 223 pub fn get(&mut self, file: &Path) -> PresetMeta { 224 self.update(file); 225 226 self.file_meta.get(file).unwrap_or(&self.default).clone() 227 } 228 229 /// Returns true if a configuration exists in a configuration file. 230 /// Returns false if no or only the default value exists. 231 pub fn exists(&mut self, file: &Path) -> bool { 232 self.update(file); 233 234 self.file_meta.contains_key(file) 235 } 236 }