agate

Simple gemini server for static files
git clone https://github.com/mbrubeck/agate.git
Log | Files | Refs | README

commit 548e0f401f35637932f845948d6b4fd789cb652d
parent aa713a2dea89e1f07369e72b7f0ad43ee8fa353a
Author: Johann150 <johann.galle@protonmail.com>
Date:   Thu,  4 Feb 2021 17:00:33 +0100

cache parsing, new syntax

Changed the parsing: lines are now parsed once, when the config file is read,
and the parse result is cached, rather than checking the line format every
time the file is served.

Diffstat:
Msrc/main.rs | 45++++++++++++++++++++++++++-------------------
Msrc/metadata.rs | 80+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++--------
2 files changed, 98 insertions(+), 27 deletions(-)

diff --git a/src/main.rs b/src/main.rs @@ -1,5 +1,5 @@ mod metadata; -use metadata::FileOptions; +use metadata::{FileOptions, PresetMeta}; use { once_cell::sync::Lazy, @@ -27,12 +27,11 @@ fn main() -> Result { env_logger::Builder::new().parse_filters("info").init(); } Runtime::new()?.block_on(async { - let mimetypes = Arc::new(RwLock::new(FileOptions::new( - &ARGS - .language + let mimetypes = Arc::new(RwLock::new(FileOptions::new(PresetMeta::Parameters( + ARGS.language .as_ref() .map_or(String::new(), |lang| format!(";lang={}", lang)), - ))); + )))); let listener = TcpListener::bind(&ARGS.addrs[..]).await?; log::info!("Listening on {:?}...", ARGS.addrs); loop { @@ -317,7 +316,15 @@ impl RequestHandle { } } - // Make sure the file opens successfully before sending the success header. + let data = self.metadata.write().await.get(&path); + + if let PresetMeta::FullHeader(status, meta) = data { + self.send_header(status, &meta).await?; + // do not try to access the file + return Ok(()); + } + + // Make sure the file opens successfully before sending a success header. let mut file = match tokio::fs::File::open(&path).await { Ok(file) => file, Err(e) => { @@ -327,21 +334,21 @@ impl RequestHandle { }; // Send header. - let mut locked = self.metadata.write().await; - let data = locked.get(&path); - let mime = if data.is_empty() || data.starts_with(';') { - // guess MIME type - if path.extension() == Some(OsStr::new("gmi")) { - format!("text/gemini{}", data) - } else { - let mime = mime_guess::from_path(&path).first_or_octet_stream(); - format!("{}{}", mime.essence_str(), data) + let mime = match data { + // this was already handled before opening the file + PresetMeta::FullHeader(..) 
=> unreachable!(), + // treat this as the full MIME type + PresetMeta::FullMime(mime) => mime.clone(), + // guess the MIME type and add the parameters + PresetMeta::Parameters(params) => { + if path.extension() == Some(OsStr::new("gmi")) { + format!("text/gemini{}", params) + } else { + let mime = mime_guess::from_path(&path).first_or_octet_stream(); + format!("{}{}", mime.essence_str(), params) + } } - } else { - // this must be a full MIME type - data.to_owned() }; - drop(locked); self.send_header(20, &mime).await?; // Send body. diff --git a/src/metadata.rs b/src/metadata.rs @@ -3,6 +3,8 @@ use std::io::{BufRead, BufReader}; use std::path::PathBuf; use std::time::SystemTime; +static SIDECAR_FILENAME: &str = ".meta"; + /// A struct to store a string of metadata for each file retrieved from /// sidecar files called `.lang`. /// @@ -21,19 +23,47 @@ pub(crate) struct FileOptions { /// has changed. databases_read: BTreeMap<PathBuf, SystemTime>, /// Stores the metadata for each file - file_meta: BTreeMap<PathBuf, String>, + file_meta: BTreeMap<PathBuf, PresetMeta>, /// The default value to return - default: String, + default: PresetMeta, } -static SIDECAR_FILENAME: &str = ".mime"; +/// A struct to store the different alternatives that a line in the sidecar +/// file can have. +#[derive(Clone, Debug)] +pub(crate) enum PresetMeta { + /// A line that starts with a semicolon in the sidecar file, or an + /// empty line (to overwrite the default language command line flag). + /// ```text + /// index.gmi: ;lang=en-GB + /// ``` + /// The content is interpreted as MIME parameters and are appended to what + /// agate guesses as the MIME type if the respective file can be found. + Parameters(String), + /// A line that is neither a `Parameters` line nor a `FullHeader` line. + /// ```text + /// strange.file: text/plain; lang=ee + /// ``` + /// Agate will send the complete line as the MIME type of the request if + /// the respective file can be found (i.e. a `20` status code). 
+ FullMime(String), + /// A line that starts with a digit between 1 and 6 inclusive followed by + /// another digit and a space (U+0020). In the categories defined by the + /// Gemini specification you can pick a defined or non-defined status code. + /// ```text + /// gone.gmi: 52 This file is no longer available. + /// ``` + /// Agate will send this header line, CR, LF, and nothing else. Agate will + /// not try to access the requested file. + FullHeader(u8, String), +} impl FileOptions { - pub(crate) fn new(default: &str) -> Self { + pub(crate) fn new(default: PresetMeta) -> Self { Self { databases_read: BTreeMap::new(), file_meta: BTreeMap::new(), - default: default.to_string(), + default, } } @@ -93,7 +123,41 @@ impl FileOptions { // generate workspace-unique path let mut path = db_dir.clone(); path.push(parts[0].trim()); - self.file_meta.insert(path, parts[1].trim().to_string()); + // parse the line + let header = parts[1].trim(); + + let preset = if header.is_empty() || header.starts_with(';') { + PresetMeta::Parameters(header.to_string()) + } else if matches!(header.chars().next(), Some('1'..='6')) { + if header.len() < 3 + || !header.chars().nth(1).unwrap().is_ascii_digit() + || !header.chars().nth(2).unwrap().is_whitespace() + { + log::error!("Line for {:?} starts like a full header line, but it is incorrect; ignoring it.", path); + return; + } + let separator = header.chars().nth(2).unwrap(); + if separator != ' ' { + // the Gemini specification says that the third + // character has to be a space, so correct any + // other whitespace to it (e.g. 
tabs) + log::warn!("Full Header line for {:?} has an invalid character, treating {:?} as a space.", path, separator); + } + let status = header.chars() + .take(2) + .collect::<String>() + .parse::<u8>() + // unwrap since we alread checked it's a number + .unwrap(); + // not taking a slice here because the separator + // might be a whitespace wider than a byte + let meta = header.chars().skip(3).collect::<String>(); + PresetMeta::FullHeader(status, meta) + } else { + // must be a MIME type, but without status code + PresetMeta::FullMime(header.to_string()) + }; + self.file_meta.insert(path, preset); } }); self.databases_read @@ -106,12 +170,12 @@ impl FileOptions { /// The file path should consistenly be either absolute or relative to the /// working/content directory. If inconsisten file paths are used, this can /// lead to loading and storing sidecar files multiple times. - pub fn get(&mut self, file: &PathBuf) -> &str { + pub fn get(&mut self, file: &PathBuf) -> PresetMeta { let dir = file.parent().expect("no parent directory").to_path_buf(); if self.check_outdated(&dir) { self.read_database(&dir); } - self.file_meta.get(file).unwrap_or(&self.default) + self.file_meta.get(file).unwrap_or(&self.default).clone() } }