agate

Simple gemini server for static files
git clone https://github.com/mbrubeck/agate.git
Log | Files | Refs | README

commit 742a423021155eafe1122962a68f90ca3b044ec2
parent 1c28b370802ee39ff385028a3b1dfcee28a21911
Author: Johann150 <20990607+Johann150@users.noreply.github.com>
Date:   Fri,  5 Feb 2021 08:20:19 +0100

merge #16 from Johann150/master

loading metadata from sidecar file
Diffstat:
M Cargo.toml | 2 +-
M README.md | 37 ++++++++++++++++++++++++++++++++++++-
M src/main.rs | 127 +++++++++++++++++++++++++++++++++++++++++++++++++++++++------------------------
A src/metadata.rs | 181 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
4 files changed, 307 insertions(+), 40 deletions(-)

diff --git a/Cargo.toml b/Cargo.toml @@ -12,7 +12,7 @@ edition = "2018" [dependencies] tokio-rustls = "0.22.0" -tokio = { version = "1.0", features = ["fs", "io-util", "net", "rt-multi-thread"] } +tokio = { version = "1.0", features = ["fs", "io-util", "net", "rt-multi-thread", "sync"] } env_logger = { version = "0.8", default-features = false, features = ["atty", "humantime", "termcolor"] } getopts = "0.2.21" log = "0.4" diff --git a/README.md b/README.md @@ -43,7 +43,42 @@ agate --content path/to/content/ \ All of the command-line arguments are optional. Run `agate --help` to see the default values used when arguments are omitted. -When a client requests the URL `gemini://example.com/foo/bar`, Agate will respond with the file at `path/to/content/foo/bar`. If any segment of the requested path starts with a dot, agate will respond with a status code 52, even if the file does not exist (this behaviour can be disabled with `--serve-secret`). If there is a directory at that path, Agate will look for a file named `index.gmi` inside that directory. If there is no such file, but a file named `.directory-listing-ok` exists inside that directory, a basic directory listing is displayed. Files or directories whose name starts with a dot (e.g. the `.directory-listing-ok` file itself) are omitted from the list. +When a client requests the URL `gemini://example.com/foo/bar`, Agate will respond with the file at `path/to/content/foo/bar`. If any segment of the requested path starts with a dot, agate will respond with a status code 52, whether the file exists or not (this behaviour can be disabled with `--serve-secret`). If there is a directory at that path, Agate will look for a file named `index.gmi` inside that directory. + +## Configuration + +### Directory listing + +You can enable a basic directory listing for a directory by putting a file called `.directory-listing-ok` in that directory. This does not have an effect on subdirectories.
+The directory listing will hide files and directories whose name starts with a dot (e.g. the `.directory-listing-ok` file itself or also the `.meta` configuration file). + +A file called `index.gmi` will always take precedence over a directory listing. + +### Meta-Presets + +You can put a file called `.meta` in a directory that stores some metadata about these files which Agate will use when serving these files. The file should be UTF-8 encoded. Like the `.directory-listing-ok` file, this file does not have an effect on subdirectories. +Lines starting with a `#` are comments and will be ignored like empty lines. All other lines must start with a file name (not a path), followed by a colon and then the metadata. + +The metadata can take one of four possible forms: +1. empty + Agate will not send a default language parameter, even if it was specified on the command line. +2. starting with a semicolon followed by MIME parameters + Agate will append the specified string onto the MIME type, if the file is found. +3. starting with a gemini status code (i.e. a digit 1-6 inclusive followed by another digit) and a space + Agate will send the metadata whether the file exists or not. The file will not be sent or accessed. +4. a MIME type, may include parameters + Agate will use this MIME type instead of what it would guess, if the file is found. + The default language parameter will not be used, even if it was specified on the command line. + +If a line violates the format or looks like case 3, but is incorrect, it might be ignored. You should check your logs. Please note that this configuration file is only read once a file from the respective directory is accessed, so the absence of log messages after startup does not mean the `.meta` file is okay. + +Such a configuration file might look like this: +```text +# This line will be ignored. +index.gmi:;lang=en-GB +LICENSE:text/plain;charset=UTF-8 +gone.gmi:52 This file is no longer here, sorry.
+``` [Gemini]: https://gemini.circumlunar.space/ [Rust]: https://www.rust-lang.org/ diff --git a/src/main.rs b/src/main.rs @@ -1,27 +1,24 @@ +mod metadata; +use metadata::{FileOptions, PresetMeta}; + use { once_cell::sync::Lazy, - percent_encoding::{AsciiSet, CONTROLS, percent_decode_str, percent_encode}, + percent_encoding::{percent_decode_str, percent_encode, AsciiSet, CONTROLS}, rustls::{ internal::pemfile::{certs, pkcs8_private_keys}, NoClientAuth, ServerConfig, }, std::{ - borrow::Cow, - error::Error, - ffi::OsStr, - fmt::Write, - fs::File, - io::BufReader, - net::SocketAddr, - path::Path, - sync::Arc, + borrow::Cow, error::Error, ffi::OsStr, fmt::Write, fs::File, io::BufReader, + net::SocketAddr, path::Path, sync::Arc, }, tokio::{ io::{AsyncReadExt, AsyncWriteExt}, net::{TcpListener, TcpStream}, runtime::Runtime, + sync::Mutex, }, - tokio_rustls::{TlsAcceptor, server::TlsStream}, + tokio_rustls::{server::TlsStream, TlsAcceptor}, url::{Host, Url}, }; @@ -30,12 +27,18 @@ fn main() -> Result { env_logger::Builder::new().parse_filters("info").init(); } Runtime::new()?.block_on(async { + let mimetypes = Arc::new(Mutex::new(FileOptions::new(PresetMeta::Parameters( + ARGS.language + .as_ref() + .map_or(String::new(), |lang| format!(";lang={}", lang)), + )))); let listener = TcpListener::bind(&ARGS.addrs[..]).await?; log::info!("Listening on {:?}...", ARGS.addrs); loop { let (stream, _) = listener.accept().await?; + let arc = mimetypes.clone(); tokio::spawn(async { - match RequestHandle::new(stream).await { + match RequestHandle::new(stream, arc).await { Ok(handle) => match handle.handle().await { Ok(info) => log::info!("{}", info), Err(err) => log::warn!("{}", err), @@ -73,15 +76,49 @@ struct Args { fn args() -> Result<Args> { let args: Vec<String> = std::env::args().collect(); let mut opts = getopts::Options::new(); - opts.optopt("", "content", "Root of the content directory (default ./content)", "DIR"); - opts.optopt("", "cert", "TLS certificate PEM file (default 
./cert.pem)", "FILE"); - opts.optopt("", "key", "PKCS8 private key file (default ./key.rsa)", "FILE"); - opts.optmulti("", "addr", "Address to listen on (multiple occurences possible, default 0.0.0.0:1965 and [::]:1965)", "IP:PORT"); - opts.optopt("", "hostname", "Domain name of this Gemini server (optional)", "NAME"); - opts.optopt("", "lang", "RFC 4646 Language code(s) for text/gemini documents", "LANG"); + opts.optopt( + "", + "content", + "Root of the content directory (default ./content)", + "DIR", + ); + opts.optopt( + "", + "cert", + "TLS certificate PEM file (default ./cert.pem)", + "FILE", + ); + opts.optopt( + "", + "key", + "PKCS8 private key file (default ./key.rsa)", + "FILE", + ); + opts.optopt( + "", + "addr", + "Address to listen on (multiple occurences possible, default 0.0.0.0:1965 and [::]:1965)", + "IP:PORT", + ); + opts.optopt( + "", + "hostname", + "Domain name of this Gemini server (optional)", + "NAME", + ); + opts.optopt( + "", + "lang", + "RFC 4646 Language code(s) for text/gemini documents", + "LANG", + ); opts.optflag("s", "silent", "Disable logging output"); opts.optflag("h", "help", "Print this help menu"); - opts.optflag("", "serve-secret", "Enable serving secret files (files/directories starting with a dot)"); + opts.optflag( + "", + "serve-secret", + "Enable serving secret files (files/directories starting with a dot)", + ); opts.optflag("", "log-ip", "Output IP addresses when logging"); let matches = opts.parse(&args[1..]).map_err(|f| f.to_string())?; @@ -142,12 +179,13 @@ fn acceptor() -> Result<TlsAcceptor> { struct RequestHandle { stream: TlsStream<TcpStream>, log_line: String, + metadata: Arc<Mutex<FileOptions>>, } impl RequestHandle { /// Creates a new request handle for the given stream. If establishing the TLS /// session fails, returns a corresponding log line. 
- async fn new(stream: TcpStream) -> Result<Self, String> { + async fn new(stream: TcpStream, metadata: Arc<Mutex<FileOptions>>) -> Result<Self, String> { let log_line = format!( "{} {}", stream.local_addr().unwrap(), @@ -163,7 +201,11 @@ impl RequestHandle { ); match TLS.accept(stream).await { - Ok(stream) => Ok(Self { stream, log_line }), + Ok(stream) => Ok(Self { + stream, + log_line, + metadata, + }), Err(e) => Err(format!("{} error:{}", log_line, e)), } } @@ -274,7 +316,15 @@ impl RequestHandle { } } - // Make sure the file opens successfully before sending the success header. + let data = self.metadata.lock().await.get(&path); + + if let PresetMeta::FullHeader(status, meta) = data { + self.send_header(status, &meta).await?; + // do not try to access the file + return Ok(()); + } + + // Make sure the file opens successfully before sending a success header. let mut file = match tokio::fs::File::open(&path).await { Ok(file) => file, Err(e) => { @@ -284,12 +334,22 @@ impl RequestHandle { }; // Send header. - if path.extension() == Some(OsStr::new("gmi")) { - self.send_text_gemini_header().await?; - } else { - let mime = mime_guess::from_path(&path).first_or_octet_stream(); - self.send_header(20, mime.essence_str()).await?; - } + let mime = match data { + // this was already handled before opening the file + PresetMeta::FullHeader(..) => unreachable!(), + // treat this as the full MIME type + PresetMeta::FullMime(mime) => mime.clone(), + // guess the MIME type and add the parameters + PresetMeta::Parameters(params) => { + if path.extension() == Some(OsStr::new("gmi")) { + format!("text/gemini{}", params) + } else { + let mime = mime_guess::from_path(&path).first_or_octet_stream(); + format!("{}{}", mime.essence_str(), params) + } + } + }; + self.send_header(20, &mime).await?; // Send body. 
tokio::io::copy(&mut file, &mut self.stream).await?; @@ -310,7 +370,7 @@ impl RequestHandle { .add(b'}'); log::info!("Listing directory {:?}", path); - self.send_text_gemini_header().await?; + self.send_header(20, "text/gemini").await?; let mut entries = tokio::fs::read_dir(path).await?; let mut lines = vec![]; while let Some(entry) = entries.next_entry().await? { @@ -346,13 +406,4 @@ impl RequestHandle { .await?; Ok(()) } - - async fn send_text_gemini_header(&mut self) -> Result { - if let Some(lang) = ARGS.language.as_deref() { - self.send_header(20, &format!("text/gemini;lang={}", lang)) - .await - } else { - self.send_header(20, "text/gemini").await - } - } } diff --git a/src/metadata.rs b/src/metadata.rs @@ -0,0 +1,181 @@ +use std::collections::BTreeMap; +use std::io::{BufRead, BufReader}; +use std::path::PathBuf; +use std::time::SystemTime; + +static SIDECAR_FILENAME: &str = ".meta"; + +/// A struct to store a string of metadata for each file retrieved from +/// sidecar files called `.lang`. +/// +/// These sidecar file's lines should have the format +/// ```text +/// <filename>:<metadata> +/// ``` +/// where `<filename>` is only a filename (not a path) of a file that resides +/// in the same directory and `<metadata>` is the metadata to be stored. +/// Lines that start with optional whitespace and `#` are ignored, as are lines +/// that do not fit the basic format. +/// Both parts are stripped of any leading and/or trailing whitespace. +pub(crate) struct FileOptions { + /// Stores the paths of the side files and when they were last read. + /// By comparing this to the last write time, we can know if the file + /// has changed. + databases_read: BTreeMap<PathBuf, SystemTime>, + /// Stores the metadata for each file + file_meta: BTreeMap<PathBuf, PresetMeta>, + /// The default value to return + default: PresetMeta, +} + +/// A struct to store the different alternatives that a line in the sidecar +/// file can have. 
+#[derive(Clone, Debug)] +pub(crate) enum PresetMeta { + /// A line that starts with a semicolon in the sidecar file, or an + /// empty line (to overwrite the default language command line flag). + /// ```text + /// index.gmi: ;lang=en-GB + /// ``` + /// The content is interpreted as MIME parameters and are appended to what + /// agate guesses as the MIME type if the respective file can be found. + Parameters(String), + /// A line that is neither a `Parameters` line nor a `FullHeader` line. + /// ```text + /// strange.file: text/plain; lang=ee + /// ``` + /// Agate will send the complete line as the MIME type of the request if + /// the respective file can be found (i.e. a `20` status code). + FullMime(String), + /// A line that starts with a digit between 1 and 6 inclusive followed by + /// another digit and a space (U+0020). In the categories defined by the + /// Gemini specification you can pick a defined or non-defined status code. + /// ```text + /// gone.gmi: 52 This file is no longer available. + /// ``` + /// Agate will send this header line, CR, LF, and nothing else. Agate will + /// not try to access the requested file. + FullHeader(u8, String), +} + +impl FileOptions { + pub(crate) fn new(default: PresetMeta) -> Self { + Self { + databases_read: BTreeMap::new(), + file_meta: BTreeMap::new(), + default, + } + } + + /// Checks wether the database for the respective directory is still + /// up to date. + /// Will only return true if the database should be (re)read, i.e. it will + /// return false if there is no database file in the specified directory. 
+ fn check_outdated(&self, db_dir: &PathBuf) -> bool { + let mut db = db_dir.clone(); + db.push(SIDECAR_FILENAME); + let db = db.as_path(); + + if let Ok(metadata) = db.metadata() { + if !metadata.is_file() { + // it exists, but it is a directory + false + } else if let (Ok(modified), Some(last_read)) = + (metadata.modified(), self.databases_read.get(db_dir)) + { + // check that it was last modified before the read + // if the times are the same, we might have read the old file + &modified >= last_read + } else { + // either the filesystem does not support last modified + // metadata, so we have to read it again every time; or the + // file exists but was not read before, so we have to read it + true + } + } else { + // the file probably does not exist + false + } + } + + /// (Re)reads a specific sidecar file that resides in the specified + /// directory. The function takes a directory to minimize path + /// alterations "on the fly". + /// This function will allways try to read the file, even if it is current. 
+ fn read_database(&mut self, db_dir: &PathBuf) { + log::trace!("reading database for {:?}", db_dir); + let mut db = db_dir.clone(); + db.push(SIDECAR_FILENAME); + let db = db.as_path(); + + if let Ok(file) = std::fs::File::open(db) { + let r = BufReader::new(file); + r.lines() + // discard any I/O errors + .filter_map(|line| line.ok()) + // filter out comment lines + .filter(|line| !line.trim_start().starts_with('#')) + .for_each(|line| { + // split line at colon + let parts = line.splitn(2, ':').collect::<Vec<_>>(); + // only continue if line fits the format + if parts.len() == 2 { + // generate workspace-unique path + let mut path = db_dir.clone(); + path.push(parts[0].trim()); + // parse the line + let header = parts[1].trim(); + + let preset = if header.is_empty() || header.starts_with(';') { + PresetMeta::Parameters(header.to_string()) + } else if matches!(header.chars().next(), Some('1'..='6')) { + if header.len() < 3 + || !header.chars().nth(1).unwrap().is_ascii_digit() + || !header.chars().nth(2).unwrap().is_whitespace() + { + log::error!("Line for {:?} starts like a full header line, but it is incorrect; ignoring it.", path); + return; + } + let separator = header.chars().nth(2).unwrap(); + if separator != ' ' { + // the Gemini specification says that the third + // character has to be a space, so correct any + // other whitespace to it (e.g. 
tabs) + log::warn!("Full Header line for {:?} has an invalid character, treating {:?} as a space.", path, separator); + } + let status = header.chars() + .take(2) + .collect::<String>() + .parse::<u8>() + // unwrap since we alread checked it's a number + .unwrap(); + // not taking a slice here because the separator + // might be a whitespace wider than a byte + let meta = header.chars().skip(3).collect::<String>(); + PresetMeta::FullHeader(status, meta) + } else { + // must be a MIME type, but without status code + PresetMeta::FullMime(header.to_string()) + }; + self.file_meta.insert(path, preset); + } + }); + self.databases_read + .insert(db_dir.clone(), SystemTime::now()); + } + } + + /// Get the metadata for the specified file. This might need to (re)load a + /// single sidecar file. + /// The file path should consistenly be either absolute or relative to the + /// working/content directory. If inconsisten file paths are used, this can + /// lead to loading and storing sidecar files multiple times. + pub fn get(&mut self, file: &PathBuf) -> PresetMeta { + let dir = file.parent().expect("no parent directory").to_path_buf(); + if self.check_outdated(&dir) { + self.read_database(&dir); + } + + self.file_meta.get(file).unwrap_or(&self.default).clone() + } +}