agate

Simple gemini server for static files
git clone https://github.com/mbrubeck/agate.git
Log | Files | Refs | README

commit 197e4592b94d56d92d3280cf04555ea9d1eb301f
parent fdca5305910b16b9874aaf267d0b03e6394489a0
Author: Johann150 <johann.galle@protonmail.com>
Date:   Fri, 12 Feb 2021 16:51:42 +0100

use configparser crate

This parser can correctly read globs from configuration keys which allows
their use not just in theory in the server logic but in the config file too.

Diffstat:
M CHANGELOG.md | 2 +-
M Cargo.lock | 47 +++++++++++++++++++----------------------------
M Cargo.toml | 4 ++--
M README.md | 14 ++++++++------
M src/metadata.rs | 200 +++++++++++++++++++++++++++++++++++++------------------------------------------
5 files changed, 123 insertions(+), 144 deletions(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md @@ -17,7 +17,7 @@ Thank you to @gegeweb for contributing to this release. * The `.meta` configuration file now allows for globs to be used. ### Changed -* The configuration files are now parsed as YAML. The syntax only changes in that a space is now required behind the colon. +* The `.meta` file parser now uses the `configparser` crate. The syntax does not change. * The changelog is now also kept in this file in addition to the GitHub releases. * Certificate chain and key file are now only loaded once at startup, certificate changes need a restart to take effect. * Hidden files are now served if there is an explicit setting in a `.meta` file for them, regardless of the `--serve-secret` flag. diff --git a/Cargo.lock b/Cargo.lock @@ -4,6 +4,7 @@ name = "agate" version = "2.4.1" dependencies = [ + "configparser", "env_logger", "getopts", "glob", @@ -15,7 +16,6 @@ dependencies = [ "tokio", "tokio-rustls", "url", - "yaml-rust", ] [[package]] @@ -66,10 +66,16 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd" [[package]] +name = "configparser" +version = "2.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e2616d8c1fbf887d76dd8e067ec1bc3be7669994378428b4415a8e4ad57baae1" + +[[package]] name = "env_logger" -version = "0.8.2" +version = "0.8.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f26ecb66b4bdca6c1409b40fb255eefc2bd4f6d135dab3c3124f80ffa2a9661e" +checksum = "17392a012ea30ef05a610aa97dfb49496e71c9f676b27879922ea5bdf60d9d3f" dependencies = [ "atty", "humantime", @@ -119,9 +125,9 @@ checksum = "9a3a5bfb195931eeb336b2a7b4d761daec841b97f947d34394601737a7bba5e4" [[package]] name = "idna" -version = "0.2.0" +version = "0.2.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "02e2673c30ee86b5b96a9cb52ad15718aa1f966f5ab9ad54a8b95d5ca33120a9" 
+checksum = "de910d521f7cc3135c4de8db1cb910e0b5ed1dc6f57c381cd07e8e661ce10094" dependencies = [ "matches", "unicode-bidi", @@ -145,15 +151,9 @@ checksum = "e2abad23fbc42b3700f2f279844dc832adb2b2eb069b2df918f455c4e18cc646" [[package]] name = "libc" -version = "0.2.85" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7ccac4b00700875e6a07c6cde370d44d32fa01c5a65cdd2fca6858c479d28bb3" - -[[package]] -name = "linked-hash-map" -version = "0.5.4" +version = "0.2.86" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7fb9b38af92608140b86b693604b9ffcc5824240a484d1ecd4795bacb2fe88f3" +checksum = "b7282d924be3275cec7f6756ff4121987bc6481325397dde6ba3e7802b1a8b1c" [[package]] name = "log" @@ -263,9 +263,9 @@ dependencies = [ [[package]] name = "quote" -version = "1.0.8" +version = "1.0.9" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "991431c3519a3f36861882da93630ce66b52918dcf1b8e2fd66b397fc96f28df" +checksum = "c3d0b9745dc2debf507c8422de05d7226cc1f0644216dfdfead988f9b1ab32a7" dependencies = [ "proc-macro2", ] @@ -362,9 +362,9 @@ checksum = "cda74da7e1a664f795bb1f8a87ec406fb89a02522cf6e50620d016add6dbbf5c" [[package]] name = "tokio" -version = "1.1.1" +version = "1.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6714d663090b6b0acb0fa85841c6d66233d150cdb2602c8f9b8abb03370beb3f" +checksum = "e8190d04c665ea9e6b6a0dc45523ade572c088d2e6566244c1122671dbf4ae3a" dependencies = [ "autocfg", "bytes", @@ -406,9 +406,9 @@ dependencies = [ [[package]] name = "unicode-normalization" -version = "0.1.16" +version = "0.1.17" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a13e63ab62dbe32aeee58d1c5408d35c36c392bba5d9d3142287219721afe606" +checksum = "07fbfce1c8a97d547e8b5334978438d9d6ec8c20e38f56d4a4374d181493eaef" dependencies = [ "tinyvec", ] @@ -553,12 +553,3 @@ name = "winapi-x86_64-pc-windows-gnu" version = "0.4.0" source = 
"registry+https://github.com/rust-lang/crates.io-index" checksum = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f" - -[[package]] -name = "yaml-rust" -version = "0.4.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "56c1936c4cc7a1c9ab21a1ebb602eb942ba868cbd44a99cb7cdc5892335e1c85" -dependencies = [ - "linked-hash-map", -] diff --git a/Cargo.toml b/Cargo.toml @@ -13,7 +13,7 @@ exclude = ["/tools", ".github/", "release.sh", "/content"] [dependencies] tokio-rustls = "0.22.0" -tokio = { version = "1.1", features = ["fs", "io-util", "net", "rt-multi-thread", "sync"] } +tokio = { version = "1.2", features = ["fs", "io-util", "net", "rt-multi-thread", "sync"] } env_logger = { version = "0.8", default-features = false, features = ["atty", "humantime", "termcolor"] } getopts = "0.2.21" log = "0.4" @@ -22,8 +22,8 @@ once_cell = "1.5" percent-encoding = "2.1" rustls = "0.19.0" url = "2.2" -yaml-rust = "0.4" glob = "0.3" +configparser = "2.0" [profile.release] lto = true diff --git a/README.md b/README.md @@ -68,19 +68,19 @@ A file called `index.gmi` will always take precedence over a directory listing. You can put a file called `.meta` in any content directory. This file stores some metadata about the adjacent files which Agate will use when serving these files. The `.meta` file must be UTF-8 encoded. You can also enable a central configuration file with the `-C` flag (or the long version `--central-conf`). In this case Agate will always look for the `.meta` configuration file in the content root directory and will ignore `.meta` files in other directories. -The `.meta` file is parsed as a YAML file and should contain a "hash" datatype with file names as the keys. This means: -* Lines starting with a `#` are comments and will be ignored, as will empty lines. -* All other lines must have the form `<path>: <metadata`, i.e. start with a file path, followed by a colon and a space and then the metadata. 
+The `.meta` file has the following format (*1): +* Lines starting with a `#` or a ';' are comments and will be ignored, as will empty lines. +* All other lines must have the form `<path>:<metadata>`, i.e. start with a file path, followed by a colon and then the metadata. `<path>` is a case sensitive file path, which may or may not exist on disk. If <path> leads to a directory, it is ignored. -If central configuration file mode is not used, using a path that is not a file in the current directory is undefined behaviour (for example: `../index.gmi` would be undefined behaviour). +If central configuration file mode is not used, using a path that is not a file in the current directory is undefined behaviour (for example `../index.gmi` would be undefined behaviour). You can use Unix style patterns in existing paths. For example `content/*` will match any file within `content`, and `content/**` will additionally match any files in subdirectories of `content`. -However, the `*` and `**` globs on their own will by default not match files or directories that start with a dot because of their special meaning (see Directory listing). +However, the `*` and `**` globs on their own will by default not match files or directories that start with a dot because of their special meaning. This behaviour can be disabled with `--serve-secret` or by explicitly matching files starting with a dot with e.g. `content/.*` or `content/**/.*` respectively. For more information on the patterns you can use, please see the [documentation of `glob::Pattern`](https://docs.rs/glob/0.3.0/glob/struct.Pattern.html). Rules can overwrite other rules, so if a file is matched by multiple rules, the last one applies. -The metadata can take one of four possible forms: +`<metadata>` can take one of four possible forms: 1. empty Agate will not send a default language parameter, even if it was specified on the command line. 2. 
starting with a semicolon followed by MIME parameters @@ -112,6 +112,8 @@ requested filename|response header any non-hidden file ending in `.de.gmi` (including in non-hidden subdirectories)|`20 text/gemini;lang=de` any non-hidden file in the `nl` directory ending in `.gmi` (including in non-hidden subdirectories)|`20 text/gemini;lang=nl` +(*1) In theory the syntax is that of a typical INI-like file and also allows for sections with `[section]` (the default section is set to `mime` in the parser), since all other sections are disregarded, this does not make a difference. This also means that you can in theory also use `=` instead of `:`. For even more information, you can visit the [documentation of `configparser`](https://docs.rs/configparser/2.0). + ### Logging Verbosity Agate uses the `env_logger` crate and allows you to set the logging verbosity by setting the default `RUST_LOG` environment variable. For more information, please see the [documentation of `env_logger`]. diff --git a/src/metadata.rs b/src/metadata.rs @@ -1,8 +1,8 @@ +use configparser::ini::Ini; use glob::{glob_with, MatchOptions}; use std::collections::BTreeMap; use std::path::{Path, PathBuf}; use std::time::SystemTime; -use yaml_rust::YamlLoader; static SIDECAR_FILENAME: &str = ".meta"; @@ -78,7 +78,7 @@ impl FileOptions { }; db.push(SIDECAR_FILENAME); - let should_read = if let Ok(metadata) = db.as_path().metadata() { + let should_read = if let Ok(metadata) = db.metadata() { if !metadata.is_file() { // it exists, but it is a directory false @@ -109,122 +109,108 @@ impl FileOptions { fn read_database(&mut self, db: &PathBuf) { log::trace!("reading database {:?}", db); - if let Ok(contents) = std::fs::read_to_string(db) { - self.databases_read - .insert(db.to_path_buf(), SystemTime::now()); + let mut ini = Ini::new_cs(); + ini.set_default_section("mime"); + let map = ini + .load(db.to_str().expect("config path not UTF-8")) + .and_then(|mut sections| { + sections + .remove("mime") + .ok_or_else(|| 
"no \"mime\" or default section".to_string()) + }); + self.databases_read + .insert(db.to_path_buf(), SystemTime::now()); + let files = match map { + Ok(section) => section, + Err(err) => { + log::error!("invalid config file {:?}: {}", db, err); + return; + } + }; - let docs = match YamlLoader::load_from_str(&contents) { - Ok(docs) => docs, - Err(e) => { - log::error!("Invalid YAML document in {:?}: {}", db, e); + for (rel_path, header) in files { + // treat unassigned keys as if they had an empty value + let header = header.unwrap_or_default(); + + // generate workspace-relative path + let mut path = db.clone(); + path.pop(); + path.push(rel_path); + + // parse the preset + let preset = if header.is_empty() || header.starts_with(';') { + PresetMeta::Parameters(header.to_string()) + } else if matches!(header.chars().next(), Some('1'..='6')) { + if header.len() < 3 + || !header.chars().nth(1).unwrap().is_ascii_digit() + || !header.chars().nth(2).unwrap().is_whitespace() + { + log::error!("Line for {:?} starts like a full header line, but it is incorrect; ignoring it.", path); return; } + let separator = header.chars().nth(2).unwrap(); + if separator != ' ' { + // the Gemini specification says that the third + // character has to be a space, so correct any + // other whitespace to it (e.g. 
tabs) + log::warn!("Full Header line for {:?} has an invalid character, treating {:?} as a space.", path, separator); + } + let status = header + .chars() + .take(2) + .collect::<String>() + .parse::<u8>() + // unwrap since we alread checked it's a number + .unwrap(); + // not taking a slice here because the separator + // might be a whitespace wider than a byte + let meta = header.chars().skip(3).collect::<String>(); + PresetMeta::FullHeader(status, meta) + } else { + // must be a MIME type, but without status code + PresetMeta::FullMime(header.to_string()) }; - if let Some(files) = docs.get(0).and_then(|hash| hash.as_hash()) { - for (rel_path, header) in files { - // from YAML to Rust types - let rel_path = if let Some(rel_path) = rel_path.as_str() { - rel_path - } else { - log::error!( - "Expected string filename, but got {:?} in {:?}", - rel_path, - db - ); - continue; - }; - let header = if let Some(header) = header.as_str() { - header - } else { - log::error!("Expected string contents, but got {:?} in {:?}", header, db); - continue; - }; - - // generate workspace-relative path - let mut path = db.clone(); - path.pop(); - path.push(rel_path); - - // parse the preset - let preset = if header.is_empty() || header.starts_with(';') { - PresetMeta::Parameters(header.to_string()) - } else if matches!(header.chars().next(), Some('1'..='6')) { - if header.len() < 3 - || !header.chars().nth(1).unwrap().is_ascii_digit() - || !header.chars().nth(2).unwrap().is_whitespace() - { - log::error!("Line for {:?} starts like a full header line, but it is incorrect; ignoring it.", path); - return; - } - let separator = header.chars().nth(2).unwrap(); - if separator != ' ' { - // the Gemini specification says that the third - // character has to be a space, so correct any - // other whitespace to it (e.g. 
tabs) - log::warn!("Full Header line for {:?} has an invalid character, treating {:?} as a space.", path, separator); - } - let status = header - .chars() - .take(2) - .collect::<String>() - .parse::<u8>() - // unwrap since we alread checked it's a number - .unwrap(); - // not taking a slice here because the separator - // might be a whitespace wider than a byte - let meta = header.chars().skip(3).collect::<String>(); - PresetMeta::FullHeader(status, meta) - } else { - // must be a MIME type, but without status code - PresetMeta::FullMime(header.to_string()) - }; - let glob_options = MatchOptions { - case_sensitive: true, - // so there is a difference between "*" and "**". - require_literal_separator: true, - // security measure because entries for .hidden files - // would result in them being exposed. - require_literal_leading_dot: !crate::ARGS.serve_secret, - }; + let glob_options = MatchOptions { + case_sensitive: true, + // so there is a difference between "*" and "**". + require_literal_separator: true, + // security measure because entries for .hidden files + // would result in them being exposed. 
+ require_literal_leading_dot: !crate::ARGS.serve_secret, + }; - // process filename as glob - let paths = if let Some(path) = path.to_str() { - match glob_with(path, glob_options) { - Ok(paths) => paths.collect::<Vec<_>>(), - Err(err) => { - log::error!("incorrect glob pattern: {}", err); - continue; - } - } - } else { - log::error!("path is not UTF-8: {:?}", path); + // process filename as glob + let paths = if let Some(path) = path.to_str() { + match glob_with(path, glob_options) { + Ok(paths) => paths.collect::<Vec<_>>(), + Err(err) => { + log::error!("incorrect glob pattern in {:?}: {}", path, err); continue; - }; - - if paths.is_empty() { - // probably an entry for a nonexistent file, glob only works for existing files - self.file_meta.insert(path, preset); - } else { - for glob_result in paths { - match glob_result { - Ok(path) if path.is_dir() => { /* ignore */ } - Ok(path) => { - self.file_meta.insert(path, preset.clone()); - } - Err(err) => { - log::warn!("could not process glob path: {}", err); - continue; - } - }; - } } } } else { - log::error!("no YAML document {:?}", db); + log::error!("path is not UTF-8: {:?}", path); + continue; + }; + + if paths.is_empty() { + // probably an entry for a nonexistent file, glob only works for existing files + self.file_meta.insert(path, preset); + } else { + for glob_result in paths { + match glob_result { + Ok(path) if path.is_dir() => { /* ignore */ } + Ok(path) => { + self.file_meta.insert(path, preset.clone()); + } + Err(err) => { + log::warn!("could not process glob path: {}", err); + continue; + } + }; + } } - } else { - log::error!("could not read configuration file {:?}", db); } }