commit a049beb66b84a9f120381e48a055042661004bef
parent 039057b8dbbb02412f629bd5eded393c08d3b617
Author: Matt Brubeck <mbrubeck@limpet.net>
Date: Fri, 22 May 2020 08:22:26 -0700
Improved request parsing.
Only read up to 1024 bytes plus CRLF. This avoids allocations and
copies, and prevents malicious/buggy clients from making us allocate
unbounded amounts of memory.
We also stop if we see a lone LF, rather than timing out until CRLF is
received.
Diffstat:
M src/main.rs | 39 ++++++++++++++++++++-------------------
1 file changed, 20 insertions(+), 19 deletions(-)
diff --git a/src/main.rs b/src/main.rs
@@ -82,30 +82,31 @@ async fn connection(stream: TcpStream) -> Result {
}
async fn parse_request(stream: &mut TlsStream<TcpStream>) -> Result<Url> {
- let mut stream = async_std::io::BufReader::new(stream);
- let mut request = Vec::new();
- stream.read_until(b'\r', &mut request).await?;
-
- // Check line ending.
- let eol = &mut [0];
- stream.read_exact(eol).await?;
- if eol != b"\n" {
- Err("CR without LF")?
- }
- // Check request length.
- if request.len() > 1026 {
- Err("Too long")?
+ // Read one line up to 1024 bytes, plus 2 bytes for CRLF.
+ let mut request = [0; 1026];
+ let mut buf = &mut request[..];
+ let mut len = 0;
+ while !buf.is_empty() {
+ let n = stream.read(buf).await?;
+ len += n;
+ if n == 0 || request[..len].ends_with(b"\r\n") {
+ break;
+ }
+ buf = &mut request[len..];
}
- // Handle scheme-relative URLs.
- if request.starts_with(b"//") {
- request.splice(..0, "gemini:".bytes());
+ if !request[..len].ends_with(b"\r\n") {
+ Err("Missing CRLF")?
}
- // Parse URL.
- let url = Url::parse(str::from_utf8(&request)?.trim_end())?;
+ let request = str::from_utf8(&request[..len - 2])?;
+
+ let url = if request.starts_with("//") {
+ Url::parse(&format!("gemini:{}", request))?
+ } else {
+ Url::parse(request)?
+ };
if url.scheme() != "gemini" {
Err("unsupported URL scheme")?
}
- // TODO: Validate hostname and port.
Ok(url)
}