gout

A static git page generator
git clone https://git.bracken.jp/gout.git
Log | Files | Refs | README | LICENSE

commit 3e3a7690b515a054ca79df9e2c17e8c71a72470d
parent 4100698bbc792f50bb38eca67839a37b0932f7be
Author: Chris Bracken <chris@bracken.jp>
Date:   Wed, 23 Jul 2025 19:36:44 -0700

Validate URLs and email addresses

Adds validation or URLs and email addresses emitted in HTML <a> links.

* Verifies that only http, https, and git URL schemes are allowed.
* Verifies that risky email addresses are not emitted in links.

If a malicious user created a commit with an email address of the form
`javascript:alert('foo')`, we'd emit an <a> tag like:

   <a href="mailto:javascript:alert('foo')">...</a>

Most modern browsers will protect against this, but it's not guaranteed.
One could imagine naughtier applications than an alert.

Diffstat:
Mutils.c | 12++++++++++++
Mutils.h | 6++++++
Mutils_tests.c | 29+++++++++++++++++++++++++++++
Mwriter/html/commit.c | 17++++++++++++-----
Mwriter/html/page.c | 16+++++++++++-----
5 files changed, 70 insertions(+), 10 deletions(-)

diff --git a/utils.c b/utils.c @@ -125,3 +125,15 @@ bool is_safe_repo_path(const char* path) { } return true; } + +bool is_safe_url(const char* url) { + return strncmp(url, "http://", 7) == 0 || strncmp(url, "https://", 8) == 0 || + strncmp(url, "git://", 6) == 0; +} + +bool is_safe_mailto(const char* email) { + /* A basic sanity check. We don't need a full RFC 5322 validator. We just + * want to prevent injection of HTML/JS. Disallow characters that are special + * in HTML or could be used to craft malicious URIs. */ + return strpbrk(email, "<>\"'\\\r\n") == NULL; +} diff --git a/utils.h b/utils.h @@ -31,4 +31,10 @@ void checkfileerror(FILE* fp, const char* name, char mode); /* Validates that a path is safe to use. Returns true if safe. */ bool is_safe_repo_path(const char* path); +/* Returns true if the URL has a safe scheme. */ +bool is_safe_url(const char* url); + +/* Returns true if the email address is safe to use in a mailto: link. */ +bool is_safe_mailto(const char* email); + #endif // GITOUT_UTILS_H_ diff --git a/utils_tests.c b/utils_tests.c @@ -43,3 +43,32 @@ UTEST(is_safe_repo_path, RejectsInvalidPaths) { EXPECT_FALSE(is_safe_repo_path("foo/../bar")); EXPECT_FALSE(is_safe_repo_path("foo/./../bar")); } + +UTEST(is_safe_url, AllowsValidSchemes) { + EXPECT_TRUE(is_safe_url("http://example.com")); + EXPECT_TRUE(is_safe_url("https://example.com")); + EXPECT_TRUE(is_safe_url("git://example.com")); +} + +UTEST(is_safe_url, RejectsInvalidSchemes) { + EXPECT_FALSE(is_safe_url("ftp://example.com")); + EXPECT_FALSE(is_safe_url("javascript:alert(1)")); + EXPECT_FALSE(is_safe_url("example.com")); + EXPECT_FALSE(is_safe_url("http:/example.com")); + EXPECT_FALSE(is_safe_url("http//example.com")); + EXPECT_FALSE(is_safe_url("")); +} + +UTEST(is_safe_mailto, AllowsValidEmails) { + EXPECT_TRUE(is_safe_mailto("test@example.com")); + EXPECT_TRUE(is_safe_mailto("test.user+alias@example.co.uk")); +} + +UTEST(is_safe_mailto, RejectsInvalidCharacters) { + EXPECT_FALSE(is_safe_mailto("test<script>@example.com")); + EXPECT_FALSE(is_safe_mailto("test>@example.com")); + EXPECT_FALSE(is_safe_mailto("test\"@example.com")); + EXPECT_FALSE(is_safe_mailto("test'@example.com")); + EXPECT_FALSE(is_safe_mailto("test\\@example.com")); + EXPECT_FALSE(is_safe_mailto("test@example.com\n")); +} diff --git a/writer/html/commit.c b/writer/html/commit.c @@ -109,11 +109,18 @@ void html_commit_write_summary(HtmlCommit* commit, fprintf(out, "<b>Author:</b> "); print_xml_encoded(out, gitcommit_author_name(git_commit)); - fprintf(out, " &lt;<a href=\"mailto:"); - print_xml_encoded(out, gitcommit_author_email(git_commit)); - fprintf(out, "\">"); - print_xml_encoded(out, gitcommit_author_email(git_commit)); - fprintf(out, "</a>&gt;\n"); + const char* email = gitcommit_author_email(git_commit); + if (is_safe_mailto(email)) { + fprintf(out, " &lt;<a href=\"mailto:"); + print_xml_encoded(out, email); + fprintf(out, "\">"); + print_xml_encoded(out, email); + fprintf(out, "</a>&gt;\n"); + } else { + fprintf(out, " &lt;"); + print_xml_encoded(out, email); + fprintf(out, "&gt;\n"); + } fprintf(out, "<b>Date:</b> "); print_time(out, gitcommit_author_time(git_commit), gitcommit_author_timezone_offset(git_commit)); diff --git a/writer/html/page.c b/writer/html/page.c @@ -88,11 +88,17 @@ void html_page_begin(HtmlPage* page) { const char* clone_url = gitrepo_clone_url(page->repo); if (clone_url[0] != '\0') { - fprintf(out, "<tr class=\"url\"><td></td><td>git clone <a href=\""); - print_xml_encoded(out, clone_url); - fprintf(out, "\">"); - print_xml_encoded(out, clone_url); - fprintf(out, "</a></td></tr>"); + fprintf(out, "<tr class=\"url\"><td></td><td>git clone "); + if (is_safe_url(clone_url)) { + fprintf(out, "<a href=\""); + print_xml_encoded(out, clone_url); + fprintf(out, "\">"); + print_xml_encoded(out, clone_url); + fprintf(out, "</a>"); + } else { + print_xml_encoded(out, clone_url); + } + fprintf(out, "</td></tr>"); } fprintf(out, "<tr><td></td><td>\n"); fprintf(out, "<a href=\"%slog.html\">Log</a> | ", relpath);