commit 3e3a7690b515a054ca79df9e2c17e8c71a72470d
parent 4100698bbc792f50bb38eca67839a37b0932f7be
Author: Chris Bracken <chris@bracken.jp>
Date: Wed, 23 Jul 2025 19:36:44 -0700
Validate URLs and email addresses
Adds validation or URLs and email addresses emitted in HTML <a> links.
* Verifies that only http, https, and git URL schemes are allowed.
* Verifies that risky email addresses are not emitted in links.
If a malicious user created a commit with an email address of the form
`javascript:alert('foo')`, we'd emit an <a> tag like:
<a href="mailto:javascript:alert('foo')">...</a>
Most modern browsers will protect against this, but it's not guaranteed.
One could imagine naughtier applications than an alert.
Diffstat:
5 files changed, 70 insertions(+), 10 deletions(-)
diff --git a/utils.c b/utils.c
@@ -125,3 +125,15 @@ bool is_safe_repo_path(const char* path) {
}
return true;
}
+
+bool is_safe_url(const char* url) {
+ return strncmp(url, "http://", 7) == 0 || strncmp(url, "https://", 8) == 0 ||
+ strncmp(url, "git://", 6) == 0;
+}
+
+bool is_safe_mailto(const char* email) {
+ /* A basic sanity check. We don't need a full RFC 5322 validator. We just
+ * want to prevent injection of HTML/JS. Disallow characters that are special
+ * in HTML or could be used to craft malicious URIs. */
+ return strpbrk(email, "<>\"'\\\r\n") == NULL;
+}
diff --git a/utils.h b/utils.h
@@ -31,4 +31,10 @@ void checkfileerror(FILE* fp, const char* name, char mode);
/* Validates that a path is safe to use. Returns true if safe. */
bool is_safe_repo_path(const char* path);
+/* Returns true if the URL has a safe scheme. */
+bool is_safe_url(const char* url);
+
+/* Returns true if the email address is safe to use in a mailto: link. */
+bool is_safe_mailto(const char* email);
+
#endif // GITOUT_UTILS_H_
diff --git a/utils_tests.c b/utils_tests.c
@@ -43,3 +43,32 @@ UTEST(is_safe_repo_path, RejectsInvalidPaths) {
EXPECT_FALSE(is_safe_repo_path("foo/../bar"));
EXPECT_FALSE(is_safe_repo_path("foo/./../bar"));
}
+
+UTEST(is_safe_url, AllowsValidSchemes) {
+ EXPECT_TRUE(is_safe_url("http://example.com"));
+ EXPECT_TRUE(is_safe_url("https://example.com"));
+ EXPECT_TRUE(is_safe_url("git://example.com"));
+}
+
+UTEST(is_safe_url, RejectsInvalidSchemes) {
+ EXPECT_FALSE(is_safe_url("ftp://example.com"));
+ EXPECT_FALSE(is_safe_url("javascript:alert(1)"));
+ EXPECT_FALSE(is_safe_url("example.com"));
+ EXPECT_FALSE(is_safe_url("http:/example.com"));
+ EXPECT_FALSE(is_safe_url("http//example.com"));
+ EXPECT_FALSE(is_safe_url(""));
+}
+
+UTEST(is_safe_mailto, AllowsValidEmails) {
+ EXPECT_TRUE(is_safe_mailto("test@example.com"));
+ EXPECT_TRUE(is_safe_mailto("test.user+alias@example.co.uk"));
+}
+
+UTEST(is_safe_mailto, RejectsInvalidCharacters) {
+ EXPECT_FALSE(is_safe_mailto("test<script>@example.com"));
+ EXPECT_FALSE(is_safe_mailto("test>@example.com"));
+ EXPECT_FALSE(is_safe_mailto("test\"@example.com"));
+ EXPECT_FALSE(is_safe_mailto("test'@example.com"));
+ EXPECT_FALSE(is_safe_mailto("test\\@example.com"));
+ EXPECT_FALSE(is_safe_mailto("test@example.com\n"));
+}
diff --git a/writer/html/commit.c b/writer/html/commit.c
@@ -109,11 +109,18 @@ void html_commit_write_summary(HtmlCommit* commit,
fprintf(out, "<b>Author:</b> ");
print_xml_encoded(out, gitcommit_author_name(git_commit));
- fprintf(out, " <<a href=\"mailto:");
- print_xml_encoded(out, gitcommit_author_email(git_commit));
- fprintf(out, "\">");
- print_xml_encoded(out, gitcommit_author_email(git_commit));
- fprintf(out, "</a>>\n");
+ const char* email = gitcommit_author_email(git_commit);
+ if (is_safe_mailto(email)) {
+ fprintf(out, " <<a href=\"mailto:");
+ print_xml_encoded(out, email);
+ fprintf(out, "\">");
+ print_xml_encoded(out, email);
+ fprintf(out, "</a>>\n");
+ } else {
+ fprintf(out, " <");
+ print_xml_encoded(out, email);
+ fprintf(out, ">\n");
+ }
fprintf(out, "<b>Date:</b> ");
print_time(out, gitcommit_author_time(git_commit),
gitcommit_author_timezone_offset(git_commit));
diff --git a/writer/html/page.c b/writer/html/page.c
@@ -88,11 +88,17 @@ void html_page_begin(HtmlPage* page) {
const char* clone_url = gitrepo_clone_url(page->repo);
if (clone_url[0] != '\0') {
- fprintf(out, "<tr class=\"url\"><td></td><td>git clone <a href=\"");
- print_xml_encoded(out, clone_url);
- fprintf(out, "\">");
- print_xml_encoded(out, clone_url);
- fprintf(out, "</a></td></tr>");
+ fprintf(out, "<tr class=\"url\"><td></td><td>git clone ");
+ if (is_safe_url(clone_url)) {
+ fprintf(out, "<a href=\"");
+ print_xml_encoded(out, clone_url);
+ fprintf(out, "\">");
+ print_xml_encoded(out, clone_url);
+ fprintf(out, "</a>");
+ } else {
+ print_xml_encoded(out, clone_url);
+ }
+ fprintf(out, "</td></tr>");
}
fprintf(out, "<tr><td></td><td>\n");
fprintf(out, "<a href=\"%slog.html\">Log</a> | ", relpath);