commit 50c95f1beffa3d80a0e636a28c23dd51a6bd7803
parent 4d4e88bfa26de6a638e4f3c0e3b7c315d29e9e50
Author: Michael F. Schönitzer <michael@schoenitzer.de>
Date: Sun, 17 Jun 2018 16:10:44 +0200
Remove all protocols and tlds on URL-cleaning
Also make removal of protocol, tld and 'www' more robust against
false positives by considering the position in the url.
Diffstat:
1 file changed, 10 insertions(+), 13 deletions(-)
diff --git a/autoload/vimwiki/base.vim b/autoload/vimwiki/base.vim
@@ -1858,20 +1858,17 @@ endfunction
function! s:clean_url(url)
- let url = split(a:url, '/\|=\|-\|&\|?\|\.')
+ " remove protocol and tld
+ let url = substitute(a:url, '^\a\+://', '', '')
+ let url = substitute(url, '^\([^/]\+\).\a\{2,4}/', '\1/', '')
+ let url = split(url, '/\|=\|-\|&\|?\|\.')
let url = filter(url, 'v:val !=# ""')
- let url = filter(url, 'v:val !=# "www"')
- let url = filter(url, 'v:val !=# "com"')
- let url = filter(url, 'v:val !=# "org"')
- let url = filter(url, 'v:val !=# "net"')
- let url = filter(url, 'v:val !=# "edu"')
- let url = filter(url, 'v:val !=# "http\:"')
- let url = filter(url, 'v:val !=# "https\:"')
- let url = filter(url, 'v:val !=# "file\:"')
- let url = filter(url, 'v:val !=# "xml\:"')
- let url = filter(url, 'v:val !=# "html"')
- let url = filter(url, 'v:val !=# "htm"')
- let url = filter(url, 'v:val !=# "php"')
+ if url[0] == "www"
+ let url = url[1:]
+ endif
+ if url[-1] =~ '^\(htm\|html\|php\)$'
+ let url = url[0:-2]
+ endif
" remove words consisting of only hexadecimal digits or non-word characters
let url = filter(url, 'v:val !~ "^\\A\\{4,}$"')
let url = filter(url, 'v:val !~ "^\\x\\{4,}$" || v:val !~ "\\d"')