From 054ba3c48729036fd8ab1166fb0b1d0b4dd1e465 Mon Sep 17 00:00:00 2001 From: Aidan Woods Date: Wed, 3 May 2017 17:01:27 +0100 Subject: [PATCH] urlencode urls that are potentially unsafe: this should break urls that attempt to include a protocol, or port (these are absolute URLs and should have a whitelisted protocol for use) but URLs that are relative, or relative from the site root should be preserved (though characters non essential for the URL structure may be urlencoded) this approach has significant advantages over attempting to locate something like `javascript:alert(1)` or `javascript:alert(1)` (which are both valid) because browsers have been known to ignore ridiculous characters when encountered (meaning something like `jav\ta\0\0script:alert(1)` would be xss :( ). Instead of trying to chase down a way to interpret a URL to decide whether there is a protocol, this approach ensures that two essential characters needed to achieve a colon are encoded `:` (obviously) and `;` (from `:`). If these characters appear in a relative URL then they are equivalent to their URL encoded form and so this change will be non breaking for that case. --- Parsedown.php | 10 ++++++++-- test/data/inline_link.html | 2 +- test/data/xss_bad_url.html | 32 ++++++++++++++++---------------- 3 files changed, 25 insertions(+), 19 deletions(-) diff --git a/Parsedown.php b/Parsedown.php index 0bd81e2..702b041 100644 --- a/Parsedown.php +++ b/Parsedown.php @@ -87,7 +87,6 @@ class Parsedown protected $safeLinksWhitelist = array( 'http://', 'https://', - '/', 'ftp://', 'ftps://', 'mailto:', @@ -1554,7 +1553,14 @@ class Parsedown if ( ! $safe) { - unset($Element['attributes'][$attribute]); + $Element['attributes'][$attribute] = preg_replace_callback( + '/[^\/#?&=%]++/', + function (array $match) + { + return urlencode($match[0]); + }, + $Element['attributes'][$attribute] + ); } } diff --git a/test/data/inline_link.html b/test/data/inline_link.html index cef29cf..7a3131b 100644 --- a/test/data/inline_link.html +++ b/test/data/inline_link.html @@ -1,5 +1,5 @@

link

-

link with parentheses in URL

+

link with parentheses in URL

(link) in parentheses

link

MD Logo

diff --git a/test/data/xss_bad_url.html b/test/data/xss_bad_url.html index 93dd0d8..8e43877 100644 --- a/test/data/xss_bad_url.html +++ b/test/data/xss_bad_url.html @@ -1,16 +1,16 @@ -

xss

-

xss

-

xss

-

xss

-

xss

-

xss

-

xss

-

xss

-

xss

-

xss

-

xss

-

xss

-

xss

-

xss

-

xss

-

xss

\ No newline at end of file +

xss

+

xss

+

xss

+

xss

+

xss

+

xss

+

xss

+

xss

+

xss

+

xss

+

xss

+

xss

+

xss

+

xss

+

xss

+

xss

\ No newline at end of file