diff --git a/.gitattributes b/.gitattributes new file mode 100644 index 0000000..c1a5d9a --- /dev/null +++ b/.gitattributes @@ -0,0 +1,6 @@ +# Ignore all tests for archive +/test export-ignore +/.gitattributes export-ignore +/.gitignore export-ignore +/.travis.yml export-ignore +/phpunit.xml.dist export-ignore diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..d8a7996 --- /dev/null +++ b/.gitignore @@ -0,0 +1,2 @@ +composer.lock +vendor/ diff --git a/.travis.yml b/.travis.yml index fa5ca98..b9361a6 100644 --- a/.travis.yml +++ b/.travis.yml @@ -1,16 +1,29 @@ language: php -php: - - 7.1 - - 7.0 - - 5.6 - - 5.5 - - 5.4 - - 5.3 - - hhvm - - hhvm-nightly +dist: trusty +sudo: false matrix: + include: + - php: 5.3 + dist: precise + - php: 5.4 + - php: 5.5 + - php: 5.6 + - php: 7.0 + - php: 7.1 + - php: nightly + - php: hhvm + - php: hhvm-nightly fast_finish: true allow_failures: + - php: nightly - php: hhvm-nightly + +install: + - composer install --prefer-dist --no-interaction --no-progress + +script: + - vendor/bin/phpunit + - vendor/bin/phpunit test/CommonMarkTestWeak.php || true + - '[ -z "$TRAVIS_TAG" ] || [ "$TRAVIS_TAG" == "$(php -r "require(\"Parsedown.php\"); echo Parsedown::version;")" ]' diff --git a/LICENSE.txt b/LICENSE.txt index baca86f..8e7c764 100644 --- a/LICENSE.txt +++ b/LICENSE.txt @@ -1,6 +1,6 @@ The MIT License (MIT) -Copyright (c) 2013 Emanuil Rusev, erusev.com +Copyright (c) 2013-2018 Emanuil Rusev, erusev.com Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in @@ -17,4 +17,4 @@ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN -CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. \ No newline at end of file +CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. diff --git a/Parsedown.php b/Parsedown.php index de80f3f..0d44669 100644 --- a/Parsedown.php +++ b/Parsedown.php @@ -17,7 +17,7 @@ class Parsedown { # ~ - const version = '1.6.0'; + const version = '1.7.1'; # ~ @@ -75,6 +75,32 @@ class Parsedown protected $urlsLinked = true; + function setSafeMode($safeMode) + { + $this->safeMode = (bool) $safeMode; + + return $this; + } + + protected $safeMode; + + protected $safeLinksWhitelist = array( + 'http://', + 'https://', + 'ftp://', + 'ftps://', + 'mailto:', + 'data:image/png;base64,', + 'data:image/gif;base64,', + 'data:image/jpeg;base64,', + 'irc:', + 'ircs:', + 'git:', + 'ssh:', + 'news:', + 'steam:', + ); + # # Lines # @@ -342,8 +368,6 @@ class Parsedown { $text = $Block['element']['text']['text']; - $text = htmlspecialchars($text, ENT_NOQUOTES, 'UTF-8'); - $Block['element']['text']['text'] = $text; return $Block; @@ -354,7 +378,7 @@ class Parsedown protected function blockComment($Line) { - if ($this->markupEscaped) + if ($this->markupEscaped or $this->safeMode) { return; } @@ -396,7 +420,7 @@ class Parsedown protected function blockFencedCode($Line) { - if (preg_match('/^['.$Line['text'][0].']{3,}[ ]*([\w-]+)?[ ]*$/', $Line['text'], $matches)) + if (preg_match('/^['.$Line['text'][0].']{3,}[ ]*([^`]+)?[ ]*$/', $Line['text'], $matches)) { $Element = array( 'name' => 'code', @@ -448,7 +472,7 @@ class Parsedown return $Block; } - $Block['element']['text']['text'] .= "\n".$Line['body'];; + $Block['element']['text']['text'] .= "\n".$Line['body']; return $Block; } @@ -457,8 +481,6 @@ class Parsedown { $text = $Block['element']['text']['text']; - $text = htmlspecialchars($text, ENT_NOQUOTES, 'UTF-8'); - $Block['element']['text']['text'] = $text; return $Block; @@ -533,7 +555,7 @@ class Parsedown ), ); - if($name === 'ol') + if($name === 'ol') { $listStart = ltrim(strstr($matches[1], $Block['data']['markerType'], true), '0') ?: '0'; @@ -584,6 +606,8 @@ class Parsedown { $Block['li']['text'] []= ''; + $Block['loose'] = true; + unset($Block['interrupted']); } @@ -641,6 +665,22 @@ class Parsedown } } + protected function blockListComplete(array $Block) + { + if (isset($Block['loose'])) + { + foreach ($Block['element']['text'] as &$li) + { + if (end($li['text']) !== '') + { + $li['text'] []= ''; + } + } + } + + return $Block; + } + # # Quote @@ -711,8 +751,10 @@ class Parsedown return; } - if (chop($Line['text'], $Line['text'][0]) === '') - { + if ( + chop(chop($Line['text'], ' '), $Line['text'][0]) === '' + and $Line['indent'] < 4 + ) { $Block['element']['name'] = $Line['text'][0] === '=' ? 'h1' : 'h2'; return $Block; @@ -724,12 +766,12 @@ class Parsedown protected function blockMarkup($Line) { - if ($this->markupEscaped) + if ($this->markupEscaped or $this->safeMode) { return; } - if (preg_match('/^<(\w*)(?:[ ]*'.$this->regexHtmlAttribute.')*[ ]*(\/)?>/', $Line['text'], $matches)) + if (preg_match('/^<[\/]?+(\w*)(?:[ ]*'.$this->regexHtmlAttribute.')*[ ]*(\/)?>/', $Line['text'], $matches)) { $element = strtolower($matches[1]); @@ -740,71 +782,20 @@ class Parsedown $Block = array( 'name' => $matches[1], - 'depth' => 0, 'markup' => $Line['text'], ); - $length = strlen($matches[0]); - - $remainder = substr($Line['text'], $length); - - if (trim($remainder) === '') - { - if (isset($matches[2]) or in_array($matches[1], $this->voidElements)) - { - $Block['closed'] = true; - - $Block['void'] = true; - } - } - else - { - if (isset($matches[2]) or in_array($matches[1], $this->voidElements)) - { - return; - } - - if (preg_match('/<\/'.$matches[1].'>[ ]*$/i', $remainder)) - { - $Block['closed'] = true; - } - } - return $Block; } } protected function blockMarkupContinue($Line, array $Block) { - if (isset($Block['closed'])) + if (isset($Block['closed']) or isset($Block['interrupted'])) { return; } - if (preg_match('/^<'.$Block['name'].'(?:[ ]*'.$this->regexHtmlAttribute.')*[ ]*>/i', $Line['text'])) # open - { - $Block['depth'] ++; - } - - if (preg_match('/(.*?)<\/'.$Block['name'].'>[ ]*$/i', $Line['text'], $matches)) # close - { - if ($Block['depth'] > 0) - { - $Block['depth'] --; - } - else - { - $Block['closed'] = true; - } - } - - if (isset($Block['interrupted'])) - { - $Block['markup'] .= "\n"; - - unset($Block['interrupted']); - } - $Block['markup'] .= "\n".$Line['body']; return $Block; @@ -1043,7 +1034,7 @@ class Parsedown # ~ # - public function line($text) + public function line($text, $nonNestables=array()) { $markup = ''; @@ -1059,6 +1050,13 @@ class Parsedown foreach ($this->InlineTypes[$marker] as $inlineType) { + # check to see if the current inline type is nestable in the current context + + if ( ! empty($nonNestables) and in_array($inlineType, $nonNestables)) + { + continue; + } + $Inline = $this->{'inline'.$inlineType}($Excerpt); if ( ! isset($Inline)) @@ -1080,6 +1078,13 @@ class Parsedown $Inline['position'] = $markerPosition; } + # cause the new element to 'inherit' our non nestables + + foreach ($nonNestables as $non_nestable) + { + $Inline['element']['nonNestables'][] = $non_nestable; + } + # the text that comes before the inline $unmarkedText = substr($text, 0, $Inline['position']); @@ -1120,7 +1125,6 @@ class Parsedown if (preg_match('/^('.$marker.'+)[ ]*(.+?)[ ]*(?') !== false and preg_match('/^<((mailto:)?\S+?@\S+?)>/i', $Excerpt['text'], $matches)) - { + $hostnameLabel = '[a-zA-Z0-9](?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?'; + + $commonMarkEmail = '[a-zA-Z0-9.!#$%&\'*+\/=?^_`{|}~-]++@' + . $hostnameLabel . '(?:\.' . $hostnameLabel . ')*'; + + if (strpos($Excerpt['text'], '>') !== false + and preg_match("/^<((mailto:)?$commonMarkEmail)>/i", $Excerpt['text'], $matches) + ){ $url = $matches[1]; if ( ! isset($matches[2])) @@ -1239,6 +1249,7 @@ class Parsedown $Element = array( 'name' => 'a', 'handler' => 'line', + 'nonNestables' => array('Url', 'Link'), 'text' => null, 'attributes' => array( 'href' => null, @@ -1250,7 +1261,7 @@ class Parsedown $remainder = $Excerpt['text']; - if (preg_match('/\[((?:[^][]|(?R))*)\]/', $remainder, $matches)) + if (preg_match('/\[((?:[^][]++|(?R))*+)\]/', $remainder, $matches)) { $Element['text'] = $matches[1]; @@ -1263,7 +1274,7 @@ class Parsedown return; } - if (preg_match('/^[(]((?:[^ ()]|[(][^ )]+[)])+)(?:[ ]+("[^"]*"|\'[^\']*\'))?[)]/', $remainder, $matches)) + if (preg_match('/^[(]\s*+((?:[^ ()]++|[(][^ )]+[)])++)(?:[ ]+("[^"]*"|\'[^\']*\'))?\s*[)]/', $remainder, $matches)) { $Element['attributes']['href'] = $matches[1]; @@ -1299,8 +1310,6 @@ class Parsedown $Element['attributes']['title'] = $Definition['title']; } - $Element['attributes']['href'] = str_replace(array('&', '<'), array('&', '<'), $Element['attributes']['href']); - return array( 'extent' => $extent, 'element' => $Element, @@ -1309,12 +1318,12 @@ class Parsedown protected function inlineMarkup($Excerpt) { - if ($this->markupEscaped or strpos($Excerpt['text'], '>') === false) + if ($this->markupEscaped or $this->safeMode or strpos($Excerpt['text'], '>') === false) { return; } - if ($Excerpt['text'][1] === '/' and preg_match('/^<\/\w*[ ]*>/s', $Excerpt['text'], $matches)) + if ($Excerpt['text'][1] === '/' and preg_match('/^<\/\w[\w-]*[ ]*>/s', $Excerpt['text'], $matches)) { return array( 'markup' => $matches[0], @@ -1330,7 +1339,7 @@ class Parsedown ); } - if ($Excerpt['text'][1] !== ' ' and preg_match('/^<\w*(?:[ ]*'.$this->regexHtmlAttribute.')*[ ]*\/?>/s', $Excerpt['text'], $matches)) + if ($Excerpt['text'][1] !== ' ' and preg_match('/^<\w[\w-]*(?:[ ]*'.$this->regexHtmlAttribute.')*[ ]*\/?>/s', $Excerpt['text'], $matches)) { return array( 'markup' => $matches[0], @@ -1389,14 +1398,16 @@ class Parsedown if (preg_match('/\bhttps?:[\/]{2}[^\s<]+\b\/*/ui', $Excerpt['context'], $matches, PREG_OFFSET_CAPTURE)) { + $url = $matches[0][0]; + $Inline = array( 'extent' => strlen($matches[0][0]), 'position' => $matches[0][1], 'element' => array( 'name' => 'a', - 'text' => $matches[0][0], + 'text' => $url, 'attributes' => array( - 'href' => $matches[0][0], + 'href' => $url, ), ), ); @@ -1409,7 +1420,7 @@ class Parsedown { if (strpos($Excerpt['text'], '>') !== false and preg_match('/^<(\w+:\/{2}[^ >]+)>/i', $Excerpt['text'], $matches)) { - $url = str_replace(array('&', '<'), array('&', '<'), $matches[1]); + $url = $matches[1]; return array( 'extent' => strlen($matches[0]), @@ -1447,37 +1458,74 @@ class Parsedown protected function element(array $Element) { - $markup = '<'.$Element['name']; - - if (isset($Element['attributes'])) + if ($this->safeMode) { - foreach ($Element['attributes'] as $name => $value) - { - if ($value === null) - { - continue; - } + $Element = $this->sanitiseElement($Element); + } - $markup .= ' '.$name.'="'.$value.'"'; + $hasName = isset($Element['name']); + + $markup = ''; + + if ($hasName) + { + $markup .= '<'.$Element['name']; + + if (isset($Element['attributes'])) + { + foreach ($Element['attributes'] as $name => $value) + { + if ($value === null) + { + continue; + } + + $markup .= ' '.$name.'="'.self::escape($value).'"'; + } } } + $permitRawHtml = false; + if (isset($Element['text'])) { - $markup .= '>'; + $text = $Element['text']; + } + // very strongly consider an alternative if you're writing an + // extension + elseif (isset($Element['rawHtml'])) + { + $text = $Element['rawHtml']; + + $allowRawHtmlInSafeMode = isset($Element['allowRawHtmlInSafeMode']) && $Element['allowRawHtmlInSafeMode']; + $permitRawHtml = !$this->safeMode || $allowRawHtmlInSafeMode; + } + + if (isset($text)) + { + $markup .= $hasName ? '>' : ''; + + if (!isset($Element['nonNestables'])) + { + $Element['nonNestables'] = array(); + } if (isset($Element['handler'])) { - $markup .= $this->{$Element['handler']}($Element['text']); + $markup .= $this->{$Element['handler']}($text, $Element['nonNestables']); + } + elseif (!$permitRawHtml) + { + $markup .= self::escape($text, true); } else { - $markup .= $Element['text']; + $markup .= $text; } - $markup .= ''; + $markup .= $hasName ? '' : ''; } - else + elseif ($hasName) { $markup .= ' />'; } @@ -1531,10 +1579,77 @@ class Parsedown return $markup; } + protected function sanitiseElement(array $Element) + { + static $goodAttribute = '/^[a-zA-Z0-9][a-zA-Z0-9-_]*+$/'; + static $safeUrlNameToAtt = array( + 'a' => 'href', + 'img' => 'src', + ); + + if (isset($safeUrlNameToAtt[$Element['name']])) + { + $Element = $this->filterUnsafeUrlInAttribute($Element, $safeUrlNameToAtt[$Element['name']]); + } + + if ( ! empty($Element['attributes'])) + { + foreach ($Element['attributes'] as $att => $val) + { + # filter out badly parsed attribute + if ( ! preg_match($goodAttribute, $att)) + { + unset($Element['attributes'][$att]); + } + # dump onevent attribute + elseif (self::striAtStart($att, 'on')) + { + unset($Element['attributes'][$att]); + } + } + } + + return $Element; + } + + protected function filterUnsafeUrlInAttribute(array $Element, $attribute) + { + foreach ($this->safeLinksWhitelist as $scheme) + { + if (self::striAtStart($Element['attributes'][$attribute], $scheme)) + { + return $Element; + } + } + + $Element['attributes'][$attribute] = str_replace(':', '%3A', $Element['attributes'][$attribute]); + + return $Element; + } + # # Static Methods # + protected static function escape($text, $allowQuotes = false) + { + return htmlspecialchars($text, $allowQuotes ? ENT_NOQUOTES : ENT_QUOTES, 'UTF-8'); + } + + protected static function striAtStart($string, $needle) + { + $len = strlen($needle); + + if ($len > strlen($string)) + { + return false; + } + else + { + return strtolower(substr($string, 0, $len)) === strtolower($needle); + } + } + static function instance($name = 'default') { if (isset(self::$instances[$name])) @@ -1585,10 +1700,10 @@ class Parsedown 'b', 'em', 'big', 'cite', 'small', 'spacer', 'listing', 'i', 'rp', 'del', 'code', 'strike', 'marquee', 'q', 'rt', 'ins', 'font', 'strong', - 's', 'tt', 'sub', 'mark', - 'u', 'xm', 'sup', 'nobr', - 'var', 'ruby', - 'wbr', 'span', - 'time', + 's', 'tt', 'kbd', 'mark', + 'u', 'xm', 'sub', 'nobr', + 'sup', 'ruby', + 'var', 'span', + 'wbr', 'time', ); } diff --git a/README.md b/README.md index 7ef230f..9a4bb36 100644 --- a/README.md +++ b/README.md @@ -1,4 +1,4 @@ -> You might also like [Caret](http://caret.io?ref=parsedown) - our Markdown editor for Mac / Windows / Linux. +> I also make [Caret](https://caret.io?ref=parsedown) - a Markdown editor for Mac and PC. ## Parsedown @@ -15,6 +15,7 @@ Better Markdown Parser in PHP ### Features * One File +* No Dependencies * Super Fast * Extensible * [GitHub flavored](https://help.github.com/articles/github-flavored-markdown) @@ -31,15 +32,46 @@ Include `Parsedown.php` or install [the composer package](https://packagist.org/ $Parsedown = new Parsedown(); echo $Parsedown->text('Hello _Parsedown_!'); # prints:

Hello Parsedown!

+// you can also parse inline markdown only +echo $Parsedown->line('Hello _Parsedown_!'); # prints: Hello Parsedown! ``` More examples in [the wiki](https://github.com/erusev/parsedown/wiki/) and in [this video tutorial](http://youtu.be/wYZBY8DEikI). +### Security + +Parsedown is capable of escaping user-input within the HTML that it generates. Additionally Parsedown will apply sanitisation to additional scripting vectors (such as scripting link destinations) that are introduced by the markdown syntax itself. + +To tell Parsedown that it is processing untrusted user-input, use the following: +```php +$parsedown = new Parsedown; +$parsedown->setSafeMode(true); +``` + +If instead, you wish to allow HTML within untrusted user-input, but still want output to be free from XSS it is recommended that you make use of a HTML sanitiser that allows HTML tags to be whitelisted, like [HTML Purifier](http://htmlpurifier.org/). + +In both cases you should strongly consider employing defence-in-depth measures, like [deploying a Content-Security-Policy](https://scotthelme.co.uk/content-security-policy-an-introduction/) (a browser security feature) so that your page is likely to be safe even if an attacker finds a vulnerability in one of the first lines of defence above. + +#### Security of Parsedown Extensions + +Safe mode does not necessarily yield safe results when using extensions to Parsedown. Extensions should be evaluated on their own to determine their specific safety against XSS. + +### Escaping HTML +> ⚠️  **WARNING:** This method isn't safe from XSS! + +If you wish to escape HTML **in trusted input**, you can use the following: +```php +$parsedown = new Parsedown; +$parsedown->setMarkupEscaped(true); +``` + +Beware that this still allows users to insert unsafe scripting vectors, such as links like `[xss](javascript:alert%281%29)`. + ### Questions **How does Parsedown work?** -It tries to read Markdown like a human. First, it looks at the lines. It’s interested in how the lines start. This helps it recognise blocks. It knows, for example, that if a line start with a `-` then it perhaps belong to a list. Once it recognises the blocks, it continues to the content. As it reads, it watches out for special characters. This helps it recognise inline elements (or inlines). +It tries to read Markdown like a human. First, it looks at the lines. It’s interested in how the lines start. This helps it recognise blocks. It knows, for example, that if a line starts with a `-` then perhaps it belongs to a list. Once it recognises the blocks, it continues to the content. As it reads, it watches out for special characters. This helps it recognise inline elements (or inlines). We call this approach "line based". We believe that Parsedown is the first Markdown parser to use it. Since the release of Parsedown, other developers have used the same approach to develop other Markdown parsers in PHP and in other languages. @@ -49,7 +81,7 @@ It passes most of the CommonMark tests. Most of the tests that don't pass deal w **Who uses it?** -[phpDocumentor](http://www.phpdoc.org/), [October CMS](http://octobercms.com/), [Bolt CMS](http://bolt.cm/), [Kirby CMS](http://getkirby.com/), [Grav CMS](http://getgrav.org/), [Statamic CMS](http://www.statamic.com/), [Herbie CMS](http://www.getherbie.org/), [RaspberryPi.org](http://www.raspberrypi.org/) and [more](https://packagist.org/packages/erusev/parsedown/dependents). +[Laravel Framework](https://laravel.com/), [Bolt CMS](http://bolt.cm/), [Grav CMS](http://getgrav.org/), [Herbie CMS](http://www.getherbie.org/), [Kirby CMS](http://getkirby.com/), [October CMS](http://octobercms.com/), [Pico CMS](http://picocms.org), [Statamic CMS](http://www.statamic.com/), [phpDocumentor](http://www.phpdoc.org/), [RaspberryPi.org](http://www.raspberrypi.org/), [Symfony demo](https://github.com/symfony/symfony-demo) and [more](https://packagist.org/packages/erusev/parsedown/dependents). **How can I help?** diff --git a/composer.json b/composer.json index 28145af..f8b40f8 100644 --- a/composer.json +++ b/composer.json @@ -13,9 +13,21 @@ } ], "require": { - "php": ">=5.3.0" + "php": ">=5.3.0", + "ext-mbstring": "*" + }, + "require-dev": { + "phpunit/phpunit": "^4.8.35" }, "autoload": { "psr-0": {"Parsedown": ""} + }, + "autoload-dev": { + "psr-0": { + "TestParsedown": "test/", + "ParsedownTest": "test/", + "CommonMarkTest": "test/", + "CommonMarkTestWeak": "test/" + } } } diff --git a/phpunit.xml.dist b/phpunit.xml.dist index b2d5e9d..4fe3177 100644 --- a/phpunit.xml.dist +++ b/phpunit.xml.dist @@ -1,8 +1,8 @@ - + test/ParsedownTest.php - \ No newline at end of file + diff --git a/test/CommonMarkTest.php b/test/CommonMarkTest.php deleted file mode 100644 index 9b8d116..0000000 --- a/test/CommonMarkTest.php +++ /dev/null @@ -1,74 +0,0 @@ -setUrlsLinked(false); - - $actualHtml = $Parsedown->text($markdown); - $actualHtml = $this->normalizeMarkup($actualHtml); - - $this->assertEquals($expectedHtml, $actualHtml); - } - - function data() - { - $spec = file_get_contents(self::SPEC_URL); - $spec = strstr($spec, '', true); - - $tests = array(); - $currentSection = ''; - - preg_replace_callback( - '/^\.\n([\s\S]*?)^\.\n([\s\S]*?)^\.$|^#{1,6} *(.*)$/m', - function($matches) use ( & $tests, & $currentSection, & $testCount) { - if (isset($matches[3]) and $matches[3]) { - $currentSection = $matches[3]; - } else { - $testCount++; - $markdown = $matches[1]; - $markdown = preg_replace('/→/', "\t", $markdown); - $expectedHtml = $matches[2]; - $expectedHtml = $this->normalizeMarkup($expectedHtml); - $tests []= array( - $currentSection, # section - $markdown, # markdown - $expectedHtml, # html - ); - } - }, - $spec - ); - - return $tests; - } - - private function normalizeMarkup($markup) - { - $markup = preg_replace("/\n+/", "\n", $markup); - $markup = preg_replace('/^\s+/m', '', $markup); - $markup = preg_replace('/^((?:<[\w]+>)+)\n/m', '$1', $markup); - $markup = preg_replace('/\n((?:<\/[\w]+>)+)$/m', '$1', $markup); - $markup = trim($markup); - - return $markup; - } -} diff --git a/test/CommonMarkTestStrict.php b/test/CommonMarkTestStrict.php new file mode 100644 index 0000000..3837738 --- /dev/null +++ b/test/CommonMarkTestStrict.php @@ -0,0 +1,71 @@ +parsedown = new TestParsedown(); + $this->parsedown->setUrlsLinked(false); + } + + /** + * @dataProvider data + * @param $id + * @param $section + * @param $markdown + * @param $expectedHtml + */ + public function testExample($id, $section, $markdown, $expectedHtml) + { + $actualHtml = $this->parsedown->text($markdown); + $this->assertEquals($expectedHtml, $actualHtml); + } + + /** + * @return array + */ + public function data() + { + $spec = file_get_contents(self::SPEC_URL); + if ($spec === false) { + $this->fail('Unable to load CommonMark spec from ' . self::SPEC_URL); + } + + $spec = str_replace("\r\n", "\n", $spec); + $spec = strstr($spec, '', true); + + $matches = array(); + preg_match_all('/^`{32} example\n((?s).*?)\n\.\n(?:|((?s).*?)\n)`{32}$|^#{1,6} *(.*?)$/m', $spec, $matches, PREG_SET_ORDER); + + $data = array(); + $currentId = 0; + $currentSection = ''; + foreach ($matches as $match) { + if (isset($match[3])) { + $currentSection = $match[3]; + } else { + $currentId++; + $markdown = str_replace('→', "\t", $match[1]); + $expectedHtml = isset($match[2]) ? str_replace('→', "\t", $match[2]) : ''; + + $data[$currentId] = array( + 'id' => $currentId, + 'section' => $currentSection, + 'markdown' => $markdown, + 'expectedHtml' => $expectedHtml + ); + } + } + + return $data; + } +} diff --git a/test/CommonMarkTestWeak.php b/test/CommonMarkTestWeak.php new file mode 100644 index 0000000..ef4081a --- /dev/null +++ b/test/CommonMarkTestWeak.php @@ -0,0 +1,63 @@ +parsedown->getTextLevelElements(); + array_walk($textLevelElements, function (&$element) { + $element = preg_quote($element, '/'); + }); + $this->textLevelElementRegex = '\b(?:' . implode('|', $textLevelElements) . ')\b'; + } + + /** + * @dataProvider data + * @param $id + * @param $section + * @param $markdown + * @param $expectedHtml + */ + public function testExample($id, $section, $markdown, $expectedHtml) + { + $expectedHtml = $this->cleanupHtml($expectedHtml); + + $actualHtml = $this->parsedown->text($markdown); + $actualHtml = $this->cleanupHtml($actualHtml); + + $this->assertEquals($expectedHtml, $actualHtml); + } + + protected function cleanupHtml($markup) + { + // invisible whitespaces at the beginning and end of block elements + // however, whitespaces at the beginning of
 elements do matter
+        $markup = preg_replace(
+            array(
+                '/(<(?!(?:' . $this->textLevelElementRegex . '|\bpre\b))\w+\b[^>]*>(?:<' . $this->textLevelElementRegex . '[^>]*>)*)\s+/s',
+                '/\s+((?:<\/' . $this->textLevelElementRegex . '>)*<\/(?!' . $this->textLevelElementRegex . ')\w+\b>)/s'
+            ),
+            '$1',
+            $markup
+        );
+
+        return $markup;
+    }
+}
diff --git a/test/ParsedownTest.php b/test/ParsedownTest.php
index c922ab1..2f58418 100644
--- a/test/ParsedownTest.php
+++ b/test/ParsedownTest.php
@@ -1,6 +1,9 @@
 Parsedown->setSafeMode(substr($test, 0, 3) === 'xss');
+
         $actualMarkup = $this->Parsedown->text($markdown);
 
         $this->assertEquals($expectedMarkup, $actualMarkup);
     }
 
+    function testRawHtml()
+    {
+        $markdown = "```php\nfoobar\n```";
+        $expectedMarkup = '

foobar

'; + $expectedSafeMarkup = '
<p>foobar</p>
'; + + $unsafeExtension = new UnsafeExtension; + $actualMarkup = $unsafeExtension->text($markdown); + + $this->assertEquals($expectedMarkup, $actualMarkup); + + $unsafeExtension->setSafeMode(true); + $actualSafeMarkup = $unsafeExtension->text($markdown); + + $this->assertEquals($expectedSafeMarkup, $actualSafeMarkup); + } + + function testTrustDelegatedRawHtml() + { + $markdown = "```php\nfoobar\n```"; + $expectedMarkup = '

foobar

'; + $expectedSafeMarkup = $expectedMarkup; + + $unsafeExtension = new TrustDelegatedExtension; + $actualMarkup = $unsafeExtension->text($markdown); + + $this->assertEquals($expectedMarkup, $actualMarkup); + + $unsafeExtension->setSafeMode(true); + $actualSafeMarkup = $unsafeExtension->text($markdown); + + $this->assertEquals($expectedSafeMarkup, $actualSafeMarkup); + } + function data() { $data = array(); @@ -132,15 +172,14 @@ color: red;

comment

<!-- html comment -->

EXPECTED_HTML; - $parsedownWithNoMarkup = new Parsedown(); + + $parsedownWithNoMarkup = new TestParsedown(); $parsedownWithNoMarkup->setMarkupEscaped(true); $this->assertEquals($expectedHtml, $parsedownWithNoMarkup->text($markdownWithHtml)); } public function testLateStaticBinding() { - include 'test/TestParsedown.php'; - $parsedown = Parsedown::instance(); $this->assertInstanceOf('Parsedown', $parsedown); diff --git a/test/SampleExtensions.php b/test/SampleExtensions.php new file mode 100644 index 0000000..c66190c --- /dev/null +++ b/test/SampleExtensions.php @@ -0,0 +1,40 @@ +$text

"; + + return $Block; + } +} + + +class TrustDelegatedExtension extends Parsedown +{ + protected function blockFencedCodeComplete($Block) + { + $text = $Block['element']['text']['text']; + unset($Block['element']['text']['text']); + + // WARNING: There is almost always a better way of doing things! + // + // This behaviour is NOT needed in the demonstrated case. + // Only use this if you are sure that the result being added into + // rawHtml is safe. + // (e.g. using an external parser with escaping capabilities). + $Block['element']['text']['rawHtml'] = "

$text

"; + $Block['element']['text']['allowRawHtmlInSafeMode'] = true; + + return $Block; + } +} diff --git a/test/TestParsedown.php b/test/TestParsedown.php index 7024dfb..2faa0ab 100644 --- a/test/TestParsedown.php +++ b/test/TestParsedown.php @@ -2,4 +2,8 @@ class TestParsedown extends Parsedown { + public function getTextLevelElements() + { + return $this->textLevelElements; + } } diff --git a/test/bootstrap.php b/test/bootstrap.php deleted file mode 100644 index 5f264d2..0000000 --- a/test/bootstrap.php +++ /dev/null @@ -1,3 +0,0 @@ -my email is me@example.com

\ No newline at end of file +

my email is me@example.com

+

html tags shouldn't start an email autolink first.last@example.com

\ No newline at end of file diff --git a/test/data/email.md b/test/data/email.md index 26b7b6c..00b6969 100644 --- a/test/data/email.md +++ b/test/data/email.md @@ -1 +1,3 @@ -my email is \ No newline at end of file +my email is + +html tags shouldn't start an email autolink first.last@example.com \ No newline at end of file diff --git a/test/data/fenced_code_block.html b/test/data/fenced_code_block.html index 8bdabba..565a541 100644 --- a/test/data/fenced_code_block.html +++ b/test/data/fenced_code_block.html @@ -3,4 +3,9 @@ $message = 'fenced code block'; echo $message;
tilde
-
echo 'language identifier';
\ No newline at end of file +
echo 'language identifier';
+
echo 'language identifier with non words';
+
<?php
+echo "Hello World";
+?>
+<a href="http://auraphp.com" >Aura Project</a>
\ No newline at end of file diff --git a/test/data/fenced_code_block.md b/test/data/fenced_code_block.md index cbed8eb..62db24a 100644 --- a/test/data/fenced_code_block.md +++ b/test/data/fenced_code_block.md @@ -11,4 +11,15 @@ tilde ```php echo 'language identifier'; +``` + +```c# +echo 'language identifier with non words'; +``` + +```html+php + +Aura Project ``` \ No newline at end of file diff --git a/test/data/inline_link.html b/test/data/inline_link.html index 5ad564a..cef29cf 100644 --- a/test/data/inline_link.html +++ b/test/data/inline_link.html @@ -3,4 +3,5 @@

(link) in parentheses

link

MD Logo

-

MD Logo and text

\ No newline at end of file +

MD Logo and text

+

MD Logo and text

\ No newline at end of file diff --git a/test/data/inline_link.md b/test/data/inline_link.md index 6bac0b3..1ba24b7 100644 --- a/test/data/inline_link.md +++ b/test/data/inline_link.md @@ -8,4 +8,6 @@ [![MD Logo](http://parsedown.org/md.png)](http://example.com) -[![MD Logo](http://parsedown.org/md.png) and text](http://example.com) \ No newline at end of file +[![MD Logo](http://parsedown.org/md.png) and text](http://example.com) + +[![MD Logo]() and text](http://example.com) \ No newline at end of file diff --git a/test/data/markup_consecutive_one.html b/test/data/markup_consecutive_one.html new file mode 100644 index 0000000..d4c5005 --- /dev/null +++ b/test/data/markup_consecutive_one.html @@ -0,0 +1,3 @@ +
Markup
+_No markdown_ without blank line for **strict** compliance with CommonMark. +

Markdown

\ No newline at end of file diff --git a/test/data/markup_consecutive_one.md b/test/data/markup_consecutive_one.md new file mode 100644 index 0000000..18b4dcb --- /dev/null +++ b/test/data/markup_consecutive_one.md @@ -0,0 +1,4 @@ +
Markup
+_No markdown_ without blank line for **strict** compliance with CommonMark. + +**Markdown** \ No newline at end of file diff --git a/test/data/markup_consecutive_one_line.html b/test/data/markup_consecutive_one_line.html new file mode 100644 index 0000000..a89b4fd --- /dev/null +++ b/test/data/markup_consecutive_one_line.html @@ -0,0 +1,4 @@ +
One markup on +two lines
+_No markdown_ +

Markdown

\ No newline at end of file diff --git a/test/data/markup_consecutive_one_line.md b/test/data/markup_consecutive_one_line.md new file mode 100644 index 0000000..daf945a --- /dev/null +++ b/test/data/markup_consecutive_one_line.md @@ -0,0 +1,5 @@ +
One markup on +two lines
+_No markdown_ + +**Markdown** \ No newline at end of file diff --git a/test/data/markup_consecutive_one_stripped.html b/test/data/markup_consecutive_one_stripped.html new file mode 100644 index 0000000..ccc01f1 --- /dev/null +++ b/test/data/markup_consecutive_one_stripped.html @@ -0,0 +1,3 @@ +

Stripped markup

+_No markdown_ +

Markdown

\ No newline at end of file diff --git a/test/data/markup_consecutive_one_stripped.md b/test/data/markup_consecutive_one_stripped.md new file mode 100644 index 0000000..7f8df0c --- /dev/null +++ b/test/data/markup_consecutive_one_stripped.md @@ -0,0 +1,4 @@ +

Stripped markup

+_No markdown_ + +**Markdown** \ No newline at end of file diff --git a/test/data/markup_consecutive_two.html b/test/data/markup_consecutive_two.html new file mode 100644 index 0000000..f7e71c7 --- /dev/null +++ b/test/data/markup_consecutive_two.html @@ -0,0 +1,3 @@ +
First markup

and second markup on the same line.

+_No markdown_ +

Markdown

\ No newline at end of file diff --git a/test/data/markup_consecutive_two.md b/test/data/markup_consecutive_two.md new file mode 100644 index 0000000..83f3af7 --- /dev/null +++ b/test/data/markup_consecutive_two.md @@ -0,0 +1,4 @@ +
First markup

and second markup on the same line.

+_No markdown_ + +**Markdown** \ No newline at end of file diff --git a/test/data/markup_consecutive_two_lines.html b/test/data/markup_consecutive_two_lines.html new file mode 100644 index 0000000..ffa4728 --- /dev/null +++ b/test/data/markup_consecutive_two_lines.html @@ -0,0 +1,4 @@ +
First markup

and partial markup +on two lines.

+_No markdown_ +

Markdown

\ No newline at end of file diff --git a/test/data/markup_consecutive_two_lines.md b/test/data/markup_consecutive_two_lines.md new file mode 100644 index 0000000..dc70bb7 --- /dev/null +++ b/test/data/markup_consecutive_two_lines.md @@ -0,0 +1,5 @@ +
First markup

and partial markup +on two lines.

+_No markdown_ + +**Markdown** \ No newline at end of file diff --git a/test/data/markup_consecutive_two_stripped.html b/test/data/markup_consecutive_two_stripped.html new file mode 100644 index 0000000..707d6be --- /dev/null +++ b/test/data/markup_consecutive_two_stripped.html @@ -0,0 +1,4 @@ +

Stripped markup +on two lines

+_No markdown_ +

Markdown

\ No newline at end of file diff --git a/test/data/markup_consecutive_two_stripped.md b/test/data/markup_consecutive_two_stripped.md new file mode 100644 index 0000000..af5b781 --- /dev/null +++ b/test/data/markup_consecutive_two_stripped.md @@ -0,0 +1,5 @@ +

Stripped markup +on two lines

+_No markdown_ + +**Markdown** \ No newline at end of file diff --git a/test/data/multiline_lists.html b/test/data/multiline_lists.html new file mode 100644 index 0000000..a223792 --- /dev/null +++ b/test/data/multiline_lists.html @@ -0,0 +1,10 @@ +
    +
  1. +

    One +First body copy

    +
  2. +
  3. +

    Two +Last body copy

    +
  4. +
\ No newline at end of file diff --git a/test/data/multiline_lists.md b/test/data/multiline_lists.md new file mode 100644 index 0000000..6251115 --- /dev/null +++ b/test/data/multiline_lists.md @@ -0,0 +1,5 @@ +1. One + First body copy + +2. Two + Last body copy diff --git a/test/data/paragraph_list.html b/test/data/paragraph_list.html index ced1c43..00a612c 100644 --- a/test/data/paragraph_list.html +++ b/test/data/paragraph_list.html @@ -8,5 +8,7 @@
  • li

  • -
  • li
  • +
  • +

    li

    +
  • \ No newline at end of file diff --git a/test/data/self-closing_html.md b/test/data/self-closing_html.md index acb2032..61d16a3 100644 --- a/test/data/self-closing_html.md +++ b/test/data/self-closing_html.md @@ -1,12 +1,18 @@
    + paragraph
    + paragraph
    + paragraph
    + paragraph
    + paragraph
    + paragraph \ No newline at end of file diff --git a/test/data/setext_header_spaces.html b/test/data/setext_header_spaces.html new file mode 100644 index 0000000..daf97c0 --- /dev/null +++ b/test/data/setext_header_spaces.html @@ -0,0 +1,12 @@ +

    trailing space

    +

    trailing space

    +

    leading and trailing space

    +

    leading and trailing space

    +

    1 leading space

    +

    1 leading space

    +

    3 leading spaces

    +

    3 leading spaces

    +

    too many leading spaces +==

    +

    too many leading spaces +--

    \ No newline at end of file diff --git a/test/data/setext_header_spaces.md b/test/data/setext_header_spaces.md new file mode 100644 index 0000000..4ac35bb --- /dev/null +++ b/test/data/setext_header_spaces.md @@ -0,0 +1,29 @@ +trailing space +== + +trailing space +-- + +leading and trailing space + == + +leading and trailing space + -- + +1 leading space + == + +1 leading space + -- + +3 leading spaces + == + +3 leading spaces + -- + +too many leading spaces + == + +too many leading spaces + -- \ No newline at end of file diff --git a/test/data/sparse_dense_list.html b/test/data/sparse_dense_list.html index 095bc73..58923f8 100644 --- a/test/data/sparse_dense_list.html +++ b/test/data/sparse_dense_list.html @@ -2,6 +2,10 @@
  • li

  • -
  • li
  • -
  • li
  • +
  • +

    li

    +
  • +
  • +

    li

    +
  • \ No newline at end of file diff --git a/test/data/sparse_html.html b/test/data/sparse_html.html index 9e89627..6e0213f 100644 --- a/test/data/sparse_html.html +++ b/test/data/sparse_html.html @@ -1,8 +1,6 @@
    line 1 - -line 2 -line 3 - -line 4 +

    line 2 +line 3

    +

    line 4

    \ No newline at end of file diff --git a/test/data/sparse_list.html b/test/data/sparse_list.html index 452b2b8..9803d27 100644 --- a/test/data/sparse_list.html +++ b/test/data/sparse_list.html @@ -2,7 +2,9 @@
  • li

  • -
  • li
  • +
  • +

    li

    +