From 849a89b1217b8a90c95bae85f378eeca81f87723 Mon Sep 17 00:00:00 2001 From: Haralan Dobrev Date: Fri, 22 Nov 2013 23:06:20 +0200 Subject: [PATCH] Use UTF-8 encoding for htmlspecialchars. See #36. Prior to PHP 5.4.0 the default encoding for `htmlentities()` and `htmlspecialchars()` is "ISO-8859-1". For PHP 5.4+ it is "UTF-8". This ensures the right encoding is always used, regardless of the PHP version and the locale settings. --- Parsedown.php | 4 ++-- tests/data/special_characters.html | 1 + tests/data/special_characters.md | 2 ++ 3 files changed, 5 insertions(+), 2 deletions(-) diff --git a/Parsedown.php b/Parsedown.php index 8a93030..0ab3faf 100755 --- a/Parsedown.php +++ b/Parsedown.php @@ -568,7 +568,7 @@ class Parsedown case 'code_block': case 'fenced_code_block': - $text = htmlspecialchars($element['text'], ENT_NOQUOTES); + $text = htmlspecialchars($element['text'], ENT_NOQUOTES, 'UTF-8'); strpos($text, "\x1A\\") !== FALSE and $text = strtr($text, $this->escape_sequence_map); @@ -634,7 +634,7 @@ class Parsedown foreach ($matches as $matches) { $element_text = $matches[1]; - $element_text = htmlspecialchars($element_text, ENT_NOQUOTES); + $element_text = htmlspecialchars($element_text, ENT_NOQUOTES, 'UTF-8'); # decodes escape sequences diff --git a/tests/data/special_characters.html b/tests/data/special_characters.html index 7fd3fbb..001d629 100644 --- a/tests/data/special_characters.html +++ b/tests/data/special_characters.html @@ -1,4 +1,5 @@

AT&T has an ampersand in their name

+
Let's play some cards ♠ ♣ ♥ ♦

AT&T is another way to write it

this & that

4 < 5 and 6 > 5

diff --git a/tests/data/special_characters.md b/tests/data/special_characters.md index 440c05e..94de13e 100644 --- a/tests/data/special_characters.md +++ b/tests/data/special_characters.md @@ -1,5 +1,7 @@ AT&T has an ampersand in their name + Let's play some cards ♠ ♣ ♥ ♦ + AT&T is another way to write it this & that