diff --git a/Parsedown.php b/Parsedown.php index 3a09711..86f1e80 100755 --- a/Parsedown.php +++ b/Parsedown.php @@ -372,7 +372,9 @@ class Parsedown if (isset($Line['text'][3]) and $Line['text'][3] === '-' and $Line['text'][2] === '-' and $Line['text'][1] === '!') { $Block = array( - 'element' => $Line['body'], + 'element' => array( + 'text' => $Line['body'], + ), ); if (preg_match('/-->$/', $Line['text'])) @@ -391,7 +393,7 @@ class Parsedown return; } - $Block['element'] .= "\n" . $Line['body']; + $Block['element']['text'] .= "\n" . $Line['body']; if (preg_match('/-->$/', $Line['text'])) { @@ -643,29 +645,77 @@ class Parsedown return; } - if (preg_match('/^<(\w[\w\d]*)(?:[ ][^>]*)?(\/?)[ ]*>/', $Line['text'], $matches)) + $attrName = '[a-zA-Z_:][\w:.-]*'; + $attrValue = '(?:[^"\'=<>`\s]+|".*?"|\'.*?\')'; + + preg_match('/^<(\w[\d\w]*)((?:\s'.$attrName.'(?:\s*=\s*'.$attrValue.')?)*)\s*(\/?)>/', $Line['text'], $matches); + + if ( ! $matches or in_array($matches[1], $this->textLevelElements)) { - if (in_array($matches[1], $this->textLevelElements)) + return; + } + + $Block = array( + 'depth' => 0, + 'element' => array( + 'name' => $matches[1], + 'text' => null, + ), + ); + + $remainder = substr($Line['text'], strlen($matches[0])); + + if (trim($remainder) === '') + { + if ($matches[3] or in_array($matches[1], $this->voidElements)) + { + $Block['closed'] = true; + } + } + else + { + if ($matches[3] or in_array($matches[1], $this->voidElements)) { return; } - $Block = array( - 'element' => $Line['body'], - ); + preg_match('/(.*)<\/'.$matches[1].'>\s*$/i', $remainder, $nestedMatches); - if ($matches[2] or in_array($matches[1], $this->voidElements) or preg_match('/<\/'.$matches[1].'>[ ]*$/', $Line['text'])) + if ($nestedMatches) { $Block['closed'] = true; + $Block['element']['text'] = $nestedMatches[1]; } else { - $Block['depth'] = 0; - $Block['name'] = $matches[1]; + $Block['element']['text'] = $remainder; } + } + if ( ! $matches[2]) + { return $Block; } + + preg_match_all('/\s('.$attrName.')(?:\s*=\s*('.$attrValue.'))?/', $matches[2], $nestedMatches, PREG_SET_ORDER); + + foreach ($nestedMatches as $nestedMatch) + { + if ( ! isset($nestedMatch[2])) + { + $Block['element']['attributes'][$nestedMatch[1]] = ''; + } + elseif ($nestedMatch[2][0] === '"' or $nestedMatch[2][0] === '\'') + { + $Block['element']['attributes'][$nestedMatch[1]] = substr($nestedMatch[2], 1, - 1); + } + else + { + $Block['element']['attributes'][$nestedMatch[1]] = $nestedMatch[2]; + } + } + + return $Block; } protected function addToMarkup($Line, array $Block) @@ -675,12 +725,12 @@ class Parsedown return; } - if (preg_match('/<'.$Block['name'].'([ ].*[\'"])?[ ]*>/', $Line['text'])) # opening tag + if (preg_match('/^<'.$Block['element']['name'].'(?:\s.*[\'"])?\s*>/i', $Line['text'])) # open { $Block['depth'] ++; } - if (stripos($Line['text'], '') !== false) # closing tag + if (preg_match('/(.*?)<\/'.$Block['element']['name'].'>\s*$/i', $Line['text'], $matches)) # close { if ($Block['depth'] > 0) { @@ -688,18 +738,25 @@ class Parsedown } else { + $Block['element']['text'] .= "\n"; + $Block['closed'] = true; } + + $Block['element']['text'] .= $matches[1]; } if (isset($Block['interrupted'])) { - $Block['element'] .= "\n"; + $Block['element']['text'] .= "\n"; unset($Block['interrupted']); } - $Block['element'] .= "\n".$Line['body']; + if ( ! isset($Block['closed'])) + { + $Block['element']['text'] .= "\n".$Line['body']; + } return $Block; } @@ -741,7 +798,7 @@ class Parsedown $alignment = 'left'; } - if (substr($dividerCell, -1) === ':') + if (substr($dividerCell, - 1) === ':') { $alignment = $alignment === 'left' ? 'center' : 'right'; } @@ -871,6 +928,7 @@ class Parsedown 'id' => strtolower($matches[1]), 'data' => array( 'url' => $matches[2], + 'title' => null, ), ); @@ -906,20 +964,39 @@ class Parsedown protected function element(array $Element) { - $markup = '<'.$Element['name']; + $markup = ''; - if (isset($Element['attributes'])) + if (isset($Element['name'])) { - foreach ($Element['attributes'] as $name => $value) + $markup .= '<'.$Element['name']; + + if (isset($Element['attributes'])) { - $markup .= ' '.$name.'="'.$value.'"'; + foreach ($Element['attributes'] as $name => $value) + { + if ($value === null) + { + continue; + } + + $markup .= ' '.$name.'="'.$value.'"'; + } + } + + if (isset($Element['text'])) + { + $markup .= '>'; + } + else + { + $markup .= ' />'; + + return $markup; } } if (isset($Element['text'])) { - $markup .= '>'; - if (isset($Element['handler'])) { $markup .= $this->$Element['handler']($Element['text']); @@ -928,12 +1005,11 @@ class Parsedown { $markup .= $Element['text']; } - - $markup .= ''; } - else + + if (isset($Element['name'])) { - $markup .= ' />'; + $markup .= ''; } return $markup; @@ -950,16 +1026,7 @@ class Parsedown continue; } - $markup .= "\n"; - - if (is_string($Element)) # because of Markup - { - $markup .= $Element; - - continue; - } - - $markup .= $this->element($Element); + $markup .= "\n" . $this->element($Element); } $markup .= "\n"; @@ -972,8 +1039,8 @@ class Parsedown # protected $SpanTypes = array( - '!' => array('Link'), # ? '"' => array('QuotationMark'), + '!' => array('Image'), '&' => array('Ampersand'), '*' => array('Emphasis'), '/' => array('Url'), @@ -1234,91 +1301,97 @@ class Parsedown } } + protected function identifyImage($Excerpt) + { + if ( ! isset($Excerpt['text'][1]) or $Excerpt['text'][1] !== '[') + { + return; + } + + $Excerpt['text'] = substr($Excerpt['text'], 1); + + $Span = $this->identifyLink($Excerpt); + + $Span['extent'] ++; + + $Span['element'] = array( + 'name' => 'img', + 'attributes' => array( + 'src' => $Span['element']['attributes']['href'], + 'alt' => $Span['element']['text'], + 'title' => $Span['element']['attributes']['title'], + ), + ); + + return $Span; + } + protected function identifyLink($Excerpt) { - $extent = $Excerpt['text'][0] === '!' ? 1 : 0; + $Element = array( + 'name' => 'a', + 'handler' => 'line', + 'text' => null, + 'attributes' => array( + 'href' => null, + 'title' => null, + ), + ); - if (strpos($Excerpt['text'], ']') and preg_match('/\[((?:[^][]|(?R))*)\]/', $Excerpt['text'], $matches)) + $extent = 0; + + $remainder = $Excerpt['text']; + + if (preg_match('/\[((?:[^][]|(?R))*)\]/', $remainder, $matches)) { - $Link = array('text' => $matches[1], 'label' => strtolower($matches[1])); + $Element['text'] = $matches[1]; $extent += strlen($matches[0]); - $substring = substr($Excerpt['text'], $extent); - - if (preg_match('/^\s*\[([^][]+)\]/', $substring, $matches)) - { - $Link['label'] = strtolower($matches[1]); - - if (isset($this->Definitions['Reference'][$Link['label']])) - { - $Link += $this->Definitions['Reference'][$Link['label']]; - - $extent += strlen($matches[0]); - } - else - { - return; - } - } - elseif (isset($this->Definitions['Reference'][$Link['label']])) - { - $Link += $this->Definitions['Reference'][$Link['label']]; - - if (preg_match('/^[ ]*\[\]/', $substring, $matches)) - { - $extent += strlen($matches[0]); - } - } - elseif (preg_match('/^\([ ]*(.*?)(?:[ ]+[\'"](.+?)[\'"])?[ ]*\)/', $substring, $matches)) - { - $Link['url'] = $matches[1]; - - if (isset($matches[2])) - { - $Link['title'] = $matches[2]; - } - - $extent += strlen($matches[0]); - } - else - { - return; - } + $remainder = substr($remainder, $extent); } else { return; } - $url = str_replace(array('&', '<'), array('&', '<'), $Link['url']); - - if ($Excerpt['text'][0] === '!') + if (preg_match('/^\([ ]*([^ ]+)(?:[ ]+(".+?"|\'.+?\'))?[ ]*\)/', $remainder, $matches)) { - $Element = array( - 'name' => 'img', - 'attributes' => array( - 'src' => $url, - 'alt' => $Link['text'], - ), - ); + $Element['attributes']['href'] = $matches[1]; + + if (isset($matches[2])) + { + $Element['attributes']['title'] = substr($matches[2], 1, - 1); + } + + $extent += strlen($matches[0]); } else { - $Element = array( - 'name' => 'a', - 'handler' => 'line', - 'text' => $Link['text'], - 'attributes' => array( - 'href' => $url, - ), - ); + if (preg_match('/^\s*\[(.*?)\]/', $remainder, $matches)) + { + $definition = $matches[1] ?: $Element['text']; + $definition = strtolower($definition); + + $extent += strlen($matches[0]); + } + else + { + $definition = strtolower($Element['text']); + } + + if ( ! isset($this->Definitions['Reference'][$definition])) + { + return; + } + + $Definition = $this->Definitions['Reference'][$definition]; + + $Element['attributes']['href'] = $Definition['url']; + $Element['attributes']['title'] = $Definition['title']; } - if (isset($Link['title'])) - { - $Element['attributes']['title'] = $Link['title']; - } + $Element['attributes']['href'] = str_replace(array('&', '<'), array('&', '<'), $Element['attributes']['href']); return array( 'extent' => $extent, diff --git a/test/data/self-closing_html.html b/test/data/self-closing_html.html index 4d072b4..9458910 100644 --- a/test/data/self-closing_html.html +++ b/test/data/self-closing_html.html @@ -1,12 +1,12 @@ -
+

paragraph

-
+

paragraph


paragraph


paragraph

-
+

paragraph

-
+

paragraph

\ No newline at end of file