diff --git a/Think/Parser/Driver/Markdown.php b/Think/Parser/Driver/Markdown.php
new file mode 100644
index 00000000..b0bfb92e
--- /dev/null
+++ b/Think/Parser/Driver/Markdown.php
@@ -0,0 +1,1523 @@
+
+#
+# Original Markdown
+# Copyright (c) 2004-2006 John Gruber
+#
s around + # "paragraphs" that are wrapped in non-block-level tags, such as anchors, + # phrase emphasis, and spans. The list of tags we're looking for is + # hard-coded: + # + # * List "a" is made of tags which can be both inline or block-level. + # These will be treated block-level when the start tag is alone on + # its line, otherwise they're not matched here and will be taken as + # inline later. + # * List "b" is made of tags which are always block-level; + # + $block_tags_a_re = 'ins|del'; + $block_tags_b_re = 'p|div|h[1-6]|blockquote|pre|table|dl|ol|ul|address|'. + 'script|noscript|form|fieldset|iframe|math|svg|'. + 'article|section|nav|aside|hgroup|header|footer|'. + 'figure'; + + # Regular expression for the content of a block tag. + $nested_tags_level = 4; + $attr = ' + (?> # optional tag attributes + \s # starts with whitespace + (?> + [^>"/]+ # text outside quotes + | + /+(?!>) # slash not followed by ">" + | + "[^"]*" # text inside double quotes (tolerate ">") + | + \'[^\']*\' # text inside single quotes (tolerate ">") + )* + )? + '; + $content = + str_repeat(' + (?> + [^<]+ # content without tag + | + <\2 # nested opening tag + '.$attr.' # attributes + (?> + /> + | + >', $nested_tags_level). # end of opening tag + '.*?'. # last level nested tag content + str_repeat(' + \2\s*> # closing nested tag + ) + | + <(?!/\2\s*> # other tags with a different name + ) + )*', + $nested_tags_level); + $content2 = str_replace('\2', '\3', $content); + + # First, look for nested blocks, e.g.: + #
` blocks.
+ #
+ $text = preg_replace_callback('{
+ (?:\n\n|\A\n?)
+ ( # $1 = the code block -- one or more lines, starting with a space/tab
+ (?>
+ [ ]{'.$this->tab_width.'} # Lines must start with a tab or a tab-width of spaces
+ .*\n+
+ )+
+ )
+ ((?=^[ ]{0,'.$this->tab_width.'}\S)|\Z) # Lookahead for non-space at line-start, or end of doc
+ }xm',
+ array(&$this, '_doCodeBlocks_callback'), $text);
+
+ return $text;
+ }
+ protected function _doCodeBlocks_callback($matches) {
+     #
+     # Callback for the code-block expression: $matches[1] holds the raw
+     # indented lines of one code block.
+     #
+     # The lines are outdented by one level, HTML-escaped (quotes are left
+     # alone), trimmed of surrounding blank lines, wrapped in
+     # <pre><code>...</code></pre> and handed to hashBlock(). The returned
+     # value is surrounded by blank lines.
+     #
+     $codeblock = $this->outdent($matches[1]);
+     $codeblock = htmlspecialchars($codeblock, ENT_NOQUOTES);
+
+     # trim leading newlines and trailing newlines
+     $codeblock = preg_replace('/\A\n+|\n+\z/', '', $codeblock);
+
+     # The wrapper markup is what turns the escaped text into a code block.
+     $codeblock = "<pre><code>$codeblock\n</code></pre>";
+     return "\n\n".$this->hashBlock($codeblock)."\n\n";
+ }
+
+
+ protected function makeCodeSpan($code) {
+     #
+     # Create a code span markup for $code. Called from handleSpanToken.
+     #
+     # $code is trimmed and HTML-escaped (quotes are left alone), wrapped in
+     # <code>...</code>, and passed through hashPart().
+     #
+     $code = htmlspecialchars(trim($code), ENT_NOQUOTES);
+     return $this->hashPart("<code>$code</code>");
+ }
+
+
+ protected $em_relist = array( # single-char emphasis tokens, keyed by the open em marker ('' = none)
+     ''  => '(?:(?<!\*)\*(?!\*)|(?<!_)_(?!_))(?=\S|$)(?![\.,:;]\s)',
+     '*' => '(?<=\S|^)(?<!\*)\*(?!\*)',
+     '_' => '(?<=\S|^)(?<!_)_(?!_)',
+     );
+ protected $strong_relist = array( # double-char strong tokens, keyed by the open strong marker
+     ''   => '(?:(?<!\*)\*\*(?!\*)|(?<!_)__(?!_))(?=\S|$)(?![\.,:;]\s)',
+     '**' => '(?<=\S|^)(?<!\*)\*\*(?!\*)',
+     '__' => '(?<=\S|^)(?<!_)__(?!_)',
+     );
+ protected $em_strong_relist = array( # triple-char tokens, keyed by "$em$strong"
+     ''    => '(?:(?<!\*)\*\*\*(?!\*)|(?<!_)___(?!_))(?=\S|$)(?![\.,:;]\s)',
+     '***' => '(?<=\S|^)(?<!\*)\*\*\*(?!\*)',
+     '___' => '(?<=\S|^)(?<!_)___(?!_)',
+     );
+ protected $em_strong_prepared_relist; # filled by prepareItalicsAndBold(), read by doItalicsAndBold()
+
+ protected function prepareItalicsAndBold() {
+     #
+     # Prepare regular expressions for searching emphasis tokens in any
+     # context: one master expression per ($em, $strong) combination,
+     # stored in $em_strong_prepared_relist under the key "$em$strong".
+     #
+     # NOTE(review): the lookbehind expressions above and this method's
+     # visibility were reconstructed after the original text was mangled;
+     # confirm against the upstream parser.
+     #
+     foreach ($this->em_relist as $em => $em_re) {
+         foreach ($this->strong_relist as $strong => $strong_re) {
+             # Construct list of allowed token expressions. The combined
+             # three-char token goes first so it wins over its prefixes.
+             $token_relist = array();
+             if (isset($this->em_strong_relist["$em$strong"])) {
+                 $token_relist[] = $this->em_strong_relist["$em$strong"];
+             }
+             $token_relist[] = $em_re;
+             $token_relist[] = $strong_re;
+
+             # Construct master expression from list.
+             $token_re = '{('. implode('|', $token_relist) .')}';
+             $this->em_strong_prepared_relist["$em$strong"] = $token_re;
+         }
+     }
+ }
+
+ protected function doItalicsAndBold($text) {
+     #
+     # Convert emphasis markers in $text into <em> / <strong> spans.
+     #
+     # The text is consumed token by token. $token_stack holds the markers
+     # that are currently open and $text_stack the text collected for each
+     # of them (index 0 is always the innermost open span). $em and $strong
+     # hold the open marker of each kind ('' when none) and select which
+     # prepared expression is used to find the next token.
+     #
+     # Returns the text with every completed span replaced by the value
+     # hashPart() returns for its markup; unmatched markers stay as text.
+     #
+     $token_stack = array('');
+     $text_stack = array('');
+     $em = '';
+     $strong = '';
+     $tree_char_em = false;
+
+     while (1) {
+         #
+         # Get prepared regular expression for searching emphasis tokens
+         # in current context.
+         #
+         $token_re = $this->em_strong_prepared_relist["$em$strong"];
+
+         #
+         # Each loop iteration searches for the next emphasis token.
+         #
+         $parts = preg_split($token_re, $text, 2, PREG_SPLIT_DELIM_CAPTURE);
+         $text_stack[0] .= $parts[0];
+         # Binding by reference creates the entries as null when the split
+         # found no token, so no undefined-offset notice is raised.
+         $token =& $parts[1];
+         $text =& $parts[2];
+
+         if (empty($token)) {
+             # Reached end of text span: empty stack without emitting
+             # any more emphasis.
+             while ($token_stack[0]) {
+                 $text_stack[1] .= array_shift($token_stack);
+                 $text_stack[0] .= array_shift($text_stack);
+             }
+             break;
+         }
+
+         $token_len = strlen($token);
+         if ($tree_char_em) {
+             # Reached closing marker while inside a three-char emphasis.
+             if ($token_len == 3) {
+                 # Three-char closing marker, close em and strong.
+                 array_shift($token_stack);
+                 $span = array_shift($text_stack);
+                 $span = $this->runSpanGamut($span);
+                 $span = "<strong><em>$span</em></strong>";
+                 $text_stack[0] .= $this->hashPart($span);
+                 $em = '';
+                 $strong = '';
+             } else {
+                 # Other closing marker: close one em or strong and
+                 # change current token state to match the other
+                 $token_stack[0] = str_repeat($token[0], 3-$token_len);
+                 $tag = $token_len == 2 ? "strong" : "em";
+                 $span = $text_stack[0];
+                 $span = $this->runSpanGamut($span);
+                 $span = "<$tag>$span</$tag>";
+                 $text_stack[0] = $this->hashPart($span);
+                 $$tag = ''; # $$tag stands for $em or $strong
+             }
+             $tree_char_em = false;
+         } else if ($token_len == 3) {
+             if ($em) {
+                 # Reached closing marker for both em and strong.
+                 # Close the two open spans, innermost first:
+                 for ($i = 0; $i < 2; ++$i) {
+                     $shifted_token = array_shift($token_stack);
+                     $tag = strlen($shifted_token) == 2 ? "strong" : "em";
+                     $span = array_shift($text_stack);
+                     $span = $this->runSpanGamut($span);
+                     $span = "<$tag>$span</$tag>";
+                     $text_stack[0] .= $this->hashPart($span);
+                     $$tag = ''; # $$tag stands for $em or $strong
+                 }
+             } else {
+                 # Reached opening three-char emphasis marker. Push on token
+                 # stack; will be handled by the special condition above.
+                 $em = $token[0];
+                 $strong = "$em$em";
+                 array_unshift($token_stack, $token);
+                 array_unshift($text_stack, '');
+                 $tree_char_em = true;
+             }
+         } else if ($token_len == 2) {
+             if ($strong) {
+                 # Unwind any dangling emphasis marker:
+                 if (strlen($token_stack[0]) == 1) {
+                     $text_stack[1] .= array_shift($token_stack);
+                     $text_stack[0] .= array_shift($text_stack);
+                 }
+                 # Closing strong marker:
+                 array_shift($token_stack);
+                 $span = array_shift($text_stack);
+                 $span = $this->runSpanGamut($span);
+                 $span = "<strong>$span</strong>";
+                 $text_stack[0] .= $this->hashPart($span);
+                 $strong = '';
+             } else {
+                 # Opening strong marker.
+                 array_unshift($token_stack, $token);
+                 array_unshift($text_stack, '');
+                 $strong = $token;
+             }
+         } else {
+             # Here $token_len == 1
+             if ($em) {
+                 if (strlen($token_stack[0]) == 1) {
+                     # Closing emphasis marker:
+                     array_shift($token_stack);
+                     $span = array_shift($text_stack);
+                     $span = $this->runSpanGamut($span);
+                     $span = "<em>$span</em>";
+                     $text_stack[0] .= $this->hashPart($span);
+                     $em = '';
+                 } else {
+                     # An em is open but not innermost: keep the marker as text.
+                     $text_stack[0] .= $token;
+                 }
+             } else {
+                 # Opening emphasis marker.
+                 array_unshift($token_stack, $token);
+                 array_unshift($text_stack, '');
+                 $em = $token;
+             }
+         }
+     }
+     return $text_stack[0];
+ }
+
+
+ protected function doBlockQuotes($text) {
+     #
+     # Locate every run of ">"-prefixed lines (plus the lines and blank
+     # lines that continue it) and replace it with whatever
+     # _doBlockQuotes_callback returns for that run.
+     #
+     $blockquote_re = '/
+ ( # Wrap whole match in $1
+ (?>
+ ^[ ]*>[ ]? # ">" at the start of a line
+ .+\n # rest of the first line
+ (.+\n)* # subsequent consecutive lines
+ \n* # blanks
+ )+
+ )
+ /xm';
+
+     return preg_replace_callback($blockquote_re,
+         array(&$this, '_doBlockQuotes_callback'), $text);
+ }
+ protected function _doBlockQuotes_callback($matches) {
+     #
+     # Callback for doBlockQuotes: $matches[1] is one complete quoted run.
+     #
+     # One level of ">" quoting is removed, the remainder is run through
+     # the block gamut again (so nested quotes recurse), every line is
+     # indented, and the result is wrapped in <blockquote> and handed to
+     # hashBlock().
+     #
+     $bq = $matches[1];
+     # trim one level of quoting - trim whitespace-only lines
+     $bq = preg_replace('/^[ ]*>[ ]?|^[ ]+$/m', '', $bq);
+     $bq = $this->runBlockGamut($bq); # recurse
+
+     $bq = preg_replace('/^/m', " ", $bq);
+     # These leading spaces cause problem with <pre> content,
+     # so we need to fix that:
+     $bq = preg_replace_callback('{(\s*<pre>.+?</pre>)}sx',
+         array(&$this, '_doBlockQuotes_callback2'), $bq);
+
+     return "\n". $this->hashBlock("<blockquote>\n$bq\n</blockquote>")."\n\n";
+ }
+ protected function _doBlockQuotes_callback2($matches) {
+     # Drop the single leading space from every line of the captured text.
+     return preg_replace('/^ /m', '', $matches[1]);
+ }
+
+
+ protected function formParagraphs($text) {
+     #
+     # Params:
+     # $text - string to process with html tags
+     #
+     # Splits $text on blank lines. A chunk that consists solely of a block
+     # placeholder (B\x1A<digits>B) is replaced by the HTML stored for it in
+     # $this->html_hashes; any other chunk is run through the span gamut,
+     # wrapped in <p>...</p> and unhashed. Chunks are re-joined with one
+     # blank line between them.
+     #
+     # Strip leading and trailing lines:
+     $text = preg_replace('/\A\n+|\n+\z/', '', $text);
+
+     $grafs = preg_split('/\n{2,}/', $text, -1, PREG_SPLIT_NO_EMPTY);
+
+     #
+     # Wrap <p> tags and unhashify HTML blocks
+     #
+     foreach ($grafs as $key => $value) {
+         if (!preg_match('/^B\x1A[0-9]+B$/', $value)) {
+             # Is a paragraph. The opening tag replaces any leading spaces.
+             $value = $this->runSpanGamut($value);
+             $value = preg_replace('/^([ ]*)/', "<p>", $value);
+             $value .= "</p>";
+             $grafs[$key] = $this->unhash($value);
+         }
+         else {
+             # Is a block: swap the placeholder for the stored markup.
+             $grafs[$key] = $this->html_hashes[$value];
+         }
+     }
+
+     return implode("\n\n", $grafs);
+ }
+
+
+ protected function encodeAttribute($text) {
+     #
+     # Encode text for a double-quoted HTML attribute. This function
+     # is *not* suitable for attributes enclosed in single quotes.
+     #
+     # Ampersands and "<" are handled by encodeAmpsAndAngles(); double
+     # quotes are then turned into &quot; so the value cannot terminate
+     # the attribute.
+     #
+     $text = $this->encodeAmpsAndAngles($text);
+     $text = str_replace('"', '&quot;', $text);
+     return $text;
+ }
+
+
+ protected function encodeAmpsAndAngles($text) {
+     #
+     # Smart processing for ampersands and angle brackets that need to
+     # be encoded. Valid character entities are left alone unless the
+     # no-entities mode is set.
+     #
+     if ($this->no_entities) {
+         # Every ampersand is encoded, including those of existing entities.
+         $text = str_replace('&', '&amp;', $text);
+     } else {
+         # Ampersand-encoding based entirely on Nat Irons's Amputator
+         # MT plugin: only ampersands that do not start an entity
+         # (named, decimal or hex) are encoded.
+         $text = preg_replace('/&(?!#?[xX]?(?:[0-9a-fA-F]+|\w+);)/',
+             '&amp;', $text);
+     }
+     # Encode remaining <'s
+     $text = str_replace('<', '&lt;', $text);
+
+     return $text;
+ }
+
+
+ protected function doAutoLinks($text) {
+     #
+     # Process <scheme:...> and <address@host> autolinks. URL matches are
+     # handed to _doAutoLinks_url_callback first, then email matches to
+     # _doAutoLinks_email_callback.
+     #
+     $url_re = '{<((https?|ftp|dict):[^\'">\s]+)>}i';
+     $text = preg_replace_callback($url_re,
+         array(&$this, '_doAutoLinks_url_callback'), $text);
+
+     # Email addresses:
+     $email_re = '{
+ <
+ (?:mailto:)?
+ (
+ (?:
+ [-!#$%&\'*+/=?^_`.{|}~\w\x80-\xFF]+
+ |
+ ".*?"
+ )
+ \@
+ (?:
+ [-a-z0-9\x80-\xFF]+(\.[-a-z0-9\x80-\xFF]+)*\.[a-z]+
+ |
+ \[[\d.a-fA-F:]+\] # IPv4 & IPv6
+ )
+ )
+ >
+ }xi';
+
+     return preg_replace_callback($email_re,
+         array(&$this, '_doAutoLinks_email_callback'), $text);
+ }
+ protected function _doAutoLinks_url_callback($matches) {
+     #
+     # Callback for URL autolinks: $matches[1] is the URL without the
+     # surrounding angle brackets. The attribute-encoded URL is used both
+     # as the href and as the link text.
+     #
+     $url = $this->encodeAttribute($matches[1]);
+     $link = "<a href=\"$url\">$url</a>";
+     return $this->hashPart($link);
+ }
+ protected function _doAutoLinks_email_callback($matches) {
+     # $matches[1] is the captured address; encode it and hash the result.
+     return $this->hashPart($this->encodeEmailAddress($matches[1]));
+ }
+
+
+ protected function encodeEmailAddress($addr) {
+     #
+     # Input: an email address, e.g. "foo@example.com"
+     #
+     # Output: the email address as a mailto link, with each character
+     # of the address encoded as either a decimal or hex entity, in
+     # the hopes of foiling most address harvesting spam bots. E.g.:
+     #
+     #   <a href="&#x6d;&#97;&#105;&#x6c;...">&#102;&#x6f;&#111;...</a>
+     #
+     # Based on a filter by Matthew Wickline, posted to BBEdit-Talk.
+     # With some optimizations by Milian Wolff.
+     #
+     $addr = "mailto:" . $addr;
+     # Split between every pair of bytes.
+     $chars = preg_split('/(?<!^)(?!$)/', $addr);
+     # Deterministic seed, so the same address always encodes the same way.
+     $seed = (int)abs(crc32($addr) / strlen($addr));
+
+     foreach ($chars as $key => $char) {
+         $ord = ord($char);
+         # Ignore non-ascii chars.
+         if ($ord >= 128) continue;
+
+         $r = ($seed * (1 + $key)) % 100; # Pseudo-random function.
+         # roughly 10% raw, 45% hex, 45% dec
+         # '@' *must* be encoded. I insist.
+         if ($r > 90 && $char != '@') continue;
+         $chars[$key] = ($r < 45)
+             ? '&#x'.dechex($ord).';'
+             : '&#'.$ord.';';
+     }
+
+     $addr = implode('', $chars);
+     $text = implode('', array_slice($chars, 7)); # text without `mailto:`
+     $addr = "<a href=\"$addr\">$text</a>";
+
+     return $addr;
+ }
+
+
+ protected function parseSpan($str) {
+     #
+     # Take the string $str and parse it into tokens, hashing embedded HTML,
+     # escaped characters and handling code spans.
+     #
+     # Text between tokens is copied to the output unchanged; every token
+     # is replaced by what handleSpanToken returns for it. HTML tokens are
+     # only recognised when $this->no_markup is not set.
+     #
+     # NOTE(review): the token expression below was reconstructed after
+     # parts of it were stripped from the source; confirm against the
+     # upstream parser.
+     #
+     $output = '';
+
+     $span_re = '{
+         (
+             \\\\'.$this->escape_chars_re.'
+         |
+             (?<![`\\\\])
+             `+                      # code span marker
+     '.( $this->no_markup ? '' : '
+         |
+             <!--    .*?     -->     # comment
+         |
+             <\?.*?\?> | <%.*?%>     # processing instruction
+         |
+             <[!$]?[-a-zA-Z0-9:_]+   # regular tags
+             (?>
+                 \s
+                 (?>[^"\'>]+|"[^"]*"|\'[^\']*\')*
+             )?
+             >
+         |
+             <[-a-zA-Z0-9:_]+\s*/>   # xml-style empty tag
+         |
+             </[-a-zA-Z0-9:_]+\s*>   # closing tag
+     ').'
+         )
+         }xs';
+
+     while (1) {
+         #
+         # Each loop iteration searches for either the next tag, the next
+         # opening code span marker, or the next escaped character.
+         # Each token is then passed to handleSpanToken.
+         #
+         $parts = preg_split($span_re, $str, 2, PREG_SPLIT_DELIM_CAPTURE);
+
+         # Create token from text preceding tag.
+         if ($parts[0] != "") {
+             $output .= $parts[0];
+         }
+
+         # Check if we reach the end.
+         if (!isset($parts[1])) {
+             break;
+         }
+
+         # handleSpanToken takes the remaining text by reference and may
+         # consume part of it (e.g. up to the end of a code span).
+         $output .= $this->handleSpanToken($parts[1], $parts[2]);
+         $str = $parts[2];
+     }
+
+     return $output;
+ }
+
+
+ protected function handleSpanToken($token, &$str) {
+     #
+     # Handle $token provided by parseSpan by determining its nature and
+     # returning the corresponding value that should replace it.
+     #
+     # $str is the text following the token. It is taken by reference
+     # because a code span consumes text up to its closing marker.
+     #
+     switch ($token[0]) {
+         case "\\":
+             # Backslash escape: emit the escaped character as a
+             # decimal entity.
+             return $this->hashPart("&#". ord($token[1]). ";");
+         case "`":
+             # Search for end marker in remaining text: the same run of
+             # backticks, not followed by another backtick.
+             if (preg_match('/^(.*?[^`])'.preg_quote($token).'(?!`)(.*)$/sm',
+                 $str, $matches))
+             {
+                 $str = $matches[2];
+                 $codespan = $this->makeCodeSpan($matches[1]);
+                 return $this->hashPart($codespan);
+             }
+             return $token; // return as text since no ending marker found.
+         default:
+             # Anything else is hashed as-is.
+             return $this->hashPart($token);
+     }
+ }
+
+
+ protected function outdent($text) {
+     #
+     # Strip one indentation level from the start of every line: either a
+     # single tab or between one and $this->tab_width spaces.
+     #
+     $indent_re = '/^(\t|[ ]{1,'.$this->tab_width.'})/m';
+     return preg_replace($indent_re, '', $text);
+ }
+
+
+ # String length function for detab. `_initDetab` will create a function to
+ # handle UTF-8 if the default function does not exist.
+ protected $utf8_strlen = 'mb_strlen';
+
+ protected function detab($text) {
+     #
+     # Replace tabs with the appropriate amount of space.
+     #
+     # Only lines that contain a tab are matched; each of them is rebuilt
+     # by _detab_callback, which pads every tab-delimited segment out to
+     # the next tab stop.
+     #
+     return preg_replace_callback('/^.*\t.*$/m',
+         array(&$this, '_detab_callback'), $text);
+ }
+ protected function _detab_callback($matches) {
+     #
+     # Rebuild one line ($matches[0]) with its tabs expanded to spaces.
+     #
+     $measure = $this->utf8_strlen; # strlen function for UTF-8.
+
+     # Segments between tabs; the first one starts the rebuilt line.
+     $segments = explode("\t", $matches[0]);
+     $rebuilt = array_shift($segments);
+
+     foreach ($segments as $segment) {
+         # Pad up to the next multiple of tab_width, then append the segment.
+         $padding = $this->tab_width -
+             $measure($rebuilt, 'UTF-8') % $this->tab_width;
+         $rebuilt .= str_repeat(" ", $padding) . $segment;
+     }
+     return $rebuilt;
+ }
+ protected function _initDetab() {
+     #
+     # Check for the availability of the function in the `utf8_strlen` property
+     # (initially `mb_strlen`). If the function is not available, install a
+     # closure that will loosely count the number of UTF-8 characters with a
+     # regular expression.
+     #
+     # A closure is used because create_function() no longer exists in
+     # PHP 8. The closure declares a single parameter; the extra 'UTF-8'
+     # argument passed by _detab_callback is simply ignored.
+     #
+     # If the property already holds a non-string callable (a previous call
+     # installed the closure), there is nothing left to do, and
+     # function_exists() must not be called on it.
+     #
+     if (!is_string($this->utf8_strlen)) return;
+     if (function_exists($this->utf8_strlen)) return;
+
+     $this->utf8_strlen = function ($text) {
+         # Count one per single byte below 0xC0 and one per lead byte
+         # together with its continuation bytes.
+         return preg_match_all(
+             '/[\x00-\xBF]|[\xC0-\xFF][\x80-\xBF]*/',
+             $text, $m);
+     };
+ }
+
+
+ protected function unhash($text) {
+     #
+     # Swap back in all the tags hashed by _HashHTMLBlocks: every
+     # "<char>\x1A<digits><same char>" placeholder is replaced by the value
+     # _unhash_callback returns for it.
+     #
+     $placeholder_re = '/(.)\x1A[0-9]+\1/';
+     return preg_replace_callback($placeholder_re,
+         array(&$this, '_unhash_callback'), $text);
+ }
+ protected function _unhash_callback($matches) {
+     # The whole matched placeholder is the key into the hash table.
+     $placeholder = $matches[0];
+     return $this->html_hashes[$placeholder];
+ }
+
+}
diff --git a/Think/Parser/Driver/Ubb.php b/Think/Parser/Driver/Ubb.php
new file mode 100644
index 00000000..667a4456
--- /dev/null
+++ b/Think/Parser/Driver/Ubb.php
@@ -0,0 +1,291 @@
+
+// +----------------------------------------------------------------------
+// | Ubb.php 2013-04-03
+// +----------------------------------------------------------------------
+
+namespace Think\Parser\Driver;
+
+class Ubb{
+    /**
+     * UBB tag matching rules, applied in order.
+     *
+     * A rule with four entries describes a paired tag:
+     *   [html tag, opening-tag regex, closing-tag regex, handler].
+     * A rule with three entries describes a self-closing tag:
+     *   [html tag, regex, handler].
+     * The handler name prefixed with "_" is the private method that renders
+     * the match.
+     *
+     * NOTE(review): captured URLs, classes and tag content are inserted into
+     * the generated HTML without escaping; callers must pass content that
+     * has already been made safe.
+     *
+     * @var array
+     */
+    private $ubb = [
+        ['table' , '\[table(?:=([\d%]*))?\]', '\[\/table\]', 'width'],
+        ['tr' , '\[tr\]', '\[\/tr\]', 'tag'],
+        ['th' , '\[th(?:=([\d%]*)(?:,([\d%]*))?)?\]', '\[\/th\]', 'widthAndHeight'],
+        ['td' , '\[td(?:=([\d%]*)(?:,([\d%]*))?)?\]', '\[\/td\]', 'widthAndHeight'],
+        ['img' , '\[img(?:=([\d%]*)(?:,([\d%]*))?)?\]', '\[\/img\]', 'imgWidthAndHeight'],
+        ['img' , '\[img=(.*?)(?:,([\d%]*)(?:,([\d%]*))?)?\/\]', 'img'],
+        ['a' , '\[url(?:=(.*?)(?:,([\w\-]*))?)?\]', '\[\/url\]', 'urlClass'],
+        ['a' , '\[a(?:=(.*?)(?:,([\w\-]*))?)?\]', '\[\/a\]', 'urlClass'],
+        ['a' , '\[url=(.*?)(?:,([\w\-]*))?\/\]', 'url'],
+        ['a' , '\[a=(.*?)(?:,([\w\-]*))?\/\]', 'url'],
+        ['a' , '\[email(?:=([\w\-]*))?\]', '\[\/email\]', 'emailClass'],
+        ['ul' , '\[ul(?:=([\w\-]*))?\]', '\[\/ul\]', 'class'],
+        ['ol' , '\[ol(?:=([\w\-]*))?\]', '\[\/ol\]', 'class'],
+        ['li' , '\[li(?:=([\w\-]*))?\]', '\[\/li\]', 'class'],
+        ['span' , '\[span(?:=([\w\-]*))?\]', '\[\/span\]', 'class'],
+        ['div' , '\[div(?:=([\w\-]*))?\]', '\[\/div\]', 'class'],
+        ['p' , '\[p(?:=([\w\-]*))?\]', '\[\/p\]', 'class'],
+        ['strong' , '\[b\]', '\[\/b\]', 'tag'],
+        ['strong' , '\[strong\]', '\[\/strong\]', 'tag'],
+        ['i' , '\[i\]', '\[\/i\]', 'tag'],
+        ['em' , '\[em\]', '\[\/em\]', 'tag'],
+        ['sub' , '\[sub\]', '\[\/sub\]', 'tag'],
+        ['sup' , '\[sup\]', '\[\/sup\]', 'tag'],
+        ['pre' , '\[code(?:=([a-z#\+\/]*))?\]', '\[\/code\]', 'code'],
+        ['code' , '\[line(?:=([a-z#\+\/]*))?\]', '\[\/line\]', 'code'],
+    ];
+
+    /**
+     * State of the paired-tag rule currently being applied by closeTag().
+     * Kept on the instance rather than in function-level statics so that
+     * separate parser instances do not share it.
+     */
+    private $tag = '';        // html tag name of the current rule
+    private $openRe = '';     // opening-tag regex of the current rule
+    private $closeRe = '';    // closing-tag regex of the current rule
+    private $func = '';       // handler name of the current rule
+    private $again = false;   // set when a nested tag was found and another pass is needed
+
+    /**
+     * Convert UBB code to HTML.
+     *
+     * Every rule is applied in turn; line breaks in the result are then
+     * converted with nl2br().
+     *
+     * @param string $content UBB code to parse
+     * @return string the resulting HTML, or '' for empty input
+     */
+    public function parse($content = ''){
+        if(empty($content)) return '';
+
+        foreach($this->ubb as $rule){
+            if(count($rule) == 4){ // paired tag
+                $content = $this->closeTag($content, $rule);
+            } else {               // self-closing tag
+                $content = $this->onceTag($content, $rule);
+            }
+        }
+
+        return nl2br($content);
+    }
+
+    /**
+     * Apply one paired-tag rule, with support for nesting.
+     *
+     * Matching is lazy, so for nested tags of the same kind the first pass
+     * pairs an outer opening tag with an inner closing tag. The match
+     * handler detects this, renders the inner tag and requests another
+     * pass; passes repeat until no nesting is found.
+     *
+     * @param string $data text to process
+     * @param array  $rule [html tag, opening regex, closing regex, handler]
+     * @return string the processed text
+     */
+    private function closeTag($data, $rule = ''){
+        list($this->tag, $this->openRe, $this->closeRe, $this->func) = $rule;
+        do{
+            $this->again = false;
+            $data = preg_replace_callback($this->pairPattern(),
+                [$this, 'closeTagMatch'], $data);
+        } while ($this->again);
+        return $data;
+    }
+
+    /**
+     * Build the full pattern for the current paired-tag rule:
+     * (opening tag)(content, lazy)(closing tag).
+     *
+     * @return string
+     */
+    private function pairPattern(){
+        return "/({$this->openRe})(.*?)({$this->closeRe})/is";
+    }
+
+    /**
+     * preg_replace_callback() handler for one paired-tag match.
+     *
+     * The last capture group is the closing tag and the one before it is
+     * the content; the groups in between belong to the opening tag.
+     *
+     * @param array $data match groups
+     * @return string replacement text
+     */
+    private function closeTagMatch(array $data){
+        $num     = count($data);
+        $content = $num - 2;
+
+        if(preg_match("/{$this->openRe}/is", $data[$content])){
+            // Nested tag: render the inner pair (content + this closing tag)
+            // and leave the outer opening tag in place for the next pass.
+            $this->again = true;
+            $inner = preg_replace_callback($this->pairPattern(),
+                [$this, 'closeTagMatch'], $data[$content] . $data[$num-1]);
+            return $data[1] . $inner;
+        }
+
+        // No nesting: strip leading/trailing line breaks from the content
+        // and render it with the rule's handler.
+        $parse = '_' . $this->func;
+        $data[$content] = trim($data[$content], "\r\n");
+        return $this->$parse($this->tag, $data);
+    }
+
+    /**
+     * Apply one self-closing tag rule.
+     *
+     * @param string $data text to process
+     * @param array  $rule [html tag, regex, handler]
+     * @return string the processed text
+     */
+    private function onceTag($data, $rule = ''){
+        list(, $reg, $func) = $rule;
+        return preg_replace_callback("/{$reg}/is", [$this, '_' . $func], $data);
+    }
+
+    /**
+     * Build ` name="value"` for a captured group, or '' when the group is
+     * missing or empty (as judged by empty()).
+     *
+     * @param string $name  attribute name
+     * @param array  $data  match groups
+     * @param int    $index group index holding the value
+     * @return string
+     */
+    private function attr($name, array $data, $index){
+        return empty($data[$index]) ? '' : " {$name}=\"{$data[$index]}\"";
+    }
+
+    /**
+     * Render a self-closing img tag.
+     *
+     * @param array $data match groups: [1] - URL, [2] - width, [3] - height
+     * @return string rendered tag
+     */
+    private function _img($data){
+        // Move the URL to the slot _imgWidthAndHeight() reads it from.
+        $data[4] = $data[1];
+        return $this->_imgWidthAndHeight('', $data);
+    }
+
+    /**
+     * Render a self-closing url tag; the URL doubles as the link text.
+     *
+     * @param array $data match groups: [1] - URL, [2] - class (may be absent)
+     * @return string rendered tag
+     */
+    private function _url($data){
+        // The class group is omitted from the match when it did not take part.
+        $class   = isset($data[2]) ? $data[2] : '';
+        $data[2] = $data[4] = $data[1];
+        $data[3] = $class;
+        return $this->_urlClass('', $data);
+    }
+
+    /**
+     * Render a tag without attributes.
+     *
+     * @param string $name tag name
+     * @param array  $data match groups: [2] - content
+     * @return string rendered tag
+     */
+    private function _tag($name, $data){
+        return "<{$name}>{$data[2]}</{$name}>";
+    }
+
+    /**
+     * Render code. When $name is 'pre' the code element is wrapped in <pre>.
+     *
+     * NOTE(review): the attribute carrying the language is assumed to be
+     * data-lang; confirm against the stylesheet/highlighter in use.
+     *
+     * @param string $name tag name
+     * @param array  $data match groups: [2] - language, [3] - code
+     * @return string rendered tag
+     */
+    private function _code($name, $data){
+        $fix  = ($name == 'pre') ? ['<pre>', '</pre>'] : ['', ''];
+        $lang = $this->attr('data-lang', $data, 2);
+        return "{$fix[0]}<code{$lang}>{$data[3]}</code>{$fix[1]}";
+    }
+
+    /**
+     * Render a tag with an optional width attribute.
+     *
+     * @param string $name tag name
+     * @param array  $data match groups: [2] - width, [3] - content
+     * @return string rendered tag
+     */
+    private function _width($name, $data){
+        $width = $this->attr('width', $data, 2);
+        return "<{$name}{$width}>{$data[3]}</{$name}>";
+    }
+
+    /**
+     * Render a tag with optional width and height attributes.
+     *
+     * @param string $name tag name
+     * @param array  $data match groups: [2] - width, [3] - height, [4] - content
+     * @return string rendered tag
+     */
+    private function _widthAndHeight($name, $data){
+        $size = $this->attr('width', $data, 2) . $this->attr('height', $data, 3);
+        return "<{$name}{$size}>{$data[4]}</{$name}>";
+    }
+
+    /**
+     * Render an image with optional width and height attributes.
+     *
+     * @param string $name tag name (unused; the tag is always img)
+     * @param array  $data match groups: [2] - width, [3] - height, [4] - image URL
+     * @return string rendered tag
+     */
+    private function _imgWidthAndHeight($name, $data){
+        $size = $this->attr('width', $data, 2) . $this->attr('height', $data, 3);
+        return "<img{$size} src=\"{$data[4]}\" />";
+    }
+
+    /**
+     * Render a tag with an optional class attribute.
+     *
+     * @param string $name tag name
+     * @param array  $data match groups: [2] - class, [3] - content
+     * @return string rendered tag
+     */
+    private function _class($name, $data){
+        $class = $this->attr('class', $data, 2);
+        return "<{$name}{$class}>{$data[3]}</{$name}>";
+    }
+
+    /**
+     * Render a link with an optional class attribute. When no URL was
+     * given, the link text is used as the URL.
+     *
+     * @param string $name tag name (unused; the tag is always a)
+     * @param array  $data match groups: [2] - URL, [3] - class, [4] - text
+     * @return string rendered tag
+     */
+    private function _urlClass($name, $data){
+        if(empty($data[2])){
+            $data[2] = $data[4];
+        }
+        $class = $this->attr('class', $data, 3);
+        return "<a href=\"{$data[2]}\"{$class}>{$data[4]}</a>";
+    }
+
+    /**
+     * Render an email link with an optional class attribute.
+     *
+     * Content that is not a well-formed address is left unparsed (the
+     * original matched text is returned).
+     *
+     * @param string $name tag name (unused; the tag is always a)
+     * @param array  $data match groups: [0] - whole match, [2] - class, [3] - address
+     * @return string rendered tag, or the original text for an invalid address
+     */
+    private function _emailClass($name, $data){
+        // Not a valid email address: do not parse.
+        if(!preg_match('/^\w+([-+.]\w+)*@\w+([-.]\w+)*\.\w+([-.]\w+)*$/', $data[3]))
+            return $data[0];
+
+        // Entity-encode the address to make harvesting harder.
+        $email = $this->encodeEmailAddress($data[3]);
+        $class = $this->attr('class', $data, 2);
+
+        return "<a href=\"{$email[0]}\"{$class}>{$email[1]}</a>";
+    }
+
+    /**
+     * Entity-encode an email address to defeat some harvesting tools.
+     *
+     * Each ASCII character is, pseudo-randomly but deterministically for a
+     * given address, left raw (never '@'), or written as a hexadecimal or
+     * decimal character entity. Non-ASCII bytes are left untouched.
+     *
+     * @param string $addr email address
+     * @return array encoded address: [0] - with "mailto:", [1] - without
+     */
+    private function encodeEmailAddress($addr) {
+        $addr  = "mailto:" . $addr;
+        // Split between every pair of bytes.
+        $chars = preg_split('/(?<!^)(?!$)/', $addr);
+        // Deterministic seed, so the same address always encodes the same way.
+        $seed  = (int)abs(crc32($addr) / strlen($addr));
+
+        foreach ($chars as $key => $char) {
+            $ord = ord($char);
+            # Ignore non-ascii chars.
+            if ($ord >= 128) continue;
+
+            $r = ($seed * (1 + $key)) % 100; # Pseudo-random function.
+            # roughly 10% raw, 45% hex, 45% dec
+            # '@' *must* be encoded. I insist.
+            if ($r > 90 && $char != '@') continue;
+            $chars[$key] = ($r < 45)
+                ? '&#x'.dechex($ord).';'
+                : '&#'.$ord.';';
+        }
+
+        $addr = implode('', $chars);
+        $text = implode('', array_slice($chars, 7)); # text without `mailto:`
+
+        return [$addr, $text];
+    }
+
+}