2 // PukiWiki - Yet another WikiWikiWeb clone
3 // $Id: convert_html.php,v 1.18 2006/05/13 07:29:58 henoheno Exp $
5 // 2002-2005 PukiWiki Developers Team
6 // 2001-2002 Originally written by yu-ji
7 // License: GPL v2 or (at your option) any later version
9 // function 'convert_html()', wiki text parser
10 // and related classes-and-functions
// Convert wiki source text to HTML.
// $lines: the wiki text, either as an array of lines or as one string
//         (a string is split on "\n").
// Returns the string produced by the Body object's toString().
// Side effect: the global $digest is set to the md5 of the joined output of
// get_source() for the page named in $vars['page'].
// NOTE(review): this listing omits some original lines (braces and possibly
// statements); only the visible statements are described here.
12 function convert_html($lines)
14 global $vars, $digest;
// Counter kept across calls; each call passes the next value to Body
15 static $contents_id = 0;
18 $digest = md5(join('', get_source($vars['page'])));
20 if (! is_array($lines)) $lines = explode("\n", $lines);
22 $body = & new Body(++$contents_id);
25 return $body->toString();
// NOTE(review): the enclosing class header and the constructor signature are
// not visible in this listing; presumably these are members of the Element
// base class that the classes below extend -- confirm.
32 var $elements; // Child elements (stored by reference)
33 var $last; // Current insertion point: new elements are added after $last
// Constructor body: start with no children, and with $last referring to this element itself
37 $this->elements = array();
38 $this->last = & $this;
// Record $parent (by reference) as this element's parent.
41 function setParent(& $parent)
43 $this->parent = & $parent;
// Add $obj to the tree: insert it here when canContain($obj) allows it,
// otherwise pass it up to the parent's add().
// Returns whatever the insert()/add() call that handled $obj returned.
46 function & add(& $obj)
48 if ($this->canContain($obj)) {
49 return $this->insert($obj);
51 return $this->parent->add($obj);
// Append $obj as a child of this element: make this element its parent and
// store it (by reference) at the end of $elements.
// This element's $last is then pointed at $obj->last, which is also returned.
55 function & insert(& $obj)
57 $obj->setParent($this);
58 $this->elements[] = & $obj;
60 return $this->last = & $obj->last;
// Tells add() whether $obj may be inserted under this element.
// NOTE(review): the body is not visible in this listing.
63 function canContain($obj)
// Wrap $string in an opening and closing $tag.
// $param is emitted verbatim right after the tag name, so callers pass it with
// its own leading space (e.g. ' id="..."').
// When $canomit is TRUE and $string == '' (loose comparison), '' is returned
// instead of an empty tag pair.
68 function wrap($string, $tag, $param = '', $canomit = TRUE)
70 return ($canomit && $string == '') ? '' :
71 '<' . $tag . $param . '>' . $string . '</' . $tag . '>';
// NOTE(review): the method header and the initialisation of $ret are not
// visible in this listing; presumably this is the body of toString() -- confirm.
// Collects toString() of every child and joins the results with "\n".
77 foreach (array_keys($this->elements) as $key)
78 $ret[] = $this->elements[$key]->toString();
79 return join("\n", $ret);
// Build a text outline of this element and its children.
// The first line is this element's class name preceded by $indent spaces;
// each child that is an object then contributes its own dump(), and
// non-object children contribute nothing.
// NOTE(review): some original lines of this method are not visible here.
82 function dump($indent = 0)
84 $ret = str_repeat(' ', $indent) . get_class($this) . "\n";
86 foreach (array_keys($this->elements) as $key) {
87 $ret .= is_object($this->elements[$key]) ?
88 $this->elements[$key]->dump($indent) : '';
89 //str_repeat(' ', $indent) . $this->elements[$key];
95 // Returns an inline-related object for $text:
// a Paragraph when $text starts with '~' (the '~' is replaced by a space),
// otherwise an Inline.
96 function & Factory_Inline($text)
98 // Check the first letter of the line
99 if (substr($text, 0, 1) == '~') {
100 return new Paragraph(' ' . substr($text, 1));
102 return new Inline($text);
// Definition-list factory.
// The left-trimmed $text is split at its first '|' into at most two parts.
// Without a '|' the line falls back to Factory_Inline(); otherwise a DList is
// built from the two parts. $root is not used by the visible lines.
106 function & Factory_DList(& $root, $text)
108 $out = explode('|', ltrim($text), 2);
109 if (count($out) < 2) {
110 return Factory_Inline($text);
112 return new DList($out);
116 // '|'-separated table
// A line of the form |...| with an optional trailing h, f or c (either case)
// becomes a Table built from the regex matches; any other line falls back to
// Factory_Inline(). $root is not used by the visible lines.
117 function & Factory_Table(& $root, $text)
119 if (! preg_match('/^\|(.+)\|([hHfFcC]?)$/', $text, $out)) {
120 return Factory_Inline($text);
122 return new Table($out);
126 // Comma-separated table
// NOTE(review): the condition guarding the Factory_Inline() fallback is not
// visible in this listing.
// Otherwise the text after its first character is split with csv_explode()
// on ',' and handed to YTable.
127 function & Factory_YTable(& $root, $text)
130 return Factory_Inline($text);
132 return new YTable(csv_explode(',', substr($text, 1)));
// Block-plugin factory for lines starting with '#'.
// Returns a Div when the line matches the '#name' / '#name(args)' pattern and
// exist_plugin_convert() accepts the captured name; the last visible statement
// returns a Paragraph holding the raw text.
// NOTE(review): some branch/brace lines are not visible in this listing, so the
// exact nesting of the conditions below should be confirmed against the full file.
136 function & Factory_Div(& $root, $text)
140 // Seems block plugin?
141 if (PKWKEXP_DISABLE_MULTILINE_PLUGIN_HACK) {
// Single-line form: name up to '(' and optional parenthesised arguments
143 if (preg_match('/^\#([^\(]+)(?:\((.*)\))?/', $text, $matches) &&
144 exist_plugin_convert($matches[1])) {
145 return new Div($matches);
// Multiline-aware form: additionally captures a trailing run of '{' in $matches[3]
149 if(preg_match('/^#([^\(\{]+)(?:\(([^\r]*)\))?(\{*)/', $text, $matches) &&
150 exist_plugin_convert($matches[1])) {
// Number of opening braces; the closing run must have the same length
151 $len = strlen($matches[3]);
154 return new Div($matches); // Seems legacy block plugin
155 } else if (preg_match('/\{{' . $len . '}\s*\r(.*)\r\}{' . $len . '}/', $text, $body)) {
// Append the "\r"-delimited body found between the brace runs to the plugin arguments
156 $matches[2] .= "\r" . $body[1] . "\r";
157 return new Div($matches); // Seems multiline-enabled block plugin
162 return new Paragraph($text);
// Inline content. Unlike most elements, $elements holds strings here,
// not child objects.
// NOTE(review): some original lines (braces, one method header) are not
// visible in this listing.
166 class Inline extends Element
// Store the text as the first fragment. Text starting with "\n" is used as-is,
// any other text is passed through make_link(); the result is trimmed.
168 function Inline($text)
171 $this->elements[] = trim((substr($text, 0, 1) == "\n") ?
172 $text : make_link($text));
// Merge $obj by copying its first fragment onto the end of this one
175 function & insert(& $obj)
177 $this->elements[] = $obj->elements[0];
// Only Inline objects can be merged in
181 function canContain($obj)
183 return is_a($obj, 'Inline');
// (method header not visible) Join the fragments with "\n", or with
// '<br />' followed by "\n" when $line_break is true
189 return join(($line_break ? '<br />' . "\n" : "\n"), $this->elements);
// Starts from an empty Paragraph created with $class as its parameter.
// NOTE(review): the rest of this method is not visible in this listing.
192 function & toPara($class = '')
194 $obj = & new Paragraph('', $class);
200 // Paragraph: blank-line-separated sentences, rendered as <p>
201 class Paragraph extends Element
// $param is kept and later passed to wrap() as the attribute string of the <p> tag.
// Empty $text leaves the paragraph without content. A leading '~' is replaced
// by a space before the text is inserted as inline content.
205 function Paragraph($text, $param = '')
208 $this->param = $param;
209 if ($text == '') return;
211 if (substr($text, 0, 1) == '~')
212 $text = ' ' . substr($text, 1);
214 $this->insert(Factory_Inline($text));
// Only Inline objects may be placed inside a paragraph
217 function canContain($obj)
219 return is_a($obj, 'Inline');
// (method header not visible) Wrap the children's output in <p> with $this->param
224 return $this->wrap(parent::toString(), 'p', $this->param);
// Heading element, rendered as <h2>, <h3> or <h4> with an id attribute.
// NOTE(review): some original lines are not visible in this listing.
231 class Heading extends Element
// $root must provide getAnchor(); $text is the heading line including its '*' markers.
237 function Heading(& $root, $text)
// Level = number of leading '*', capped at 3
241 $this->level = min(3, strspn($text, '*'));
// getAnchor() returns the text to display, the text emitted before the heading, and the heading id
242 list($text, $this->msg_top, $this->id) = $root->getAnchor($text, $this->level);
243 $this->insert(Factory_Inline($text));
244 $this->level++; // h2,h3,h4
// Insert via the parent implementation, but keep $last pointing at the heading itself
247 function & insert(& $obj)
249 parent::insert($obj);
250 return $this->last = & $this;
// NOTE(review): body not visible in this listing
253 function canContain(& $obj)
// (method header not visible) Emit $msg_top followed by the <hN id="..."> element
260 return $this->msg_top . $this->wrap(parent::toString(),
261 'h' . $this->level, ' id="' . $this->id . '"');
// Element created by Body::parse() for lines that start with '----'.
// NOTE(review): the method bodies are not visible in this listing.
267 class HRule extends Element
269 function HRule(& $root, $text)
274 function canContain(& $obj)
286 // Lists (UL, OL, DL): common base class
// NOTE(review): property declarations, braces and some statements are not
// visible in this listing.
287 class ListContainer extends Element
// $tag:  list tag name (also selects the margin globals below)
// $tag2: tag name given to the first ListElement
// $head: marker character whose leading run in $text gives the level
// $text: the source line
296 function ListContainer($tag, $tag2, $head, $text)
// Margins come from globals named after the tag: $_<tag>_margin and $_<tag>_left_margin
300 $var_margin = '_' . $tag . '_margin';
301 $var_left_margin = '_' . $tag . '_left_margin';
302 global $$var_margin, $$var_left_margin;
304 $this->margin = $$var_margin;
305 $this->left_margin = $$var_left_margin;
// Level = number of leading $head characters, capped at 3; the markers are then stripped
309 $this->level = min(3, strspn($text, $head));
310 $text = ltrim(substr($text, $this->level));
// First item of the list, followed by its inline content
312 parent::insert(new ListElement($this->level, $tag2));
314 $this->last = & $this->last->insert(Factory_Inline($text));
// Anything that is not a list is accepted; another list only with the same tag and level
317 function canContain(& $obj)
319 return (! is_a($obj, 'ListContainer')
320 || ($this->tag == $obj->tag && $this->level == $obj->level));
// Besides recording the parent, computes $this->style from $_list_pad_str
323 function setParent(& $parent)
325 global $_list_pad_str;
327 parent::setParent($parent);
// $step is the level relative to an enclosing list (the parent's parent), or
// the full level when there is none
329 $step = $this->level;
330 if (isset($parent->parent) && is_a($parent->parent, 'ListContainer'))
331 $step -= $parent->parent->level;
// The left margin is added only when $step equals the full level
333 $margin = $this->margin * $step;
334 if ($step == $this->level)
335 $margin += $this->left_margin;
337 $this->style = sprintf($_list_pad_str, $this->level, $margin, $margin);
340 function & insert(& $obj)
// Objects that are not of this list's own class go to the current insertion point
342 if (! is_a($obj, get_class($this)))
343 return $this->last = & $this->last->insert($obj);
345 // Break if no elements found (BugTrack/524)
346 if (count($obj->elements) == 1 && empty($obj->elements[0]->elements))
347 return $this->last->parent; // up to ListElement
// Same-class list: take over its elements instead of nesting the list itself
350 foreach(array_keys($obj->elements) as $key)
351 parent::insert($obj->elements[$key]);
// (method header not visible) Wrap the children's output in the list tag with the computed style
358 return $this->wrap(parent::toString(), $this->tag, $this->style);
// A single item inside a list container.
// NOTE(review): some original lines are not visible in this listing; where
// $head is stored into $this->head is not shown -- confirm.
362 class ListElement extends Element
364 function ListElement($level, $head)
367 $this->level = $level;
// A list may go inside this item only when its level is greater than the item's level;
// anything that is not a list is accepted
371 function canContain(& $obj)
373 return (! is_a($obj, 'ListContainer') || ($obj->level > $this->level));
// (method header not visible) Wrap the children's output in the tag named by $this->head
378 return $this->wrap(parent::toString(), $this->head);
// Unordered list: '-' marker, 'ul' container tag, 'li' item tag.
385 class UList extends ListContainer
// $root is not used by the visible lines
387 function UList(& $root, $text)
389 parent::ListContainer('ul', 'li', '-', $text);
// Ordered list: '+' marker, 'ol' container tag, 'li' item tag.
396 class OList extends ListContainer
// $root is not used by the visible lines
398 function OList(& $root, $text)
400 parent::ListContainer('ol', 'li', '+', $text);
// Definition list. Source lines look like:
404 // : definition1 | description1
405 // : definition2 | description2
406 // : definition3 | description3
407 class DList extends ListContainer
// NOTE(review): the constructor header is not visible in this listing; $out is
// the two-part array built by Factory_DList().
// $out[0] (the part before '|') becomes a 'dt' item of a 'dl' list
411 parent::ListContainer('dl', 'dt', ':', $out[0]);
// A 'dd' item is then added and becomes the insertion point
412 $this->last = & Element::insert(new ListElement($this->level, 'dd'));
// $out[1] (the part after '|') is inserted as its inline content
414 $this->last = & $this->last->insert(Factory_Inline($out[1]));
419 // Blockquote: lines starting with '>' as in e-mail text; rendered as <blockquote>
// NOTE(review): braces and some statements are not visible in this listing, so
// the exact branch structure of the constructor should be confirmed.
420 class BQuote extends Element
424 function BQuote(& $root, $text)
// Level = length of the leading run of the first character, capped at 3;
// that run is then stripped from the text
428 $head = substr($text, 0, 1);
429 $this->level = min(3, strspn($text, $head));
430 $text = ltrim(substr($text, $this->level));
432 if ($head == '<') { // Blockquote close
433 $level = $this->level;
// Continue after the open blockquote of that level (see end())
435 $this->last = & $this->end($root, $level);
437 $this->last = & $this->last->insert(Factory_Inline($text));
439 $this->insert(Factory_Inline($text));
// Anything that is not of this class is accepted; a quote of this class only
// when its level is the same or greater
443 function canContain(& $obj)
445 return (! is_a($obj, get_class($this)) || $obj->level >= $this->level);
448 function & insert(& $obj)
450 // BugTrack/521, BugTrack/545
// Inline content is converted to a paragraph with class="quotation"
451 if (is_a($obj, 'inline'))
452 return parent::insert($obj->toPara(' class="quotation"'));
// A non-empty BQuote of the same level is unwrapped to its first element, and
// once more when the current insertion point is a Paragraph
454 if (is_a($obj, 'BQuote') && $obj->level == $this->level && count($obj->elements)) {
455 $obj = & $obj->elements[0];
456 if (is_a($this->last, 'Paragraph') && count($obj->elements))
457 $obj = & $obj->elements[0];
459 return parent::insert($obj);
// (method header not visible) Wrap the children's output in <blockquote>
464 return $this->wrap(parent::toString(), 'blockquote');
// Walk up from $root->last through the parent chain; on the first BQuote with
// the given level, return that quote's parent.
// NOTE(review): what is returned when no such quote exists is not visible here.
467 function & end(& $root, $level)
469 $parent = & $root->last;
471 while (is_object($parent)) {
472 if (is_a($parent, 'BQuote') && $parent->level == $level)
473 return $parent->parent;
474 $parent = & $parent->parent;
// One cell of a '|'-separated table.
// NOTE(review): several original lines (other properties, braces, statements)
// are not visible in this listing; comments cover the visible lines only.
480 class TableCell extends Element
482 var $tag = 'td'; // {td|th}
485 var $style; // Array of CSS declarations keyed by name: array('width'=>, 'align'=>...)
// $text: raw cell text; $is_template: TRUE for cells of a 'c' (template) row
487 function TableCell($text, $is_template = FALSE)
490 $this->style = $matches = array();
// Strip leading "LEFT:", "CENTER:", "RIGHT:", "COLOR(x):", "BGCOLOR(x):" and
// "SIZE(n):" prefixes, recording each as a style entry
492 while (preg_match('/^(?:(LEFT|CENTER|RIGHT)|(BG)?COLOR\(([#\w]+)\)|SIZE\((\d+)\)):(.*)$/',
495 $this->style['align'] = 'text-align:' . strtolower($matches[1]) . ';';
497 } else if ($matches[3]) {
498 $name = $matches[2] ? 'background-color' : 'color';
499 $this->style[$name] = $name . ':' . htmlspecialchars($matches[3]) . ';';
501 } else if ($matches[4]) {
502 $this->style['size'] = 'font-size:' . htmlspecialchars($matches[4]) . 'px;';
// In a template row a numeric cell text sets the width in pixels
506 if ($is_template && is_numeric($text))
507 $this->style['width'] = 'width:' . $text . 'px;';
// NOTE(review): the first branch of this chain and the statements inside the
// branches are only partly visible; the last branch drops a leading '~'
511 } else if ($text == '~') {
513 } else if (substr($text, 0, 1) == '~') {
515 $text = substr($text, 1);
518 if ($text != '' && $text{0} == '#') {
519 // Try using Div class for this $text
520 $obj = & Factory_Div($this, $text);
// Factory_Div() hands back a Paragraph when the text is not a plugin call;
// use that paragraph's first child instead
521 if (is_a($obj, 'Paragraph'))
522 $obj = & $obj->elements[0];
524 $obj = & Factory_Inline($text);
// Copy entries from $style for keys this cell has not set itself
530 function setStyle(& $style)
532 foreach ($style as $key=>$value)
533 if (! isset($this->style[$key]))
534 $this->style[$key] = $value;
// (method header not visible) Cells whose rowspan or colspan is 0 produce no output
539 if ($this->rowspan == 0 || $this->colspan == 0) return '';
541 $param = ' class="style_' . $this->tag . '"';
542 if ($this->rowspan > 1)
543 $param .= ' rowspan="' . $this->rowspan . '"';
544 if ($this->colspan > 1) {
545 $param .= ' colspan="' . $this->colspan . '"';
// A cell spanning several columns drops its width style
546 unset($this->style['width']);
548 if (! empty($this->style))
549 $param .= ' style="' . join(' ', $this->style) . '"';
// $canomit is FALSE: the tag pair is emitted even when the content is empty
551 return $this->wrap(parent::toString(), $this->tag, $param, FALSE);
// '|'-separated table. Source lines look like:
555 // | title1 | title2 | title3 |
556 // | cell1 | cell2 | cell3 |
557 // | cell4 | cell5 | cell6 |
// NOTE(review): the constructor and toString() headers, braces and several
// statements are not visible in this listing.
558 class Table extends Element
562 var $col; // Number of columns
// (constructor header not visible) $out holds the regex matches from
// Factory_Table(): $out[1] is the text between the outer '|', $out[2] the row-type letter
568 $cells = explode('|', $out[1]);
569 $this->col = count($cells);
570 $this->type = strtolower($out[2]);
// $types collects one type per row, in step with $elements
571 $this->types = array($this->type);
572 $is_template = ($this->type == 'c');
// $elements holds rows; each row is an array of TableCell objects
574 foreach ($cells as $cell)
575 $row[] = & new TableCell($cell, $is_template);
576 $this->elements[] = $row;
// Only tables with the same number of columns are merged
579 function canContain(& $obj)
581 return is_a($obj, 'Table') && ($obj->col == $this->col);
// Append the other table's first row and its type
584 function & insert(& $obj)
586 $this->elements[] = $obj->elements[0];
587 $this->types[] = $obj->type;
// (method header not visible) Row type => table section tag
593 static $parts = array('h'=>'thead', 'f'=>'tfoot', ''=>'tbody');
595 // Set rowspan (from bottom, to top)
596 for ($ncol = 0; $ncol < $this->col; $ncol++) {
598 foreach (array_reverse(array_keys($this->elements)) as $nrow) {
599 $row = & $this->elements[$nrow];
600 if ($row[$ncol]->rowspan == 0) {
604 $row[$ncol]->rowspan = $rowspan;
607 $this->types[$nrow + $rowspan] = $this->types[$nrow];
612 // Set colspan and style
614 foreach (array_keys($this->elements) as $nrow) {
615 $row = & $this->elements[$nrow];
616 if ($this->types[$nrow] == 'c')
619 foreach (array_keys($row) as $ncol) {
620 if ($row[$ncol]->colspan == 0) {
624 $row[$ncol]->colspan = $colspan;
625 if ($stylerow !== NULL) {
626 $row[$ncol]->setStyle($stylerow[$ncol]->style);
627 // Inherits column style
629 $row[$ncol - $colspan]->setStyle($stylerow[$ncol]->style);
// Emit the rows grouped by section, in the order of $parts
637 foreach ($parts as $type => $part)
640 foreach (array_keys($this->elements) as $nrow) {
641 if ($this->types[$nrow] != $type)
643 $row = & $this->elements[$nrow];
645 foreach (array_keys($row) as $ncol)
646 $row_string .= $row[$ncol]->toString();
647 $part_string .= $this->wrap($row_string, 'tr');
649 $string .= $this->wrap($part_string, $part);
651 $string = $this->wrap($string, 'table', ' class="style_table" cellspacing="1" border="0"');
653 return $this->wrap($string, 'div', ' class="ie5"');
// Comma-separated table. Source lines look like:
657 // , title1 , title2 , title3
658 // , cell1 , cell2 , cell3
659 // , cell4 , cell5 , cell6
// Each row is stored in $elements as a ready-made string of <td> cells.
// NOTE(review): braces and some statements are not visible in this listing.
660 class YTable extends Element
// $_value: array of cell texts for one row
664 function YTable($_value)
668 $align = $value = $matches = array();
669 foreach($_value as $val) {
// Split each cell into leading whitespace, content and trailing whitespace
670 if (preg_match('/^(\s+)?(.+?)(\s+)?$/', $val, $matches)) {
// Leading and trailing whitespace => centered; leading only => right-aligned.
// NOTE(review): the remaining alternative is not visible in this listing.
671 $align[] =($matches[1] != '') ?
672 ((isset($matches[3]) && $matches[3] != '') ?
673 ' style="text-align:center"' :
674 ' style="text-align:right"'
676 $value[] = $matches[2];
682 $this->col = count($value);
// '==' cells start with a span of 0, all other cells with 1
684 foreach ($value as $val)
685 $colspan[] = ($val == '==') ? 0 : 1;
687 $count = count($value);
688 for ($i = 0; $i < $count; $i++) {
// Look at the cells to the right of the current span while they are '=='
// (loop body not visible in this listing)
690 while ($i + $colspan[$i] < $count && $value[$i + $colspan[$i]] == '==')
// Turn the numeric span into a colspan attribute (empty for a span of 1 or less)
692 $colspan[$i] = ($colspan[$i] > 1) ? ' colspan="' . $colspan[$i] . '"' : '';
693 $str .= '<td class="style_td"' . $align[$i] . $colspan[$i] . '>' . make_link($value[$i]) . '</td>';
696 $this->elements[] = $str;
// Only YTables with the same number of columns are merged
699 function canContain(& $obj)
701 return is_a($obj, 'YTable') && ($obj->col == $this->col);
// Append the other table's first row
704 function & insert(& $obj)
706 $this->elements[] = $obj->elements[0];
// (method header not visible) Wrap every row in <tr>, then the whole in <table> and <div>
713 foreach ($this->elements as $str)
714 $rows .= "\n" . '<tr class="style_tr">' . $str . '</tr>' . "\n";
715 $rows = $this->wrap($rows, 'table', ' class="style_table" cellspacing="1" border="0"');
716 return $this->wrap($rows, 'div', ' class="ie5"');
// Preformatted text, rendered as <pre>. Source lines begin with a space:
720 // ' 'Space-beginning sentence
721 // ' 'Space-beginning sentence
722 // ' 'Space-beginning sentence
723 class Pre extends Element
// Stores the HTML-escaped line. When the global $preformat_ltrim is set and
// the line starts with a space, that one leading space is removed first.
// $root is not used by the visible lines.
725 function Pre(& $root, $text)
727 global $preformat_ltrim;
729 $this->elements[] = htmlspecialchars(
730 (! $preformat_ltrim || $text == '' || $text{0} != ' ') ? $text : substr($text, 1));
// Consecutive Pre lines are merged into one block
733 function canContain(& $obj)
735 return is_a($obj, 'Pre');
// Append the other block's first line
738 function & insert(& $obj)
740 $this->elements[] = $obj->elements[0];
// (method header not visible) Join the lines with "\n" inside <pre>
746 return $this->wrap(join("\n", $this->elements), 'pre');
750 // Block plugin: #something (started with '#')
// NOTE(review): the constructor and toString() headers and the canContain()
// body are not visible in this listing.
751 class Div extends Element
// (constructor header not visible) $out is a regex match array: element 0 is
// skipped, element 1 is the plugin name, element 2 the parameter string
// (padded with '' when absent)
759 list(, $this->name, $this->param) = array_pad($out, 3, '');
762 function canContain(& $obj)
// (method header not visible) The output is whatever the plugin's convert call returns
770 return do_plugin_convert($this->name, $this->param);
774 // LEFT:/CENTER:/RIGHT: -- alignment block, rendered as <div style="text-align:...">
775 class Align extends Element
// $align is emitted as the text-align value; Body::parse() passes it in lower case
779 function Align($align)
782 $this->align = $align;
// Only Inline objects may be placed inside
785 function canContain(& $obj)
787 return is_a($obj, 'Inline');
// (method header not visible)
792 return $this->wrap(parent::toString(), 'div', ' style="text-align:' . $this->align . '"');
// Root element of a converted page: parses the wiki source line by line and
// builds the element tree.
// NOTE(review): property declarations, the contents of the two arrays below,
// the constructor header, braces and a number of statements are not visible in
// this listing; comments describe the visible lines only.
797 class Body extends Element
// First character of a line => element class name (used by parse())
803 var $classes = array(
// First character of a line => suffix of a Factory_* function (used by parse())
808 var $factories = array(
// (constructor header not visible) Separate element tree for the 'page contents' list
817 $this->contents = & new Element();
818 $this->contents_last = & $this->contents;
// Consume $lines (the array is emptied via array_shift) and add one element per line.
822 function parse(& $lines)
824 $this->last = & $this;
827 while (! empty($lines)) {
828 $line = array_shift($lines);
// Lines starting with '//' are skipped
831 if (substr($line, 0, 2) == '//') continue;
833 if (preg_match('/^(LEFT|CENTER|RIGHT):(.*)$/', $line, $matches)) {
834 // <div style="text-align:...">
835 $this->last = & $this->last->add(new Align(strtolower($matches[1])));
// Nothing after the alignment keyword: move on to the next line
836 if ($matches[2] == '') continue;
840 $line = rtrim($line, "\r\n");
844 $this->last = & $this;
849 if (substr($line, 0, 4) == '----') {
850 $this->insert(new HRule($this, $line));
854 // Multiline-enabled block plugin
855 if (! PKWKEXP_DISABLE_MULTILINE_PLUGIN_HACK &&
856 preg_match('/^#[^{]+(\{\{+)\s*$/', $line, $matches)) {
// Length of the opening brace run; a closing run of this length is looked for below
857 $len = strlen($matches[1]);
858 $line .= "\r"; // Delimiter
// Pull following lines into $line, separated by "\r"
859 while (! empty($lines)) {
860 $next_line = preg_replace("/[\r\n]*$/", '', array_shift($lines));
861 if (preg_match('/\}{' . $len . '}/', $next_line)) {
865 $line .= $next_line .= "\r"; // Delimiter
870 // The first character
875 $this->insert(new Heading($this, $line));
// Lines starting with a space or a tab are preformatted text
880 if ($head == ' ' || $head == "\t") {
881 $this->last = & $this->last->add(new Pre($this, $line));
// A trailing '~' is replaced by "\r"
886 if (substr($line, -1) == '~')
887 $line = substr($line, 0, -1) . "\r";
// Dispatch on the first character: element class first, then factory function
890 if (isset($this->classes[$head])) {
891 $classname = $this->classes[$head];
892 $this->last = & $this->last->add(new $classname($this, $line));
897 if (isset($this->factories[$head])) {
898 $factoryname = 'Factory_' . $this->factories[$head];
899 $this->last = & $this->last->add($factoryname($this, $line));
// The last visible case adds the line as inline content
904 $this->last = & $this->last->add(Factory_Inline($line));
// Prepare a heading: registers it in the contents list and returns
// array(text to display, text to emit before the heading, auto-generated id).
908 function getAnchor($text, $level)
910 global $top, $_symbol_anchor;
912 // Heading id (auto-generated)
913 $autoid = 'content_' . $this->id . '_' . $this->count;
916 // Heading id (specified by users)
917 $id = make_heading($text, FALSE); // Cut fixed-anchor from $text
923 $anchor = ' &aname(' . $id . ',super,full){' . $_symbol_anchor . '};';
928 // Add 'page contents' link to its heading
929 $this->contents_last = & $this->contents_last->add(new Contents_UList($text, $level, $id));
// $top is emitted before every heading whose count is greater than 1
932 return array($text . $anchor, $this->count > 1 ? "\n" . $top : '', $autoid);
// Inline objects are converted to paragraphs before being inserted
935 function & insert(& $obj)
937 if (is_a($obj, 'Inline')) $obj = & $obj->toPara();
938 return parent::insert($obj);
// (method header not visible) Render the tree, then replace each
// '<#_contents_>' placeholder through replace_contents()
945 $text = parent::toString();
948 $text = preg_replace_callback('/<#_contents_>/',
949 array(& $this, 'replace_contents'), $text);
// preg_replace_callback() handler: builds the contents block from the contents tree.
// NOTE(review): the end of this method is not visible in this listing.
954 function replace_contents($arr)
956 $contents = '<div class="contents">' . "\n" .
957 '<a id="contents_' . $this->id . '"></a>' . "\n" .
958 $this->contents->toString() . "\n" .
// List entry of the 'page contents' block: a 'ul' list item linking to one heading.
// NOTE(review): braces and some statements are not visible in this listing.
964 class Contents_UList extends ListContainer
// $text: heading text, $level: heading level, $id: anchor id used in the link
966 function Contents_UList($text, $level, $id)
968 // Reformatting $text
969 // A line starting with "\n" means "preformatted" ... X(
971 $text = "\n" . '<a href="#' . $id . '">' . $text . '</a>' . "\n";
// Build the list from a run of $level '-' markers, then insert the link as inline content
972 parent::ListContainer('ul', 'li', '-', str_repeat('-', $level));
973 $this->insert(Factory_Inline($text));
// Same purpose as ListContainer::setParent(), with a different margin rule:
// the left margin is always included, and the margin counts once when the list
// is not nested relative to an enclosing list.
976 function setParent(& $parent)
978 global $_list_pad_str;
980 parent::setParent($parent);
981 $step = $this->level;
982 $margin = $this->left_margin;
983 if (isset($parent->parent) && is_a($parent->parent, 'ListContainer')) {
984 $step -= $parent->parent->level;
987 $margin += $this->margin * ($step == $this->level ? 1 : $step);
988 $this->style = sprintf($_list_pad_str, $this->level, $margin, $margin);