2 // PukiWiki - Yet another WikiWikiWeb clone
5 // 2002-2016 PukiWiki Development Team
6 // 2001-2002 Originally written by yu-ji
7 // License: GPL v2 or (at your option) any later version
9 // function 'convert_html()', wiki text parser
10 // and related classes-and-functions
// Convert wiki source into HTML.
// $lines: wiki text, either an array of lines or one string (split on "\n").
// Returns the string produced by the Body object's toString().
// Side effect: the global $digest is overwritten with the MD5 of the joined
// result of get_source($vars['page']).
12 function convert_html($lines)
14 global $vars, $digest;
// Kept across calls so that every Body receives a different id
15 static $contents_id = 0;
18 $digest = md5(join('', get_source($vars['page'])));
20 if (! is_array($lines)) $lines = explode("\n", $lines);
22 $body = new Body(++$contents_id);
25 return $body->toString();
32 var $elements; // References to child elements
33 var $last; // New elements are inserted after the element $last refers to
// Start with no children; $last initially refers to the element itself.
39 function __construct()
41 $this->elements = array();
42 $this->last = & $this;
// Store a reference to $parent; add() delegates to it when this element
// cannot contain an object.
45 function setParent(& $parent)
47 $this->parent = & $parent;
// Add $obj to the tree: insert it here when canContain() accepts it,
// otherwise pass it on to the parent's add().
// Returns (by reference) whatever the chosen insert()/add() call returns.
50 function & add(& $obj)
52 if ($this->canContain($obj)) {
53 return $this->insert($obj);
55 return $this->parent->add($obj);
// Append $obj as a child: make this element its parent, store a reference to
// it in $elements, and let $this->last refer to $obj->last.
// Returns that same reference.
59 function & insert(& $obj)
61 $obj->setParent($this);
62 $this->elements[] = & $obj;
64 return $this->last = & $obj->last;
// Predicate used by add() to decide whether $obj may be inserted into this element.
67 function canContain($obj)
// Surround $string with an opening and a closing $tag.
// $param is placed verbatim right after the tag name in the opening tag
// (callers pass strings such as ' id="..."').
// When $canomit is TRUE and $string compares equal to '', nothing is emitted
// and an empty string is returned.
function wrap($string, $tag, $param = '', $canomit = TRUE)
{
    if ($canomit && $string == '') {
        return '';
    }

    $open  = '<' . $tag . $param . '>';
    $close = '</' . $tag . '>';

    return $open . $string . $close;
}
// Render every child with its own toString() and join the results with newlines.
81 foreach (array_keys($this->elements) as $key)
82 $ret[] = $this->elements[$key]->toString();
83 return join("\n", $ret);
// Debug helper: build a text outline of the tree, one class name per line.
// $indent: number of spaces placed before this element's class name.
86 function dump($indent = 0)
88 $ret = str_repeat(' ', $indent) . get_class($this) . "\n";
90 foreach (array_keys($this->elements) as $key) {
// Only object children are dumped; any other entry contributes an empty string
91 $ret .= is_object($this->elements[$key]) ?
92 $this->elements[$key]->dump($indent) : '';
93 //str_repeat(' ', $indent) . $this->elements[$key];
// Returns inline-related object.
// A line starting with '~' becomes a Paragraph (the '~' is replaced by a
// space); any other line becomes an Inline.
// The object is stored in a local variable first: a function declared with
// '&' may only return variables, and returning a 'new' expression directly
// raises "Only variable references should be returned by reference".
function & Factory_Inline($text)
{
    // Check the first letter of the line
    if (substr($text, 0, 1) == '~') {
        $obj = new Paragraph(' ' . substr($text, 1));
    } else {
        $obj = new Inline($text);
    }

    return $obj;
}
// Build the element for a definition-list line ("term | description").
// $root is accepted but not used here.
// The left-trimmed text is split at the first '|'; without a '|' the line is
// handed to Factory_Inline(), otherwise a DList is built from the two parts.
function & Factory_DList(& $root, $text)
{
    $out = explode('|', ltrim($text), 2);
    if (count($out) < 2) {
        // Returning the result of another by-reference function is allowed
        return Factory_Inline($text);
    }

    // Use a local variable: a by-reference function must not return a 'new'
    // expression directly ("Only variable references should be returned by reference")
    $obj = new DList($out);

    return $obj;
}
// '|'-separated table
// $root is accepted but not used here.
// A line of the form |...| with an optional trailing h/f/c letter (either
// case) becomes a Table built from the preg_match() result; any other line
// is handed to Factory_Inline().
function & Factory_Table(& $root, $text)
{
    if (! preg_match('/^\|(.+)\|([hHfFcC]?)$/', $text, $out)) {
        // Returning the result of another by-reference function is allowed
        return Factory_Inline($text);
    }

    // Use a local variable: a by-reference function must not return a 'new'
    // expression directly ("Only variable references should be returned by reference")
    $obj = new Table($out);

    return $obj;
}
130 // Comma-separated table
// $root is accepted but not used in the visible code.
// One branch falls back to Factory_Inline(); the other drops the first
// character of $text and splits the rest with csv_explode(',') to build a YTable.
131 function & Factory_YTable(& $root, $text)
134 return Factory_Inline($text);
// NOTE(review): returning a 'new' expression from a by-reference function raises
// "Only variable references should be returned by reference"; assigning the
// object to a local variable first would avoid it.
136 return new YTable(csv_explode(',', substr($text, 1)));
// Build the element for a line that looks like a block plugin call ("#name(args)").
// A Div is returned when exist_plugin_convert() accepts the captured plugin name;
// the last visible fallback is a Paragraph holding the raw text.
140 function & Factory_Div(& $root, $text)
144 // Seems block plugin?
145 if (PKWKEXP_DISABLE_MULTILINE_PLUGIN_HACK) {
// Single-line form: "#name" optionally followed by "(args)"
147 if (preg_match('/^\#([^\(]+)(?:\((.*)\))?/', $text, $matches) &&
148 exist_plugin_convert($matches[1])) {
149 return new Div($matches);
// Multiline-capable form: $matches[3] captures the run of '{' after the arguments
153 if(preg_match('/^#([^\(\{]+)(?:\(([^\r]*)\))?(\{*)/', $text, $matches) &&
154 exist_plugin_convert($matches[1])) {
// $len: number of opening braces; the same count is required for the closing run below
155 $len = strlen($matches[3]);
158 return new Div($matches); // Seems legacy block plugin
159 } else if (preg_match('/\{{' . $len . '}\s*\r(.*)\r\}{' . $len . '}/', $text, $body)) {
// The body found between the "\r" delimiters is appended to the argument string
160 $matches[2] .= "\r" . $body[1] . "\r";
161 return new Div($matches); // Seems multiline-enabled block plugin
166 return new Paragraph($text);
// Inline text. Unlike other elements, $elements holds strings here, not objects.
170 class Inline extends Element
// PHP4-style constructor; forwards to __construct()
172 function Inline($text)
174 $this->__construct($text);
// Text starting with "\n" is stored as it is; any other text is passed
// through make_link(). Either way the result is trim()med before it is stored.
176 function __construct($text)
178 parent::__construct();
179 $this->elements[] = trim((substr($text, 0, 1) == "\n") ?
180 $text : make_link($text));
// Merge another object by copying its first stored entry
183 function & insert(& $obj)
185 $this->elements[] = $obj->elements[0];
// Only Inline objects can be merged in
189 function canContain($obj)
191 return is_a($obj, 'Inline');
// Join the stored strings with "\n"; when $line_break is set, each separator
// is '<br />' followed by "\n"
197 return join(($line_break ? '<br />' . "\n" : "\n"), $this->elements);
// Creates an empty Paragraph whose $param is $class
200 function & toPara($class = '')
202 $obj = new Paragraph('', $class);
208 // Paragraph: blank-line-separated sentences
209 class Paragraph extends Element
// PHP4-style constructor; forwards to __construct()
213 function Paragraph($text, $param = '')
215 $this->__construct($text, $param);
// $param: string placed inside the opening <p> tag when the paragraph is rendered.
// Empty $text creates a paragraph without any child.
217 function __construct($text, $param = '')
219 parent::__construct();
220 $this->param = $param;
221 if ($text == '') return;
// A leading '~' is replaced by a single space
223 if (substr($text, 0, 1) == '~')
224 $text = ' ' . substr($text, 1);
226 $this->insert(Factory_Inline($text));
// Paragraphs accept Inline objects only
229 function canContain($obj)
231 return is_a($obj, 'Inline');
// Render the children inside <p>, with $this->param as the tag's parameter string
236 return $this->wrap(parent::toString(), 'p', $this->param);
// Heading line: the run of '*' at the start of the text selects the level
243 class Heading extends Element
// PHP4-style constructor; forwards to __construct()
249 function Heading(& $root, $text)
251 $this->__construct($root, $text);
// $root must provide getAnchor(); its result supplies the heading text, the
// string emitted before the heading ($msg_top) and the heading id.
253 function __construct(& $root, $text)
255 parent::__construct();
// Number of leading '*' characters, capped at 3
257 $this->level = min(3, strspn($text, '*'));
258 list($text, $this->msg_top, $this->id) = $root->getAnchor($text, $this->level);
259 $this->insert(Factory_Inline($text));
260 $this->level++; // h2,h3,h4
// After inserting, $last refers to the heading itself rather than to the child
263 function & insert(& $obj)
265 parent::insert($obj);
266 return $this->last = & $this;
269 function canContain(& $obj)
// Output: $msg_top followed by <hN id="...">...</hN>, N being $this->level
276 return $this->msg_top . $this->wrap(parent::toString(),
277 'h' . $this->level, ' id="' . $this->id . '"');
// Element that Body::parse() creates for lines starting with '----'
283 class HRule extends Element
// PHP4-style constructor; forwards to __construct()
285 function HRule(& $root, $text)
287 $this->__construct($root, $text);
// $root and $text are accepted but not used in the visible code
289 function __construct(& $root, $text)
291 parent::__construct();
294 function canContain(& $obj)
306 // Lists (UL, OL, DL)
307 class ListContainer extends Element
// PHP4-style constructor; forwards to __construct()
316 function ListContainer($tag, $tag2, $head, $text)
318 $this->__construct($tag, $tag2, $head, $text);
// $tag:  selects the globals $_<tag>_margin and $_<tag>_left_margin
// $tag2: passed to the ListElement created for the first item
// $head: marker character(s); the length of their leading run in $text is the level
// $text: source line including the markers
320 function __construct($tag, $tag2, $head, $text)
322 parent::__construct();
// Variable variables: the margin settings are looked up by tag name
324 $var_margin = '_' . $tag . '_margin';
325 $var_left_margin = '_' . $tag . '_left_margin';
326 global $$var_margin, $$var_left_margin;
328 $this->margin = $$var_margin;
329 $this->left_margin = $$var_left_margin;
// Level = number of leading marker characters, capped at 3; that many
// characters are then cut off and the rest is left-trimmed
333 $this->level = min(3, strspn($text, $head));
334 $text = ltrim(substr($text, $this->level));
336 parent::insert(new ListElement($this->level, $tag2));
338 $this->last = & $this->last->insert(Factory_Inline($text));
// Non-list objects are always accepted; another list only with the same tag and level
341 function canContain(& $obj)
343 return (! is_a($obj, 'ListContainer')
344 || ($this->tag == $obj->tag && $this->level == $obj->level));
// Besides storing the parent, compute $this->style from the level and margins
347 function setParent(& $parent)
349 global $_list_pad_str;
351 parent::setParent($parent);
// $step: this level, reduced by the level of an enclosing ListContainer
// (the parent's parent) when there is one
353 $step = $this->level;
354 if (isset($parent->parent) && is_a($parent->parent, 'ListContainer'))
355 $step -= $parent->parent->level;
357 $margin = $this->margin * $step;
// The left margin is added only while $step still equals the level
358 if ($step == $this->level)
359 $margin += $this->left_margin;
361 $this->style = sprintf($_list_pad_str, $this->level, $margin, $margin);
// Objects of another class are inserted into the current last element;
// a list of the same class has its items taken over one by one
364 function & insert(& $obj)
366 if (! is_a($obj, get_class($this)))
367 return $this->last = & $this->last->insert($obj);
369 // Break if no elements found (BugTrack/524)
370 if (count($obj->elements) == 1 && empty($obj->elements[0]->elements))
371 return $this->last->parent; // up to ListElement
374 foreach(array_keys($obj->elements) as $key)
375 parent::insert($obj->elements[$key]);
// Render the children inside <$tag>, with $this->style as the parameter string
382 return $this->wrap(parent::toString(), $this->tag, $this->style);
// A single list item (created by ListContainer and DList)
386 class ListElement extends Element
// PHP4-style constructor; forwards to __construct()
388 function ListElement($level, $head)
390 $this->__construct($level, $head);
392 function __construct($level, $head)
394 parent::__construct();
395 $this->level = $level;
// Non-list objects are always accepted; a list only when it is deeper than this item
399 function canContain(& $obj)
401 return (! is_a($obj, 'ListContainer') || ($obj->level > $this->level));
// Render the children inside the tag named by $this->head
406 return $this->wrap(parent::toString(), $this->head);
// List rendered as <ul> with <li> items; '-' is the marker character.
class UList extends ListContainer
{
    // PHP4-style constructor; forwards to __construct()
    function UList(& $root, $text)
    {
        $this->__construct($root, $text);
    }

    // $root is accepted but not used; only $text is passed to ListContainer
    function __construct(& $root, $text)
    {
        parent::__construct('ul', 'li', '-', $text);
    }
}
// List rendered as <ol> with <li> items; '+' is the marker character.
class OList extends ListContainer
{
    // PHP4-style constructor; forwards to __construct()
    function OList(& $root, $text)
    {
        $this->__construct($root, $text);
    }

    // $root is accepted but not used; only $text is passed to ListContainer
    function __construct(& $root, $text)
    {
        parent::__construct('ol', 'li', '+', $text);
    }
}
440 // : definition1 | description1
441 // : definition2 | description2
442 // : definition3 | description3
443 class DList extends ListContainer
447 $this->__construct($out);
// $out[0]: the text before the first '|' (the term, ':' markers included),
// $out[1]: the text after it (the description) — see Factory_DList()
449 function __construct($out)
451 parent::__construct('dl', 'dt', ':', $out[0]);
// Add a 'dd' item after the 'dt' item created by the parent constructor;
// Element::insert() is called directly, bypassing ListContainer::insert()
452 $this->last = & Element::insert(new ListElement($this->level, 'dd'));
454 $this->last = & $this->last->insert(Factory_Inline($out[1]));
459 // > like E-mail text
460 class BQuote extends Element
// PHP4-style constructor; forwards to __construct()
464 function BQuote(& $root, $text)
466 $this->__construct($root, $text);
468 function __construct(& $root, $text)
470 parent::__construct();
// The first character is the marker; the length of its leading run
// (capped at 3) is the level. The markers are then stripped from $text.
472 $head = substr($text, 0, 1);
473 $this->level = min(3, strspn($text, $head));
474 $text = ltrim(substr($text, $this->level));
476 if ($head == '<') { // Blockquote close
477 $level = $this->level;
// Continue at the element that end() returns for this level
479 $this->last = & $this->end($root, $level);
481 $this->last = & $this->last->insert(Factory_Inline($text));
483 $this->insert(Factory_Inline($text));
// Objects of another class are always accepted; a blockquote only at the same or a deeper level
487 function canContain(& $obj)
489 return (! is_a($obj, get_class($this)) || $obj->level >= $this->level);
492 function & insert(& $obj)
494 // BugTrack/521, BugTrack/545
// Inline text is converted with toPara(' class="quotation"') before insertion
495 if (is_a($obj, 'inline'))
496 return parent::insert($obj->toPara(' class="quotation"'));
// A non-empty blockquote of the same level is replaced by its first child;
// when $this->last is a Paragraph, that child is unwrapped once more
498 if (is_a($obj, 'BQuote') && $obj->level == $this->level && count($obj->elements)) {
499 $obj = & $obj->elements[0];
500 if (is_a($this->last, 'Paragraph') && count($obj->elements))
501 $obj = & $obj->elements[0];
503 return parent::insert($obj);
// Render the children inside <blockquote>
508 return $this->wrap(parent::toString(), 'blockquote');
// Walk up from $root->last along the parent links and return the parent of
// the first BQuote whose level equals $level
511 function & end(& $root, $level)
513 $parent = & $root->last;
515 while (is_object($parent)) {
516 if (is_a($parent, 'BQuote') && $parent->level == $level)
517 return $parent->parent;
518 $parent = & $parent->parent;
// One cell of a '|'-separated table (created by Table)
524 class TableCell extends Element
526 var $tag = 'td'; // {td|th}
529 var $style; // is array('width'=>, 'align'=>...);
// PHP4-style constructor; forwards to __construct()
531 function TableCell($text, $is_template = FALSE)
533 $this->__construct($text, $is_template);
// $text: source text of the cell
// $is_template: TRUE when the row type is 'c' (see Table); a numeric $text
// is then taken as a width in pixels
// The loop below handles the prefixes LEFT: / CENTER: / RIGHT: /
// COLOR(..): / BGCOLOR(..): / SIZE(n): and records them in $this->style.
535 function __construct($text, $is_template = FALSE)
537 parent::__construct();
538 $this->style = $matches = array();
540 while (preg_match('/^(?:(LEFT|CENTER|RIGHT)|(BG)?COLOR\(([#\w]+)\)|SIZE\((\d+)\)):(.*)$/',
543 $this->style['align'] = 'text-align:' . strtolower($matches[1]) . ';';
545 } else if ($matches[3]) {
// $matches[2] is 'BG' for BGCOLOR(...), empty for COLOR(...)
546 $name = $matches[2] ? 'background-color' : 'color';
547 $this->style[$name] = $name . ':' . htmlsc($matches[3]) . ';';
549 } else if ($matches[4]) {
550 $this->style['size'] = 'font-size:' . htmlsc($matches[4]) . 'px;';
554 if ($is_template && is_numeric($text))
555 $this->style['width'] = 'width:' . $text . 'px;';
559 } else if ($text == '~') {
561 } else if (substr($text, 0, 1) == '~') {
563 $text = substr($text, 1);
// NOTE(review): the curly-brace string offset $text{0} is deprecated since
// PHP 7.4 and no longer parses in PHP 8.0; $text[0] is the supported spelling.
566 if ($text != '' && $text{0} == '#') {
567 // Try using Div class for this $text
568 $obj = & Factory_Div($this, $text);
// Factory_Div() can fall back to a Paragraph; its first child is used in that case
569 if (is_a($obj, 'Paragraph'))
570 $obj = & $obj->elements[0];
572 $obj = & Factory_Inline($text);
// Copy the entries of $style that this cell has not set itself
578 function setStyle(& $style)
580 foreach ($style as $key=>$value)
581 if (! isset($this->style[$key]))
582 $this->style[$key] = $value;
// A cell whose rowspan or colspan is 0 produces no output
587 if ($this->rowspan == 0 || $this->colspan == 0) return '';
589 $param = ' class="style_' . $this->tag . '"';
590 if ($this->rowspan > 1)
591 $param .= ' rowspan="' . $this->rowspan . '"';
592 if ($this->colspan > 1) {
593 $param .= ' colspan="' . $this->colspan . '"';
// A cell spanning several columns drops its own width style
594 unset($this->style['width']);
596 if (! empty($this->style))
597 $param .= ' style="' . join(' ', $this->style) . '"';
// $canomit is FALSE, so an empty cell still yields its tag pair
599 return $this->wrap(parent::toString(), $this->tag, $param, FALSE);
603 // | title1 | title2 | title3 |
604 // | cell1 | cell2 | cell3 |
605 // | cell4 | cell5 | cell6 |
606 class Table extends Element
610 var $col; // Number of columns
614 $this->__construct($out);
// $out: preg_match() result from Factory_Table() — $out[1] is the text
// between the outer '|' characters, $out[2] the optional row type letter.
// A new Table holds exactly one row; further rows arrive through insert().
616 function __construct($out)
618 parent::__construct();
620 $cells = explode('|', $out[1]);
621 $this->col = count($cells);
622 $this->type = strtolower($out[2]);
// $types runs parallel to $elements: one type letter per row
623 $this->types = array($this->type);
624 $is_template = ($this->type == 'c');
626 foreach ($cells as $cell)
627 $row[] = new TableCell($cell, $is_template);
628 $this->elements[] = $row;
// Only a Table with the same number of columns can be appended
631 function canContain(& $obj)
633 return is_a($obj, 'Table') && ($obj->col == $this->col);
// Take over the single row of $obj together with its row type
636 function & insert(& $obj)
638 $this->elements[] = $obj->elements[0];
639 $this->types[] = $obj->type;
// Row type letter => table section tag
645 static $parts = array('h'=>'thead', 'f'=>'tfoot', ''=>'tbody');
647 // Set rowspan (from bottom, to top)
648 for ($ncol = 0; $ncol < $this->col; $ncol++) {
650 foreach (array_reverse(array_keys($this->elements)) as $nrow) {
651 $row = & $this->elements[$nrow];
652 if ($row[$ncol]->rowspan == 0) {
656 $row[$ncol]->rowspan = $rowspan;
659 $this->types[$nrow + $rowspan] = $this->types[$nrow];
664 // Set colspan and style
666 foreach (array_keys($this->elements) as $nrow) {
667 $row = & $this->elements[$nrow];
668 if ($this->types[$nrow] == 'c')
671 foreach (array_keys($row) as $ncol) {
672 if ($row[$ncol]->colspan == 0) {
676 $row[$ncol]->colspan = $colspan;
// When $stylerow is set, its cells supply styles the current cell has not set itself
677 if ($stylerow !== NULL) {
678 $row[$ncol]->setStyle($stylerow[$ncol]->style);
679 // Inherits column style
681 $row[$ncol - $colspan]->setStyle($stylerow[$ncol]->style);
// Emit the rows grouped by section, following the order of $parts
689 foreach ($parts as $type => $part)
692 foreach (array_keys($this->elements) as $nrow) {
693 if ($this->types[$nrow] != $type)
695 $row = & $this->elements[$nrow];
697 foreach (array_keys($row) as $ncol)
698 $row_string .= $row[$ncol]->toString();
699 $part_string .= $this->wrap($row_string, 'tr');
701 $string .= $this->wrap($part_string, $part);
703 $string = $this->wrap($string, 'table', ' class="style_table" cellspacing="1" border="0"');
// The table is finally wrapped in <div class="ie5">
705 return $this->wrap($string, 'div', ' class="ie5"');
709 // , cell1 , cell2 , cell3
710 // , cell4 , cell5 , cell6
711 // , cell7 , right,==
713 class YTable extends Element
715 var $col; // Number of columns
// PHP4-style constructor; forwards to __construct()
717 function YTable($row = array('cell1 ', ' cell2 ', ' cell3'))
719 $this->__construct($row);
721 // TODO: Seems unable to show literal '==' without tricks.
722 // But it will be incompatible.
723 // TODO: Why toString() or toXHTML() here
// $row: list of raw cell strings (Factory_YTable() passes a csv_explode() result).
// The constructor renders the row to a string of <td> markup and stores it
// as the single entry of $elements.
724 function __construct($row = array('cell1 ', ' cell2 ', ' cell3'))
726 parent::__construct();
731 $matches = $_value = $_align = array();
732 foreach($row as $cell) {
// $matches[1] / $matches[3]: leading / trailing whitespace, $matches[2]: the content between
733 if (preg_match('/^(\s+)?(.+?)(\s+)?$/', $cell, $matches)) {
734 if ($matches[2] == '==') {
739 $_value[] = $matches[2];
740 if ($matches[1] == '') {
741 $_align[] = ''; // left
742 } else if (isset($matches[3])) {
743 $_align[] = 'center';
// Build the markup: entries equal to FALSE emit no cell of their own, and a
// run of FALSE entries after a cell increases that cell's colspan
754 for ($i = 0; $i < $col; $i++) {
755 if ($_value[$i] === FALSE) continue;
757 while (isset($_value[$i + $colspan]) && $_value[$i + $colspan] === FALSE) ++$colspan;
758 $colspan = ($colspan > 1) ? ' colspan="' . $colspan . '"' : '';
759 $align = $_align[$i] ? ' style="text-align:' . $_align[$i] . '"' : '';
760 $str[] = '<td class="style_td"' . $align . $colspan . '>';
761 $str[] = make_link($_value[$i]);
763 unset($_value[$i], $_align[$i]);
767 $this->elements[] = implode('', $str);
// Only a YTable with the same number of columns can be appended
770 function canContain(& $obj)
772 return is_a($obj, 'YTable') && ($obj->col == $this->col);
// Take over the single rendered row of $obj
775 function & insert(& $obj)
777 $this->elements[] = $obj->elements[0];
// One <tr> per stored row string; the table is then wrapped in <div class="ie5">
784 foreach ($this->elements as $str) {
785 $rows .= "\n" . '<tr class="style_tr">' . $str . '</tr>' . "\n";
787 $rows = $this->wrap($rows, 'table', ' class="style_table" cellspacing="1" border="0"');
788 return $this->wrap($rows, 'div', ' class="ie5"');
792 // ' 'Space-beginning sentence
793 // ' 'Space-beginning sentence
794 // ' 'Space-beginning sentence
// Preformatted text; $elements holds HTML-escaped strings, one per source line
795 class Pre extends Element
// PHP4-style constructor; forwards to __construct()
797 function Pre(& $root, $text)
799 $this->__construct($root, $text);
// Store one preformatted line.
// $root is accepted but not used. $text is HTML-escaped with htmlsc() and
// appended to $elements; when the global $preformat_ltrim is set and $text
// starts with a space, that one space is removed first.
function __construct(& $root, $text)
{
    global $preformat_ltrim;

    parent::__construct();

    // $text[0] replaces the curly-brace offset $text{0}, which PHP 7.4
    // deprecated and PHP 8.0 no longer parses
    $strip = $preformat_ltrim && $text != '' && $text[0] == ' ';
    $this->elements[] = htmlsc($strip ? substr($text, 1) : $text);
}
// Only another Pre can be merged into this one
809 function canContain(& $obj)
811 return is_a($obj, 'Pre');
// Merge another Pre by copying its first stored line
814 function & insert(& $obj)
816 $this->elements[] = $obj->elements[0];
// Render the stored lines, joined with newlines, inside <pre>
822 return $this->wrap(join("\n", $this->elements), 'pre');
826 // Block plugin: #something (started with '#')
827 class Div extends Element
834 $this->__construct($out);
// $out: preg_match() result from Factory_Div() — [1] is the plugin name,
// [2] its parameter string; array_pad() supplies '' when [2] is absent
836 function __construct($out)
838 parent::__construct();
839 list(, $this->name, $this->param) = array_pad($out, 3, '');
842 function canContain(& $obj)
// The output is whatever do_plugin_convert() returns for this plugin and its parameters
850 return do_plugin_convert($this->name, $this->param);
854 // LEFT:/CENTER:/RIGHT:
855 class Align extends Element
// PHP4-style constructor; forwards to __construct()
859 function Align($align)
861 $this->__construct($align);
// $align: value written after "text-align:" (Body::parse() passes the lower-cased keyword)
863 function __construct($align)
865 parent::__construct();
866 $this->align = $align;
// Only Inline objects are accepted
869 function canContain(& $obj)
871 return is_a($obj, 'Inline');
// Render the children inside <div style="text-align:...">
876 return $this->wrap(parent::toString(), 'div', ' style="text-align:' . $this->align . '"');
// Root element of one conversion; parse() turns source lines into the element tree
881 class Body extends Element
// First character of a line => class name, instantiated in parse() as new $classname($this, $line)
887 var $classes = array(
// First character of a line => suffix of the Factory_* function called in parse()
892 var $factories = array(
900 $this->__construct($id);
902 function __construct($id)
// Separate element that collects the entries added by getAnchor()
905 $this->contents = new Element();
906 $this->contents_last = & $this->contents;
907 parent::__construct();
// Build the tree from $lines. Lines are removed from $lines with array_shift()
// as they are processed.
910 function parse(& $lines)
912 $this->last = & $this;
915 while (! empty($lines)) {
916 $line = array_shift($lines);
// Lines starting with '//' are skipped
919 if (substr($line, 0, 2) == '//') continue;
921 if (preg_match('/^(LEFT|CENTER|RIGHT):(.*)$/', $line, $matches)) {
922 // <div style="text-align:...">
923 $this->last = & $this->last->add(new Align(strtolower($matches[1])));
// Nothing follows the keyword on this line: go on with the next line
924 if ($matches[2] == '') continue;
928 $line = rtrim($line, "\r\n");
932 $this->last = & $this;
937 if (substr($line, 0, 4) == '----') {
938 $this->insert(new HRule($this, $line));
942 // Multiline-enabled block plugin
943 if (! PKWKEXP_DISABLE_MULTILINE_PLUGIN_HACK &&
944 preg_match('/^#[^{]+(\{\{+)\s*$/', $line, $matches)) {
// $len: number of opening braces; a following line is tested for a run of
// $len closing braces
945 $len = strlen($matches[1]);
946 $line .= "\r"; // Delimiter
947 while (! empty($lines)) {
948 $next_line = preg_replace("/[\r\n]*$/", '', array_shift($lines));
949 if (preg_match('/\}{' . $len . '}/', $next_line)) {
953 $line .= $next_line .= "\r"; // Delimiter
958 // The first character
963 $this->insert(new Heading($this, $line));
// Lines starting with a space or a tab become preformatted text
968 if ($head == ' ' || $head == "\t") {
969 $this->last = & $this->last->add(new Pre($this, $line));
// A trailing '~' is replaced by "\r"
974 if (substr($line, -1) == '~')
975 $line = substr($line, 0, -1) . "\r";
978 if (isset($this->classes[$head])) {
979 $classname = $this->classes[$head];
980 $this->last = & $this->last->add(new $classname($this, $line));
985 if (isset($this->factories[$head])) {
986 $factoryname = 'Factory_' . $this->factories[$head];
987 $this->last = & $this->last->add($factoryname($this, $line));
// Anything else is inline text
992 $this->last = & $this->last->add(Factory_Inline($line));
// Prepare a heading (called by Heading::__construct()).
// Returns array(heading text followed by $anchor,
//               string to output before the heading,
//               auto-generated heading id).
// Also adds a Contents_UList entry for the heading to $this->contents.
996 function getAnchor($text, $level)
998 global $top, $_symbol_anchor;
1000 // Heading id (auto-generated)
1001 $autoid = 'content_' . $this->id . '_' . $this->count;
1004 // Heading id (specified by users)
1005 $id = make_heading($text, FALSE); // Cut fixed-anchor from $text
1011 $anchor = ' &aname(' . $id . ',super,full){' . $_symbol_anchor . '};';
1014 $text = ' ' . $text;
1016 // Add 'page contents' link to its heading
1017 $this->contents_last = & $this->contents_last->add(new Contents_UList($text, $level, $id));
// $top is put before every heading except when $this->count is 1 or less
1020 return array($text . $anchor, $this->count > 1 ? "\n" . $top : '', $autoid);
// Inline objects are converted with toPara() before they are inserted
1023 function & insert(& $obj)
1025 if (is_a($obj, 'Inline')) $obj = & $obj->toPara();
1026 return parent::insert($obj);
// Render the tree, then let replace_contents() supply the replacement for
// every '<#_contents_>' marker in the result
1033 $text = parent::toString();
1036 $text = preg_replace_callback('/<#_contents_>/',
1037 array(& $this, 'replace_contents'), $text);
1039 return $text . "\n";
// preg_replace_callback() handler: builds the <div class="contents"> block
// from the collected $this->contents
1042 function replace_contents($arr)
1044 $contents = '<div class="contents">' . "\n" .
1045 '<a id="contents_' . $this->id . '"></a>' . "\n" .
1046 $this->contents->toString() . "\n" .
// List entry for the page contents; Body::getAnchor() creates one per heading
1052 class Contents_UList extends ListContainer
// PHP4-style constructor; forwards to __construct()
1054 function Contents_UList($text, $level, $id)
1056 $this->__construct($text, $level, $id);
// $text:  heading text
// $level: heading level; that many '-' markers are passed to ListContainer
// $id:    anchor id the generated link points to
1058 function __construct($text, $level, $id)
1060 // Reformatting $text
1061 // A line started with "\n" means "preformatted" ... X(
// NOTE(review): the return value of make_heading() is ignored here; presumably
// it rewrites $text in place — confirm against its definition
1062 make_heading($text);
// The leading "\n" makes Inline::__construct() store the link without passing it to make_link()
1063 $text = "\n" . '<a href="#' . $id . '">' . $text . '</a>' . "\n";
1064 parent::__construct('ul', 'li', '-', str_repeat('-', $level));
1065 $this->insert(Factory_Inline($text));
// Replaces ListContainer::setParent() with a different margin computation:
// the left margin is always included
1068 function setParent(& $parent)
1070 global $_list_pad_str;
1072 parent::setParent($parent);
1073 $step = $this->level;
1074 $margin = $this->left_margin;
1075 if (isset($parent->parent) && is_a($parent->parent, 'ListContainer')) {
1076 $step -= $parent->parent->level;
// While $step still equals the level the margin is added once, otherwise $step times
1079 $margin += $this->margin * ($step == $this->level ? 1 : $step);
1080 $this->style = sprintf($_list_pad_str, $this->level, $margin, $margin);