HTMLPurifier 4.4.0
/home/ezyang/Dev/htmlpurifier/library/HTMLPurifier/ChildDef/Table.php
Go to the documentation of this file.
00001 <?php
00002 
/**
 * Child definition for table elements.
 *
 * Validates the direct children of a table by grouping the child tokens
 * into caption, col/colgroup, thead, tfoot and tr/tbody collections and
 * then re-emitting them in a fixed order: caption, columns, thead, tfoot,
 * and finally the row content.
 */
class HTMLPurifier_ChildDef_Table extends HTMLPurifier_ChildDef
{
    public $allow_empty = false;
    public $type = 'table';
    public $elements = array('tr' => true, 'tbody' => true, 'thead' => true,
        'tfoot' => true, 'caption' => true, 'colgroup' => true, 'col' => true);
    public function __construct() {}

    /**
     * Validates and reorders the child tokens of a table.
     *
     * @param array $tokens_of_children Tokens that make up the children of
     *        the table element.
     * @param mixed $config  Not used by this implementation.
     * @param mixed $context Not used by this implementation.
     * @return bool|array false when there are no child tokens or no tr/tbody
     *         content was found, true when the reassembled token list is
     *         identical to the input, otherwise the reassembled token array.
     */
    public function validateChildren($tokens_of_children, $config, $context) {
        if (empty($tokens_of_children)) return false;

        // this ensures that the loop gets run one last time before closing
        // up. It's a little bit of a hack, but it works! Just make sure you
        // get rid of the token later.
        $tokens_of_children[] = false;

        // only one of these elements is allowed in a table
        $caption = false;
        $thead   = false;
        $tfoot   = false;

        // as many of these as you want
        $cols    = array();
        $content = array();

        $nesting = 0; // current depth so we can determine nodes
        $is_collecting = false; // are we globbing together tokens to package
                                // into one of the collectors?
        $collection = array(); // collected nodes
        $tag_index = 0; // the first node might be whitespace,
                        // so this tells us where the start tag is
        $tbody_mode = false; // if true, then we need to wrap any stray
                             // <tr>s with a <tbody>.

        foreach ($tokens_of_children as $token) {
            // depth is sampled BEFORE this token adjusts it, so a start
            // tag at depth zero counts as a direct child while its
            // matching end tag does not
            $is_child = ($nesting == 0);

            if ($token === false) {
                // terminating sequence started
            } elseif ($token instanceof HTMLPurifier_Token_Start) {
                $nesting++;
            } elseif ($token instanceof HTMLPurifier_Token_End) {
                $nesting--;
            }

            // handle node collection
            if ($is_collecting) {
                if ($is_child) {
                    // we are back at the top level, so the previous node
                    // is complete: stash the tokens away. The token at
                    // $tag_index tells us the type of the collection.
                    switch ($collection[$tag_index]->name) {
                        case 'tbody':
                            $tbody_mode = true;
                            // intentional fall through: a tbody is stored
                            // as content exactly like a tr
                        case 'tr':
                            $content[] = $collection;
                            break;
                        case 'caption':
                            // only the first caption is kept
                            if ($caption !== false) break;
                            $caption = $collection;
                            break;
                        case 'thead':
                        case 'tfoot':
                            $tbody_mode = true;
                            // XXX This breaks rendering properties with
                            // Firefox, which never floats a <thead> to
                            // the top. Ever. (Our scheme will float the
                            // first <thead> to the top.)  So maybe
                            // <thead>s that are not first should be
                            // turned into <tbody>? Very tricky, indeed.

                            // access the appropriate variable, $thead or
                            // $tfoot, through a variable variable
                            $var = $collection[$tag_index]->name;
                            if ($$var === false) {
                                $$var = $collection;
                            } else {
                                // Oops, there's a second one! What
                                // should we do?  Current behavior is to
                                // transmutate the first and last entries into
                                // tbody tags, and then put into content.
                                // Maybe a better idea is to *attach
                                // it* to the existing thead or tfoot?
                                // We don't do this, because Firefox
                                // doesn't float an extra tfoot to the
                                // bottom like it does for the first one.
                                $collection[$tag_index]->name = 'tbody';
                                $collection[count($collection)-1]->name = 'tbody';
                                $content[] = $collection;
                            }
                            break;
                        case 'colgroup':
                            $cols[] = $collection;
                            break;
                    }
                    $collection = array();
                    $is_collecting = false;
                    $tag_index = 0;
                } else {
                    // add the node to the collection
                    $collection[] = $token;
                }
            }

            // terminate
            if ($token === false) break;

            if ($is_child) {
                // determine what we're dealing with
                if ($token->name == 'col') {
                    // the only empty tag in the possie, we can handle it
                    // immediately (any whitespace gathered so far is
                    // kept in front of it)
                    $cols[] = array_merge($collection, array($token));
                    $collection = array();
                    $tag_index = 0;
                    continue;
                }
                // NOTE: use "break" here, not "continue". Inside a switch,
                // "continue" acts like "break" and raises a warning as of
                // PHP 7.3. The switch is the last statement of the loop
                // body, so "break" moves on to the next token as intended.
                switch ($token->name) {
                    case 'caption':
                    case 'colgroup':
                    case 'thead':
                    case 'tfoot':
                    case 'tbody':
                    case 'tr':
                        $is_collecting = true;
                        $collection[] = $token;
                        break;
                    default:
                        // whitespace is carried along in front of the
                        // next node; anything else is not stored
                        if (!empty($token->is_whitespace)) {
                            $collection[] = $token;
                            $tag_index++;
                        }
                        break;
                }
            }
        }

        if (empty($content)) return false;

        // reassemble in the fixed order: caption, columns, thead, tfoot
        $ret = array();
        if ($caption !== false) $ret = array_merge($ret, $caption);
        // $cols is always an array, so no guard is needed
        foreach ($cols as $token_array) $ret = array_merge($ret, $token_array);
        if ($thead !== false)   $ret = array_merge($ret, $thead);
        if ($tfoot !== false)   $ret = array_merge($ret, $tfoot);

        if ($tbody_mode) {
            // a little tricky, since the start of the collection may be
            // whitespace
            $inside_tbody = false;
            foreach ($content as $token_array) {
                // find the starting token
                foreach ($token_array as $t) {
                    if ($t->name === 'tr' || $t->name === 'tbody') {
                        break;
                    }
                } // iterator variable carries over
                if ($t->name === 'tr') {
                    if ($inside_tbody) {
                        $ret = array_merge($ret, $token_array);
                    } else {
                        // open a synthetic tbody around this run of
                        // stray rows
                        $ret[] = new HTMLPurifier_Token_Start('tbody');
                        $ret = array_merge($ret, $token_array);
                        $inside_tbody = true;
                    }
                } elseif ($t->name === 'tbody') {
                    if ($inside_tbody) {
                        // close the synthetic tbody before the real one
                        $ret[] = new HTMLPurifier_Token_End('tbody');
                        $inside_tbody = false;
                        $ret = array_merge($ret, $token_array);
                    } else {
                        $ret = array_merge($ret, $token_array);
                    }
                } else {
                    trigger_error("tr/tbody in content invariant failed in Table ChildDef", E_USER_ERROR);
                }
            }
            if ($inside_tbody) {
                $ret[] = new HTMLPurifier_Token_End('tbody');
            }
        } else {
            foreach ($content as $token_array) {
                // invariant: everything in here is <tr>s
                $ret = array_merge($ret, $token_array);
            }
        }

        if (!empty($collection) && !$is_collecting) {
            // grab the trailing space
            $ret = array_merge($ret, $collection);
        }

        array_pop($tokens_of_children); // remove phantom token

        return ($ret === $tokens_of_children) ? true : $ret;

    }
}
00226 
00227 // vim: et sw=4 sts=4