HTMLPurifier 4.4.0
/home/ezyang/Dev/htmlpurifier/library/HTMLPurifier/Lexer/PH5P.php
Go to the documentation of this file.
00001 <?php
00002 
/**
 * Lexer that runs the input through the HTML5 parser defined in this file
 * and turns the resulting DOM into tokens with the inherited tokenizeDOM().
 */
class HTMLPurifier_Lexer_PH5P extends HTMLPurifier_Lexer_DOMLex {

    /**
     * Tokenizes an HTML string.
     *
     * The input is passed through the inherited normalize() and wrapHTML(),
     * parsed by HTML5, and the first <div> under <html>/<body> of the
     * resulting document is handed to tokenizeDOM(). If the parser throws a
     * DOMException, the exception is registered in the context under
     * 'PH5PError' and the original (un-normalized) input is tokenized by
     * HTMLPurifier_Lexer_DirectLex instead.
     *
     * @param string $html    markup to tokenize
     * @param mixed  $config  passed through to the helpers unchanged
     * @param mixed  $context passed through; receives 'PH5PError' on fallback
     * @return mixed the $tokens array after tokenizeDOM() has processed it,
     *               or whatever the DirectLex fallback returns
     */
    public function tokenizeHTML($html, $config, $context) {
        $normalized = $this->normalize($html, $config, $context);
        $prepared   = $this->wrapHTML($normalized, $config, $context);

        try {
            $parser   = new HTML5($prepared);
            $document = $parser->save();
        } catch (DOMException $failure) {
            // The HTML5 parser gave up: fall back to DirectLex and keep the
            // exception around so that callers can detect what happened.
            $fallback = new HTMLPurifier_Lexer_DirectLex();
            $context->register('PH5PError', $failure);
            return $fallback->tokenizeHTML($html, $config, $context);
        }

        // Walk down <html> -> <body> -> <div>, one step at a time.
        $html_node = $document->getElementsByTagName('html')->item(0);
        $body_node = $html_node->getElementsByTagName('body')->item(0);
        $div_node  = $body_node->getElementsByTagName('div')->item(0);

        $tokens = array();
        $this->tokenizeDOM($div_node, $tokens);
        return $tokens;
    }

}
00037 
00038 /*
00039 
00040 Copyright 2007 Jeroen van der Meer <http://jero.net/> 
00041 
00042 Permission is hereby granted, free of charge, to any person obtaining a 
00043 copy of this software and associated documentation files (the 
00044 "Software"), to deal in the Software without restriction, including 
00045 without limitation the rights to use, copy, modify, merge, publish, 
00046 distribute, sublicense, and/or sell copies of the Software, and to 
00047 permit persons to whom the Software is furnished to do so, subject to 
00048 the following conditions: 
00049 
00050 The above copyright notice and this permission notice shall be included 
00051 in all copies or substantial portions of the Software. 
00052 
00053 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS 
00054 OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 
00055 MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
00056 IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY 
00057 CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, 
00058 TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE 
00059 SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
00060 
00061 */
00062 
00063 class HTML5 {
00064     private $data;
00065     private $char;
00066     private $EOF;
00067     private $state;
00068     private $tree;
00069     private $token;
00070     private $content_model;
00071     private $escape = false;
00072     private $entities = array('AElig;','AElig','AMP;','AMP','Aacute;','Aacute',
00073     'Acirc;','Acirc','Agrave;','Agrave','Alpha;','Aring;','Aring','Atilde;',
00074     'Atilde','Auml;','Auml','Beta;','COPY;','COPY','Ccedil;','Ccedil','Chi;',
00075     'Dagger;','Delta;','ETH;','ETH','Eacute;','Eacute','Ecirc;','Ecirc','Egrave;',
00076     'Egrave','Epsilon;','Eta;','Euml;','Euml','GT;','GT','Gamma;','Iacute;',
00077     'Iacute','Icirc;','Icirc','Igrave;','Igrave','Iota;','Iuml;','Iuml','Kappa;',
00078     'LT;','LT','Lambda;','Mu;','Ntilde;','Ntilde','Nu;','OElig;','Oacute;',
00079     'Oacute','Ocirc;','Ocirc','Ograve;','Ograve','Omega;','Omicron;','Oslash;',
00080     'Oslash','Otilde;','Otilde','Ouml;','Ouml','Phi;','Pi;','Prime;','Psi;',
00081     'QUOT;','QUOT','REG;','REG','Rho;','Scaron;','Sigma;','THORN;','THORN',
00082     'TRADE;','Tau;','Theta;','Uacute;','Uacute','Ucirc;','Ucirc','Ugrave;',
00083     'Ugrave','Upsilon;','Uuml;','Uuml','Xi;','Yacute;','Yacute','Yuml;','Zeta;',
00084     'aacute;','aacute','acirc;','acirc','acute;','acute','aelig;','aelig',
00085     'agrave;','agrave','alefsym;','alpha;','amp;','amp','and;','ang;','apos;',
00086     'aring;','aring','asymp;','atilde;','atilde','auml;','auml','bdquo;','beta;',
00087     'brvbar;','brvbar','bull;','cap;','ccedil;','ccedil','cedil;','cedil',
00088     'cent;','cent','chi;','circ;','clubs;','cong;','copy;','copy','crarr;',
00089     'cup;','curren;','curren','dArr;','dagger;','darr;','deg;','deg','delta;',
00090     'diams;','divide;','divide','eacute;','eacute','ecirc;','ecirc','egrave;',
00091     'egrave','empty;','emsp;','ensp;','epsilon;','equiv;','eta;','eth;','eth',
00092     'euml;','euml','euro;','exist;','fnof;','forall;','frac12;','frac12',
00093     'frac14;','frac14','frac34;','frac34','frasl;','gamma;','ge;','gt;','gt',
00094     'hArr;','harr;','hearts;','hellip;','iacute;','iacute','icirc;','icirc',
00095     'iexcl;','iexcl','igrave;','igrave','image;','infin;','int;','iota;',
00096     'iquest;','iquest','isin;','iuml;','iuml','kappa;','lArr;','lambda;','lang;',
00097     'laquo;','laquo','larr;','lceil;','ldquo;','le;','lfloor;','lowast;','loz;',
00098     'lrm;','lsaquo;','lsquo;','lt;','lt','macr;','macr','mdash;','micro;','micro',
00099     'middot;','middot','minus;','mu;','nabla;','nbsp;','nbsp','ndash;','ne;',
00100     'ni;','not;','not','notin;','nsub;','ntilde;','ntilde','nu;','oacute;',
00101     'oacute','ocirc;','ocirc','oelig;','ograve;','ograve','oline;','omega;',
00102     'omicron;','oplus;','or;','ordf;','ordf','ordm;','ordm','oslash;','oslash',
00103     'otilde;','otilde','otimes;','ouml;','ouml','para;','para','part;','permil;',
00104     'perp;','phi;','pi;','piv;','plusmn;','plusmn','pound;','pound','prime;',
00105     'prod;','prop;','psi;','quot;','quot','rArr;','radic;','rang;','raquo;',
00106     'raquo','rarr;','rceil;','rdquo;','real;','reg;','reg','rfloor;','rho;',
00107     'rlm;','rsaquo;','rsquo;','sbquo;','scaron;','sdot;','sect;','sect','shy;',
00108     'shy','sigma;','sigmaf;','sim;','spades;','sub;','sube;','sum;','sup1;',
00109     'sup1','sup2;','sup2','sup3;','sup3','sup;','supe;','szlig;','szlig','tau;',
00110     'there4;','theta;','thetasym;','thinsp;','thorn;','thorn','tilde;','times;',
00111     'times','trade;','uArr;','uacute;','uacute','uarr;','ucirc;','ucirc',
00112     'ugrave;','ugrave','uml;','uml','upsih;','upsilon;','uuml;','uuml','weierp;',
00113     'xi;','yacute;','yacute','yen;','yen','yuml;','yuml','zeta;','zwj;','zwnj;');
00114 
00115     const PCDATA    = 0;
00116     const RCDATA    = 1;
00117     const CDATA     = 2;
00118     const PLAINTEXT = 3;
00119 
00120     const DOCTYPE  = 0;
00121     const STARTTAG = 1;
00122     const ENDTAG   = 2;
00123     const COMMENT  = 3;
00124     const CHARACTR = 4;
00125     const EOF      = 5;
00126 
00127     public function __construct($data) {
00128 
00129         $this->data = $data;
00130         $this->char = -1;
00131         $this->EOF  = strlen($data);
00132         $this->tree = new HTML5TreeConstructer;
00133         $this->content_model = self::PCDATA;
00134 
00135         $this->state = 'data';
00136 
00137         while($this->state !== null) {
00138             $this->{$this->state.'State'}();
00139         }
00140     }
00141 
00142     public function save() {
00143         return $this->tree->save();
00144     }
00145 
00146     private function char() {
00147         return ($this->char < $this->EOF)
00148             ? $this->data[$this->char]
00149             : false;
00150     }
00151 
00152     private function character($s, $l = 0) {
00153         if($s + $l < $this->EOF) {
00154             if($l === 0) {
00155                 return $this->data[$s];
00156             } else {
00157                 return substr($this->data, $s, $l);
00158             }
00159         }
00160     }
00161 
00162     private function characters($char_class, $start) {
00163         return preg_replace('#^(['.$char_class.']+).*#s', '\\1', substr($this->data, $start));
00164     }
00165 
00166     private function dataState() {
00167         // Consume the next input character
00168         $this->char++;
00169         $char = $this->char();
00170 
00171         if($char === '&' && ($this->content_model === self::PCDATA || $this->content_model === self::RCDATA)) {
00172             /* U+0026 AMPERSAND (&)
00173             When the content model flag is set to one of the PCDATA or RCDATA
00174             states: switch to the entity data state. Otherwise: treat it as per
00175             the "anything else"    entry below. */
00176             $this->state = 'entityData';
00177 
00178         } elseif($char === '-') {
00179             /* If the content model flag is set to either the RCDATA state or
00180             the CDATA state, and the escape flag is false, and there are at
00181             least three characters before this one in the input stream, and the
00182             last four characters in the input stream, including this one, are
00183             U+003C LESS-THAN SIGN, U+0021 EXCLAMATION MARK, U+002D HYPHEN-MINUS,
00184             and U+002D HYPHEN-MINUS ("<!--"), then set the escape flag to true. */
00185             if(($this->content_model === self::RCDATA || $this->content_model ===
00186             self::CDATA) && $this->escape === false &&
00187             $this->char >= 3 && $this->character($this->char - 4, 4) === '<!--') {
00188                 $this->escape = true;
00189             }
00190 
00191             /* In any case, emit the input character as a character token. Stay
00192             in the data state. */
00193             $this->emitToken(array(
00194                 'type' => self::CHARACTR,
00195                 'data' => $char
00196             ));
00197 
00198         /* U+003C LESS-THAN SIGN (<) */
00199         } elseif($char === '<' && ($this->content_model === self::PCDATA ||
00200         (($this->content_model === self::RCDATA ||
00201         $this->content_model === self::CDATA) && $this->escape === false))) {
00202             /* When the content model flag is set to the PCDATA state: switch
00203             to the tag open state.
00204 
00205             When the content model flag is set to either the RCDATA state or
00206             the CDATA state and the escape flag is false: switch to the tag
00207             open state.
00208 
00209             Otherwise: treat it as per the "anything else" entry below. */
00210             $this->state = 'tagOpen';
00211 
00212         /* U+003E GREATER-THAN SIGN (>) */
00213         } elseif($char === '>') {
00214             /* If the content model flag is set to either the RCDATA state or
00215             the CDATA state, and the escape flag is true, and the last three
00216             characters in the input stream including this one are U+002D
00217             HYPHEN-MINUS, U+002D HYPHEN-MINUS, U+003E GREATER-THAN SIGN ("-->"),
00218             set the escape flag to false. */
00219             if(($this->content_model === self::RCDATA ||
00220             $this->content_model === self::CDATA) && $this->escape === true &&
00221             $this->character($this->char, 3) === '-->') {
00222                 $this->escape = false;
00223             }
00224 
00225             /* In any case, emit the input character as a character token.
00226             Stay in the data state. */
00227             $this->emitToken(array(
00228                 'type' => self::CHARACTR,
00229                 'data' => $char
00230             ));
00231 
00232         } elseif($this->char === $this->EOF) {
00233             /* EOF
00234             Emit an end-of-file token. */
00235             $this->EOF();
00236 
00237         } elseif($this->content_model === self::PLAINTEXT) {
00238             /* When the content model flag is set to the PLAINTEXT state
00239             THIS DIFFERS GREATLY FROM THE SPEC: Get the remaining characters of
00240             the text and emit it as a character token. */
00241             $this->emitToken(array(
00242                 'type' => self::CHARACTR,
00243                 'data' => substr($this->data, $this->char)
00244             ));
00245 
00246             $this->EOF();
00247 
00248         } else {
00249             /* Anything else
00250             THIS DIFFERS GREATLY FROM THE SPEC: Get as many character that
00251             otherwise would also be treated as a character token and emit it
00252             as a single character token. Stay in the data state. */
00253             $len  = strcspn($this->data, '<&', $this->char);
00254             $char = substr($this->data, $this->char, $len);
00255             $this->char += $len - 1;
00256 
00257             $this->emitToken(array(
00258                 'type' => self::CHARACTR,
00259                 'data' => $char
00260             ));
00261 
00262             $this->state = 'data';
00263         }
00264     }
00265 
00266     private function entityDataState() {
00267         // Attempt to consume an entity.
00268         $entity = $this->entity();
00269 
00270         // If nothing is returned, emit a U+0026 AMPERSAND character token.
00271         // Otherwise, emit the character token that was returned.
00272         $char = (!$entity) ? '&' : $entity;
00273         $this->emitToken(array(
00274             'type' => self::CHARACTR,
00275             'data' => $char
00276         ));
00277 
00278         // Finally, switch to the data state.
00279         $this->state = 'data';
00280     }
00281 
00282     private function tagOpenState() {
00283         switch($this->content_model) {
00284             case self::RCDATA:
00285             case self::CDATA:
00286                 /* If the next input character is a U+002F SOLIDUS (/) character,
00287                 consume it and switch to the close tag open state. If the next
00288                 input character is not a U+002F SOLIDUS (/) character, emit a
00289                 U+003C LESS-THAN SIGN character token and switch to the data
00290                 state to process the next input character. */
00291                 if($this->character($this->char + 1) === '/') {
00292                     $this->char++;
00293                     $this->state = 'closeTagOpen';
00294 
00295                 } else {
00296                     $this->emitToken(array(
00297                         'type' => self::CHARACTR,
00298                         'data' => '<'
00299                     ));
00300 
00301                     $this->state = 'data';
00302                 }
00303             break;
00304 
00305             case self::PCDATA:
00306                 // If the content model flag is set to the PCDATA state
00307                 // Consume the next input character:
00308                 $this->char++;
00309                 $char = $this->char();
00310 
00311                 if($char === '!') {
00312                     /* U+0021 EXCLAMATION MARK (!)
00313                     Switch to the markup declaration open state. */
00314                     $this->state = 'markupDeclarationOpen';
00315 
00316                 } elseif($char === '/') {
00317                     /* U+002F SOLIDUS (/)
00318                     Switch to the close tag open state. */
00319                     $this->state = 'closeTagOpen';
00320 
00321                 } elseif(preg_match('/^[A-Za-z]$/', $char)) {
00322                     /* U+0041 LATIN LETTER A through to U+005A LATIN LETTER Z
00323                     Create a new start tag token, set its tag name to the lowercase
00324                     version of the input character (add 0x0020 to the character's code
00325                     point), then switch to the tag name state. (Don't emit the token
00326                     yet; further details will be filled in before it is emitted.) */
00327                     $this->token = array(
00328                         'name'  => strtolower($char),
00329                         'type'  => self::STARTTAG,
00330                         'attr'  => array()
00331                     );
00332 
00333                     $this->state = 'tagName';
00334 
00335                 } elseif($char === '>') {
00336                     /* U+003E GREATER-THAN SIGN (>)
00337                     Parse error. Emit a U+003C LESS-THAN SIGN character token and a
00338                     U+003E GREATER-THAN SIGN character token. Switch to the data state. */
00339                     $this->emitToken(array(
00340                         'type' => self::CHARACTR,
00341                         'data' => '<>'
00342                     ));
00343 
00344                     $this->state = 'data';
00345 
00346                 } elseif($char === '?') {
00347                     /* U+003F QUESTION MARK (?)
00348                     Parse error. Switch to the bogus comment state. */
00349                     $this->state = 'bogusComment';
00350 
00351                 } else {
00352                     /* Anything else
00353                     Parse error. Emit a U+003C LESS-THAN SIGN character token and
00354                     reconsume the current input character in the data state. */
00355                     $this->emitToken(array(
00356                         'type' => self::CHARACTR,
00357                         'data' => '<'
00358                     ));
00359 
00360                     $this->char--;
00361                     $this->state = 'data';
00362                 }
00363             break;
00364         }
00365     }
00366 
00367     private function closeTagOpenState() {
00368         $next_node = strtolower($this->characters('A-Za-z', $this->char + 1));
00369         $the_same = count($this->tree->stack) > 0 && $next_node === end($this->tree->stack)->nodeName;
00370 
00371         if(($this->content_model === self::RCDATA || $this->content_model === self::CDATA) &&
00372         (!$the_same || ($the_same && (!preg_match('/[\t\n\x0b\x0c >\/]/',
00373         $this->character($this->char + 1 + strlen($next_node))) || $this->EOF === $this->char)))) {
00374             /* If the content model flag is set to the RCDATA or CDATA states then
00375             examine the next few characters. If they do not match the tag name of
00376             the last start tag token emitted (case insensitively), or if they do but
00377             they are not immediately followed by one of the following characters:
00378                 * U+0009 CHARACTER TABULATION
00379                 * U+000A LINE FEED (LF)
00380                 * U+000B LINE TABULATION
00381                 * U+000C FORM FEED (FF)
00382                 * U+0020 SPACE
00383                 * U+003E GREATER-THAN SIGN (>)
00384                 * U+002F SOLIDUS (/)
00385                 * EOF
00386             ...then there is a parse error. Emit a U+003C LESS-THAN SIGN character
00387             token, a U+002F SOLIDUS character token, and switch to the data state
00388             to process the next input character. */
00389             $this->emitToken(array(
00390                 'type' => self::CHARACTR,
00391                 'data' => '</'
00392             ));
00393 
00394             $this->state = 'data';
00395 
00396         } else {
00397             /* Otherwise, if the content model flag is set to the PCDATA state,
00398             or if the next few characters do match that tag name, consume the
00399             next input character: */
00400             $this->char++;
00401             $char = $this->char();
00402 
00403             if(preg_match('/^[A-Za-z]$/', $char)) {
00404                 /* U+0041 LATIN LETTER A through to U+005A LATIN LETTER Z
00405                 Create a new end tag token, set its tag name to the lowercase version
00406                 of the input character (add 0x0020 to the character's code point), then
00407                 switch to the tag name state. (Don't emit the token yet; further details
00408                 will be filled in before it is emitted.) */
00409                 $this->token = array(
00410                     'name'  => strtolower($char),
00411                     'type'  => self::ENDTAG
00412                 );
00413 
00414                 $this->state = 'tagName';
00415 
00416             } elseif($char === '>') {
00417                 /* U+003E GREATER-THAN SIGN (>)
00418                 Parse error. Switch to the data state. */
00419                 $this->state = 'data';
00420 
00421             } elseif($this->char === $this->EOF) {
00422                 /* EOF
00423                 Parse error. Emit a U+003C LESS-THAN SIGN character token and a U+002F
00424                 SOLIDUS character token. Reconsume the EOF character in the data state. */
00425                 $this->emitToken(array(
00426                     'type' => self::CHARACTR,
00427                     'data' => '</'
00428                 ));
00429 
00430                 $this->char--;
00431                 $this->state = 'data';
00432 
00433             } else {
00434                 /* Parse error. Switch to the bogus comment state. */
00435                 $this->state = 'bogusComment';
00436             }
00437         }
00438     }
00439 
00440     private function tagNameState() {
00441         // Consume the next input character:
00442         $this->char++;
00443         $char = $this->character($this->char);
00444 
00445         if(preg_match('/^[\t\n\x0b\x0c ]$/', $char)) {
00446             /* U+0009 CHARACTER TABULATION
00447             U+000A LINE FEED (LF)
00448             U+000B LINE TABULATION
00449             U+000C FORM FEED (FF)
00450             U+0020 SPACE
00451             Switch to the before attribute name state. */
00452             $this->state = 'beforeAttributeName';
00453 
00454         } elseif($char === '>') {
00455             /* U+003E GREATER-THAN SIGN (>)
00456             Emit the current tag token. Switch to the data state. */
00457             $this->emitToken($this->token);
00458             $this->state = 'data';
00459 
00460         } elseif($this->char === $this->EOF) {
00461             /* EOF
00462             Parse error. Emit the current tag token. Reconsume the EOF
00463             character in the data state. */
00464             $this->emitToken($this->token);
00465 
00466             $this->char--;
00467             $this->state = 'data';
00468 
00469         } elseif($char === '/') {
00470             /* U+002F SOLIDUS (/)
00471             Parse error unless this is a permitted slash. Switch to the before
00472             attribute name state. */
00473             $this->state = 'beforeAttributeName';
00474 
00475         } else {
00476             /* Anything else
00477             Append the current input character to the current tag token's tag name.
00478             Stay in the tag name state. */
00479             $this->token['name'] .= strtolower($char);
00480             $this->state = 'tagName';
00481         }
00482     }
00483 
00484     private function beforeAttributeNameState() {
00485         // Consume the next input character:
00486         $this->char++;
00487         $char = $this->character($this->char);
00488 
00489         if(preg_match('/^[\t\n\x0b\x0c ]$/', $char)) {
00490             /* U+0009 CHARACTER TABULATION
00491             U+000A LINE FEED (LF)
00492             U+000B LINE TABULATION
00493             U+000C FORM FEED (FF)
00494             U+0020 SPACE
00495             Stay in the before attribute name state. */
00496             $this->state = 'beforeAttributeName';
00497 
00498         } elseif($char === '>') {
00499             /* U+003E GREATER-THAN SIGN (>)
00500             Emit the current tag token. Switch to the data state. */
00501             $this->emitToken($this->token);
00502             $this->state = 'data';
00503 
00504         } elseif($char === '/') {
00505             /* U+002F SOLIDUS (/)
00506             Parse error unless this is a permitted slash. Stay in the before
00507             attribute name state. */
00508             $this->state = 'beforeAttributeName';
00509 
00510         } elseif($this->char === $this->EOF) {
00511             /* EOF
00512             Parse error. Emit the current tag token. Reconsume the EOF
00513             character in the data state. */
00514             $this->emitToken($this->token);
00515 
00516             $this->char--;
00517             $this->state = 'data';
00518 
00519         } else {
00520             /* Anything else
00521             Start a new attribute in the current tag token. Set that attribute's
00522             name to the current input character, and its value to the empty string.
00523             Switch to the attribute name state. */
00524             $this->token['attr'][] = array(
00525                 'name'  => strtolower($char),
00526                 'value' => null
00527             );
00528 
00529             $this->state = 'attributeName';
00530         }
00531     }
00532 
00533     private function attributeNameState() {
00534         // Consume the next input character:
00535         $this->char++;
00536         $char = $this->character($this->char);
00537 
00538         if(preg_match('/^[\t\n\x0b\x0c ]$/', $char)) {
00539             /* U+0009 CHARACTER TABULATION
00540             U+000A LINE FEED (LF)
00541             U+000B LINE TABULATION
00542             U+000C FORM FEED (FF)
00543             U+0020 SPACE
00544             Stay in the before attribute name state. */
00545             $this->state = 'afterAttributeName';
00546 
00547         } elseif($char === '=') {
00548             /* U+003D EQUALS SIGN (=)
00549             Switch to the before attribute value state. */
00550             $this->state = 'beforeAttributeValue';
00551 
00552         } elseif($char === '>') {
00553             /* U+003E GREATER-THAN SIGN (>)
00554             Emit the current tag token. Switch to the data state. */
00555             $this->emitToken($this->token);
00556             $this->state = 'data';
00557 
00558         } elseif($char === '/' && $this->character($this->char + 1) !== '>') {
00559             /* U+002F SOLIDUS (/)
00560             Parse error unless this is a permitted slash. Switch to the before
00561             attribute name state. */
00562             $this->state = 'beforeAttributeName';
00563 
00564         } elseif($this->char === $this->EOF) {
00565             /* EOF
00566             Parse error. Emit the current tag token. Reconsume the EOF
00567             character in the data state. */
00568             $this->emitToken($this->token);
00569 
00570             $this->char--;
00571             $this->state = 'data';
00572 
00573         } else {
00574             /* Anything else
00575             Append the current input character to the current attribute's name.
00576             Stay in the attribute name state. */
00577             $last = count($this->token['attr']) - 1;
00578             $this->token['attr'][$last]['name'] .= strtolower($char);
00579 
00580             $this->state = 'attributeName';
00581         }
00582     }
00583 
00584     private function afterAttributeNameState() {
00585         // Consume the next input character:
00586         $this->char++;
00587         $char = $this->character($this->char);
00588 
00589         if(preg_match('/^[\t\n\x0b\x0c ]$/', $char)) {
00590             /* U+0009 CHARACTER TABULATION
00591             U+000A LINE FEED (LF)
00592             U+000B LINE TABULATION
00593             U+000C FORM FEED (FF)
00594             U+0020 SPACE
00595             Stay in the after attribute name state. */
00596             $this->state = 'afterAttributeName';
00597 
00598         } elseif($char === '=') {
00599             /* U+003D EQUALS SIGN (=)
00600             Switch to the before attribute value state. */
00601             $this->state = 'beforeAttributeValue';
00602 
00603         } elseif($char === '>') {
00604             /* U+003E GREATER-THAN SIGN (>)
00605             Emit the current tag token. Switch to the data state. */
00606             $this->emitToken($this->token);
00607             $this->state = 'data';
00608 
00609         } elseif($char === '/' && $this->character($this->char + 1) !== '>') {
00610             /* U+002F SOLIDUS (/)
00611             Parse error unless this is a permitted slash. Switch to the
00612             before attribute name state. */
00613             $this->state = 'beforeAttributeName';
00614 
00615         } elseif($this->char === $this->EOF) {
00616             /* EOF
00617             Parse error. Emit the current tag token. Reconsume the EOF
00618             character in the data state. */
00619             $this->emitToken($this->token);
00620 
00621             $this->char--;
00622             $this->state = 'data';
00623 
00624         } else {
00625             /* Anything else
00626             Start a new attribute in the current tag token. Set that attribute's
00627             name to the current input character, and its value to the empty string.
00628             Switch to the attribute name state. */
00629             $this->token['attr'][] = array(
00630                 'name'  => strtolower($char),
00631                 'value' => null
00632             );
00633 
00634             $this->state = 'attributeName';
00635         }
00636     }
00637 
00638     private function beforeAttributeValueState() {
00639         // Consume the next input character:
00640         $this->char++;
00641         $char = $this->character($this->char);
00642 
00643         if(preg_match('/^[\t\n\x0b\x0c ]$/', $char)) {
00644             /* U+0009 CHARACTER TABULATION
00645             U+000A LINE FEED (LF)
00646             U+000B LINE TABULATION
00647             U+000C FORM FEED (FF)
00648             U+0020 SPACE
00649             Stay in the before attribute value state. */
00650             $this->state = 'beforeAttributeValue';
00651 
00652         } elseif($char === '"') {
00653             /* U+0022 QUOTATION MARK (")
00654             Switch to the attribute value (double-quoted) state. */
00655             $this->state = 'attributeValueDoubleQuoted';
00656 
00657         } elseif($char === '&') {
00658             /* U+0026 AMPERSAND (&)
00659             Switch to the attribute value (unquoted) state and reconsume
00660             this input character. */
00661             $this->char--;
00662             $this->state = 'attributeValueUnquoted';
00663 
00664         } elseif($char === '\'') {
00665             /* U+0027 APOSTROPHE (')
00666             Switch to the attribute value (single-quoted) state. */
00667             $this->state = 'attributeValueSingleQuoted';
00668 
00669         } elseif($char === '>') {
00670             /* U+003E GREATER-THAN SIGN (>)
00671             Emit the current tag token. Switch to the data state. */
00672             $this->emitToken($this->token);
00673             $this->state = 'data';
00674 
00675         } else {
00676             /* Anything else
00677             Append the current input character to the current attribute's value.
00678             Switch to the attribute value (unquoted) state. */
00679             $last = count($this->token['attr']) - 1;
00680             $this->token['attr'][$last]['value'] .= $char;
00681 
00682             $this->state = 'attributeValueUnquoted';
00683         }
00684     }
00685 
00686     private function attributeValueDoubleQuotedState() {
00687         // Consume the next input character:
00688         $this->char++;
00689         $char = $this->character($this->char);
00690 
00691         if($char === '"') {
00692             /* U+0022 QUOTATION MARK (")
00693             Switch to the before attribute name state. */
00694             $this->state = 'beforeAttributeName';
00695 
00696         } elseif($char === '&') {
00697             /* U+0026 AMPERSAND (&)
00698             Switch to the entity in attribute value state. */
00699             $this->entityInAttributeValueState('double');
00700 
00701         } elseif($this->char === $this->EOF) {
00702             /* EOF
00703             Parse error. Emit the current tag token. Reconsume the character
00704             in the data state. */
00705             $this->emitToken($this->token);
00706 
00707             $this->char--;
00708             $this->state = 'data';
00709 
00710         } else {
00711             /* Anything else
00712             Append the current input character to the current attribute's value.
00713             Stay in the attribute value (double-quoted) state. */
00714             $last = count($this->token['attr']) - 1;
00715             $this->token['attr'][$last]['value'] .= $char;
00716 
00717             $this->state = 'attributeValueDoubleQuoted';
00718         }
00719     }
00720 
00721     private function attributeValueSingleQuotedState() {
00722         // Consume the next input character:
00723         $this->char++;
00724         $char = $this->character($this->char);
00725 
00726         if($char === '\'') {
00727             /* U+0022 QUOTATION MARK (')
00728             Switch to the before attribute name state. */
00729             $this->state = 'beforeAttributeName';
00730 
00731         } elseif($char === '&') {
00732             /* U+0026 AMPERSAND (&)
00733             Switch to the entity in attribute value state. */
00734             $this->entityInAttributeValueState('single');
00735 
00736         } elseif($this->char === $this->EOF) {
00737             /* EOF
00738             Parse error. Emit the current tag token. Reconsume the character
00739             in the data state. */
00740             $this->emitToken($this->token);
00741 
00742             $this->char--;
00743             $this->state = 'data';
00744 
00745         } else {
00746             /* Anything else
00747             Append the current input character to the current attribute's value.
00748             Stay in the attribute value (single-quoted) state. */
00749             $last = count($this->token['attr']) - 1;
00750             $this->token['attr'][$last]['value'] .= $char;
00751 
00752             $this->state = 'attributeValueSingleQuoted';
00753         }
00754     }
00755 
00756     private function attributeValueUnquotedState() {
00757         // Consume the next input character:
00758         $this->char++;
00759         $char = $this->character($this->char);
00760 
00761         if(preg_match('/^[\t\n\x0b\x0c ]$/', $char)) {
00762             /* U+0009 CHARACTER TABULATION
00763             U+000A LINE FEED (LF)
00764             U+000B LINE TABULATION
00765             U+000C FORM FEED (FF)
00766             U+0020 SPACE
00767             Switch to the before attribute name state. */
00768             $this->state = 'beforeAttributeName';
00769 
00770         } elseif($char === '&') {
00771             /* U+0026 AMPERSAND (&)
00772             Switch to the entity in attribute value state. */
00773             $this->entityInAttributeValueState();
00774 
00775         } elseif($char === '>') {
00776             /* U+003E GREATER-THAN SIGN (>)
00777             Emit the current tag token. Switch to the data state. */
00778             $this->emitToken($this->token);
00779             $this->state = 'data';
00780 
00781         } else {
00782             /* Anything else
00783             Append the current input character to the current attribute's value.
00784             Stay in the attribute value (unquoted) state. */
00785             $last = count($this->token['attr']) - 1;
00786             $this->token['attr'][$last]['value'] .= $char;
00787 
00788             $this->state = 'attributeValueUnquoted';
00789         }
00790     }
00791 
00792     private function entityInAttributeValueState() {
00793         // Attempt to consume an entity.
00794         $entity = $this->entity();
00795 
00796         // If nothing is returned, append a U+0026 AMPERSAND character to the
00797         // current attribute's value. Otherwise, emit the character token that
00798         // was returned.
00799         $char = (!$entity)
00800             ? '&'
00801             : $entity;
00802 
00803         $last = count($this->token['attr']) - 1;
00804         $this->token['attr'][$last]['value'] .= $char;
00805     }
00806 
00807     private function bogusCommentState() {
00808         /* Consume every character up to the first U+003E GREATER-THAN SIGN
00809         character (>) or the end of the file (EOF), whichever comes first. Emit
00810         a comment token whose data is the concatenation of all the characters
00811         starting from and including the character that caused the state machine
00812         to switch into the bogus comment state, up to and including the last
00813         consumed character before the U+003E character, if any, or up to the
00814         end of the file otherwise. (If the comment was started by the end of
00815         the file (EOF), the token is empty.) */
00816         $data = $this->characters('^>', $this->char);
00817         $this->emitToken(array(
00818             'data' => $data,
00819             'type' => self::COMMENT
00820         ));
00821 
00822         $this->char += strlen($data);
00823 
00824         /* Switch to the data state. */
00825         $this->state = 'data';
00826 
00827         /* If the end of the file was reached, reconsume the EOF character. */
00828         if($this->char === $this->EOF) {
00829             $this->char = $this->EOF - 1;
00830         }
00831     }
00832 
00833     private function markupDeclarationOpenState() {
00834         /* If the next two characters are both U+002D HYPHEN-MINUS (-)
00835         characters, consume those two characters, create a comment token whose
00836         data is the empty string, and switch to the comment state. */
00837         if($this->character($this->char + 1, 2) === '--') {
00838             $this->char += 2;
00839             $this->state = 'comment';
00840             $this->token = array(
00841                 'data' => null,
00842                 'type' => self::COMMENT
00843             );
00844 
00845         /* Otherwise if the next seven chacacters are a case-insensitive match
00846         for the word "DOCTYPE", then consume those characters and switch to the
00847         DOCTYPE state. */
00848         } elseif(strtolower($this->character($this->char + 1, 7)) === 'doctype') {
00849             $this->char += 7;
00850             $this->state = 'doctype';
00851 
00852         /* Otherwise, is is a parse error. Switch to the bogus comment state.
00853         The next character that is consumed, if any, is the first character
00854         that will be in the comment. */
00855         } else {
00856             $this->char++;
00857             $this->state = 'bogusComment';
00858         }
00859     }
00860 
00861     private function commentState() {
00862         /* Consume the next input character: */
00863         $this->char++;
00864         $char = $this->char();
00865 
00866         /* U+002D HYPHEN-MINUS (-) */
00867         if($char === '-') {
00868             /* Switch to the comment dash state  */
00869             $this->state = 'commentDash';
00870 
00871         /* EOF */
00872         } elseif($this->char === $this->EOF) {
00873             /* Parse error. Emit the comment token. Reconsume the EOF character
00874             in the data state. */
00875             $this->emitToken($this->token);
00876             $this->char--;
00877             $this->state = 'data';
00878 
00879         /* Anything else */
00880         } else {
00881             /* Append the input character to the comment token's data. Stay in
00882             the comment state. */
00883             $this->token['data'] .= $char;
00884         }
00885     }
00886 
00887     private function commentDashState() {
00888         /* Consume the next input character: */
00889         $this->char++;
00890         $char = $this->char();
00891 
00892         /* U+002D HYPHEN-MINUS (-) */
00893         if($char === '-') {
00894             /* Switch to the comment end state  */
00895             $this->state = 'commentEnd';
00896 
00897         /* EOF */
00898         } elseif($this->char === $this->EOF) {
00899             /* Parse error. Emit the comment token. Reconsume the EOF character
00900             in the data state. */
00901             $this->emitToken($this->token);
00902             $this->char--;
00903             $this->state = 'data';
00904 
00905         /* Anything else */
00906         } else {
00907             /* Append a U+002D HYPHEN-MINUS (-) character and the input
00908             character to the comment token's data. Switch to the comment state. */
00909             $this->token['data'] .= '-'.$char;
00910             $this->state = 'comment';
00911         }
00912     }
00913 
00914     private function commentEndState() {
00915         /* Consume the next input character: */
00916         $this->char++;
00917         $char = $this->char();
00918 
00919         if($char === '>') {
00920             $this->emitToken($this->token);
00921             $this->state = 'data';
00922 
00923         } elseif($char === '-') {
00924             $this->token['data'] .= '-';
00925 
00926         } elseif($this->char === $this->EOF) {
00927             $this->emitToken($this->token);
00928             $this->char--;
00929             $this->state = 'data';
00930 
00931         } else {
00932             $this->token['data'] .= '--'.$char;
00933             $this->state = 'comment';
00934         }
00935     }
00936 
00937     private function doctypeState() {
00938         /* Consume the next input character: */
00939         $this->char++;
00940         $char = $this->char();
00941 
00942         if(preg_match('/^[\t\n\x0b\x0c ]$/', $char)) {
00943             $this->state = 'beforeDoctypeName';
00944 
00945         } else {
00946             $this->char--;
00947             $this->state = 'beforeDoctypeName';
00948         }
00949     }
00950 
00951     private function beforeDoctypeNameState() {
00952         /* Consume the next input character: */
00953         $this->char++;
00954         $char = $this->char();
00955 
00956         if(preg_match('/^[\t\n\x0b\x0c ]$/', $char)) {
00957             // Stay in the before DOCTYPE name state.
00958 
00959         } elseif(preg_match('/^[a-z]$/', $char)) {
00960             $this->token = array(
00961                 'name' => strtoupper($char),
00962                 'type' => self::DOCTYPE,
00963                 'error' => true
00964             );
00965 
00966             $this->state = 'doctypeName';
00967 
00968         } elseif($char === '>') {
00969             $this->emitToken(array(
00970                 'name' => null,
00971                 'type' => self::DOCTYPE,
00972                 'error' => true
00973             ));
00974 
00975             $this->state = 'data';
00976 
00977         } elseif($this->char === $this->EOF) {
00978             $this->emitToken(array(
00979                 'name' => null,
00980                 'type' => self::DOCTYPE,
00981                 'error' => true
00982             ));
00983 
00984             $this->char--;
00985             $this->state = 'data';
00986 
00987         } else {
00988             $this->token = array(
00989                 'name' => $char,
00990                 'type' => self::DOCTYPE,
00991                 'error' => true
00992             );
00993 
00994             $this->state = 'doctypeName';
00995         }
00996     }
00997 
00998     private function doctypeNameState() {
00999         /* Consume the next input character: */
01000         $this->char++;
01001         $char = $this->char();
01002 
01003         if(preg_match('/^[\t\n\x0b\x0c ]$/', $char)) {
01004             $this->state = 'AfterDoctypeName';
01005 
01006         } elseif($char === '>') {
01007             $this->emitToken($this->token);
01008             $this->state = 'data';
01009 
01010         } elseif(preg_match('/^[a-z]$/', $char)) {
01011             $this->token['name'] .= strtoupper($char);
01012 
01013         } elseif($this->char === $this->EOF) {
01014             $this->emitToken($this->token);
01015             $this->char--;
01016             $this->state = 'data';
01017 
01018         } else {
01019             $this->token['name'] .= $char;
01020         }
01021 
01022         $this->token['error'] = ($this->token['name'] === 'HTML')
01023             ? false
01024             : true;
01025     }
01026 
01027     private function afterDoctypeNameState() {
01028         /* Consume the next input character: */
01029         $this->char++;
01030         $char = $this->char();
01031 
01032         if(preg_match('/^[\t\n\x0b\x0c ]$/', $char)) {
01033             // Stay in the DOCTYPE name state.
01034 
01035         } elseif($char === '>') {
01036             $this->emitToken($this->token);
01037             $this->state = 'data';
01038 
01039         } elseif($this->char === $this->EOF) {
01040             $this->emitToken($this->token);
01041             $this->char--;
01042             $this->state = 'data';
01043 
01044         } else {
01045             $this->token['error'] = true;
01046             $this->state = 'bogusDoctype';
01047         }
01048     }
01049 
01050     private function bogusDoctypeState() {
01051         /* Consume the next input character: */
01052         $this->char++;
01053         $char = $this->char();
01054 
01055         if($char === '>') {
01056             $this->emitToken($this->token);
01057             $this->state = 'data';
01058 
01059         } elseif($this->char === $this->EOF) {
01060             $this->emitToken($this->token);
01061             $this->char--;
01062             $this->state = 'data';
01063 
01064         } else {
01065             // Stay in the bogus DOCTYPE state.
01066         }
01067     }
01068 
01069     private function entity() {
01070         $start = $this->char;
01071 
01072         // This section defines how to consume an entity. This definition is
01073         // used when parsing entities in text and in attributes.
01074 
01075         // The behaviour depends on the identity of the next character (the
01076         // one immediately after the U+0026 AMPERSAND character): 
01077 
01078         switch($this->character($this->char + 1)) {
01079             // U+0023 NUMBER SIGN (#)
01080             case '#':
01081 
01082                 // The behaviour further depends on the character after the
01083                 // U+0023 NUMBER SIGN:
01084                 switch($this->character($this->char + 1)) {
01085                     // U+0078 LATIN SMALL LETTER X
01086                     // U+0058 LATIN CAPITAL LETTER X
01087                     case 'x':
01088                     case 'X':
01089                         // Follow the steps below, but using the range of
01090                         // characters U+0030 DIGIT ZERO through to U+0039 DIGIT
01091                         // NINE, U+0061 LATIN SMALL LETTER A through to U+0066
01092                         // LATIN SMALL LETTER F, and U+0041 LATIN CAPITAL LETTER
01093                         // A, through to U+0046 LATIN CAPITAL LETTER F (in other
01094                         // words, 0-9, A-F, a-f).
01095                         $char = 1;
01096                         $char_class = '0-9A-Fa-f';
01097                     break;
01098 
01099                     // Anything else
01100                     default:
01101                         // Follow the steps below, but using the range of
01102                         // characters U+0030 DIGIT ZERO through to U+0039 DIGIT
01103                         // NINE (i.e. just 0-9).
01104                         $char = 0;
01105                         $char_class = '0-9';
01106                     break;
01107                 }
01108 
01109                 // Consume as many characters as match the range of characters
01110                 // given above.
01111                 $this->char++;
01112                 $e_name = $this->characters($char_class, $this->char + $char + 1);
01113                 $entity = $this->character($start, $this->char);
01114                 $cond = strlen($e_name) > 0;
01115 
01116                 // The rest of the parsing happens bellow.
01117             break;
01118 
01119             // Anything else
01120             default:
01121                 // Consume the maximum number of characters possible, with the
01122                 // consumed characters case-sensitively matching one of the
01123                 // identifiers in the first column of the entities table.
01124                 $e_name = $this->characters('0-9A-Za-z;', $this->char + 1);
01125                 $len = strlen($e_name);
01126 
01127                 for($c = 1; $c <= $len; $c++) {
01128                     $id = substr($e_name, 0, $c);
01129                     $this->char++;
01130 
01131                     if(in_array($id, $this->entities)) {
01132                         if ($e_name[$c-1] !== ';') {
01133                             if ($c < $len && $e_name[$c] == ';') {
01134                                 $this->char++; // consume extra semicolon
01135                             }
01136                         }
01137                         $entity = $id;
01138                         break;
01139                     }
01140                 }
01141 
01142                 $cond = isset($entity);
01143                 // The rest of the parsing happens bellow.
01144             break;
01145         }
01146 
01147         if(!$cond) {
01148             // If no match can be made, then this is a parse error. No
01149             // characters are consumed, and nothing is returned.
01150             $this->char = $start;
01151             return false;
01152         }
01153 
01154         // Return a character token for the character corresponding to the
01155         // entity name (as given by the second column of the entities table).
01156         return html_entity_decode('&'.$entity.';', ENT_QUOTES, 'UTF-8');
01157     }
01158 
01159     private function emitToken($token) {
01160         $emit = $this->tree->emitToken($token);
01161 
01162         if(is_int($emit)) {
01163             $this->content_model = $emit;
01164 
01165         } elseif($token['type'] === self::ENDTAG) {
01166             $this->content_model = self::PCDATA;
01167         }
01168     }
01169 
01170     private function EOF() {
01171         $this->state = null;
01172         $this->tree->emitToken(array(
01173             'type' => self::EOF
01174         ));
01175     }
01176 }
01177 
01178 class HTML5TreeConstructer {
01179     public $stack = array();
01180 
01181     private $phase;
01182     private $mode;
01183     private $dom;
01184     private $foster_parent = null;
01185     private $a_formatting  = array();
01186 
01187     private $head_pointer = null;
01188     private $form_pointer = null;
01189 
01190     private $scoping = array('button','caption','html','marquee','object','table','td','th');
01191     private $formatting = array('a','b','big','em','font','i','nobr','s','small','strike','strong','tt','u');
01192     private $special = array('address','area','base','basefont','bgsound',
01193     'blockquote','body','br','center','col','colgroup','dd','dir','div','dl',
01194     'dt','embed','fieldset','form','frame','frameset','h1','h2','h3','h4','h5',
01195     'h6','head','hr','iframe','image','img','input','isindex','li','link',
01196     'listing','menu','meta','noembed','noframes','noscript','ol','optgroup',
01197     'option','p','param','plaintext','pre','script','select','spacer','style',
01198     'tbody','textarea','tfoot','thead','title','tr','ul','wbr');
01199 
01200     // The different phases.
01201     const INIT_PHASE = 0;
01202     const ROOT_PHASE = 1;
01203     const MAIN_PHASE = 2;
01204     const END_PHASE  = 3;
01205 
01206     // The different insertion modes for the main phase.
01207     const BEFOR_HEAD = 0;
01208     const IN_HEAD    = 1;
01209     const AFTER_HEAD = 2;
01210     const IN_BODY    = 3;
01211     const IN_TABLE   = 4;
01212     const IN_CAPTION = 5;
01213     const IN_CGROUP  = 6;
01214     const IN_TBODY   = 7;
01215     const IN_ROW     = 8;
01216     const IN_CELL    = 9;
01217     const IN_SELECT  = 10;
01218     const AFTER_BODY = 11;
01219     const IN_FRAME   = 12;
01220     const AFTR_FRAME = 13;
01221 
01222     // The different types of elements.
01223     const SPECIAL    = 0;
01224     const SCOPING    = 1;
01225     const FORMATTING = 2;
01226     const PHRASING   = 3;
01227 
01228     const MARKER     = 0;
01229 
01230     public function __construct() {
01231         $this->phase = self::INIT_PHASE;
01232         $this->mode = self::BEFOR_HEAD;
01233         $this->dom = new DOMDocument;
01234 
01235         $this->dom->encoding = 'UTF-8';
01236         $this->dom->preserveWhiteSpace = true;
01237         $this->dom->substituteEntities = true;
01238         $this->dom->strictErrorChecking = false;
01239     }
01240 
01241     // Process tag tokens
01242     public function emitToken($token) {
01243         switch($this->phase) {
01244             case self::INIT_PHASE: return $this->initPhase($token); break;
01245             case self::ROOT_PHASE: return $this->rootElementPhase($token); break;
01246             case self::MAIN_PHASE: return $this->mainPhase($token); break;
01247             case self::END_PHASE : return $this->trailingEndPhase($token); break;
01248         }
01249     }
01250 
01251     private function initPhase($token) {
01252         /* Initially, the tree construction stage must handle each token
01253         emitted from the tokenisation stage as follows: */
01254 
01255         /* A DOCTYPE token that is marked as being in error
01256         A comment token
01257         A start tag token
01258         An end tag token
01259         A character token that is not one of one of U+0009 CHARACTER TABULATION,
01260             U+000A LINE FEED (LF), U+000B LINE TABULATION, U+000C FORM FEED (FF),
01261             or U+0020 SPACE
01262         An end-of-file token */
01263         if((isset($token['error']) && $token['error']) ||
01264         $token['type'] === HTML5::COMMENT ||
01265         $token['type'] === HTML5::STARTTAG ||
01266         $token['type'] === HTML5::ENDTAG ||
01267         $token['type'] === HTML5::EOF ||
01268         ($token['type'] === HTML5::CHARACTR && isset($token['data']) &&
01269         !preg_match('/^[\t\n\x0b\x0c ]+$/', $token['data']))) {
01270             /* This specification does not define how to handle this case. In
01271             particular, user agents may ignore the entirety of this specification
01272             altogether for such documents, and instead invoke special parse modes
01273             with a greater emphasis on backwards compatibility. */
01274 
01275             $this->phase = self::ROOT_PHASE;
01276             return $this->rootElementPhase($token);
01277 
01278         /* A DOCTYPE token marked as being correct */
01279         } elseif(isset($token['error']) && !$token['error']) {
01280             /* Append a DocumentType node to the Document  node, with the name
01281             attribute set to the name given in the DOCTYPE token (which will be
01282             "HTML"), and the other attributes specific to DocumentType objects
01283             set to null, empty lists, or the empty string as appropriate. */
01284             $doctype = new DOMDocumentType(null, null, 'HTML');
01285 
01286             /* Then, switch to the root element phase of the tree construction
01287             stage. */
01288             $this->phase = self::ROOT_PHASE;
01289 
01290         /* A character token that is one of one of U+0009 CHARACTER TABULATION,
01291         U+000A LINE FEED (LF), U+000B LINE TABULATION, U+000C FORM FEED (FF),
01292         or U+0020 SPACE */
01293         } elseif(isset($token['data']) && preg_match('/^[\t\n\x0b\x0c ]+$/',
01294         $token['data'])) {
01295             /* Append that character  to the Document node. */
01296             $text = $this->dom->createTextNode($token['data']);
01297             $this->dom->appendChild($text);
01298         }
01299     }
01300 
01301     private function rootElementPhase($token) {
01302         /* After the initial phase, as each token is emitted from the tokenisation
01303         stage, it must be processed as described in this section. */
01304 
01305         /* A DOCTYPE token */
01306         if($token['type'] === HTML5::DOCTYPE) {
01307             // Parse error. Ignore the token.
01308 
01309         /* A comment token */
01310         } elseif($token['type'] === HTML5::COMMENT) {
01311             /* Append a Comment node to the Document object with the data
01312             attribute set to the data given in the comment token. */
01313             $comment = $this->dom->createComment($token['data']);
01314             $this->dom->appendChild($comment);
01315 
01316         /* A character token that is one of one of U+0009 CHARACTER TABULATION,
01317         U+000A LINE FEED (LF), U+000B LINE TABULATION, U+000C FORM FEED (FF),
01318         or U+0020 SPACE */
01319         } elseif($token['type'] === HTML5::CHARACTR &&
01320         preg_match('/^[\t\n\x0b\x0c ]+$/', $token['data'])) {
01321             /* Append that character  to the Document node. */
01322             $text = $this->dom->createTextNode($token['data']);
01323             $this->dom->appendChild($text);
01324 
01325         /* A character token that is not one of U+0009 CHARACTER TABULATION,
01326             U+000A LINE FEED (LF), U+000B LINE TABULATION, U+000C FORM FEED
01327             (FF), or U+0020 SPACE
01328         A start tag token
01329         An end tag token
01330         An end-of-file token */
01331         } elseif(($token['type'] === HTML5::CHARACTR &&
01332         !preg_match('/^[\t\n\x0b\x0c ]+$/', $token['data'])) ||
01333         $token['type'] === HTML5::STARTTAG ||
01334         $token['type'] === HTML5::ENDTAG ||
01335         $token['type'] === HTML5::EOF) {
01336             /* Create an HTMLElement node with the tag name html, in the HTML
01337             namespace. Append it to the Document object. Switch to the main
01338             phase and reprocess the current token. */
01339             $html = $this->dom->createElement('html');
01340             $this->dom->appendChild($html);
01341             $this->stack[] = $html;
01342 
01343             $this->phase = self::MAIN_PHASE;
01344             return $this->mainPhase($token);
01345         }
01346     }
01347 
01348     private function mainPhase($token) {
01349         /* Tokens in the main phase must be handled as follows: */
01350 
01351         /* A DOCTYPE token */
01352         if($token['type'] === HTML5::DOCTYPE) {
01353             // Parse error. Ignore the token.
01354 
01355         /* A start tag token with the tag name "html" */
01356         } elseif($token['type'] === HTML5::STARTTAG && $token['name'] === 'html') {
01357             /* If this start tag token was not the first start tag token, then
01358             it is a parse error. */
01359 
01360             /* For each attribute on the token, check to see if the attribute
01361             is already present on the top element of the stack of open elements.
01362             If it is not, add the attribute and its corresponding value to that
01363             element. */
01364             foreach($token['attr'] as $attr) {
01365                 if(!$this->stack[0]->hasAttribute($attr['name'])) {
01366                     $this->stack[0]->setAttribute($attr['name'], $attr['value']);
01367                 }
01368             }
01369 
01370         /* An end-of-file token */
01371         } elseif($token['type'] === HTML5::EOF) {
01372             /* Generate implied end tags. */
01373             $this->generateImpliedEndTags();
01374 
01375         /* Anything else. */
01376         } else {
01377             /* Depends on the insertion mode: */
01378             switch($this->mode) {
01379                 case self::BEFOR_HEAD: return $this->beforeHead($token); break;
01380                 case self::IN_HEAD:    return $this->inHead($token); break;
01381                 case self::AFTER_HEAD: return $this->afterHead($token); break;
01382                 case self::IN_BODY:    return $this->inBody($token); break;
01383                 case self::IN_TABLE:   return $this->inTable($token); break;
01384                 case self::IN_CAPTION: return $this->inCaption($token); break;
01385                 case self::IN_CGROUP:  return $this->inColumnGroup($token); break;
01386                 case self::IN_TBODY:   return $this->inTableBody($token); break;
01387                 case self::IN_ROW:     return $this->inRow($token); break;
01388                 case self::IN_CELL:    return $this->inCell($token); break;
01389                 case self::IN_SELECT:  return $this->inSelect($token); break;
01390                 case self::AFTER_BODY: return $this->afterBody($token); break;
01391                 case self::IN_FRAME:   return $this->inFrameset($token); break;
01392                 case self::AFTR_FRAME: return $this->afterFrameset($token); break;
01393                 case self::END_PHASE:  return $this->trailingEndPhase($token); break;
01394             }
01395         }
01396     }
01397 
01398     private function beforeHead($token) {
01399         /* Handle the token as follows: */
01400 
01401         /* A character token that is one of one of U+0009 CHARACTER TABULATION,
01402         U+000A LINE FEED (LF), U+000B LINE TABULATION, U+000C FORM FEED (FF),
01403         or U+0020 SPACE */
01404         if($token['type'] === HTML5::CHARACTR &&
01405         preg_match('/^[\t\n\x0b\x0c ]+$/', $token['data'])) {
01406             /* Append the character to the current node. */
01407             $this->insertText($token['data']);
01408 
01409         /* A comment token */
01410         } elseif($token['type'] === HTML5::COMMENT) {
01411             /* Append a Comment node to the current node with the data attribute
01412             set to the data given in the comment token. */
01413             $this->insertComment($token['data']);
01414 
01415         /* A start tag token with the tag name "head" */
01416         } elseif($token['type'] === HTML5::STARTTAG && $token['name'] === 'head') {
01417             /* Create an element for the token, append the new element to the
01418             current node and push it onto the stack of open elements. */
01419             $element = $this->insertElement($token);
01420 
01421             /* Set the head element pointer to this new element node. */
01422             $this->head_pointer = $element;
01423 
01424             /* Change the insertion mode to "in head". */
01425             $this->mode = self::IN_HEAD;
01426 
01427         /* A start tag token whose tag name is one of: "base", "link", "meta",
01428         "script", "style", "title". Or an end tag with the tag name "html".
01429         Or a character token that is not one of U+0009 CHARACTER TABULATION,
01430         U+000A LINE FEED (LF), U+000B LINE TABULATION, U+000C FORM FEED (FF),
01431         or U+0020 SPACE. Or any other start tag token */
01432         } elseif($token['type'] === HTML5::STARTTAG ||
01433         ($token['type'] === HTML5::ENDTAG && $token['name'] === 'html') ||
01434         ($token['type'] === HTML5::CHARACTR && !preg_match('/^[\t\n\x0b\x0c ]$/',
01435         $token['data']))) {
01436             /* Act as if a start tag token with the tag name "head" and no
01437             attributes had been seen, then reprocess the current token. */
01438             $this->beforeHead(array(
01439                 'name' => 'head',
01440                 'type' => HTML5::STARTTAG,
01441                 'attr' => array()
01442             ));
01443 
01444             return $this->inHead($token);
01445 
01446         /* Any other end tag */
01447         } elseif($token['type'] === HTML5::ENDTAG) {
01448             /* Parse error. Ignore the token. */
01449         }
01450     }
01451 
01452     private function inHead($token) {
01453         /* Handle the token as follows: */
01454 
01455         /* A character token that is one of one of U+0009 CHARACTER TABULATION,
01456         U+000A LINE FEED (LF), U+000B LINE TABULATION, U+000C FORM FEED (FF),
01457         or U+0020 SPACE.
01458 
01459         THIS DIFFERS FROM THE SPEC: If the current node is either a title, style
01460         or script element, append the character to the current node regardless
01461         of its content. */
01462         if(($token['type'] === HTML5::CHARACTR &&
01463         preg_match('/^[\t\n\x0b\x0c ]+$/', $token['data'])) || (
01464         $token['type'] === HTML5::CHARACTR && in_array(end($this->stack)->nodeName,
01465         array('title', 'style', 'script')))) {
01466             /* Append the character to the current node. */
01467             $this->insertText($token['data']);
01468 
01469         /* A comment token */
01470         } elseif($token['type'] === HTML5::COMMENT) {
01471             /* Append a Comment node to the current node with the data attribute
01472             set to the data given in the comment token. */
01473             $this->insertComment($token['data']);
01474 
01475         } elseif($token['type'] === HTML5::ENDTAG &&
01476         in_array($token['name'], array('title', 'style', 'script'))) {
01477             array_pop($this->stack);
01478             return HTML5::PCDATA;
01479 
01480         /* A start tag with the tag name "title" */
01481         } elseif($token['type'] === HTML5::STARTTAG && $token['name'] === 'title') {
01482             /* Create an element for the token and append the new element to the
01483             node pointed to by the head element pointer, or, if that is null
01484             (innerHTML case), to the current node. */
01485             if($this->head_pointer !== null) {
01486                 $element = $this->insertElement($token, false);
01487                 $this->head_pointer->appendChild($element);
01488 
01489             } else {
01490                 $element = $this->insertElement($token);
01491             }
01492 
01493             /* Switch the tokeniser's content model flag  to the RCDATA state. */
01494             return HTML5::RCDATA;
01495 
01496         /* A start tag with the tag name "style" */
01497         } elseif($token['type'] === HTML5::STARTTAG && $token['name'] === 'style') {
01498             /* Create an element for the token and append the new element to the
01499             node pointed to by the head element pointer, or, if that is null
01500             (innerHTML case), to the current node. */
01501             if($this->head_pointer !== null) {
01502                 $element = $this->insertElement($token, false);
01503                 $this->head_pointer->appendChild($element);
01504 
01505             } else {
01506                 $this->insertElement($token);
01507             }
01508 
01509             /* Switch the tokeniser's content model flag  to the CDATA state. */
01510             return HTML5::CDATA;
01511 
01512         /* A start tag with the tag name "script" */
01513         } elseif($token['type'] === HTML5::STARTTAG && $token['name'] === 'script') {
01514             /* Create an element for the token. */
01515             $element = $this->insertElement($token, false);
01516             $this->head_pointer->appendChild($element);
01517 
01518             /* Switch the tokeniser's content model flag  to the CDATA state. */
01519             return HTML5::CDATA;
01520 
01521         /* A start tag with the tag name "base", "link", or "meta" */
01522         } elseif($token['type'] === HTML5::STARTTAG && in_array($token['name'],
01523         array('base', 'link', 'meta'))) {
01524             /* Create an element for the token and append the new element to the
01525             node pointed to by the head element pointer, or, if that is null
01526             (innerHTML case), to the current node. */
01527             if($this->head_pointer !== null) {
01528                 $element = $this->insertElement($token, false);
01529                 $this->head_pointer->appendChild($element);
01530                 array_pop($this->stack);
01531 
01532             } else {
01533                 $this->insertElement($token);
01534             }
01535 
01536         /* An end tag with the tag name "head" */
01537         } elseif($token['type'] === HTML5::ENDTAG && $token['name'] === 'head') {
01538             /* If the current node is a head element, pop the current node off
01539             the stack of open elements. */
01540             if($this->head_pointer->isSameNode(end($this->stack))) {
01541                 array_pop($this->stack);
01542 
01543             /* Otherwise, this is a parse error. */
01544             } else {
01545                 // k
01546             }
01547 
01548             /* Change the insertion mode to "after head". */
01549             $this->mode = self::AFTER_HEAD;
01550 
01551         /* A start tag with the tag name "head" or an end tag except "html". */
01552         } elseif(($token['type'] === HTML5::STARTTAG && $token['name'] === 'head') ||
01553         ($token['type'] === HTML5::ENDTAG && $token['name'] !== 'html')) {
01554             // Parse error. Ignore the token.
01555 
01556         /* Anything else */
01557         } else {
01558             /* If the current node is a head element, act as if an end tag
01559             token with the tag name "head" had been seen. */
01560             if($this->head_pointer->isSameNode(end($this->stack))) {
01561                 $this->inHead(array(
01562                     'name' => 'head',
01563                     'type' => HTML5::ENDTAG
01564                 ));
01565 
01566             /* Otherwise, change the insertion mode to "after head". */
01567             } else {
01568                 $this->mode = self::AFTER_HEAD;
01569             }
01570 
01571             /* Then, reprocess the current token. */
01572             return $this->afterHead($token);
01573         }
01574     }
01575 
01576     private function afterHead($token) {
01577         /* Handle the token as follows: */
01578 
01579         /* A character token that is one of one of U+0009 CHARACTER TABULATION,
01580         U+000A LINE FEED (LF), U+000B LINE TABULATION, U+000C FORM FEED (FF),
01581         or U+0020 SPACE */
01582         if($token['type'] === HTML5::CHARACTR &&
01583         preg_match('/^[\t\n\x0b\x0c ]+$/', $token['data'])) {
01584             /* Append the character to the current node. */
01585             $this->insertText($token['data']);
01586 
01587         /* A comment token */
01588         } elseif($token['type'] === HTML5::COMMENT) {
01589             /* Append a Comment node to the current node with the data attribute
01590             set to the data given in the comment token. */
01591             $this->insertComment($token['data']);
01592 
01593         /* A start tag token with the tag name "body" */
01594         } elseif($token['type'] === HTML5::STARTTAG && $token['name'] === 'body') {
01595             /* Insert a body element for the token. */
01596             $this->insertElement($token);
01597 
01598             /* Change the insertion mode to "in body". */
01599             $this->mode = self::IN_BODY;
01600 
01601         /* A start tag token with the tag name "frameset" */
01602         } elseif($token['type'] === HTML5::STARTTAG && $token['name'] === 'frameset') {
01603             /* Insert a frameset element for the token. */
01604             $this->insertElement($token);
01605 
01606             /* Change the insertion mode to "in frameset". */
01607             $this->mode = self::IN_FRAME;
01608 
01609         /* A start tag token whose tag name is one of: "base", "link", "meta",
01610         "script", "style", "title" */
01611         } elseif($token['type'] === HTML5::STARTTAG && in_array($token['name'],
01612         array('base', 'link', 'meta', 'script', 'style', 'title'))) {
01613             /* Parse error. Switch the insertion mode back to "in head" and
01614             reprocess the token. */
01615             $this->mode = self::IN_HEAD;
01616             return $this->inHead($token);
01617 
01618         /* Anything else */
01619         } else {
01620             /* Act as if a start tag token with the tag name "body" and no
01621             attributes had been seen, and then reprocess the current token. */
01622             $this->afterHead(array(
01623                 'name' => 'body',
01624                 'type' => HTML5::STARTTAG,
01625                 'attr' => array()
01626             ));
01627 
01628             return $this->inBody($token);
01629         }
01630     }
01631 
01632     private function inBody($token) {
01633         /* Handle the token as follows: */
01634 
01635         switch($token['type']) {
01636             /* A character token */
01637             case HTML5::CHARACTR:
01638                 /* Reconstruct the active formatting elements, if any. */
01639                 $this->reconstructActiveFormattingElements();
01640 
01641                 /* Append the token's character to the current node. */
01642                 $this->insertText($token['data']);
01643             break;
01644 
01645             /* A comment token */
01646             case HTML5::COMMENT:
01647                 /* Append a Comment node to the current node with the data
01648                 attribute set to the data given in the comment token. */
01649                 $this->insertComment($token['data']);
01650             break;
01651 
01652             case HTML5::STARTTAG:
01653             switch($token['name']) {
01654                 /* A start tag token whose tag name is one of: "script",
01655                 "style" */
01656                 case 'script': case 'style':
01657                     /* Process the token as if the insertion mode had been "in
01658                     head". */
01659                     return $this->inHead($token);
01660                 break;
01661 
01662                 /* A start tag token whose tag name is one of: "base", "link",
01663                 "meta", "title" */
01664                 case 'base': case 'link': case 'meta': case 'title':
01665                     /* Parse error. Process the token as if the insertion mode
01666                     had    been "in head". */
01667                     return $this->inHead($token);
01668                 break;
01669 
01670                 /* A start tag token with the tag name "body" */
01671                 case 'body':
01672                     /* Parse error. If the second element on the stack of open
01673                     elements is not a body element, or, if the stack of open
01674                     elements has only one node on it, then ignore the token.
01675                     (innerHTML case) */
01676                     if(count($this->stack) === 1 || $this->stack[1]->nodeName !== 'body') {
01677                         // Ignore
01678 
01679                     /* Otherwise, for each attribute on the token, check to see
01680                     if the attribute is already present on the body element (the
01681                     second element)    on the stack of open elements. If it is not,
01682                     add the attribute and its corresponding value to that
01683                     element. */
01684                     } else {
01685                         foreach($token['attr'] as $attr) {
01686                             if(!$this->stack[1]->hasAttribute($attr['name'])) {
01687                                 $this->stack[1]->setAttribute($attr['name'], $attr['value']);
01688                             }
01689                         }
01690                     }
01691                 break;
01692 
01693                 /* A start tag whose tag name is one of: "address",
01694                 "blockquote", "center", "dir", "div", "dl", "fieldset",
01695                 "listing", "menu", "ol", "p", "ul" */
01696                 case 'address': case 'blockquote': case 'center': case 'dir':
01697                 case 'div': case 'dl': case 'fieldset': case 'listing':
01698                 case 'menu': case 'ol': case 'p': case 'ul':
01699                     /* If the stack of open elements has a p element in scope,
01700                     then act as if an end tag with the tag name p had been
01701                     seen. */
01702                     if($this->elementInScope('p')) {
01703                         $this->emitToken(array(
01704                             'name' => 'p',
01705                             'type' => HTML5::ENDTAG
01706                         ));
01707                     }
01708 
01709                     /* Insert an HTML element for the token. */
01710                     $this->insertElement($token);
01711                 break;
01712 
01713                 /* A start tag whose tag name is "form" */
01714                 case 'form':
01715                     /* If the form element pointer is not null, ignore the
01716                     token with a parse error. */
01717                     if($this->form_pointer !== null) {
01718                         // Ignore.
01719 
01720                     /* Otherwise: */
01721                     } else {
01722                         /* If the stack of open elements has a p element in
01723                         scope, then act as if an end tag with the tag name p
01724                         had been seen. */
01725                         if($this->elementInScope('p')) {
01726                             $this->emitToken(array(
01727                                 'name' => 'p',
01728                                 'type' => HTML5::ENDTAG
01729                             ));
01730                         }
01731 
01732                         /* Insert an HTML element for the token, and set the
01733                         form element pointer to point to the element created. */
01734                         $element = $this->insertElement($token);
01735                         $this->form_pointer = $element;
01736                     }
01737                 break;
01738 
01739                 /* A start tag whose tag name is "li", "dd" or "dt" */
01740                 case 'li': case 'dd': case 'dt':
01741                     /* If the stack of open elements has a p  element in scope,
01742                     then act as if an end tag with the tag name p had been
01743                     seen. */
01744                     if($this->elementInScope('p')) {
01745                         $this->emitToken(array(
01746                             'name' => 'p',
01747                             'type' => HTML5::ENDTAG
01748                         ));
01749                     }
01750 
01751                     $stack_length = count($this->stack) - 1;
01752 
01753                     for($n = $stack_length; 0 <= $n; $n--) {
01754                         /* 1. Initialise node to be the current node (the
01755                         bottommost node of the stack). */
01756                         $stop = false;
01757                         $node = $this->stack[$n];
01758                         $cat  = $this->getElementCategory($node->tagName);
01759 
01760                         /* 2. If node is an li, dd or dt element, then pop all
01761                         the    nodes from the current node up to node, including
01762                         node, then stop this algorithm. */
01763                         if($token['name'] === $node->tagName ||    ($token['name'] !== 'li'
01764                         && ($node->tagName === 'dd' || $node->tagName === 'dt'))) {
01765                             for($x = $stack_length; $x >= $n ; $x--) {
01766                                 array_pop($this->stack);
01767                             }
01768 
01769                             break;
01770                         }
01771 
01772                         /* 3. If node is not in the formatting category, and is
01773                         not    in the phrasing category, and is not an address or
01774                         div element, then stop this algorithm. */
01775                         if($cat !== self::FORMATTING && $cat !== self::PHRASING &&
01776                         $node->tagName !== 'address' && $node->tagName !== 'div') {
01777                             break;
01778                         }
01779                     }
01780 
01781                     /* Finally, insert an HTML element with the same tag
01782                     name as the    token's. */
01783                     $this->insertElement($token);
01784                 break;
01785 
01786                 /* A start tag token whose tag name is "plaintext" */
01787                 case 'plaintext':
01788                     /* If the stack of open elements has a p  element in scope,
01789                     then act as if an end tag with the tag name p had been
01790                     seen. */
01791                     if($this->elementInScope('p')) {
01792                         $this->emitToken(array(
01793                             'name' => 'p',
01794                             'type' => HTML5::ENDTAG
01795                         ));
01796                     }
01797 
01798                     /* Insert an HTML element for the token. */
01799                     $this->insertElement($token);
01800 
01801                     return HTML5::PLAINTEXT;
01802                 break;
01803 
01804                 /* A start tag whose tag name is one of: "h1", "h2", "h3", "h4",
01805                 "h5", "h6" */
01806                 case 'h1': case 'h2': case 'h3': case 'h4': case 'h5': case 'h6':
01807                     /* If the stack of open elements has a p  element in scope,
01808                     then act as if an end tag with the tag name p had been seen. */
01809                     if($this->elementInScope('p')) {
01810                         $this->emitToken(array(
01811                             'name' => 'p',
01812                             'type' => HTML5::ENDTAG
01813                         ));
01814                     }
01815 
01816                     /* If the stack of open elements has in scope an element whose
01817                     tag name is one of "h1", "h2", "h3", "h4", "h5", or "h6", then
01818                     this is a parse error; pop elements from the stack until an
01819                     element with one of those tag names has been popped from the
01820                     stack. */
01821                     while($this->elementInScope(array('h1', 'h2', 'h3', 'h4', 'h5', 'h6'))) {
01822                         array_pop($this->stack);
01823                     }
01824 
01825                     /* Insert an HTML element for the token. */
01826                     $this->insertElement($token);
01827                 break;
01828 
01829                 /* A start tag whose tag name is "a" */
01830                 case 'a':
01831                     /* If the list of active formatting elements contains
01832                     an element whose tag name is "a" between the end of the
01833                     list and the last marker on the list (or the start of
01834                     the list if there is no marker on the list), then this
01835                     is a parse error; act as if an end tag with the tag name
01836                     "a" had been seen, then remove that element from the list
01837                     of active formatting elements and the stack of open
01838                     elements if the end tag didn't already remove it (it
01839                     might not have if the element is not in table scope). */
01840                     $leng = count($this->a_formatting);
01841 
01842                     for($n = $leng - 1; $n >= 0; $n--) {
01843                         if($this->a_formatting[$n] === self::MARKER) {
01844                             break;
01845 
01846                         } elseif($this->a_formatting[$n]->nodeName === 'a') {
01847                             $this->emitToken(array(
01848                                 'name' => 'a',
01849                                 'type' => HTML5::ENDTAG
01850                             ));
01851                             break;
01852                         }
01853                     }
01854 
01855                     /* Reconstruct the active formatting elements, if any. */
01856                     $this->reconstructActiveFormattingElements();
01857 
01858                     /* Insert an HTML element for the token. */
01859                     $el = $this->insertElement($token);
01860 
01861                     /* Add that element to the list of active formatting
01862                     elements. */
01863                     $this->a_formatting[] = $el;
01864                 break;
01865 
01866                 /* A start tag whose tag name is one of: "b", "big", "em", "font",
01867                 "i", "nobr", "s", "small", "strike", "strong", "tt", "u" */
01868                 case 'b': case 'big': case 'em': case 'font': case 'i':
01869                 case 'nobr': case 's': case 'small': case 'strike':
01870                 case 'strong': case 'tt': case 'u':
01871                     /* Reconstruct the active formatting elements, if any. */
01872                     $this->reconstructActiveFormattingElements();
01873 
01874                     /* Insert an HTML element for the token. */
01875                     $el = $this->insertElement($token);
01876 
01877                     /* Add that element to the list of active formatting
01878                     elements. */
01879                     $this->a_formatting[] = $el;
01880                 break;
01881 
01882                 /* A start tag token whose tag name is "button" */
01883                 case 'button':
01884                     /* If the stack of open elements has a button element in scope,
01885                     then this is a parse error; act as if an end tag with the tag
01886                     name "button" had been seen, then reprocess the token. (We don't
01887                     do that. Unnecessary.) */
01888                     if($this->elementInScope('button')) {
01889                         $this->inBody(array(
01890                             'name' => 'button',
01891                             'type' => HTML5::ENDTAG
01892                         ));
01893                     }
01894 
01895                     /* Reconstruct the active formatting elements, if any. */
01896                     $this->reconstructActiveFormattingElements();
01897 
01898                     /* Insert an HTML element for the token. */
01899                     $this->insertElement($token);
01900 
01901                     /* Insert a marker at the end of the list of active
01902                     formatting elements. */
01903                     $this->a_formatting[] = self::MARKER;
01904                 break;
01905 
01906                 /* A start tag token whose tag name is one of: "marquee", "object" */
01907                 case 'marquee': case 'object':
01908                     /* Reconstruct the active formatting elements, if any. */
01909                     $this->reconstructActiveFormattingElements();
01910 
01911                     /* Insert an HTML element for the token. */
01912                     $this->insertElement($token);
01913 
01914                     /* Insert a marker at the end of the list of active
01915                     formatting elements. */
01916                     $this->a_formatting[] = self::MARKER;
01917                 break;
01918 
01919                 /* A start tag token whose tag name is "xmp" */
01920                 case 'xmp':
01921                     /* Reconstruct the active formatting elements, if any. */
01922                     $this->reconstructActiveFormattingElements();
01923 
01924                     /* Insert an HTML element for the token. */
01925                     $this->insertElement($token);
01926 
01927                     /* Switch the content model flag to the CDATA state. */
01928                     return HTML5::CDATA;
01929                 break;
01930 
01931                 /* A start tag whose tag name is "table" */
01932                 case 'table':
01933                     /* If the stack of open elements has a p element in scope,
01934                     then act as if an end tag with the tag name p had been seen. */
01935                     if($this->elementInScope('p')) {
01936                         $this->emitToken(array(
01937                             'name' => 'p',
01938                             'type' => HTML5::ENDTAG
01939                         ));
01940                     }
01941 
01942                     /* Insert an HTML element for the token. */
01943                     $this->insertElement($token);
01944 
01945                     /* Change the insertion mode to "in table". */
01946                     $this->mode = self::IN_TABLE;
01947                 break;
01948 
01949                 /* A start tag whose tag name is one of: "area", "basefont",
01950                 "bgsound", "br", "embed", "img", "param", "spacer", "wbr" */
01951                 case 'area': case 'basefont': case 'bgsound': case 'br':
01952                 case 'embed': case 'img': case 'param': case 'spacer':
01953                 case 'wbr':
01954                     /* Reconstruct the active formatting elements, if any. */
01955                     $this->reconstructActiveFormattingElements();
01956 
01957                     /* Insert an HTML element for the token. */
01958                     $this->insertElement($token);
01959 
01960                     /* Immediately pop the current node off the stack of open elements. */
01961                     array_pop($this->stack);
01962                 break;
01963 
01964                 /* A start tag whose tag name is "hr" */
01965                 case 'hr':
01966                     /* If the stack of open elements has a p element in scope,
01967                     then act as if an end tag with the tag name p had been seen. */
01968                     if($this->elementInScope('p')) {
01969                         $this->emitToken(array(
01970                             'name' => 'p',
01971                             'type' => HTML5::ENDTAG
01972                         ));
01973                     }
01974 
01975                     /* Insert an HTML element for the token. */
01976                     $this->insertElement($token);
01977 
01978                     /* Immediately pop the current node off the stack of open elements. */
01979                     array_pop($this->stack);
01980                 break;
01981 
01982                 /* A start tag whose tag name is "image" */
01983                 case 'image':
01984                     /* Parse error. Change the token's tag name to "img" and
01985                     reprocess it. (Don't ask.) */
01986                     $token['name'] = 'img';
01987                     return $this->inBody($token);
01988                 break;
01989 
01990                 /* A start tag whose tag name is "input" */
01991                 case 'input':
01992                     /* Reconstruct the active formatting elements, if any. */
01993                     $this->reconstructActiveFormattingElements();
01994 
01995                     /* Insert an input element for the token. */
01996                     $element = $this->insertElement($token, false);
01997 
01998                     /* If the form element pointer is not null, then associate the
01999                     input element with the form element pointed to by the form
02000                     element pointer. */
02001                     $this->form_pointer !== null
02002                         ? $this->form_pointer->appendChild($element)
02003                         : end($this->stack)->appendChild($element);
02004 
02005                     /* Pop that input element off the stack of open elements. */
02006                     array_pop($this->stack);
02007                 break;
02008 
02009                 /* A start tag whose tag name is "isindex" */
02010                 case 'isindex':
02011                     /* Parse error. */
02012                     // w/e
02013 
02014                     /* If the form element pointer is not null,
02015                     then ignore the token. */
02016                     if($this->form_pointer === null) {
02017                         /* Act as if a start tag token with the tag name "form" had
02018                         been seen. */
02019                         $this->inBody(array(
02020                             'name' => 'body',
02021                             'type' => HTML5::STARTTAG,
02022                             'attr' => array()
02023                         ));
02024 
02025                         /* Act as if a start tag token with the tag name "hr" had
02026                         been seen. */
02027                         $this->inBody(array(
02028                             'name' => 'hr',
02029                             'type' => HTML5::STARTTAG,
02030                             'attr' => array()
02031                         ));
02032 
02033                         /* Act as if a start tag token with the tag name "p" had
02034                         been seen. */
02035                         $this->inBody(array(
02036                             'name' => 'p',
02037                             'type' => HTML5::STARTTAG,
02038                             'attr' => array()
02039                         ));
02040 
02041                         /* Act as if a start tag token with the tag name "label"
02042                         had been seen. */
02043                         $this->inBody(array(
02044                             'name' => 'label',
02045                             'type' => HTML5::STARTTAG,
02046                             'attr' => array()
02047                         ));
02048 
02049                         /* Act as if a stream of character tokens had been seen. */
02050                         $this->insertText('This is a searchable index. '.
02051                         'Insert your search keywords here: ');
02052 
02053                         /* Act as if a start tag token with the tag name "input"
02054                         had been seen, with all the attributes from the "isindex"
02055                         token, except with the "name" attribute set to the value
02056                         "isindex" (ignoring any explicit "name" attribute). */
02057                         $attr = $token['attr'];
02058                         $attr[] = array('name' => 'name', 'value' => 'isindex');
02059 
02060                         $this->inBody(array(
02061                             'name' => 'input',
02062                             'type' => HTML5::STARTTAG,
02063                             'attr' => $attr
02064                         ));
02065 
02066                         /* Act as if a stream of character tokens had been seen
02067                         (see below for what they should say). */
02068                         $this->insertText('This is a searchable index. '.
02069                         'Insert your search keywords here: ');
02070 
02071                         /* Act as if an end tag token with the tag name "label"
02072                         had been seen. */
02073                         $this->inBody(array(
02074                             'name' => 'label',
02075                             'type' => HTML5::ENDTAG
02076                         ));
02077 
02078                         /* Act as if an end tag token with the tag name "p" had
02079                         been seen. */
02080                         $this->inBody(array(
02081                             'name' => 'p',
02082                             'type' => HTML5::ENDTAG
02083                         ));
02084 
02085                         /* Act as if a start tag token with the tag name "hr" had
02086                         been seen. */
02087                         $this->inBody(array(
02088                             'name' => 'hr',
02089                             'type' => HTML5::ENDTAG
02090                         ));
02091 
02092                         /* Act as if an end tag token with the tag name "form" had
02093                         been seen. */
02094                         $this->inBody(array(
02095                             'name' => 'form',
02096                             'type' => HTML5::ENDTAG
02097                         ));
02098                     }
02099                 break;
02100 
02101                 /* A start tag whose tag name is "textarea" */
02102                 case 'textarea':
02103                     $this->insertElement($token);
02104 
02105                     /* Switch the tokeniser's content model flag to the
02106                     RCDATA state. */
02107                     return HTML5::RCDATA;
02108                 break;
02109 
02110                 /* A start tag whose tag name is one of: "iframe", "noembed",
02111                 "noframes" */
02112                 case 'iframe': case 'noembed': case 'noframes':
02113                     $this->insertElement($token);
02114 
02115                     /* Switch the tokeniser's content model flag to the CDATA state. */
02116                     return HTML5::CDATA;
02117                 break;
02118 
02119                 /* A start tag whose tag name is "select" */
02120                 case 'select':
02121                     /* Reconstruct the active formatting elements, if any. */
02122                     $this->reconstructActiveFormattingElements();
02123 
02124                     /* Insert an HTML element for the token. */
02125                     $this->insertElement($token);
02126 
02127                     /* Change the insertion mode to "in select". */
02128                     $this->mode = self::IN_SELECT;
02129                 break;
02130 
02131                 /* A start or end tag whose tag name is one of: "caption", "col",
02132                 "colgroup", "frame", "frameset", "head", "option", "optgroup",
02133                 "tbody", "td", "tfoot", "th", "thead", "tr". */
02134                 case 'caption': case 'col': case 'colgroup': case 'frame':
02135                 case 'frameset': case 'head': case 'option': case 'optgroup':
02136                 case 'tbody': case 'td': case 'tfoot': case 'th': case 'thead':
02137                 case 'tr':
02138                     // Parse error. Ignore the token.
02139                 break;
02140 
02141                 /* A start or end tag whose tag name is one of: "event-source",
02142                 "section", "nav", "article", "aside", "header", "footer",
02143                 "datagrid", "command" */
02144                 case 'event-source': case 'section': case 'nav': case 'article':
02145                 case 'aside': case 'header': case 'footer': case 'datagrid':
02146                 case 'command':
02147                     // Work in progress!
02148                 break;
02149 
02150                 /* A start tag token not covered by the previous entries */
02151                 default:
02152                     /* Reconstruct the active formatting elements, if any. */
02153                     $this->reconstructActiveFormattingElements();
02154 
02155                     $this->insertElement($token, true, true);
02156                 break;
02157             }
02158             break;
02159 
02160             case HTML5::ENDTAG:
02161             switch($token['name']) {
02162                 /* An end tag with the tag name "body" */
02163                 case 'body':
02164                     /* If the second element in the stack of open elements is
02165                     not a body element, this is a parse error. Ignore the token.
02166                     (innerHTML case) */
02167                     if(count($this->stack) < 2 || $this->stack[1]->nodeName !== 'body') {
02168                         // Ignore.
02169 
02170                     /* If the current node is not the body element, then this
02171                     is a parse error. */
02172                     } elseif(end($this->stack)->nodeName !== 'body') {
02173                         // Parse error.
02174                     }
02175 
02176                     /* Change the insertion mode to "after body". */
02177                     $this->mode = self::AFTER_BODY;
02178                 break;
02179 
02180                 /* An end tag with the tag name "html" */
02181                 case 'html':
02182                     /* Act as if an end tag with tag name "body" had been seen,
02183                     then, if that token wasn't ignored, reprocess the current
02184                     token. */
02185                     $this->inBody(array(
02186                         'name' => 'body',
02187                         'type' => HTML5::ENDTAG
02188                     ));
02189 
02190                     return $this->afterBody($token);
02191                 break;
02192 
02193                 /* An end tag whose tag name is one of: "address", "blockquote",
02194                 "center", "dir", "div", "dl", "fieldset", "listing", "menu",
02195                 "ol", "pre", "ul" */
02196                 case 'address': case 'blockquote': case 'center': case 'dir':
02197                 case 'div': case 'dl': case 'fieldset': case 'listing':
02198                 case 'menu': case 'ol': case 'pre': case 'ul':
02199                     /* If the stack of open elements has an element in scope
02200                     with the same tag name as that of the token, then generate
02201                     implied end tags. */
02202                     if($this->elementInScope($token['name'])) {
02203                         $this->generateImpliedEndTags();
02204 
02205                         /* Now, if the current node is not an element with
02206                         the same tag name as that of the token, then this
02207                         is a parse error. */
02208                         // w/e
02209 
02210                         /* If the stack of open elements has an element in
02211                         scope with the same tag name as that of the token,
02212                         then pop elements from this stack until an element
02213                         with that tag name has been popped from the stack. */
02214                         for($n = count($this->stack) - 1; $n >= 0; $n--) {
02215                             if($this->stack[$n]->nodeName === $token['name']) {
02216                                 $n = -1;
02217                             }
02218 
02219                             array_pop($this->stack);
02220                         }
02221                     }
02222                 break;
02223 
02224                 /* An end tag whose tag name is "form" */
02225                 case 'form':
02226                     /* If the stack of open elements has an element in scope
02227                     with the same tag name as that of the token, then generate
02228                     implied end tags. */
02229                     if($this->elementInScope($token['name'])) {
02230                         $this->generateImpliedEndTags();
02231 
02232                     } 
02233 
02234                     if(end($this->stack)->nodeName !== $token['name']) {
02235                         /* Now, if the current node is not an element with the
02236                         same tag name as that of the token, then this is a parse
02237                         error. */
02238                         // w/e
02239 
02240                     } else {
02241                         /* Otherwise, if the current node is an element with
02242                         the same tag name as that of the token pop that element
02243                         from the stack. */
02244                         array_pop($this->stack);
02245                     }
02246 
02247                     /* In any case, set the form element pointer to null. */
02248                     $this->form_pointer = null;
02249                 break;
02250 
02251                 /* An end tag whose tag name is "p" */
02252                 case 'p':
02253                     /* If the stack of open elements has a p element in scope,
02254                     then generate implied end tags, except for p elements. */
02255                     if($this->elementInScope('p')) {
02256                         $this->generateImpliedEndTags(array('p'));
02257 
02258                         /* If the current node is not a p element, then this is
02259                         a parse error. */
02260                         // k
02261 
02262                         /* If the stack of open elements has a p element in
02263                         scope, then pop elements from this stack until the stack
02264                         no longer has a p element in scope. */
02265                         for($n = count($this->stack) - 1; $n >= 0; $n--) {
02266                             if($this->elementInScope('p')) {
02267                                 array_pop($this->stack);
02268 
02269                             } else {
02270                                 break;
02271                             }
02272                         }
02273                     }
02274                 break;
02275 
02276                 /* An end tag whose tag name is "dd", "dt", or "li" */
02277                 case 'dd': case 'dt': case 'li':
02278                     /* If the stack of open elements has an element in scope
02279                     whose tag name matches the tag name of the token, then
02280                     generate implied end tags, except for elements with the
02281                     same tag name as the token. */
02282                     if($this->elementInScope($token['name'])) {
02283                         $this->generateImpliedEndTags(array($token['name']));
02284 
02285                         /* If the current node is not an element with the same
02286                         tag name as the token, then this is a parse error. */
02287                         // w/e
02288 
02289                         /* If the stack of open elements has an element in scope
02290                         whose tag name matches the tag name of the token, then
02291                         pop elements from this stack until an element with that
02292                         tag name has been popped from the stack. */
02293                         for($n = count($this->stack) - 1; $n >= 0; $n--) {
02294                             if($this->stack[$n]->nodeName === $token['name']) {
02295                                 $n = -1;
02296                             }
02297 
02298                             array_pop($this->stack);
02299                         }
02300                     }
02301                 break;
02302 
02303                 /* An end tag whose tag name is one of: "h1", "h2", "h3", "h4",
02304                 "h5", "h6" */
02305                 case 'h1': case 'h2': case 'h3': case 'h4': case 'h5': case 'h6':
02306                     $elements = array('h1', 'h2', 'h3', 'h4', 'h5', 'h6');
02307 
02308                     /* If the stack of open elements has in scope an element whose
02309                     tag name is one of "h1", "h2", "h3", "h4", "h5", or "h6", then
02310                     generate implied end tags. */
02311                     if($this->elementInScope($elements)) {
02312                         $this->generateImpliedEndTags();
02313 
02314                         /* Now, if the current node is not an element with the same
02315                         tag name as that of the token, then this is a parse error. */
02316                         // w/e
02317 
02318                         /* If the stack of open elements has in scope an element
02319                         whose tag name is one of "h1", "h2", "h3", "h4", "h5", or
02320                         "h6", then pop elements from the stack until an element
02321                         with one of those tag names has been popped from the stack. */
02322                         while($this->elementInScope($elements)) {
02323                             array_pop($this->stack);
02324                         }
02325                     }
02326                 break;
02327 
02328                 /* An end tag whose tag name is one of: "a", "b", "big", "em",
02329                 "font", "i", "nobr", "s", "small", "strike", "strong", "tt", "u" */
02330                 case 'a': case 'b': case 'big': case 'em': case 'font':
02331                 case 'i': case 'nobr': case 's': case 'small': case 'strike':
02332                 case 'strong': case 'tt': case 'u':
02333                     /* 1. Let the formatting element be the last element in
02334                     the list of active formatting elements that:
02335                         * is between the end of the list and the last scope
02336                         marker in the list, if any, or the start of the list
02337                         otherwise, and
02338                         * has the same tag name as the token.
02339                     */
02340                     while(true) {
02341                         for($a = count($this->a_formatting) - 1; $a >= 0; $a--) {
02342                             if($this->a_formatting[$a] === self::MARKER) {
02343                                 break;
02344 
02345                             } elseif($this->a_formatting[$a]->tagName === $token['name']) {
02346                                 $formatting_element = $this->a_formatting[$a];
02347                                 $in_stack = in_array($formatting_element, $this->stack, true);
02348                                 $fe_af_pos = $a;
02349                                 break;
02350                             }
02351                         }
02352 
02353                         /* If there is no such node, or, if that node is
02354                         also in the stack of open elements but the element
02355                         is not in scope, then this is a parse error. Abort
02356                         these steps. The token is ignored. */
02357                         if(!isset($formatting_element) || ($in_stack &&
02358                         !$this->elementInScope($token['name']))) {
02359                             break;
02360 
02361                         /* Otherwise, if there is such a node, but that node
02362                         is not in the stack of open elements, then this is a
02363                         parse error; remove the element from the list, and
02364                         abort these steps. */
02365                         } elseif(isset($formatting_element) && !$in_stack) {
02366                             unset($this->a_formatting[$fe_af_pos]);
02367                             $this->a_formatting = array_merge($this->a_formatting);
02368                             break;
02369                         }
02370 
02371                         /* 2. Let the furthest block be the topmost node in the
02372                         stack of open elements that is lower in the stack
02373                         than the formatting element, and is not an element in
02374                         the phrasing or formatting categories. There might
02375                         not be one. */
02376                         $fe_s_pos = array_search($formatting_element, $this->stack, true);
02377                         $length = count($this->stack);
02378 
02379                         for($s = $fe_s_pos + 1; $s < $length; $s++) {
02380                             $category = $this->getElementCategory($this->stack[$s]->nodeName);
02381 
02382                             if($category !== self::PHRASING && $category !== self::FORMATTING) {
02383                                 $furthest_block = $this->stack[$s];
02384                             }
02385                         }
02386 
02387                         /* 3. If there is no furthest block, then the UA must
02388                         skip the subsequent steps and instead just pop all
02389                         the nodes from the bottom of the stack of open
02390                         elements, from the current node up to the formatting
02391                         element, and remove the formatting element from the
02392                         list of active formatting elements. */
02393                         if(!isset($furthest_block)) {
02394                             for($n = $length - 1; $n >= $fe_s_pos; $n--) {
02395                                 array_pop($this->stack);
02396                             }
02397 
02398                             unset($this->a_formatting[$fe_af_pos]);
02399                             $this->a_formatting = array_merge($this->a_formatting);
02400                             break;
02401                         }
02402 
02403                         /* 4. Let the common ancestor be the element
02404                         immediately above the formatting element in the stack
02405                         of open elements. */
02406                         $common_ancestor = $this->stack[$fe_s_pos - 1];
02407 
02408                         /* 5. If the furthest block has a parent node, then
02409                         remove the furthest block from its parent node. */
02410                         if($furthest_block->parentNode !== null) {
02411                             $furthest_block->parentNode->removeChild($furthest_block);
02412                         }
02413 
02414                         /* 6. Let a bookmark note the position of the
02415                         formatting element in the list of active formatting
02416                         elements relative to the elements on either side
02417                         of it in the list. */
02418                         $bookmark = $fe_af_pos;
02419 
02420                         /* 7. Let node and last node be the furthest block.
02421                         Follow these steps: */
02422                         $node = $furthest_block;
02423                         $last_node = $furthest_block;
02424 
02425                         while(true) {
02426                             for($n = array_search($node, $this->stack, true) - 1; $n >= 0; $n--) {
02427                                 /* 7.1 Let node be the element immediately
02428                                 prior to node in the stack of open elements. */
02429                                 $node = $this->stack[$n];
02430 
02431                                 /* 7.2 If node is not in the list of active
02432                                 formatting elements, then remove node from
02433                                 the stack of open elements and then go back
02434                                 to step 1. */
02435                                 if(!in_array($node, $this->a_formatting, true)) {
02436                                     unset($this->stack[$n]);
02437                                     $this->stack = array_merge($this->stack);
02438 
02439                                 } else {
02440                                     break;
02441                                 }
02442                             }
02443 
02444                             /* 7.3 Otherwise, if node is the formatting
02445                             element, then go to the next step in the overall
02446                             algorithm. */
02447                             if($node === $formatting_element) {
02448                                 break;
02449 
02450                             /* 7.4 Otherwise, if last node is the furthest
02451                             block, then move the aforementioned bookmark to
02452                             be immediately after the node in the list of
02453                             active formatting elements. */
02454                             } elseif($last_node === $furthest_block) {
02455                                 $bookmark = array_search($node, $this->a_formatting, true) + 1;
02456                             }
02457 
02458                             /* 7.5 If node has any children, perform a
02459                             shallow clone of node, replace the entry for
02460                             node in the list of active formatting elements
02461                             with an entry for the clone, replace the entry
02462                             for node in the stack of open elements with an
02463                             entry for the clone, and let node be the clone. */
02464                             if($node->hasChildNodes()) {
02465                                 $clone = $node->cloneNode();
02466                                 $s_pos = array_search($node, $this->stack, true);
02467                                 $a_pos = array_search($node, $this->a_formatting, true);
02468 
02469                                 $this->stack[$s_pos] = $clone;
02470                                 $this->a_formatting[$a_pos] = $clone;
02471                                 $node = $clone;
02472                             }
02473 
02474                             /* 7.6 Insert last node into node, first removing
02475                             it from its previous parent node if any. */
02476                             if($last_node->parentNode !== null) {
02477                                 $last_node->parentNode->removeChild($last_node);
02478                             }
02479 
02480                             $node->appendChild($last_node);
02481 
02482                             /* 7.7 Let last node be node. */
02483                             $last_node = $node;
02484                         }
02485 
02486                         /* 8. Insert whatever last node ended up being in
02487                         the previous step into the common ancestor node,
02488                         first removing it from its previous parent node if
02489                         any. */
02490                         if($last_node->parentNode !== null) {
02491                             $last_node->parentNode->removeChild($last_node);
02492                         }
02493 
02494                         $common_ancestor->appendChild($last_node);
02495 
02496                         /* 9. Perform a shallow clone of the formatting
02497                         element. */
02498                         $clone = $formatting_element->cloneNode();
02499 
02500                         /* 10. Take all of the child nodes of the furthest
02501                         block and append them to the clone created in the
02502                         last step. */
02503                         while($furthest_block->hasChildNodes()) {
02504                             $child = $furthest_block->firstChild;
02505                             $furthest_block->removeChild($child);
02506                             $clone->appendChild($child);
02507                         }
02508 
02509                         /* 11. Append that clone to the furthest block. */
02510                         $furthest_block->appendChild($clone);
02511 
02512                         /* 12. Remove the formatting element from the list
02513                         of active formatting elements, and insert the clone
02514                         into the list of active formatting elements at the
02515                         position of the aforementioned bookmark. */
02516                         $fe_af_pos = array_search($formatting_element, $this->a_formatting, true);
02517                         unset($this->a_formatting[$fe_af_pos]);
02518                         $this->a_formatting = array_merge($this->a_formatting);
02519 
02520                         $af_part1 = array_slice($this->a_formatting, 0, $bookmark - 1);
02521                         $af_part2 = array_slice($this->a_formatting, $bookmark, count($this->a_formatting));
02522                         $this->a_formatting = array_merge($af_part1, array($clone), $af_part2);
02523 
02524                         /* 13. Remove the formatting element from the stack
02525                         of open elements, and insert the clone into the stack
02526                         of open elements immediately after (i.e. in a more
02527                         deeply nested position than) the position of the
02528                         furthest block in that stack. */
02529                         $fe_s_pos = array_search($formatting_element, $this->stack, true);
02530                         $fb_s_pos = array_search($furthest_block, $this->stack, true);
02531                         unset($this->stack[$fe_s_pos]);
02532 
02533                         $s_part1 = array_slice($this->stack, 0, $fb_s_pos);
02534                         $s_part2 = array_slice($this->stack, $fb_s_pos + 1, count($this->stack));
02535                         $this->stack = array_merge($s_part1, array($clone), $s_part2);
02536 
02537                         /* 14. Jump back to step 1 in this series of steps. */
02538                         unset($formatting_element, $fe_af_pos, $fe_s_pos, $furthest_block);
02539                     }
02540                 break;
02541 
02542                 /* An end tag token whose tag name is one of: "button",
02543                 "marquee", "object" */
02544                 case 'button': case 'marquee': case 'object':
02545                     /* If the stack of open elements has an element in scope whose
02546                     tag name matches the tag name of the token, then generate implied
02547                     end tags. */
02548                     if($this->elementInScope($token['name'])) {
02549                         $this->generateImpliedEndTags();
02550 
02551                         /* Now, if the current node is not an element with the same
02552                         tag name as the token, then this is a parse error. */
02553                         // k
02554 
02555                         /* Now, if the stack of open elements has an element in scope
02556                         whose tag name matches the tag name of the token, then pop
02557                         elements from the stack until that element has been popped from
02558                         the stack, and clear the list of active formatting elements up
02559                         to the last marker. */
02560                         for($n = count($this->stack) - 1; $n >= 0; $n--) {
02561                             if($this->stack[$n]->nodeName === $token['name']) {
02562                                 $n = -1;
02563                             }
02564 
02565                             array_pop($this->stack);
02566                         }
02567 
02568                         $marker = end(array_keys($this->a_formatting, self::MARKER, true));
02569 
02570                         for($n = count($this->a_formatting) - 1; $n > $marker; $n--) {
02571                             array_pop($this->a_formatting);
02572                         }
02573                     }
02574                 break;
02575 
02576                 /* Or an end tag whose tag name is one of: "area", "basefont",
02577                 "bgsound", "br", "embed", "hr", "iframe", "image", "img",
02578                 "input", "isindex", "noembed", "noframes", "param", "select",
02579                 "spacer", "table", "textarea", "wbr" */
02580                 case 'area': case 'basefont': case 'bgsound': case 'br':
02581                 case 'embed': case 'hr': case 'iframe': case 'image':
02582                 case 'img': case 'input': case 'isindex': case 'noembed':
02583                 case 'noframes': case 'param': case 'select': case 'spacer':
02584                 case 'table': case 'textarea': case 'wbr':
02585                     // Parse error. Ignore the token.
02586                 break;
02587 
02588                 /* An end tag token not covered by the previous entries */
02589                 default:
02590                     for($n = count($this->stack) - 1; $n >= 0; $n--) {
02591                         /* Initialise node to be the current node (the bottommost
02592                         node of the stack). */
02593                         $node = end($this->stack);
02594 
02595                         /* If node has the same tag name as the end tag token,
02596                         then: */
02597                         if($token['name'] === $node->nodeName) {
02598                             /* Generate implied end tags. */
02599                             $this->generateImpliedEndTags();
02600 
02601                             /* If the tag name of the end tag token does not
02602                             match the tag name of the current node, this is a
02603                             parse error. */
02604                             // k
02605 
02606                             /* Pop all the nodes from the current node up to
02607                             node, including node, then stop this algorithm. */
02608                             for($x = count($this->stack) - $n; $x >= $n; $x--) {
02609                                 array_pop($this->stack);
02610                             }
02611                                     
02612                         } else {
02613                             $category = $this->getElementCategory($node);
02614 
02615                             if($category !== self::SPECIAL && $category !== self::SCOPING) {
02616                                 /* Otherwise, if node is in neither the formatting
02617                                 category nor the phrasing category, then this is a
02618                                 parse error. Stop this algorithm. The end tag token
02619                                 is ignored. */
02620                                 return false;
02621                             }
02622                         }
02623                     }
02624                 break;
02625             }
02626             break;
02627         }
02628     }
02629 
02630     private function inTable($token) {
02631         $clear = array('html', 'table');
02632 
02633         /* A character token that is one of U+0009 CHARACTER TABULATION,
02634         U+000A LINE FEED (LF), U+000B LINE TABULATION, U+000C FORM FEED (FF),
02635         or U+0020 SPACE */
02636         if($token['type'] === HTML5::CHARACTR &&
02637         preg_match('/^[\t\n\x0b\x0c ]+$/', $token['data'])) {
02638             /* Append the character to the current node. */
02639             $text = $this->dom->createTextNode($token['data']);
02640             end($this->stack)->appendChild($text);
02641 
02642         /* A comment token */
02643         } elseif($token['type'] === HTML5::COMMENT) {
02644             /* Append a Comment node to the current node with the data
02645             attribute set to the data given in the comment token. */
02646             $comment = $this->dom->createComment($token['data']);
02647             end($this->stack)->appendChild($comment);
02648 
02649         /* A start tag whose tag name is "caption" */
02650         } elseif($token['type'] === HTML5::STARTTAG &&
02651         $token['name'] === 'caption') {
02652             /* Clear the stack back to a table context. */
02653             $this->clearStackToTableContext($clear);
02654 
02655             /* Insert a marker at the end of the list of active
02656             formatting elements. */
02657             $this->a_formatting[] = self::MARKER;
02658 
02659             /* Insert an HTML element for the token, then switch the
02660             insertion mode to "in caption". */
02661             $this->insertElement($token);
02662             $this->mode = self::IN_CAPTION;
02663 
02664         /* A start tag whose tag name is "colgroup" */
02665         } elseif($token['type'] === HTML5::STARTTAG &&
02666         $token['name'] === 'colgroup') {
02667             /* Clear the stack back to a table context. */
02668             $this->clearStackToTableContext($clear);
02669 
02670             /* Insert an HTML element for the token, then switch the
02671             insertion mode to "in column group". */
02672             $this->insertElement($token);
02673             $this->mode = self::IN_CGROUP;
02674 
02675         /* A start tag whose tag name is "col" */
02676         } elseif($token['type'] === HTML5::STARTTAG &&
02677         $token['name'] === 'col') {
02678             $this->inTable(array(
02679                 'name' => 'colgroup',
02680                 'type' => HTML5::STARTTAG,
02681                 'attr' => array()
02682             ));
02683 
02684             $this->inColumnGroup($token);
02685 
02686         /* A start tag whose tag name is one of: "tbody", "tfoot", "thead" */
02687         } elseif($token['type'] === HTML5::STARTTAG && in_array($token['name'],
02688         array('tbody', 'tfoot', 'thead'))) {
02689             /* Clear the stack back to a table context. */
02690             $this->clearStackToTableContext($clear);
02691 
02692             /* Insert an HTML element for the token, then switch the insertion
02693             mode to "in table body". */
02694             $this->insertElement($token);
02695             $this->mode = self::IN_TBODY;
02696 
02697         /* A start tag whose tag name is one of: "td", "th", "tr" */
02698         } elseif($token['type'] === HTML5::STARTTAG &&
02699         in_array($token['name'], array('td', 'th', 'tr'))) {
02700             /* Act as if a start tag token with the tag name "tbody" had been
02701             seen, then reprocess the current token. */
02702             $this->inTable(array(
02703                 'name' => 'tbody',
02704                 'type' => HTML5::STARTTAG,
02705                 'attr' => array()
02706             ));
02707 
02708             return $this->inTableBody($token);
02709 
02710         /* A start tag whose tag name is "table" */
02711         } elseif($token['type'] === HTML5::STARTTAG &&
02712         $token['name'] === 'table') {
02713             /* Parse error. Act as if an end tag token with the tag name "table"
02714             had been seen, then, if that token wasn't ignored, reprocess the
02715             current token. */
02716             $this->inTable(array(
02717                 'name' => 'table',
02718                 'type' => HTML5::ENDTAG
02719             ));
02720 
02721             return $this->mainPhase($token);
02722 
02723         /* An end tag whose tag name is "table" */
02724         } elseif($token['type'] === HTML5::ENDTAG &&
02725         $token['name'] === 'table') {
02726             /* If the stack of open elements does not have an element in table
02727             scope with the same tag name as the token, this is a parse error.
02728             Ignore the token. (innerHTML case) */
02729             if(!$this->elementInScope($token['name'], true)) {
02730                 return false;
02731 
02732             /* Otherwise: */
02733             } else {
02734                 /* Generate implied end tags. */
02735                 $this->generateImpliedEndTags();
02736 
02737                 /* Now, if the current node is not a table element, then this
02738                 is a parse error. */
02739                 // w/e
02740 
02741                 /* Pop elements from this stack until a table element has been
02742                 popped from the stack. */
02743                 while(true) {
02744                     $current = end($this->stack)->nodeName;
02745                     array_pop($this->stack);
02746 
02747                     if($current === 'table') {
02748                         break;
02749                     }
02750                 }
02751 
02752                 /* Reset the insertion mode appropriately. */
02753                 $this->resetInsertionMode();
02754             }
02755 
02756         /* An end tag whose tag name is one of: "body", "caption", "col",
02757         "colgroup", "html", "tbody", "td", "tfoot", "th", "thead", "tr" */
02758         } elseif($token['type'] === HTML5::ENDTAG && in_array($token['name'],
02759         array('body', 'caption', 'col', 'colgroup', 'html', 'tbody', 'td',
02760         'tfoot', 'th', 'thead', 'tr'))) {
02761             // Parse error. Ignore the token.
02762 
02763         /* Anything else */
02764         } else {
02765             /* Parse error. Process the token as if the insertion mode was "in
02766             body", with the following exception: */
02767 
02768             /* If the current node is a table, tbody, tfoot, thead, or tr
02769             element, then, whenever a node would be inserted into the current
02770             node, it must instead be inserted into the foster parent element. */
02771             if(in_array(end($this->stack)->nodeName,
02772             array('table', 'tbody', 'tfoot', 'thead', 'tr'))) {
02773                 /* The foster parent element is the parent element of the last
02774                 table element in the stack of open elements, if there is a
02775                 table element and it has such a parent element. If there is no
02776                 table element in the stack of open elements (innerHTML case),
02777                 then the foster parent element is the first element in the
02778                 stack of open elements (the html  element). Otherwise, if there
02779                 is a table element in the stack of open elements, but the last
02780                 table element in the stack of open elements has no parent, or
02781                 its parent node is not an element, then the foster parent
02782                 element is the element before the last table element in the
02783                 stack of open elements. */
02784                 for($n = count($this->stack) - 1; $n >= 0; $n--) {
02785                     if($this->stack[$n]->nodeName === 'table') {
02786                         $table = $this->stack[$n];
02787                         break;
02788                     }
02789                 }
02790 
02791                 if(isset($table) && $table->parentNode !== null) {
02792                     $this->foster_parent = $table->parentNode;
02793 
02794                 } elseif(!isset($table)) {
02795                     $this->foster_parent = $this->stack[0];
02796 
02797                 } elseif(isset($table) && ($table->parentNode === null ||
02798                 $table->parentNode->nodeType !== XML_ELEMENT_NODE)) {
02799                     $this->foster_parent = $this->stack[$n - 1];
02800                 }
02801             }
02802 
02803             $this->inBody($token);
02804         }
02805     }
02806 
02807     private function inCaption($token) {
02808         /* An end tag whose tag name is "caption" */
02809         if($token['type'] === HTML5::ENDTAG && $token['name'] === 'caption') {
02810             /* If the stack of open elements does not have an element in table
02811             scope with the same tag name as the token, this is a parse error.
02812             Ignore the token. (innerHTML case) */
02813             if(!$this->elementInScope($token['name'], true)) {
02814                 // Ignore
02815 
02816             /* Otherwise: */
02817             } else {
02818                 /* Generate implied end tags. */
02819                 $this->generateImpliedEndTags();
02820 
02821                 /* Now, if the current node is not a caption element, then this
02822                 is a parse error. */
02823                 // w/e
02824 
02825                 /* Pop elements from this stack until a caption element has
02826                 been popped from the stack. */
02827                 while(true) {
02828                     $node = end($this->stack)->nodeName;
02829                     array_pop($this->stack);
02830 
02831                     if($node === 'caption') {
02832                         break;
02833                     }
02834                 }
02835 
02836                 /* Clear the list of active formatting elements up to the last
02837                 marker. */
02838                 $this->clearTheActiveFormattingElementsUpToTheLastMarker();
02839 
02840                 /* Switch the insertion mode to "in table". */
02841                 $this->mode = self::IN_TABLE;
02842             }
02843 
02844         /* A start tag whose tag name is one of: "caption", "col", "colgroup",
02845         "tbody", "td", "tfoot", "th", "thead", "tr", or an end tag whose tag
02846         name is "table" */
02847         } elseif(($token['type'] === HTML5::STARTTAG && in_array($token['name'],
02848         array('caption', 'col', 'colgroup', 'tbody', 'td', 'tfoot', 'th',
02849         'thead', 'tr'))) || ($token['type'] === HTML5::ENDTAG &&
02850         $token['name'] === 'table')) {
02851             /* Parse error. Act as if an end tag with the tag name "caption"
02852             had been seen, then, if that token wasn't ignored, reprocess the
02853             current token. */
02854             $this->inCaption(array(
02855                 'name' => 'caption',
02856                 'type' => HTML5::ENDTAG
02857             ));
02858 
02859             return $this->inTable($token);
02860 
02861         /* An end tag whose tag name is one of: "body", "col", "colgroup",
02862         "html", "tbody", "td", "tfoot", "th", "thead", "tr" */
02863         } elseif($token['type'] === HTML5::ENDTAG && in_array($token['name'],
02864         array('body', 'col', 'colgroup', 'html', 'tbody', 'tfoot', 'th',
02865         'thead', 'tr'))) {
02866             // Parse error. Ignore the token.
02867 
02868         /* Anything else */
02869         } else {
02870             /* Process the token as if the insertion mode was "in body". */
02871             $this->inBody($token);
02872         }
02873     }
02874 
02875     private function inColumnGroup($token) {
02876         /* A character token that is one of one of U+0009 CHARACTER TABULATION,
02877         U+000A LINE FEED (LF), U+000B LINE TABULATION, U+000C FORM FEED (FF),
02878         or U+0020 SPACE */
02879         if($token['type'] === HTML5::CHARACTR &&
02880         preg_match('/^[\t\n\x0b\x0c ]+$/', $token['data'])) {
02881             /* Append the character to the current node. */
02882             $text = $this->dom->createTextNode($token['data']);
02883             end($this->stack)->appendChild($text);
02884 
02885         /* A comment token */
02886         } elseif($token['type'] === HTML5::COMMENT) {
02887             /* Append a Comment node to the current node with the data
02888             attribute set to the data given in the comment token. */
02889             $comment = $this->dom->createComment($token['data']);
02890             end($this->stack)->appendChild($comment);
02891 
02892         /* A start tag whose tag name is "col" */
02893         } elseif($token['type'] === HTML5::STARTTAG && $token['name'] === 'col') {
02894             /* Insert a col element for the token. Immediately pop the current
02895             node off the stack of open elements. */
02896             $this->insertElement($token);
02897             array_pop($this->stack);
02898 
02899         /* An end tag whose tag name is "colgroup" */
02900         } elseif($token['type'] === HTML5::ENDTAG &&
02901         $token['name'] === 'colgroup') {
02902             /* If the current node is the root html element, then this is a
02903             parse error, ignore the token. (innerHTML case) */
02904             if(end($this->stack)->nodeName === 'html') {
02905                 // Ignore
02906 
02907             /* Otherwise, pop the current node (which will be a colgroup
02908             element) from the stack of open elements. Switch the insertion
02909             mode to "in table". */
02910             } else {
02911                 array_pop($this->stack);
02912                 $this->mode = self::IN_TABLE;
02913             }
02914 
02915         /* An end tag whose tag name is "col" */
02916         } elseif($token['type'] === HTML5::ENDTAG && $token['name'] === 'col') {
02917             /* Parse error. Ignore the token. */
02918 
02919         /* Anything else */
02920         } else {
02921             /* Act as if an end tag with the tag name "colgroup" had been seen,
02922             and then, if that token wasn't ignored, reprocess the current token. */
02923             $this->inColumnGroup(array(
02924                 'name' => 'colgroup',
02925                 'type' => HTML5::ENDTAG
02926             ));
02927 
02928             return $this->inTable($token);
02929         }
02930     }
02931 
02932     private function inTableBody($token) {
02933         $clear = array('tbody', 'tfoot', 'thead', 'html');
02934 
02935         /* A start tag whose tag name is "tr" */
02936         if($token['type'] === HTML5::STARTTAG && $token['name'] === 'tr') {
02937             /* Clear the stack back to a table body context. */
02938             $this->clearStackToTableContext($clear);
02939 
02940             /* Insert a tr element for the token, then switch the insertion
02941             mode to "in row". */
02942             $this->insertElement($token);
02943             $this->mode = self::IN_ROW;
02944 
02945         /* A start tag whose tag name is one of: "th", "td" */
02946         } elseif($token['type'] === HTML5::STARTTAG &&
02947         ($token['name'] === 'th' ||    $token['name'] === 'td')) {
02948             /* Parse error. Act as if a start tag with the tag name "tr" had
02949             been seen, then reprocess the current token. */
02950             $this->inTableBody(array(
02951                 'name' => 'tr',
02952                 'type' => HTML5::STARTTAG,
02953                 'attr' => array()
02954             ));
02955 
02956             return $this->inRow($token);
02957 
02958         /* An end tag whose tag name is one of: "tbody", "tfoot", "thead" */
02959         } elseif($token['type'] === HTML5::ENDTAG &&
02960         in_array($token['name'], array('tbody', 'tfoot', 'thead'))) {
02961             /* If the stack of open elements does not have an element in table
02962             scope with the same tag name as the token, this is a parse error.
02963             Ignore the token. */
02964             if(!$this->elementInScope($token['name'], true)) {
02965                 // Ignore
02966 
02967             /* Otherwise: */
02968             } else {
02969                 /* Clear the stack back to a table body context. */
02970                 $this->clearStackToTableContext($clear);
02971 
02972                 /* Pop the current node from the stack of open elements. Switch
02973                 the insertion mode to "in table". */
02974                 array_pop($this->stack);
02975                 $this->mode = self::IN_TABLE;
02976             }
02977 
02978         /* A start tag whose tag name is one of: "caption", "col", "colgroup",
02979         "tbody", "tfoot", "thead", or an end tag whose tag name is "table" */
02980         } elseif(($token['type'] === HTML5::STARTTAG && in_array($token['name'],
02981         array('caption', 'col', 'colgroup', 'tbody', 'tfoor', 'thead'))) ||
02982         ($token['type'] === HTML5::STARTTAG && $token['name'] === 'table')) {
02983             /* If the stack of open elements does not have a tbody, thead, or
02984             tfoot element in table scope, this is a parse error. Ignore the
02985             token. (innerHTML case) */
02986             if(!$this->elementInScope(array('tbody', 'thead', 'tfoot'), true)) {
02987                 // Ignore.
02988 
02989             /* Otherwise: */
02990             } else {
02991                 /* Clear the stack back to a table body context. */
02992                 $this->clearStackToTableContext($clear);
02993 
02994                 /* Act as if an end tag with the same tag name as the current
02995                 node ("tbody", "tfoot", or "thead") had been seen, then
02996                 reprocess the current token. */
02997                 $this->inTableBody(array(
02998                     'name' => end($this->stack)->nodeName,
02999                     'type' => HTML5::ENDTAG
03000                 ));
03001 
03002                 return $this->mainPhase($token);
03003             }
03004 
03005         /* An end tag whose tag name is one of: "body", "caption", "col",
03006         "colgroup", "html", "td", "th", "tr" */
03007         } elseif($token['type'] === HTML5::ENDTAG && in_array($token['name'],
03008         array('body', 'caption', 'col', 'colgroup', 'html', 'td', 'th', 'tr'))) {
03009             /* Parse error. Ignore the token. */
03010 
03011         /* Anything else */
03012         } else {
03013             /* Process the token as if the insertion mode was "in table". */
03014             $this->inTable($token);
03015         }
03016     }
03017 
03018     private function inRow($token) {
03019         $clear = array('tr', 'html');
03020 
03021         /* A start tag whose tag name is one of: "th", "td" */
03022         if($token['type'] === HTML5::STARTTAG &&
03023         ($token['name'] === 'th' || $token['name'] === 'td')) {
03024             /* Clear the stack back to a table row context. */
03025             $this->clearStackToTableContext($clear);
03026 
03027             /* Insert an HTML element for the token, then switch the insertion
03028             mode to "in cell". */
03029             $this->insertElement($token);
03030             $this->mode = self::IN_CELL;
03031 
03032             /* Insert a marker at the end of the list of active formatting
03033             elements. */
03034             $this->a_formatting[] = self::MARKER;
03035 
03036         /* An end tag whose tag name is "tr" */
03037         } elseif($token['type'] === HTML5::ENDTAG && $token['name'] === 'tr') {
03038             /* If the stack of open elements does not have an element in table
03039             scope with the same tag name as the token, this is a parse error.
03040             Ignore the token. (innerHTML case) */
03041             if(!$this->elementInScope($token['name'], true)) {
03042                 // Ignore.
03043 
03044             /* Otherwise: */
03045             } else {
03046                 /* Clear the stack back to a table row context. */
03047                 $this->clearStackToTableContext($clear);
03048 
03049                 /* Pop the current node (which will be a tr element) from the
03050                 stack of open elements. Switch the insertion mode to "in table
03051                 body". */
03052                 array_pop($this->stack);
03053                 $this->mode = self::IN_TBODY;
03054             }
03055 
03056         /* A start tag whose tag name is one of: "caption", "col", "colgroup",
03057         "tbody", "tfoot", "thead", "tr" or an end tag whose tag name is "table" */
03058         } elseif($token['type'] === HTML5::STARTTAG && in_array($token['name'],
03059         array('caption', 'col', 'colgroup', 'tbody', 'tfoot', 'thead', 'tr'))) {
03060             /* Act as if an end tag with the tag name "tr" had been seen, then,
03061             if that token wasn't ignored, reprocess the current token. */
03062             $this->inRow(array(
03063                 'name' => 'tr',
03064                 'type' => HTML5::ENDTAG
03065             ));
03066 
03067             return $this->inCell($token);
03068 
03069         /* An end tag whose tag name is one of: "tbody", "tfoot", "thead" */
03070         } elseif($token['type'] === HTML5::ENDTAG &&
03071         in_array($token['name'], array('tbody', 'tfoot', 'thead'))) {
03072             /* If the stack of open elements does not have an element in table
03073             scope with the same tag name as the token, this is a parse error.
03074             Ignore the token. */
03075             if(!$this->elementInScope($token['name'], true)) {
03076                 // Ignore.
03077 
03078             /* Otherwise: */
03079             } else {
03080                 /* Otherwise, act as if an end tag with the tag name "tr" had
03081                 been seen, then reprocess the current token. */
03082                 $this->inRow(array(
03083                     'name' => 'tr',
03084                     'type' => HTML5::ENDTAG
03085                 ));
03086 
03087                 return $this->inCell($token);
03088             }
03089 
03090         /* An end tag whose tag name is one of: "body", "caption", "col",
03091         "colgroup", "html", "td", "th" */
03092         } elseif($token['type'] === HTML5::ENDTAG && in_array($token['name'],
03093         array('body', 'caption', 'col', 'colgroup', 'html', 'td', 'th', 'tr'))) {
03094             /* Parse error. Ignore the token. */
03095 
03096         /* Anything else */
03097         } else {
03098             /* Process the token as if the insertion mode was "in table". */
03099             $this->inTable($token);
03100         }
03101     }
03102 
03103     private function inCell($token) {
03104         /* An end tag whose tag name is one of: "td", "th" */
03105         if($token['type'] === HTML5::ENDTAG &&
03106         ($token['name'] === 'td' || $token['name'] === 'th')) {
03107             /* If the stack of open elements does not have an element in table
03108             scope with the same tag name as that of the token, then this is a
03109             parse error and the token must be ignored. */
03110             if(!$this->elementInScope($token['name'], true)) {
03111                 // Ignore.
03112 
03113             /* Otherwise: */
03114             } else {
03115                 /* Generate implied end tags, except for elements with the same
03116                 tag name as the token. */
03117                 $this->generateImpliedEndTags(array($token['name']));
03118 
03119                 /* Now, if the current node is not an element with the same tag
03120                 name as the token, then this is a parse error. */
03121                 // k
03122 
03123                 /* Pop elements from this stack until an element with the same
03124                 tag name as the token has been popped from the stack. */
03125                 while(true) {
03126                     $node = end($this->stack)->nodeName;
03127                     array_pop($this->stack);
03128 
03129                     if($node === $token['name']) {
03130                         break;
03131                     }
03132                 }
03133 
03134                 /* Clear the list of active formatting elements up to the last
03135                 marker. */
03136                 $this->clearTheActiveFormattingElementsUpToTheLastMarker();
03137 
03138                 /* Switch the insertion mode to "in row". (The current node
03139                 will be a tr element at this point.) */
03140                 $this->mode = self::IN_ROW;
03141             }
03142 
03143         /* A start tag whose tag name is one of: "caption", "col", "colgroup",
03144         "tbody", "td", "tfoot", "th", "thead", "tr" */
03145         } elseif($token['type'] === HTML5::STARTTAG && in_array($token['name'],
03146         array('caption', 'col', 'colgroup', 'tbody', 'td', 'tfoot', 'th',
03147         'thead', 'tr'))) {
03148             /* If the stack of open elements does not have a td or th element
03149             in table scope, then this is a parse error; ignore the token.
03150             (innerHTML case) */
03151             if(!$this->elementInScope(array('td', 'th'), true)) {
03152                 // Ignore.
03153 
03154             /* Otherwise, close the cell (see below) and reprocess the current
03155             token. */
03156             } else {
03157                 $this->closeCell();
03158                 return $this->inRow($token);
03159             }
03160 
03161         /* A start tag whose tag name is one of: "caption", "col", "colgroup",
03162         "tbody", "td", "tfoot", "th", "thead", "tr" */
03163         } elseif($token['type'] === HTML5::STARTTAG && in_array($token['name'],
03164         array('caption', 'col', 'colgroup', 'tbody', 'td', 'tfoot', 'th',
03165         'thead', 'tr'))) {
03166             /* If the stack of open elements does not have a td or th element
03167             in table scope, then this is a parse error; ignore the token.
03168             (innerHTML case) */
03169             if(!$this->elementInScope(array('td', 'th'), true)) {
03170                 // Ignore.
03171 
03172             /* Otherwise, close the cell (see below) and reprocess the current
03173             token. */
03174             } else {
03175                 $this->closeCell();
03176                 return $this->inRow($token);
03177             }
03178 
03179         /* An end tag whose tag name is one of: "body", "caption", "col",
03180         "colgroup", "html" */
03181         } elseif($token['type'] === HTML5::ENDTAG && in_array($token['name'],
03182         array('body', 'caption', 'col', 'colgroup', 'html'))) {
03183             /* Parse error. Ignore the token. */
03184 
03185         /* An end tag whose tag name is one of: "table", "tbody", "tfoot",
03186         "thead", "tr" */
03187         } elseif($token['type'] === HTML5::ENDTAG && in_array($token['name'],
03188         array('table', 'tbody', 'tfoot', 'thead', 'tr'))) {
03189             /* If the stack of open elements does not have an element in table
03190             scope with the same tag name as that of the token (which can only
03191             happen for "tbody", "tfoot" and "thead", or, in the innerHTML case),
03192             then this is a parse error and the token must be ignored. */
03193             if(!$this->elementInScope($token['name'], true)) {
03194                 // Ignore.
03195 
03196             /* Otherwise, close the cell (see below) and reprocess the current
03197             token. */
03198             } else {
03199                 $this->closeCell();
03200                 return $this->inRow($token);
03201             }
03202 
03203         /* Anything else */
03204         } else {
03205             /* Process the token as if the insertion mode was "in body". */
03206             $this->inBody($token);
03207         }
03208     }
03209 
03210     private function inSelect($token) {
03211         /* Handle the token as follows: */
03212 
03213         /* A character token */
03214         if($token['type'] === HTML5::CHARACTR) {
03215             /* Append the token's character to the current node. */
03216             $this->insertText($token['data']);
03217 
03218         /* A comment token */
03219         } elseif($token['type'] === HTML5::COMMENT) {
03220             /* Append a Comment node to the current node with the data
03221             attribute set to the data given in the comment token. */
03222             $this->insertComment($token['data']);
03223 
03224         /* A start tag token whose tag name is "option" */
03225         } elseif($token['type'] === HTML5::STARTTAG &&
03226         $token['name'] === 'option') {
03227             /* If the current node is an option element, act as if an end tag
03228             with the tag name "option" had been seen. */
03229             if(end($this->stack)->nodeName === 'option') {
03230                 $this->inSelect(array(
03231                     'name' => 'option',
03232                     'type' => HTML5::ENDTAG
03233                 ));
03234             }
03235 
03236             /* Insert an HTML element for the token. */
03237             $this->insertElement($token);
03238 
03239         /* A start tag token whose tag name is "optgroup" */
03240         } elseif($token['type'] === HTML5::STARTTAG &&
03241         $token['name'] === 'optgroup') {
03242             /* If the current node is an option element, act as if an end tag
03243             with the tag name "option" had been seen. */
03244             if(end($this->stack)->nodeName === 'option') {
03245                 $this->inSelect(array(
03246                     'name' => 'option',
03247                     'type' => HTML5::ENDTAG
03248                 ));
03249             }
03250 
03251             /* If the current node is an optgroup element, act as if an end tag
03252             with the tag name "optgroup" had been seen. */
03253             if(end($this->stack)->nodeName === 'optgroup') {
03254                 $this->inSelect(array(
03255                     'name' => 'optgroup',
03256                     'type' => HTML5::ENDTAG
03257                 ));
03258             }
03259 
03260             /* Insert an HTML element for the token. */
03261             $this->insertElement($token);
03262 
03263         /* An end tag token whose tag name is "optgroup" */
03264         } elseif($token['type'] === HTML5::ENDTAG &&
03265         $token['name'] === 'optgroup') {
03266             /* First, if the current node is an option element, and the node
03267             immediately before it in the stack of open elements is an optgroup
03268             element, then act as if an end tag with the tag name "option" had
03269             been seen. */
03270             $elements_in_stack = count($this->stack);
03271 
03272             if($this->stack[$elements_in_stack - 1]->nodeName === 'option' &&
03273             $this->stack[$elements_in_stack - 2]->nodeName === 'optgroup') {
03274                 $this->inSelect(array(
03275                     'name' => 'option',
03276                     'type' => HTML5::ENDTAG
03277                 ));
03278             }
03279 
03280             /* If the current node is an optgroup element, then pop that node
03281             from the stack of open elements. Otherwise, this is a parse error,
03282             ignore the token. */
03283             if($this->stack[$elements_in_stack - 1] === 'optgroup') {
03284                 array_pop($this->stack);
03285             }
03286 
03287         /* An end tag token whose tag name is "option" */
03288         } elseif($token['type'] === HTML5::ENDTAG &&
03289         $token['name'] === 'option') {
03290             /* If the current node is an option element, then pop that node
03291             from the stack of open elements. Otherwise, this is a parse error,
03292             ignore the token. */
03293             if(end($this->stack)->nodeName === 'option') {
03294                 array_pop($this->stack);
03295             }
03296 
03297         /* An end tag whose tag name is "select" */
03298         } elseif($token['type'] === HTML5::ENDTAG &&
03299         $token['name'] === 'select') {
03300             /* If the stack of open elements does not have an element in table
03301             scope with the same tag name as the token, this is a parse error.
03302             Ignore the token. (innerHTML case) */
03303             if(!$this->elementInScope($token['name'], true)) {
03304                 // w/e
03305 
03306             /* Otherwise: */
03307             } else {
03308                 /* Pop elements from the stack of open elements until a select
03309                 element has been popped from the stack. */
03310                 while(true) {
03311                     $current = end($this->stack)->nodeName;
03312                     array_pop($this->stack);
03313 
03314                     if($current === 'select') {
03315                         break;
03316                     }
03317                 }
03318 
03319                 /* Reset the insertion mode appropriately. */
03320                 $this->resetInsertionMode();
03321             }
03322 
03323         /* A start tag whose tag name is "select" */
03324         } elseif($token['name'] === 'select' &&
03325         $token['type'] === HTML5::STARTTAG) {
03326             /* Parse error. Act as if the token had been an end tag with the
03327             tag name "select" instead. */
03328             $this->inSelect(array(
03329                 'name' => 'select',
03330                 'type' => HTML5::ENDTAG
03331             ));
03332 
03333         /* An end tag whose tag name is one of: "caption", "table", "tbody",
03334         "tfoot", "thead", "tr", "td", "th" */
03335         } elseif(in_array($token['name'], array('caption', 'table', 'tbody',
03336         'tfoot', 'thead', 'tr', 'td', 'th')) && $token['type'] === HTML5::ENDTAG) {
03337             /* Parse error. */
03338             // w/e
03339 
03340             /* If the stack of open elements has an element in table scope with
03341             the same tag name as that of the token, then act as if an end tag
03342             with the tag name "select" had been seen, and reprocess the token.
03343             Otherwise, ignore the token. */
03344             if($this->elementInScope($token['name'], true)) {
03345                 $this->inSelect(array(
03346                     'name' => 'select',
03347                     'type' => HTML5::ENDTAG
03348                 ));
03349 
03350                 $this->mainPhase($token);
03351             }
03352 
03353         /* Anything else */
03354         } else {
03355             /* Parse error. Ignore the token. */
03356         }
03357     }
03358 
03359     private function afterBody($token) {
03360         /* Handle the token as follows: */
03361 
03362         /* A character token that is one of one of U+0009 CHARACTER TABULATION,
03363         U+000A LINE FEED (LF), U+000B LINE TABULATION, U+000C FORM FEED (FF),
03364         or U+0020 SPACE */
03365         if($token['type'] === HTML5::CHARACTR &&
03366         preg_match('/^[\t\n\x0b\x0c ]+$/', $token['data'])) {
03367             /* Process the token as it would be processed if the insertion mode
03368             was "in body". */
03369             $this->inBody($token);
03370 
03371         /* A comment token */
03372         } elseif($token['type'] === HTML5::COMMENT) {
03373             /* Append a Comment node to the first element in the stack of open
03374             elements (the html element), with the data attribute set to the
03375             data given in the comment token. */
03376             $comment = $this->dom->createComment($token['data']);
03377             $this->stack[0]->appendChild($comment);
03378 
03379         /* An end tag with the tag name "html" */
03380         } elseif($token['type'] === HTML5::ENDTAG && $token['name'] === 'html') {
03381             /* If the parser was originally created in order to handle the
03382             setting of an element's innerHTML attribute, this is a parse error;
03383             ignore the token. (The element will be an html element in this
03384             case.) (innerHTML case) */
03385 
03386             /* Otherwise, switch to the trailing end phase. */
03387             $this->phase = self::END_PHASE;
03388 
03389         /* Anything else */
03390         } else {
03391             /* Parse error. Set the insertion mode to "in body" and reprocess
03392             the token. */
03393             $this->mode = self::IN_BODY;
03394             return $this->inBody($token);
03395         }
03396     }
03397 
03398     private function inFrameset($token) {
03399         /* Handle the token as follows: */
03400 
03401         /* A character token that is one of one of U+0009 CHARACTER TABULATION,
03402         U+000A LINE FEED (LF), U+000B LINE TABULATION, U+000C FORM FEED (FF),
03403         U+000D CARRIAGE RETURN (CR), or U+0020 SPACE */
03404         if($token['type'] === HTML5::CHARACTR &&
03405         preg_match('/^[\t\n\x0b\x0c ]+$/', $token['data'])) {
03406             /* Append the character to the current node. */
03407             $this->insertText($token['data']);
03408 
03409         /* A comment token */
03410         } elseif($token['type'] === HTML5::COMMENT) {
03411             /* Append a Comment node to the current node with the data
03412             attribute set to the data given in the comment token. */
03413             $this->insertComment($token['data']);
03414 
03415         /* A start tag with the tag name "frameset" */
03416         } elseif($token['name'] === 'frameset' &&
03417         $token['type'] === HTML5::STARTTAG) {
03418             $this->insertElement($token);
03419 
03420         /* An end tag with the tag name "frameset" */
03421         } elseif($token['name'] === 'frameset' &&
03422         $token['type'] === HTML5::ENDTAG) {
03423             /* If the current node is the root html element, then this is a
03424             parse error; ignore the token. (innerHTML case) */
03425             if(end($this->stack)->nodeName === 'html') {
03426                 // Ignore
03427 
03428             } else {
03429                 /* Otherwise, pop the current node from the stack of open
03430                 elements. */
03431                 array_pop($this->stack);
03432 
03433                 /* If the parser was not originally created in order to handle
03434                 the setting of an element's innerHTML attribute (innerHTML case),
03435                 and the current node is no longer a frameset element, then change
03436                 the insertion mode to "after frameset". */
03437                 $this->mode = self::AFTR_FRAME;
03438             }
03439 
03440         /* A start tag with the tag name "frame" */
03441         } elseif($token['name'] === 'frame' &&
03442         $token['type'] === HTML5::STARTTAG) {
03443             /* Insert an HTML element for the token. */
03444             $this->insertElement($token);
03445 
03446             /* Immediately pop the current node off the stack of open elements. */
03447             array_pop($this->stack);
03448 
03449         /* A start tag with the tag name "noframes" */
03450         } elseif($token['name'] === 'noframes' &&
03451         $token['type'] === HTML5::STARTTAG) {
03452             /* Process the token as if the insertion mode had been "in body". */
03453             $this->inBody($token);
03454 
03455         /* Anything else */
03456         } else {
03457             /* Parse error. Ignore the token. */
03458         }
03459     }
03460 
03461     private function afterFrameset($token) {
03462         /* Handle the token as follows: */
03463 
03464         /* A character token that is one of one of U+0009 CHARACTER TABULATION,
03465         U+000A LINE FEED (LF), U+000B LINE TABULATION, U+000C FORM FEED (FF),
03466         U+000D CARRIAGE RETURN (CR), or U+0020 SPACE */
03467         if($token['type'] === HTML5::CHARACTR &&
03468         preg_match('/^[\t\n\x0b\x0c ]+$/', $token['data'])) {
03469             /* Append the character to the current node. */
03470             $this->insertText($token['data']);
03471 
03472         /* A comment token */
03473         } elseif($token['type'] === HTML5::COMMENT) {
03474             /* Append a Comment node to the current node with the data
03475             attribute set to the data given in the comment token. */
03476             $this->insertComment($token['data']);
03477 
03478         /* An end tag with the tag name "html" */
03479         } elseif($token['name'] === 'html' &&
03480         $token['type'] === HTML5::ENDTAG) {
03481             /* Switch to the trailing end phase. */
03482             $this->phase = self::END_PHASE;
03483 
03484         /* A start tag with the tag name "noframes" */
03485         } elseif($token['name'] === 'noframes' &&
03486         $token['type'] === HTML5::STARTTAG) {
03487             /* Process the token as if the insertion mode had been "in body". */
03488             $this->inBody($token);
03489 
03490         /* Anything else */
03491         } else {
03492             /* Parse error. Ignore the token. */
03493         }
03494     }
03495 
03496     private function trailingEndPhase($token) {
03497         /* After the main phase, as each token is emitted from the tokenisation
03498         stage, it must be processed as described in this section. */
03499 
03500         /* A DOCTYPE token */
03501         if($token['type'] === HTML5::DOCTYPE) {
03502             // Parse error. Ignore the token.
03503 
03504         /* A comment token */
03505         } elseif($token['type'] === HTML5::COMMENT) {
03506             /* Append a Comment node to the Document object with the data
03507             attribute set to the data given in the comment token. */
03508             $comment = $this->dom->createComment($token['data']);
03509             $this->dom->appendChild($comment);
03510 
03511         /* A character token that is one of one of U+0009 CHARACTER TABULATION,
03512         U+000A LINE FEED (LF), U+000B LINE TABULATION, U+000C FORM FEED (FF),
03513         or U+0020 SPACE */
03514         } elseif($token['type'] === HTML5::CHARACTR &&
03515         preg_match('/^[\t\n\x0b\x0c ]+$/', $token['data'])) {
03516             /* Process the token as it would be processed in the main phase. */
03517             $this->mainPhase($token);
03518 
03519         /* A character token that is not one of U+0009 CHARACTER TABULATION,
03520         U+000A LINE FEED (LF), U+000B LINE TABULATION, U+000C FORM FEED (FF),
03521         or U+0020 SPACE. Or a start tag token. Or an end tag token. */
03522         } elseif(($token['type'] === HTML5::CHARACTR &&
03523         preg_match('/^[\t\n\x0b\x0c ]+$/', $token['data'])) ||
03524         $token['type'] === HTML5::STARTTAG || $token['type'] === HTML5::ENDTAG) {
03525             /* Parse error. Switch back to the main phase and reprocess the
03526             token. */
03527             $this->phase = self::MAIN_PHASE;
03528             return $this->mainPhase($token);
03529 
03530         /* An end-of-file token */
03531         } elseif($token['type'] === HTML5::EOF) {
03532             /* OMG DONE!! */
03533         }
03534     }
03535 
03536     private function insertElement($token, $append = true, $check = false) {
03537         // Proprietary workaround for libxml2's limitations with tag names
03538         if ($check) {
03539             // Slightly modified HTML5 tag-name modification,
03540             // removing anything that's not an ASCII letter, digit, or hyphen
03541             $token['name'] = preg_replace('/[^a-z0-9-]/i', '', $token['name']);
03542             // Remove leading hyphens and numbers
03543             $token['name'] = ltrim($token['name'], '-0..9');
03544             // In theory, this should ever be needed, but just in case
03545             if ($token['name'] === '') $token['name'] = 'span'; // arbitrary generic choice
03546         }
03547         
03548         $el = $this->dom->createElement($token['name']);
03549 
03550         foreach($token['attr'] as $attr) {
03551             if(!$el->hasAttribute($attr['name'])) {
03552                 $el->setAttribute($attr['name'], $attr['value']);
03553             }
03554         }
03555 
03556         $this->appendToRealParent($el);
03557         $this->stack[] = $el;
03558 
03559         return $el;
03560     }
03561 
03562     private function insertText($data) {
03563         $text = $this->dom->createTextNode($data);
03564         $this->appendToRealParent($text);
03565     }
03566 
03567     private function insertComment($data) {
03568         $comment = $this->dom->createComment($data);
03569         $this->appendToRealParent($comment);
03570     }
03571 
03572     private function appendToRealParent($node) {
03573         if($this->foster_parent === null) {
03574             end($this->stack)->appendChild($node);
03575 
03576         } elseif($this->foster_parent !== null) {
03577             /* If the foster parent element is the parent element of the
03578             last table element in the stack of open elements, then the new
03579             node must be inserted immediately before the last table element
03580             in the stack of open elements in the foster parent element;
03581             otherwise, the new node must be appended to the foster parent
03582             element. */
03583             for($n = count($this->stack) - 1; $n >= 0; $n--) {
03584                 if($this->stack[$n]->nodeName === 'table' &&
03585                 $this->stack[$n]->parentNode !== null) {
03586                     $table = $this->stack[$n];
03587                     break;
03588                 }
03589             }
03590 
03591             if(isset($table) && $this->foster_parent->isSameNode($table->parentNode))
03592                 $this->foster_parent->insertBefore($node, $table);
03593             else
03594                 $this->foster_parent->appendChild($node);
03595 
03596             $this->foster_parent = null;
03597         }
03598     }
03599 
03600     private function elementInScope($el, $table = false) {
03601         if(is_array($el)) {
03602             foreach($el as $element) {
03603                 if($this->elementInScope($element, $table)) {
03604                     return true;
03605                 }
03606             }
03607 
03608             return false;
03609         }
03610 
03611         $leng = count($this->stack);
03612 
03613         for($n = 0; $n < $leng; $n++) {
03614             /* 1. Initialise node to be the current node (the bottommost node of
03615             the stack). */
03616             $node = $this->stack[$leng - 1 - $n];
03617 
03618             if($node->tagName === $el) {
03619                 /* 2. If node is the target node, terminate in a match state. */
03620                 return true;
03621 
03622             } elseif($node->tagName === 'table') {
03623                 /* 3. Otherwise, if node is a table element, terminate in a failure
03624                 state. */
03625                 return false;
03626 
03627             } elseif($table === true && in_array($node->tagName, array('caption', 'td',
03628             'th', 'button', 'marquee', 'object'))) {
03629                 /* 4. Otherwise, if the algorithm is the "has an element in scope"
03630                 variant (rather than the "has an element in table scope" variant),
03631                 and node is one of the following, terminate in a failure state. */
03632                 return false;
03633 
03634             } elseif($node === $node->ownerDocument->documentElement) {
03635                 /* 5. Otherwise, if node is an html element (root element), terminate
03636                 in a failure state. (This can only happen if the node is the topmost
03637                 node of the    stack of open elements, and prevents the next step from
03638                 being invoked if there are no more elements in the stack.) */
03639                 return false;
03640             }
03641 
03642             /* Otherwise, set node to the previous entry in the stack of open
03643             elements and return to step 2. (This will never fail, since the loop
03644             will always terminate in the previous step if the top of the stack
03645             is reached.) */
03646         }
03647     }
03648 
03649     private function reconstructActiveFormattingElements() {
03650         /* 1. If there are no entries in the list of active formatting elements,
03651         then there is nothing to reconstruct; stop this algorithm. */
03652         $formatting_elements = count($this->a_formatting);
03653 
03654         if($formatting_elements === 0) {
03655             return false;
03656         }
03657 
03658         /* 3. Let entry be the last (most recently added) element in the list
03659         of active formatting elements. */
03660         $entry = end($this->a_formatting);
03661 
03662         /* 2. If the last (most recently added) entry in the list of active
03663         formatting elements is a marker, or if it is an element that is in the
03664         stack of open elements, then there is nothing to reconstruct; stop this
03665         algorithm. */
03666         if($entry === self::MARKER || in_array($entry, $this->stack, true)) {
03667             return false;
03668         }
03669 
03670         for($a = $formatting_elements - 1; $a >= 0; true) {
03671             /* 4. If there are no entries before entry in the list of active
03672             formatting elements, then jump to step 8. */
03673             if($a === 0) {
03674                 $step_seven = false;
03675                 break;
03676             }
03677 
03678             /* 5. Let entry be the entry one earlier than entry in the list of
03679             active formatting elements. */
03680             $a--;
03681             $entry = $this->a_formatting[$a];
03682 
03683             /* 6. If entry is neither a marker nor an element that is also in
03684             thetack of open elements, go to step 4. */
03685             if($entry === self::MARKER || in_array($entry, $this->stack, true)) {
03686                 break;
03687             }
03688         }
03689 
03690         while(true) {
03691             /* 7. Let entry be the element one later than entry in the list of
03692             active formatting elements. */
03693             if(isset($step_seven) && $step_seven === true) {
03694                 $a++;
03695                 $entry = $this->a_formatting[$a];
03696             }
03697 
03698             /* 8. Perform a shallow clone of the element entry to obtain clone. */
03699             $clone = $entry->cloneNode();
03700 
03701             /* 9. Append clone to the current node and push it onto the stack
03702             of open elements  so that it is the new current node. */
03703             end($this->stack)->appendChild($clone);
03704             $this->stack[] = $clone;
03705 
03706             /* 10. Replace the entry for entry in the list with an entry for
03707             clone. */
03708             $this->a_formatting[$a] = $clone;
03709 
03710             /* 11. If the entry for clone in the list of active formatting
03711             elements is not the last entry in the list, return to step 7. */
03712             if(end($this->a_formatting) !== $clone) {
03713                 $step_seven = true;
03714             } else {
03715                 break;
03716             }
03717         }
03718     }
03719 
03720     private function clearTheActiveFormattingElementsUpToTheLastMarker() {
03721         /* When the steps below require the UA to clear the list of active
03722         formatting elements up to the last marker, the UA must perform the
03723         following steps: */
03724 
03725         while(true) {
03726             /* 1. Let entry be the last (most recently added) entry in the list
03727             of active formatting elements. */
03728             $entry = end($this->a_formatting);
03729 
03730             /* 2. Remove entry from the list of active formatting elements. */
03731             array_pop($this->a_formatting);
03732 
03733             /* 3. If entry was a marker, then stop the algorithm at this point.
03734             The list has been cleared up to the last marker. */
03735             if($entry === self::MARKER) {
03736                 break;
03737             }
03738         }
03739     }
03740 
03741     private function generateImpliedEndTags($exclude = array()) {
03742         /* When the steps below require the UA to generate implied end tags,
03743         then, if the current node is a dd element, a dt element, an li element,
03744         a p element, a td element, a th  element, or a tr element, the UA must
03745         act as if an end tag with the respective tag name had been seen and
03746         then generate implied end tags again. */
03747         $node = end($this->stack);
03748         $elements = array_diff(array('dd', 'dt', 'li', 'p', 'td', 'th', 'tr'), $exclude);
03749 
03750         while(in_array(end($this->stack)->nodeName, $elements)) {
03751             array_pop($this->stack);
03752         }
03753     }
03754 
03755     private function getElementCategory($node) {
03756         $name = $node->tagName;
03757         if(in_array($name, $this->special))
03758             return self::SPECIAL;
03759 
03760         elseif(in_array($name, $this->scoping))
03761             return self::SCOPING;
03762 
03763         elseif(in_array($name, $this->formatting))
03764             return self::FORMATTING;
03765 
03766         else
03767             return self::PHRASING;
03768     }
03769 
03770     private function clearStackToTableContext($elements) {
03771         /* When the steps above require the UA to clear the stack back to a
03772         table context, it means that the UA must, while the current node is not
03773         a table element or an html element, pop elements from the stack of open
03774         elements. If this causes any elements to be popped from the stack, then
03775         this is a parse error. */
03776         while(true) {
03777             $node = end($this->stack)->nodeName;
03778 
03779             if(in_array($node, $elements)) {
03780                 break;
03781             } else {
03782                 array_pop($this->stack);
03783             }
03784         }
03785     }
03786 
03787     private function resetInsertionMode() {
03788         /* 1. Let last be false. */
03789         $last = false;
03790         $leng = count($this->stack);
03791 
03792         for($n = $leng - 1; $n >= 0; $n--) {
03793             /* 2. Let node be the last node in the stack of open elements. */
03794             $node = $this->stack[$n];
03795 
03796             /* 3. If node is the first node in the stack of open elements, then
03797             set last to true. If the element whose innerHTML  attribute is being
03798             set is neither a td  element nor a th element, then set node to the
03799             element whose innerHTML  attribute is being set. (innerHTML  case) */
03800             if($this->stack[0]->isSameNode($node)) {
03801                 $last = true;
03802             }
03803 
03804             /* 4. If node is a select element, then switch the insertion mode to
03805             "in select" and abort these steps. (innerHTML case) */
03806             if($node->nodeName === 'select') {
03807                 $this->mode = self::IN_SELECT;
03808                 break;
03809 
03810             /* 5. If node is a td or th element, then switch the insertion mode
03811             to "in cell" and abort these steps. */
03812             } elseif($node->nodeName === 'td' || $node->nodeName === 'th') {
03813                 $this->mode = self::IN_CELL;
03814                 break;
03815 
03816             /* 6. If node is a tr element, then switch the insertion mode to
03817             "in    row" and abort these steps. */
03818             } elseif($node->nodeName === 'tr') {
03819                 $this->mode = self::IN_ROW;
03820                 break;
03821 
03822             /* 7. If node is a tbody, thead, or tfoot element, then switch the
03823             insertion mode to "in table body" and abort these steps. */
03824             } elseif(in_array($node->nodeName, array('tbody', 'thead', 'tfoot'))) {
03825                 $this->mode = self::IN_TBODY;
03826                 break;
03827 
03828             /* 8. If node is a caption element, then switch the insertion mode
03829             to "in caption" and abort these steps. */
03830             } elseif($node->nodeName === 'caption') {
03831                 $this->mode = self::IN_CAPTION;
03832                 break;
03833 
03834             /* 9. If node is a colgroup element, then switch the insertion mode
03835             to "in column group" and abort these steps. (innerHTML case) */
03836             } elseif($node->nodeName === 'colgroup') {
03837                 $this->mode = self::IN_CGROUP;
03838                 break;
03839 
03840             /* 10. If node is a table element, then switch the insertion mode
03841             to "in table" and abort these steps. */
03842             } elseif($node->nodeName === 'table') {
03843                 $this->mode = self::IN_TABLE;
03844                 break;
03845 
03846             /* 11. If node is a head element, then switch the insertion mode
03847             to "in body" ("in body"! not "in head"!) and abort these steps.
03848             (innerHTML case) */
03849             } elseif($node->nodeName === 'head') {
03850                 $this->mode = self::IN_BODY;
03851                 break;
03852 
03853             /* 12. If node is a body element, then switch the insertion mode to
03854             "in body" and abort these steps. */
03855             } elseif($node->nodeName === 'body') {
03856                 $this->mode = self::IN_BODY;
03857                 break;
03858 
03859             /* 13. If node is a frameset element, then switch the insertion
03860             mode to "in frameset" and abort these steps. (innerHTML case) */
03861             } elseif($node->nodeName === 'frameset') {
03862                 $this->mode = self::IN_FRAME;
03863                 break;
03864 
03865             /* 14. If node is an html element, then: if the head element
03866             pointer is null, switch the insertion mode to "before head",
03867             otherwise, switch the insertion mode to "after head". In either
03868             case, abort these steps. (innerHTML case) */
03869             } elseif($node->nodeName === 'html') {
03870                 $this->mode = ($this->head_pointer === null)
03871                     ? self::BEFOR_HEAD
03872                     : self::AFTER_HEAD;
03873 
03874                 break;
03875 
03876             /* 15. If last is true, then set the insertion mode to "in body"
03877             and    abort these steps. (innerHTML case) */
03878             } elseif($last) {
03879                 $this->mode = self::IN_BODY;
03880                 break;
03881             }
03882         }
03883     }
03884 
03885     private function closeCell() {
03886         /* If the stack of open elements has a td or th element in table scope,
03887         then act as if an end tag token with that tag name had been seen. */
03888         foreach(array('td', 'th') as $cell) {
03889             if($this->elementInScope($cell, true)) {
03890                 $this->inCell(array(
03891                     'name' => $cell,
03892                     'type' => HTML5::ENDTAG
03893                 ));
03894 
03895                 break;
03896             }
03897         }
03898     }
03899 
03900     public function save() {
03901         return $this->dom;
03902     }
03903 }
03904 ?>