HTMLPurifier 4.4.0
|
<?php

/**
 * Lexer that tokenizes HTML by parsing it with the bundled PH5P HTML5
 * parser (the HTML5 class below) and walking the resulting DOM tree.
 */
class HTMLPurifier_Lexer_PH5P extends HTMLPurifier_Lexer_DOMLex {

    /**
     * Turns an HTML string into a token list.
     *
     * The input is normalized and wrapped, parsed by HTML5, and the first
     * <div> inside <html><body> of the resulting document is handed to
     * tokenizeDOM(). If the parser raises a DOMException, the exception is
     * registered in the context under 'PH5PError' and the ORIGINAL $html is
     * tokenized by HTMLPurifier_Lexer_DirectLex instead.
     *
     * @param string $html    Raw HTML to tokenize.
     * @param mixed  $config  Configuration object, passed through to helpers.
     * @param mixed  $context Context object; receives 'PH5PError' on fallback.
     * @return array Tokens gathered by tokenizeDOM(), or whatever the
     *               DirectLex fallback returns.
     */
    public function tokenizeHTML($html, $config, $context) {
        // Normalize first, then wrap the normalized markup.
        $prepared = $this->wrapHTML(
            $this->normalize($html, $config, $context),
            $config,
            $context
        );

        try {
            $parser   = new HTML5($prepared);
            $document = $parser->save();
        } catch (DOMException $failure) {
            // The DOM layer gave up. Record the exception so callers can
            // detect the fallback, then let DirectLex process the original,
            // untouched HTML.
            $fallback = new HTMLPurifier_Lexer_DirectLex();
            $context->register('PH5PError', $failure);
            return $fallback->tokenizeHTML($html, $config, $context);
        }

        $tokens = array();

        // Descend <html> -> <body> -> first <div>; only that subtree is
        // tokenized (presumably the wrapper added by wrapHTML() - confirm).
        $htmlNode = $document->getElementsByTagName('html')->item(0);
        $bodyNode = $htmlNode->getElementsByTagName('body')->item(0);
        $divNode  = $bodyNode->getElementsByTagName('div')->item(0);

        $this->tokenizeDOM($divNode, $tokens);

        return $tokens;
    }

}

/*

Copyright 2007 Jeroen van der Meer <http://jero.net/>

Permission is hereby granted, free of charge, to any person obtaining a
copy of this software and associated documentation files (the
"Software"), to deal in the Software without restriction, including
without limitation the rights to use, copy, modify, merge, publish,
distribute, sublicense, and/or sell copies of the Software, and to
permit persons to whom the Software is furnished to do so, subject to
the following conditions:

The above copyright notice and this permission notice shall be included
in all copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
00056 IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY 00057 CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, 00058 TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE 00059 SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 00060 00061 */ 00062 00063 class HTML5 { 00064 private $data; 00065 private $char; 00066 private $EOF; 00067 private $state; 00068 private $tree; 00069 private $token; 00070 private $content_model; 00071 private $escape = false; 00072 private $entities = array('AElig;','AElig','AMP;','AMP','Aacute;','Aacute', 00073 'Acirc;','Acirc','Agrave;','Agrave','Alpha;','Aring;','Aring','Atilde;', 00074 'Atilde','Auml;','Auml','Beta;','COPY;','COPY','Ccedil;','Ccedil','Chi;', 00075 'Dagger;','Delta;','ETH;','ETH','Eacute;','Eacute','Ecirc;','Ecirc','Egrave;', 00076 'Egrave','Epsilon;','Eta;','Euml;','Euml','GT;','GT','Gamma;','Iacute;', 00077 'Iacute','Icirc;','Icirc','Igrave;','Igrave','Iota;','Iuml;','Iuml','Kappa;', 00078 'LT;','LT','Lambda;','Mu;','Ntilde;','Ntilde','Nu;','OElig;','Oacute;', 00079 'Oacute','Ocirc;','Ocirc','Ograve;','Ograve','Omega;','Omicron;','Oslash;', 00080 'Oslash','Otilde;','Otilde','Ouml;','Ouml','Phi;','Pi;','Prime;','Psi;', 00081 'QUOT;','QUOT','REG;','REG','Rho;','Scaron;','Sigma;','THORN;','THORN', 00082 'TRADE;','Tau;','Theta;','Uacute;','Uacute','Ucirc;','Ucirc','Ugrave;', 00083 'Ugrave','Upsilon;','Uuml;','Uuml','Xi;','Yacute;','Yacute','Yuml;','Zeta;', 00084 'aacute;','aacute','acirc;','acirc','acute;','acute','aelig;','aelig', 00085 'agrave;','agrave','alefsym;','alpha;','amp;','amp','and;','ang;','apos;', 00086 'aring;','aring','asymp;','atilde;','atilde','auml;','auml','bdquo;','beta;', 00087 'brvbar;','brvbar','bull;','cap;','ccedil;','ccedil','cedil;','cedil', 00088 'cent;','cent','chi;','circ;','clubs;','cong;','copy;','copy','crarr;', 00089 'cup;','curren;','curren','dArr;','dagger;','darr;','deg;','deg','delta;', 00090 
'diams;','divide;','divide','eacute;','eacute','ecirc;','ecirc','egrave;', 00091 'egrave','empty;','emsp;','ensp;','epsilon;','equiv;','eta;','eth;','eth', 00092 'euml;','euml','euro;','exist;','fnof;','forall;','frac12;','frac12', 00093 'frac14;','frac14','frac34;','frac34','frasl;','gamma;','ge;','gt;','gt', 00094 'hArr;','harr;','hearts;','hellip;','iacute;','iacute','icirc;','icirc', 00095 'iexcl;','iexcl','igrave;','igrave','image;','infin;','int;','iota;', 00096 'iquest;','iquest','isin;','iuml;','iuml','kappa;','lArr;','lambda;','lang;', 00097 'laquo;','laquo','larr;','lceil;','ldquo;','le;','lfloor;','lowast;','loz;', 00098 'lrm;','lsaquo;','lsquo;','lt;','lt','macr;','macr','mdash;','micro;','micro', 00099 'middot;','middot','minus;','mu;','nabla;','nbsp;','nbsp','ndash;','ne;', 00100 'ni;','not;','not','notin;','nsub;','ntilde;','ntilde','nu;','oacute;', 00101 'oacute','ocirc;','ocirc','oelig;','ograve;','ograve','oline;','omega;', 00102 'omicron;','oplus;','or;','ordf;','ordf','ordm;','ordm','oslash;','oslash', 00103 'otilde;','otilde','otimes;','ouml;','ouml','para;','para','part;','permil;', 00104 'perp;','phi;','pi;','piv;','plusmn;','plusmn','pound;','pound','prime;', 00105 'prod;','prop;','psi;','quot;','quot','rArr;','radic;','rang;','raquo;', 00106 'raquo','rarr;','rceil;','rdquo;','real;','reg;','reg','rfloor;','rho;', 00107 'rlm;','rsaquo;','rsquo;','sbquo;','scaron;','sdot;','sect;','sect','shy;', 00108 'shy','sigma;','sigmaf;','sim;','spades;','sub;','sube;','sum;','sup1;', 00109 'sup1','sup2;','sup2','sup3;','sup3','sup;','supe;','szlig;','szlig','tau;', 00110 'there4;','theta;','thetasym;','thinsp;','thorn;','thorn','tilde;','times;', 00111 'times','trade;','uArr;','uacute;','uacute','uarr;','ucirc;','ucirc', 00112 'ugrave;','ugrave','uml;','uml','upsih;','upsilon;','uuml;','uuml','weierp;', 00113 'xi;','yacute;','yacute','yen;','yen','yuml;','yuml','zeta;','zwj;','zwnj;'); 00114 00115 const PCDATA = 0; 00116 const RCDATA = 1; 00117 const 
CDATA = 2; 00118 const PLAINTEXT = 3; 00119 00120 const DOCTYPE = 0; 00121 const STARTTAG = 1; 00122 const ENDTAG = 2; 00123 const COMMENT = 3; 00124 const CHARACTR = 4; 00125 const EOF = 5; 00126 00127 public function __construct($data) { 00128 00129 $this->data = $data; 00130 $this->char = -1; 00131 $this->EOF = strlen($data); 00132 $this->tree = new HTML5TreeConstructer; 00133 $this->content_model = self::PCDATA; 00134 00135 $this->state = 'data'; 00136 00137 while($this->state !== null) { 00138 $this->{$this->state.'State'}(); 00139 } 00140 } 00141 00142 public function save() { 00143 return $this->tree->save(); 00144 } 00145 00146 private function char() { 00147 return ($this->char < $this->EOF) 00148 ? $this->data[$this->char] 00149 : false; 00150 } 00151 00152 private function character($s, $l = 0) { 00153 if($s + $l < $this->EOF) { 00154 if($l === 0) { 00155 return $this->data[$s]; 00156 } else { 00157 return substr($this->data, $s, $l); 00158 } 00159 } 00160 } 00161 00162 private function characters($char_class, $start) { 00163 return preg_replace('#^(['.$char_class.']+).*#s', '\\1', substr($this->data, $start)); 00164 } 00165 00166 private function dataState() { 00167 // Consume the next input character 00168 $this->char++; 00169 $char = $this->char(); 00170 00171 if($char === '&' && ($this->content_model === self::PCDATA || $this->content_model === self::RCDATA)) { 00172 /* U+0026 AMPERSAND (&) 00173 When the content model flag is set to one of the PCDATA or RCDATA 00174 states: switch to the entity data state. Otherwise: treat it as per 00175 the "anything else" entry below. 
*/ 00176 $this->state = 'entityData'; 00177 00178 } elseif($char === '-') { 00179 /* If the content model flag is set to either the RCDATA state or 00180 the CDATA state, and the escape flag is false, and there are at 00181 least three characters before this one in the input stream, and the 00182 last four characters in the input stream, including this one, are 00183 U+003C LESS-THAN SIGN, U+0021 EXCLAMATION MARK, U+002D HYPHEN-MINUS, 00184 and U+002D HYPHEN-MINUS ("<!--"), then set the escape flag to true. */ 00185 if(($this->content_model === self::RCDATA || $this->content_model === 00186 self::CDATA) && $this->escape === false && 00187 $this->char >= 3 && $this->character($this->char - 4, 4) === '<!--') { 00188 $this->escape = true; 00189 } 00190 00191 /* In any case, emit the input character as a character token. Stay 00192 in the data state. */ 00193 $this->emitToken(array( 00194 'type' => self::CHARACTR, 00195 'data' => $char 00196 )); 00197 00198 /* U+003C LESS-THAN SIGN (<) */ 00199 } elseif($char === '<' && ($this->content_model === self::PCDATA || 00200 (($this->content_model === self::RCDATA || 00201 $this->content_model === self::CDATA) && $this->escape === false))) { 00202 /* When the content model flag is set to the PCDATA state: switch 00203 to the tag open state. 00204 00205 When the content model flag is set to either the RCDATA state or 00206 the CDATA state and the escape flag is false: switch to the tag 00207 open state. 00208 00209 Otherwise: treat it as per the "anything else" entry below. */ 00210 $this->state = 'tagOpen'; 00211 00212 /* U+003E GREATER-THAN SIGN (>) */ 00213 } elseif($char === '>') { 00214 /* If the content model flag is set to either the RCDATA state or 00215 the CDATA state, and the escape flag is true, and the last three 00216 characters in the input stream including this one are U+002D 00217 HYPHEN-MINUS, U+002D HYPHEN-MINUS, U+003E GREATER-THAN SIGN ("-->"), 00218 set the escape flag to false. 
*/ 00219 if(($this->content_model === self::RCDATA || 00220 $this->content_model === self::CDATA) && $this->escape === true && 00221 $this->character($this->char, 3) === '-->') { 00222 $this->escape = false; 00223 } 00224 00225 /* In any case, emit the input character as a character token. 00226 Stay in the data state. */ 00227 $this->emitToken(array( 00228 'type' => self::CHARACTR, 00229 'data' => $char 00230 )); 00231 00232 } elseif($this->char === $this->EOF) { 00233 /* EOF 00234 Emit an end-of-file token. */ 00235 $this->EOF(); 00236 00237 } elseif($this->content_model === self::PLAINTEXT) { 00238 /* When the content model flag is set to the PLAINTEXT state 00239 THIS DIFFERS GREATLY FROM THE SPEC: Get the remaining characters of 00240 the text and emit it as a character token. */ 00241 $this->emitToken(array( 00242 'type' => self::CHARACTR, 00243 'data' => substr($this->data, $this->char) 00244 )); 00245 00246 $this->EOF(); 00247 00248 } else { 00249 /* Anything else 00250 THIS DIFFERS GREATLY FROM THE SPEC: Get as many character that 00251 otherwise would also be treated as a character token and emit it 00252 as a single character token. Stay in the data state. */ 00253 $len = strcspn($this->data, '<&', $this->char); 00254 $char = substr($this->data, $this->char, $len); 00255 $this->char += $len - 1; 00256 00257 $this->emitToken(array( 00258 'type' => self::CHARACTR, 00259 'data' => $char 00260 )); 00261 00262 $this->state = 'data'; 00263 } 00264 } 00265 00266 private function entityDataState() { 00267 // Attempt to consume an entity. 00268 $entity = $this->entity(); 00269 00270 // If nothing is returned, emit a U+0026 AMPERSAND character token. 00271 // Otherwise, emit the character token that was returned. 00272 $char = (!$entity) ? '&' : $entity; 00273 $this->emitToken(array( 00274 'type' => self::CHARACTR, 00275 'data' => $char 00276 )); 00277 00278 // Finally, switch to the data state. 
00279 $this->state = 'data'; 00280 } 00281 00282 private function tagOpenState() { 00283 switch($this->content_model) { 00284 case self::RCDATA: 00285 case self::CDATA: 00286 /* If the next input character is a U+002F SOLIDUS (/) character, 00287 consume it and switch to the close tag open state. If the next 00288 input character is not a U+002F SOLIDUS (/) character, emit a 00289 U+003C LESS-THAN SIGN character token and switch to the data 00290 state to process the next input character. */ 00291 if($this->character($this->char + 1) === '/') { 00292 $this->char++; 00293 $this->state = 'closeTagOpen'; 00294 00295 } else { 00296 $this->emitToken(array( 00297 'type' => self::CHARACTR, 00298 'data' => '<' 00299 )); 00300 00301 $this->state = 'data'; 00302 } 00303 break; 00304 00305 case self::PCDATA: 00306 // If the content model flag is set to the PCDATA state 00307 // Consume the next input character: 00308 $this->char++; 00309 $char = $this->char(); 00310 00311 if($char === '!') { 00312 /* U+0021 EXCLAMATION MARK (!) 00313 Switch to the markup declaration open state. */ 00314 $this->state = 'markupDeclarationOpen'; 00315 00316 } elseif($char === '/') { 00317 /* U+002F SOLIDUS (/) 00318 Switch to the close tag open state. */ 00319 $this->state = 'closeTagOpen'; 00320 00321 } elseif(preg_match('/^[A-Za-z]$/', $char)) { 00322 /* U+0041 LATIN LETTER A through to U+005A LATIN LETTER Z 00323 Create a new start tag token, set its tag name to the lowercase 00324 version of the input character (add 0x0020 to the character's code 00325 point), then switch to the tag name state. (Don't emit the token 00326 yet; further details will be filled in before it is emitted.) */ 00327 $this->token = array( 00328 'name' => strtolower($char), 00329 'type' => self::STARTTAG, 00330 'attr' => array() 00331 ); 00332 00333 $this->state = 'tagName'; 00334 00335 } elseif($char === '>') { 00336 /* U+003E GREATER-THAN SIGN (>) 00337 Parse error. 
Emit a U+003C LESS-THAN SIGN character token and a 00338 U+003E GREATER-THAN SIGN character token. Switch to the data state. */ 00339 $this->emitToken(array( 00340 'type' => self::CHARACTR, 00341 'data' => '<>' 00342 )); 00343 00344 $this->state = 'data'; 00345 00346 } elseif($char === '?') { 00347 /* U+003F QUESTION MARK (?) 00348 Parse error. Switch to the bogus comment state. */ 00349 $this->state = 'bogusComment'; 00350 00351 } else { 00352 /* Anything else 00353 Parse error. Emit a U+003C LESS-THAN SIGN character token and 00354 reconsume the current input character in the data state. */ 00355 $this->emitToken(array( 00356 'type' => self::CHARACTR, 00357 'data' => '<' 00358 )); 00359 00360 $this->char--; 00361 $this->state = 'data'; 00362 } 00363 break; 00364 } 00365 } 00366 00367 private function closeTagOpenState() { 00368 $next_node = strtolower($this->characters('A-Za-z', $this->char + 1)); 00369 $the_same = count($this->tree->stack) > 0 && $next_node === end($this->tree->stack)->nodeName; 00370 00371 if(($this->content_model === self::RCDATA || $this->content_model === self::CDATA) && 00372 (!$the_same || ($the_same && (!preg_match('/[\t\n\x0b\x0c >\/]/', 00373 $this->character($this->char + 1 + strlen($next_node))) || $this->EOF === $this->char)))) { 00374 /* If the content model flag is set to the RCDATA or CDATA states then 00375 examine the next few characters. If they do not match the tag name of 00376 the last start tag token emitted (case insensitively), or if they do but 00377 they are not immediately followed by one of the following characters: 00378 * U+0009 CHARACTER TABULATION 00379 * U+000A LINE FEED (LF) 00380 * U+000B LINE TABULATION 00381 * U+000C FORM FEED (FF) 00382 * U+0020 SPACE 00383 * U+003E GREATER-THAN SIGN (>) 00384 * U+002F SOLIDUS (/) 00385 * EOF 00386 ...then there is a parse error. 
Emit a U+003C LESS-THAN SIGN character 00387 token, a U+002F SOLIDUS character token, and switch to the data state 00388 to process the next input character. */ 00389 $this->emitToken(array( 00390 'type' => self::CHARACTR, 00391 'data' => '</' 00392 )); 00393 00394 $this->state = 'data'; 00395 00396 } else { 00397 /* Otherwise, if the content model flag is set to the PCDATA state, 00398 or if the next few characters do match that tag name, consume the 00399 next input character: */ 00400 $this->char++; 00401 $char = $this->char(); 00402 00403 if(preg_match('/^[A-Za-z]$/', $char)) { 00404 /* U+0041 LATIN LETTER A through to U+005A LATIN LETTER Z 00405 Create a new end tag token, set its tag name to the lowercase version 00406 of the input character (add 0x0020 to the character's code point), then 00407 switch to the tag name state. (Don't emit the token yet; further details 00408 will be filled in before it is emitted.) */ 00409 $this->token = array( 00410 'name' => strtolower($char), 00411 'type' => self::ENDTAG 00412 ); 00413 00414 $this->state = 'tagName'; 00415 00416 } elseif($char === '>') { 00417 /* U+003E GREATER-THAN SIGN (>) 00418 Parse error. Switch to the data state. */ 00419 $this->state = 'data'; 00420 00421 } elseif($this->char === $this->EOF) { 00422 /* EOF 00423 Parse error. Emit a U+003C LESS-THAN SIGN character token and a U+002F 00424 SOLIDUS character token. Reconsume the EOF character in the data state. */ 00425 $this->emitToken(array( 00426 'type' => self::CHARACTR, 00427 'data' => '</' 00428 )); 00429 00430 $this->char--; 00431 $this->state = 'data'; 00432 00433 } else { 00434 /* Parse error. Switch to the bogus comment state. 
*/ 00435 $this->state = 'bogusComment'; 00436 } 00437 } 00438 } 00439 00440 private function tagNameState() { 00441 // Consume the next input character: 00442 $this->char++; 00443 $char = $this->character($this->char); 00444 00445 if(preg_match('/^[\t\n\x0b\x0c ]$/', $char)) { 00446 /* U+0009 CHARACTER TABULATION 00447 U+000A LINE FEED (LF) 00448 U+000B LINE TABULATION 00449 U+000C FORM FEED (FF) 00450 U+0020 SPACE 00451 Switch to the before attribute name state. */ 00452 $this->state = 'beforeAttributeName'; 00453 00454 } elseif($char === '>') { 00455 /* U+003E GREATER-THAN SIGN (>) 00456 Emit the current tag token. Switch to the data state. */ 00457 $this->emitToken($this->token); 00458 $this->state = 'data'; 00459 00460 } elseif($this->char === $this->EOF) { 00461 /* EOF 00462 Parse error. Emit the current tag token. Reconsume the EOF 00463 character in the data state. */ 00464 $this->emitToken($this->token); 00465 00466 $this->char--; 00467 $this->state = 'data'; 00468 00469 } elseif($char === '/') { 00470 /* U+002F SOLIDUS (/) 00471 Parse error unless this is a permitted slash. Switch to the before 00472 attribute name state. */ 00473 $this->state = 'beforeAttributeName'; 00474 00475 } else { 00476 /* Anything else 00477 Append the current input character to the current tag token's tag name. 00478 Stay in the tag name state. */ 00479 $this->token['name'] .= strtolower($char); 00480 $this->state = 'tagName'; 00481 } 00482 } 00483 00484 private function beforeAttributeNameState() { 00485 // Consume the next input character: 00486 $this->char++; 00487 $char = $this->character($this->char); 00488 00489 if(preg_match('/^[\t\n\x0b\x0c ]$/', $char)) { 00490 /* U+0009 CHARACTER TABULATION 00491 U+000A LINE FEED (LF) 00492 U+000B LINE TABULATION 00493 U+000C FORM FEED (FF) 00494 U+0020 SPACE 00495 Stay in the before attribute name state. 
*/ 00496 $this->state = 'beforeAttributeName'; 00497 00498 } elseif($char === '>') { 00499 /* U+003E GREATER-THAN SIGN (>) 00500 Emit the current tag token. Switch to the data state. */ 00501 $this->emitToken($this->token); 00502 $this->state = 'data'; 00503 00504 } elseif($char === '/') { 00505 /* U+002F SOLIDUS (/) 00506 Parse error unless this is a permitted slash. Stay in the before 00507 attribute name state. */ 00508 $this->state = 'beforeAttributeName'; 00509 00510 } elseif($this->char === $this->EOF) { 00511 /* EOF 00512 Parse error. Emit the current tag token. Reconsume the EOF 00513 character in the data state. */ 00514 $this->emitToken($this->token); 00515 00516 $this->char--; 00517 $this->state = 'data'; 00518 00519 } else { 00520 /* Anything else 00521 Start a new attribute in the current tag token. Set that attribute's 00522 name to the current input character, and its value to the empty string. 00523 Switch to the attribute name state. */ 00524 $this->token['attr'][] = array( 00525 'name' => strtolower($char), 00526 'value' => null 00527 ); 00528 00529 $this->state = 'attributeName'; 00530 } 00531 } 00532 00533 private function attributeNameState() { 00534 // Consume the next input character: 00535 $this->char++; 00536 $char = $this->character($this->char); 00537 00538 if(preg_match('/^[\t\n\x0b\x0c ]$/', $char)) { 00539 /* U+0009 CHARACTER TABULATION 00540 U+000A LINE FEED (LF) 00541 U+000B LINE TABULATION 00542 U+000C FORM FEED (FF) 00543 U+0020 SPACE 00544 Stay in the before attribute name state. */ 00545 $this->state = 'afterAttributeName'; 00546 00547 } elseif($char === '=') { 00548 /* U+003D EQUALS SIGN (=) 00549 Switch to the before attribute value state. */ 00550 $this->state = 'beforeAttributeValue'; 00551 00552 } elseif($char === '>') { 00553 /* U+003E GREATER-THAN SIGN (>) 00554 Emit the current tag token. Switch to the data state. 
*/ 00555 $this->emitToken($this->token); 00556 $this->state = 'data'; 00557 00558 } elseif($char === '/' && $this->character($this->char + 1) !== '>') { 00559 /* U+002F SOLIDUS (/) 00560 Parse error unless this is a permitted slash. Switch to the before 00561 attribute name state. */ 00562 $this->state = 'beforeAttributeName'; 00563 00564 } elseif($this->char === $this->EOF) { 00565 /* EOF 00566 Parse error. Emit the current tag token. Reconsume the EOF 00567 character in the data state. */ 00568 $this->emitToken($this->token); 00569 00570 $this->char--; 00571 $this->state = 'data'; 00572 00573 } else { 00574 /* Anything else 00575 Append the current input character to the current attribute's name. 00576 Stay in the attribute name state. */ 00577 $last = count($this->token['attr']) - 1; 00578 $this->token['attr'][$last]['name'] .= strtolower($char); 00579 00580 $this->state = 'attributeName'; 00581 } 00582 } 00583 00584 private function afterAttributeNameState() { 00585 // Consume the next input character: 00586 $this->char++; 00587 $char = $this->character($this->char); 00588 00589 if(preg_match('/^[\t\n\x0b\x0c ]$/', $char)) { 00590 /* U+0009 CHARACTER TABULATION 00591 U+000A LINE FEED (LF) 00592 U+000B LINE TABULATION 00593 U+000C FORM FEED (FF) 00594 U+0020 SPACE 00595 Stay in the after attribute name state. */ 00596 $this->state = 'afterAttributeName'; 00597 00598 } elseif($char === '=') { 00599 /* U+003D EQUALS SIGN (=) 00600 Switch to the before attribute value state. */ 00601 $this->state = 'beforeAttributeValue'; 00602 00603 } elseif($char === '>') { 00604 /* U+003E GREATER-THAN SIGN (>) 00605 Emit the current tag token. Switch to the data state. */ 00606 $this->emitToken($this->token); 00607 $this->state = 'data'; 00608 00609 } elseif($char === '/' && $this->character($this->char + 1) !== '>') { 00610 /* U+002F SOLIDUS (/) 00611 Parse error unless this is a permitted slash. Switch to the 00612 before attribute name state. 
*/ 00613 $this->state = 'beforeAttributeName'; 00614 00615 } elseif($this->char === $this->EOF) { 00616 /* EOF 00617 Parse error. Emit the current tag token. Reconsume the EOF 00618 character in the data state. */ 00619 $this->emitToken($this->token); 00620 00621 $this->char--; 00622 $this->state = 'data'; 00623 00624 } else { 00625 /* Anything else 00626 Start a new attribute in the current tag token. Set that attribute's 00627 name to the current input character, and its value to the empty string. 00628 Switch to the attribute name state. */ 00629 $this->token['attr'][] = array( 00630 'name' => strtolower($char), 00631 'value' => null 00632 ); 00633 00634 $this->state = 'attributeName'; 00635 } 00636 } 00637 00638 private function beforeAttributeValueState() { 00639 // Consume the next input character: 00640 $this->char++; 00641 $char = $this->character($this->char); 00642 00643 if(preg_match('/^[\t\n\x0b\x0c ]$/', $char)) { 00644 /* U+0009 CHARACTER TABULATION 00645 U+000A LINE FEED (LF) 00646 U+000B LINE TABULATION 00647 U+000C FORM FEED (FF) 00648 U+0020 SPACE 00649 Stay in the before attribute value state. */ 00650 $this->state = 'beforeAttributeValue'; 00651 00652 } elseif($char === '"') { 00653 /* U+0022 QUOTATION MARK (") 00654 Switch to the attribute value (double-quoted) state. */ 00655 $this->state = 'attributeValueDoubleQuoted'; 00656 00657 } elseif($char === '&') { 00658 /* U+0026 AMPERSAND (&) 00659 Switch to the attribute value (unquoted) state and reconsume 00660 this input character. */ 00661 $this->char--; 00662 $this->state = 'attributeValueUnquoted'; 00663 00664 } elseif($char === '\'') { 00665 /* U+0027 APOSTROPHE (') 00666 Switch to the attribute value (single-quoted) state. */ 00667 $this->state = 'attributeValueSingleQuoted'; 00668 00669 } elseif($char === '>') { 00670 /* U+003E GREATER-THAN SIGN (>) 00671 Emit the current tag token. Switch to the data state. 
*/ 00672 $this->emitToken($this->token); 00673 $this->state = 'data'; 00674 00675 } else { 00676 /* Anything else 00677 Append the current input character to the current attribute's value. 00678 Switch to the attribute value (unquoted) state. */ 00679 $last = count($this->token['attr']) - 1; 00680 $this->token['attr'][$last]['value'] .= $char; 00681 00682 $this->state = 'attributeValueUnquoted'; 00683 } 00684 } 00685 00686 private function attributeValueDoubleQuotedState() { 00687 // Consume the next input character: 00688 $this->char++; 00689 $char = $this->character($this->char); 00690 00691 if($char === '"') { 00692 /* U+0022 QUOTATION MARK (") 00693 Switch to the before attribute name state. */ 00694 $this->state = 'beforeAttributeName'; 00695 00696 } elseif($char === '&') { 00697 /* U+0026 AMPERSAND (&) 00698 Switch to the entity in attribute value state. */ 00699 $this->entityInAttributeValueState('double'); 00700 00701 } elseif($this->char === $this->EOF) { 00702 /* EOF 00703 Parse error. Emit the current tag token. Reconsume the character 00704 in the data state. */ 00705 $this->emitToken($this->token); 00706 00707 $this->char--; 00708 $this->state = 'data'; 00709 00710 } else { 00711 /* Anything else 00712 Append the current input character to the current attribute's value. 00713 Stay in the attribute value (double-quoted) state. */ 00714 $last = count($this->token['attr']) - 1; 00715 $this->token['attr'][$last]['value'] .= $char; 00716 00717 $this->state = 'attributeValueDoubleQuoted'; 00718 } 00719 } 00720 00721 private function attributeValueSingleQuotedState() { 00722 // Consume the next input character: 00723 $this->char++; 00724 $char = $this->character($this->char); 00725 00726 if($char === '\'') { 00727 /* U+0022 QUOTATION MARK (') 00728 Switch to the before attribute name state. */ 00729 $this->state = 'beforeAttributeName'; 00730 00731 } elseif($char === '&') { 00732 /* U+0026 AMPERSAND (&) 00733 Switch to the entity in attribute value state. 
*/ 00734 $this->entityInAttributeValueState('single'); 00735 00736 } elseif($this->char === $this->EOF) { 00737 /* EOF 00738 Parse error. Emit the current tag token. Reconsume the character 00739 in the data state. */ 00740 $this->emitToken($this->token); 00741 00742 $this->char--; 00743 $this->state = 'data'; 00744 00745 } else { 00746 /* Anything else 00747 Append the current input character to the current attribute's value. 00748 Stay in the attribute value (single-quoted) state. */ 00749 $last = count($this->token['attr']) - 1; 00750 $this->token['attr'][$last]['value'] .= $char; 00751 00752 $this->state = 'attributeValueSingleQuoted'; 00753 } 00754 } 00755 00756 private function attributeValueUnquotedState() { 00757 // Consume the next input character: 00758 $this->char++; 00759 $char = $this->character($this->char); 00760 00761 if(preg_match('/^[\t\n\x0b\x0c ]$/', $char)) { 00762 /* U+0009 CHARACTER TABULATION 00763 U+000A LINE FEED (LF) 00764 U+000B LINE TABULATION 00765 U+000C FORM FEED (FF) 00766 U+0020 SPACE 00767 Switch to the before attribute name state. */ 00768 $this->state = 'beforeAttributeName'; 00769 00770 } elseif($char === '&') { 00771 /* U+0026 AMPERSAND (&) 00772 Switch to the entity in attribute value state. */ 00773 $this->entityInAttributeValueState(); 00774 00775 } elseif($char === '>') { 00776 /* U+003E GREATER-THAN SIGN (>) 00777 Emit the current tag token. Switch to the data state. */ 00778 $this->emitToken($this->token); 00779 $this->state = 'data'; 00780 00781 } else { 00782 /* Anything else 00783 Append the current input character to the current attribute's value. 00784 Stay in the attribute value (unquoted) state. */ 00785 $last = count($this->token['attr']) - 1; 00786 $this->token['attr'][$last]['value'] .= $char; 00787 00788 $this->state = 'attributeValueUnquoted'; 00789 } 00790 } 00791 00792 private function entityInAttributeValueState() { 00793 // Attempt to consume an entity. 
$entity = $this->entity();

        // entity() returns false when no character reference could be
        // consumed; in that case append a U+0026 AMPERSAND character to the
        // current attribute's value. Otherwise append the decoded text. The
        // comparison is strict so that a reference decoding to "0" (a falsy
        // string in PHP) is not mistaken for a failed match.
        $char = ($entity === false)
            ? '&'
            : $entity;

        $last = count($this->token['attr']) - 1;
        $this->token['attr'][$last]['value'] .= $char;
    }

    /**
     * Bogus comment state: emits everything up to the next ">" (or the end
     * of the input) as a single comment token, then returns to the data state.
     */
    private function bogusCommentState() {
        /* Consume every character up to the first U+003E GREATER-THAN SIGN
        character (>) or the end of the file (EOF), whichever comes first. Emit
        a comment token whose data is the concatenation of all the characters
        starting from and including the character that caused the state machine
        to switch into the bogus comment state, up to and including the last
        consumed character before the U+003E character, if any, or up to the
        end of the file otherwise. (If the comment was started by the end of
        the file (EOF), the token is empty.) */
        $data = $this->characters('^>', $this->char);
        $this->emitToken(array(
            'data' => $data,
            'type' => self::COMMENT
        ));

        // Skip over the characters that were just emitted.
        $this->char += strlen($data);

        /* Switch to the data state. */
        $this->state = 'data';

        /* If the end of the file was reached, reconsume the EOF character. */
        if($this->char === $this->EOF) {
            $this->char = $this->EOF - 1;
        }
    }

    /**
     * Markup declaration open state: decides whether the declaration is a
     * comment, a DOCTYPE, or a bogus comment.
     */
    private function markupDeclarationOpenState() {
        /* If the next two characters are both U+002D HYPHEN-MINUS (-)
        characters, consume those two characters, create a comment token whose
        data is the empty string, and switch to the comment state. */
        if($this->character($this->char + 1, 2) === '--') {
            $this->char += 2;
            $this->state = 'comment';
            $this->token = array(
                'data' => null,
                'type' => self::COMMENT
            );

        /* Otherwise if the next seven characters are a case-insensitive match
        for the word "DOCTYPE", then consume those characters and switch to the
        DOCTYPE state. */
        } elseif(strtolower($this->character($this->char + 1, 7)) === 'doctype') {
            $this->char += 7;
            $this->state = 'doctype';

        /* Otherwise, it is a parse error. Switch to the bogus comment state.
        The next character that is consumed, if any, is the first character
        that will be in the comment. */
        } else {
            $this->char++;
            $this->state = 'bogusComment';
        }
    }

    /**
     * Comment state: accumulates comment data until a "-" or EOF is seen.
     */
    private function commentState() {
        /* Consume the next input character: */
        $this->char++;
        $char = $this->char();

        /* U+002D HYPHEN-MINUS (-) */
        if($char === '-') {
            /* Switch to the comment dash state */
            $this->state = 'commentDash';

        /* EOF */
        } elseif($this->char === $this->EOF) {
            /* Parse error. Emit the comment token. Reconsume the EOF character
            in the data state. */
            $this->emitToken($this->token);
            $this->char--;
            $this->state = 'data';

        /* Anything else */
        } else {
            /* Append the input character to the comment token's data. Stay in
            the comment state. */
            $this->token['data'] .= $char;
        }
    }

    /**
     * Comment dash state: one "-" has been seen inside a comment.
     */
    private function commentDashState() {
        /* Consume the next input character: */
        $this->char++;
        $char = $this->char();

        /* U+002D HYPHEN-MINUS (-) */
        if($char === '-') {
            /* Switch to the comment end state */
            $this->state = 'commentEnd';

        /* EOF */
        } elseif($this->char === $this->EOF) {
            /* Parse error. Emit the comment token. Reconsume the EOF character
            in the data state. */
            $this->emitToken($this->token);
            $this->char--;
            $this->state = 'data';

        /* Anything else */
        } else {
            /* Append a U+002D HYPHEN-MINUS (-) character and the input
            character to the comment token's data. Switch to the comment state. */
            $this->token['data'] .= '-'.$char;
            $this->state = 'comment';
        }
    }

    /**
     * Comment end state: "--" has been seen inside a comment.
     */
    private function commentEndState() {
        /* Consume the next input character: */
        $this->char++;
        $char = $this->char();

        if($char === '>') {
            // "-->" closes the comment: emit it and go back to the data state.
            $this->emitToken($this->token);
            $this->state = 'data';

        } elseif($char === '-') {
            // A further dash: keep one dash as data, stay in this state.
            $this->token['data'] .= '-';

        } elseif($this->char === $this->EOF) {
            // Emit what we have and reconsume the EOF in the data state.
            $this->emitToken($this->token);
            $this->char--;
            $this->state = 'data';

        } else {
            // The two dashes were ordinary comment data after all.
            $this->token['data'] .= '--'.$char;
            $this->state = 'comment';
        }
    }

    /**
     * DOCTYPE state: always moves on to the before DOCTYPE name state; a
     * non-whitespace character is un-consumed first so that state sees it.
     */
    private function doctypeState() {
        /* Consume the next input character: */
        $this->char++;
        $char = $this->char();

        if(preg_match('/^[\t\n\x0b\x0c ]$/', $char)) {
            $this->state = 'beforeDoctypeName';

        } else {
            $this->char--;
            $this->state = 'beforeDoctypeName';
        }
    }

    /**
     * Before DOCTYPE name state: skips whitespace and starts the DOCTYPE
     * token. Every token created here starts out flagged as being in error.
     */
    private function beforeDoctypeNameState() {
        /* Consume the next input character: */
        $this->char++;
        $char = $this->char();

        if(preg_match('/^[\t\n\x0b\x0c ]$/', $char)) {
            // Stay in the before DOCTYPE name state.

        } elseif(preg_match('/^[a-z]$/', $char)) {
            // Lowercase letters are stored uppercased.
            $this->token = array(
                'name' => strtoupper($char),
                'type' => self::DOCTYPE,
                'error' => true
            );

            $this->state = 'doctypeName';

        } elseif($char === '>') {
            // "<!DOCTYPE>" without a name: emit a nameless, erroneous token.
            $this->emitToken(array(
                'name' => null,
                'type' => self::DOCTYPE,
                'error' => true
            ));

            $this->state = 'data';

        } elseif($this->char === $this->EOF) {
            $this->emitToken(array(
                'name' => null,
                'type' => self::DOCTYPE,
                'error' => true
            ));

            // Reconsume the EOF in the data state.
            $this->char--;
            $this->state = 'data';

        } else {
            $this->token = array(
                'name' => $char,
                'type' => self::DOCTYPE,
                'error' => true
            );

            $this->state = 'doctypeName';
        }
    }

    /**
     * DOCTYPE name state: accumulates the DOCTYPE name and keeps the token's
     * error flag in sync with it (the flag is cleared only while the name is
     * exactly "HTML").
     */
    private function doctypeNameState() {
        /* Consume the next input character: */
        $this->char++;
        $char = $this->char();

        if(preg_match('/^[\t\n\x0b\x0c ]$/', $char)) {
            $this->state = 'AfterDoctypeName';

        } elseif($char === '>') {
            $this->emitToken($this->token);
            $this->state = 'data';

        } elseif(preg_match('/^[a-z]$/', $char)) {
            $this->token['name'] .= strtoupper($char);

        } elseif($this->char === $this->EOF) {
            $this->emitToken($this->token);
            $this->char--;
            $this->state = 'data';

        } else {
            $this->token['name'] .= $char;
        }

        // Recomputed after every character, including after the token has
        // already been emitted above.
        $this->token['error'] = ($this->token['name'] === 'HTML')
            ? false
            : true;
    }

    /**
     * After DOCTYPE name state: only whitespace, ">" or EOF may follow the
     * name; anything else marks the token as erroneous.
     */
    private function afterDoctypeNameState() {
        /* Consume the next input character: */
        $this->char++;
        $char = $this->char();

        if(preg_match('/^[\t\n\x0b\x0c ]$/', $char)) {
            // Stay in the after DOCTYPE name state.

        } elseif($char === '>') {
            $this->emitToken($this->token);
            $this->state = 'data';

        } elseif($this->char === $this->EOF) {
            $this->emitToken($this->token);
            $this->char--;
            $this->state = 'data';

        } else {
            $this->token['error'] = true;
            $this->state = 'bogusDoctype';
        }
    }

    /**
     * Bogus DOCTYPE state: discards input until ">" or EOF, then emits the
     * pending DOCTYPE token.
     */
    private function bogusDoctypeState() {
        /* Consume the next input character: */
        $this->char++;
        $char = $this->char();

        if($char === '>') {
            $this->emitToken($this->token);
            $this->state = 'data';

        } elseif($this->char === $this->EOF) {
            $this->emitToken($this->token);
            $this->char--;
            $this->state = 'data';

        } else {
            // Stay in the bogus DOCTYPE state.
        }
    }

    /**
     * Tries to consume a character reference.
     *
     * This definition is used when parsing entities in text and in
     * attributes. On entry $this->char indexes the U+0026 AMPERSAND
     * character. On success the pointer is left on the last character that
     * was consumed (including a terminating semicolon, if present). On
     * failure the pointer is restored to the ampersand.
     *
     * @return string|false The decoded text, or false if no reference
     *                      could be matched (a parse error; nothing is
     *                      consumed).
     */
    private function entity() {
        $start = $this->char;
        $entity = null;

        // The behaviour depends on the identity of the next character (the
        // one immediately after the U+0026 AMPERSAND character):
        switch($this->character($start + 1)) {
            // U+0023 NUMBER SIGN (#)
            case '#':
                // The behaviour further depends on the character after the
                // U+0023 NUMBER SIGN, i.e. two positions past the ampersand.
                $marker = $this->character($start + 2);

                // U+0078 LATIN SMALL LETTER X / U+0058 LATIN CAPITAL LETTER X
                // select hexadecimal digits (0-9, A-F, a-f); anything else
                // selects decimal digits (0-9).
                $is_hex = ($marker === 'x' || $marker === 'X');
                $char_class = $is_hex ? '0-9A-Fa-f' : '0-9';

                // First digit: after "&#" for decimal, after "&#x" for hex.
                $digits_at = $start + ($is_hex ? 3 : 2);

                // Consume as many characters as match the range of characters
                // given above.
                $e_name = (string) $this->characters($char_class, $digits_at);

                // Only accept the result if it really is a non-empty run of
                // digits from the selected range.
                if(preg_match('/^['.$char_class.']+$/D', $e_name)) {
                    $entity = ($is_hex ? '#x' : '#').$e_name;

                    // Leave the pointer on the last digit ...
                    $this->char = $digits_at + strlen($e_name) - 1;

                    // ... or on the terminating semicolon, if there is one.
                    if($this->character($this->char + 1) === ';') {
                        $this->char++;
                    }
                }
            break;

            // Anything else
            default:
                // Consume the maximum number of characters possible, with the
                // consumed characters case-sensitively matching one of the
                // identifiers in the entities table. Candidates are tried
                // longest first so that e.g. "notin;" wins over "not".
                $e_name = (string) $this->characters('0-9A-Za-z;', $start + 1);
                $len = strlen($e_name);

                for($c = $len; $c >= 1; $c--) {
                    $id = substr($e_name, 0, $c);

                    if(in_array($id, $this->entities, true)) {
                        // Pointer on the last character of the identifier.
                        $this->char = $start + $c;

                        // The table also lists some names without their
                        // semicolon; if such a name matched and a semicolon
                        // follows, consume that semicolon too.
                        if($e_name[$c - 1] !== ';' && $c < $len && $e_name[$c] === ';') {
                            $this->char++;
                        }

                        // Table identifiers may already end in ";". Strip it
                        // so exactly one semicolon is appended when decoding.
                        $entity = rtrim($id, ';');
                        break;
                    }
                }
            break;
        }

        if($entity === null) {
            // If no match can be made, then this is a parse error. No
            // characters are consumed, and nothing is returned.
            $this->char = $start;
            return false;
        }

        // Return a character token for the character corresponding to the
        // entity name (as given by the second column of the entities table).
        return html_entity_decode('&'.$entity.';', ENT_QUOTES, 'UTF-8');
    }

    /**
     * Hands a token to the tree constructor and updates the content model:
     * an integer result becomes the new content model flag; otherwise an
     * end tag resets the flag to PCDATA.
     */
    private function emitToken($token) {
        $emit = $this->tree->emitToken($token);

        if(is_int($emit)) {
            $this->content_model = $emit;

        } elseif($token['type'] === self::ENDTAG) {
            $this->content_model = self::PCDATA;
        }
    }

    /**
     * Clears the tokenizer state and sends an end-of-file token to the tree
     * constructor.
     */
    private function EOF() {
        $this->state = null;
        $this->tree->emitToken(array(
            'type' => self::EOF
        ));
    }
}

class HTML5TreeConstructer {
    // Stack of open elements.
    public $stack = array();

    private $phase;
    private $mode;
    private $dom;
    private $foster_parent = null;
    // List of active formatting elements (may contain self::MARKER entries).
    private $a_formatting = array();

    private $head_pointer = null;
    private $form_pointer = null;

    private $scoping = array('button','caption','html','marquee','object','table','td','th');
    private $formatting = array('a','b','big','em','font','i','nobr','s','small','strike','strong','tt','u');
    private $special = array('address','area','base','basefont','bgsound',
    'blockquote','body','br','center','col','colgroup','dd','dir','div','dl',
    'dt','embed','fieldset','form','frame','frameset','h1','h2','h3','h4','h5',
    'h6','head','hr','iframe','image','img','input','isindex','li','link',
    'listing','menu','meta','noembed','noframes','noscript','ol','optgroup',
    'option','p','param','plaintext','pre','script','select','spacer','style',
    'tbody','textarea','tfoot','thead','title','tr','ul','wbr');

    // The different phases.
    const INIT_PHASE = 0;
    const ROOT_PHASE = 1;
    const MAIN_PHASE = 2;
    const END_PHASE  = 3;

    // The different insertion modes for the main phase.
const BEFOR_HEAD = 0;
    const IN_HEAD    = 1;
    const AFTER_HEAD = 2;
    const IN_BODY    = 3;
    const IN_TABLE   = 4;
    const IN_CAPTION = 5;
    const IN_CGROUP  = 6;
    const IN_TBODY   = 7;
    const IN_ROW     = 8;
    const IN_CELL    = 9;
    const IN_SELECT  = 10;
    const AFTER_BODY = 11;
    const IN_FRAME   = 12;
    const AFTR_FRAME = 13;

    // The different types of elements.
    const SPECIAL    = 0;
    const SCOPING    = 1;
    const FORMATTING = 2;
    const PHRASING   = 3;

    const MARKER     = 0;

    /**
     * Starts in the initial phase / "before head" mode with an empty UTF-8
     * DOMDocument as the tree under construction.
     */
    public function __construct() {
        $this->phase = self::INIT_PHASE;
        $this->mode = self::BEFOR_HEAD;
        $this->dom = new DOMDocument;

        $this->dom->encoding = 'UTF-8';
        $this->dom->preserveWhiteSpace = true;
        $this->dom->substituteEntities = true;
        $this->dom->strictErrorChecking = false;
    }

    /**
     * Process tag tokens: routes a token to the handler of the current
     * phase and passes that handler's result back to the caller.
     */
    public function emitToken($token) {
        switch($this->phase) {
            case self::INIT_PHASE: return $this->initPhase($token);
            case self::ROOT_PHASE: return $this->rootElementPhase($token);
            case self::MAIN_PHASE: return $this->mainPhase($token);
            case self::END_PHASE : return $this->trailingEndPhase($token);
        }
    }

    /**
     * Initial phase: waits for a DOCTYPE; any other significant token moves
     * straight on to the root element phase.
     */
    private function initPhase($token) {
        /* Initially, the tree construction stage must handle each token
        emitted from the tokenisation stage as follows: */

        /* A DOCTYPE token that is marked as being in error
        A comment token
        A start tag token
        An end tag token
        A character token that is not one of U+0009 CHARACTER TABULATION,
            U+000A LINE FEED (LF), U+000B LINE TABULATION, U+000C FORM FEED (FF),
            or U+0020 SPACE
        An end-of-file token */
        if((isset($token['error']) && $token['error']) ||
        $token['type'] === HTML5::COMMENT ||
        $token['type'] === HTML5::STARTTAG ||
        $token['type'] === HTML5::ENDTAG ||
        $token['type'] === HTML5::EOF ||
        ($token['type'] === HTML5::CHARACTR && isset($token['data']) &&
        !preg_match('/^[\t\n\x0b\x0c ]+$/', $token['data']))) {
            /* This specification does not define how to handle this case. In
            particular, user agents may ignore the entirety of this specification
            altogether for such documents, and instead invoke special parse modes
            with a greater emphasis on backwards compatibility. */

            $this->phase = self::ROOT_PHASE;
            return $this->rootElementPhase($token);

        /* A DOCTYPE token marked as being correct */
        } elseif(isset($token['error']) && !$token['error']) {
            /* The specification asks for a DocumentType node named "HTML" to
            be appended to the Document node here. This implementation does
            not add one (the object it used to create was never attached to
            the document); it only advances the phase. */

            /* Then, switch to the root element phase of the tree construction
            stage. */
            $this->phase = self::ROOT_PHASE;

        /* A character token that is one of U+0009 CHARACTER TABULATION,
            U+000A LINE FEED (LF), U+000B LINE TABULATION, U+000C FORM FEED (FF),
            or U+0020 SPACE */
        } elseif(isset($token['data']) && preg_match('/^[\t\n\x0b\x0c ]+$/',
        $token['data'])) {
            /* Append that character to the Document node. */
            $text = $this->dom->createTextNode($token['data']);
            $this->dom->appendChild($text);
        }
    }

    /**
     * Root element phase: creates the <html> element on the first
     * significant token and then switches to the main phase.
     */
    private function rootElementPhase($token) {
        /* After the initial phase, as each token is emitted from the tokenisation
        stage, it must be processed as described in this section. */

        /* A DOCTYPE token */
        if($token['type'] === HTML5::DOCTYPE) {
            // Parse error. Ignore the token.

        /* A comment token */
        } elseif($token['type'] === HTML5::COMMENT) {
            /* Append a Comment node to the Document object with the data
            attribute set to the data given in the comment token. */
            $comment = $this->dom->createComment($token['data']);
            $this->dom->appendChild($comment);

        /* A character token that is one of U+0009 CHARACTER TABULATION,
            U+000A LINE FEED (LF), U+000B LINE TABULATION, U+000C FORM FEED (FF),
            or U+0020 SPACE */
        } elseif($token['type'] === HTML5::CHARACTR &&
        preg_match('/^[\t\n\x0b\x0c ]+$/', $token['data'])) {
            /* Append that character to the Document node. */
            $text = $this->dom->createTextNode($token['data']);
            $this->dom->appendChild($text);

        /* A character token that is not one of U+0009 CHARACTER TABULATION,
            U+000A LINE FEED (LF), U+000B LINE TABULATION, U+000C FORM FEED
            (FF), or U+0020 SPACE
        A start tag token
        An end tag token
        An end-of-file token */
        } elseif(($token['type'] === HTML5::CHARACTR &&
        !preg_match('/^[\t\n\x0b\x0c ]+$/', $token['data'])) ||
        $token['type'] === HTML5::STARTTAG ||
        $token['type'] === HTML5::ENDTAG ||
        $token['type'] === HTML5::EOF) {
            /* Create an HTMLElement node with the tag name html, in the HTML
            namespace. Append it to the Document object. Switch to the main
            phase and reprocess the current token. */
            $html = $this->dom->createElement('html');
            $this->dom->appendChild($html);
            $this->stack[] = $html;

            $this->phase = self::MAIN_PHASE;
            return $this->mainPhase($token);
        }
    }

    /**
     * Main phase: handles the tokens common to all insertion modes and
     * otherwise delegates to the handler of the current insertion mode.
     */
    private function mainPhase($token) {
        /* Tokens in the main phase must be handled as follows: */

        /* A DOCTYPE token */
        if($token['type'] === HTML5::DOCTYPE) {
            // Parse error. Ignore the token.

        /* A start tag token with the tag name "html" */
        } elseif($token['type'] === HTML5::STARTTAG && $token['name'] === 'html') {
            /* If this start tag token was not the first start tag token, then
            it is a parse error. */

            /* For each attribute on the token, check to see if the attribute
            is already present on the top element of the stack of open elements.
            If it is not, add the attribute and its corresponding value to that
            element. */
            foreach($token['attr'] as $attr) {
                if(!$this->stack[0]->hasAttribute($attr['name'])) {
                    $this->stack[0]->setAttribute($attr['name'], $attr['value']);
                }
            }

        /* An end-of-file token */
        } elseif($token['type'] === HTML5::EOF) {
            /* Generate implied end tags. */
            $this->generateImpliedEndTags();

        /* Anything else. */
        } else {
            /* Depends on the insertion mode. (There is deliberately no case
            for self::END_PHASE here: it is a phase, not an insertion mode,
            and its value 3 is the same as self::IN_BODY, so such a case
            label could never be selected.) */
            switch($this->mode) {
                case self::BEFOR_HEAD: return $this->beforeHead($token);
                case self::IN_HEAD:    return $this->inHead($token);
                case self::AFTER_HEAD: return $this->afterHead($token);
                case self::IN_BODY:    return $this->inBody($token);
                case self::IN_TABLE:   return $this->inTable($token);
                case self::IN_CAPTION: return $this->inCaption($token);
                case self::IN_CGROUP:  return $this->inColumnGroup($token);
                case self::IN_TBODY:   return $this->inTableBody($token);
                case self::IN_ROW:     return $this->inRow($token);
                case self::IN_CELL:    return $this->inCell($token);
                case self::IN_SELECT:  return $this->inSelect($token);
                case self::AFTER_BODY: return $this->afterBody($token);
                case self::IN_FRAME:   return $this->inFrameset($token);
                case self::AFTR_FRAME: return $this->afterFrameset($token);
            }
        }
    }
private function beforeHead($token) {
        /* "Before head" insertion mode. Handle the token as follows: */

        /* A character token that is one of U+0009 CHARACTER TABULATION,
        U+000A LINE FEED (LF), U+000B LINE TABULATION, U+000C FORM FEED (FF),
        or U+0020 SPACE */
        if($token['type'] === HTML5::CHARACTR &&
        preg_match('/^[\t\n\x0b\x0c ]+$/', $token['data'])) {
            /* Append the character to the current node. */
            $this->insertText($token['data']);

        /* A comment token */
        } elseif($token['type'] === HTML5::COMMENT) {
            /* Append a Comment node to the current node with the data attribute
            set to the data given in the comment token. */
            $this->insertComment($token['data']);

        /* A start tag token with the tag name "head" */
        } elseif($token['type'] === HTML5::STARTTAG && $token['name'] === 'head') {
            /* Create an element for the token, append the new element to the
            current node and push it onto the stack of open elements. */
            $element = $this->insertElement($token);

            /* Set the head element pointer to this new element node. */
            $this->head_pointer = $element;

            /* Change the insertion mode to "in head". */
            $this->mode = self::IN_HEAD;

        /* A start tag token whose tag name is one of: "base", "link", "meta",
        "script", "style", "title". Or an end tag with the tag name "html".
        Or a character token that is not one of U+0009 CHARACTER TABULATION,
        U+000A LINE FEED (LF), U+000B LINE TABULATION, U+000C FORM FEED (FF),
        or U+0020 SPACE. Or any other start tag token */
        } elseif($token['type'] === HTML5::STARTTAG ||
        ($token['type'] === HTML5::ENDTAG && $token['name'] === 'html') ||
        ($token['type'] === HTML5::CHARACTR && !preg_match('/^[\t\n\x0b\x0c ]+$/',
        $token['data']))) {
            /* Act as if a start tag token with the tag name "head" and no
            attributes had been seen, then reprocess the current token. */
            $this->beforeHead(array(
                'name' => 'head',
                'type' => HTML5::STARTTAG,
                'attr' => array()
            ));

            return $this->inHead($token);

        /* Any other end tag */
        } elseif($token['type'] === HTML5::ENDTAG) {
            /* Parse error. Ignore the token. */
        }
    }

    /**
     * "In head" insertion mode.
     *
     * Returns an HTML5 content model constant when the tokeniser has to
     * switch content model (RCDATA/CDATA after title/style/script start
     * tags, PCDATA after their end tags). The head element pointer may be
     * null (innerHTML case), so every use of it is guarded.
     */
    private function inHead($token) {
        /* Handle the token as follows: */

        /* A character token that is one of U+0009 CHARACTER TABULATION,
        U+000A LINE FEED (LF), U+000B LINE TABULATION, U+000C FORM FEED (FF),
        or U+0020 SPACE.

        THIS DIFFERS FROM THE SPEC: If the current node is either a title, style
        or script element, append the character to the current node regardless
        of its content. */
        if(($token['type'] === HTML5::CHARACTR &&
        preg_match('/^[\t\n\x0b\x0c ]+$/', $token['data'])) || (
        $token['type'] === HTML5::CHARACTR && in_array(end($this->stack)->nodeName,
        array('title', 'style', 'script')))) {
            /* Append the character to the current node. */
            $this->insertText($token['data']);

        /* A comment token */
        } elseif($token['type'] === HTML5::COMMENT) {
            /* Append a Comment node to the current node with the data attribute
            set to the data given in the comment token. */
            $this->insertComment($token['data']);

        /* An end tag with the tag name "title", "style" or "script": close
        the element and switch the tokeniser back to PCDATA. */
        } elseif($token['type'] === HTML5::ENDTAG &&
        in_array($token['name'], array('title', 'style', 'script'))) {
            array_pop($this->stack);
            return HTML5::PCDATA;

        /* A start tag with the tag name "title" */
        } elseif($token['type'] === HTML5::STARTTAG && $token['name'] === 'title') {
            /* Create an element for the token and append the new element to the
            node pointed to by the head element pointer, or, if that is null
            (innerHTML case), to the current node. */
            if($this->head_pointer !== null) {
                $element = $this->insertElement($token, false);
                $this->head_pointer->appendChild($element);

            } else {
                $element = $this->insertElement($token);
            }

            /* Switch the tokeniser's content model flag to the RCDATA state. */
            return HTML5::RCDATA;

        /* A start tag with the tag name "style" */
        } elseif($token['type'] === HTML5::STARTTAG && $token['name'] === 'style') {
            /* Create an element for the token and append the new element to the
            node pointed to by the head element pointer, or, if that is null
            (innerHTML case), to the current node. */
            if($this->head_pointer !== null) {
                $element = $this->insertElement($token, false);
                $this->head_pointer->appendChild($element);

            } else {
                $this->insertElement($token);
            }

            /* Switch the tokeniser's content model flag to the CDATA state. */
            return HTML5::CDATA;

        /* A start tag with the tag name "script" */
        } elseif($token['type'] === HTML5::STARTTAG && $token['name'] === 'script') {
            /* Create an element for the token and append it to the node
            pointed to by the head element pointer, or, if that is null
            (innerHTML case), to the current node. This mirrors the "style"
            branch above. */
            if($this->head_pointer !== null) {
                $element = $this->insertElement($token, false);
                $this->head_pointer->appendChild($element);

            } else {
                $this->insertElement($token);
            }

            /* Switch the tokeniser's content model flag to the CDATA state. */
            return HTML5::CDATA;

        /* A start tag with the tag name "base", "link", or "meta" */
        } elseif($token['type'] === HTML5::STARTTAG && in_array($token['name'],
        array('base', 'link', 'meta'))) {
            /* Create an element for the token and append the new element to the
            node pointed to by the head element pointer, or, if that is null
            (innerHTML case), to the current node. */
            if($this->head_pointer !== null) {
                $element = $this->insertElement($token, false);
                $this->head_pointer->appendChild($element);
                array_pop($this->stack);

            } else {
                $this->insertElement($token);
            }

        /* An end tag with the tag name "head" */
        } elseif($token['type'] === HTML5::ENDTAG && $token['name'] === 'head') {
            /* If the current node is a head element, pop the current node off
            the stack of open elements. Otherwise (including when there is no
            head element at all) this is a parse error and nothing is popped. */
            if($this->head_pointer !== null &&
            $this->head_pointer->isSameNode(end($this->stack))) {
                array_pop($this->stack);
            }

            /* Change the insertion mode to "after head". */
            $this->mode = self::AFTER_HEAD;

        /* A start tag with the tag name "head" or an end tag except "html". */
        } elseif(($token['type'] === HTML5::STARTTAG && $token['name'] === 'head') ||
        ($token['type'] === HTML5::ENDTAG && $token['name'] !== 'html')) {
            // Parse error. Ignore the token.

        /* Anything else */
        } else {
            /* If the current node is a head element, act as if an end tag
            token with the tag name "head" had been seen. */
            if($this->head_pointer !== null &&
            $this->head_pointer->isSameNode(end($this->stack))) {
                $this->inHead(array(
                    'name' => 'head',
                    'type' => HTML5::ENDTAG
                ));

            /* Otherwise, change the insertion mode to "after head". */
            } else {
                $this->mode = self::AFTER_HEAD;
            }

            /* Then, reprocess the current token. */
            return $this->afterHead($token);
        }
    }

    /**
     * "After head" insertion mode: waits for <body> or <frameset>, sends
     * stray head content back to inHead(), and otherwise implies a <body>.
     */
    private function afterHead($token) {
        /* Handle the token as follows: */

        /* A character token that is one of U+0009 CHARACTER TABULATION,
        U+000A LINE FEED (LF), U+000B LINE TABULATION, U+000C FORM FEED (FF),
        or U+0020 SPACE */
        if($token['type'] === HTML5::CHARACTR &&
        preg_match('/^[\t\n\x0b\x0c ]+$/', $token['data'])) {
            /* Append the character to the current node. */
            $this->insertText($token['data']);

        /* A comment token */
        } elseif($token['type'] === HTML5::COMMENT) {
            /* Append a Comment node to the current node with the data attribute
            set to the data given in the comment token. */
            $this->insertComment($token['data']);

        /* A start tag token with the tag name "body" */
        } elseif($token['type'] === HTML5::STARTTAG && $token['name'] === 'body') {
            /* Insert a body element for the token. */
            $this->insertElement($token);

            /* Change the insertion mode to "in body". */
            $this->mode = self::IN_BODY;

        /* A start tag token with the tag name "frameset" */
        } elseif($token['type'] === HTML5::STARTTAG && $token['name'] === 'frameset') {
            /* Insert a frameset element for the token. */
            $this->insertElement($token);

            /* Change the insertion mode to "in frameset". */
            $this->mode = self::IN_FRAME;

        /* A start tag token whose tag name is one of: "base", "link", "meta",
        "script", "style", "title" */
        } elseif($token['type'] === HTML5::STARTTAG && in_array($token['name'],
        array('base', 'link', 'meta', 'script', 'style', 'title'))) {
            /* Parse error. Switch the insertion mode back to "in head" and
            reprocess the token. */
            $this->mode = self::IN_HEAD;
            return $this->inHead($token);

        /* Anything else */
        } else {
            /* Act as if a start tag token with the tag name "body" and no
            attributes had been seen, and then reprocess the current token. */
            $this->afterHead(array(
                'name' => 'body',
                'type' => HTML5::STARTTAG,
                'attr' => array()
            ));

            return $this->inBody($token);
        }
    }

    private function inBody($token) {
        /* Handle the token as follows: */

        switch($token['type']) {
            /* A character token */
            case HTML5::CHARACTR:
                /* Reconstruct the active formatting elements, if any. */
                $this->reconstructActiveFormattingElements();

                /* Append the token's character to the current node. */
                $this->insertText($token['data']);
            break;

            /* A comment token */
            case HTML5::COMMENT:
                /* Append a Comment node to the current node with the data
                attribute set to the data given in the comment token. */
                $this->insertComment($token['data']);
            break;

            case HTML5::STARTTAG:
            switch($token['name']) {
                /* A start tag token whose tag name is one of: "script",
                "style" */
                case 'script': case 'style':
                    /* Process the token as if the insertion mode had been "in
                    head". */
                    return $this->inHead($token);
                break;

                /* A start tag token whose tag name is one of: "base", "link",
                "meta", "title" */
                case 'base': case 'link': case 'meta': case 'title':
                    /* Parse error. Process the token as if the insertion mode
                    had been "in head". */
                    return $this->inHead($token);
                break;

                /* A start tag token with the tag name "body" */
                case 'body':
                    /* Parse error. If the second element on the stack of open
                    elements is not a body element, or, if the stack of open
                    elements has only one node on it, then ignore the token.
01675 (innerHTML case) */ 01676 if(count($this->stack) === 1 || $this->stack[1]->nodeName !== 'body') { 01677 // Ignore 01678 01679 /* Otherwise, for each attribute on the token, check to see 01680 if the attribute is already present on the body element (the 01681 second element) on the stack of open elements. If it is not, 01682 add the attribute and its corresponding value to that 01683 element. */ 01684 } else { 01685 foreach($token['attr'] as $attr) { 01686 if(!$this->stack[1]->hasAttribute($attr['name'])) { 01687 $this->stack[1]->setAttribute($attr['name'], $attr['value']); 01688 } 01689 } 01690 } 01691 break; 01692 01693 /* A start tag whose tag name is one of: "address", 01694 "blockquote", "center", "dir", "div", "dl", "fieldset", 01695 "listing", "menu", "ol", "p", "ul" */ 01696 case 'address': case 'blockquote': case 'center': case 'dir': 01697 case 'div': case 'dl': case 'fieldset': case 'listing': 01698 case 'menu': case 'ol': case 'p': case 'ul': 01699 /* If the stack of open elements has a p element in scope, 01700 then act as if an end tag with the tag name p had been 01701 seen. */ 01702 if($this->elementInScope('p')) { 01703 $this->emitToken(array( 01704 'name' => 'p', 01705 'type' => HTML5::ENDTAG 01706 )); 01707 } 01708 01709 /* Insert an HTML element for the token. */ 01710 $this->insertElement($token); 01711 break; 01712 01713 /* A start tag whose tag name is "form" */ 01714 case 'form': 01715 /* If the form element pointer is not null, ignore the 01716 token with a parse error. */ 01717 if($this->form_pointer !== null) { 01718 // Ignore. 01719 01720 /* Otherwise: */ 01721 } else { 01722 /* If the stack of open elements has a p element in 01723 scope, then act as if an end tag with the tag name p 01724 had been seen. 
*/ 01725 if($this->elementInScope('p')) { 01726 $this->emitToken(array( 01727 'name' => 'p', 01728 'type' => HTML5::ENDTAG 01729 )); 01730 } 01731 01732 /* Insert an HTML element for the token, and set the 01733 form element pointer to point to the element created. */ 01734 $element = $this->insertElement($token); 01735 $this->form_pointer = $element; 01736 } 01737 break; 01738 01739 /* A start tag whose tag name is "li", "dd" or "dt" */ 01740 case 'li': case 'dd': case 'dt': 01741 /* If the stack of open elements has a p element in scope, 01742 then act as if an end tag with the tag name p had been 01743 seen. */ 01744 if($this->elementInScope('p')) { 01745 $this->emitToken(array( 01746 'name' => 'p', 01747 'type' => HTML5::ENDTAG 01748 )); 01749 } 01750 01751 $stack_length = count($this->stack) - 1; 01752 01753 for($n = $stack_length; 0 <= $n; $n--) { 01754 /* 1. Initialise node to be the current node (the 01755 bottommost node of the stack). */ 01756 $stop = false; 01757 $node = $this->stack[$n]; 01758 $cat = $this->getElementCategory($node->tagName); 01759 01760 /* 2. If node is an li, dd or dt element, then pop all 01761 the nodes from the current node up to node, including 01762 node, then stop this algorithm. */ 01763 if($token['name'] === $node->tagName || ($token['name'] !== 'li' 01764 && ($node->tagName === 'dd' || $node->tagName === 'dt'))) { 01765 for($x = $stack_length; $x >= $n ; $x--) { 01766 array_pop($this->stack); 01767 } 01768 01769 break; 01770 } 01771 01772 /* 3. If node is not in the formatting category, and is 01773 not in the phrasing category, and is not an address or 01774 div element, then stop this algorithm. */ 01775 if($cat !== self::FORMATTING && $cat !== self::PHRASING && 01776 $node->tagName !== 'address' && $node->tagName !== 'div') { 01777 break; 01778 } 01779 } 01780 01781 /* Finally, insert an HTML element with the same tag 01782 name as the token's. 
*/ 01783 $this->insertElement($token); 01784 break; 01785 01786 /* A start tag token whose tag name is "plaintext" */ 01787 case 'plaintext': 01788 /* If the stack of open elements has a p element in scope, 01789 then act as if an end tag with the tag name p had been 01790 seen. */ 01791 if($this->elementInScope('p')) { 01792 $this->emitToken(array( 01793 'name' => 'p', 01794 'type' => HTML5::ENDTAG 01795 )); 01796 } 01797 01798 /* Insert an HTML element for the token. */ 01799 $this->insertElement($token); 01800 01801 return HTML5::PLAINTEXT; 01802 break; 01803 01804 /* A start tag whose tag name is one of: "h1", "h2", "h3", "h4", 01805 "h5", "h6" */ 01806 case 'h1': case 'h2': case 'h3': case 'h4': case 'h5': case 'h6': 01807 /* If the stack of open elements has a p element in scope, 01808 then act as if an end tag with the tag name p had been seen. */ 01809 if($this->elementInScope('p')) { 01810 $this->emitToken(array( 01811 'name' => 'p', 01812 'type' => HTML5::ENDTAG 01813 )); 01814 } 01815 01816 /* If the stack of open elements has in scope an element whose 01817 tag name is one of "h1", "h2", "h3", "h4", "h5", or "h6", then 01818 this is a parse error; pop elements from the stack until an 01819 element with one of those tag names has been popped from the 01820 stack. */ 01821 while($this->elementInScope(array('h1', 'h2', 'h3', 'h4', 'h5', 'h6'))) { 01822 array_pop($this->stack); 01823 } 01824 01825 /* Insert an HTML element for the token. 
*/ 01826 $this->insertElement($token); 01827 break; 01828 01829 /* A start tag whose tag name is "a" */ 01830 case 'a': 01831 /* If the list of active formatting elements contains 01832 an element whose tag name is "a" between the end of the 01833 list and the last marker on the list (or the start of 01834 the list if there is no marker on the list), then this 01835 is a parse error; act as if an end tag with the tag name 01836 "a" had been seen, then remove that element from the list 01837 of active formatting elements and the stack of open 01838 elements if the end tag didn't already remove it (it 01839 might not have if the element is not in table scope). */ 01840 $leng = count($this->a_formatting); 01841 01842 for($n = $leng - 1; $n >= 0; $n--) { 01843 if($this->a_formatting[$n] === self::MARKER) { 01844 break; 01845 01846 } elseif($this->a_formatting[$n]->nodeName === 'a') { 01847 $this->emitToken(array( 01848 'name' => 'a', 01849 'type' => HTML5::ENDTAG 01850 )); 01851 break; 01852 } 01853 } 01854 01855 /* Reconstruct the active formatting elements, if any. */ 01856 $this->reconstructActiveFormattingElements(); 01857 01858 /* Insert an HTML element for the token. */ 01859 $el = $this->insertElement($token); 01860 01861 /* Add that element to the list of active formatting 01862 elements. */ 01863 $this->a_formatting[] = $el; 01864 break; 01865 01866 /* A start tag whose tag name is one of: "b", "big", "em", "font", 01867 "i", "nobr", "s", "small", "strike", "strong", "tt", "u" */ 01868 case 'b': case 'big': case 'em': case 'font': case 'i': 01869 case 'nobr': case 's': case 'small': case 'strike': 01870 case 'strong': case 'tt': case 'u': 01871 /* Reconstruct the active formatting elements, if any. */ 01872 $this->reconstructActiveFormattingElements(); 01873 01874 /* Insert an HTML element for the token. */ 01875 $el = $this->insertElement($token); 01876 01877 /* Add that element to the list of active formatting 01878 elements. 
*/ 01879 $this->a_formatting[] = $el; 01880 break; 01881 01882 /* A start tag token whose tag name is "button" */ 01883 case 'button': 01884 /* If the stack of open elements has a button element in scope, 01885 then this is a parse error; act as if an end tag with the tag 01886 name "button" had been seen, then reprocess the token. (We don't 01887 do that. Unnecessary.) */ 01888 if($this->elementInScope('button')) { 01889 $this->inBody(array( 01890 'name' => 'button', 01891 'type' => HTML5::ENDTAG 01892 )); 01893 } 01894 01895 /* Reconstruct the active formatting elements, if any. */ 01896 $this->reconstructActiveFormattingElements(); 01897 01898 /* Insert an HTML element for the token. */ 01899 $this->insertElement($token); 01900 01901 /* Insert a marker at the end of the list of active 01902 formatting elements. */ 01903 $this->a_formatting[] = self::MARKER; 01904 break; 01905 01906 /* A start tag token whose tag name is one of: "marquee", "object" */ 01907 case 'marquee': case 'object': 01908 /* Reconstruct the active formatting elements, if any. */ 01909 $this->reconstructActiveFormattingElements(); 01910 01911 /* Insert an HTML element for the token. */ 01912 $this->insertElement($token); 01913 01914 /* Insert a marker at the end of the list of active 01915 formatting elements. */ 01916 $this->a_formatting[] = self::MARKER; 01917 break; 01918 01919 /* A start tag token whose tag name is "xmp" */ 01920 case 'xmp': 01921 /* Reconstruct the active formatting elements, if any. */ 01922 $this->reconstructActiveFormattingElements(); 01923 01924 /* Insert an HTML element for the token. */ 01925 $this->insertElement($token); 01926 01927 /* Switch the content model flag to the CDATA state. */ 01928 return HTML5::CDATA; 01929 break; 01930 01931 /* A start tag whose tag name is "table" */ 01932 case 'table': 01933 /* If the stack of open elements has a p element in scope, 01934 then act as if an end tag with the tag name p had been seen. 
*/ 01935 if($this->elementInScope('p')) { 01936 $this->emitToken(array( 01937 'name' => 'p', 01938 'type' => HTML5::ENDTAG 01939 )); 01940 } 01941 01942 /* Insert an HTML element for the token. */ 01943 $this->insertElement($token); 01944 01945 /* Change the insertion mode to "in table". */ 01946 $this->mode = self::IN_TABLE; 01947 break; 01948 01949 /* A start tag whose tag name is one of: "area", "basefont", 01950 "bgsound", "br", "embed", "img", "param", "spacer", "wbr" */ 01951 case 'area': case 'basefont': case 'bgsound': case 'br': 01952 case 'embed': case 'img': case 'param': case 'spacer': 01953 case 'wbr': 01954 /* Reconstruct the active formatting elements, if any. */ 01955 $this->reconstructActiveFormattingElements(); 01956 01957 /* Insert an HTML element for the token. */ 01958 $this->insertElement($token); 01959 01960 /* Immediately pop the current node off the stack of open elements. */ 01961 array_pop($this->stack); 01962 break; 01963 01964 /* A start tag whose tag name is "hr" */ 01965 case 'hr': 01966 /* If the stack of open elements has a p element in scope, 01967 then act as if an end tag with the tag name p had been seen. */ 01968 if($this->elementInScope('p')) { 01969 $this->emitToken(array( 01970 'name' => 'p', 01971 'type' => HTML5::ENDTAG 01972 )); 01973 } 01974 01975 /* Insert an HTML element for the token. */ 01976 $this->insertElement($token); 01977 01978 /* Immediately pop the current node off the stack of open elements. */ 01979 array_pop($this->stack); 01980 break; 01981 01982 /* A start tag whose tag name is "image" */ 01983 case 'image': 01984 /* Parse error. Change the token's tag name to "img" and 01985 reprocess it. (Don't ask.) */ 01986 $token['name'] = 'img'; 01987 return $this->inBody($token); 01988 break; 01989 01990 /* A start tag whose tag name is "input" */ 01991 case 'input': 01992 /* Reconstruct the active formatting elements, if any. 
*/ 01993 $this->reconstructActiveFormattingElements(); 01994 01995 /* Insert an input element for the token. */ 01996 $element = $this->insertElement($token, false); 01997 01998 /* If the form element pointer is not null, then associate the 01999 input element with the form element pointed to by the form 02000 element pointer. */ 02001 $this->form_pointer !== null 02002 ? $this->form_pointer->appendChild($element) 02003 : end($this->stack)->appendChild($element); 02004 02005 /* Pop that input element off the stack of open elements. */ 02006 array_pop($this->stack); 02007 break; 02008 02009 /* A start tag whose tag name is "isindex" */ 02010 case 'isindex': 02011 /* Parse error. */ 02012 // w/e 02013 02014 /* If the form element pointer is not null, 02015 then ignore the token. */ 02016 if($this->form_pointer === null) { 02017 /* Act as if a start tag token with the tag name "form" had 02018 been seen. */ 02019 $this->inBody(array( 02020 'name' => 'body', 02021 'type' => HTML5::STARTTAG, 02022 'attr' => array() 02023 )); 02024 02025 /* Act as if a start tag token with the tag name "hr" had 02026 been seen. */ 02027 $this->inBody(array( 02028 'name' => 'hr', 02029 'type' => HTML5::STARTTAG, 02030 'attr' => array() 02031 )); 02032 02033 /* Act as if a start tag token with the tag name "p" had 02034 been seen. */ 02035 $this->inBody(array( 02036 'name' => 'p', 02037 'type' => HTML5::STARTTAG, 02038 'attr' => array() 02039 )); 02040 02041 /* Act as if a start tag token with the tag name "label" 02042 had been seen. */ 02043 $this->inBody(array( 02044 'name' => 'label', 02045 'type' => HTML5::STARTTAG, 02046 'attr' => array() 02047 )); 02048 02049 /* Act as if a stream of character tokens had been seen. */ 02050 $this->insertText('This is a searchable index. '. 
02051 'Insert your search keywords here: '); 02052 02053 /* Act as if a start tag token with the tag name "input" 02054 had been seen, with all the attributes from the "isindex" 02055 token, except with the "name" attribute set to the value 02056 "isindex" (ignoring any explicit "name" attribute). */ 02057 $attr = $token['attr']; 02058 $attr[] = array('name' => 'name', 'value' => 'isindex'); 02059 02060 $this->inBody(array( 02061 'name' => 'input', 02062 'type' => HTML5::STARTTAG, 02063 'attr' => $attr 02064 )); 02065 02066 /* Act as if a stream of character tokens had been seen 02067 (see below for what they should say). */ 02068 $this->insertText('This is a searchable index. '. 02069 'Insert your search keywords here: '); 02070 02071 /* Act as if an end tag token with the tag name "label" 02072 had been seen. */ 02073 $this->inBody(array( 02074 'name' => 'label', 02075 'type' => HTML5::ENDTAG 02076 )); 02077 02078 /* Act as if an end tag token with the tag name "p" had 02079 been seen. */ 02080 $this->inBody(array( 02081 'name' => 'p', 02082 'type' => HTML5::ENDTAG 02083 )); 02084 02085 /* Act as if a start tag token with the tag name "hr" had 02086 been seen. */ 02087 $this->inBody(array( 02088 'name' => 'hr', 02089 'type' => HTML5::ENDTAG 02090 )); 02091 02092 /* Act as if an end tag token with the tag name "form" had 02093 been seen. */ 02094 $this->inBody(array( 02095 'name' => 'form', 02096 'type' => HTML5::ENDTAG 02097 )); 02098 } 02099 break; 02100 02101 /* A start tag whose tag name is "textarea" */ 02102 case 'textarea': 02103 $this->insertElement($token); 02104 02105 /* Switch the tokeniser's content model flag to the 02106 RCDATA state. */ 02107 return HTML5::RCDATA; 02108 break; 02109 02110 /* A start tag whose tag name is one of: "iframe", "noembed", 02111 "noframes" */ 02112 case 'iframe': case 'noembed': case 'noframes': 02113 $this->insertElement($token); 02114 02115 /* Switch the tokeniser's content model flag to the CDATA state. 
*/ 02116 return HTML5::CDATA; 02117 break; 02118 02119 /* A start tag whose tag name is "select" */ 02120 case 'select': 02121 /* Reconstruct the active formatting elements, if any. */ 02122 $this->reconstructActiveFormattingElements(); 02123 02124 /* Insert an HTML element for the token. */ 02125 $this->insertElement($token); 02126 02127 /* Change the insertion mode to "in select". */ 02128 $this->mode = self::IN_SELECT; 02129 break; 02130 02131 /* A start or end tag whose tag name is one of: "caption", "col", 02132 "colgroup", "frame", "frameset", "head", "option", "optgroup", 02133 "tbody", "td", "tfoot", "th", "thead", "tr". */ 02134 case 'caption': case 'col': case 'colgroup': case 'frame': 02135 case 'frameset': case 'head': case 'option': case 'optgroup': 02136 case 'tbody': case 'td': case 'tfoot': case 'th': case 'thead': 02137 case 'tr': 02138 // Parse error. Ignore the token. 02139 break; 02140 02141 /* A start or end tag whose tag name is one of: "event-source", 02142 "section", "nav", "article", "aside", "header", "footer", 02143 "datagrid", "command" */ 02144 case 'event-source': case 'section': case 'nav': case 'article': 02145 case 'aside': case 'header': case 'footer': case 'datagrid': 02146 case 'command': 02147 // Work in progress! 02148 break; 02149 02150 /* A start tag token not covered by the previous entries */ 02151 default: 02152 /* Reconstruct the active formatting elements, if any. */ 02153 $this->reconstructActiveFormattingElements(); 02154 02155 $this->insertElement($token, true, true); 02156 break; 02157 } 02158 break; 02159 02160 case HTML5::ENDTAG: 02161 switch($token['name']) { 02162 /* An end tag with the tag name "body" */ 02163 case 'body': 02164 /* If the second element in the stack of open elements is 02165 not a body element, this is a parse error. Ignore the token. 02166 (innerHTML case) */ 02167 if(count($this->stack) < 2 || $this->stack[1]->nodeName !== 'body') { 02168 // Ignore. 
02169 02170 /* If the current node is not the body element, then this 02171 is a parse error. */ 02172 } elseif(end($this->stack)->nodeName !== 'body') { 02173 // Parse error. 02174 } 02175 02176 /* Change the insertion mode to "after body". */ 02177 $this->mode = self::AFTER_BODY; 02178 break; 02179 02180 /* An end tag with the tag name "html" */ 02181 case 'html': 02182 /* Act as if an end tag with tag name "body" had been seen, 02183 then, if that token wasn't ignored, reprocess the current 02184 token. */ 02185 $this->inBody(array( 02186 'name' => 'body', 02187 'type' => HTML5::ENDTAG 02188 )); 02189 02190 return $this->afterBody($token); 02191 break; 02192 02193 /* An end tag whose tag name is one of: "address", "blockquote", 02194 "center", "dir", "div", "dl", "fieldset", "listing", "menu", 02195 "ol", "pre", "ul" */ 02196 case 'address': case 'blockquote': case 'center': case 'dir': 02197 case 'div': case 'dl': case 'fieldset': case 'listing': 02198 case 'menu': case 'ol': case 'pre': case 'ul': 02199 /* If the stack of open elements has an element in scope 02200 with the same tag name as that of the token, then generate 02201 implied end tags. */ 02202 if($this->elementInScope($token['name'])) { 02203 $this->generateImpliedEndTags(); 02204 02205 /* Now, if the current node is not an element with 02206 the same tag name as that of the token, then this 02207 is a parse error. */ 02208 // w/e 02209 02210 /* If the stack of open elements has an element in 02211 scope with the same tag name as that of the token, 02212 then pop elements from this stack until an element 02213 with that tag name has been popped from the stack. 
*/ 02214 for($n = count($this->stack) - 1; $n >= 0; $n--) { 02215 if($this->stack[$n]->nodeName === $token['name']) { 02216 $n = -1; 02217 } 02218 02219 array_pop($this->stack); 02220 } 02221 } 02222 break; 02223 02224 /* An end tag whose tag name is "form" */ 02225 case 'form': 02226 /* If the stack of open elements has an element in scope 02227 with the same tag name as that of the token, then generate 02228 implied end tags. */ 02229 if($this->elementInScope($token['name'])) { 02230 $this->generateImpliedEndTags(); 02231 02232 } 02233 02234 if(end($this->stack)->nodeName !== $token['name']) { 02235 /* Now, if the current node is not an element with the 02236 same tag name as that of the token, then this is a parse 02237 error. */ 02238 // w/e 02239 02240 } else { 02241 /* Otherwise, if the current node is an element with 02242 the same tag name as that of the token pop that element 02243 from the stack. */ 02244 array_pop($this->stack); 02245 } 02246 02247 /* In any case, set the form element pointer to null. */ 02248 $this->form_pointer = null; 02249 break; 02250 02251 /* An end tag whose tag name is "p" */ 02252 case 'p': 02253 /* If the stack of open elements has a p element in scope, 02254 then generate implied end tags, except for p elements. */ 02255 if($this->elementInScope('p')) { 02256 $this->generateImpliedEndTags(array('p')); 02257 02258 /* If the current node is not a p element, then this is 02259 a parse error. */ 02260 // k 02261 02262 /* If the stack of open elements has a p element in 02263 scope, then pop elements from this stack until the stack 02264 no longer has a p element in scope. 
*/ 02265 for($n = count($this->stack) - 1; $n >= 0; $n--) { 02266 if($this->elementInScope('p')) { 02267 array_pop($this->stack); 02268 02269 } else { 02270 break; 02271 } 02272 } 02273 } 02274 break; 02275 02276 /* An end tag whose tag name is "dd", "dt", or "li" */ 02277 case 'dd': case 'dt': case 'li': 02278 /* If the stack of open elements has an element in scope 02279 whose tag name matches the tag name of the token, then 02280 generate implied end tags, except for elements with the 02281 same tag name as the token. */ 02282 if($this->elementInScope($token['name'])) { 02283 $this->generateImpliedEndTags(array($token['name'])); 02284 02285 /* If the current node is not an element with the same 02286 tag name as the token, then this is a parse error. */ 02287 // w/e 02288 02289 /* If the stack of open elements has an element in scope 02290 whose tag name matches the tag name of the token, then 02291 pop elements from this stack until an element with that 02292 tag name has been popped from the stack. */ 02293 for($n = count($this->stack) - 1; $n >= 0; $n--) { 02294 if($this->stack[$n]->nodeName === $token['name']) { 02295 $n = -1; 02296 } 02297 02298 array_pop($this->stack); 02299 } 02300 } 02301 break; 02302 02303 /* An end tag whose tag name is one of: "h1", "h2", "h3", "h4", 02304 "h5", "h6" */ 02305 case 'h1': case 'h2': case 'h3': case 'h4': case 'h5': case 'h6': 02306 $elements = array('h1', 'h2', 'h3', 'h4', 'h5', 'h6'); 02307 02308 /* If the stack of open elements has in scope an element whose 02309 tag name is one of "h1", "h2", "h3", "h4", "h5", or "h6", then 02310 generate implied end tags. */ 02311 if($this->elementInScope($elements)) { 02312 $this->generateImpliedEndTags(); 02313 02314 /* Now, if the current node is not an element with the same 02315 tag name as that of the token, then this is a parse error. 
*/ 02316 // w/e 02317 02318 /* If the stack of open elements has in scope an element 02319 whose tag name is one of "h1", "h2", "h3", "h4", "h5", or 02320 "h6", then pop elements from the stack until an element 02321 with one of those tag names has been popped from the stack. */ 02322 while($this->elementInScope($elements)) { 02323 array_pop($this->stack); 02324 } 02325 } 02326 break; 02327 02328 /* An end tag whose tag name is one of: "a", "b", "big", "em", 02329 "font", "i", "nobr", "s", "small", "strike", "strong", "tt", "u" */ 02330 case 'a': case 'b': case 'big': case 'em': case 'font': 02331 case 'i': case 'nobr': case 's': case 'small': case 'strike': 02332 case 'strong': case 'tt': case 'u': 02333 /* 1. Let the formatting element be the last element in 02334 the list of active formatting elements that: 02335 * is between the end of the list and the last scope 02336 marker in the list, if any, or the start of the list 02337 otherwise, and 02338 * has the same tag name as the token. 02339 */ 02340 while(true) { 02341 for($a = count($this->a_formatting) - 1; $a >= 0; $a--) { 02342 if($this->a_formatting[$a] === self::MARKER) { 02343 break; 02344 02345 } elseif($this->a_formatting[$a]->tagName === $token['name']) { 02346 $formatting_element = $this->a_formatting[$a]; 02347 $in_stack = in_array($formatting_element, $this->stack, true); 02348 $fe_af_pos = $a; 02349 break; 02350 } 02351 } 02352 02353 /* If there is no such node, or, if that node is 02354 also in the stack of open elements but the element 02355 is not in scope, then this is a parse error. Abort 02356 these steps. The token is ignored. */ 02357 if(!isset($formatting_element) || ($in_stack && 02358 !$this->elementInScope($token['name']))) { 02359 break; 02360 02361 /* Otherwise, if there is such a node, but that node 02362 is not in the stack of open elements, then this is a 02363 parse error; remove the element from the list, and 02364 abort these steps. 
*/ 02365 } elseif(isset($formatting_element) && !$in_stack) { 02366 unset($this->a_formatting[$fe_af_pos]); 02367 $this->a_formatting = array_merge($this->a_formatting); 02368 break; 02369 } 02370 02371 /* 2. Let the furthest block be the topmost node in the 02372 stack of open elements that is lower in the stack 02373 than the formatting element, and is not an element in 02374 the phrasing or formatting categories. There might 02375 not be one. */ 02376 $fe_s_pos = array_search($formatting_element, $this->stack, true); 02377 $length = count($this->stack); 02378 02379 for($s = $fe_s_pos + 1; $s < $length; $s++) { 02380 $category = $this->getElementCategory($this->stack[$s]->nodeName); 02381 02382 if($category !== self::PHRASING && $category !== self::FORMATTING) { 02383 $furthest_block = $this->stack[$s]; 02384 } 02385 } 02386 02387 /* 3. If there is no furthest block, then the UA must 02388 skip the subsequent steps and instead just pop all 02389 the nodes from the bottom of the stack of open 02390 elements, from the current node up to the formatting 02391 element, and remove the formatting element from the 02392 list of active formatting elements. */ 02393 if(!isset($furthest_block)) { 02394 for($n = $length - 1; $n >= $fe_s_pos; $n--) { 02395 array_pop($this->stack); 02396 } 02397 02398 unset($this->a_formatting[$fe_af_pos]); 02399 $this->a_formatting = array_merge($this->a_formatting); 02400 break; 02401 } 02402 02403 /* 4. Let the common ancestor be the element 02404 immediately above the formatting element in the stack 02405 of open elements. */ 02406 $common_ancestor = $this->stack[$fe_s_pos - 1]; 02407 02408 /* 5. If the furthest block has a parent node, then 02409 remove the furthest block from its parent node. */ 02410 if($furthest_block->parentNode !== null) { 02411 $furthest_block->parentNode->removeChild($furthest_block); 02412 } 02413 02414 /* 6. 
Let a bookmark note the position of the 02415 formatting element in the list of active formatting 02416 elements relative to the elements on either side 02417 of it in the list. */ 02418 $bookmark = $fe_af_pos; 02419 02420 /* 7. Let node and last node be the furthest block. 02421 Follow these steps: */ 02422 $node = $furthest_block; 02423 $last_node = $furthest_block; 02424 02425 while(true) { 02426 for($n = array_search($node, $this->stack, true) - 1; $n >= 0; $n--) { 02427 /* 7.1 Let node be the element immediately 02428 prior to node in the stack of open elements. */ 02429 $node = $this->stack[$n]; 02430 02431 /* 7.2 If node is not in the list of active 02432 formatting elements, then remove node from 02433 the stack of open elements and then go back 02434 to step 1. */ 02435 if(!in_array($node, $this->a_formatting, true)) { 02436 unset($this->stack[$n]); 02437 $this->stack = array_merge($this->stack); 02438 02439 } else { 02440 break; 02441 } 02442 } 02443 02444 /* 7.3 Otherwise, if node is the formatting 02445 element, then go to the next step in the overall 02446 algorithm. */ 02447 if($node === $formatting_element) { 02448 break; 02449 02450 /* 7.4 Otherwise, if last node is the furthest 02451 block, then move the aforementioned bookmark to 02452 be immediately after the node in the list of 02453 active formatting elements. */ 02454 } elseif($last_node === $furthest_block) { 02455 $bookmark = array_search($node, $this->a_formatting, true) + 1; 02456 } 02457 02458 /* 7.5 If node has any children, perform a 02459 shallow clone of node, replace the entry for 02460 node in the list of active formatting elements 02461 with an entry for the clone, replace the entry 02462 for node in the stack of open elements with an 02463 entry for the clone, and let node be the clone. 
*/ 02464 if($node->hasChildNodes()) { 02465 $clone = $node->cloneNode(); 02466 $s_pos = array_search($node, $this->stack, true); 02467 $a_pos = array_search($node, $this->a_formatting, true); 02468 02469 $this->stack[$s_pos] = $clone; 02470 $this->a_formatting[$a_pos] = $clone; 02471 $node = $clone; 02472 } 02473 02474 /* 7.6 Insert last node into node, first removing 02475 it from its previous parent node if any. */ 02476 if($last_node->parentNode !== null) { 02477 $last_node->parentNode->removeChild($last_node); 02478 } 02479 02480 $node->appendChild($last_node); 02481 02482 /* 7.7 Let last node be node. */ 02483 $last_node = $node; 02484 } 02485 02486 /* 8. Insert whatever last node ended up being in 02487 the previous step into the common ancestor node, 02488 first removing it from its previous parent node if 02489 any. */ 02490 if($last_node->parentNode !== null) { 02491 $last_node->parentNode->removeChild($last_node); 02492 } 02493 02494 $common_ancestor->appendChild($last_node); 02495 02496 /* 9. Perform a shallow clone of the formatting 02497 element. */ 02498 $clone = $formatting_element->cloneNode(); 02499 02500 /* 10. Take all of the child nodes of the furthest 02501 block and append them to the clone created in the 02502 last step. */ 02503 while($furthest_block->hasChildNodes()) { 02504 $child = $furthest_block->firstChild; 02505 $furthest_block->removeChild($child); 02506 $clone->appendChild($child); 02507 } 02508 02509 /* 11. Append that clone to the furthest block. */ 02510 $furthest_block->appendChild($clone); 02511 02512 /* 12. Remove the formatting element from the list 02513 of active formatting elements, and insert the clone 02514 into the list of active formatting elements at the 02515 position of the aforementioned bookmark. 
*/ 02516 $fe_af_pos = array_search($formatting_element, $this->a_formatting, true); 02517 unset($this->a_formatting[$fe_af_pos]); 02518 $this->a_formatting = array_merge($this->a_formatting); 02519 02520 $af_part1 = array_slice($this->a_formatting, 0, $bookmark - 1); 02521 $af_part2 = array_slice($this->a_formatting, $bookmark, count($this->a_formatting)); 02522 $this->a_formatting = array_merge($af_part1, array($clone), $af_part2); 02523 02524 /* 13. Remove the formatting element from the stack 02525 of open elements, and insert the clone into the stack 02526 of open elements immediately after (i.e. in a more 02527 deeply nested position than) the position of the 02528 furthest block in that stack. */ 02529 $fe_s_pos = array_search($formatting_element, $this->stack, true); 02530 $fb_s_pos = array_search($furthest_block, $this->stack, true); 02531 unset($this->stack[$fe_s_pos]); 02532 02533 $s_part1 = array_slice($this->stack, 0, $fb_s_pos); 02534 $s_part2 = array_slice($this->stack, $fb_s_pos + 1, count($this->stack)); 02535 $this->stack = array_merge($s_part1, array($clone), $s_part2); 02536 02537 /* 14. Jump back to step 1 in this series of steps. */ 02538 unset($formatting_element, $fe_af_pos, $fe_s_pos, $furthest_block); 02539 } 02540 break; 02541 02542 /* An end tag token whose tag name is one of: "button", 02543 "marquee", "object" */ 02544 case 'button': case 'marquee': case 'object': 02545 /* If the stack of open elements has an element in scope whose 02546 tag name matches the tag name of the token, then generate implied 02547 tags. */ 02548 if($this->elementInScope($token['name'])) { 02549 $this->generateImpliedEndTags(); 02550 02551 /* Now, if the current node is not an element with the same 02552 tag name as the token, then this is a parse error. 
*/ 02553 // k 02554 02555 /* Now, if the stack of open elements has an element in scope 02556 whose tag name matches the tag name of the token, then pop 02557 elements from the stack until that element has been popped from 02558 the stack, and clear the list of active formatting elements up 02559 to the last marker. */ 02560 for($n = count($this->stack) - 1; $n >= 0; $n--) { 02561 if($this->stack[$n]->nodeName === $token['name']) { 02562 $n = -1; 02563 } 02564 02565 array_pop($this->stack); 02566 } 02567 02568 $marker = end(array_keys($this->a_formatting, self::MARKER, true)); 02569 02570 for($n = count($this->a_formatting) - 1; $n > $marker; $n--) { 02571 array_pop($this->a_formatting); 02572 } 02573 } 02574 break; 02575 02576 /* Or an end tag whose tag name is one of: "area", "basefont", 02577 "bgsound", "br", "embed", "hr", "iframe", "image", "img", 02578 "input", "isindex", "noembed", "noframes", "param", "select", 02579 "spacer", "table", "textarea", "wbr" */ 02580 case 'area': case 'basefont': case 'bgsound': case 'br': 02581 case 'embed': case 'hr': case 'iframe': case 'image': 02582 case 'img': case 'input': case 'isindex': case 'noembed': 02583 case 'noframes': case 'param': case 'select': case 'spacer': 02584 case 'table': case 'textarea': case 'wbr': 02585 // Parse error. Ignore the token. 02586 break; 02587 02588 /* An end tag token not covered by the previous entries */ 02589 default: 02590 for($n = count($this->stack) - 1; $n >= 0; $n--) { 02591 /* Initialise node to be the current node (the bottommost 02592 node of the stack). */ 02593 $node = end($this->stack); 02594 02595 /* If node has the same tag name as the end tag token, 02596 then: */ 02597 if($token['name'] === $node->nodeName) { 02598 /* Generate implied end tags. */ 02599 $this->generateImpliedEndTags(); 02600 02601 /* If the tag name of the end tag token does not 02602 match the tag name of the current node, this is a 02603 parse error. 
*/ 02604 // k 02605 02606 /* Pop all the nodes from the current node up to 02607 node, including node, then stop this algorithm. */ 02608 for($x = count($this->stack) - $n; $x >= $n; $x--) { 02609 array_pop($this->stack); 02610 } 02611 02612 } else { 02613 $category = $this->getElementCategory($node); 02614 02615 if($category !== self::SPECIAL && $category !== self::SCOPING) { 02616 /* Otherwise, if node is in neither the formatting 02617 category nor the phrasing category, then this is a 02618 parse error. Stop this algorithm. The end tag token 02619 is ignored. */ 02620 return false; 02621 } 02622 } 02623 } 02624 break; 02625 } 02626 break; 02627 } 02628 } 02629 02630 private function inTable($token) { 02631 $clear = array('html', 'table'); 02632 02633 /* A character token that is one of one of U+0009 CHARACTER TABULATION, 02634 U+000A LINE FEED (LF), U+000B LINE TABULATION, U+000C FORM FEED (FF), 02635 or U+0020 SPACE */ 02636 if($token['type'] === HTML5::CHARACTR && 02637 preg_match('/^[\t\n\x0b\x0c ]+$/', $token['data'])) { 02638 /* Append the character to the current node. */ 02639 $text = $this->dom->createTextNode($token['data']); 02640 end($this->stack)->appendChild($text); 02641 02642 /* A comment token */ 02643 } elseif($token['type'] === HTML5::COMMENT) { 02644 /* Append a Comment node to the current node with the data 02645 attribute set to the data given in the comment token. */ 02646 $comment = $this->dom->createComment($token['data']); 02647 end($this->stack)->appendChild($comment); 02648 02649 /* A start tag whose tag name is "caption" */ 02650 } elseif($token['type'] === HTML5::STARTTAG && 02651 $token['name'] === 'caption') { 02652 /* Clear the stack back to a table context. */ 02653 $this->clearStackToTableContext($clear); 02654 02655 /* Insert a marker at the end of the list of active 02656 formatting elements. 
*/ 02657 $this->a_formatting[] = self::MARKER; 02658 02659 /* Insert an HTML element for the token, then switch the 02660 insertion mode to "in caption". */ 02661 $this->insertElement($token); 02662 $this->mode = self::IN_CAPTION; 02663 02664 /* A start tag whose tag name is "colgroup" */ 02665 } elseif($token['type'] === HTML5::STARTTAG && 02666 $token['name'] === 'colgroup') { 02667 /* Clear the stack back to a table context. */ 02668 $this->clearStackToTableContext($clear); 02669 02670 /* Insert an HTML element for the token, then switch the 02671 insertion mode to "in column group". */ 02672 $this->insertElement($token); 02673 $this->mode = self::IN_CGROUP; 02674 02675 /* A start tag whose tag name is "col" */ 02676 } elseif($token['type'] === HTML5::STARTTAG && 02677 $token['name'] === 'col') { 02678 $this->inTable(array( 02679 'name' => 'colgroup', 02680 'type' => HTML5::STARTTAG, 02681 'attr' => array() 02682 )); 02683 02684 $this->inColumnGroup($token); 02685 02686 /* A start tag whose tag name is one of: "tbody", "tfoot", "thead" */ 02687 } elseif($token['type'] === HTML5::STARTTAG && in_array($token['name'], 02688 array('tbody', 'tfoot', 'thead'))) { 02689 /* Clear the stack back to a table context. */ 02690 $this->clearStackToTableContext($clear); 02691 02692 /* Insert an HTML element for the token, then switch the insertion 02693 mode to "in table body". */ 02694 $this->insertElement($token); 02695 $this->mode = self::IN_TBODY; 02696 02697 /* A start tag whose tag name is one of: "td", "th", "tr" */ 02698 } elseif($token['type'] === HTML5::STARTTAG && 02699 in_array($token['name'], array('td', 'th', 'tr'))) { 02700 /* Act as if a start tag token with the tag name "tbody" had been 02701 seen, then reprocess the current token. 
*/ 02702 $this->inTable(array( 02703 'name' => 'tbody', 02704 'type' => HTML5::STARTTAG, 02705 'attr' => array() 02706 )); 02707 02708 return $this->inTableBody($token); 02709 02710 /* A start tag whose tag name is "table" */ 02711 } elseif($token['type'] === HTML5::STARTTAG && 02712 $token['name'] === 'table') { 02713 /* Parse error. Act as if an end tag token with the tag name "table" 02714 had been seen, then, if that token wasn't ignored, reprocess the 02715 current token. */ 02716 $this->inTable(array( 02717 'name' => 'table', 02718 'type' => HTML5::ENDTAG 02719 )); 02720 02721 return $this->mainPhase($token); 02722 02723 /* An end tag whose tag name is "table" */ 02724 } elseif($token['type'] === HTML5::ENDTAG && 02725 $token['name'] === 'table') { 02726 /* If the stack of open elements does not have an element in table 02727 scope with the same tag name as the token, this is a parse error. 02728 Ignore the token. (innerHTML case) */ 02729 if(!$this->elementInScope($token['name'], true)) { 02730 return false; 02731 02732 /* Otherwise: */ 02733 } else { 02734 /* Generate implied end tags. */ 02735 $this->generateImpliedEndTags(); 02736 02737 /* Now, if the current node is not a table element, then this 02738 is a parse error. */ 02739 // w/e 02740 02741 /* Pop elements from this stack until a table element has been 02742 popped from the stack. */ 02743 while(true) { 02744 $current = end($this->stack)->nodeName; 02745 array_pop($this->stack); 02746 02747 if($current === 'table') { 02748 break; 02749 } 02750 } 02751 02752 /* Reset the insertion mode appropriately. 
*/ 02753 $this->resetInsertionMode(); 02754 } 02755 02756 /* An end tag whose tag name is one of: "body", "caption", "col", 02757 "colgroup", "html", "tbody", "td", "tfoot", "th", "thead", "tr" */ 02758 } elseif($token['type'] === HTML5::ENDTAG && in_array($token['name'], 02759 array('body', 'caption', 'col', 'colgroup', 'html', 'tbody', 'td', 02760 'tfoot', 'th', 'thead', 'tr'))) { 02761 // Parse error. Ignore the token. 02762 02763 /* Anything else */ 02764 } else { 02765 /* Parse error. Process the token as if the insertion mode was "in 02766 body", with the following exception: */ 02767 02768 /* If the current node is a table, tbody, tfoot, thead, or tr 02769 element, then, whenever a node would be inserted into the current 02770 node, it must instead be inserted into the foster parent element. */ 02771 if(in_array(end($this->stack)->nodeName, 02772 array('table', 'tbody', 'tfoot', 'thead', 'tr'))) { 02773 /* The foster parent element is the parent element of the last 02774 table element in the stack of open elements, if there is a 02775 table element and it has such a parent element. If there is no 02776 table element in the stack of open elements (innerHTML case), 02777 then the foster parent element is the first element in the 02778 stack of open elements (the html element). Otherwise, if there 02779 is a table element in the stack of open elements, but the last 02780 table element in the stack of open elements has no parent, or 02781 its parent node is not an element, then the foster parent 02782 element is the element before the last table element in the 02783 stack of open elements. 
*/
                for($n = count($this->stack) - 1; $n >= 0; $n--) {
                    if($this->stack[$n]->nodeName === 'table') {
                        $table = $this->stack[$n];
                        break;
                    }
                }

                if(isset($table) && $table->parentNode !== null) {
                    $this->foster_parent = $table->parentNode;

                } elseif(!isset($table)) {
                    $this->foster_parent = $this->stack[0];

                } elseif(isset($table) && ($table->parentNode === null ||
                $table->parentNode->nodeType !== XML_ELEMENT_NODE)) {
                    $this->foster_parent = $this->stack[$n - 1];
                }
            }

            $this->inBody($token);
        }
    }

    /**
     * Processes one token while the insertion mode is "in caption".
     *
     * A "caption" end tag closes the caption and switches the mode back to
     * "in table"; table-structure start tags (and a "table" end tag) imply
     * that end tag and are then handed to inTable(); stray table-related end
     * tags are dropped; everything else is delegated to inBody().
     *
     * @param array $token Token array; 'type' is always read, 'name' is read
     *                     for start/end tags.
     */
    private function inCaption($token) {
        /* An end tag whose tag name is "caption" */
        if($token['type'] === HTML5::ENDTAG && $token['name'] === 'caption') {
            /* If the stack of open elements does not have an element in table
            scope with the same tag name as the token, this is a parse error.
            Ignore the token. (innerHTML case) */
            if(!$this->elementInScope($token['name'], true)) {
                // Ignore

            /* Otherwise: */
            } else {
                /* Generate implied end tags. */
                $this->generateImpliedEndTags();

                /* Now, if the current node is not a caption element, then this
                is a parse error. */
                // w/e

                /* Pop elements from this stack until a caption element has
                been popped from the stack. */
                while(true) {
                    $node = end($this->stack)->nodeName;
                    array_pop($this->stack);

                    if($node === 'caption') {
                        break;
                    }
                }

                /* Clear the list of active formatting elements up to the last
                marker. */
                $this->clearTheActiveFormattingElementsUpToTheLastMarker();

                /* Switch the insertion mode to "in table". */
                $this->mode = self::IN_TABLE;
            }

        /* A start tag whose tag name is one of: "caption", "col", "colgroup",
        "tbody", "td", "tfoot", "th", "thead", "tr", or an end tag whose tag
        name is "table" */
        } elseif(($token['type'] === HTML5::STARTTAG && in_array($token['name'],
        array('caption', 'col', 'colgroup', 'tbody', 'td', 'tfoot', 'th',
        'thead', 'tr'))) || ($token['type'] === HTML5::ENDTAG &&
        $token['name'] === 'table')) {
            /* Parse error. Act as if an end tag with the tag name "caption"
            had been seen, then, if that token wasn't ignored, reprocess the
            current token. */
            $this->inCaption(array(
                'name' => 'caption',
                'type' => HTML5::ENDTAG
            ));

            return $this->inTable($token);

        /* An end tag whose tag name is one of: "body", "col", "colgroup",
        "html", "tbody", "td", "tfoot", "th", "thead", "tr" */
        } elseif($token['type'] === HTML5::ENDTAG && in_array($token['name'],
        array('body', 'col', 'colgroup', 'html', 'tbody', 'td', 'tfoot', 'th',
        'thead', 'tr'))) {
            // Parse error. Ignore the token.
            // ("td" was missing from this list, so a stray </td> inside a
            // caption used to fall through to the "in body" rules.)

        /* Anything else */
        } else {
            /* Process the token as if the insertion mode was "in body". */
            $this->inBody($token);
        }
    }

    /**
     * Processes one token while the insertion mode is "in column group".
     *
     * Whitespace and comments are appended to the current node, "col" start
     * tags insert a col element that is popped again immediately, a
     * "colgroup" end tag pops the current node and returns to "in table",
     * and any other token implies that end tag before being handed to
     * inTable().
     *
     * @param array $token Token array ('type', plus 'name' or 'data').
     */
    private function inColumnGroup($token) {
        /* A character token that is one of U+0009 CHARACTER TABULATION,
        U+000A LINE FEED (LF), U+000B LINE TABULATION, U+000C FORM FEED (FF),
        or U+0020 SPACE */
        if($token['type'] === HTML5::CHARACTR &&
        preg_match('/^[\t\n\x0b\x0c ]+$/', $token['data'])) {
            /* Append the character to the current node. */
            $text = $this->dom->createTextNode($token['data']);
            end($this->stack)->appendChild($text);

        /* A comment token */
        } elseif($token['type'] === HTML5::COMMENT) {
            /* Append a Comment node to the current node with the data
            attribute set to the data given in the comment token. */
            $comment = $this->dom->createComment($token['data']);
            end($this->stack)->appendChild($comment);

        /* A start tag whose tag name is "col" */
        } elseif($token['type'] === HTML5::STARTTAG && $token['name'] === 'col') {
            /* Insert a col element for the token. Immediately pop the current
            node off the stack of open elements. */
            $this->insertElement($token);
            array_pop($this->stack);

        /* An end tag whose tag name is "colgroup" */
        } elseif($token['type'] === HTML5::ENDTAG &&
        $token['name'] === 'colgroup') {
            /* If the current node is the root html element, then this is a
            parse error, ignore the token. (innerHTML case) */
            if(end($this->stack)->nodeName === 'html') {
                // Ignore

            /* Otherwise, pop the current node (which will be a colgroup
            element) from the stack of open elements. Switch the insertion
            mode to "in table". */
            } else {
                array_pop($this->stack);
                $this->mode = self::IN_TABLE;
            }

        /* An end tag whose tag name is "col" */
        } elseif($token['type'] === HTML5::ENDTAG && $token['name'] === 'col') {
            /* Parse error. Ignore the token. */

        /* Anything else */
        } else {
            /* Act as if an end tag with the tag name "colgroup" had been seen,
            and then, if that token wasn't ignored, reprocess the current token.
*/
            $this->inColumnGroup(array(
                'name' => 'colgroup',
                'type' => HTML5::ENDTAG
            ));

            return $this->inTable($token);
        }
    }

    /**
     * Processes one token while the insertion mode is "in table body".
     *
     * Opens rows (implying a "tr" for bare cells), closes the current
     * tbody/tfoot/thead section on its end tag or when a token that belongs
     * to the table level arrives, drops stray end tags, and otherwise falls
     * back to inTable().
     *
     * @param array $token Token array ('type', plus 'name' for tags).
     */
    private function inTableBody($token) {
        $clear = array('tbody', 'tfoot', 'thead', 'html');

        /* A start tag whose tag name is "tr" */
        if($token['type'] === HTML5::STARTTAG && $token['name'] === 'tr') {
            /* Clear the stack back to a table body context. */
            $this->clearStackToTableContext($clear);

            /* Insert a tr element for the token, then switch the insertion
            mode to "in row". */
            $this->insertElement($token);
            $this->mode = self::IN_ROW;

        /* A start tag whose tag name is one of: "th", "td" */
        } elseif($token['type'] === HTML5::STARTTAG &&
        ($token['name'] === 'th' || $token['name'] === 'td')) {
            /* Parse error. Act as if a start tag with the tag name "tr" had
            been seen, then reprocess the current token. */
            $this->inTableBody(array(
                'name' => 'tr',
                'type' => HTML5::STARTTAG,
                'attr' => array()
            ));

            return $this->inRow($token);

        /* An end tag whose tag name is one of: "tbody", "tfoot", "thead" */
        } elseif($token['type'] === HTML5::ENDTAG &&
        in_array($token['name'], array('tbody', 'tfoot', 'thead'))) {
            /* If the stack of open elements does not have an element in table
            scope with the same tag name as the token, this is a parse error.
            Ignore the token. */
            if(!$this->elementInScope($token['name'], true)) {
                // Ignore

            /* Otherwise: */
            } else {
                /* Clear the stack back to a table body context. */
                $this->clearStackToTableContext($clear);

                /* Pop the current node from the stack of open elements. Switch
                the insertion mode to "in table". */
                array_pop($this->stack);
                $this->mode = self::IN_TABLE;
            }

        /* A start tag whose tag name is one of: "caption", "col", "colgroup",
        "tbody", "tfoot", "thead", or an end tag whose tag name is "table" */
        } elseif(($token['type'] === HTML5::STARTTAG && in_array($token['name'],
        array('caption', 'col', 'colgroup', 'tbody', 'tfoot', 'thead'))) ||
        ($token['type'] === HTML5::ENDTAG && $token['name'] === 'table')) {
            // Two fixes here: the list used to say 'tfoor', and the second
            // clause tested STARTTAG although the rule is about </table>.

            /* If the stack of open elements does not have a tbody, thead, or
            tfoot element in table scope, this is a parse error. Ignore the
            token. (innerHTML case) */
            if(!$this->elementInScope(array('tbody', 'thead', 'tfoot'), true)) {
                // Ignore.

            /* Otherwise: */
            } else {
                /* Clear the stack back to a table body context. */
                $this->clearStackToTableContext($clear);

                /* Act as if an end tag with the same tag name as the current
                node ("tbody", "tfoot", or "thead") had been seen, then
                reprocess the current token. */
                $this->inTableBody(array(
                    'name' => end($this->stack)->nodeName,
                    'type' => HTML5::ENDTAG
                ));

                return $this->mainPhase($token);
            }

        /* An end tag whose tag name is one of: "body", "caption", "col",
        "colgroup", "html", "td", "th", "tr" */
        } elseif($token['type'] === HTML5::ENDTAG && in_array($token['name'],
        array('body', 'caption', 'col', 'colgroup', 'html', 'td', 'th', 'tr'))) {
            /* Parse error. Ignore the token. */

        /* Anything else */
        } else {
            /* Process the token as if the insertion mode was "in table". */
            $this->inTable($token);
        }
    }

    /**
     * Processes one token while the insertion mode is "in row".
     *
     * Opens cells, closes the row on a "tr" end tag, and implies that end
     * tag for tokens that belong to the table-body or table level; those
     * tokens are then reprocessed by inTableBody(), because closing the row
     * switches the insertion mode to "in table body".
     *
     * @param array $token Token array ('type', plus 'name' for tags).
     */
    private function inRow($token) {
        $clear = array('tr', 'html');

        /* A start tag whose tag name is one of: "th", "td" */
        if($token['type'] === HTML5::STARTTAG &&
        ($token['name'] === 'th' || $token['name'] === 'td')) {
            /* Clear the stack back to a table row context. */
            $this->clearStackToTableContext($clear);

            /* Insert an HTML element for the token, then switch the insertion
            mode to "in cell". */
            $this->insertElement($token);
            $this->mode = self::IN_CELL;

            /* Insert a marker at the end of the list of active formatting
            elements. */
            $this->a_formatting[] = self::MARKER;

        /* An end tag whose tag name is "tr" */
        } elseif($token['type'] === HTML5::ENDTAG && $token['name'] === 'tr') {
            /* If the stack of open elements does not have an element in table
            scope with the same tag name as the token, this is a parse error.
            Ignore the token. (innerHTML case) */
            if(!$this->elementInScope($token['name'], true)) {
                // Ignore.

            /* Otherwise: */
            } else {
                /* Clear the stack back to a table row context. */
                $this->clearStackToTableContext($clear);

                /* Pop the current node (which will be a tr element) from the
                stack of open elements. Switch the insertion mode to "in table
                body". */
                array_pop($this->stack);
                $this->mode = self::IN_TBODY;
            }

        /* A start tag whose tag name is one of: "caption", "col", "colgroup",
        "tbody", "tfoot", "thead", "tr" or an end tag whose tag name is "table" */
        } elseif(($token['type'] === HTML5::STARTTAG && in_array($token['name'],
        array('caption', 'col', 'colgroup', 'tbody', 'tfoot', 'thead', 'tr'))) ||
        ($token['type'] === HTML5::ENDTAG && $token['name'] === 'table')) {
            /* Act as if an end tag with the tag name "tr" had been seen, then,
            if that token wasn't ignored, reprocess the current token. */
            if($this->elementInScope('tr', true)) {
                $this->inRow(array(
                    'name' => 'tr',
                    'type' => HTML5::ENDTAG
                ));

                // The implied </tr> left us in "in table body". The token
                // used to be passed to inCell(), which discards it because
                // no td/th is in table scope any more.
                return $this->inTableBody($token);
            }
            // No tr in table scope: the implied end tag would have been
            // ignored, so the current token is ignored as well.

        /* An end tag whose tag name is one of: "tbody", "tfoot", "thead" */
        } elseif($token['type'] === HTML5::ENDTAG &&
        in_array($token['name'], array('tbody', 'tfoot', 'thead'))) {
            /* If the stack of open elements does not have an element in table
            scope with the same tag name as the token, this is a parse error.
            Ignore the token. */
            if(!$this->elementInScope($token['name'], true)) {
                // Ignore.

            /* Otherwise: */
            } else {
                /* Otherwise, act as if an end tag with the tag name "tr" had
                been seen, then reprocess the current token. */
                $this->inRow(array(
                    'name' => 'tr',
                    'type' => HTML5::ENDTAG
                ));

                // Reprocess in "in table body", the mode set by the </tr>.
                return $this->inTableBody($token);
            }

        /* An end tag whose tag name is one of: "body", "caption", "col",
        "colgroup", "html", "td", "th" */
        } elseif($token['type'] === HTML5::ENDTAG && in_array($token['name'],
        array('body', 'caption', 'col', 'colgroup', 'html', 'td', 'th'))) {
            /* Parse error. Ignore the token. */

        /* Anything else */
        } else {
            /* Process the token as if the insertion mode was "in table". */
            $this->inTable($token);
        }
    }

    /**
     * Processes one token while the insertion mode is "in cell".
     *
     * A td/th end tag closes the cell and returns to "in row"; tokens that
     * belong to an outer table level close the cell and are reprocessed by
     * inRow(); everything else is delegated to inBody().
     *
     * @param array $token Token array ('type', plus 'name' for tags).
     */
    private function inCell($token) {
        /* An end tag whose tag name is one of: "td", "th" */
        if($token['type'] === HTML5::ENDTAG &&
        ($token['name'] === 'td' || $token['name'] === 'th')) {
            /* If the stack of open elements does not have an element in table
            scope with the same tag name as that of the token, then this is a
            parse error and the token must be ignored. */
            if(!$this->elementInScope($token['name'], true)) {
                // Ignore.

            /* Otherwise: */
            } else {
                /* Generate implied end tags, except for elements with the same
                tag name as the token.
*/
                $this->generateImpliedEndTags(array($token['name']));

                /* Now, if the current node is not an element with the same tag
                name as the token, then this is a parse error. */
                // k

                /* Pop elements from this stack until an element with the same
                tag name as the token has been popped from the stack. */
                while(true) {
                    $node = end($this->stack)->nodeName;
                    array_pop($this->stack);

                    if($node === $token['name']) {
                        break;
                    }
                }

                /* Clear the list of active formatting elements up to the last
                marker. */
                $this->clearTheActiveFormattingElementsUpToTheLastMarker();

                /* Switch the insertion mode to "in row". (The current node
                will be a tr element at this point.) */
                $this->mode = self::IN_ROW;
            }

        /* A start tag whose tag name is one of: "caption", "col", "colgroup",
        "tbody", "td", "tfoot", "th", "thead", "tr" */
        } elseif($token['type'] === HTML5::STARTTAG && in_array($token['name'],
        array('caption', 'col', 'colgroup', 'tbody', 'td', 'tfoot', 'th',
        'thead', 'tr'))) {
            // NOTE: an identical copy of this branch used to follow directly
            // below; it could never be reached and has been removed.

            /* If the stack of open elements does not have a td or th element
            in table scope, then this is a parse error; ignore the token.
            (innerHTML case) */
            if(!$this->elementInScope(array('td', 'th'), true)) {
                // Ignore.

            /* Otherwise, close the cell (see below) and reprocess the current
            token. */
            } else {
                $this->closeCell();
                return $this->inRow($token);
            }

        /* An end tag whose tag name is one of: "body", "caption", "col",
        "colgroup", "html" */
        } elseif($token['type'] === HTML5::ENDTAG && in_array($token['name'],
        array('body', 'caption', 'col', 'colgroup', 'html'))) {
            /* Parse error. Ignore the token. */

        /* An end tag whose tag name is one of: "table", "tbody", "tfoot",
        "thead", "tr" */
        } elseif($token['type'] === HTML5::ENDTAG && in_array($token['name'],
        array('table', 'tbody', 'tfoot', 'thead', 'tr'))) {
            /* If the stack of open elements does not have an element in table
            scope with the same tag name as that of the token (which can only
            happen for "tbody", "tfoot" and "thead", or, in the innerHTML case),
            then this is a parse error and the token must be ignored. */
            if(!$this->elementInScope($token['name'], true)) {
                // Ignore.

            /* Otherwise, close the cell (see below) and reprocess the current
            token. */
            } else {
                $this->closeCell();
                return $this->inRow($token);
            }

        /* Anything else */
        } else {
            /* Process the token as if the insertion mode was "in body". */
            $this->inBody($token);
        }
    }

    /**
     * Processes one token while the insertion mode is "in select".
     *
     * Text and comments are inserted at the current node; option/optgroup
     * start tags first imply the end tags of an open option/optgroup; the
     * remaining branches deal with closing option, optgroup and select.
     *
     * @param array $token Token array ('type', plus 'name' or 'data').
     */
    private function inSelect($token) {
        /* Handle the token as follows: */

        /* A character token */
        if($token['type'] === HTML5::CHARACTR) {
            /* Append the token's character to the current node. */
            $this->insertText($token['data']);

        /* A comment token */
        } elseif($token['type'] === HTML5::COMMENT) {
            /* Append a Comment node to the current node with the data
            attribute set to the data given in the comment token. */
            $this->insertComment($token['data']);

        /* A start tag token whose tag name is "option" */
        } elseif($token['type'] === HTML5::STARTTAG &&
        $token['name'] === 'option') {
            /* If the current node is an option element, act as if an end tag
            with the tag name "option" had been seen. */
            if(end($this->stack)->nodeName === 'option') {
                $this->inSelect(array(
                    'name' => 'option',
                    'type' => HTML5::ENDTAG
                ));
            }

            /* Insert an HTML element for the token. */
            $this->insertElement($token);

        /* A start tag token whose tag name is "optgroup" */
        } elseif($token['type'] === HTML5::STARTTAG &&
        $token['name'] === 'optgroup') {
            /* If the current node is an option element, act as if an end tag
            with the tag name "option" had been seen. */
            if(end($this->stack)->nodeName === 'option') {
                $this->inSelect(array(
                    'name' => 'option',
                    'type' => HTML5::ENDTAG
                ));
            }

            /* If the current node is an optgroup element, act as if an end tag
            with the tag name "optgroup" had been seen. */
            if(end($this->stack)->nodeName === 'optgroup') {
                $this->inSelect(array(
                    'name' => 'optgroup',
                    'type' => HTML5::ENDTAG
                ));
            }

            /* Insert an HTML element for the token.
*/
            $this->insertElement($token);

        /* An end tag token whose tag name is "optgroup" */
        } elseif($token['type'] === HTML5::ENDTAG &&
        $token['name'] === 'optgroup') {
            /* First, if the current node is an option element, and the node
            immediately before it in the stack of open elements is an optgroup
            element, then act as if an end tag with the tag name "option" had
            been seen. */
            $elements_in_stack = count($this->stack);

            if($elements_in_stack >= 2 &&
            $this->stack[$elements_in_stack - 1]->nodeName === 'option' &&
            $this->stack[$elements_in_stack - 2]->nodeName === 'optgroup') {
                $this->inSelect(array(
                    'name' => 'option',
                    'type' => HTML5::ENDTAG
                ));
            }

            /* If the current node is an optgroup element, then pop that node
            from the stack of open elements. Otherwise, this is a parse error,
            ignore the token. */
            // Re-read the current node: the implied </option> above may have
            // shrunk the stack. The old test compared the node object itself
            // with the string 'optgroup', which is never identical, so the
            // optgroup was never popped.
            if(end($this->stack)->nodeName === 'optgroup') {
                array_pop($this->stack);
            }

        /* An end tag token whose tag name is "option" */
        } elseif($token['type'] === HTML5::ENDTAG &&
        $token['name'] === 'option') {
            /* If the current node is an option element, then pop that node
            from the stack of open elements. Otherwise, this is a parse error,
            ignore the token. */
            if(end($this->stack)->nodeName === 'option') {
                array_pop($this->stack);
            }

        /* An end tag whose tag name is "select" */
        } elseif($token['type'] === HTML5::ENDTAG &&
        $token['name'] === 'select') {
            /* If the stack of open elements does not have an element in table
            scope with the same tag name as the token, this is a parse error.
            Ignore the token. (innerHTML case) */
            if(!$this->elementInScope($token['name'], true)) {
                // w/e

            /* Otherwise: */
            } else {
                /* Pop elements from the stack of open elements until a select
                element has been popped from the stack. */
                while(true) {
                    $current = end($this->stack)->nodeName;
                    array_pop($this->stack);

                    if($current === 'select') {
                        break;
                    }
                }

                /* Reset the insertion mode appropriately. */
                $this->resetInsertionMode();
            }

        /* A start tag whose tag name is "select" */
        } elseif($token['type'] === HTML5::STARTTAG &&
        $token['name'] === 'select') {
            /* Parse error. Act as if the token had been an end tag with the
            tag name "select" instead. */
            $this->inSelect(array(
                'name' => 'select',
                'type' => HTML5::ENDTAG
            ));

        /* An end tag whose tag name is one of: "caption", "table", "tbody",
        "tfoot", "thead", "tr", "td", "th" */
        } elseif($token['type'] === HTML5::ENDTAG && in_array($token['name'],
        array('caption', 'table', 'tbody', 'tfoot', 'thead', 'tr', 'td', 'th'))) {
            /* Parse error. */
            // w/e

            /* If the stack of open elements has an element in table scope with
            the same tag name as that of the token, then act as if an end tag
            with the tag name "select" had been seen, and reprocess the token.
            Otherwise, ignore the token. */
            if($this->elementInScope($token['name'], true)) {
                $this->inSelect(array(
                    'name' => 'select',
                    'type' => HTML5::ENDTAG
                ));

                $this->mainPhase($token);
            }

        /* Anything else */
        } else {
            /* Parse error. Ignore the token. */
        }
    }

    /**
     * Processes one token while the insertion mode is "after body".
     *
     * Whitespace is handled by inBody(), comments are attached to the first
     * element of the stack of open elements, an "html" end tag moves the
     * parser to the trailing end phase, and any other token switches back to
     * "in body" and is reprocessed there.
     *
     * @param array $token Token array ('type', plus 'name' or 'data').
     */
    private function afterBody($token) {
        /* Handle the token as follows: */

        /* A character token that is one of U+0009 CHARACTER TABULATION,
        U+000A LINE FEED (LF), U+000B LINE TABULATION, U+000C FORM FEED (FF),
        or U+0020 SPACE */
        if($token['type'] === HTML5::CHARACTR &&
        preg_match('/^[\t\n\x0b\x0c ]+$/', $token['data'])) {
            /* Process the token as it would be processed if the insertion mode
            was "in body".
*/
            $this->inBody($token);

        /* A comment token */
        } elseif($token['type'] === HTML5::COMMENT) {
            /* Append a Comment node to the first element in the stack of open
            elements (the html element), with the data attribute set to the
            data given in the comment token. */
            $comment = $this->dom->createComment($token['data']);
            $this->stack[0]->appendChild($comment);

        /* An end tag with the tag name "html" */
        } elseif($token['type'] === HTML5::ENDTAG && $token['name'] === 'html') {
            /* If the parser was originally created in order to handle the
            setting of an element's innerHTML attribute, this is a parse error;
            ignore the token. (The element will be an html element in this
            case.) (innerHTML case) */

            /* Otherwise, switch to the trailing end phase. */
            $this->phase = self::END_PHASE;

        /* Anything else */
        } else {
            /* Parse error. Set the insertion mode to "in body" and reprocess
            the token. */
            $this->mode = self::IN_BODY;
            return $this->inBody($token);
        }
    }

    /**
     * Processes one token while the insertion mode is "in frameset".
     *
     * Whitespace and comments are inserted, frameset/frame start tags create
     * elements (frame is popped again immediately), a frameset end tag pops
     * the current node, "noframes" is delegated to inBody(), and every other
     * token is ignored.
     *
     * The token type is tested before the name in every branch, because a
     * non-whitespace character token also reaches those branches and such
     * tokens are only known to carry 'data' here.
     *
     * @param array $token Token array ('type', plus 'name' or 'data').
     */
    private function inFrameset($token) {
        /* Handle the token as follows: */

        /* A character token that is one of U+0009 CHARACTER TABULATION,
        U+000A LINE FEED (LF), U+000B LINE TABULATION, U+000C FORM FEED (FF),
        U+000D CARRIAGE RETURN (CR), or U+0020 SPACE */
        // "\r" added so the pattern matches the character set listed above.
        if($token['type'] === HTML5::CHARACTR &&
        preg_match('/^[\t\n\x0b\x0c\r ]+$/', $token['data'])) {
            /* Append the character to the current node. */
            $this->insertText($token['data']);

        /* A comment token */
        } elseif($token['type'] === HTML5::COMMENT) {
            /* Append a Comment node to the current node with the data
            attribute set to the data given in the comment token. */
            $this->insertComment($token['data']);

        /* A start tag with the tag name "frameset" */
        } elseif($token['type'] === HTML5::STARTTAG &&
        $token['name'] === 'frameset') {
            $this->insertElement($token);

        /* An end tag with the tag name "frameset" */
        } elseif($token['type'] === HTML5::ENDTAG &&
        $token['name'] === 'frameset') {
            /* If the current node is the root html element, then this is a
            parse error; ignore the token. (innerHTML case) */
            if(end($this->stack)->nodeName === 'html') {
                // Ignore

            } else {
                /* Otherwise, pop the current node from the stack of open
                elements. */
                array_pop($this->stack);

                /* If the parser was not originally created in order to handle
                the setting of an element's innerHTML attribute (innerHTML case),
                and the current node is no longer a frameset element, then change
                the insertion mode to "after frameset". */
                // Previously the mode changed unconditionally, which ended
                // frameset processing after the first nested </frameset>.
                if(end($this->stack)->nodeName !== 'frameset') {
                    $this->mode = self::AFTR_FRAME;
                }
            }

        /* A start tag with the tag name "frame" */
        } elseif($token['type'] === HTML5::STARTTAG &&
        $token['name'] === 'frame') {
            /* Insert an HTML element for the token. */
            $this->insertElement($token);

            /* Immediately pop the current node off the stack of open elements. */
            array_pop($this->stack);

        /* A start tag with the tag name "noframes" */
        } elseif($token['type'] === HTML5::STARTTAG &&
        $token['name'] === 'noframes') {
            /* Process the token as if the insertion mode had been "in body". */
            $this->inBody($token);

        /* Anything else */
        } else {
            /* Parse error. Ignore the token. */
        }
    }

    /**
     * Processes one token while the insertion mode is "after frameset".
     *
     * Whitespace and comments are inserted, an "html" end tag moves the
     * parser to the trailing end phase, "noframes" is delegated to inBody(),
     * and every other token is ignored. As in inFrameset(), the token type
     * is checked before 'name' is read.
     *
     * @param array $token Token array ('type', plus 'name' or 'data').
     */
    private function afterFrameset($token) {
        /* Handle the token as follows: */

        /* A character token that is one of U+0009 CHARACTER TABULATION,
        U+000A LINE FEED (LF), U+000B LINE TABULATION, U+000C FORM FEED (FF),
        U+000D CARRIAGE RETURN (CR), or U+0020 SPACE */
        // "\r" added so the pattern matches the character set listed above.
        if($token['type'] === HTML5::CHARACTR &&
        preg_match('/^[\t\n\x0b\x0c\r ]+$/', $token['data'])) {
            /* Append the character to the current node. */
            $this->insertText($token['data']);

        /* A comment token */
        } elseif($token['type'] === HTML5::COMMENT) {
            /* Append a Comment node to the current node with the data
            attribute set to the data given in the comment token. */
            $this->insertComment($token['data']);

        /* An end tag with the tag name "html" */
        } elseif($token['type'] === HTML5::ENDTAG &&
        $token['name'] === 'html') {
            /* Switch to the trailing end phase. */
            $this->phase = self::END_PHASE;

        /* A start tag with the tag name "noframes" */
        } elseif($token['type'] === HTML5::STARTTAG &&
        $token['name'] === 'noframes') {
            /* Process the token as if the insertion mode had been "in body". */
            $this->inBody($token);

        /* Anything else */
        } else {
            /* Parse error. Ignore the token. */
        }
    }

    /**
     * Processes one token during the trailing end phase.
     *
     * DOCTYPE tokens are ignored, comments are appended to the document,
     * whitespace is processed by mainPhase(), and any other character data
     * or tag switches the parser back to the main phase.
     *
     * @param array $token Token array ('type', plus 'name' or 'data').
     */
    private function trailingEndPhase($token) {
        /* After the main phase, as each token is emitted from the tokenisation
        stage, it must be processed as described in this section. */

        /* A DOCTYPE token */
        if($token['type'] === HTML5::DOCTYPE) {
            // Parse error. Ignore the token.

        /* A comment token */
        } elseif($token['type'] === HTML5::COMMENT) {
            /* Append a Comment node to the Document object with the data
            attribute set to the data given in the comment token.
*/
            $comment = $this->dom->createComment($token['data']);
            $this->dom->appendChild($comment);

        /* A character token that is one of U+0009 CHARACTER TABULATION,
        U+000A LINE FEED (LF), U+000B LINE TABULATION, U+000C FORM FEED (FF),
        or U+0020 SPACE */
        } elseif($token['type'] === HTML5::CHARACTR &&
        preg_match('/^[\t\n\x0b\x0c ]+$/', $token['data'])) {
            /* Process the token as it would be processed in the main phase. */
            $this->mainPhase($token);

        /* A character token that is not one of U+0009 CHARACTER TABULATION,
        U+000A LINE FEED (LF), U+000B LINE TABULATION, U+000C FORM FEED (FF),
        or U+0020 SPACE. Or a start tag token. Or an end tag token. */
        // The match must be negated here: without the "!" this clause
        // repeated the whitespace test above, so non-whitespace character
        // data arriving in this phase matched no branch and was dropped.
        } elseif(($token['type'] === HTML5::CHARACTR &&
        !preg_match('/^[\t\n\x0b\x0c ]+$/', $token['data'])) ||
        $token['type'] === HTML5::STARTTAG || $token['type'] === HTML5::ENDTAG) {
            /* Parse error. Switch back to the main phase and reprocess the
            token. */
            $this->phase = self::MAIN_PHASE;
            return $this->mainPhase($token);

        /* An end-of-file token */
        } elseif($token['type'] === HTML5::EOF) {
            /* OMG DONE!! */
        }
    }

    /**
     * Creates a DOM element for a start-tag token, copies its attributes,
     * inserts it at the current insertion point and pushes it onto the stack
     * of open elements.
     *
     * @param array $token  Start-tag token; 'name' and 'attr' (a list of
     *                      arrays with 'name' and 'value') are read.
     * @param bool  $append Not used by this method.
     * @param bool  $check  When true, the tag name is first reduced to ASCII
     *                      letters, digits and hyphens, leading hyphens and
     *                      digits are stripped, and an empty result becomes
     *                      "span".
     * @return DOMElement The element that was created and inserted.
     */
    private function insertElement($token, $append = true, $check = false) {
        // Proprietary workaround for libxml2's limitations with tag names
        if ($check) {
            // Slightly modified HTML5 tag-name modification,
            // removing anything that's not an ASCII letter, digit, or hyphen
            $token['name'] = preg_replace('/[^a-z0-9-]/i', '', $token['name']);
            // Remove leading hyphens and numbers
            $token['name'] = ltrim($token['name'], '-0..9');
            // In theory, this should never be needed, but just in case
            if ($token['name'] === '') $token['name'] = 'span'; // arbitrary generic choice
        }

        $el = $this->dom->createElement($token['name']);

        foreach($token['attr'] as $attr) {
            // First occurrence of a duplicated attribute wins
            if(!$el->hasAttribute($attr['name'])) {
                $el->setAttribute($attr['name'], $attr['value']);
            }
        }

        $this->appendToRealParent($el);
        $this->stack[] = $el;

        return $el;
    }

    /**
     * Creates a text node and inserts it at the current insertion point.
     *
     * @param string $data Text content of the node.
     */
    private function insertText($data) {
        $text = $this->dom->createTextNode($data);
        $this->appendToRealParent($text);
    }

    /**
     * Creates a comment node and inserts it at the current insertion point.
     *
     * @param string $data Comment content.
     */
    private function insertComment($data) {
        $comment = $this->dom->createComment($data);
        $this->appendToRealParent($comment);
    }

    /**
     * Inserts a node either into the current node or, when a foster parent
     * has been set, into that foster parent; the foster parent is cleared
     * again after one use.
     *
     * @param DOMNode $node Node to insert.
     */
    private function appendToRealParent($node) {
        if($this->foster_parent === null) {
            end($this->stack)->appendChild($node);

        } elseif($this->foster_parent !== null) {
            /* If the foster parent element is the parent element of the
            last table element in the stack of open elements, then the new
            node must be inserted immediately before the last table element
            in the stack of open elements in the foster parent element;
            otherwise, the new node must be appended to the foster parent
            element.
*/ 03583 for($n = count($this->stack) - 1; $n >= 0; $n--) { 03584 if($this->stack[$n]->nodeName === 'table' && 03585 $this->stack[$n]->parentNode !== null) { 03586 $table = $this->stack[$n]; 03587 break; 03588 } 03589 } 03590 03591 if(isset($table) && $this->foster_parent->isSameNode($table->parentNode)) 03592 $this->foster_parent->insertBefore($node, $table); 03593 else 03594 $this->foster_parent->appendChild($node); 03595 03596 $this->foster_parent = null; 03597 } 03598 } 03599 03600 private function elementInScope($el, $table = false) { 03601 if(is_array($el)) { 03602 foreach($el as $element) { 03603 if($this->elementInScope($element, $table)) { 03604 return true; 03605 } 03606 } 03607 03608 return false; 03609 } 03610 03611 $leng = count($this->stack); 03612 03613 for($n = 0; $n < $leng; $n++) { 03614 /* 1. Initialise node to be the current node (the bottommost node of 03615 the stack). */ 03616 $node = $this->stack[$leng - 1 - $n]; 03617 03618 if($node->tagName === $el) { 03619 /* 2. If node is the target node, terminate in a match state. */ 03620 return true; 03621 03622 } elseif($node->tagName === 'table') { 03623 /* 3. Otherwise, if node is a table element, terminate in a failure 03624 state. */ 03625 return false; 03626 03627 } elseif($table === true && in_array($node->tagName, array('caption', 'td', 03628 'th', 'button', 'marquee', 'object'))) { 03629 /* 4. Otherwise, if the algorithm is the "has an element in scope" 03630 variant (rather than the "has an element in table scope" variant), 03631 and node is one of the following, terminate in a failure state. */ 03632 return false; 03633 03634 } elseif($node === $node->ownerDocument->documentElement) { 03635 /* 5. Otherwise, if node is an html element (root element), terminate 03636 in a failure state. (This can only happen if the node is the topmost 03637 node of the stack of open elements, and prevents the next step from 03638 being invoked if there are no more elements in the stack.) 
*/ 03639 return false; 03640 } 03641 03642 /* Otherwise, set node to the previous entry in the stack of open 03643 elements and return to step 2. (This will never fail, since the loop 03644 will always terminate in the previous step if the top of the stack 03645 is reached.) */ 03646 } 03647 } 03648 03649 private function reconstructActiveFormattingElements() { 03650 /* 1. If there are no entries in the list of active formatting elements, 03651 then there is nothing to reconstruct; stop this algorithm. */ 03652 $formatting_elements = count($this->a_formatting); 03653 03654 if($formatting_elements === 0) { 03655 return false; 03656 } 03657 03658 /* 3. Let entry be the last (most recently added) element in the list 03659 of active formatting elements. */ 03660 $entry = end($this->a_formatting); 03661 03662 /* 2. If the last (most recently added) entry in the list of active 03663 formatting elements is a marker, or if it is an element that is in the 03664 stack of open elements, then there is nothing to reconstruct; stop this 03665 algorithm. */ 03666 if($entry === self::MARKER || in_array($entry, $this->stack, true)) { 03667 return false; 03668 } 03669 03670 for($a = $formatting_elements - 1; $a >= 0; true) { 03671 /* 4. If there are no entries before entry in the list of active 03672 formatting elements, then jump to step 8. */ 03673 if($a === 0) { 03674 $step_seven = false; 03675 break; 03676 } 03677 03678 /* 5. Let entry be the entry one earlier than entry in the list of 03679 active formatting elements. */ 03680 $a--; 03681 $entry = $this->a_formatting[$a]; 03682 03683 /* 6. If entry is neither a marker nor an element that is also in 03684 thetack of open elements, go to step 4. */ 03685 if($entry === self::MARKER || in_array($entry, $this->stack, true)) { 03686 break; 03687 } 03688 } 03689 03690 while(true) { 03691 /* 7. Let entry be the element one later than entry in the list of 03692 active formatting elements. 
*/ 03693 if(isset($step_seven) && $step_seven === true) { 03694 $a++; 03695 $entry = $this->a_formatting[$a]; 03696 } 03697 03698 /* 8. Perform a shallow clone of the element entry to obtain clone. */ 03699 $clone = $entry->cloneNode(); 03700 03701 /* 9. Append clone to the current node and push it onto the stack 03702 of open elements so that it is the new current node. */ 03703 end($this->stack)->appendChild($clone); 03704 $this->stack[] = $clone; 03705 03706 /* 10. Replace the entry for entry in the list with an entry for 03707 clone. */ 03708 $this->a_formatting[$a] = $clone; 03709 03710 /* 11. If the entry for clone in the list of active formatting 03711 elements is not the last entry in the list, return to step 7. */ 03712 if(end($this->a_formatting) !== $clone) { 03713 $step_seven = true; 03714 } else { 03715 break; 03716 } 03717 } 03718 } 03719 03720 private function clearTheActiveFormattingElementsUpToTheLastMarker() { 03721 /* When the steps below require the UA to clear the list of active 03722 formatting elements up to the last marker, the UA must perform the 03723 following steps: */ 03724 03725 while(true) { 03726 /* 1. Let entry be the last (most recently added) entry in the list 03727 of active formatting elements. */ 03728 $entry = end($this->a_formatting); 03729 03730 /* 2. Remove entry from the list of active formatting elements. */ 03731 array_pop($this->a_formatting); 03732 03733 /* 3. If entry was a marker, then stop the algorithm at this point. 03734 The list has been cleared up to the last marker. 
*/ 03735 if($entry === self::MARKER) { 03736 break; 03737 } 03738 } 03739 } 03740 03741 private function generateImpliedEndTags($exclude = array()) { 03742 /* When the steps below require the UA to generate implied end tags, 03743 then, if the current node is a dd element, a dt element, an li element, 03744 a p element, a td element, a th element, or a tr element, the UA must 03745 act as if an end tag with the respective tag name had been seen and 03746 then generate implied end tags again. */ 03747 $node = end($this->stack); 03748 $elements = array_diff(array('dd', 'dt', 'li', 'p', 'td', 'th', 'tr'), $exclude); 03749 03750 while(in_array(end($this->stack)->nodeName, $elements)) { 03751 array_pop($this->stack); 03752 } 03753 } 03754 03755 private function getElementCategory($node) { 03756 $name = $node->tagName; 03757 if(in_array($name, $this->special)) 03758 return self::SPECIAL; 03759 03760 elseif(in_array($name, $this->scoping)) 03761 return self::SCOPING; 03762 03763 elseif(in_array($name, $this->formatting)) 03764 return self::FORMATTING; 03765 03766 else 03767 return self::PHRASING; 03768 } 03769 03770 private function clearStackToTableContext($elements) { 03771 /* When the steps above require the UA to clear the stack back to a 03772 table context, it means that the UA must, while the current node is not 03773 a table element or an html element, pop elements from the stack of open 03774 elements. If this causes any elements to be popped from the stack, then 03775 this is a parse error. */ 03776 while(true) { 03777 $node = end($this->stack)->nodeName; 03778 03779 if(in_array($node, $elements)) { 03780 break; 03781 } else { 03782 array_pop($this->stack); 03783 } 03784 } 03785 } 03786 03787 private function resetInsertionMode() { 03788 /* 1. Let last be false. */ 03789 $last = false; 03790 $leng = count($this->stack); 03791 03792 for($n = $leng - 1; $n >= 0; $n--) { 03793 /* 2. Let node be the last node in the stack of open elements. 
*/ 03794 $node = $this->stack[$n]; 03795 03796 /* 3. If node is the first node in the stack of open elements, then 03797 set last to true. If the element whose innerHTML attribute is being 03798 set is neither a td element nor a th element, then set node to the 03799 element whose innerHTML attribute is being set. (innerHTML case) */ 03800 if($this->stack[0]->isSameNode($node)) { 03801 $last = true; 03802 } 03803 03804 /* 4. If node is a select element, then switch the insertion mode to 03805 "in select" and abort these steps. (innerHTML case) */ 03806 if($node->nodeName === 'select') { 03807 $this->mode = self::IN_SELECT; 03808 break; 03809 03810 /* 5. If node is a td or th element, then switch the insertion mode 03811 to "in cell" and abort these steps. */ 03812 } elseif($node->nodeName === 'td' || $node->nodeName === 'th') { 03813 $this->mode = self::IN_CELL; 03814 break; 03815 03816 /* 6. If node is a tr element, then switch the insertion mode to 03817 "in row" and abort these steps. */ 03818 } elseif($node->nodeName === 'tr') { 03819 $this->mode = self::IN_ROW; 03820 break; 03821 03822 /* 7. If node is a tbody, thead, or tfoot element, then switch the 03823 insertion mode to "in table body" and abort these steps. */ 03824 } elseif(in_array($node->nodeName, array('tbody', 'thead', 'tfoot'))) { 03825 $this->mode = self::IN_TBODY; 03826 break; 03827 03828 /* 8. If node is a caption element, then switch the insertion mode 03829 to "in caption" and abort these steps. */ 03830 } elseif($node->nodeName === 'caption') { 03831 $this->mode = self::IN_CAPTION; 03832 break; 03833 03834 /* 9. If node is a colgroup element, then switch the insertion mode 03835 to "in column group" and abort these steps. (innerHTML case) */ 03836 } elseif($node->nodeName === 'colgroup') { 03837 $this->mode = self::IN_CGROUP; 03838 break; 03839 03840 /* 10. If node is a table element, then switch the insertion mode 03841 to "in table" and abort these steps. 
*/ 03842 } elseif($node->nodeName === 'table') { 03843 $this->mode = self::IN_TABLE; 03844 break; 03845 03846 /* 11. If node is a head element, then switch the insertion mode 03847 to "in body" ("in body"! not "in head"!) and abort these steps. 03848 (innerHTML case) */ 03849 } elseif($node->nodeName === 'head') { 03850 $this->mode = self::IN_BODY; 03851 break; 03852 03853 /* 12. If node is a body element, then switch the insertion mode to 03854 "in body" and abort these steps. */ 03855 } elseif($node->nodeName === 'body') { 03856 $this->mode = self::IN_BODY; 03857 break; 03858 03859 /* 13. If node is a frameset element, then switch the insertion 03860 mode to "in frameset" and abort these steps. (innerHTML case) */ 03861 } elseif($node->nodeName === 'frameset') { 03862 $this->mode = self::IN_FRAME; 03863 break; 03864 03865 /* 14. If node is an html element, then: if the head element 03866 pointer is null, switch the insertion mode to "before head", 03867 otherwise, switch the insertion mode to "after head". In either 03868 case, abort these steps. (innerHTML case) */ 03869 } elseif($node->nodeName === 'html') { 03870 $this->mode = ($this->head_pointer === null) 03871 ? self::BEFOR_HEAD 03872 : self::AFTER_HEAD; 03873 03874 break; 03875 03876 /* 15. If last is true, then set the insertion mode to "in body" 03877 and abort these steps. (innerHTML case) */ 03878 } elseif($last) { 03879 $this->mode = self::IN_BODY; 03880 break; 03881 } 03882 } 03883 } 03884 03885 private function closeCell() { 03886 /* If the stack of open elements has a td or th element in table scope, 03887 then act as if an end tag token with that tag name had been seen. */ 03888 foreach(array('td', 'th') as $cell) { 03889 if($this->elementInScope($cell, true)) { 03890 $this->inCell(array( 03891 'name' => $cell, 03892 'type' => HTML5::ENDTAG 03893 )); 03894 03895 break; 03896 } 03897 } 03898 } 03899 03900 public function save() { 03901 return $this->dom; 03902 } 03903 } 03904 ?>