HTMLPurifier 4.4.0
/home/ezyang/Dev/htmlpurifier/library/HTMLPurifier/Strategy/RemoveForeignElements.php
Go to the documentation of this file.
00001 <?php
00002 
/**
 * Token-stream strategy that keeps only tags known to the HTML definition
 * obtained from the configuration. Unknown tags are either escaped to text
 * or dropped (optionally together with everything they contain), tag
 * transforms are applied, elements missing required attributes are removed
 * and comments are filtered or converted to text.
 */
class HTMLPurifier_Strategy_RemoveForeignElements extends HTMLPurifier_Strategy
{

    /**
     * Filters the given tokens and returns the ones that survive.
     *
     * @param array $tokens  Token objects to inspect, processed in order.
     * @param object $config  Configuration object; read through get() and
     *                        getHTMLDefinition().
     * @param object $context Context object; 'CurrentToken' is registered for
     *                        the duration of the call and destroyed afterwards.
     * @return array The tokens that were kept, in their original order
     *               (possibly transformed or replaced by text tokens).
     */
    public function execute($tokens, $config, $context) {
        $definition = $config->getHTMLDefinition();
        $generator  = new HTMLPurifier_Generator($config, $context);
        $kept       = array();

        $escape_foreign   = $config->get('Core.EscapeInvalidTags');
        $drop_invalid_img = $config->get('Core.RemoveInvalidImg');

        // The trust flag is consulted only when deciding the fate of comments.
        $trusted          = $config->get('HTML.Trusted');
        $allowed_comments = $config->get('HTML.AllowedComments');
        $allowed_pattern  = $config->get('HTML.AllowedCommentsRegexp');
        $filter_comments  = !($allowed_comments === array() && $allowed_pattern === null);

        $strip_script = $config->get('Core.RemoveScriptContents');
        $hidden       = $config->get('Core.HiddenElements');

        // Compatibility: Core.RemoveScriptContents, when explicitly true or
        // false, overrides the 'script' entry of the hidden-element lookup.
        if ($strip_script === true) {
            $hidden['script'] = true;
        } elseif ($strip_script === false && isset($hidden['script'])) {
            unset($hidden['script']);
        }

        $attr_validator = new HTMLPurifier_AttrValidator();

        // Tag name whose next occurrence ends the current skipping run,
        // or false when nothing is being skipped.
        $skip_until = false;

        // Name of the allowed hidden element we are currently inside of;
        // while set, comments are turned into text tokens.
        $textify_within = false;

        // The loop variable itself is registered with the context, so the
        // context entry follows every reassignment made below.
        $token = false;
        $context->register('CurrentToken', $token);

        $e = false;
        if ($config->get('Core.CollectErrors')) {
            $e =& $context->get('ErrorCollector');
        }

        foreach ($tokens as $token) {
            // While skipping, only a tag bearing the awaited name gets through.
            if ($skip_until && (empty($token->is_tag) || $token->name !== $skip_until)) {
                continue;
            }

            if (!empty($token->is_tag)) {
                // DEFINITION CALL
                // Give a registered tag transform the first shot at the token.
                if (isset($definition->info_tag_transform[$token->name])) {
                    $name_before = $token->name;
                    // DEFINITION CALL
                    $transform = $definition->info_tag_transform[$token->name];
                    $token = $transform->transform($token, $config, $context);
                    if ($e) {
                        $e->send(E_NOTICE, 'Strategy_RemoveForeignElements: Tag transform', $name_before);
                    }
                }

                if (isset($definition->info[$token->name])) {
                    // Known element: verify that its required attributes
                    // survive validation before letting it through.
                    $opens_element = $token instanceof HTMLPurifier_Token_Start
                        || $token instanceof HTMLPurifier_Token_Empty;
                    if (
                        $opens_element &&
                        $definition->info[$token->name]->required_attr &&
                        // img is only checked when Core.RemoveInvalidImg is on
                        ($token->name != 'img' || $drop_invalid_img)
                    ) {
                        $attr_validator->validateToken($token, $config, $context);
                        $complete = true;
                        foreach ($definition->info[$token->name]->required_attr as $required) {
                            if (!isset($token->attr[$required])) {
                                $complete = false;
                                break;
                            }
                        }
                        if (!$complete) {
                            if ($e) {
                                $e->send(E_ERROR, 'Strategy_RemoveForeignElements: Missing required attribute', $required);
                            }
                            continue;
                        }
                        // Flag the token as already validated.
                        $token->armor['ValidateAttributes'] = true;
                    }

                    // Track entry into and exit from an allowed hidden element.
                    if (isset($hidden[$token->name]) && $token instanceof HTMLPurifier_Token_Start) {
                        $textify_within = $token->name;
                    } elseif ($token->name === $textify_within && $token instanceof HTMLPurifier_Token_End) {
                        $textify_within = false;
                    }

                } elseif ($escape_foreign) {
                    // Unknown tag, but escaping is on: emit its generated
                    // markup as a text token instead.
                    if ($e) {
                        $e->send(E_WARNING, 'Strategy_RemoveForeignElements: Foreign element to text');
                    }
                    $markup = $generator->generateFromToken($token);
                    $token = new HTMLPurifier_Token_Text($markup);
                } else {
                    // Unknown tag that gets dropped.
                    if (!isset($hidden[$token->name])) {
                        if ($e) {
                            $e->send(E_ERROR, 'Strategy_RemoveForeignElements: Foreign element removed');
                        }
                        continue;
                    }
                    // Hidden element: its children have to go as well.
                    // CAN BE GENERICIZED
                    if ($token instanceof HTMLPurifier_Token_Start) {
                        $skip_until = $token->name;
                    } elseif (!($token instanceof HTMLPurifier_Token_Empty)) {
                        // Neither a start nor an empty tag: stop skipping.
                        // (An empty tag leaves the skipping state untouched.)
                        $skip_until = false;
                    }
                    if ($e) {
                        $e->send(E_ERROR, 'Strategy_RemoveForeignElements: Foreign meta element removed');
                    }
                    continue;
                }
            } elseif ($token instanceof HTMLPurifier_Token_Comment) {
                if ($textify_within !== false) {
                    // Comment inside an allowed hidden element becomes text.
                    $token = new HTMLPurifier_Token_Text($token->data);
                } elseif (!$trusted && !$filter_comments) {
                    // Comments are not permitted at all: strip.
                    if ($e) {
                        $e->send(E_NOTICE, 'Strategy_RemoveForeignElements: Comment removed');
                    }
                    continue;
                } else {
                    // Comments are always cleaned up before the allow-check.
                    // The trailing-hyphen probe is only needed for reporting.
                    $had_trailing_hyphen = $e && substr($token->data, -1) == '-';
                    $token->data = rtrim($token->data, '-');

                    // Collapse every run of hyphens down to a single one.
                    $had_double_hyphen = strpos($token->data, '--') !== false;
                    if ($had_double_hyphen) {
                        do {
                            $token->data = str_replace('--', '-', $token->data);
                        } while (strpos($token->data, '--') !== false);
                    }

                    $comment_text = trim($token->data);
                    $comment_ok = $trusted
                        || !empty($allowed_comments[$comment_text])
                        || ($allowed_pattern !== NULL && preg_match($allowed_pattern, $comment_text));

                    if (!$comment_ok) {
                        if ($e) {
                            $e->send(E_NOTICE, 'Strategy_RemoveForeignElements: Comment removed');
                        }
                        continue;
                    }

                    // Comment is kept; report what the cleanup changed.
                    if ($e) {
                        if ($had_trailing_hyphen) {
                            $e->send(E_NOTICE, 'Strategy_RemoveForeignElements: Trailing hyphen in comment removed');
                        }
                        if ($had_double_hyphen) {
                            $e->send(E_NOTICE, 'Strategy_RemoveForeignElements: Hyphens in comment collapsed');
                        }
                    }
                }
            } elseif (!($token instanceof HTMLPurifier_Token_Text)) {
                // Anything that is not a tag, comment or text is discarded.
                continue;
            }

            $kept[] = $token;
        }

        if ($skip_until && $e) {
            // The awaited tag never showed up: tokens were skipped to the end.
            $e->send(E_ERROR, 'Strategy_RemoveForeignElements: Token removed to end', $skip_until);
        }

        $context->destroy('CurrentToken');

        return $kept;
    }

}
00187 
00188 // vim: et sw=4 sts=4