HTMLPurifier 4.4.0
|
<?php

/**
 * Strategy that filters a token list against the configured HTML definition.
 *
 * Tags known to the definition are kept (after any tag transform and a check
 * of their required attributes); unknown tags are either turned into text or
 * dropped, depending on configuration. Comments are kept, cleaned up, turned
 * into text or dropped. Text tokens pass through, and every other kind of
 * token is discarded.
 */
class HTMLPurifier_Strategy_RemoveForeignElements extends HTMLPurifier_Strategy
{

    /**
     * Filters the token list and returns the tokens that survive.
     *
     * While running, the token being examined is registered in the context
     * under 'CurrentToken'; the registration is destroyed before returning.
     * When 'Core.CollectErrors' is enabled, notices, warnings and errors are
     * sent to the context's 'ErrorCollector'.
     *
     * @param array $tokens Tokens to filter, in document order.
     * @param object $config Configuration object; read through get() and
     *                       getHTMLDefinition().
     * @param object $context Context object used for 'CurrentToken' and
     *                        'ErrorCollector'.
     * @return array The tokens that were kept, in their original order.
     */
    public function execute($tokens, $config, $context)
    {
        $html_def  = $config->getHTMLDefinition();
        $generator = new HTMLPurifier_Generator($config, $context);
        $kept      = array();

        $escape_invalid_tags = $config->get('Core.EscapeInvalidTags');
        $remove_invalid_img  = $config->get('Core.RemoveInvalidImg');

        // These three settings only influence whether comments survive.
        $trusted                 = $config->get('HTML.Trusted');
        $allowed_comments        = $config->get('HTML.AllowedComments');
        $allowed_comments_regexp = $config->get('HTML.AllowedCommentsRegexp');
        $filter_comments = $allowed_comments !== array() || $allowed_comments_regexp !== null;

        $remove_script_contents = $config->get('Core.RemoveScriptContents');
        $hidden                 = $config->get('Core.HiddenElements');

        // Core.RemoveScriptContents, when explicitly true or false, overrides
        // the 'script' entry of the hidden-elements lookup.
        if ($remove_script_contents === true) {
            $hidden['script'] = true;
        } elseif ($remove_script_contents === false && isset($hidden['script'])) {
            unset($hidden['script']);
        }

        $attr_validator = new HTMLPurifier_AttrValidator();

        // While set to a tag name, tokens are dropped until a tag with that
        // name is encountered.
        $skip_until_tag = false;

        // While set to a tag name, comments are converted into text tokens.
        $textify_inside = false;

        // $token is registered by reference, so the context sees every value
        // the loop below assigns to it.
        $token = false;
        $context->register('CurrentToken', $token);

        $errors = false;
        if ($config->get('Core.CollectErrors')) {
            $errors =& $context->get('ErrorCollector');
        }

        foreach ($tokens as $token) {
            if ($skip_until_tag && (empty($token->is_tag) || $token->name !== $skip_until_tag)) {
                continue;
            }

            if (!empty($token->is_tag)) {
                // Apply a tag transform first, if the definition has one for
                // this tag name.
                if (isset($html_def->info_tag_transform[$token->name])) {
                    $original_name = $token->name;
                    $transform     = $html_def->info_tag_transform[$token->name];
                    $token         = $transform->transform($token, $config, $context);
                    if ($errors) {
                        $errors->send(E_NOTICE, 'Strategy_RemoveForeignElements: Tag transform', $original_name);
                    }
                }

                if (isset($html_def->info[$token->name])) {
                    // Known element: make sure its required attributes are
                    // present once the attributes have been validated.
                    $element       = $html_def->info[$token->name];
                    $opens_element = $token instanceof HTMLPurifier_Token_Start
                        || $token instanceof HTMLPurifier_Token_Empty;

                    if (
                        $opens_element &&
                        $element->required_attr &&
                        ($token->name != 'img' || $remove_invalid_img) // ensure config option still works
                    ) {
                        $attr_validator->validateToken($token, $config, $context);

                        $missing_attr = false;
                        foreach ($element->required_attr as $attr_name) {
                            if (!isset($token->attr[$attr_name])) {
                                $missing_attr = $attr_name;
                                break;
                            }
                        }

                        if ($missing_attr !== false) {
                            if ($errors) {
                                $errors->send(
                                    E_ERROR,
                                    'Strategy_RemoveForeignElements: Missing required attribute',
                                    $missing_attr
                                );
                            }
                            continue;
                        }

                        // Attributes were validated above; flag the token so.
                        $token->armor['ValidateAttributes'] = true;
                    }

                    // Track whether we are inside a kept hidden element, whose
                    // comments are converted into text.
                    if ($token instanceof HTMLPurifier_Token_Start && isset($hidden[$token->name])) {
                        $textify_inside = $token->name;
                    } elseif ($token instanceof HTMLPurifier_Token_End && $token->name === $textify_inside) {
                        $textify_inside = false;
                    }

                } elseif ($escape_invalid_tags) {
                    // Unknown tag: keep it as text holding its generated HTML.
                    if ($errors) {
                        $errors->send(E_WARNING, 'Strategy_RemoveForeignElements: Foreign element to text');
                    }
                    $token = new HTMLPurifier_Token_Text(
                        $generator->generateFromToken($token)
                    );
                } else {
                    // Unknown tag: drop it.
                    if (!isset($hidden[$token->name])) {
                        if ($errors) {
                            $errors->send(E_ERROR, 'Strategy_RemoveForeignElements: Foreign element removed');
                        }
                        continue;
                    }

                    // Hidden element: a start tag begins dropping everything
                    // up to a tag of the same name, an empty tag changes
                    // nothing, and any other tag stops the dropping.
                    if ($token instanceof HTMLPurifier_Token_Start) {
                        $skip_until_tag = $token->name;
                    } elseif (!($token instanceof HTMLPurifier_Token_Empty)) {
                        $skip_until_tag = false;
                    }
                    if ($errors) {
                        $errors->send(E_ERROR, 'Strategy_RemoveForeignElements: Foreign meta element removed');
                    }
                    continue;
                }

            } elseif ($token instanceof HTMLPurifier_Token_Comment) {
                if ($textify_inside !== false) {
                    // Inside a kept hidden element: the comment becomes text.
                    $token = new HTMLPurifier_Token_Text($token->data);
                } elseif ($trusted || $filter_comments) {
                    // The trailing hyphen is only looked for when errors are
                    // being collected; the cleanup itself always happens.
                    $trailing_hyphen = $errors && substr($token->data, -1) == '-';
                    $token->data = rtrim($token->data, '-');

                    // Collapse every run of hyphens into a single hyphen.
                    $found_double_hyphen = strpos($token->data, '--') !== false;
                    while (strpos($token->data, '--') !== false) {
                        $token->data = str_replace('--', '-', $token->data);
                    }

                    $comment_text = trim($token->data);
                    $comment_allowed = $trusted
                        || !empty($allowed_comments[$comment_text])
                        || ($allowed_comments_regexp !== null && preg_match($allowed_comments_regexp, $comment_text));

                    if (!$comment_allowed) {
                        if ($errors) {
                            $errors->send(E_NOTICE, 'Strategy_RemoveForeignElements: Comment removed');
                        }
                        continue;
                    }

                    if ($errors) {
                        if ($trailing_hyphen) {
                            $errors->send(E_NOTICE, 'Strategy_RemoveForeignElements: Trailing hyphen in comment removed');
                        }
                        if ($found_double_hyphen) {
                            $errors->send(E_NOTICE, 'Strategy_RemoveForeignElements: Hyphens in comment collapsed');
                        }
                    }
                } else {
                    // Comments are not allowed at all: strip it.
                    if ($errors) {
                        $errors->send(E_NOTICE, 'Strategy_RemoveForeignElements: Comment removed');
                    }
                    continue;
                }

            } elseif (!($token instanceof HTMLPurifier_Token_Text)) {
                // Neither a tag, a comment nor text: discard.
                continue;
            }

            $kept[] = $token;
        }

        if ($skip_until_tag && $errors) {
            // Tokens were dropped all the way to the end of the input.
            $errors->send(E_ERROR, 'Strategy_RemoveForeignElements: Token removed to end', $skip_until_tag);
        }

        $context->destroy('CurrentToken');

        return $kept;
    }

}

// vim: et sw=4 sts=4