HTMLPurifier 4.4.0
|
<?php

/**
 * Registry and assembler for HTML modules.
 *
 * Keeps track of which modules are known (registered), which ones are
 * active for the doctype selected during setup(), and builds merged
 * element definitions out of the active modules.
 */
class HTMLPurifier_HTMLModuleManager
{

    /**
     * Doctype registry; created in the constructor as an instance of
     * HTMLPurifier_DoctypeRegistry.
     */
    public $doctypes;

    /**
     * Doctype selected by setup() via $this->doctypes->make($config).
     * Its ->modules and ->tidyModules lists are read during setup().
     */
    public $doctype;

    /**
     * Attribute type registry; created in the constructor as an instance
     * of HTMLPurifier_AttrTypes.
     */
    public $attrTypes;

    /**
     * Active modules for the current doctype, indexed by the key that was
     * handed to processModule() (the module name).
     */
    public $modules = array();

    /**
     * All module instances known to the manager, indexed by the module's
     * ->name property. Filled by registerModule().
     */
    public $registeredModules = array();

    /**
     * Names of modules added through addModule(). setup() merges these
     * into the module list regardless of the doctype or of
     * HTML.AllowedModules filtering.
     */
    public $userModules = array();

    /**
     * Map of element name => list of names of the active modules that
     * carry a definition for that element. Filled by setup().
     */
    public $elementLookup = array();

    /**
     * Class-name prefixes tried, in order, when registerModule() is given
     * a module by (short) string name.
     */
    public $prefixes = array('HTMLPurifier_HTMLModule_');

    /** HTMLPurifier_ContentSets instance; created in setup(). */
    public $contentSets;

    /** HTMLPurifier_AttrCollections instance; created in setup(). */
    public $attrCollections;

    /**
     * When false, modules whose ->safe flag is falsy are skipped by
     * getElements() and getElement(). Set from HTML.Trusted in setup().
     */
    public $trusted = false;

    /**
     * Creates the internal registries and registers the built-in doctypes.
     */
    public function __construct() {

        // editable internal objects
        $this->attrTypes   = new HTMLPurifier_AttrTypes();
        $this->doctypes    = new HTMLPurifier_DoctypeRegistry();

        // setup basic modules
        $common = array(
            'CommonAttributes', 'Text', 'Hypertext', 'List',
            'Presentation', 'Edit', 'Bdo', 'Tables', 'Image',
            'StyleAttribute',
            // Unsafe:
            'Scripting', 'Object', 'Forms',
            // Sorta legacy, but present in strict:
            'Name',
        );
        $transitional = array('Legacy', 'Target', 'Iframe');
        $xml = array('XMLCommonAttributes');
        $non_xml = array('NonXMLCommonAttributes');

        // setup basic doctypes
        // NOTE(review): the register() arguments look like
        // (name, is-XML flag, modules, tidy modules, aliases,
        // public DTD identifier, system DTD URL) -- confirm against
        // HTMLPurifier_DoctypeRegistry::register().
        $this->doctypes->register(
            'HTML 4.01 Transitional', false,
            array_merge($common, $transitional, $non_xml),
            array('Tidy_Transitional', 'Tidy_Proprietary'),
            array(),
            '-//W3C//DTD HTML 4.01 Transitional//EN',
            'http://www.w3.org/TR/html4/loose.dtd'
        );

        $this->doctypes->register(
            'HTML 4.01 Strict', false,
            array_merge($common, $non_xml),
            array('Tidy_Strict', 'Tidy_Proprietary', 'Tidy_Name'),
            array(),
            '-//W3C//DTD HTML 4.01//EN',
            'http://www.w3.org/TR/html4/strict.dtd'
        );

        $this->doctypes->register(
            'XHTML 1.0 Transitional', true,
            array_merge($common, $transitional, $xml, $non_xml),
            array('Tidy_Transitional', 'Tidy_XHTML', 'Tidy_Proprietary', 'Tidy_Name'),
            array(),
            '-//W3C//DTD XHTML 1.0 Transitional//EN',
            'http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd'
        );

        // NOTE(review): 'Tidy_Strict' is listed twice here and in the
        // XHTML 1.1 entry below; it looks redundant, but the lists are
        // left untouched -- confirm before removing the duplicate.
        $this->doctypes->register(
            'XHTML 1.0 Strict', true,
            array_merge($common, $xml, $non_xml),
            array('Tidy_Strict', 'Tidy_XHTML', 'Tidy_Strict', 'Tidy_Proprietary', 'Tidy_Name'),
            array(),
            '-//W3C//DTD XHTML 1.0 Strict//EN',
            'http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd'
        );

        $this->doctypes->register(
            'XHTML 1.1', true,
            // Iframe is a real XHTML 1.1 module, despite being
            // "transitional"!
            array_merge($common, $xml, array('Ruby', 'Iframe')),
            array('Tidy_Strict', 'Tidy_XHTML', 'Tidy_Proprietary', 'Tidy_Strict', 'Tidy_Name'), // Tidy_XHTML1_1
            array(),
            '-//W3C//DTD XHTML 1.1//EN',
            'http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd'
        );

    }

    /**
     * Registers a module with the manager (does not activate it).
     *
     * When $module is a string, each entry of $this->prefixes is
     * prepended in turn and the first existing class wins; if none
     * matches, the string itself is tried as a class name. The class is
     * then instantiated without arguments.
     *
     * Error reporting (via trigger_error, nothing is thrown):
     *  - E_USER_ERROR when no class can be found for a string name;
     *  - default-level notice when the instance has an empty ->name;
     *  - E_USER_WARNING when a module of the same name is already
     *    registered and $overload is false (the new module still
     *    replaces the old one).
     *
     * @param string|object $module   Module name or module instance
     * @param bool          $overload Pass true to silence the
     *                                overloading warning
     */
    public function registerModule($module, $overload = false) {
        if (is_string($module)) {
            // attempt to load the module
            $original_module = $module;
            $ok = false;
            foreach ($this->prefixes as $prefix) {
                $module = $prefix . $original_module;
                if (class_exists($module)) {
                    $ok = true;
                    break;
                }
            }
            if (!$ok) {
                // no prefixed class found: fall back to the bare name
                $module = $original_module;
                if (!class_exists($module)) {
                    trigger_error($original_module . ' module does not exist',
                        E_USER_ERROR);
                    return;
                }
            }
            $module = new $module();
        }
        if (empty($module->name)) {
            trigger_error('Module instance of ' . get_class($module) . ' must have name');
            return;
        }
        if (!$overload && isset($this->registeredModules[$module->name])) {
            trigger_error('Overloading ' . $module->name . ' without explicit overload parameter', E_USER_WARNING);
        }
        $this->registeredModules[$module->name] = $module;
    }

    /**
     * Registers a module and queues it so that setup() activates it for
     * whatever doctype ends up being selected.
     *
     * @param string|object $module Module name or module instance; for an
     *                              instance its ->name is what gets queued
     */
    public function addModule($module) {
        $this->registerModule($module);
        if (is_object($module)) $module = $module->name;
        $this->userModules[] = $module;
    }

    /**
     * Appends a class-name prefix to the list registerModule() searches
     * when resolving a module given by string name.
     *
     * @param string $prefix Prefix to append to $this->prefixes
     */
    public function addPrefix($prefix) {
        $this->prefixes[] = $prefix;
    }

    /**
     * Selects the doctype, activates and sets up its modules, and builds
     * the lookup tables, content sets and attribute collections.
     *
     * @param object $config Configuration object; only its get() method
     *                       is called here, the object is also handed to
     *                       the doctype registry and to each module's
     *                       setup()
     */
    public function setup($config) {

        $this->trusted = $config->get('HTML.Trusted');

        // generate
        $this->doctype = $this->doctypes->make($config);
        $modules = $this->doctype->modules;

        // take out the default modules that aren't allowed
        $lookup = $config->get('HTML.AllowedModules');
        $special_cases = $config->get('HTML.CoreModules');

        if (is_array($lookup)) {
            foreach ($modules as $k => $m) {
                // core modules survive even when not explicitly allowed
                if (isset($special_cases[$m])) continue;
                if (!isset($lookup[$m])) unset($modules[$k]);
            }
        }

        // custom modules
        if ($config->get('HTML.Proprietary')) {
            $modules[] = 'Proprietary';
        }
        if ($config->get('HTML.SafeObject')) {
            $modules[] = 'SafeObject';
        }
        if ($config->get('HTML.SafeEmbed')) {
            $modules[] = 'SafeEmbed';
        }
        if ($config->get('HTML.Nofollow')) {
            $modules[] = 'Nofollow';
        }
        if ($config->get('HTML.TargetBlank')) {
            $modules[] = 'TargetBlank';
        }

        // merge in custom modules
        $modules = array_merge($modules, $this->userModules);

        foreach ($modules as $module) {
            $this->processModule($module);
            $this->modules[$module]->setup($config);
        }

        foreach ($this->doctype->tidyModules as $module) {
            $this->processModule($module);
            $this->modules[$module]->setup($config);
        }

        // prepare any injectors: string entries are turned into
        // HTMLPurifier_Injector_* instances, and the list is re-keyed
        // by injector name
        foreach ($this->modules as $module) {
            $n = array();
            foreach ($module->info_injector as $i => $injector) {
                if (!is_object($injector)) {
                    $class = "HTMLPurifier_Injector_$injector";
                    $injector = new $class;
                }
                $n[$injector->name] = $injector;
            }
            $module->info_injector = $n;
        }

        // setup lookup table based on all valid modules
        foreach ($this->modules as $module) {
            foreach ($module->info as $name => $def) {
                if (!isset($this->elementLookup[$name])) {
                    $this->elementLookup[$name] = array();
                }
                $this->elementLookup[$name][] = $module->name;
            }
        }

        // note the different choice
        $this->contentSets = new HTMLPurifier_ContentSets(
            // content set assembly deals with all possible modules,
            // not just ones deemed to be "safe"
            $this->modules
        );
        $this->attrCollections = new HTMLPurifier_AttrCollections(
            $this->attrTypes,
            // there is no way to directly disable a global attribute,
            // but using AllowedAttributes or simply not including
            // the module in your custom doctype should be sufficient
            $this->modules
        );
    }

    /**
     * Activates a module, registering it first when necessary.
     *
     * A string is treated as a module name and is registered only if it
     * is not registered yet. An object is always (re-)registered, exactly
     * as before, and is then activated under its ->name; previously the
     * object itself was used as the array key, which is an illegal offset.
     *
     * @param string|object $module Module name or module instance
     */
    public function processModule($module) {
        if (is_object($module)) {
            $this->registerModule($module);
            // registerModule() refuses nameless modules; nothing to activate
            if (empty($module->name)) return;
            $module = $module->name;
        } elseif (!isset($this->registeredModules[$module])) {
            $this->registerModule($module);
        }
        $this->modules[$module] = $this->registeredModules[$module];
    }

    /**
     * Builds the merged definitions of every element provided by the
     * active modules (unsafe modules are skipped unless trusted).
     *
     * @return array Map of element name => element definition; elements
     *               for which getElement() returned false are left out
     */
    public function getElements() {

        $elements = array();
        foreach ($this->modules as $module) {
            if (!$this->trusted && !$module->safe) continue;
            foreach ($module->info as $name => $v) {
                if (isset($elements[$name])) continue;
                $elements[$name] = $this->getElement($name);
            }
        }

        // remove dud elements, this happens when an element that
        // appeared to be safe actually wasn't
        foreach ($elements as $n => $v) {
            if ($v === false) unset($elements[$n]);
        }

        return $elements;

    }

    /**
     * Builds the definition of a single element by merging the
     * definitions of every active module that declares it, in the order
     * recorded in $this->elementLookup.
     *
     * @param string    $name    Element name
     * @param bool|null $trusted Whether unsafe modules may contribute;
     *                           null means "use $this->trusted"
     * @return object|false Merged element definition, or false when the
     *                      element is unknown or no standalone definition
     *                      was available to merge into
     */
    public function getElement($name, $trusted = null) {

        if (!isset($this->elementLookup[$name])) {
            return false;
        }

        // setup global state variables
        $def = false;
        if ($trusted === null) $trusted = $this->trusted;

        // iterate through each module that has registered itself to this
        // element
        foreach ($this->elementLookup[$name] as $module_name) {

            $module = $this->modules[$module_name];

            // refuse to create/merge from a module that is deemed unsafe--
            // pretend the module doesn't exist--when trusted mode is not on.
            if (!$trusted && !$module->safe) {
                continue;
            }

            // clone is used because, ideally speaking, the original
            // definition should not be modified. Usually, this will
            // make no difference, but for consistency's sake
            $new_def = clone $module->info[$name];

            if (!$def && $new_def->standalone) {
                $def = $new_def;
            } elseif ($def) {
                // This will occur even if $new_def is standalone. In practice,
                // this will usually result in a full replacement.
                $def->mergeIn($new_def);
            } else {
                // :TODO:
                // non-standalone definitions that don't have a standalone
                // to merge into could be deferred to the end
                // HOWEVER, it is perfectly valid for a non-standalone
                // definition to lack a standalone definition, even
                // after all processing: this allows us to safely
                // specify extra attributes for elements that may not be
                // enabled all in one place.  In particular, this might
                // be the case for trusted elements.  WARNING: care must
                // be taken that the /extra/ definitions are all safe.
                continue;
            }

            // attribute value expansions
            $this->attrCollections->performInclusions($def->attr);
            $this->attrCollections->expandIdentifiers($def->attr, $this->attrTypes);

            // descendants_are_inline, for ChildDef_Chameleon
            if (is_string($def->content_model) &&
                strpos($def->content_model, 'Inline') !== false) {
                if ($name != 'del' && $name != 'ins') {
                    // this is for you, ins/del
                    $def->descendants_are_inline = true;
                }
            }

            $this->contentSets->generateChildDef($def, $module);
        }

        // This can occur if there is a blank definition, but no base to
        // mix it in with
        if (!$def) return false;

        // add information on required attributes
        foreach ($def->attr as $attr_name => $attr_def) {
            if ($attr_def->required) {
                $def->required_attr[] = $attr_name;
            }
        }

        return $def;

    }

}

// vim: et sw=4 sts=4