HTMLPurifier 4.4.0
/home/ezyang/Dev/htmlpurifier/library/HTMLPurifier/HTMLModuleManager.php
Go to the documentation of this file.
00001 <?php
00002 
/**
 * Keeps track of the HTML modules that are available and active, and turns
 * them into element definitions for the selected doctype.
 *
 * Typical life cycle, as implemented below: construct (registers the stock
 * doctypes), optionally registerModule()/addModule()/addPrefix(), then
 * setup($config), then getElements()/getElement().
 */
class HTMLPurifier_HTMLModuleManager
{

    /**
     * Registry of known doctypes (HTMLPurifier_DoctypeRegistry).
     */
    public $doctypes;

    /**
     * Doctype selected by setup(); its ->modules and ->tidyModules lists
     * are read there.
     */
    public $doctype;

    /**
     * Attribute type registry (HTMLPurifier_AttrTypes).
     */
    public $attrTypes;

    /**
     * Modules activated by setup()/processModule(), keyed by module name.
     */
    public $modules = array();

    /**
     * Every module instance registered so far, keyed by module name.
     * Registration alone does not activate a module.
     */
    public $registeredModules = array();

    /**
     * Names of modules added through addModule(); they are appended to the
     * doctype's module list during setup().
     */
    public $userModules = array();

    /**
     * Map of element name to the list of names of active modules that
     * carry a definition for that element.
     */
    public $elementLookup = array();

    /**
     * Class-name prefixes tried, in order, when a module is given as a string.
     */
    public $prefixes = array('HTMLPurifier_HTMLModule_');

    /** HTMLPurifier_ContentSets built by setup(). */
    public $contentSets;

    /** HTMLPurifier_AttrCollections built by setup(). */
    public $attrCollections;

    /**
     * When false, modules whose ->safe flag is false are ignored by
     * getElements()/getElement(). Set from %HTML.Trusted in setup().
     */
    public $trusted = false;

    /**
     * Creates the attribute-type and doctype registries and registers the
     * five built-in doctypes together with their module lists.
     */
    public function __construct() {

        // editable internal objects
        $this->attrTypes = new HTMLPurifier_AttrTypes();
        $this->doctypes  = new HTMLPurifier_DoctypeRegistry();

        // setup basic modules
        $common = array(
            'CommonAttributes', 'Text', 'Hypertext', 'List',
            'Presentation', 'Edit', 'Bdo', 'Tables', 'Image',
            'StyleAttribute',
            // Unsafe:
            'Scripting', 'Object', 'Forms',
            // Sorta legacy, but present in strict:
            'Name',
        );
        $transitional = array('Legacy', 'Target', 'Iframe');
        $xml = array('XMLCommonAttributes');
        $non_xml = array('NonXMLCommonAttributes');

        // setup basic doctypes
        $this->doctypes->register(
            'HTML 4.01 Transitional', false,
            array_merge($common, $transitional, $non_xml),
            array('Tidy_Transitional', 'Tidy_Proprietary'),
            array(),
            '-//W3C//DTD HTML 4.01 Transitional//EN',
            'http://www.w3.org/TR/html4/loose.dtd'
        );

        $this->doctypes->register(
            'HTML 4.01 Strict', false,
            array_merge($common, $non_xml),
            array('Tidy_Strict', 'Tidy_Proprietary', 'Tidy_Name'),
            array(),
            '-//W3C//DTD HTML 4.01//EN',
            'http://www.w3.org/TR/html4/strict.dtd'
        );

        $this->doctypes->register(
            'XHTML 1.0 Transitional', true,
            array_merge($common, $transitional, $xml, $non_xml),
            array('Tidy_Transitional', 'Tidy_XHTML', 'Tidy_Proprietary', 'Tidy_Name'),
            array(),
            '-//W3C//DTD XHTML 1.0 Transitional//EN',
            'http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd'
        );

        // Each tidy module is listed once: setup() processes every entry,
        // so a repeated name would only set the same module up twice.
        $this->doctypes->register(
            'XHTML 1.0 Strict', true,
            array_merge($common, $xml, $non_xml),
            array('Tidy_Strict', 'Tidy_XHTML', 'Tidy_Proprietary', 'Tidy_Name'),
            array(),
            '-//W3C//DTD XHTML 1.0 Strict//EN',
            'http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd'
        );

        $this->doctypes->register(
            'XHTML 1.1', true,
            // Iframe is a real XHTML 1.1 module, despite being
            // "transitional"!
            array_merge($common, $xml, array('Ruby', 'Iframe')),
            array('Tidy_Strict', 'Tidy_XHTML', 'Tidy_Proprietary', 'Tidy_Name'), // Tidy_XHTML1_1
            array(),
            '-//W3C//DTD XHTML 1.1//EN',
            'http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd'
        );

    }

    /**
     * Registers a module so that it can later be activated by name.
     *
     * A string is resolved to a class by trying every entry of $prefixes
     * followed by the string, and finally the string itself as a complete
     * class name; the class is then instantiated without arguments. An
     * object is registered as-is.
     *
     * Errors are reported with trigger_error():
     *  - E_USER_ERROR when no class can be found for a string,
     *  - default level (notice) when the instance has an empty ->name,
     *  - E_USER_WARNING when the name is already registered and $overload
     *    is false; the new instance still replaces the old one.
     *
     * @param mixed $module   Module instance, or module/class name.
     * @param bool  $overload Pass true to replace an existing registration
     *                        silently.
     * @return string|null Name the module was registered under, or null
     *                     when registration failed.
     */
    public function registerModule($module, $overload = false) {
        if (is_string($module)) {
            $requested = $module;
            $class = null;
            foreach ($this->prefixes as $prefix) {
                if (class_exists($prefix . $requested)) {
                    $class = $prefix . $requested;
                    break;
                }
            }
            if ($class === null) {
                // fall back to treating the string as a full class name
                if (!class_exists($requested)) {
                    trigger_error($requested . ' module does not exist',
                        E_USER_ERROR);
                    return null;
                }
                $class = $requested;
            }
            $module = new $class();
        }
        if (empty($module->name)) {
            trigger_error('Module instance of ' . get_class($module) . ' must have name');
            return null;
        }
        if (!$overload && isset($this->registeredModules[$module->name])) {
            trigger_error('Overloading ' . $module->name . ' without explicit overload parameter', E_USER_WARNING);
        }
        $this->registeredModules[$module->name] = $module;
        return $module->name;
    }

    /**
     * Registers a module and queues it for activation in setup().
     *
     * The queue stores the name the module was actually registered under,
     * so a class-name string whose module reports a different ->name still
     * resolves later. Nothing is queued when registration fails (the
     * failure has already been reported by registerModule()).
     *
     * @param mixed $module Module instance, or module/class name.
     */
    public function addModule($module) {
        $name = $this->registerModule($module);
        if ($name === null) {
            return;
        }
        $this->userModules[] = $name;
    }

    /**
     * Appends a class-name prefix to the list consulted when a module is
     * referenced by string.
     *
     * @param string $prefix Prefix such as 'HTMLPurifier_HTMLModule_'.
     */
    public function addPrefix($prefix) {
        $this->prefixes[] = $prefix;
    }

    /**
     * Selects the doctype from the configuration, activates and sets up
     * the resulting modules, and builds the lookup structures used by
     * getElements()/getElement().
     *
     * Directives read here: HTML.Trusted, HTML.AllowedModules,
     * HTML.CoreModules, HTML.Proprietary, HTML.SafeObject, HTML.SafeEmbed,
     * HTML.Nofollow and HTML.TargetBlank.
     *
     * @param HTMLPurifier_Config $config Configuration object.
     */
    public function setup($config) {

        $this->trusted = $config->get('HTML.Trusted');

        // generate
        $this->doctype = $this->doctypes->make($config);
        $modules = $this->doctype->modules;

        // take out the default modules that aren't allowed; both
        // directives are used as lookup tables keyed by module name
        $lookup = $config->get('HTML.AllowedModules');
        $special_cases = $config->get('HTML.CoreModules');

        if (is_array($lookup)) {
            foreach ($modules as $k => $m) {
                if (isset($special_cases[$m])) continue;
                if (!isset($lookup[$m])) unset($modules[$k]);
            }
        }

        // custom modules switched on by a boolean directive, in this order
        $optional = array(
            'HTML.Proprietary' => 'Proprietary',
            'HTML.SafeObject'  => 'SafeObject',
            'HTML.SafeEmbed'   => 'SafeEmbed',
            'HTML.Nofollow'    => 'Nofollow',
            'HTML.TargetBlank' => 'TargetBlank',
        );
        foreach ($optional as $directive => $name) {
            if ($config->get($directive)) {
                $modules[] = $name;
            }
        }

        // merge in custom modules
        $modules = array_merge($modules, $this->userModules);

        $this->setupModules($modules, $config);
        $this->setupModules($this->doctype->tidyModules, $config);

        // prepare any injectors: instantiate the ones given by name and
        // re-key the list by injector name
        foreach ($this->modules as $module) {
            $n = array();
            foreach ($module->info_injector as $injector) {
                if (!is_object($injector)) {
                    $class = "HTMLPurifier_Injector_$injector";
                    $injector = new $class;
                }
                $n[$injector->name] = $injector;
            }
            $module->info_injector = $n;
        }

        // setup lookup table based on all valid modules
        foreach ($this->modules as $module) {
            foreach ($module->info as $name => $def) {
                if (!isset($this->elementLookup[$name])) {
                    $this->elementLookup[$name] = array();
                }
                $this->elementLookup[$name][] = $module->name;
            }
        }

        // note the different choice
        $this->contentSets = new HTMLPurifier_ContentSets(
            // content set assembly deals with all possible modules,
            // not just ones deemed to be "safe"
            $this->modules
        );
        $this->attrCollections = new HTMLPurifier_AttrCollections(
            $this->attrTypes,
            // there is no way to directly disable a global attribute,
            // but using AllowedAttributes or simply not including
            // the module in your custom doctype should be sufficient
            $this->modules
        );
    }

    /**
     * Activates each listed module and calls its setup() method.
     * Entries that cannot be resolved are skipped; processModule() /
     * registerModule() have already reported them.
     *
     * @param array               $names  Module names (or instances).
     * @param HTMLPurifier_Config $config Configuration object.
     */
    protected function setupModules($names, $config) {
        foreach ($names as $module) {
            $key = $this->processModule($module);
            if ($key === null) continue;
            $this->modules[$key]->setup($config);
        }
    }

    /**
     * Activates a module, registering it first when necessary.
     *
     * A string that is already a registered name is activated directly;
     * any other string is passed to registerModule(). An instance is
     * registered unless that very instance is already registered under
     * its name. The active list is always keyed by the module's name,
     * never by the argument itself, because objects are not valid array
     * offsets and a class-name string may differ from the module's name.
     *
     * @param mixed $module Module instance, or module/class name.
     * @return string|null Key of the module in $modules, or null when the
     *                     module could not be registered.
     */
    public function processModule($module) {
        if (is_object($module)) {
            $known = !empty($module->name)
                && isset($this->registeredModules[$module->name])
                && $this->registeredModules[$module->name] === $module;
            $name = $known ? $module->name : $this->registerModule($module);
        } elseif (isset($this->registeredModules[$module])) {
            $name = $module;
        } else {
            $name = $this->registerModule($module);
        }
        if ($name === null) {
            return null;
        }
        $this->modules[$name] = $this->registeredModules[$name];
        return $name;
    }

    /**
     * Builds the definitions of all elements offered by the active
     * modules. Modules flagged as not safe are skipped unless trusted mode
     * is on.
     *
     * @return array Map of element name to its merged definition.
     */
    public function getElements() {

        $elements = array();
        foreach ($this->modules as $module) {
            if (!$this->trusted && !$module->safe) continue;
            foreach ($module->info as $name => $v) {
                if (isset($elements[$name])) continue;
                $elements[$name] = $this->getElement($name);
            }
        }

        // remove dud elements, this happens when an element that
        // appeared to be safe actually wasn't
        foreach ($elements as $n => $v) {
            if ($v === false) unset($elements[$n]);
        }

        return $elements;

    }

    /**
     * Builds the definition of one element by merging the contributions of
     * every active module that defines it, in lookup order.
     *
     * The first standalone definition becomes the base and later
     * definitions are merged into it; non-standalone definitions seen
     * before any base exists are dropped.
     *
     * @param string    $name    Element name.
     * @param bool|null $trusted Whether modules flagged as not safe may
     *                           contribute; null uses $this->trusted.
     * @return mixed Merged element definition, or false when the element
     *               is unknown or no usable base definition exists.
     */
    public function getElement($name, $trusted = null) {

        if (!isset($this->elementLookup[$name])) {
            return false;
        }

        // setup global state variables
        $def = false;
        if ($trusted === null) $trusted = $this->trusted;

        // iterate through each module that has registered itself to this
        // element
        foreach ($this->elementLookup[$name] as $module_name) {

            $module = $this->modules[$module_name];

            // refuse to create/merge from a module that is deemed unsafe--
            // pretend the module doesn't exist--when trusted mode is not on.
            if (!$trusted && !$module->safe) {
                continue;
            }

            // clone is used because, ideally speaking, the original
            // definition should not be modified. Usually, this will
            // make no difference, but for consistency's sake
            $new_def = clone $module->info[$name];

            if (!$def && $new_def->standalone) {
                $def = $new_def;
            } elseif ($def) {
                // This will occur even if $new_def is standalone. In practice,
                // this will usually result in a full replacement.
                $def->mergeIn($new_def);
            } else {
                // :TODO:
                // non-standalone definitions that don't have a standalone
                // to merge into could be deferred to the end
                // HOWEVER, it is perfectly valid for a non-standalone
                // definition to lack a standalone definition, even
                // after all processing: this allows us to safely
                // specify extra attributes for elements that may not be
                // enabled all in one place.  In particular, this might
                // be the case for trusted elements.  WARNING: care must
                // be taken that the /extra/ definitions are all safe.
                continue;
            }

            // attribute value expansions
            $this->attrCollections->performInclusions($def->attr);
            $this->attrCollections->expandIdentifiers($def->attr, $this->attrTypes);

            // descendants_are_inline, for ChildDef_Chameleon
            if (is_string($def->content_model) &&
                strpos($def->content_model, 'Inline') !== false) {
                if ($name !== 'del' && $name !== 'ins') {
                    // this is for you, ins/del
                    $def->descendants_are_inline = true;
                }
            }

            $this->contentSets->generateChildDef($def, $module);
        }

        // This can occur if there is a blank definition, but no base to
        // mix it in with
        if (!$def) return false;

        // add information on required attributes
        foreach ($def->attr as $attr_name => $attr_def) {
            if ($attr_def->required) {
                $def->required_attr[] = $attr_name;
            }
        }

        return $def;

    }

}
00414 
00415 // vim: et sw=4 sts=4