library/HTMLPurifier.standalone.php

Go to the documentation of this file.
00001 <?php
00002 
00041 /*
00042     HTML Purifier 3.1.1 - Standards Compliant HTML Filtering
00043     Copyright (C) 2006-2008 Edward Z. Yang
00044 
00045     This library is free software; you can redistribute it and/or
00046     modify it under the terms of the GNU Lesser General Public
00047     License as published by the Free Software Foundation; either
00048     version 2.1 of the License, or (at your option) any later version.
00049 
00050     This library is distributed in the hope that it will be useful,
00051     but WITHOUT ANY WARRANTY; without even the implied warranty of
00052     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
00053     Lesser General Public License for more details.
00054 
00055     You should have received a copy of the GNU Lesser General Public
00056     License along with this library; if not, write to the Free Software
00057     Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA
00058  */
00059 
/**
 * Facade for the purification pipeline: converts the input to UTF-8, runs
 * the configured filters, tokenizes the HTML, applies the core strategy,
 * regenerates markup from the resulting tokens and converts the text back
 * out of UTF-8.
 */
class HTMLPurifier
{

    /** Version string, available as an instance property. */
    public $version = '3.1.1';

    /** Version string, available as a class constant. */
    const VERSION = '3.1.1';

    /** Configuration object built in the constructor; used when purify() is given none. */
    public $config;

    /** Filter objects added through the deprecated addFilter() method. */
    private $filters = array();

    /** Instance shared through instance() / getInstance(). */
    private static $instance;

    protected $strategy, $generator;

    /**
     * Context of the most recent purify() call. After purifyArray() this is
     * an array of contexts keyed like the input array.
     */
    public $context;

    /**
     * Builds the default configuration and the core strategy.
     *
     * @param $config Optional value forwarded to HTMLPurifier_Config::create().
     */
    public function __construct($config = null) {
        $this->config   = HTMLPurifier_Config::create($config);
        $this->strategy = new HTMLPurifier_Strategy_Core();
    }

    /**
     * Appends a filter object to the list applied by purify().
     * Emits an E_USER_WARNING because the method is deprecated.
     *
     * @param $filter Object exposing preFilter() and postFilter().
     */
    public function addFilter($filter) {
        trigger_error('HTMLPurifier->addFilter() is deprecated, use configuration directives in the Filter namespace or Filter.Custom', E_USER_WARNING);
        $this->filters[] = $filter;
    }

    /**
     * Purifies a single string of HTML.
     *
     * @param $html   HTML to purify.
     * @param $config Optional configuration for this call only. When given it
     *                replaces (does not merge with) the instance configuration.
     * @return Purified HTML.
     */
    public function purify($html, $config = null) {

        // A per-call configuration replaces the instance one outright.
        if ($config) {
            $config = HTMLPurifier_Config::create($config);
        } else {
            $config = $this->config;
        }

        // The lexer implementation is chosen from the configuration.
        $lexer = HTMLPurifier_Lexer::create($config);

        $context = new HTMLPurifier_Context();

        // The generator is shared with other components through the context.
        $this->generator = new HTMLPurifier_Generator($config, $context);
        $context->register('Generator', $this->generator);

        // Error collection is optional and needs a locale to be registered first.
        if ($config->get('Core', 'CollectErrors')) {
            $language_factory = HTMLPurifier_LanguageFactory::instance();
            $language = $language_factory->create($config, $context);
            $context->register('Locale', $language);

            $error_collector = new HTMLPurifier_ErrorCollector($context);
            $context->register('ErrorCollector', $error_collector);
        }

        // The ID accumulator lives in the context because attribute
        // validation can be invoked from many places.
        $id_accumulator = HTMLPurifier_IDAccumulator::build($config, $context);
        $context->register('IDAccumulator', $id_accumulator);

        $html = HTMLPurifier_Encoder::convertToUTF8($html, $config, $context);

        // Assemble the filter list: flag-enabled built-ins, then Filter.Custom
        // entries, then anything added through addFilter().
        $filter_flags = $config->getBatch('Filter');
        $custom_filters = $filter_flags['Custom'];
        unset($filter_flags['Custom']);
        $filters = array();
        foreach ($filter_flags as $filter => $flag) {
            if ($flag) {
                $class = "HTMLPurifier_Filter_$filter";
                $filters[] = new $class;
            }
        }
        foreach ($custom_filters as $custom_filter) {
            $filters[] = $custom_filter;
        }
        $filters = array_merge($filters, $this->filters);

        // Pre-filters run in list order.
        foreach ($filters as $active_filter) {
            $html = $active_filter->preFilter($html, $config, $context);
        }

        // Core pipeline: HTML -> tokens -> purified tokens -> HTML.
        $raw_tokens   = $lexer->tokenizeHTML($html, $config, $context);
        $clean_tokens = $this->strategy->execute($raw_tokens, $config, $context);
        $html         = $this->generator->generateFromTokens($clean_tokens);

        // Post-filters run in the reverse of the pre-filter order.
        foreach (array_reverse($filters) as $active_filter) {
            $html = $active_filter->postFilter($html, $config, $context);
        }

        $html = HTMLPurifier_Encoder::convertFromUTF8($html, $config, $context);
        $this->context =& $context;
        return $html;
    }

    /**
     * Purifies every element of an array of HTML strings.
     *
     * @param $array_of_html Array of HTML strings; keys are preserved.
     * @param $config        Optional configuration forwarded to purify().
     * @return Array of purified HTML with the same keys.
     */
    public function purifyArray($array_of_html, $config = null) {
        $purified = array();
        $contexts = array();
        foreach ($array_of_html as $key => $html) {
            $purified[$key] = $this->purify($html, $config);
            // purify() stores its context on the instance; collect one per key.
            $contexts[$key] = $this->context;
        }
        $this->context = $contexts;
        return $purified;
    }

    /**
     * Returns the shared instance, creating or replacing it as needed.
     *
     * @param $prototype Optional. An HTMLPurifier object becomes the shared
     *                   instance; any other truthy value is passed to the
     *                   constructor of a new shared instance.
     * @return The shared HTMLPurifier instance.
     */
    public static function instance($prototype = null) {
        // Reuse the existing instance unless the caller supplies a prototype.
        if (self::$instance && !$prototype) {
            return self::$instance;
        }
        if ($prototype instanceof HTMLPurifier) {
            self::$instance = $prototype;
        } else {
            self::$instance = $prototype ? new HTMLPurifier($prototype) : new HTMLPurifier();
        }
        return self::$instance;
    }

    /**
     * Alias of instance().
     *
     * @param $prototype See instance().
     */
    public static function getInstance($prototype = null) {
        return HTMLPurifier::instance($prototype);
    }

}
00255 
00256 
00257 
/**
 * Builds named attribute collections from a set of modules, resolving
 * collection-to-collection inclusions and replacing string type identifiers
 * with attribute definition objects.
 */
class HTMLPurifier_AttrCollections
{

    /** Map of collection name => (attribute name => definition). */
    public $info = array();

    /**
     * Gathers the collections declared by each module, then expands them.
     *
     * @param $attr_types Object whose get() turns a type string into a definition.
     * @param $modules    Iterable of modules exposing an attr_collections array.
     */
    public function __construct($attr_types, $modules) {
        // Phase 1: collect raw declarations from every module.
        foreach ($modules as $module) {
            foreach ($module->attr_collections as $coll_name => $collection) {
                if (!isset($this->info[$coll_name])) {
                    $this->info[$coll_name] = array();
                }
                foreach ($collection as $attr_name => $definition) {
                    if ($attr_name === 0 && isset($this->info[$coll_name][$attr_name])) {
                        // Key 0 holds the inclusion list; lists from several
                        // modules are concatenated rather than overwritten.
                        $this->info[$coll_name][$attr_name] = array_merge(
                            $this->info[$coll_name][$attr_name], $definition);
                    } else {
                        $this->info[$coll_name][$attr_name] = $definition;
                    }
                }
            }
        }
        // Phase 2: resolve inclusions, then turn identifiers into objects.
        foreach (array_keys($this->info) as $name) {
            $this->performInclusions($this->info[$name]);
            $this->expandIdentifiers($this->info[$name], $attr_types);
        }
    }

    /**
     * Copies into $attr the attributes of every collection named in its
     * inclusion list (key 0), following nested inclusions, then removes
     * the inclusion list.
     *
     * @param $attr Reference to an attribute array.
     */
    public function performInclusions(&$attr) {
        if (!isset($attr[0])) return;
        $pending = $attr[0];
        $visited = array(); // guards against circular inclusions
        $cursor  = 0;
        // $pending grows while being walked, hence the manual cursor.
        while (isset($pending[$cursor])) {
            $included = $pending[$cursor];
            $cursor++;
            if (isset($visited[$included])) continue;
            $visited[$included] = true;
            if (!isset($this->info[$included])) continue;
            foreach ($this->info[$included] as $key => $value) {
                // Existing entries win; this also skips key 0 of the source.
                if (!isset($attr[$key])) {
                    $attr[$key] = $value;
                }
            }
            if (isset($this->info[$included][0])) {
                // Queue the included collection's own inclusions.
                $pending = array_merge($pending, $this->info[$included][0]);
            }
        }
        unset($attr[0]);
    }

    /**
     * Replaces string type identifiers in $attr with definition objects and
     * strips the '*' required marker from attribute names.
     *
     * @param $attr       Reference to an attribute array.
     * @param $attr_types Object whose get() turns a type string into a definition.
     */
    public function expandIdentifiers(&$attr, $attr_types) {

        // Renamed entries are appended to $attr and may be visited again by
        // the loop; remember what has been handled so they are skipped.
        $handled = array();

        foreach ($attr as $key => $spec) {
            // Key 0 is the inclusion list, not an attribute.
            if ($key === 0 || isset($handled[$key])) continue;

            // A '*' in the name marks the attribute as required.
            $required = (strpos($key, '*') !== false);
            if ($required) {
                // Re-key the entry under the name without the marker.
                unset($attr[$key]);
                $key = trim($key, '*');
                $attr[$key] = $spec;
            }

            $handled[$key] = true;

            if (is_object($spec)) {
                // Already a definition object: only merge the required flag.
                $attr[$key]->required = ($required || $attr[$key]->required);
            } elseif ($spec === false) {
                // Explicitly disabled attribute.
                unset($attr[$key]);
            } else {
                $resolved = $attr_types->get($spec);
                if ($resolved) {
                    $attr[$key] = $resolved;
                    $attr[$key]->required = $required;
                } else {
                    unset($attr[$key]);
                }
            }
        }

    }

}
00382 
00383 
00384 
00385 
/**
 * Base class for attribute definitions: objects that validate a single
 * attribute value.
 */
abstract class HTMLPurifier_AttrDef
{

    /** Flag stored on the definition; defaults to false. */
    public $minimized = false;

    /** Whether the attribute is required; defaults to false. */
    public $required = false;

    /**
     * Validates a value.
     *
     * @param $string  Value to validate.
     * @param $config  Configuration object.
     * @param $context Context object.
     */
    abstract public function validate($string, $config, $context);

    /**
     * Trims the string, then turns every newline, tab and carriage return
     * into a single space.
     *
     * @param $string Raw value.
     * @return Normalized value.
     */
    public function parseCDATA($string) {
        return str_replace(array("\n", "\t", "\r"), ' ', trim($string));
    }

    /**
     * Factory hook: the base implementation ignores $string and returns this
     * same object. Subclasses whose result depends on $string should
     * override it and return a new or cloned object instead.
     *
     * @param $string Extra information for building the definition.
     * @return This object.
     */
    public function make($string) {
        return $this;
    }

    /**
     * Removes the whitespace around the commas of rgb(r, g, b) color
     * functions found in the string.
     *
     * @param $string Text possibly containing rgb() functions.
     * @return Text with compacted rgb() functions.
     */
    protected function mungeRgb($string) {
        $pattern     = '/rgb\((\d+)\s*,\s*(\d+)\s*,\s*(\d+)\)/';
        $replacement = 'rgb(\1,\2,\3)';
        return preg_replace($pattern, $replacement, $string);
    }

}
00469 
00470 
00471 
00472 
/**
 * Base class for attribute transformations: objects that take an attribute
 * array and return a modified one.
 */
abstract class HTMLPurifier_AttrTransform
{

    /**
     * Transforms an attribute array.
     *
     * @param $attr    Associative array of attributes.
     * @param $config  Configuration object.
     * @param $context Context object.
     */
    abstract public function transform($attr, $config, $context);

    /**
     * Puts CSS in front of the existing 'style' attribute, creating the
     * attribute when it is not set.
     *
     * @param $attr Attribute array, modified in place.
     * @param $css  CSS text to prepend.
     */
    public function prependCSS(&$attr, $css) {
        $existing = isset($attr['style']) ? $attr['style'] : '';
        $attr['style'] = $css . $existing;
    }

    /**
     * Removes an attribute from the array and hands back its value.
     *
     * @param $attr Attribute array, modified in place.
     * @param $key  Name of the attribute to take.
     * @return The removed value, or null when isset() reports it absent.
     */
    public function confiscateAttr(&$attr, $key) {
        if (isset($attr[$key])) {
            $taken = $attr[$key];
            unset($attr[$key]);
            return $taken;
        }
        return null;
    }

}
00525 
00526 
00527 
00528 
/**
 * Registry mapping attribute type names to attribute definition objects.
 */
class HTMLPurifier_AttrTypes
{
    /** Map of type name => definition object. */
    protected $info = array();

    /**
     * Registers the built-in attribute types.
     */
    public function __construct() {
        // Pseudo-types: meant to be instantiated through the "Type#args" shorthand.
        $this->info['Enum'] = new HTMLPurifier_AttrDef_Enum();
        $this->info['Bool'] = new HTMLPurifier_AttrDef_HTML_Bool();

        // Concrete types.
        $this->info['CDATA']        = new HTMLPurifier_AttrDef_Text();
        $this->info['ID']           = new HTMLPurifier_AttrDef_HTML_ID();
        $this->info['Length']       = new HTMLPurifier_AttrDef_HTML_Length();
        $this->info['MultiLength']  = new HTMLPurifier_AttrDef_HTML_MultiLength();
        $this->info['NMTOKENS']     = new HTMLPurifier_AttrDef_HTML_Nmtokens();
        $this->info['Pixels']       = new HTMLPurifier_AttrDef_HTML_Pixels();
        $this->info['Text']         = new HTMLPurifier_AttrDef_Text();
        $this->info['URI']          = new HTMLPurifier_AttrDef_URI();
        $this->info['LanguageCode'] = new HTMLPurifier_AttrDef_Lang();
        $this->info['Color']        = new HTMLPurifier_AttrDef_HTML_Color();

        // Not implemented specifically; treated as plain text.
        $this->info['ContentType'] = new HTMLPurifier_AttrDef_Text();

        // Number is a positive integer (one or more digits).
        // FIXME: not always, see start and value of list items
        $this->info['Number'] = new HTMLPurifier_AttrDef_Integer(false, false, true);
    }

    /**
     * Looks up a type and asks it to make() a definition.
     *
     * @param $type Type name, optionally followed by '#' and extra text that
     *              is handed to the type's make() method.
     * @return Result of make(), or null (after an E_USER_ERROR) when the
     *         type name is not registered.
     */
    public function get($type) {

        // Everything after the first '#' is extra information for make().
        $pieces = explode('#', $type, 2);
        $type   = $pieces[0];
        $string = isset($pieces[1]) ? $pieces[1] : '';

        if (isset($this->info[$type])) {
            return $this->info[$type]->make($string);
        }

        trigger_error('Cannot retrieve undefined attribute type ' . $type, E_USER_ERROR);
        return;

    }

    /**
     * Registers or replaces the implementation of a type.
     *
     * @param $type Type name.
     * @param $impl Definition object exposing make().
     */
    public function set($type, $impl) {
        $this->info[$type] = $impl;
    }
}
00596 
00597 
00598 
00599 
00600 
/**
 * Validates the attributes of a token: runs the pre-transformations,
 * validates each attribute against the element's or the global definition,
 * runs the post-transformations and writes the result back to the token.
 */
class HTMLPurifier_AttrValidator
{

    /**
     * Validates the attributes of a token in place.
     *
     * Tokens that are neither HTMLPurifier_Token_Start nor
     * HTMLPurifier_Token_Empty are returned untouched. In that case the
     * 'CurrentToken' context entry is not registered, so the early exit no
     * longer leaves a stale entry behind.
     *
     * @param $token   Reference to the token; its attr property is replaced
     *                 with the validated attribute array.
     * @param $config  Reference to the configuration object.
     * @param $context Context object; 'IDAccumulator' is registered when
     *                 missing, 'CurrentAttr' exists only during validation,
     *                 and 'CurrentToken' is registered and destroyed here
     *                 only when the caller had not registered one.
     * @return The token itself for non Start/Empty tokens, nothing otherwise.
     */
    public function validateToken(&$token, &$config, $context) {
            
        $definition = $config->getHTMLDefinition();
        $e =& $context->get('ErrorCollector', true);
        
        // initialize IDAccumulator if necessary
        $ok =& $context->get('IDAccumulator', true);
        if (!$ok) {
            $id_accumulator = HTMLPurifier_IDAccumulator::build($config, $context);
            $context->register('IDAccumulator', $id_accumulator);
        }
        
        // Bail out BEFORE touching CurrentToken: only start and empty tokens
        // are processed below, and returning after registration would leave
        // the entry in the context forever.
        if (
          !$token instanceof HTMLPurifier_Token_Start &&
          !$token instanceof HTMLPurifier_Token_Empty
        ) return $token;
        
        // initialize CurrentToken if necessary
        $current_token =& $context->get('CurrentToken', true);
        if (!$current_token) $context->register('CurrentToken', $token);
        
        // create alias to global definition array, see also $defs
        // DEFINITION CALL
        $d_defs = $definition->info_global_attr;
        
        // don't update token until the very end, to ensure an atomic update
        $attr = $token->attr;
        
        // do global transformations (pre)
        // nothing currently utilizes this
        foreach ($definition->info_attr_transform_pre as $transform) {
            // $o keeps the pre-transform attributes for error reporting
            $attr = $transform->transform($o = $attr, $config, $context);
            if ($e && ($attr != $o)) $e->send(E_NOTICE, 'AttrValidator: Attributes transformed', $o, $attr);
        }
        
        // do local transformations only applicable to this element (pre)
        // ex. <p align="right"> to <p style="text-align:right;">
        foreach ($definition->info[$token->name]->attr_transform_pre as $transform) {
            $attr = $transform->transform($o = $attr, $config, $context);
            if ($e && ($attr != $o)) $e->send(E_NOTICE, 'AttrValidator: Attributes transformed', $o, $attr);
        }
        
        // create alias to this element's attribute definition array, see
        // also $d_defs (global attribute definition array)
        // DEFINITION CALL
        $defs = $definition->info[$token->name]->attr;
        
        // $attr_key is handed to the context and then reused as the loop
        // variable below, so the same variable must be used in both places.
        $attr_key = false;
        $context->register('CurrentAttr', $attr_key);
        
        // iterate through all the attribute keypairs
        // Watch out for name collisions: $key has previously been used
        foreach ($attr as $attr_key => $value) {
            
            // call the definition
            if ( isset($defs[$attr_key]) ) {
                // there is a local definition defined
                if ($defs[$attr_key] === false) {
                    // We've explicitly been told not to allow this attribute.
                    // This is usually when there's a global definition
                    // that must be overridden.
                    // Theoretically speaking, we could have a
                    // AttrDef_DenyAll, but this is faster!
                    $result = false;
                } else {
                    // validate according to the element's definition
                    $result = $defs[$attr_key]->validate(
                                    $value, $config, $context
                               );
                }
            } elseif ( isset($d_defs[$attr_key]) ) {
                // there is a global definition defined, validate according
                // to the global definition
                $result = $d_defs[$attr_key]->validate(
                                $value, $config, $context
                           );
            } else {
                // system never heard of the attribute? DELETE!
                $result = false;
            }
            
            // put the results into effect
            if ($result === false || $result === null) {
                // this is a generic error message that should be replaced
                // with more specific ones when possible
                if ($e) $e->send(E_ERROR, 'AttrValidator: Attribute removed');
                
                // remove the attribute
                unset($attr[$attr_key]);
            } elseif (is_string($result)) {
                // generally, if a substitution is happening, there
                // was some sort of implicit correction going on. We'll
                // delegate it to the attribute classes to say exactly what.
                
                // simple substitution
                $attr[$attr_key] = $result;
            }
            
            // we'd also want slightly more complicated substitution
            // involving an array as the return value,
            // although we're not sure how colliding attributes would
            // resolve (certain ones would be completely overridden,
            // others would prepend themselves).
        }
        
        $context->destroy('CurrentAttr');
        
        // post transforms
        
        // global (error reporting untested)
        foreach ($definition->info_attr_transform_post as $transform) {
            $attr = $transform->transform($o = $attr, $config, $context);
            if ($e && ($attr != $o)) $e->send(E_NOTICE, 'AttrValidator: Attributes transformed', $o, $attr);
        }
        
        // local (error reporting untested)
        foreach ($definition->info[$token->name]->attr_transform_post as $transform) {
            $attr = $transform->transform($o = $attr, $config, $context);
            if ($e && ($attr != $o)) $e->send(E_NOTICE, 'AttrValidator: Attributes transformed', $o, $attr);
        }
        
        $token->attr = $attr;
        
        // destroy CurrentToken if we made it ourselves
        if (!$current_token) $context->destroy('CurrentToken');
        
    }
    
    
}
00749 
00750 
00751 
00752 
// Define the library prefix once (constants are slow, so as few as possible
// are used) and put it at the front of the include path.
if (!defined('HTMLPURIFIER_PREFIX')) {
    define('HTMLPURIFIER_PREFIX', dirname(__FILE__) . '/standalone');
    set_include_path(HTMLPURIFIER_PREFIX . PATH_SEPARATOR . get_include_path());
}

// Compatibility shim for PHP versions earlier than 5.0.2, which lack PHP_EOL.
// Borrowed from PHP_Compat, LGPL licensed, by Aidan Lister <aidan@php.net>.
// The value is chosen from the first three letters of PHP_OS.
if (!defined('PHP_EOL')) {
    if (strtoupper(substr(PHP_OS, 0, 3)) == 'WIN') {
        define('PHP_EOL', "\r\n");
    } elseif (strtoupper(substr(PHP_OS, 0, 3)) == 'DAR') {
        define('PHP_EOL', "\r");
    } else {
        define('PHP_EOL', "\n");
    }
}
00773 
/**
 * Bootstrap helpers: maps HTML Purifier class names to files under
 * HTMLPURIFIER_PREFIX and installs the autoloader on the SPL autoload stack.
 */
class HTMLPurifier_Bootstrap
{
    
    /**
     * Autoload callback: requires the file for an HTML Purifier class.
     *
     * @param $class Class name to load.
     * @return True when a file was required, false when getPath() found none.
     */
    public static function autoload($class) {
        $file = HTMLPurifier_Bootstrap::getPath($class);
        if (!$file) return false;
        require HTMLPURIFIER_PREFIX . '/' . $file;
        return true;
    }
    
    /**
     * Computes the path of a class file relative to HTMLPURIFIER_PREFIX.
     *
     * @param $class Class name.
     * @return Relative path, or false when the name does not start with
     *         'HTMLPurifier' or the file does not exist.
     */
    public static function getPath($class) {
        if (strncmp('HTMLPurifier', $class, 12) !== 0) return false;
        // Custom implementations
        if (strncmp('HTMLPurifier_Language_', $class, 22) === 0) {
            // language classes live in one directory, named by language
            // code with underscores turned into dashes
            $code = str_replace('_', '-', substr($class, 22));
            $file = 'HTMLPurifier/Language/classes/' . $code . '.php';
        } else {
            $file = str_replace('_', '/', $class) . '.php';
        }
        if (!file_exists(HTMLPURIFIER_PREFIX . '/' . $file)) return false;
        return $file;
    }
    
    /**
     * Puts the HTML Purifier autoloader at the front of the SPL autoload
     * stack by unregistering the existing autoloaders, registering ours and
     * then re-registering the others in their original order.
     *
     * Every existing callback is checked before the stack is modified, so
     * when the Exception below is thrown the stack is left exactly as it was.
     *
     * @throws Exception when a registered autoloader is a non-static
     *         object method.
     */
    public static function registerAutoload() {
        $autoload = array('HTMLPurifier_Bootstrap', 'autoload');
        if ( ($funcs = spl_autoload_functions()) === false ) {
            spl_autoload_register($autoload);
        } elseif (function_exists('spl_autoload_unregister')) {
            $compat = version_compare(PHP_VERSION, '5.1.2', '<=') &&
                      version_compare(PHP_VERSION, '5.1.0', '>=');
            // Pass 1: validate only. Nothing is unregistered until every
            // callback is known to be acceptable.
            foreach ($funcs as $func) {
                if (is_array($func)) {
                    // :TRICKY: There are some compatibility issues and some
                    // places where we need to error out
                    $reflector = new ReflectionMethod($func[0], $func[1]);
                    if (!$reflector->isStatic()) {
                        throw new Exception('
                            HTML Purifier autoloader registrar is not compatible
                            with non-static object methods due to PHP Bug #44144;
                            Please do not use HTMLPurifier.autoload.php (or any
                            file that includes this file); instead, place the code:
                            spl_autoload_register(array(\'HTMLPurifier_Bootstrap\', \'autoload\'))
                            after your own autoloaders.
                        ');
                    }
                }
            }
            // Pass 2: empty the stack.
            foreach ($funcs as $func) {
                // Surprisingly, spl_autoload_register supports the
                // Class::staticMethod callback format, although call_user_func doesn't
                if ($compat && is_array($func)) $func = implode('::', $func);
                spl_autoload_unregister($func);
            }
            // Pass 3: ours first, then the previous autoloaders in order.
            spl_autoload_register($autoload);
            foreach ($funcs as $func) spl_autoload_register($func);
        }
    }
    
}
00848 
00849 
00850 
/**
 * Base class for definition objects whose setup runs at most once.
 */
abstract class HTMLPurifier_Definition
{

    /** True once setup() has been called. */
    public $setup = false;

    /** Definition type identifier; subclasses assign it. */
    public $type;

    /**
     * Performs the actual initialization; implemented by subclasses.
     *
     * @param $config Configuration object.
     */
    abstract protected function doSetup($config);

    /**
     * Runs doSetup() on the first call and does nothing on later calls.
     * The flag is raised before doSetup() is invoked.
     *
     * @param $config Configuration object passed on to doSetup().
     */
    public function setup($config) {
        if (!$this->setup) {
            $this->setup = true;
            $this->doSetup($config);
        }
    }

}
00886 
00887 
00888 
00889 
00894 class HTMLPurifier_CSSDefinition extends HTMLPurifier_Definition
00895 {
00896     
00897     public $type = 'CSS';
00898     
00902     public $info = array();
00903     
00907     protected function doSetup($config) {
00908         
00909         $this->info['text-align'] = new HTMLPurifier_AttrDef_Enum(
00910             array('left', 'right', 'center', 'justify'), false);
00911         
00912         $border_style =
00913         $this->info['border-bottom-style'] = 
00914         $this->info['border-right-style'] = 
00915         $this->info['border-left-style'] = 
00916         $this->info['border-top-style'] =  new HTMLPurifier_AttrDef_Enum(
00917             array('none', 'hidden', 'dotted', 'dashed', 'solid', 'double',
00918             'groove', 'ridge', 'inset', 'outset'), false);
00919         
00920         $this->info['border-style'] = new HTMLPurifier_AttrDef_CSS_Multiple($border_style);
00921         
00922         $this->info['clear'] = new HTMLPurifier_AttrDef_Enum(
00923             array('none', 'left', 'right', 'both'), false);
00924         $this->info['float'] = new HTMLPurifier_AttrDef_Enum(
00925             array('none', 'left', 'right'), false);
00926         $this->info['font-style'] = new HTMLPurifier_AttrDef_Enum(
00927             array('normal', 'italic', 'oblique'), false);
00928         $this->info['font-variant'] = new HTMLPurifier_AttrDef_Enum(
00929             array('normal', 'small-caps'), false);
00930         
00931         $uri_or_none = new HTMLPurifier_AttrDef_CSS_Composite(
00932             array(
00933                 new HTMLPurifier_AttrDef_Enum(array('none')),
00934                 new HTMLPurifier_AttrDef_CSS_URI()
00935             )
00936         );
00937         
00938         $this->info['list-style-position'] = new HTMLPurifier_AttrDef_Enum(
00939             array('inside', 'outside'), false);
00940         $this->info['list-style-type'] = new HTMLPurifier_AttrDef_Enum(
00941             array('disc', 'circle', 'square', 'decimal', 'lower-roman',
00942             'upper-roman', 'lower-alpha', 'upper-alpha', 'none'), false);
00943         $this->info['list-style-image'] = $uri_or_none;
00944         
00945         $this->info['list-style'] = new HTMLPurifier_AttrDef_CSS_ListStyle($config);
00946         
00947         $this->info['text-transform'] = new HTMLPurifier_AttrDef_Enum(
00948             array('capitalize', 'uppercase', 'lowercase', 'none'), false);
00949         $this->info['color'] = new HTMLPurifier_AttrDef_CSS_Color();
00950         
00951         $this->info['background-image'] = $uri_or_none;
00952         $this->info['background-repeat'] = new HTMLPurifier_AttrDef_Enum(
00953             array('repeat', 'repeat-x', 'repeat-y', 'no-repeat')
00954         );
00955         $this->info['background-attachment'] = new HTMLPurifier_AttrDef_Enum(
00956             array('scroll', 'fixed')
00957         );
00958         $this->info['background-position'] = new HTMLPurifier_AttrDef_CSS_BackgroundPosition();
00959         
00960         $border_color = 
00961         $this->info['border-top-color'] = 
00962         $this->info['border-bottom-color'] = 
00963         $this->info['border-left-color'] = 
00964         $this->info['border-right-color'] = 
00965         $this->info['background-color'] = new HTMLPurifier_AttrDef_CSS_Composite(array(
00966             new HTMLPurifier_AttrDef_Enum(array('transparent')),
00967             new HTMLPurifier_AttrDef_CSS_Color()
00968         ));
00969         
00970         $this->info['background'] = new HTMLPurifier_AttrDef_CSS_Background($config);
00971         
00972         $this->info['border-color'] = new HTMLPurifier_AttrDef_CSS_Multiple($border_color);
00973         
00974         $border_width = 
00975         $this->info['border-top-width'] = 
00976         $this->info['border-bottom-width'] = 
00977         $this->info['border-left-width'] = 
00978         $this->info['border-right-width'] = new HTMLPurifier_AttrDef_CSS_Composite(array(
00979             new HTMLPurifier_AttrDef_Enum(array('thin', 'medium', 'thick')),
00980             new HTMLPurifier_AttrDef_CSS_Length('0') //disallow negative
00981         ));
00982         
00983         $this->info['border-width'] = new HTMLPurifier_AttrDef_CSS_Multiple($border_width);
00984         
00985         $this->info['letter-spacing'] = new HTMLPurifier_AttrDef_CSS_Composite(array(
00986             new HTMLPurifier_AttrDef_Enum(array('normal')),
00987             new HTMLPurifier_AttrDef_CSS_Length()
00988         ));
00989         
00990         $this->info['word-spacing'] = new HTMLPurifier_AttrDef_CSS_Composite(array(
00991             new HTMLPurifier_AttrDef_Enum(array('normal')),
00992             new HTMLPurifier_AttrDef_CSS_Length()
00993         ));
00994         
00995         $this->info['font-size'] = new HTMLPurifier_AttrDef_CSS_Composite(array(
00996             new HTMLPurifier_AttrDef_Enum(array('xx-small', 'x-small',
00997                 'small', 'medium', 'large', 'x-large', 'xx-large',
00998                 'larger', 'smaller')),
00999             new HTMLPurifier_AttrDef_CSS_Percentage(),
01000             new HTMLPurifier_AttrDef_CSS_Length()
01001         ));
01002         
01003         $this->info['line-height'] = new HTMLPurifier_AttrDef_CSS_Composite(array(
01004             new HTMLPurifier_AttrDef_Enum(array('normal')),
01005             new HTMLPurifier_AttrDef_CSS_Number(true), // no negatives
01006             new HTMLPurifier_AttrDef_CSS_Length('0'),
01007             new HTMLPurifier_AttrDef_CSS_Percentage(true)
01008         ));
01009         
01010         $margin =
01011         $this->info['margin-top'] = 
01012         $this->info['margin-bottom'] = 
01013         $this->info['margin-left'] = 
01014         $this->info['margin-right'] = new HTMLPurifier_AttrDef_CSS_Composite(array(
01015             new HTMLPurifier_AttrDef_CSS_Length(),
01016             new HTMLPurifier_AttrDef_CSS_Percentage(),
01017             new HTMLPurifier_AttrDef_Enum(array('auto'))
01018         ));
01019         
01020         $this->info['margin'] = new HTMLPurifier_AttrDef_CSS_Multiple($margin);
01021         
01022         // non-negative
01023         $padding =
01024         $this->info['padding-top'] = 
01025         $this->info['padding-bottom'] = 
01026         $this->info['padding-left'] = 
01027         $this->info['padding-right'] = new HTMLPurifier_AttrDef_CSS_Composite(array(
01028             new HTMLPurifier_AttrDef_CSS_Length('0'),
01029             new HTMLPurifier_AttrDef_CSS_Percentage(true)
01030         ));
01031         
01032         $this->info['padding'] = new HTMLPurifier_AttrDef_CSS_Multiple($padding);
01033         
01034         $this->info['text-indent'] = new HTMLPurifier_AttrDef_CSS_Composite(array(
01035             new HTMLPurifier_AttrDef_CSS_Length(),
01036             new HTMLPurifier_AttrDef_CSS_Percentage()
01037         ));
01038         
01039         $trusted_wh = new HTMLPurifier_AttrDef_CSS_Composite(array(
01040             new HTMLPurifier_AttrDef_CSS_Length('0'),
01041             new HTMLPurifier_AttrDef_CSS_Percentage(true),
01042             new HTMLPurifier_AttrDef_Enum(array('auto'))
01043         ));
01044         $max = $config->get('CSS', 'MaxImgLength');
01045         
01046         $this->info['width'] =
01047         $this->info['height'] =
01048             $max === null ?
01049             $trusted_wh : 
01050             new HTMLPurifier_AttrDef_Switch('img',
01051                 // For img tags:
01052                 new HTMLPurifier_AttrDef_CSS_Composite(array(
01053                     new HTMLPurifier_AttrDef_CSS_Length('0', $max),
01054                     new HTMLPurifier_AttrDef_Enum(array('auto'))
01055                 )),
01056                 // For everyone else:
01057                 $trusted_wh
01058             );
01059         
01060         $this->info['text-decoration'] = new HTMLPurifier_AttrDef_CSS_TextDecoration();
01061         
01062         $this->info['font-family'] = new HTMLPurifier_AttrDef_CSS_FontFamily();
01063         
01064         // this could use specialized code
01065         $this->info['font-weight'] = new HTMLPurifier_AttrDef_Enum(
01066             array('normal', 'bold', 'bolder', 'lighter', '100', '200', '300',
01067             '400', '500', '600', '700', '800', '900'), false);
01068         
01069         // MUST be called after other font properties, as it references
01070         // a CSSDefinition object
01071         $this->info['font'] = new HTMLPurifier_AttrDef_CSS_Font($config);
01072         
01073         // same here
01074         $this->info['border'] =
01075         $this->info['border-bottom'] = 
01076         $this->info['border-top'] = 
01077         $this->info['border-left'] = 
01078         $this->info['border-right'] = new HTMLPurifier_AttrDef_CSS_Border($config);
01079         
01080         $this->info['border-collapse'] = new HTMLPurifier_AttrDef_Enum(array(
01081             'collapse', 'separate'));
01082         
01083         $this->info['caption-side'] = new HTMLPurifier_AttrDef_Enum(array(
01084             'top', 'bottom'));
01085         
01086         $this->info['table-layout'] = new HTMLPurifier_AttrDef_Enum(array(
01087             'auto', 'fixed'));
01088         
01089         $this->info['vertical-align'] = new HTMLPurifier_AttrDef_CSS_Composite(array(
01090             new HTMLPurifier_AttrDef_Enum(array('baseline', 'sub', 'super',
01091                 'top', 'text-top', 'middle', 'bottom', 'text-bottom')),
01092             new HTMLPurifier_AttrDef_CSS_Length(),
01093             new HTMLPurifier_AttrDef_CSS_Percentage()
01094         ));
01095         
01096         $this->info['border-spacing'] = new HTMLPurifier_AttrDef_CSS_Multiple(new HTMLPurifier_AttrDef_CSS_Length(), 2);
01097         
01098         // partial support
01099         $this->info['white-space'] = new HTMLPurifier_AttrDef_Enum(array('nowrap'));
01100         
01101         if ($config->get('CSS', 'Proprietary')) {
01102             $this->doSetupProprietary($config);
01103         }
01104         
01105         if ($config->get('CSS', 'AllowTricky')) {
01106             $this->doSetupTricky($config);
01107         }
01108         
01109         $allow_important = $config->get('CSS', 'AllowImportant');
01110         // wrap all attr-defs with decorator that handles !important
01111         foreach ($this->info as $k => $v) {
01112             $this->info[$k] = new HTMLPurifier_AttrDef_CSS_ImportantDecorator($v, $allow_important);
01113         }
01114         
01115         $this->setupConfigStuff($config);
01116     }
01117     
01118     protected function doSetupProprietary($config) {
01119         // Internet Explorer only scrollbar colors
01120         $this->info['scrollbar-arrow-color']        = new HTMLPurifier_AttrDef_CSS_Color();
01121         $this->info['scrollbar-base-color']         = new HTMLPurifier_AttrDef_CSS_Color();
01122         $this->info['scrollbar-darkshadow-color']   = new HTMLPurifier_AttrDef_CSS_Color();
01123         $this->info['scrollbar-face-color']         = new HTMLPurifier_AttrDef_CSS_Color();
01124         $this->info['scrollbar-highlight-color']    = new HTMLPurifier_AttrDef_CSS_Color();
01125         $this->info['scrollbar-shadow-color']       = new HTMLPurifier_AttrDef_CSS_Color();
01126         
01127         // technically not proprietary, but CSS3, and no one supports it
01128         $this->info['opacity']          = new HTMLPurifier_AttrDef_CSS_AlphaValue();
01129         $this->info['-moz-opacity']     = new HTMLPurifier_AttrDef_CSS_AlphaValue();
01130         $this->info['-khtml-opacity']   = new HTMLPurifier_AttrDef_CSS_AlphaValue();
01131         
01132         // only opacity, for now
01133         $this->info['filter'] = new HTMLPurifier_AttrDef_CSS_Filter();
01134         
01135     }
01136     
01137     protected function doSetupTricky($config) {
01138         $this->info['display'] = new HTMLPurifier_AttrDef_Enum(array(
01139             'inline', 'block', 'list-item', 'run-in', 'compact',
01140             'marker', 'table', 'inline-table', 'table-row-group',
01141             'table-header-group', 'table-footer-group', 'table-row',
01142             'table-column-group', 'table-column', 'table-cell', 'table-caption', 'none'
01143         ));
01144         $this->info['visibility'] = new HTMLPurifier_AttrDef_Enum(array(
01145             'visible', 'hidden', 'collapse'
01146         ));
01147     }
01148     
01149     
01156     protected function setupConfigStuff($config) {
01157         
01158         // setup allowed elements
01159         $support = "(for information on implementing this, see the ".
01160                    "support forums) ";
01161         $allowed_attributes = $config->get('CSS', 'AllowedProperties');
01162         if ($allowed_attributes !== null) {
01163             foreach ($this->info as $name => $d) {
01164                 if(!isset($allowed_attributes[$name])) unset($this->info[$name]);
01165                 unset($allowed_attributes[$name]);
01166             }
01167             // emit errors
01168             foreach ($allowed_attributes as $name => $d) {
01169                 // :TODO: Is this htmlspecialchars() call really necessary?
01170                 $name = htmlspecialchars($name);
01171                 trigger_error("Style attribute '$name' is not supported $support", E_USER_WARNING);
01172             }
01173         }
01174         
01175     }
01176 }
01177 
01178 
01179 
01180 
/**
 * Abstract base for child definitions: objects that validate the sequence
 * of child tokens found inside an element.
 *
 * Concrete subclasses must implement validateChildren().
 */
abstract class HTMLPurifier_ChildDef
{
    /**
     * Identifier for the kind of child definition.
     * NOTE(review): presumably a short string set by each subclass - confirm.
     */
    public $type;

    /**
     * Flag describing whether the parent element may be left without children.
     * NOTE(review): inferred from the name; no default is assigned here - confirm.
     */
    public $allow_empty;

    /**
     * Elements associated with this definition; empty by default.
     * NOTE(review): presumably a lookup keyed by element name - confirm.
     */
    public $elements = array();

    /**
     * Validates the children of an element.
     *
     * @param $tokens_of_children Tokens making up the element's children.
     * @param $config  Configuration object.
     * @param $context Context object.
     * @return Defined by the implementing subclass; the contract is not
     *         visible in this declaration.
     */
    abstract public function validateChildren($tokens_of_children, $config, $context);
}
01217 
01218 
01219 
01220 
01221 
01236 class HTMLPurifier_Config
01237 {
01238     
01242     public $version = '3.1.1';
01243     
01248     public $autoFinalize = true;
01249     
01250     // protected member variables
01251     
01256     protected $serials = array();
01257     
01261     protected $serial;
01262     
01266     protected $conf;
01267     
01271     protected $parser;
01272     
01278     public $def;
01279     
01283     protected $definitions;
01284     
01288     protected $finalized = false;
01289     
01294     public function __construct($definition) {
01295         $this->conf = $definition->defaults; // set up, copy in defaults
01296         $this->def  = $definition; // keep a copy around for checking
01297         $this->parser = new HTMLPurifier_VarParser_Flexible();
01298     }
01299     
01309     public static function create($config, $schema = null) {
01310         if ($config instanceof HTMLPurifier_Config) {
01311             // pass-through
01312             return $config;
01313         }
01314         if (!$schema) {
01315             $ret = HTMLPurifier_Config::createDefault();
01316         } else {
01317             $ret = new HTMLPurifier_Config($schema);
01318         }
01319         if (is_string($config)) $ret->loadIni($config);
01320         elseif (is_array($config)) $ret->loadArray($config);
01321         return $ret;
01322     }
01323     
01328     public static function createDefault() {
01329         $definition = HTMLPurifier_ConfigSchema::instance();
01330         $config = new HTMLPurifier_Config($definition);
01331         return $config;
01332     }
01333     
01339     public function get($namespace, $key) {
01340         if (!$this->finalized && $this->autoFinalize) $this->finalize();
01341         if (!isset($this->def->info[$namespace][$key])) {
01342             // can't add % due to SimpleTest bug
01343             trigger_error('Cannot retrieve value of undefined directive ' . htmlspecialchars("$namespace.$key"),
01344                 E_USER_WARNING);
01345             return;
01346         }
01347         if (isset($this->def->info[$namespace][$key]->isAlias)) {
01348             $d = $this->def->info[$namespace][$key];
01349             trigger_error('Cannot get value from aliased directive, use real name ' . $d->namespace . '.' . $d->name,
01350                 E_USER_ERROR);
01351             return;
01352         }
01353         return $this->conf[$namespace][$key];
01354     }
01355     
01360     public function getBatch($namespace) {
01361         if (!$this->finalized && $this->autoFinalize) $this->finalize();
01362         if (!isset($this->def->info[$namespace])) {
01363             trigger_error('Cannot retrieve undefined namespace ' . htmlspecialchars($namespace),
01364                 E_USER_WARNING);
01365             return;
01366         }
01367         return $this->conf[$namespace];
01368     }
01369     
01377     public function getBatchSerial($namespace) {
01378         if (empty($this->serials[$namespace])) {
01379             $batch = $this->getBatch($namespace);
01380             unset($batch['DefinitionRev']);
01381             $this->serials[$namespace] = md5(serialize($batch));
01382         }
01383         return $this->serials[$namespace];
01384     }
01385     
01390     public function getSerial() {
01391         if (empty($this->serial)) {
01392             $this->serial = md5(serialize($this->getAll()));
01393         }
01394         return $this->serial;
01395     }
01396     
01400     public function getAll() {
01401         if (!$this->finalized && $this->autoFinalize) $this->finalize();
01402         return $this->conf;
01403     }
01404     
01411     public function set($namespace, $key, $value, $from_alias = false) {
01412         if ($this->isFinalized('Cannot set directive after finalization')) return;
01413         if (!isset($this->def->info[$namespace][$key])) {
01414             trigger_error('Cannot set undefined directive ' . htmlspecialchars("$namespace.$key") . ' to value',
01415                 E_USER_WARNING);
01416             return;
01417         }
01418         $def = $this->def->info[$namespace][$key];
01419         
01420         if (isset($def->isAlias)) {
01421             if ($from_alias) {
01422                 trigger_error('Double-aliases not allowed, please fix '.
01423                     'ConfigSchema bug with' . "$namespace.$key", E_USER_ERROR);
01424                 return;
01425             }
01426             $this->set($new_ns  = $def->namespace,
01427                        $new_dir = $def->name,
01428                        $value, true);
01429             trigger_error("$namespace.$key is an alias, preferred directive name is $new_ns.$new_dir", E_USER_NOTICE);
01430             return;
01431         }
01432         
01433         // Raw type might be negative when using the fully optimized form
01434         // of stdclass, which indicates allow_null == true
01435         $rtype = is_int($def) ? $def : $def->type;
01436         if ($rtype < 0) {
01437             $type = -$rtype;
01438             $allow_null = true;
01439         } else {
01440             $type = $rtype;
01441             $allow_null = isset($def->allow_null);
01442         }
01443         
01444         try {
01445             $value = $this->parser->parse($value, $type, $allow_null);
01446         } catch (HTMLPurifier_VarParserException $e) {
01447             trigger_error('Value for ' . "$namespace.$key" . ' is of invalid type, should be ' . HTMLPurifier_VarParser::getTypeName($type), E_USER_WARNING);
01448             return;
01449         }
01450         if (is_string($value) && is_object($def)) {
01451             // resolve value alias if defined
01452             if (isset($def->aliases[$value])) {
01453                 $value = $def->aliases[$value];
01454             }
01455             // check to see if the value is allowed
01456             if (isset($def->allowed) && !isset($def->allowed[$value])) {
01457                 trigger_error('Value not supported, valid values are: ' .
01458                     $this->_listify($def->allowed), E_USER_WARNING);
01459                 return;
01460             }
01461         }
01462         $this->conf[$namespace][$key] = $value;
01463         
01464         // reset definitions if the directives they depend on changed
01465         // this is a very costly process, so it's discouraged 
01466         // with finalization
01467         if ($namespace == 'HTML' || $namespace == 'CSS') {
01468             $this->definitions[$namespace] = null;
01469         }
01470         
01471         $this->serials[$namespace] = false;
01472     }
01473     
01477     private function _listify($lookup) {
01478         $list = array();
01479         foreach ($lookup as $name => $b) $list[] = $name;
01480         return implode(', ', $list);
01481     }
01482     
01488     public function getHTMLDefinition($raw = false) {
01489         return $this->getDefinition('HTML', $raw);
01490     }
01491     
01497     public function getCSSDefinition($raw = false) {
01498         return $this->getDefinition('CSS', $raw);
01499     }
01500