HTMLPurifier 4.4.0
/home/ezyang/Dev/htmlpurifier/library/HTMLPurifier.standalone.php
Go to the documentation of this file.
<?php

/*
    HTML Purifier 4.4.0 - Standards Compliant HTML Filtering
    Copyright (C) 2006-2008 Edward Z. Yang

    This library is free software; you can redistribute it and/or
    modify it under the terms of the GNU Lesser General Public
    License as published by the Free Software Foundation; either
    version 2.1 of the License, or (at your option) any later version.

    This library is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
    Lesser General Public License for more details.

    You should have received a copy of the GNU Lesser General Public
    License along with this library; if not, write to the Free Software
    Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA
 */
00059 
/**
 * Facade for the purification pipeline.
 *
 * A purify() call converts the input to UTF-8, runs the enabled filters'
 * preFilter() hooks, tokenizes the markup, hands the tokens to the core
 * strategy, regenerates HTML from the result, runs the filters'
 * postFilter() hooks in reverse order and finally converts the output
 * back from UTF-8.
 */
class HTMLPurifier
{

    /** Version of this release; mirrors the VERSION constant. */
    public $version = self::VERSION;

    /** Version of this release as a class constant. */
    const VERSION = '4.4.0';

    /** Default configuration, created once by the constructor. */
    public $config;

    /** Filter objects added through the deprecated addFilter(). */
    private $filters = array();

    /** Shared instance handed out by instance(). */
    private static $instance;

    /** Core strategy (set in the constructor) and generator (rebuilt on every purify()). */
    protected $strategy, $generator;

    /**
     * Context of the most recent purify() call. After purifyArray() this
     * is an array of contexts keyed like the input array.
     */
    public $context;

    /**
     * Builds the default configuration and the core strategy.
     *
     * @param mixed $config Anything HTMLPurifier_Config::create() accepts;
     *                      it is passed through unchanged.
     */
    public function __construct($config = null)
    {
        $this->config   = HTMLPurifier_Config::create($config);
        $this->strategy = new HTMLPurifier_Strategy_Core();
    }

    /**
     * Registers an extra filter object.
     *
     * Deprecated: emits an E_USER_WARNING on every call, then appends the
     * filter so that purify() runs it after the configured filters.
     *
     * @param object $filter Object exposing preFilter() and postFilter().
     */
    public function addFilter($filter)
    {
        trigger_error('HTMLPurifier->addFilter() is deprecated, use configuration directives in the Filter namespace or Filter.Custom', E_USER_WARNING);
        $this->filters[] = $filter;
    }

    /**
     * Purifies one piece of HTML.
     *
     * @param string $html   Markup to purify.
     * @param mixed  $config Optional per-call configuration. When truthy it
     *                       REPLACES (does not merge with) the default one.
     * @return string Purified markup.
     */
    public function purify($html, $config = null)
    {
        // :TODO: make the config merge in, instead of replace
        $config = $config ? HTMLPurifier_Config::create($config) : $this->config;

        // implementation is partially environment dependent, partially
        // configuration dependent
        $lexer = HTMLPurifier_Lexer::create($config);

        $context = new HTMLPurifier_Context();

        // setup HTML generator
        $this->generator = new HTMLPurifier_Generator($config, $context);
        $context->register('Generator', $this->generator);

        // set up global context variables
        if ($config->get('Core.CollectErrors')) {
            // may get moved out if other facilities use it
            $language_factory = HTMLPurifier_LanguageFactory::instance();
            $language = $language_factory->create($config, $context);
            $context->register('Locale', $language);

            $error_collector = new HTMLPurifier_ErrorCollector($context);
            $context->register('ErrorCollector', $error_collector);
        }

        // setup id_accumulator context, necessary due to the fact that
        // AttrValidator can be called from many places
        $id_accumulator = HTMLPurifier_IDAccumulator::build($config, $context);
        $context->register('IDAccumulator', $id_accumulator);

        $html = HTMLPurifier_Encoder::convertToUTF8($html, $config, $context);

        $filters = $this->collectFilters($config);
        // maybe prepare(), but later

        foreach ($filters as $filter) {
            $html = $filter->preFilter($html, $config, $context);
        }

        // un-purified HTML -> un-purified tokens -> purified tokens -> purified HTML
        $tokens = $lexer->tokenizeHTML($html, $config, $context);
        $tokens = $this->strategy->execute($tokens, $config, $context);
        $html   = $this->generator->generateFromTokens($tokens);

        // post-filters unwind in the opposite order of the pre-filters
        foreach (array_reverse($filters) as $filter) {
            $html = $filter->postFilter($html, $config, $context);
        }

        $html = HTMLPurifier_Encoder::convertFromUTF8($html, $config, $context);
        // objects are handles already; no reference binding is needed
        $this->context = $context;
        return $html;
    }

    /**
     * Assembles the ordered filter list for one purify() call: built-in
     * filters switched on in the Filter namespace, then the Filter.Custom
     * objects, then anything added through addFilter().
     *
     * @param object $config Configuration object in effect for the call.
     * @return array List of filter objects, indexed from 0.
     */
    private function collectFilters($config)
    {
        $filter_flags   = $config->getBatch('Filter');
        $custom_filters = $filter_flags['Custom'];
        unset($filter_flags['Custom']);

        $filters = array();
        foreach ($filter_flags as $filter => $flag) {
            if (!$flag) {
                continue;
            }
            // dotted keys are skipped; only plain names map to a filter class
            if (strpos($filter, '.') !== false) {
                continue;
            }
            $class = "HTMLPurifier_Filter_$filter";
            $filters[] = new $class;
        }
        foreach ($custom_filters as $filter) {
            // maybe "HTMLPurifier_Filter_$filter", but be consistent with AutoFormat
            $filters[] = $filter;
        }
        return array_merge($filters, $this->filters);
    }

    /**
     * Purifies every element of an array, preserving its keys.
     *
     * @param array $array_of_html Strings to purify.
     * @param mixed $config        Optional configuration, forwarded to purify().
     * @return array Purified strings under the original keys. As a side
     *               effect $this->context becomes an array of the
     *               per-element contexts under the same keys.
     */
    public function purifyArray($array_of_html, $config = null)
    {
        $context_array = array();
        foreach ($array_of_html as $key => $html) {
            $array_of_html[$key] = $this->purify($html, $config);
            $context_array[$key] = $this->context;
        }
        $this->context = $context_array;
        return $array_of_html;
    }

    /**
     * Returns the shared purifier, creating or replacing it on demand.
     *
     * @param mixed $prototype An HTMLPurifier to install as the shared
     *                         instance, or any other truthy value to pass
     *                         to the constructor of a fresh instance. A
     *                         truthy prototype always replaces the
     *                         existing shared instance.
     * @return HTMLPurifier
     */
    public static function instance($prototype = null)
    {
        if (!self::$instance || $prototype) {
            if ($prototype instanceof HTMLPurifier) {
                self::$instance = $prototype;
            } elseif ($prototype) {
                self::$instance = new HTMLPurifier($prototype);
            } else {
                self::$instance = new HTMLPurifier();
            }
        }
        return self::$instance;
    }

    /**
     * Alias of instance().
     *
     * @param mixed $prototype See instance().
     * @return HTMLPurifier
     */
    public static function getInstance($prototype = null)
    {
        return HTMLPurifier::instance($prototype);
    }

}
00256 
00257 
00258 
00259 
00260 
/**
 * Named attribute collections gathered from modules, with their mutual
 * inclusions resolved and their string type identifiers replaced by
 * attribute definition objects.
 */
class HTMLPurifier_AttrCollections
{

    /**
     * Collections indexed by name; each collection maps an attribute name
     * to its definition.
     */
    public $info = array();

    /**
     * Merges the attr_collections of every module, then resolves
     * inclusions and expands identifiers for each resulting collection.
     *
     * @param object $attr_types Object with a get($type) method, handed to
     *                           expandIdentifiers().
     * @param array  $modules    Objects exposing an attr_collections array.
     */
    public function __construct($attr_types, $modules)
    {
        // load extensions from the modules
        foreach ($modules as $module) {
            foreach ($module->attr_collections as $coll_i => $coll) {
                if (!isset($this->info[$coll_i])) {
                    $this->info[$coll_i] = array();
                }
                foreach ($coll as $attr_i => $attr) {
                    if ($attr_i === 0 && isset($this->info[$coll_i][$attr_i])) {
                        // key 0 holds the include list: append, never overwrite
                        $this->info[$coll_i][$attr_i] = array_merge(
                            $this->info[$coll_i][$attr_i],
                            $attr
                        );
                        continue;
                    }
                    $this->info[$coll_i][$attr_i] = $attr;
                }
            }
        }
        // perform internal expansions and inclusions
        foreach ($this->info as $name => $attr) {
            // merge attribute collections that include others
            $this->performInclusions($this->info[$name]);
            // replace string identifiers with actual attribute objects
            $this->expandIdentifiers($this->info[$name], $attr_types);
        }
    }

    /**
     * Copies into $attr every attribute of the collections named in its
     * include list ($attr[0]), following nested include lists, and then
     * drops the include list. Attributes already present in $attr win.
     *
     * @param array $attr Collection to complete; modified in place.
     */
    public function performInclusions(&$attr)
    {
        if (!isset($attr[0])) {
            return;
        }
        $merge = $attr[0];
        $seen  = array(); // recursion guard
        // $merge may grow while we walk it, hence the index-based loop
        for ($i = 0; isset($merge[$i]); $i++) {
            $name = $merge[$i];
            if (isset($seen[$name])) {
                continue;
            }
            $seen[$name] = true;
            if (!isset($this->info[$name])) {
                continue;
            }
            // foreach attribute of the inclusion, copy it over
            foreach ($this->info[$name] as $key => $value) {
                if (isset($attr[$key])) {
                    continue; // also catches more inclusions
                }
                $attr[$key] = $value;
            }
            if (isset($this->info[$name][0])) {
                // queue the included collection's own include list
                $merge = array_merge($merge, $this->info[$name][0]);
            }
        }
        unset($attr[0]);
    }

    /**
     * Replaces string type identifiers in $attr with definition objects
     * obtained from $attr_types, and records which attributes are
     * required. A '*' in an attribute key marks it as required and is
     * stripped from the key. Entries whose definition is false, or whose
     * type cannot be resolved, are removed.
     *
     * @param array  $attr       Attribute map; modified in place.
     * @param object $attr_types Object with a get($type) method.
     */
    public function expandIdentifiers(&$attr, $attr_types)
    {
        // because foreach will process new elements we add, make sure we
        // skip duplicates
        $processed = array();

        foreach ($attr as $def_i => $def) {
            // skip inclusions
            if ($def_i === 0) {
                continue;
            }

            if (isset($processed[$def_i])) {
                continue;
            }

            // determine whether or not attribute is required
            if ($required = (strpos($def_i, '*') !== false)) {
                // rename the definition
                unset($attr[$def_i]);
                $def_i = trim($def_i, '*');
                $attr[$def_i] = $def;
            }

            $processed[$def_i] = true;

            // if we've already got a literal object, move on
            if (is_object($def)) {
                // preserve previous required
                $attr[$def_i]->required = ($required || $attr[$def_i]->required);
                continue;
            }

            if ($def === false) {
                unset($attr[$def_i]);
                continue;
            }

            if ($t = $attr_types->get($def)) {
                // get() may hand back a shared flyweight (the default
                // make() returns $this); flag a private copy so that one
                // attribute's "required" state cannot leak into another's
                $t = clone $t;
                $t->required = $required;
                $attr[$def_i] = $t;
            } else {
                unset($attr[$def_i]);
            }
        }
    }

}
00385 
00386 
00387 
00388 
00389 
/**
 * Base class for attribute definitions: objects that validate, and
 * possibly correct, the value of a single attribute.
 */
abstract class HTMLPurifier_AttrDef
{

    /**
     * Not read anywhere in this chunk.
     * NOTE(review): presumably marks attributes that may be written in
     * minimized form - confirm against the generator.
     */
    public $minimized = false;

    /**
     * Whether the attribute is required; set by
     * HTMLPurifier_AttrCollections::expandIdentifiers() from the '*'
     * marker in the attribute key.
     */
    public $required = false;

    /**
     * Validates an attribute value.
     *
     * @param string $string  Value to validate.
     * @param object $config  Configuration object.
     * @param object $context Context object.
     * @return mixed HTMLPurifier_AttrValidator removes the attribute when
     *               this returns false or null, substitutes the value when
     *               it returns a string, and otherwise leaves it alone.
     */
    abstract public function validate($string, $config, $context);

    /**
     * Trims the string, then turns every remaining newline, tab and
     * carriage return into a single space.
     *
     * @param string $string
     * @return string
     */
    public function parseCDATA($string)
    {
        return str_replace(array("\n", "\t", "\r"), ' ', trim($string));
    }

    /**
     * Factory hook used by HTMLPurifier_AttrTypes::get().
     *
     * The default implementation returns this very object (a flyweight).
     * If $string has an effect on the returned object, i.e. you need to
     * override this method, clone or instantiate a new copy instead
     * (instantiation is safer).
     *
     * @param string $string Extra information for the new definition.
     * @return HTMLPurifier_AttrDef
     */
    public function make($string)
    {
        return $this;
    }

    /**
     * Strips the whitespace around the commas of rgb(r, g, b) colours
     * with integer components, yielding rgb(r,g,b).
     *
     * @param string $string
     * @return string
     */
    protected function mungeRgb($string)
    {
        return preg_replace('/rgb\((\d+)\s*,\s*(\d+)\s*,\s*(\d+)\)/', 'rgb(\1,\2,\3)', $string);
    }

    /**
     * Resolves backslash escapes in a CSS string: a backslash followed by
     * up to six hex digits becomes the corresponding character, a
     * backslash-newline pair disappears, and any other escaped character
     * stands for itself.
     *
     * @param string $string
     * @return string
     */
    protected function expandCSSEscape($string)
    {
        // flexibly parse it
        $ret = '';
        for ($i = 0, $c = strlen($string); $i < $c; $i++) {
            if ($string[$i] === '\\') {
                $i++;
                if ($i >= $c) {
                    // lone trailing backslash is kept verbatim
                    $ret .= '\\';
                    break;
                }
                if (ctype_xdigit($string[$i])) {
                    $code = $string[$i];
                    // gather at most five more hex digits
                    for ($a = 1, $i++; $i < $c && $a < 6; $i++, $a++) {
                        if (!ctype_xdigit($string[$i])) {
                            break;
                        }
                        $code .= $string[$i];
                    }
                    // We have to be extremely careful when adding
                    // new characters, to make sure we're not breaking
                    // the encoding.
                    $char = HTMLPurifier_Encoder::unichr(hexdec($code));
                    // NOTE(review): when the character is rejected here,
                    // the loop increment also skips the character that
                    // follows the escape, whitespace or not - confirm
                    // that this is intended.
                    if (HTMLPurifier_Encoder::cleanUTF8($char) === '') {
                        continue;
                    }
                    $ret .= $char;
                    // one whitespace character after the escape is
                    // swallowed; anything else must be re-read
                    if ($i < $c && trim($string[$i]) !== '') {
                        $i--;
                    }
                    continue;
                }
                if ($string[$i] === "\n") {
                    continue;
                }
            }
            $ret .= $string[$i];
        }
        return $ret;
    }

}
00509 
00510 
00511 
00512 
00513 
/**
 * Base class for attribute transformations: objects that receive the
 * whole attribute array of a token and return a modified copy.
 */
abstract class HTMLPurifier_AttrTransform
{

    /**
     * Transforms an attribute array.
     *
     * @param array  $attr    Attribute name => value map.
     * @param object $config  Configuration object.
     * @param object $context Context object.
     * @return array The attribute map to use from now on;
     *               HTMLPurifier_AttrValidator replaces the token's
     *               attributes with the returned value.
     */
    abstract public function transform($attr, $config, $context);

    /**
     * Puts CSS in front of the style attribute, creating the attribute
     * when it does not exist yet.
     *
     * @param array  $attr Attribute map; modified in place.
     * @param string $css  CSS to prepend.
     */
    public function prependCSS(&$attr, $css)
    {
        $existing = isset($attr['style']) ? $attr['style'] : '';
        $attr['style'] = $css . $existing;
    }

    /**
     * Removes an attribute from the map and hands back its value.
     *
     * @param array  $attr Attribute map; modified in place.
     * @param string $key  Name of the attribute to take.
     * @return mixed The removed value, or null when the attribute is
     *               not set.
     */
    public function confiscateAttr(&$attr, $key)
    {
        if (!isset($attr[$key])) {
            return null;
        }
        $value = $attr[$key];
        unset($attr[$key]);
        return $value;
    }

}
00566 
00567 
00568 
00569 
00570 
/**
 * Registry that maps attribute type names (such as 'CDATA' or 'URI')
 * to attribute definition prototypes.
 */
class HTMLPurifier_AttrTypes
{
    /** Type name => attribute definition prototype. */
    protected $info = array();

    /**
     * Registers the built-in attribute types.
     */
    public function __construct()
    {
        // XXX This is kind of poor, since we don't actually /clone/
        // instances; instead, we use the supplied make() attribute. So,
        // the underlying class must know how to deal with arguments.
        // With the old implementation of Enum, that ignored its
        // arguments when handling a make dispatch, the IAlign
        // definition wouldn't work.

        // pseudo-types, must be instantiated via shorthand
        $this->info['Enum']    = new HTMLPurifier_AttrDef_Enum();
        $this->info['Bool']    = new HTMLPurifier_AttrDef_HTML_Bool();

        $this->info['CDATA']    = new HTMLPurifier_AttrDef_Text();
        $this->info['ID']       = new HTMLPurifier_AttrDef_HTML_ID();
        $this->info['Length']   = new HTMLPurifier_AttrDef_HTML_Length();
        $this->info['MultiLength'] = new HTMLPurifier_AttrDef_HTML_MultiLength();
        $this->info['NMTOKENS'] = new HTMLPurifier_AttrDef_HTML_Nmtokens();
        $this->info['Pixels']   = new HTMLPurifier_AttrDef_HTML_Pixels();
        $this->info['Text']     = new HTMLPurifier_AttrDef_Text();
        $this->info['URI']      = new HTMLPurifier_AttrDef_URI();
        $this->info['LanguageCode'] = new HTMLPurifier_AttrDef_Lang();
        $this->info['Color']    = new HTMLPurifier_AttrDef_HTML_Color();
        $this->info['IAlign']   = self::makeEnum('top,middle,bottom,left,right');
        $this->info['LAlign']   = self::makeEnum('top,bottom,left,right');
        $this->info['FrameTarget'] = new HTMLPurifier_AttrDef_HTML_FrameTarget();

        // unimplemented aliases
        $this->info['ContentType'] = new HTMLPurifier_AttrDef_Text();
        $this->info['ContentTypes'] = new HTMLPurifier_AttrDef_Text();
        $this->info['Charsets'] = new HTMLPurifier_AttrDef_Text();
        $this->info['Character'] = new HTMLPurifier_AttrDef_Text();

        // "proprietary" types
        $this->info['Class'] = new HTMLPurifier_AttrDef_HTML_Class();

        // number is really a positive integer (one or more digits)
        // FIXME: ^^ not always, see start and value of list items
        $this->info['Number']   = new HTMLPurifier_AttrDef_Integer(false, false, true);
    }

    /**
     * Builds an enumeration definition wrapped in HTMLPurifier_AttrDef_Clone.
     *
     * @param string $in Comma-separated list of allowed values.
     * @return HTMLPurifier_AttrDef_Clone
     */
    private static function makeEnum($in)
    {
        $enum = new HTMLPurifier_AttrDef_Enum(explode(',', $in));
        return new HTMLPurifier_AttrDef_Clone($enum);
    }

    /**
     * Retrieves a definition for a type identifier.
     *
     * The identifier may carry extra information after the first '#'
     * (for example 'Enum#ltr,rtl'); that part is handed to the
     * prototype's make() method, an empty string otherwise.
     *
     * @param string $type Type name, optionally followed by '#' and data.
     * @return mixed Whatever the prototype's make() returns. For an
     *               unknown type an E_USER_ERROR is raised and, should
     *               execution continue, null is returned.
     */
    public function get($type)
    {
        // determine if there is any extra info tacked on
        $string = '';
        if (strpos($type, '#') !== false) {
            list($type, $string) = explode('#', $type, 2);
        }

        if (!isset($this->info[$type])) {
            trigger_error('Cannot retrieve undefined attribute type ' . $type, E_USER_ERROR);
            return;
        }

        return $this->info[$type]->make($string);
    }

    /**
     * Registers or replaces the prototype for a type name.
     *
     * @param string $type Type name.
     * @param object $impl Prototype; must expose make($string).
     */
    public function set($type, $impl)
    {
        $this->info[$type] = $impl;
    }
}
00658 
00659 
00660 
00661 
00662 
/**
 * Validates the attributes of a single token against the HTML
 * definition, applying the registered attribute transformations before
 * and after validation.
 */
class HTMLPurifier_AttrValidator
{

    /**
     * Validates the attributes of a token in place.
     *
     * Only start and empty tokens are processed. The token's attribute
     * array is replaced in one step at the very end, after all
     * transformations and validations have run.
     *
     * @param object $token   Token to validate; its attr property is
     *                        overwritten for start and empty tokens.
     * @param object $config  Configuration object providing
     *                        getHTMLDefinition().
     * @param object $context Context object. 'IDAccumulator' is
     *                        registered when missing; 'CurrentAttr' and,
     *                        when missing, 'CurrentToken' are registered
     *                        for the duration of the call.
     * @return mixed The token itself when it is neither a start nor an
     *               empty token; nothing otherwise.
     */
    public function validateToken(&$token, &$config, $context)
    {
        $definition = $config->getHTMLDefinition();
        $e =& $context->get('ErrorCollector', true);

        // initialize IDAccumulator if necessary
        $ok =& $context->get('IDAccumulator', true);
        if (!$ok) {
            $id_accumulator = HTMLPurifier_IDAccumulator::build($config, $context);
            $context->register('IDAccumulator', $id_accumulator);
        }

        // Bail out BEFORE touching CurrentToken: registering it first and
        // returning early would leave a stale entry in the context.
        if (
            !$token instanceof HTMLPurifier_Token_Start &&
            !$token instanceof HTMLPurifier_Token_Empty
        ) {
            return $token;
        }

        // initialize CurrentToken if necessary
        $current_token =& $context->get('CurrentToken', true);
        if (!$current_token) {
            $context->register('CurrentToken', $token);
        }

        // create alias to global definition array, see also $defs
        // DEFINITION CALL
        $d_defs = $definition->info_global_attr;

        // don't update token until the very end, to ensure an atomic update
        $attr = $token->attr;

        // do global transformations (pre)
        // nothing currently utilizes this
        $attr = $this->applyTransforms(
            $definition->info_attr_transform_pre, $attr, $config, $context, $e
        );

        // do local transformations only applicable to this element (pre)
        // ex. <p align="right"> to <p style="text-align:right;">
        $attr = $this->applyTransforms(
            $definition->info[$token->name]->attr_transform_pre, $attr, $config, $context, $e
        );

        // create alias to this element's attribute definition array, see
        // also $d_defs (global attribute definition array)
        // DEFINITION CALL
        $defs = $definition->info[$token->name]->attr;

        // The variable handed to register() is the very one the loop
        // below iterates with.
        // NOTE(review): presumably register() binds by reference so the
        // context always sees the current attribute name - confirm.
        $attr_key = false;
        $context->register('CurrentAttr', $attr_key);

        // iterate through all the attribute keypairs
        // Watch out for name collisions: $key has previously been used
        foreach ($attr as $attr_key => $value) {

            // call the definition
            if (isset($defs[$attr_key])) {
                // there is a local definition defined
                if ($defs[$attr_key] === false) {
                    // We've explicitly been told not to allow this element.
                    // This is usually when there's a global definition
                    // that must be overridden.
                    // Theoretically speaking, we could have a
                    // AttrDef_DenyAll, but this is faster!
                    $result = false;
                } else {
                    // validate according to the element's definition
                    $result = $defs[$attr_key]->validate($value, $config, $context);
                }
            } elseif (isset($d_defs[$attr_key])) {
                // there is a global definition defined, validate according
                // to the global definition
                $result = $d_defs[$attr_key]->validate($value, $config, $context);
            } else {
                // system never heard of the attribute? DELETE!
                $result = false;
            }

            // put the results into effect
            if ($result === false || $result === null) {
                // this is a generic error message that should be replaced
                // with more specific ones when possible
                if ($e) {
                    $e->send(E_ERROR, 'AttrValidator: Attribute removed');
                }

                // remove the attribute
                unset($attr[$attr_key]);
            } elseif (is_string($result)) {
                // generally, if a substitution is happening, there
                // was some sort of implicit correction going on. We'll
                // delegate it to the attribute classes to say exactly what.

                // simple substitution
                $attr[$attr_key] = $result;
            }
            // any other result: the value is kept as it is

            // we'd also want slightly more complicated substitution
            // involving an array as the return value,
            // although we're not sure how colliding attributes would
            // resolve (certain ones would be completely overridden,
            // others would prepend themselves).
        }

        $context->destroy('CurrentAttr');

        // post transforms

        // global (error reporting untested)
        $attr = $this->applyTransforms(
            $definition->info_attr_transform_post, $attr, $config, $context, $e
        );

        // local (error reporting untested)
        $attr = $this->applyTransforms(
            $definition->info[$token->name]->attr_transform_post, $attr, $config, $context, $e
        );

        $token->attr = $attr;

        // destroy CurrentToken if we made it ourselves
        if (!$current_token) {
            $context->destroy('CurrentToken');
        }
    }

    /**
     * Runs a list of attribute transforms in order, reporting every
     * transform that changed the attributes to the error collector.
     *
     * @param array  $transforms Objects exposing transform().
     * @param array  $attr       Attributes before the transforms.
     * @param object $config     Configuration object.
     * @param object $context    Context object.
     * @param mixed  $e          Error collector, or a falsy value when
     *                           none is registered.
     * @return array Attributes after the transforms.
     */
    private function applyTransforms($transforms, $attr, $config, $context, $e)
    {
        foreach ($transforms as $transform) {
            $attr = $transform->transform($o = $attr, $config, $context);
            if ($e && $attr != $o) {
                $e->send(E_NOTICE, 'AttrValidator: Attributes transformed', $o, $attr);
            }
        }
        return $attr;
    }

}
00821 
00822 
00823 
00824 
00825 
// constants are slow, so we use as few as possible
if (!defined('HTMLPURIFIER_PREFIX')) {
    // the standalone distribution keeps its on-demand files in a
    // "standalone" directory next to this file; put it first on the
    // include path
    define('HTMLPURIFIER_PREFIX', dirname(__FILE__) . '/standalone');
    set_include_path(HTMLPURIFIER_PREFIX . PATH_SEPARATOR . get_include_path());
}

// accommodations for versions earlier than 5.0.2
// borrowed from PHP_Compat, LGPL licensed, by Aidan Lister <aidan@php.net>
if (!defined('PHP_EOL')) {
    // Only Windows differs from "\n". Darwin (Mac OS X) is a Unix and
    // uses "\n" as well, not the "\r" of classic Mac OS.
    define('PHP_EOL', strtoupper(substr(PHP_OS, 0, 3)) === 'WIN' ? "\r\n" : "\n");
}
00846 
/**
 * Autoloading support: maps HTMLPurifier class names to files below
 * HTMLPURIFIER_PREFIX and registers the autoloader with SPL.
 */
class HTMLPurifier_Bootstrap
{

    /**
     * Autoload callback.
     *
     * @param string $class Name of the class to load.
     * @return bool True when a file was included, false when the class
     *              is not one of ours or its file does not exist.
     */
    public static function autoload($class)
    {
        $file = HTMLPurifier_Bootstrap::getPath($class);
        if (!$file) {
            return false;
        }
        // Technically speaking, it should be ok and more efficient to
        // just do 'require', but Antonio Parraga reports that with
        // Zend extensions such as Zend debugger and APC, this invariant
        // may be broken.  Since we have efficient alternatives, pay
        // the cost here and avoid the bug.
        require_once HTMLPURIFIER_PREFIX . '/' . $file;
        return true;
    }

    /**
     * Computes the file of a class, relative to HTMLPURIFIER_PREFIX.
     *
     * @param string $class Class name.
     * @return string|false Relative path, or false when the name does
     *                      not start with 'HTMLPurifier' or the file
     *                      does not exist.
     */
    public static function getPath($class)
    {
        if (strncmp('HTMLPurifier', $class, 12) !== 0) {
            return false;
        }
        // Custom implementations
        if (strncmp('HTMLPurifier_Language_', $class, 22) === 0) {
            // language classes live in one directory, named by language
            // code with dashes instead of underscores
            $code = str_replace('_', '-', substr($class, 22));
            $file = 'HTMLPurifier/Language/classes/' . $code . '.php';
        } else {
            $file = str_replace('_', '/', $class) . '.php';
        }
        if (!file_exists(HTMLPURIFIER_PREFIX . '/' . $file)) {
            return false;
        }
        return $file;
    }

    /**
     * Registers autoload() so that it runs before every autoloader that
     * is already on the SPL stack.
     *
     * @throws Exception On PHP older than 5.2.11 when a non-static object
     *                   method is already registered (PHP Bug #44144).
     */
    public static function registerAutoload()
    {
        $autoload = array('HTMLPurifier_Bootstrap', 'autoload');
        if (($funcs = spl_autoload_functions()) === false) {
            spl_autoload_register($autoload);
        } elseif (version_compare(PHP_VERSION, '5.3.0', '>=')) {
            // The prepend flag exists: no need to tear down and rebuild
            // other people's autoloaders. Unregister first so that a
            // repeated call still moves us to the front.
            spl_autoload_unregister($autoload);
            spl_autoload_register($autoload, true, true);
        } elseif (function_exists('spl_autoload_unregister')) {
            $buggy  = version_compare(PHP_VERSION, '5.2.11', '<');
            $compat = version_compare(PHP_VERSION, '5.1.2', '<=') &&
                      version_compare(PHP_VERSION, '5.1.0', '>=');
            foreach ($funcs as $func) {
                if ($buggy && is_array($func)) {
                    // :TRICKY: There are some compatibility issues and some
                    // places where we need to error out
                    $reflector = new ReflectionMethod($func[0], $func[1]);
                    if (!$reflector->isStatic()) {
                        throw new Exception('
                            HTML Purifier autoloader registrar is not compatible
                            with non-static object methods due to PHP Bug #44144;
                            Please do not use HTMLPurifier.autoload.php (or any
                            file that includes this file); instead, place the code:
                            spl_autoload_register(array(\'HTMLPurifier_Bootstrap\', \'autoload\'))
                            after your own autoloaders.
                        ');
                    }
                    // Surprisingly, spl_autoload_register supports the
                    // Class::staticMethod callback format, although call_user_func doesn't
                    if ($compat) {
                        $func = implode('::', $func);
                    }
                }
                spl_autoload_unregister($func);
            }
            // ours goes first, then everybody else in the original order
            spl_autoload_register($autoload);
            foreach ($funcs as $func) {
                spl_autoload_register($func);
            }
        }
    }

}
00927 
00928 
00929 
00930 
00931 
/**
 * Base class for definitions that are set up lazily, at most once.
 */
abstract class HTMLPurifier_Definition
{

    /** Whether setup() has already been called on this object. */
    public $setup = false;

    /**
     * Not read or written in this chunk.
     * NOTE(review): presumably records whether the definition was built
     * by an optimized code path - confirm against the callers.
     */
    public $optimized = null;

    /** Short type name of the definition; concrete subclasses set it (e.g. 'CSS'). */
    public $type;

    /**
     * Performs the actual setup work; implemented by concrete classes.
     *
     * @param object $config Configuration object.
     */
    abstract protected function doSetup($config);

    /**
     * Sets the definition up on the first call and does nothing on later
     * calls. The flag is raised before doSetup() runs, so a re-entrant
     * call made during setup returns immediately as well.
     *
     * @param object $config Configuration object, handed to doSetup().
     */
    public function setup($config)
    {
        if ($this->setup) {
            return;
        }
        $this->setup = true;
        $this->doSetup($config);
    }

}
00978 
00979 
00980 
00981 
00982 
00987 class HTMLPurifier_CSSDefinition extends HTMLPurifier_Definition
00988 {
00989 
00990     public $type = 'CSS';
00991 
00995     public $info = array();
00996 
01000     protected function doSetup($config) {
01001 
01002         $this->info['text-align'] = new HTMLPurifier_AttrDef_Enum(
01003             array('left', 'right', 'center', 'justify'), false);
01004 
01005         $border_style =
01006         $this->info['border-bottom-style'] =
01007         $this->info['border-right-style'] =
01008         $this->info['border-left-style'] =
01009         $this->info['border-top-style'] =  new HTMLPurifier_AttrDef_Enum(
01010             array('none', 'hidden', 'dotted', 'dashed', 'solid', 'double',
01011             'groove', 'ridge', 'inset', 'outset'), false);
01012 
01013         $this->info['border-style'] = new HTMLPurifier_AttrDef_CSS_Multiple($border_style);
01014 
01015         $this->info['clear'] = new HTMLPurifier_AttrDef_Enum(
01016             array('none', 'left', 'right', 'both'), false);
01017         $this->info['float'] = new HTMLPurifier_AttrDef_Enum(
01018             array('none', 'left', 'right'), false);
01019         $this->info['font-style'] = new HTMLPurifier_AttrDef_Enum(
01020             array('normal', 'italic', 'oblique'), false);
01021         $this->info['font-variant'] = new HTMLPurifier_AttrDef_Enum(
01022             array('normal', 'small-caps'), false);
01023 
01024         $uri_or_none = new HTMLPurifier_AttrDef_CSS_Composite(
01025             array(
01026                 new HTMLPurifier_AttrDef_Enum(array('none')),
01027                 new HTMLPurifier_AttrDef_CSS_URI()
01028             )
01029         );
01030 
01031         $this->info['list-style-position'] = new HTMLPurifier_AttrDef_Enum(
01032             array('inside', 'outside'), false);
01033         $this->info['list-style-type'] = new HTMLPurifier_AttrDef_Enum(
01034             array('disc', 'circle', 'square', 'decimal', 'lower-roman',
01035             'upper-roman', 'lower-alpha', 'upper-alpha', 'none'), false);
01036         $this->info['list-style-image'] = $uri_or_none;
01037 
01038         $this->info['list-style'] = new HTMLPurifier_AttrDef_CSS_ListStyle($config);
01039 
01040         $this->info['text-transform'] = new HTMLPurifier_AttrDef_Enum(
01041             array('capitalize', 'uppercase', 'lowercase', 'none'), false);
01042         $this->info['color'] = new HTMLPurifier_AttrDef_CSS_Color();
01043 
01044         $this->info['background-image'] = $uri_or_none;
01045         $this->info['background-repeat'] = new HTMLPurifier_AttrDef_Enum(
01046             array('repeat', 'repeat-x', 'repeat-y', 'no-repeat')
01047         );
01048         $this->info['background-attachment'] = new HTMLPurifier_AttrDef_Enum(
01049             array('scroll', 'fixed')
01050         );
01051         $this->info['background-position'] = new HTMLPurifier_AttrDef_CSS_BackgroundPosition();
01052 
01053         $border_color =
01054         $this->info['border-top-color'] =
01055         $this->info['border-bottom-color'] =
01056         $this->info['border-left-color'] =
01057         $this->info['border-right-color'] =
01058         $this->info['background-color'] = new HTMLPurifier_AttrDef_CSS_Composite(array(
01059             new HTMLPurifier_AttrDef_Enum(array('transparent')),
01060             new HTMLPurifier_AttrDef_CSS_Color()
01061         ));
01062 
01063         $this->info['background'] = new HTMLPurifier_AttrDef_CSS_Background($config);
01064 
01065         $this->info['border-color'] = new HTMLPurifier_AttrDef_CSS_Multiple($border_color);
01066 
01067         $border_width =
01068         $this->info['border-top-width'] =
01069         $this->info['border-bottom-width'] =
01070         $this->info['border-left-width'] =
01071         $this->info['border-right-width'] = new HTMLPurifier_AttrDef_CSS_Composite(array(
01072             new HTMLPurifier_AttrDef_Enum(array('thin', 'medium', 'thick')),
01073             new HTMLPurifier_AttrDef_CSS_Length('0') //disallow negative
01074         ));
01075 
01076         $this->info['border-width'] = new HTMLPurifier_AttrDef_CSS_Multiple($border_width);
01077 
01078         $this->info['letter-spacing'] = new HTMLPurifier_AttrDef_CSS_Composite(array(
01079             new HTMLPurifier_AttrDef_Enum(array('normal')),
01080             new HTMLPurifier_AttrDef_CSS_Length()
01081         ));
01082 
01083         $this->info['word-spacing'] = new HTMLPurifier_AttrDef_CSS_Composite(array(
01084             new HTMLPurifier_AttrDef_Enum(array('normal')),
01085             new HTMLPurifier_AttrDef_CSS_Length()
01086         ));
01087 
01088         $this->info['font-size'] = new HTMLPurifier_AttrDef_CSS_Composite(array(
01089             new HTMLPurifier_AttrDef_Enum(array('xx-small', 'x-small',
01090                 'small', 'medium', 'large', 'x-large', 'xx-large',
01091                 'larger', 'smaller')),
01092             new HTMLPurifier_AttrDef_CSS_Percentage(),
01093             new HTMLPurifier_AttrDef_CSS_Length()
01094         ));
01095 
01096         $this->info['line-height'] = new HTMLPurifier_AttrDef_CSS_Composite(array(
01097             new HTMLPurifier_AttrDef_Enum(array('normal')),
01098             new HTMLPurifier_AttrDef_CSS_Number(true), // no negatives
01099             new HTMLPurifier_AttrDef_CSS_Length('0'),
01100             new HTMLPurifier_AttrDef_CSS_Percentage(true)
01101         ));
01102 
01103         $margin =
01104         $this->info['margin-top'] =
01105         $this->info['margin-bottom'] =
01106         $this->info['margin-left'] =
01107         $this->info['margin-right'] = new HTMLPurifier_AttrDef_CSS_Composite(array(
01108             new HTMLPurifier_AttrDef_CSS_Length(),
01109             new HTMLPurifier_AttrDef_CSS_Percentage(),
01110             new HTMLPurifier_AttrDef_Enum(array('auto'))
01111         ));
01112 
01113         $this->info['margin'] = new HTMLPurifier_AttrDef_CSS_Multiple($margin);
01114 
01115         // non-negative
01116         $padding =
01117         $this->info['padding-top'] =
01118         $this->info['padding-bottom'] =
01119         $this->info['padding-left'] =
01120         $this->info['padding-right'] = new HTMLPurifier_AttrDef_CSS_Composite(array(
01121             new HTMLPurifier_AttrDef_CSS_Length('0'),
01122             new HTMLPurifier_AttrDef_CSS_Percentage(true)
01123         ));
01124 
01125         $this->info['padding'] = new HTMLPurifier_AttrDef_CSS_Multiple($padding);
01126 
01127         $this->info['text-indent'] = new HTMLPurifier_AttrDef_CSS_Composite(array(
01128             new HTMLPurifier_AttrDef_CSS_Length(),
01129             new HTMLPurifier_AttrDef_CSS_Percentage()
01130         ));
01131 
01132         $trusted_wh = new HTMLPurifier_AttrDef_CSS_Composite(array(
01133             new HTMLPurifier_AttrDef_CSS_Length('0'),
01134             new HTMLPurifier_AttrDef_CSS_Percentage(true),
01135             new HTMLPurifier_AttrDef_Enum(array('auto'))
01136         ));
01137         $max = $config->get('CSS.MaxImgLength');
01138 
01139         $this->info['width'] =
01140         $this->info['height'] =
01141             $max === null ?
01142             $trusted_wh :
01143             new HTMLPurifier_AttrDef_Switch('img',
01144                 // For img tags:
01145                 new HTMLPurifier_AttrDef_CSS_Composite(array(
01146                     new HTMLPurifier_AttrDef_CSS_Length('0', $max),
01147                     new HTMLPurifier_AttrDef_Enum(array('auto'))
01148                 )),
01149                 // For everyone else:
01150                 $trusted_wh
01151             );
01152 
01153         $this->info['text-decoration'] = new HTMLPurifier_AttrDef_CSS_TextDecoration();
01154 
01155         $this->info['font-family'] = new HTMLPurifier_AttrDef_CSS_FontFamily();
01156 
01157         // this could use specialized code
01158         $this->info['font-weight'] = new HTMLPurifier_AttrDef_Enum(
01159             array('normal', 'bold', 'bolder', 'lighter', '100', '200', '300',
01160             '400', '500', '600', '700', '800', '900'), false);
01161 
01162         // MUST be called after other font properties, as it references
01163         // a CSSDefinition object
01164         $this->info['font'] = new HTMLPurifier_AttrDef_CSS_Font($config);
01165 
01166         // same here
01167         $this->info['border'] =
01168         $this->info['border-bottom'] =
01169         $this->info['border-top'] =
01170         $this->info['border-left'] =
01171         $this->info['border-right'] = new HTMLPurifier_AttrDef_CSS_Border($config);
01172 
01173         $this->info['border-collapse'] = new HTMLPurifier_AttrDef_Enum(array(
01174             'collapse', 'separate'));
01175 
01176         $this->info['caption-side'] = new HTMLPurifier_AttrDef_Enum(array(
01177             'top', 'bottom'));
01178 
01179         $this->info['table-layout'] = new HTMLPurifier_AttrDef_Enum(array(
01180             'auto', 'fixed'));
01181 
01182         $this->info['vertical-align'] = new HTMLPurifier_AttrDef_CSS_Composite(array(
01183             new HTMLPurifier_AttrDef_Enum(array('baseline', 'sub', 'super',
01184                 'top', 'text-top', 'middle', 'bottom', 'text-bottom')),
01185             new HTMLPurifier_AttrDef_CSS_Length(),
01186             new HTMLPurifier_AttrDef_CSS_Percentage()
01187         ));
01188 
01189         $this->info['border-spacing'] = new HTMLPurifier_AttrDef_CSS_Multiple(new HTMLPurifier_AttrDef_CSS_Length(), 2);
01190 
01191         // partial support
01192         $this->info['white-space'] = new HTMLPurifier_AttrDef_Enum(array('nowrap'));
01193 
01194         if ($config->get('CSS.Proprietary')) {
01195             $this->doSetupProprietary($config);
01196         }
01197 
01198         if ($config->get('CSS.AllowTricky')) {
01199             $this->doSetupTricky($config);
01200         }
01201 
01202         if ($config->get('CSS.Trusted')) {
01203             $this->doSetupTrusted($config);
01204         }
01205 
01206         $allow_important = $config->get('CSS.AllowImportant');
01207         // wrap all attr-defs with decorator that handles !important
01208         foreach ($this->info as $k => $v) {
01209             $this->info[$k] = new HTMLPurifier_AttrDef_CSS_ImportantDecorator($v, $allow_important);
01210         }
01211 
01212         $this->setupConfigStuff($config);
01213     }
01214 
01215     protected function doSetupProprietary($config) {
              // Registers validators for vendor-specific CSS properties in
              // $this->info. The visible caller invokes this only when the
              // CSS.Proprietary directive is truthy. $config is accepted but
              // not read by this method.
01216         // Internet Explorer only scrollbar colors
01217         $this->info['scrollbar-arrow-color']        = new HTMLPurifier_AttrDef_CSS_Color();
01218         $this->info['scrollbar-base-color']         = new HTMLPurifier_AttrDef_CSS_Color();
01219         $this->info['scrollbar-darkshadow-color']   = new HTMLPurifier_AttrDef_CSS_Color();
01220         $this->info['scrollbar-face-color']         = new HTMLPurifier_AttrDef_CSS_Color();
01221         $this->info['scrollbar-highlight-color']    = new HTMLPurifier_AttrDef_CSS_Color();
01222         $this->info['scrollbar-shadow-color']       = new HTMLPurifier_AttrDef_CSS_Color();
01223 
01224         // technically not proprietary, but CSS3, and no one supports it
              // (the standard name and two vendor-prefixed spellings each get
              // their own AlphaValue validator instance)
01225         $this->info['opacity']          = new HTMLPurifier_AttrDef_CSS_AlphaValue();
01226         $this->info['-moz-opacity']     = new HTMLPurifier_AttrDef_CSS_AlphaValue();
01227         $this->info['-khtml-opacity']   = new HTMLPurifier_AttrDef_CSS_AlphaValue();
01228 
01229         // only opacity, for now
01230         $this->info['filter'] = new HTMLPurifier_AttrDef_CSS_Filter();
01231 
01232     }
01233 
01234     protected function doSetupTricky($config) {
              // Registers enum validators for 'display', 'visibility' and
              // 'overflow' in $this->info. The visible caller invokes this only
              // when the CSS.AllowTricky directive is truthy. $config is not
              // read by this method.
01235         $this->info['display'] = new HTMLPurifier_AttrDef_Enum(array(
01236             'inline', 'block', 'list-item', 'run-in', 'compact',
01237             'marker', 'table', 'inline-table', 'table-row-group',
01238             'table-header-group', 'table-footer-group', 'table-row',
01239             'table-column-group', 'table-column', 'table-cell', 'table-caption', 'none'
01240         ));
01241         $this->info['visibility'] = new HTMLPurifier_AttrDef_Enum(array(
01242             'visible', 'hidden', 'collapse'
01243         ));
01244         $this->info['overflow'] = new HTMLPurifier_AttrDef_Enum(array('visible', 'hidden', 'auto', 'scroll'));
01245     }
01246 
01247     protected function doSetupTrusted($config) {
              // Registers validators for positioning properties ('position',
              // the four offsets and 'z-index') in $this->info. The visible
              // caller invokes this only when the CSS.Trusted directive is
              // truthy. $config is not read by this method.
01248         $this->info['position'] = new HTMLPurifier_AttrDef_Enum(array(
01249             'static', 'relative', 'absolute', 'fixed'
01250         ));
              // The chained assignment makes all four offsets share a single
              // Composite instance: a length, a percentage, or 'auto'.
01251         $this->info['top'] =
01252         $this->info['left'] =
01253         $this->info['right'] =
01254         $this->info['bottom'] = new HTMLPurifier_AttrDef_CSS_Composite(array(
01255             new HTMLPurifier_AttrDef_CSS_Length(),
01256             new HTMLPurifier_AttrDef_CSS_Percentage(),
01257             new HTMLPurifier_AttrDef_Enum(array('auto')),
01258         ));
              // z-index: an integer or the keyword 'auto'.
01259         $this->info['z-index'] = new HTMLPurifier_AttrDef_CSS_Composite(array(
01260             new HTMLPurifier_AttrDef_Integer(),
01261             new HTMLPurifier_AttrDef_Enum(array('auto')),
01262         ));
01263     }
01264 
01271     protected function setupConfigStuff($config) {
              // Narrows $this->info according to two directives read from
              // $config: CSS.AllowedProperties (keep only the listed
              // properties) and CSS.ForbiddenProperties (remove the listed
              // properties). Both values are used as lookups keyed by property
              // name, and either may be null, which disables that filter.
01272 
01273         // setup allowed elements
              // $support is only used as a suffix of the warning text below.
01274         $support = "(for information on implementing this, see the ".
01275                    "support forums) ";
01276         $allowed_properties = $config->get('CSS.AllowedProperties');
01277         if ($allowed_properties !== null) {
01278             foreach ($this->info as $name => $d) {
                      // Drop every known property that was not whitelisted,
                      // and tick each known name off the whitelist so that
                      // only unrecognised names remain afterwards.
01279                 if(!isset($allowed_properties[$name])) unset($this->info[$name]);
01280                 unset($allowed_properties[$name]);
01281             }
01282             // emit errors
                  // Whatever is left was requested but has no validator.
01283             foreach ($allowed_properties as $name => $d) {
01284                 // :TODO: Is this htmlspecialchars() call really necessary?
01285                 $name = htmlspecialchars($name);
01286                 trigger_error("Style attribute '$name' is not supported $support", E_USER_WARNING);
01287             }
01288         }
01289 
01290         $forbidden_properties = $config->get('CSS.ForbiddenProperties');
01291         if ($forbidden_properties !== null) {
                  // Unknown names in the blacklist are ignored silently.
01292             foreach ($this->info as $name => $d) {
01293                 if (isset($forbidden_properties[$name])) {
01294                     unset($this->info[$name]);
01295                 }
01296             }
01297         }
01298 
01299     }
01300 }
01301 
01302 
01303 
01304 
01305 
      // Abstract base for child-definition objects. Concrete subclasses must
      // implement validateChildren(); this base only stores a few public
      // fields and exposes the list of elements.
01309 abstract class HTMLPurifier_ChildDef
01310 {
          // Identifier for the kind of child definition; no default is set
          // here. NOTE(review): presumably assigned by each subclass - confirm.
01315     public $type;
01316 
          // No default is set here. NOTE(review): the name suggests it records
          // whether an element with no children is acceptable - confirm
          // against the subclasses.
01323     public $allow_empty;
01324 
          // Elements associated with this definition; returned unchanged by
          // getAllowedElements(). Defaults to an empty array.
01328     public $elements = array();
01329 
          // Returns $this->elements. $config is accepted but unused in this
          // base implementation.
01334     public function getAllowedElements($config) {
01335         return $this->elements;
01336     }
01337 
          // Must be implemented by subclasses. Receives the child tokens plus
          // the active config and context; the return contract is not visible
          // in this listing.
01348     abstract public function validateChildren($tokens_of_children, $config, $context);
01349 }
01350 
01351 
01352 
01353 
01354 
01369 class HTMLPurifier_Config
01370 {
01371 
          // Library version string.
01375     public $version = '4.4.0';
01376 
          // When true, autoFinalize() calls finalize(); when false it only
          // squashes the property list.
01381     public $autoFinalize = true;
01382 
01383     // protected member variables
01384 
          // Per-namespace md5 cache filled by getBatchSerial(); set() writes
          // false into the changed namespace's slot to invalidate it.
01389     protected $serials = array();
01390 
          // md5 of the whole configuration, cached by getSerial().
01394     protected $serial;
01395 
          // Value parser used by set(); created in the constructor and cleared
          // by finalize().
01399     protected $parser = null;
01400 
          // The schema object this config validates against (its ->info and
          // ->defaultPlist are read by this class).
01406     public $def;
01407 
          // In-memory definition objects, indexed by type ('HTML', 'CSS',
          // 'URI').
01411     protected $definitions;
01412 
          // True once finalize() has run; set() and the load* methods refuse
          // to modify a finalized config.
01416     protected $finalized = false;
01417 
          // HTMLPurifier_PropertyList holding the directive values.
01421     protected $plist;
01422 
          // True only while set() is forwarding an aliased directive to its
          // real name; used to reject an alias that points at another alias.
01427     private $aliasMode;
01428 
          // When true, error messages carry extra diagnostic hints.
01434     public $chatty = true;
01435 
          // Namespace that get() is currently restricted to, or null/unset
          // when no restriction is active (set around definition setup in
          // getDefinition()).
01439     private $lock;
01440 
01445     public function __construct($definition, $parent = null) {
              // $definition: schema object providing ->info and ->defaultPlist.
              // $parent: optional property list to inherit values from; when it
              // is falsy the schema's default property list is used instead.
01446         $parent = $parent ? $parent : $definition->defaultPlist;
01447         $this->plist = new HTMLPurifier_PropertyList($parent);
01448         $this->def = $definition; // keep a copy around for checking
01449         $this->parser = new HTMLPurifier_VarParser_Flexible();
01450     }
01451 
01461     public static function create($config, $schema = null) {
              // Factory that accepts several input shapes for $config:
              //   - an HTMLPurifier_Config instance: returned as-is;
              //   - a string: treated as an ini file name and passed to loadIni();
              //   - an array: passed to loadArray();
              //   - anything else: a config with default values is returned.
              // $schema, when truthy, replaces the default schema.
01462         if ($config instanceof HTMLPurifier_Config) {
01463             // pass-through
01464             return $config;
01465         }
01466         if (!$schema) {
01467             $ret = HTMLPurifier_Config::createDefault();
01468         } else {
01469             $ret = new HTMLPurifier_Config($schema);
01470         }
01471         if (is_string($config)) $ret->loadIni($config);
01472         elseif (is_array($config)) $ret->loadArray($config);
01473         return $ret;
01474     }
01475 
01482     public static function inherit(HTMLPurifier_Config $config) {
              // Returns a new config that shares $config's schema and uses
              // $config's property list as the parent of its own list.
01483         return new HTMLPurifier_Config($config->def, $config->plist);
01484     }
01485 
01490     public static function createDefault() {
              // Returns a new config built on the schema obtained from
              // HTMLPurifier_ConfigSchema::instance().
01491         $definition = HTMLPurifier_ConfigSchema::instance();
01492         $config = new HTMLPurifier_Config($definition);
01493         return $config;
01494     }
01495 
01500     public function get($key, $a = null) {
              // Returns the value of directive $key ("Namespace.Directive").
              // Passing the namespace and directive as two arguments ($key, $a)
              // still works but triggers a deprecation warning. Every failure
              // path triggers an error and then returns null via a bare return.
01501         if ($a !== null) {
01502             $this->triggerError("Using deprecated API: use \$config->get('$key.$a') instead", E_USER_WARNING);
01503             $key = "$key.$a";
01504         }
              // Reading a value finalizes the config (subject to
              // $this->autoFinalize).
01505         if (!$this->finalized) $this->autoFinalize();
01506         if (!isset($this->def->info[$key])) {
01507             // can't add % due to SimpleTest bug
01508             $this->triggerError('Cannot retrieve value of undefined directive ' . htmlspecialchars($key),
01509                 E_USER_WARNING);
01510             return;
01511         }
              // Aliases can be written through (see set()) but never read.
01512         if (isset($this->def->info[$key]->isAlias)) {
01513             $d = $this->def->info[$key];
01514             $this->triggerError('Cannot get value from aliased directive, use real name ' . $d->key,
01515                 E_USER_ERROR);
01516             return;
01517         }
              // While a lock is active only directives in the locked namespace
              // may be read.
01518         if ($this->lock) {
01519             list($ns) = explode('.', $key);
01520             if ($ns !== $this->lock) {
01521                 $this->triggerError('Cannot get value of namespace ' . $ns . ' when lock for ' . $this->lock . ' is active, this probably indicates a Definition setup method is accessing directives that are not within its namespace', E_USER_ERROR);
01522                 return;
01523             }
01524         }
01525         return $this->plist->get($key);
01526     }
01527 
01532     public function getBatch($namespace) {
              // Returns the directive => value array for one namespace, taken
              // from getAll(). Triggers a warning and returns null when the
              // namespace has no entries.
01533         if (!$this->finalized) $this->autoFinalize();
01534         $full = $this->getAll();
01535         if (!isset($full[$namespace])) {
01536             $this->triggerError('Cannot retrieve undefined namespace ' . htmlspecialchars($namespace),
01537                 E_USER_WARNING);
01538             return;
01539         }
01540         return $full[$namespace];
01541     }
01542 
01550     public function getBatchSerial($namespace) {
              // Returns an md5 fingerprint of the namespace's directive values,
              // computed once and cached in $this->serials. empty() is used so
              // that the false written by set() forces a recomputation.
01551         if (empty($this->serials[$namespace])) {
01552             $batch = $this->getBatch($namespace);
                  // DefinitionRev is excluded, so changing it alone does not
                  // change the fingerprint.
01553             unset($batch['DefinitionRev']);
01554             $this->serials[$namespace] = md5(serialize($batch));
01555         }
01556         return $this->serials[$namespace];
01557     }
01558 
01563     public function getSerial() {
              // Returns an md5 fingerprint of the full configuration (the
              // result of getAll()), computed once and cached in $this->serial.
              // NOTE(review): set() invalidates $this->serials[...] but not
              // $this->serial; confirm that no caller can change a directive
              // after this value has been cached.
01564         if (empty($this->serial)) {
01565             $this->serial = md5(serialize($this->getAll()));
01566         }
01567         return $this->serial;
01568     }
01569 
01574     public function getAll() {
              // Returns every directive value as a two-level array:
              // $ret[namespace][directive] = value.
01575         if (!$this->finalized) $this->autoFinalize();
01576         $ret = array();
01577         foreach ($this->plist->squash() as $name => $value) {
                  // Split on the first dot only, so the directive part may
                  // itself contain dots.
01578             list($ns, $key) = explode('.', $name, 2);
01579             $ret[$ns][$key] = $value;
01580         }
01581         return $ret;
01582     }
01583 
01589     public function set($key, $value, $a = null) {
              // Sets directive $key ("Namespace.Directive") to $value after
              // parsing and validating it against the schema. The deprecated
              // three-argument form set($namespace, $directive, $value) is
              // detected by the absence of a dot in $key. Every failure path
              // triggers an error and returns without changing anything.
01590         if (strpos($key, '.') === false) {
                  // Legacy call: shift the arguments into their modern roles.
01591             $namespace = $key;
01592             $directive = $value;
01593             $value = $a;
01594             $key = "$key.$directive";
01595             $this->triggerError("Using deprecated API: use \$config->set('$key', ...) instead", E_USER_NOTICE);
01596         } else {
01597             list($namespace) = explode('.', $key);
01598         }
01599         if ($this->isFinalized('Cannot set directive after finalization')) return;
01600         if (!isset($this->def->info[$key])) {
01601             $this->triggerError('Cannot set undefined directive ' . htmlspecialchars($key) . ' to value',
01602                 E_USER_WARNING);
01603             return;
01604         }
01605         $def = $this->def->info[$key];
01606 
01607         if (isset($def->isAlias)) {
                  // $this->aliasMode is true only during the recursive call
                  // below, so reaching this branch with it set means the alias
                  // target is itself an alias.
                  // NOTE(review): the message below has no space between
                  // "with" and the key.
01608             if ($this->aliasMode) {
01609                 $this->triggerError('Double-aliases not allowed, please fix '.
01610                     'ConfigSchema bug with' . $key, E_USER_ERROR);
01611                 return;
01612             }
01613             $this->aliasMode = true;
01614             $this->set($def->key, $value);
01615             $this->aliasMode = false;
01616             $this->triggerError("$key is an alias, preferred directive name is {$def->key}", E_USER_NOTICE);
01617             return;
01618         }
01619 
01620         // Raw type might be negative when using the fully optimized form
01621         // of stdclass, which indicates allow_null == true
01622         $rtype = is_int($def) ? $def : $def->type;
01623         if ($rtype < 0) {
01624             $type = -$rtype;
01625             $allow_null = true;
01626         } else {
01627             $type = $rtype;
01628             $allow_null = isset($def->allow_null);
01629         }
01630 
01631         try {
01632             $value = $this->parser->parse($value, $type, $allow_null);
01633         } catch (HTMLPurifier_VarParserException $e) {
01634             $this->triggerError('Value for ' . $key . ' is of invalid type, should be ' . HTMLPurifier_VarParser::getTypeName($type), E_USER_WARNING);
01635             return;
01636         }
              // Value aliases and allowed-value lists exist only on object
              // definitions and are applied only to string values.
01637         if (is_string($value) && is_object($def)) {
01638             // resolve value alias if defined
01639             if (isset($def->aliases[$value])) {
01640                 $value = $def->aliases[$value];
01641             }
01642             // check to see if the value is allowed
01643             if (isset($def->allowed) && !isset($def->allowed[$value])) {
01644                 $this->triggerError('Value not supported, valid values are: ' .
01645                     $this->_listify($def->allowed), E_USER_WARNING);
01646                 return;
01647             }
01648         }
01649         $this->plist->set($key, $value);
01650 
01651         // reset definitions if the directives they depend on changed
01652         // this is a very costly process, so it's discouraged
01653         // with finalization
01654         if ($namespace == 'HTML' || $namespace == 'CSS' || $namespace == 'URI') {
01655             $this->definitions[$namespace] = null;
01656         }
01657 
              // Invalidate the cached fingerprint used by getBatchSerial().
01658         $this->serials[$namespace] = false;
01659     }
01660 
01664     private function _listify($lookup) {
              // Returns the keys of $lookup joined with ', '; the values are
              // ignored.
01665         $list = array();
01666         foreach ($lookup as $name => $b) $list[] = $name;
01667         return implode(', ', $list);
01668     }
01669 
01681     public function getHTMLDefinition($raw = false, $optimized = false) {
              // Shortcut for getDefinition('HTML', $raw, $optimized).
01682         return $this->getDefinition('HTML', $raw, $optimized);
01683     }
01684 
01696     public function getCSSDefinition($raw = false, $optimized = false) {
              // Shortcut for getDefinition('CSS', $raw, $optimized).
01697         return $this->getDefinition('CSS', $raw, $optimized);
01698     }
01699 
01711     public function getURIDefinition($raw = false, $optimized = false) {
              // Shortcut for getDefinition('URI', $raw, $optimized).
01712         return $this->getDefinition('URI', $raw, $optimized);
01713     }
01714 
01728     public function getDefinition($type, $raw = false, $optimized = false) {
              // Returns the definition object for $type ('HTML', 'CSS' or 'URI').
              //   $raw = false: returns a fully set-up definition, taken from
              //     memory, then from the cache, and otherwise built, set up
              //     and stored in the cache.
              //   $raw = true: returns a definition that has not been set up
              //     so the caller can edit it, or null when $optimized is true
              //     and a finished definition is already available.
              // Throws HTMLPurifier_Exception when the flags are inconsistent
              // with each other or with earlier calls.
01729         if ($optimized && !$raw) {
01730             throw new HTMLPurifier_Exception("Cannot set optimized = true when raw = false");
01731         }
01732         if (!$this->finalized) $this->autoFinalize();
01733         // temporarily suspend locks, so we can handle recursive definition calls
01734         $lock = $this->lock;
01735         $this->lock = null;
01736         $factory = HTMLPurifier_DefinitionCacheFactory::instance();
01737         $cache = $factory->create($type, $this);
01738         $this->lock = $lock;
01739         if (!$raw) {
01740             // full definition
01741             // ---------------
01742             // check if definition is in memory
01743             if (!empty($this->definitions[$type])) {
01744                 $def = $this->definitions[$type];
01745                 // check if the definition is setup
01746                 if ($def->setup) {
01747                     return $def;
01748                 } else {
                          // An in-memory definition that is not yet set up is
                          // set up here and cached only if it is marked
                          // optimized.
01749                     $def->setup($this);
01750                     if ($def->optimized) $cache->add($def, $this);
01751                     return $def;
01752                 }
01753             }
01754             // check if definition is in cache
01755             $def = $cache->get($this);
01756             if ($def) {
01757                 // definition in cache, save to memory and return it
01758                 $this->definitions[$type] = $def;
01759                 return $def;
01760             }
01761             // initialize it
01762             $def = $this->initDefinition($type);
01763             // set it up
                  // While the lock is set, get() rejects directives from any
                  // other namespace.
01764             $this->lock = $type;
01765             $def->setup($this);
01766             $this->lock = null;
01767             // save in cache
01768             $cache->add($def, $this);
01769             // return it
01770             return $def;
01771         } else {
01772             // raw definition
01773             // --------------
01774             // check preconditions
01775             $def = null;
01776             if ($optimized) {
01777                 if (is_null($this->get($type . '.DefinitionID'))) {
01778                     // fatally error out if definition ID not set
01779                     throw new HTMLPurifier_Exception("Cannot retrieve raw version without specifying %$type.DefinitionID");
01780                 }
01781             }
01782             if (!empty($this->definitions[$type])) {
01783                 $def = $this->definitions[$type];
01784                 if ($def->setup && !$optimized) {
01785                     $extra = $this->chatty ? " (try moving this code block earlier in your initialization)" : "";
01786                     throw new HTMLPurifier_Exception("Cannot retrieve raw definition after it has already been setup" . $extra);
01787                 }
01788                 if ($def->optimized === null) {
01789                     $extra = $this->chatty ? " (try flushing your cache)" : "";
01790                     throw new HTMLPurifier_Exception("Optimization status of definition is unknown" . $extra);
01791                 }
01792                 if ($def->optimized !== $optimized) {
01793                     $msg = $optimized ? "optimized" : "unoptimized";
01794                     $extra = $this->chatty ? " (this backtrace is for the first inconsistent call, which was for a $msg raw definition)" : "";
01795                     throw new HTMLPurifier_Exception("Inconsistent use of optimized and unoptimized raw definition retrievals" . $extra);
01796                 }
01797             }
01798             // check if definition was in memory
01799             if ($def) {
01800                 if ($def->setup) {
01801                     // invariant: $optimized === true (checked above)
01802                     return null;
01803                 } else {
01804                     return $def;
01805                 }
01806             }
01807             // if optimized, check if definition was in cache
01808             // (because we do the memory check first, this formulation
01809             // is prone to cache slamming, but I think
01810             // guaranteeing that either /all/ of the raw
01811             // setup code or /none/ of it is run is more important.)
01812             if ($optimized) {
01813                 // This code path only gets run once; once we put
01814                 // something in $definitions (which is guaranteed by the
01815                 // trailing code), we always short-circuit above.
01816                 $def = $cache->get($this);
01817                 if ($def) {
01818                     // save the full definition for later, but don't
01819                     // return it yet
01820                     $this->definitions[$type] = $def;
01821                     return null;
01822                 }
01823             }
01824             // check invariants for creation
01825             if (!$optimized) {
01826                 if (!is_null($this->get($type . '.DefinitionID'))) {
01827                     if ($this->chatty) {
01828                         $this->triggerError("Due to a documentation error in previous version of HTML Purifier, your definitions are not being cached.  If this is OK, you can remove the %$type.DefinitionRev and %$type.DefinitionID declaration.  Otherwise, modify your code to use maybeGetRawDefinition, and test if the returned value is null before making any edits (if it is null, that means that a cached version is available, and no raw operations are necessary).  See <a href='http://htmlpurifier.org/docs/enduser-customize.html#optimized'>Customize</a> for more details", E_USER_WARNING);
01829                     } else {
01830                         $this->triggerError("Useless DefinitionID declaration", E_USER_WARNING);
01831                     }
01832                 }
01833             }
01834             // initialize it
01835             $def = $this->initDefinition($type);
                  // Record how this raw definition was requested so that later
                  // calls can be checked for consistency (see above).
01836             $def->optimized = $optimized;
01837             return $def;
01838         }
              // Unreachable: both branches above return or throw.
01839         throw new HTMLPurifier_Exception("The impossible happened!");
01840     }
01841 
01842     private function initDefinition($type) {
              // Creates a fresh definition object for $type ('HTML', 'CSS' or
              // 'URI'), stores it in $this->definitions[$type] and returns it.
              // Throws HTMLPurifier_Exception for any other $type.
01843         // quick checks failed, let's create the object
01844         if ($type == 'HTML') {
01845             $def = new HTMLPurifier_HTMLDefinition();
01846         } elseif ($type == 'CSS') {
01847             $def = new HTMLPurifier_CSSDefinition();
01848         } elseif ($type == 'URI') {
01849             $def = new HTMLPurifier_URIDefinition();
01850         } else {
01851             throw new HTMLPurifier_Exception("Definition of $type type not supported");
01852         }
01853         $this->definitions[$type] = $def;
01854         return $def;
01855     }
01856 
01857     public function maybeGetRawDefinition($name) {
              // Requests the raw, optimized definition of type $name via
              // getDefinition($name, true, true). The result is null when a
              // finished definition is already available, so callers must
              // test for null before editing.
01858         return $this->getDefinition($name, true, true);
01859     }
01860 
01861     public function maybeGetRawHTMLDefinition() {
              // Same as getDefinition('HTML', true, true); may return null.
01862         return $this->getDefinition('HTML', true, true);
01863     }
01864 
01865     public function maybeGetRawCSSDefinition() {
              // Same as getDefinition('CSS', true, true); may return null.
01866         return $this->getDefinition('CSS', true, true);
01867     }
01868 
01869     public function maybeGetRawURIDefinition() {
              // Same as getDefinition('URI', true, true); may return null.
01870         return $this->getDefinition('URI', true, true);
01871     }
01872 
01878     public function loadArray($config_array) {
              // Applies every entry of $config_array through set(). Two key
              // shapes are accepted:
              //   'Namespace.Directive' => value  (underscores in the key are
              //                                    first converted to dots)
              //   'Namespace' => array('Directive' => value, ...)
              // Does nothing except trigger an error if the config is finalized.
01879         if ($this->isFinalized('Cannot load directives after finalization')) return;
01880         foreach ($config_array as $key => $value) {
01881             $key = str_replace('_', '.', $key);
01882             if (strpos($key, '.') !== false) {
01883                 $this->set($key, $value);
01884             } else {
                      // No dot: the key names a namespace and the value is
                      // iterated as its directive => value pairs.
01885                 $namespace = $key;
01886                 $namespace_values = $value;
01887                 foreach ($namespace_values as $directive => $value) {
01888                     $this->set($namespace .'.'. $directive, $value);
01889                 }
01890             }
01891         }
01892     }
01893 
01900     public static function getAllowedDirectivesForForm($allowed, $schema = null) {
              // Returns a list of array($namespace, $directive) pairs taken
              // from $schema->info that pass the $allowed filter.
              //   $allowed === true: every directive passes.
              //   otherwise: a string or array of entries, each either a
              //     namespace ('HTML'), a directive ('HTML.Allowed'), or a
              //     directive prefixed with '-' to exclude it.
              // Aliases and the DefinitionID / DefinitionRev directives are
              // always left out. $schema defaults to the schema singleton.
01901         if (!$schema) {
01902             $schema = HTMLPurifier_ConfigSchema::instance();
01903         }
01904         if ($allowed !== true) {
01905              if (is_string($allowed)) $allowed = array($allowed);
01906              $allowed_ns = array();
01907              $allowed_directives = array();
01908              $blacklisted_directives = array();
01909              foreach ($allowed as $ns_or_directive) {
01910                  if (strpos($ns_or_directive, '.') !== false) {
01911                      // directive
                           // A leading '-' is only recognised on entries that
                           // contain a dot, i.e. on directives.
01912                      if ($ns_or_directive[0] == '-') {
01913                          $blacklisted_directives[substr($ns_or_directive, 1)] = true;
01914                      } else {
01915                          $allowed_directives[$ns_or_directive] = true;
01916                      }
01917                  } else {
01918                      // namespace
01919                      $allowed_ns[$ns_or_directive] = true;
01920                  }
01921              }
01922         }
01923         $ret = array();
01924         foreach ($schema->info as $key => $def) {
01925             list($ns, $directive) = explode('.', $key, 2);
01926             if ($allowed !== true) {
                      // Exclusions win over both kinds of inclusion.
01927                 if (isset($blacklisted_directives["$ns.$directive"])) continue;
01928                 if (!isset($allowed_directives["$ns.$directive"]) && !isset($allowed_ns[$ns])) continue;
01929             }
01930             if (isset($def->isAlias)) continue;
01931             if ($directive == 'DefinitionID' || $directive == 'DefinitionRev') continue;
01932             $ret[] = array($ns, $directive);
01933         }
01934         return $ret;
01935     }
01936 
01946     public static function loadArrayFromForm($array, $index = false, $allowed = true, $mq_fix = true, $schema = null) {
              // Builds a new config from submitted form data: the data is
              // filtered by prepareArrayFromForm() and the result is handed to
              // create() together with $schema.
01947         $ret = HTMLPurifier_Config::prepareArrayFromForm($array, $index, $allowed, $mq_fix, $schema);
01948         $config = HTMLPurifier_Config::create($ret, $schema);
01949         return $config;
01950     }
01951 
01956     public function mergeArrayFromForm($array, $index = false, $allowed = true, $mq_fix = true) {
               // Instance counterpart of loadArrayFromForm(): filters the form
               // data against this config's own schema and loads the result
               // into this object via loadArray().
01957          $ret = HTMLPurifier_Config::prepareArrayFromForm($array, $index, $allowed, $mq_fix, $this->def);
01958          $this->loadArray($ret);
01959     }
01960 
01965     public static function prepareArrayFromForm($array, $index = false, $allowed = true, $mq_fix = true, $schema = null) {
              // Extracts directive values from form data and returns them as
              // $ret[namespace][directive] = value.
              //   $array:   the submitted data (keys of the form 'Ns.Directive').
              //   $index:   when not false, the values are read from
              //             $array[$index]; a missing or non-array entry is
              //             treated as an empty array.
              //   $allowed: filter passed to getAllowedDirectivesForForm().
              //   $mq_fix:  when true and magic quotes are active, values are
              //             passed through stripslashes().
01966         if ($index !== false) $array = (isset($array[$index]) && is_array($array[$index])) ? $array[$index] : array();
01967         $mq = $mq_fix && function_exists('get_magic_quotes_gpc') && get_magic_quotes_gpc();
01968 
01969         $allowed = HTMLPurifier_Config::getAllowedDirectivesForForm($allowed, $schema);
01970         $ret = array();
01971         foreach ($allowed as $key) {
01972             list($ns, $directive) = $key;
01973             $skey = "$ns.$directive";
                  // A non-empty 'Null_Ns.Directive' field forces the value to
                  // null and takes precedence over any submitted value.
01974             if (!empty($array["Null_$skey"])) {
01975                 $ret[$ns][$directive] = null;
01976                 continue;
01977             }
                  // Directives absent from the form are left out of the result.
01978             if (!isset($array[$skey])) continue;
01979             $value = $mq ? stripslashes($array[$skey]) : $array[$skey];
01980             $ret[$ns][$directive] = $value;
01981         }
01982         return $ret;
01983     }
01984 
01989     public function loadIni($filename) {
              // Parses the ini file $filename with sections enabled and loads
              // the resulting array through loadArray(). Does nothing except
              // trigger an error if the config is already finalized.
              // NOTE(review): parse_ini_file() returns false on failure and
              // that value is passed on unchecked - confirm this is acceptable.
01990         if ($this->isFinalized('Cannot load directives after finalization')) return;
01991         $array = parse_ini_file($filename, true);
01992         $this->loadArray($array);
01993     }
01994 
01999     public function isFinalized($error = false) {
              // Returns whether the config has been finalized. When $error is
              // a non-empty message and the config is finalized, that message
              // is also raised as E_USER_ERROR.
02000         if ($this->finalized && $error) {
02001             $this->triggerError($error, E_USER_ERROR);
02002         }
02003         return $this->finalized;
02004     }
02005 
/**
 * Finalizes the configuration if the autoFinalize flag is set; otherwise
 * squashes the property list (with its argument set to true) instead.
 */
public function autoFinalize() {
    if (!$this->autoFinalize) {
        $this->plist->squash(true);
        return;
    }
    $this->finalize();
}
02017 
/**
 * Marks the configuration as finalized and drops the parser reference.
 */
public function finalize() {
    // the parser is released at the same time the object is locked
    $this->parser    = null;
    $this->finalized = true;
}
02025 
/**
 * Raises an error, optionally annotated with the location of the call
 * that entered HTMLPurifier_Config.
 *
 * When $this->chatty is set, the backtrace is walked to find the first
 * frame whose caller is not a HTMLPurifier_Config method; that frame's
 * line and file are appended to the message.
 *
 * @param string $msg Error message
 * @param int    $no  Error level passed to trigger_error()
 */
protected function triggerError($msg, $no) {
    $extra = '';
    if ($this->chatty) {
        $trace = debug_backtrace();
        // each frame is inspected together with the frame that called it
        for ($i = 0, $c = count($trace); $i < $c - 1; $i++) {
            // frames produced by plain functions carry no 'class' key, so
            // it must be checked before it is compared
            if (isset($trace[$i + 1]['class']) && $trace[$i + 1]['class'] === 'HTMLPurifier_Config') {
                continue;
            }
            $frame = $trace[$i];
            $extra = " invoked on line {$frame['line']} in file {$frame['file']}";
            break;
        }
    }
    trigger_error($msg . $extra, $no);
}
02047 
/**
 * Returns the serialized form of this configuration object.
 *
 * The HTML, CSS and URI definitions are requested (in that order) before
 * serializing; presumably this makes sure they are built first -- confirm
 * against getDefinition().
 *
 * @return string Output of PHP's serialize() for this object
 */
public function serialize() {
    foreach (array('HTML', 'CSS', 'URI') as $type) {
        $this->getDefinition($type);
    }
    return serialize($this);
}
02058 
02059 }
02060 
02061 
02062 
02063 
02064 
/**
 * Describes the configuration directives that exist: their default values
 * and per-directive metadata (type, nullability, aliases, allowed values).
 */
class HTMLPurifier_ConfigSchema {

    /**
     * Default value of every directive, keyed by directive name.
     * Filled by add().
     */
    public $defaults = array();

    /**
     * Property list holding the same defaults as $defaults; created in the
     * constructor and filled by add().
     */
    public $defaultPlist;

    /**
     * Metadata per directive, keyed by directive name. add() stores a
     * stdClass with ->type (and optionally ->allow_null, ->aliases,
     * ->allowed); addAlias() stores a stdClass with ->key and ->isAlias.
     * postProcess() may replace simple entries with a bare integer.
     */
    public $info = array();

    /**
     * Application-wide instance managed by instance().
     */
    static protected $singleton;

    public function __construct() {
        $this->defaultPlist = new HTMLPurifier_PropertyList();
    }

    /**
     * Unserializes the schema shipped as ConfigSchema/schema.ser below
     * HTMLPURIFIER_PREFIX.
     *
     * Raises E_USER_ERROR (including the file's sha1) if unserialization
     * yields a falsy value.
     *
     * @return mixed The unserialized schema
     */
    public static function makeFromSerial() {
        $contents = file_get_contents(HTMLPURIFIER_PREFIX . '/HTMLPurifier/ConfigSchema/schema.ser');
        $r = unserialize($contents);
        if (!$r) {
            $hash = sha1($contents);
            trigger_error("Unserialization of configuration schema failed, sha1 of file was $hash", E_USER_ERROR);
        }
        return $r;
    }

    /**
     * Returns the shared schema instance.
     *
     * @param mixed $prototype An object to install as the shared instance,
     *                         true to discard the current instance and
     *                         rebuild it from the serialized schema, or
     *                         null to simply fetch (building on first use)
     * @return HTMLPurifier_ConfigSchema
     */
    public static function instance($prototype = null) {
        // The reset request must be tested before the generic "prototype
        // given" case; otherwise the literal true would itself be stored
        // as the singleton.
        if ($prototype === true || ($prototype === null && HTMLPurifier_ConfigSchema::$singleton === null)) {
            HTMLPurifier_ConfigSchema::$singleton = HTMLPurifier_ConfigSchema::makeFromSerial();
        } elseif ($prototype !== null) {
            HTMLPurifier_ConfigSchema::$singleton = $prototype;
        }
        return HTMLPurifier_ConfigSchema::$singleton;
    }

    /**
     * Defines a directive.
     *
     * @param string     $key        Directive name
     * @param mixed      $default    Default value
     * @param int|string $type       Integer type code, or a name looked up
     *                               in HTMLPurifier_VarParser::$types
     * @param bool       $allow_null Whether null is an acceptable value
     */
    public function add($key, $default, $type, $allow_null) {
        $obj = new stdClass();
        $obj->type = is_int($type) ? $type : HTMLPurifier_VarParser::$types[$type];
        // the flag is only recorded when set, which postProcess() relies on
        if ($allow_null) $obj->allow_null = true;
        $this->info[$key] = $obj;
        $this->defaults[$key] = $default;
        $this->defaultPlist->set($key, $default);
    }

    /**
     * Adds value aliases for a directive; existing aliases with the same
     * name are overwritten.
     *
     * @param string $key     Directive name (must already be defined)
     * @param array  $aliases Map of alias => real value
     */
    public function addValueAliases($key, $aliases) {
        if (!isset($this->info[$key]->aliases)) {
            $this->info[$key]->aliases = array();
        }
        foreach ($aliases as $alias => $real) {
            $this->info[$key]->aliases[$alias] = $real;
        }
    }

    /**
     * Sets (replacing any previous value) the allowed values of a directive.
     *
     * @param string $key     Directive name (must already be defined)
     * @param mixed  $allowed Allowed-values structure, stored as given
     */
    public function addAllowedValues($key, $allowed) {
        $this->info[$key]->allowed = $allowed;
    }

    /**
     * Registers $key as an alias of another directive.
     *
     * @param string $key     Alias name
     * @param string $new_key Name of the directive the alias points to
     */
    public function addAlias($key, $new_key) {
        $obj = new stdClass();
        $obj->key = $new_key;
        $obj->isAlias = true;
        $this->info[$key] = $obj;
    }

    /**
     * Compacts $info: an entry that only carries a type becomes that
     * integer, and an entry that carries only a type plus allow_null
     * becomes the negated integer. Everything else is left untouched.
     */
    public function postProcess() {
        foreach ($this->info as $key => $v) {
            $fields = count((array) $v);
            if ($fields == 1) {
                $this->info[$key] = $v->type;
            } elseif ($fields == 2 && isset($v->allow_null)) {
                $this->info[$key] = -$v->type;
            }
        }
    }

}
02225 
02226 
02227 
02228 
02229 
/**
 * Collects the content sets declared by modules, expands references
 * between them, and turns element content models into child definitions.
 */
class HTMLPurifier_ContentSets
{

    /**
     * Content set name => ' | '-separated list of its members.
     */
    public $info = array();

    /**
     * Content set name => lookup array (member => true).
     */
    public $lookup = array();

    /**
     * Names of all content sets (keys of $info).
     */
    protected $keys = array();

    /**
     * Expanded member strings (values of $info), in the order of $keys.
     */
    protected $values = array();

    /**
     * Merges the content_sets of every module and resolves nested sets.
     *
     * @param mixed $modules A single module or an array of modules; each
     *                       must expose a ->content_sets array mapping a
     *                       set name to a '|'-separated member string
     */
    public function __construct($modules) {
        if (!is_array($modules)) $modules = array($modules);
        // sets declared by several modules are merged, never overridden
        foreach ($modules as $module) {
            foreach ($module->content_sets as $key => $value) {
                $temp = $this->convertToLookup($value);
                if (isset($this->lookup[$key])) {
                    $this->lookup[$key] = array_merge($this->lookup[$key], $temp);
                } else {
                    $this->lookup[$key] = $temp;
                }
            }
        }
        // Replace members that are themselves content sets by that set's
        // members, repeating until a full pass changes nothing.
        $old_lookup = false;
        while ($old_lookup !== $this->lookup) {
            $old_lookup = $this->lookup;
            foreach ($this->lookup as $i => $set) {
                $add = array();
                foreach ($set as $element => $x) {
                    if (isset($this->lookup[$element])) {
                        $add += $this->lookup[$element];
                        unset($this->lookup[$i][$element]);
                    }
                }
                $this->lookup[$i] += $add;
            }
        }

        foreach ($this->lookup as $key => $lookup) {
            $this->info[$key] = implode(' | ', array_keys($lookup));
        }
        $this->keys   = array_keys($this->info);
        $this->values = array_values($this->info);
    }

    /**
     * Fills in $def->child (unless it is already set), first substituting
     * content set names inside a string content model.
     *
     * @param object $def    Element definition, modified in place
     * @param object $module Module the definition belongs to; consulted by
     *                       getChildDef() for unknown content model types
     */
    public function generateChildDef(&$def, $module) {
        if (!empty($def->child)) return; // already done!
        $content_model = $def->content_model;
        // With no content sets the alternation would be empty and match
        // the empty string at every word boundary, so substitution is
        // skipped entirely in that case.
        if (is_string($content_model) && !empty($this->keys)) {
            $alternatives = array();
            foreach ($this->keys as $name) {
                // names are expected to be alphanumeric; quoting keeps the
                // pattern well-formed if one ever is not
                $alternatives[] = preg_quote($name, '/');
            }
            $def->content_model = preg_replace_callback(
                '/\b(' . implode('|', $alternatives) . ')\b/',
                array($this, 'generateChildDefCallback'),
                $content_model
            );
        }
        $def->child = $this->getChildDef($def, $module);
    }

    /**
     * preg_replace_callback() handler used by generateChildDef().
     *
     * @param array $matches Regex match; element 0 is a content set name
     * @return string Expanded member list of that content set
     */
    public function generateChildDefCallback($matches) {
        return $this->info[$matches[0]];
    }

    /**
     * Instantiates the child definition matching an element definition.
     *
     * An object stored in content_model is returned as-is (with a notice).
     * Otherwise the built-in content_model_type values are tried, then the
     * module (only if it sets ->defines_child_def); failing all of that an
     * E_USER_ERROR is raised.
     *
     * @param object $def    Element definition to read content_model and
     *                       content_model_type from
     * @param object $module Module asked as a fallback
     * @return mixed Child definition object, or false if none was found
     */
    public function getChildDef($def, $module) {
        $value = $def->content_model;
        if (is_object($value)) {
            trigger_error(
                'Literal object child definitions should be stored in '.
                'ElementDef->child not ElementDef->content_model',
                E_USER_NOTICE
            );
            return $value;
        }
        switch ($def->content_model_type) {
            case 'required':
                return new HTMLPurifier_ChildDef_Required($value);
            case 'optional':
                return new HTMLPurifier_ChildDef_Optional($value);
            case 'empty':
                return new HTMLPurifier_ChildDef_Empty();
            case 'custom':
                return new HTMLPurifier_ChildDef_Custom($value);
        }
        // defer to its module
        $return = false;
        if ($module->defines_child_def) { // save a func call
            $return = $module->getChildDef($def);
        }
        if ($return !== false) return $return;
        // error-out
        trigger_error(
            'Could not determine which ChildDef class to instantiate',
            E_USER_ERROR
        );
        return false;
    }

    /**
     * Converts a '|'-separated member string into a lookup array.
     * All spaces are removed before splitting.
     *
     * @param string $string Member list, e.g. "a | b | c"
     * @return array member => true
     */
    protected function convertToLookup($string) {
        $ret = array();
        foreach (explode('|', str_replace(' ', '', $string)) as $k) {
            $ret[$k] = true;
        }
        return $ret;
    }

}
02381 
02382 
02383 
02384 
02385 
/**
 * Registry of named variables that are shared by reference.
 *
 * Presence is decided by the name being registered, not by the value, so
 * a variable that currently holds null still counts as registered.
 */
class HTMLPurifier_Context
{

    /**
     * Registered references, keyed by name.
     */
    private $_storage = array();

    /**
     * Registers a variable under a name. Re-registering an existing name
     * raises E_USER_ERROR and leaves the existing entry in place.
     *
     * @param string $name Name to register under
     * @param mixed  $ref  Variable to store (bound by reference)
     */
    public function register($name, &$ref) {
        if (array_key_exists($name, $this->_storage)) {
            trigger_error("Name $name produces collision, cannot re-register",
                          E_USER_ERROR);
            return;
        }
        $this->_storage[$name] =& $ref;
    }

    /**
     * Returns a registered variable by reference.
     *
     * @param string $name         Name of the variable
     * @param bool   $ignore_error When true, a missing name does not raise
     *                             E_USER_ERROR
     * @return mixed The variable, or null when the name is not registered
     */
    public function &get($name, $ignore_error = false) {
        if (!array_key_exists($name, $this->_storage)) {
            if (!$ignore_error) {
                trigger_error("Attempted to retrieve non-existent variable $name",
                              E_USER_ERROR);
            }
            $var = null; // so we can return by reference
            return $var;
        }
        return $this->_storage[$name];
    }

    /**
     * Removes a registered variable; raises E_USER_ERROR if the name is
     * not registered.
     *
     * @param string $name Name of the variable
     */
    public function destroy($name) {
        if (!array_key_exists($name, $this->_storage)) {
            trigger_error("Attempted to destroy non-existent variable $name",
                          E_USER_ERROR);
            return;
        }
        unset($this->_storage[$name]);
    }

    /**
     * Tells whether a name is registered.
     *
     * @param string $name Name of the variable
     * @return bool
     */
    public function exists($name) {
        return array_key_exists($name, $this->_storage);
    }

    /**
     * Registers every entry of an associative array.
     *
     * @param array $context_array Map of name => value
     */
    public function loadArray($context_array) {
        foreach ($context_array as $key => $discard) {
            // index into the array so register() receives a real variable
            $this->register($key, $context_array[$key]);
        }
    }

}
02464 
02465 
02466 
02467 
02468 
/**
 * Base class of definition caches. Concrete subclasses implement the
 * storage operations; this class provides key handling and type checks.
 */
abstract class HTMLPurifier_DefinitionCache
{

    /**
     * Type of definition this cache instance handles.
     */
    public $type;

    /**
     * @param string $type Type of definition this cache instance handles
     */
    public function __construct($type) {
        $this->type = $type;
    }

    /**
     * Builds the cache key "<version>,<batch serial>,<definition rev>"
     * for a configuration.
     *
     * @param object $config Configuration object
     * @return string
     */
    public function generateKey($config) {
        return $config->version . ',' . // possibly replace with function calls
               $config->getBatchSerial($this->type) . ',' .
               $config->get($this->type . '.DefinitionRev');
    }

    /**
     * Decides whether a cache key is out of date for a configuration.
     *
     * A key is old when it is malformed, when its version differs from
     * the configuration's, or when its batch serial matches but its
     * revision is lower than the configured revision.
     *
     * @param string $key    Key as produced by generateKey()
     * @param object $config Configuration object to compare against
     * @return bool
     */
    public function isOld($key, $config) {
        if (substr_count($key, ',') < 2) return true;
        list($version, $hash, $revision) = explode(',', $key, 3);
        // version mismatch, is always old
        if (version_compare($version, $config->version) !== 0) return true;
        // Serials are compared as strings: a loose comparison would treat
        // numeric-looking values such as "0e12" and "0e34" as equal.
        $same_batch = ((string) $hash === (string) $config->getBatchSerial($this->type));
        if ($same_batch && $revision < $config->get($this->type . '.DefinitionRev')) {
            return true;
        }
        return false;
    }

    /**
     * Verifies that a definition has the type this cache handles; raises
     * an error (default level) when it does not.
     *
     * @param object $def Definition whose ->type is checked
     * @return bool True when the types are identical
     */
    public function checkDefType($def) {
        if ($def->type !== $this->type) {
            trigger_error("Cannot use definition of type {$def->type} in cache for {$this->type}");
            return false;
        }
        return true;
    }

    /** Storage operation to be provided by subclasses. */
    abstract public function add($def, $config);

    /** Storage operation to be provided by subclasses. */
    abstract public function set($def, $config);

    /** Storage operation to be provided by subclasses. */
    abstract public function replace($def, $config);

    /** Storage operation to be provided by subclasses. */
    abstract public function get($config);

    /** Storage operation to be provided by subclasses. */
    abstract public function remove($config);

    /** Storage operation to be provided by subclasses. */
    abstract public function flush($config);

    /** Storage operation to be provided by subclasses. */
    abstract public function cleanup($config);

}
02573 
02574 
02575 
02576 
02577 
/**
 * Creates definition cache objects, keeps one per (implementation, type)
 * pair, and wraps them in the registered decorators.
 */
class HTMLPurifier_DefinitionCacheFactory
{

    /** Created caches, indexed as [$method][$type]. */
    protected $caches = array('Serializer' => array());

    /** Short implementation name => class name, see register(). */
    protected $implementations = array();

    /** Decorator objects keyed by their ->name, see addDecorator(). */
    protected $decorators = array();

    /**
     * Installs the default decorator ('Cleanup').
     */
    public function setup() {
        $this->addDecorator('Cleanup');
    }

    /**
     * Returns the shared factory instance.
     *
     * @param mixed $prototype An object to install as the shared instance,
     *                         true to replace the current instance with a
     *                         freshly set-up factory, or null to simply
     *                         fetch (creating on first use)
     * @return HTMLPurifier_DefinitionCacheFactory
     */
    public static function instance($prototype = null) {
        static $instance;
        // The reset request must be tested before the generic "prototype
        // given" case; otherwise the literal true would itself be stored
        // as the instance.
        if ($prototype === true || ($prototype === null && $instance === null)) {
            $instance = new HTMLPurifier_DefinitionCacheFactory();
            $instance->setup();
        } elseif ($prototype !== null) {
            $instance = $prototype;
        }
        return $instance;
    }

    /**
     * Registers a cache implementation.
     *
     * @param string $short Name used in the Cache.DefinitionImpl directive
     * @param string $long  Class name of the implementation
     */
    public function register($short, $long) {
        $this->implementations[$short] = $long;
    }

    /**
     * Returns the cache object for a definition type.
     *
     * Cache.DefinitionImpl selects the implementation: null yields a Null
     * cache (never stored), a registered and already-loaded class is
     * instantiated, and anything else falls back to the Serializer (with
     * a warning unless 'Serializer' was requested).
     *
     * @param string $type   Definition type
     * @param object $config Configuration object
     * @return mixed Cache object, wrapped in all registered decorators
     */
    public function create($type, $config) {
        $method = $config->get('Cache.DefinitionImpl');
        if ($method === null) {
            return new HTMLPurifier_DefinitionCache_Null($type);
        }
        if (!empty($this->caches[$method][$type])) {
            return $this->caches[$method][$type];
        }
        if (
          isset($this->implementations[$method]) &&
          class_exists($class = $this->implementations[$method], false)
        ) {
            $cache = new $class($type);
        } else {
            if ($method != 'Serializer') {
                trigger_error("Unrecognized DefinitionCache $method, using Serializer instead", E_USER_WARNING);
            }
            $cache = new HTMLPurifier_DefinitionCache_Serializer($type);
        }
        foreach ($this->decorators as $decorator) {
            $new_cache = $decorator->decorate($cache);
            // $cache is detached before being reassigned; the original
            // notes this prevents infinite recursion (presumably because
            // decorate() may keep a reference to its argument -- confirm
            // before simplifying)
            unset($cache);
            $cache = $new_cache;
        }
        $this->caches[$method][$type] = $cache;
        return $this->caches[$method][$type];
    }

    /**
     * Registers a decorator applied to every cache created afterwards.
     *
     * @param string|object $decorator Decorator object, or the suffix of a
     *                                 HTMLPurifier_DefinitionCache_Decorator_*
     *                                 class to instantiate
     */
    public function addDecorator($decorator) {
        if (is_string($decorator)) {
            $class = "HTMLPurifier_DefinitionCache_Decorator_$decorator";
            $decorator = new $class;
        }
        $this->decorators[$decorator->name] = $decorator;
    }

}
02665 
02666 
02667 
02668 
02669 
/**
 * Plain value holder describing one document type.
 */
class HTMLPurifier_Doctype
{
    /** Full name of the doctype. */
    public $name;

    /** Modules associated with this doctype. */
    public $modules = array();

    /** Tidy modules associated with this doctype. */
    public $tidyModules = array();

    /** XML flag of this doctype (true unless told otherwise). */
    public $xml = true;

    /** Alternative names of this doctype. */
    public $aliases = array();

    /** Public DTD identifier. */
    public $dtdPublic;

    /** System DTD identifier. */
    public $dtdSystem;

    /**
     * Stores every argument in the property of the matching name.
     *
     * @param string|null $name        Full name of the doctype
     * @param bool        $xml         XML flag
     * @param array       $modules     Modules of the doctype
     * @param array       $tidyModules Tidy modules of the doctype
     * @param array       $aliases     Alternative names
     * @param string|null $dtd_public  Public DTD identifier
     * @param string|null $dtd_system  System DTD identifier
     */
    public function __construct($name = null, $xml = true, $modules = array(),
        $tidyModules = array(), $aliases = array(), $dtd_public = null, $dtd_system = null
    ) {
        // identity
        $this->name        = $name;
        $this->aliases     = $aliases;
        $this->xml         = $xml;
        // module lists
        $this->modules     = $modules;
        $this->tidyModules = $tidyModules;
        // DTD identifiers
        $this->dtdPublic   = $dtd_public;
        $this->dtdSystem   = $dtd_system;
    }
}
02726 
02727 
02728 
02729 
02730 
/**
 * Keeps the known doctypes together with their aliases and resolves the
 * doctype requested by a configuration.
 */
class HTMLPurifier_DoctypeRegistry
{

    /** Registered doctype objects, keyed by doctype name. */
    protected $doctypes;

    /** Alias => doctype name. */
    protected $aliases;

    /**
     * Registers a doctype.
     *
     * When $doctype is not an object, a HTMLPurifier_Doctype is built from
     * all arguments; otherwise the remaining arguments are ignored. The
     * doctype's aliases are hooked up unless a doctype of that name is
     * already registered, and an alias equal to the new name is dropped.
     *
     * @param string|object $doctype      Doctype name or ready-made object
     * @param bool          $xml          XML flag
     * @param mixed         $modules      Module(s); scalars are wrapped in an array
     * @param mixed         $tidy_modules Tidy module(s); scalars are wrapped in an array
     * @param mixed         $aliases      Alias(es); scalars are wrapped in an array
     * @param string|null   $dtd_public   Public DTD identifier
     * @param string|null   $dtd_system   System DTD identifier
     * @return object The registered doctype object
     */
    public function register($doctype, $xml = true, $modules = array(),
        $tidy_modules = array(), $aliases = array(), $dtd_public = null, $dtd_system = null
    ) {
        $modules      = is_array($modules)      ? $modules      : array($modules);
        $tidy_modules = is_array($tidy_modules) ? $tidy_modules : array($tidy_modules);
        $aliases      = is_array($aliases)      ? $aliases      : array($aliases);

        if (!is_object($doctype)) {
            $doctype = new HTMLPurifier_Doctype(
                $doctype, $xml, $modules, $tidy_modules, $aliases, $dtd_public, $dtd_system
            );
        }

        $name = $doctype->name;
        $this->doctypes[$name] = $doctype;

        // an alias never shadows a doctype that is registered directly
        foreach ($doctype->aliases as $alias) {
            if (!isset($this->doctypes[$alias])) {
                $this->aliases[$alias] = $name;
            }
        }

        // the name now refers to a real doctype, so a stale alias of the
        // same spelling has to go
        if (isset($this->aliases[$name])) {
            unset($this->aliases[$name]);
        }

        return $doctype;
    }

    /**
     * Looks up a doctype by name or alias.
     *
     * Unknown names raise E_USER_ERROR and yield a fresh, unregistered
     * HTMLPurifier_Doctype carrying only that name.
     *
     * @param string $doctype Name or alias
     * @return object The stored doctype object (not a copy)
     */
    public function get($doctype) {
        if (isset($this->aliases[$doctype])) {
            $doctype = $this->aliases[$doctype];
        }
        if (isset($this->doctypes[$doctype])) {
            return $this->doctypes[$doctype];
        }
        trigger_error('Doctype ' . htmlspecialchars($doctype) . ' does not exist', E_USER_ERROR);
        return new HTMLPurifier_Doctype($doctype);
    }

    /**
     * Returns a clone of the doctype selected by a configuration.
     *
     * @param object $config Configuration object
     * @return object Copy of the registered doctype
     */
    public function make($config) {
        $selected = $this->getDoctypeFromConfig($config);
        return clone $this->get($selected);
    }

    /**
     * Determines the doctype name a configuration asks for.
     *
     * HTML.Doctype wins, then HTML.CustomDoctype; otherwise the name is
     * assembled from the HTML.XHTML and HTML.Strict flags.
     *
     * @param object $config Configuration object
     * @return string Doctype name
     */
    public function getDoctypeFromConfig($config) {
        $explicit = $config->get('HTML.Doctype');
        if (!empty($explicit)) return $explicit;

        $custom = $config->get('HTML.CustomDoctype');
        if (!empty($custom)) return $custom;

        // backwards-compatibility: derive the name from the two flags
        $family  = $config->get('HTML.XHTML')  ? 'XHTML 1.0' : 'HTML 4.01';
        $variant = $config->get('HTML.Strict') ? ' Strict'   : ' Transitional';
        return $family . $variant;
    }

}
02830 
02831 
02832 
02833 
02834 
/**
 * Definition of a single element: its attributes, attribute transforms,
 * content model and related flags. Definitions can be layered on top of
 * each other with mergeIn().
 */
class HTMLPurifier_ElementDef
{

    /** Standalone flag; true by default. */
    public $standalone = true;

    /**
     * Attribute name => definition. Index 0, when present, holds a list of
     * includes (see mergeIn()).
     */
    public $attr = array();

    /** Attribute transforms keyed by name (pre stage). */
    public $attr_transform_pre = array();

    /** Attribute transforms keyed by name (post stage). */
    public $attr_transform_post = array();

    /** Child definition; mergeIn() resets it to false when the content model changes. */
    public $child;

    /** Content model; "#SUPER" inside a merged-in model refers to this value. */
    public $content_model;

    /** Type of the content model. */
    public $content_model_type;

    /** Flag copied from a merged-in definition when that definition sets it. */
    public $descendants_are_inline = false;

    /** Required attributes. */
    public $required_attr = array();

    /** Excluded elements, keyed by name. */
    public $excludes = array();

    /** Autoclose list. */
    public $autoclose = array();

    /** Wrap setting. */
    public $wrap;

    /** Formatting flag. */
    public $formatting;

    /**
     * Convenience factory for a definition with the three most common
     * fields filled in.
     *
     * @param mixed $content_model      Content model
     * @param mixed $content_model_type Type of the content model
     * @param array $attr               Attribute collection
     * @return HTMLPurifier_ElementDef
     */
    public static function create($content_model, $content_model_type, $attr) {
        $def = new HTMLPurifier_ElementDef();
        $def->content_model = $content_model;
        $def->content_model_type = $content_model_type;
        $def->attr = $attr;
        return $def;
    }

    /**
     * Layers another definition on top of this one; values from $def take
     * precedence and a value of false removes the corresponding entry.
     *
     * @param HTMLPurifier_ElementDef $def Definition to merge in
     */
    public function mergeIn($def) {

        // later keys takes precedence
        foreach ($def->attr as $k => $v) {
            if ($k === 0) {
                // includes are appended; there is no way to override one
                foreach ($v as $v2) {
                    $this->attr[0][] = $v2;
                }
                continue;
            }
            if ($v === false) {
                if (isset($this->attr[$k])) unset($this->attr[$k]);
                continue;
            }
            $this->attr[$k] = $v;
        }
        $this->_mergeAssocArray($this->attr_transform_pre, $def->attr_transform_pre);
        $this->_mergeAssocArray($this->attr_transform_post, $def->attr_transform_post);
        $this->_mergeAssocArray($this->excludes, $def->excludes);

        if (!empty($def->content_model)) {
            // a definition that has no content model yet contributes an
            // empty string for #SUPER; str_replace() must not be given null
            $super = isset($this->content_model) ? $this->content_model : '';
            $this->content_model = str_replace("#SUPER", $super, $def->content_model);
            $this->child = false; // must be regenerated from the new model
        }
        if (!empty($def->content_model_type)) {
            $this->content_model_type = $def->content_model_type;
            $this->child = false;
        }
        if (!is_null($def->child)) $this->child = $def->child;
        if (!is_null($def->formatting)) $this->formatting = $def->formatting;
        if ($def->descendants_are_inline) $this->descendants_are_inline = $def->descendants_are_inline;

    }

    /**
     * Merges $a2 into $a1 by key; a value of false deletes the key.
     *
     * @param array $a1 Array to modify (by reference)
     * @param array $a2 Array to merge in
     */
    private function _mergeAssocArray(&$a1, $a2) {
        foreach ($a2 as $k => $v) {
            if ($v === false) {
                if (isset($a1[$k])) unset($a1[$k]);
                continue;
            }
            $a1[$k] = $v;
        }
    }

}
03014 
03015 
03016 
03017 
03018 
03023 class HTMLPurifier_Encoder
03024 {
03025 
/**
 * Private so the class cannot be instantiated from outside; if it is
 * reached anyway it raises E_USER_ERROR.
 */
private function __construct() {
    trigger_error(
        'Cannot instantiate encoder, call methods statically',
        E_USER_ERROR
    );
}
03032 
/**
 * Error handler that deliberately does nothing; unsafeIconv() installs it
 * around its iconv() call.
 */
public static function muteErrorHandler() {
    // intentionally empty
}
03037 
/**
 * Calls iconv() with errors muted: muteErrorHandler() is installed for
 * the duration of the call and the previous handler restored afterwards.
 *
 * @param string $in   Source character set
 * @param string $out  Target character set
 * @param string $text Text to convert
 * @return mixed Result of iconv()
 */
public static function unsafeIconv($in, $out, $text) {
    $silencer = array('HTMLPurifier_Encoder', 'muteErrorHandler');
    set_error_handler($silencer);
    $converted = iconv($in, $out, $text);
    restore_error_handler();
    return $converted;
}
03047 
/**
 * iconv() wrapper that works around the truncation bug reported by
 * testIconvTruncateBug().
 *
 * With a healthy iconv the text is converted in one go. With a truncating
 * iconv, UTF-8 input is converted in chunks of at most $max_chunk_size
 * bytes, each chunk ending on a character boundary; any other situation
 * yields false.
 *
 * @param string $in             Source character set
 * @param string $out            Target character set
 * @param string $text           Text to convert
 * @param int    $max_chunk_size Upper bound for the chunk size in bytes;
 *                               must be at least 4
 * @return string|bool Converted text, or false on failure
 */
public static function iconv($in, $out, $text, $max_chunk_size = 8000) {
    $code = self::testIconvTruncateBug();
    if ($code == self::ICONV_OK) {
        return self::unsafeIconv($in, $out, $text);
    }
    if ($code != self::ICONV_TRUNCATES) {
        return false;
    }
    // the workaround relies on recognising UTF-8 continuation bytes, so
    // it is only available for utf-8 input
    if ($in != 'utf-8') {
        return false;
    }
    if ($max_chunk_size < 4) {
        trigger_error('max_chunk_size is too small', E_USER_WARNING);
        return false;
    }

    $length = strlen($text);
    if ($length <= $max_chunk_size) {
        return self::unsafeIconv($in, $out, $text);
    }

    $result = '';
    $offset = 0;
    while ($offset + $max_chunk_size < $length) {
        // Step back from the nominal cut (at most three bytes) until the
        // byte at the cut is not a continuation byte (10xxxxxx).
        $chunk_size = false;
        for ($back = 0; $back <= 3; $back++) {
            $byte = ord($text[$offset + $max_chunk_size - $back]);
            if (0x80 != (0xC0 & $byte)) {
                $chunk_size = $max_chunk_size - $back;
                break;
            }
        }
        if ($chunk_size === false) {
            return false; // rather confusing UTF-8...
        }
        $result .= self::unsafeIconv($in, $out, substr($text, $offset, $chunk_size));
        $offset += $chunk_size;
    }
    // whatever is left fits into a single chunk
    $result .= self::unsafeIconv($in, $out, substr($text, $offset));
    return $result;
}
03099 
/**
 * Removes everything from a string that is not well-formed UTF-8 or not
 * an accepted character.
 *
 * Accepted are tab, LF, CR, U+0020-U+007E, U+00A0-U+D7FF, U+E000-U+FFFD
 * and U+10000-U+10FFFF. A string consisting solely of such characters is
 * returned unchanged by a single regular-expression check; otherwise the
 * string is decoded byte by byte and only valid, accepted characters are
 * copied to the result. The byte-wise path additionally drops U+FEFF.
 *
 * @param string $str       String to clean
 * @param bool   $force_php When true, the regular-expression shortcut is
 *                          skipped and the byte-wise path is always used
 * @return string Cleaned string
 */
public static function cleanUTF8($str, $force_php = false) {

    // Fast path: if the string is already valid, nothing needs to be done.
    // The class matches the XML Char production minus U+007F-U+009F.
    if (!$force_php && preg_match('/^[\x{9}\x{A}\x{D}\x{20}-\x{7E}\x{A0}-\x{D7FF}\x{E000}-\x{FFFD}\x{10000}-\x{10FFFF}]*$/Du', $str)) {
        return $str;
    }

    $mState = 0; // number of octets still expected for the current sequence
    $mUcs4  = 0; // code point accumulated so far
    $mBytes = 1; // total number of octets of the current sequence

    // $char collects the raw bytes of the current sequence; once the
    // sequence is complete and acceptable it is appended to $out as-is.
    $out = '';
    $char = '';

    $len = strlen($str);
    for ($i = 0; $i < $len; $i++) {
        $in = ord($str[$i]);
        $char .= $str[$i]; // append byte to char
        if (0 == $mState) {
            // Expecting either a US-ASCII character or the first octet of
            // a multi-octet sequence.
            if (0 == (0x80 & ($in))) {
                // US-ASCII, pass straight through.
                if (($in <= 31 || $in == 127) &&
                    !($in == 9 || $in == 13 || $in == 10) // save \r\t\n
                ) {
                    // control characters, remove
                } else {
                    $out .= $char;
                }
                // reset
                $char = '';
                $mBytes = 1;
            } elseif (0xC0 == (0xE0 & ($in))) {
                // First octet of 2 octet sequence
                $mUcs4 = ($in);
                $mUcs4 = ($mUcs4 & 0x1F) << 6;
                $mState = 1;
                $mBytes = 2;
            } elseif (0xE0 == (0xF0 & ($in))) {
                // First octet of 3 octet sequence
                $mUcs4 = ($in);
                $mUcs4 = ($mUcs4 & 0x0F) << 12;
                $mState = 2;
                $mBytes = 3;
            } elseif (0xF0 == (0xF8 & ($in))) {
                // First octet of 4 octet sequence
                $mUcs4 = ($in);
                $mUcs4 = ($mUcs4 & 0x07) << 18;
                $mState = 3;
                $mBytes = 4;
            } elseif (0xF8 == (0xFC & ($in))) {
                // First octet of 5 octet sequence. Always illegal (either
                // not shortest form or beyond U+10FFFF); the sequence is
                // consumed and rejected by the checks at its end rather
                // than resynchronising here.
                $mUcs4 = ($in);
                $mUcs4 = ($mUcs4 & 0x03) << 24;
                $mState = 4;
                $mBytes = 5;
            } elseif (0xFC == (0xFE & ($in))) {
                // First octet of 6 octet sequence, handled like the
                // 5 octet case.
                $mUcs4 = ($in);
                $mUcs4 = ($mUcs4 & 1) << 30;
                $mState = 5;
                $mBytes = 6;
            } else {
                // Neither US-ASCII nor a legal first octet: drop it.
                $mState = 0;
                $mUcs4  = 0;
                $mBytes = 1;
                $char = '';
            }
        } else {
            // Inside a multi-octet sequence: a continuation is expected.
            if (0x80 == (0xC0 & ($in))) {
                // Legal continuation.
                $shift = ($mState - 1) * 6;
                $tmp = $in;
                $tmp = ($tmp & 0x0000003F) << $shift;
                $mUcs4 |= $tmp;

                if (0 == --$mState) {
                    // End of the sequence; $mUcs4 is the final code point.

                    // From Unicode 3.1, non-shortest form is illegal
                    if (((2 == $mBytes) && ($mUcs4 < 0x0080)) ||
                        ((3 == $mBytes) && ($mUcs4 < 0x0800)) ||
                        ((4 == $mBytes) && ($mUcs4 < 0x10000)) ||
                        (4 < $mBytes) ||
                        // From Unicode 3.2, surrogate characters = illegal
                        (($mUcs4 & 0xFFFFF800) == 0xD800) ||
                        // Codepoints outside the Unicode range are illegal
                        ($mUcs4 > 0x10FFFF)
                    ) {
                        // illegal sequence, drop it
                    } elseif (0xFEFF != $mUcs4 && // omit BOM
                        // same character ranges as the fast-path regexp
                        (
                            0x9 == $mUcs4 ||
                            0xA == $mUcs4 ||
                            0xD == $mUcs4 ||
                            (0x20 <= $mUcs4 && 0x7E >= $mUcs4) ||
                            // 7F-9F is not strictly prohibited by XML,
                            // but it is non-SGML, and thus we don't allow it
                            (0xA0 <= $mUcs4 && 0xD7FF >= $mUcs4) ||
                            (0xE000 <= $mUcs4 && 0xFFFD >= $mUcs4) ||
                            (0x10000 <= $mUcs4 && 0x10FFFF >= $mUcs4)
                        )
                    ) {
                        $out .= $char;
                    }
                    // initialize UTF8 cache (reset)
                    $mState = 0;
                    $mUcs4  = 0;
                    $mBytes = 1;
                    $char = '';
                }
            } else {
                // Incomplete multi-octet sequence: discard what has been
                // collected (including the current byte) and start over.
                $mState = 0;
                $mUcs4  = 0;
                $mBytes = 1;
                $char = '';
            }
        }
    }
    return $out;
}
03277 
03291     // +----------+----------+----------+----------+
03292     // | 33222222 | 22221111 | 111111   |          |
03293     // | 10987654 | 32109876 | 54321098 | 76543210 | bit
03294     // +----------+----------+----------+----------+
03295     // |          |          |          | 0xxxxxxx | 1 byte 0x00000000..0x0000007F
03296     // |          |          | 110yyyyy | 10xxxxxx | 2 byte 0x00000080..0x000007FF
03297     // |          | 1110zzzz | 10yyyyyy | 10xxxxxx | 3 byte 0x00000800..0x0000FFFF
03298     // | 11110www | 10wwzzzz | 10yyyyyy | 10xxxxxx | 4 byte 0x00010000..0x0010FFFF
03299     // +----------+----------+----------+----------+
03300     // | 00000000 | 00011111 | 11111111 | 11111111 | Theoretical upper limit of legal scalars: 2097151 (0x001FFFFF)
03301     // | 00000000 | 00010000 | 11111111 | 11111111 | Defined upper limit of legal scalar codes
03302     // +----------+----------+----------+----------+
03303 
03304     public static function unichr($code) {
03305         if($code > 1114111 or $code < 0 or
03306           ($code >= 55296 and $code <= 57343) ) {
03307             // bits are set outside the "valid" range as defined
03308             // by UNICODE 4.1.0
03309             return '';
03310         }
03311 
03312         $x = $y = $z = $w = 0;
03313         if ($code < 128) {
03314             // regular ASCII character
03315             $x = $code;
03316         } else {
03317             // set up bits for UTF-8
03318             $x = ($code & 63) | 128;
03319             if ($code < 2048) {
03320                 $y = (($code & 2047) >> 6) | 192;
03321             } else {
03322                 $y = (($code & 4032) >> 6) | 128;
03323                 if($code < 65536) {
03324                     $z = (($code >> 12) & 15) | 224;
03325                 } else {
03326                     $z = (($code >> 12) & 63) | 128;
03327                     $w = (($code >> 18) & 7)  | 240;
03328                 }
03329             }
03330         }
03331         // set up the actual character
03332         $ret = '';
03333         if($w) $ret .= chr($w);
03334         if($z) $ret .= chr($z);
03335         if($y) $ret .= chr($y);
03336         $ret .= chr($x);
03337 
03338         return $ret;
03339     }
03340 
03341     public static function iconvAvailable() {
03342         static $iconv = null;
03343         if ($iconv === null) {
03344             $iconv = function_exists('iconv') && self::testIconvTruncateBug() != self::ICONV_UNUSABLE;
03345         }
03346         return $iconv;
03347     }
03348 
03352     public static function convertToUTF8($str, $config, $context) {
03353         $encoding = $config->get('Core.Encoding');
03354         if ($encoding === 'utf-8') return $str;
03355         static $iconv = null;
03356         if ($iconv === null) $iconv = self::iconvAvailable();
03357         if ($iconv && !$config->get('Test.ForceNoIconv')) {
03358             // unaffected by bugs, since UTF-8 support all characters
03359             $str = self::unsafeIconv($encoding, 'utf-8//IGNORE', $str);
03360             if ($str === false) {
03361                 // $encoding is not a valid encoding
03362                 trigger_error('Invalid encoding ' . $encoding, E_USER_ERROR);
03363                 return '';
03364             }
03365             // If the string is bjorked by Shift_JIS or a similar encoding
03366             // that doesn't support all of ASCII, convert the naughty
03367             // characters to their true byte-wise ASCII/UTF-8 equivalents.
03368             $str = strtr($str, self::testEncodingSupportsASCII($encoding));
03369             return $str;
03370         } elseif ($encoding === 'iso-8859-1') {
03371             $str = utf8_encode($str);
03372             return $str;
03373         }
03374         trigger_error('Encoding not supported, please install iconv', E_USER_ERROR);
03375     }
03376 
03382     public static function convertFromUTF8($str, $config, $context) {
03383         $encoding = $config->get('Core.Encoding');
03384         if ($escape = $config->get('Core.EscapeNonASCIICharacters')) {
03385             $str = self::convertToASCIIDumbLossless($str);
03386         }
03387         if ($encoding === 'utf-8') return $str;
03388         static $iconv = null;
03389         if ($iconv === null) $iconv = self::iconvAvailable();
03390         if ($iconv && !$config->get('Test.ForceNoIconv')) {
03391             // Undo our previous fix in convertToUTF8, otherwise iconv will barf
03392             $ascii_fix = self::testEncodingSupportsASCII($encoding);
03393             if (!$escape && !empty($ascii_fix)) {
03394                 $clear_fix = array();
03395                 foreach ($ascii_fix as $utf8 => $native) $clear_fix[$utf8] = '';
03396                 $str = strtr($str, $clear_fix);
03397             }
03398             $str = strtr($str, array_flip($ascii_fix));
03399             // Normal stuff
03400             $str = self::iconv('utf-8', $encoding . '//IGNORE', $str);
03401             return $str;
03402         } elseif ($encoding === 'iso-8859-1') {
03403             $str = utf8_decode($str);
03404             return $str;
03405         }
03406         trigger_error('Encoding not supported', E_USER_ERROR);
03407         // You might be tempted to assume that the ASCII representation
03408         // might be OK, however, this is *not* universally true over all
03409         // encodings.  So we take the conservative route here, rather
03410         // than forcibly turn on %Core.EscapeNonASCIICharacters
03411     }
03412 
03429     public static function convertToASCIIDumbLossless($str) {
03430         $bytesleft = 0;
03431         $result = '';
03432         $working = 0;
03433         $len = strlen($str);
03434         for( $i = 0; $i < $len; $i++ ) {
03435             $bytevalue = ord( $str[$i] );
03436             if( $bytevalue <= 0x7F ) { //0xxx xxxx
03437                 $result .= chr( $bytevalue );
03438                 $bytesleft = 0;
03439             } elseif( $bytevalue <= 0xBF ) { //10xx xxxx
03440                 $working = $working << 6;
03441                 $working += ($bytevalue & 0x3F);
03442                 $bytesleft--;
03443                 if( $bytesleft <= 0 ) {
03444                     $result .= "&#" . $working . ";";
03445                 }
03446             } elseif( $bytevalue <= 0xDF ) { //110x xxxx
03447                 $working = $bytevalue & 0x1F;
03448                 $bytesleft = 1;
03449             } elseif( $bytevalue <= 0xEF ) { //1110 xxxx
03450                 $working = $bytevalue & 0x0F;
03451                 $bytesleft = 2;
03452             } else { //1111 0xxx
03453                 $working = $bytevalue & 0x07;
03454                 $bytesleft = 3;
03455             }
03456         }
03457         return $result;
03458     }
03459 
03461     const ICONV_OK = 0;
03462 
03465     const ICONV_TRUNCATES = 1;
03466 
03469     const ICONV_UNUSABLE = 2;
03470 
03485     public static function testIconvTruncateBug() {
03486         static $code = null;
03487         if ($code === null) {
03488             // better not use iconv, otherwise infinite loop!
03489             $r = self::unsafeIconv('utf-8', 'ascii//IGNORE', "\xCE\xB1" . str_repeat('a', 9000));
03490             if ($r === false) {
03491                 $code = self::ICONV_UNUSABLE;
03492             } elseif (($c = strlen($r)) < 9000) {
03493                 $code = self::ICONV_TRUNCATES;
03494             } elseif ($c > 9000) {
03495                 trigger_error('Your copy of iconv is extremely buggy. Please notify HTML Purifier maintainers: include your iconv version as per phpversion()', E_USER_ERROR);
03496             } else {
03497                 $code = self::ICONV_OK;
03498             }
03499         }
03500         return $code;
03501     }
03502 
03514     public static function testEncodingSupportsASCII($encoding, $bypass = false) {
03515         // All calls to iconv here are unsafe, proof by case analysis:
03516         // If ICONV_OK, no difference.
03517         // If ICONV_TRUNCATE, all calls involve one character inputs,
03518         // so bug is not triggered.
03519         // If ICONV_UNUSABLE, this call is irrelevant
03520         static $encodings = array();
03521         if (!$bypass) {
03522             if (isset($encodings[$encoding])) return $encodings[$encoding];
03523             $lenc = strtolower($encoding);
03524             switch ($lenc) {
03525                 case 'shift_jis':
03526                     return array("\xC2\xA5" => '\\', "\xE2\x80\xBE" => '~');
03527                 case 'johab':
03528                     return array("\xE2\x82\xA9" => '\\');
03529             }
03530             if (strpos($lenc, 'iso-8859-') === 0) return array();
03531         }
03532         $ret = array();
03533         if (self::unsafeIconv('UTF-8', $encoding, 'a') === false) return false;
03534         for ($i = 0x20; $i <= 0x7E; $i++) { // all printable ASCII chars
03535             $c = chr($i); // UTF-8 char
03536             $r = self::unsafeIconv('UTF-8', "$encoding//IGNORE", $c); // initial conversion
03537             if (
03538                 $r === '' ||
03539                 // This line is needed for iconv implementations that do not
03540                 // omit characters that do not exist in the target character set
03541                 ($r === $c && self::unsafeIconv($encoding, 'UTF-8//IGNORE', $r) !== $c)
03542             ) {
03543                 // Reverse engineer: what's the UTF-8 equiv of this byte
03544                 // sequence? This assumes that there's no variable width
03545                 // encoding that doesn't support ASCII.
03546                 $ret[self::unsafeIconv($encoding, 'UTF-8//IGNORE', $c)] = $c;
03547             }
03548         }
03549         $encodings[$encoding] = $ret;
03550         return $ret;
03551     }
03552 
03553 
03554 }
03555 
03556 
03557 
03558 
03559 
/**
 * Holds a lookup table loaded from a serialized file and exposes a shared
 * instance of itself.
 */
class HTMLPurifier_EntityLookup {

    /**
     * The unserialized lookup table; populated by setup().
     */
    public $table;

    /**
     * Loads the table from a serialized file.
     *
     * @param string|bool $file Path of the serialized table. Any falsy
     *        value selects the bundled entities.ser under
     *        HTMLPURIFIER_PREFIX.
     */
    public function setup($file = false) {
        $path = $file
            ? $file
            : HTMLPURIFIER_PREFIX . '/HTMLPurifier/EntityLookup/entities.ser';
        $this->table = unserialize(file_get_contents($path));
    }

    /**
     * Returns the shared lookup object, creating and loading it on first use.
     *
     * @param HTMLPurifier_EntityLookup|bool $prototype When truthy, this
     *        object replaces the shared instance and is returned.
     * @return HTMLPurifier_EntityLookup
     */
    public static function instance($prototype = false) {
        // no references, since PHP doesn't copy unless modified
        static $shared = null;
        if ($prototype) {
            $shared = $prototype;
            return $shared;
        }
        if (!$shared) {
            $shared = new HTMLPurifier_EntityLookup();
            $shared->setup();
        }
        return $shared;
    }

}
03600 
03601 
03602 
03603 
03604 
03605 // if want to implement error collecting here, we'll need to use some sort
03606 // of global data (probably trigger_error) because it's impossible to pass
03607 // $config or $context to the callback functions.
03608 
/**
 * Substitutes character entity references in text.
 *
 * Two passes are offered: one that expands every entity except the
 * markup-significant ones, and one that expands only those.
 */
class HTMLPurifier_EntityParser
{

    /**
     * Lazily fetched HTMLPurifier_EntityLookup used for named entities.
     */
    protected $_entity_lookup;

    /**
     * Pattern matching hexadecimal, decimal and named entity references;
     * the trailing semicolon is optional.
     */
    protected $_substituteEntitiesRegex =
'/&(?:[#]x([a-fA-F0-9]+)|[#]0*(\d+)|([A-Za-z_:][A-Za-z0-9.\-_:]*));?/';
//     1. hex             2. dec      3. string (XML style)


    /**
     * Code point => character for the special (markup-significant) characters.
     */
    protected $_special_dec2str =
            array(
                    34 => '"',
                    38 => '&',
                    39 => "'",
                    60 => '<',
                    62 => '>'
            );

    /**
     * Entity name => code point for the special named entities.
     */
    protected $_special_ent2dec =
            array(
                    'quot' => 34,
                    'amp'  => 38,
                    'lt'   => 60,
                    'gt'   => 62
            );

    /**
     * Expands every entity in a string except the special ones.
     *
     * @param string $string Text possibly containing entities.
     * @return string Text with non-special entities replaced.
     */
    public function substituteNonSpecialEntities($string) {
        // it will try to detect missing semicolons, but don't rely on it
        return preg_replace_callback(
            $this->_substituteEntitiesRegex,
            array($this, 'nonSpecialEntityCallback'),
            $string
            );
    }

    /**
     * preg callback for substituteNonSpecialEntities().
     *
     * @param array $matches Match groups: 0 => whole entity, 1 => hex
     *        digits, 2 => decimal digits, 3 => entity name.
     * @return string Replacement text; the entity itself when it is special
     *         or its name is not in the lookup table.
     */
    protected function nonSpecialEntityCallback($matches) {
        // replaces all but big five
        $entity = $matches[0];
        $is_num = (isset($entity[1]) && $entity[1] === '#');
        if ($is_num) {
            $is_hex = (isset($entity[2]) && $entity[2] === 'x');
            $code = $is_hex ? hexdec($matches[1]) : (int) $matches[2];

            // abort for special characters
            if (isset($this->_special_dec2str[$code]))  return $entity;

            return HTMLPurifier_Encoder::unichr($code);
        } else {
            if (isset($this->_special_ent2dec[$matches[3]])) return $entity;
            if (!$this->_entity_lookup) {
                $this->_entity_lookup = HTMLPurifier_EntityLookup::instance();
            }
            if (isset($this->_entity_lookup->table[$matches[3]])) {
                return $this->_entity_lookup->table[$matches[3]];
            } else {
                return $entity;
            }
        }
    }

    /**
     * Expands only the special entities in a string, whether written by
     * name or as a numeric reference.
     *
     * @param string $string Text possibly containing entities.
     * @return string Text with special entities replaced by their characters.
     */
    public function substituteSpecialEntities($string) {
        return preg_replace_callback(
            $this->_substituteEntitiesRegex,
            array($this, 'specialEntityCallback'),
            $string);
    }

    /**
     * preg callback for substituteSpecialEntities().
     *
     * @param array $matches Match groups: 0 => whole entity, 1 => hex
     *        digits, 2 => decimal digits, 3 => entity name.
     * @return string The special character, or the entity unchanged when it
     *         is not one of the special ones.
     */
    protected function specialEntityCallback($matches) {
        $entity = $matches[0];
        $is_num = (isset($entity[1]) && $entity[1] === '#');
        if ($is_num) {
            $is_hex = (isset($entity[2]) && $entity[2] === 'x');
            $int = $is_hex ? hexdec($matches[1]) : (int) $matches[2];
            return isset($this->_special_dec2str[$int]) ?
                $this->_special_dec2str[$int] :
                $entity;
        } else {
            if (!isset($this->_special_ent2dec[$matches[3]])) {
                return $entity;
            }
            // _special_ent2dec only yields the code point; translate it to
            // the character so "&lt;" becomes "<" rather than "60".
            return $this->_special_dec2str[$this->_special_ent2dec[$matches[3]]];
        }
    }

}
03745 
03746 
03747 
03748 
03749 
/**
 * Collects error messages raised during processing and renders them as HTML.
 *
 * Errors are stored twice: as a flat list of numerically indexed arrays,
 * and as HTMLPurifier_ErrorStruct objects keyed by line and column.
 */
class HTMLPurifier_ErrorCollector
{

    /**
     * Indexes into each entry of the flat error list.
     */
    const LINENO   = 0;
    const SEVERITY = 1;
    const MESSAGE  = 2;
    const CHILDREN = 3;

    protected $errors;
    protected $_current;
    protected $_stacks = array(array());
    protected $locale;
    protected $generator;
    protected $context;

    /**
     * Error structs: [line][col] => struct, plus [-1] => struct for errors
     * without an integer line/column.
     */
    protected $lines = array();

    /**
     * @param object $context Context object; 'Locale' is fetched from it
     *        now, the current token/line/column/attribute on every send().
     */
    public function __construct($context) {
        $this->context   = $context;
        $this->locale    =& $context->get('Locale');
        // both views start out aliased to the first stack
        $this->errors    =& $this->_stacks[0];
        $this->_current  =& $this->_stacks[0];
    }

    /**
     * Records an error.
     *
     * Any arguments after $msg are handed to the locale's formatMessage(),
     * keyed from 1.
     *
     * @param int    $severity Error severity.
     * @param string $msg      Message key passed to the locale.
     */
    public function send($severity, $msg) {

        $args = array();
        if (func_num_args() > 2) {
            // drop $severity, then $msg, leaving the extras keyed from 1
            $args = array_slice(func_get_args(), 1);
            unset($args[0]);
        }

        $token = $this->context->get('CurrentToken', true);
        if ($token) {
            $line = $token->line;
            $col  = $token->col;
        } else {
            $line = $this->context->get('CurrentLine', true);
            $col  = $this->context->get('CurrentCol',  true);
        }
        $attr = $this->context->get('CurrentAttr', true);

        // perform special substitutions, also add custom parameters
        $subst = array();
        if (!is_null($token)) {
            $args['CurrentToken'] = $token;
        }
        if (!is_null($attr)) {
            $subst['$CurrentAttr.Name'] = $attr;
            if (isset($token->attr[$attr])) {
                $subst['$CurrentAttr.Value'] = $token->attr[$attr];
            }
        }

        $msg = empty($args)
            ? $this->locale->getMessage($msg)
            : $this->locale->formatMessage($msg, $args);

        if (!empty($subst)) {
            $msg = strtr($msg, $subst);
        }

        // flat list entry (numerically indexed)
        $this->_current[] = array(
            self::LINENO   => $line,
            self::SEVERITY => $severity,
            self::MESSAGE  => $msg,
            self::CHILDREN => array()
        );

        // Structured storage. The candidate struct is always built, but it
        // is only stored when the slot is still empty.
        $new_struct = new HTMLPurifier_ErrorStruct();
        $new_struct->type = HTMLPurifier_ErrorStruct::TOKEN;
        if ($token) {
            $new_struct->value = clone $token;
        }

        if (is_int($line) && is_int($col)) {
            if (!isset($this->lines[$line][$col])) {
                $this->lines[$line][$col] = $new_struct;
            }
            $struct = $this->lines[$line][$col];
            // These ksorts may present a performance problem
            ksort($this->lines[$line], SORT_NUMERIC);
        } else {
            // no usable position: file under the -1 slot
            if (!isset($this->lines[-1])) {
                $this->lines[-1] = $new_struct;
            }
            $struct = $this->lines[-1];
        }
        ksort($this->lines, SORT_NUMERIC);

        // Now, check if we need to operate on a lower structure
        if (!empty($attr)) {
            $struct = $struct->getChild(HTMLPurifier_ErrorStruct::ATTR, $attr);
            if (!$struct->value) {
                $struct->value = array($attr, 'PUT VALUE HERE');
            }
        }
        // $cssprop is never assigned in this method, so this branch does
        // not run at present.
        if (!empty($cssprop)) {
            $struct = $struct->getChild(HTMLPurifier_ErrorStruct::CSSPROP, $cssprop);
            if (!$struct->value) {
                // if we tokenize CSS this might be a little more difficult to do
                $struct->value = array($cssprop, 'PUT VALUE HERE');
            }
        }

        // Ok, structs are all setup, now time to register the error
        $struct->addError($severity, $msg);
    }

    /**
     * Returns the flat list of recorded errors.
     *
     * @return array Entries indexed by the LINENO/SEVERITY/MESSAGE/CHILDREN
     *         constants.
     */
    public function getRaw() {
        return $this->errors;
    }

    /**
     * Renders the recorded errors as an HTML fragment.
     *
     * @param object     $config Configuration handed to the generator.
     * @param array|null $errors Only consulted to decide whether the
     *        "no errors" message is shown; defaults to the recorded errors.
     * @return string A <ul> of errors, or a <p> with the "no errors" message.
     */
    public function getHTMLFormatted($config, $errors = null) {
        $ret = array();

        $this->generator = new HTMLPurifier_Generator($config, $this->context);
        if ($errors === null) {
            $errors = $this->errors;
        }

        // 'At line' message needs to be removed

        // Positioned errors first, in stored order...
        foreach ($this->lines as $line => $col_array) {
            if ($line == -1) {
                continue;
            }
            foreach ($col_array as $col => $struct) {
                $this->_renderStruct($ret, $struct, $line, $col);
            }
        }
        // ...then the unpositioned ones.
        if (isset($this->lines[-1])) {
            $this->_renderStruct($ret, $this->lines[-1]);
        }

        if (empty($errors)) {
            return '<p>' . $this->locale->getMessage('ErrorCollector: No errors') . '</p>';
        }
        return '<ul><li>' . implode('</li><li>', $ret) . '</li></ul>';

    }

    /**
     * Appends the rendered errors of a struct and its descendants to $ret.
     *
     * @param array  $ret    Output list of HTML snippets (by reference).
     * @param object $struct Root HTMLPurifier_ErrorStruct to render.
     * @param mixed  $line   Line for the location label, or null.
     * @param mixed  $col    Column for the location label, or null.
     */
    private function _renderStruct(&$ret, $struct, $line = null, $col = null) {
        $stack = array($struct);
        $context_stack = array(array());
        while ($current = array_pop($stack)) {
            $context = array_pop($context_stack);

            foreach ($current->errors as $entry) {
                list($severity, $msg) = $entry;
                // W3C uses an icon to indicate the severity of the error.
                $label = $this->locale->getErrorName($severity);
                if (!is_null($line) && !is_null($col)) {
                    $location = '<em class="location">Line ' . $line . ', Column ' . $col . ': </em> ';
                } else {
                    $location = '<em class="location">End of Document: </em> ';
                }
                // Here, have a marker for the character on the column appropriate.
                // Be sure to clip extremely long lines.
                $ret[] = '<div>'
                    . '<span class="error e' . $severity . '"><strong>' . $label . '</strong></span> '
                    . $location
                    . '<strong class="description">' . $this->generator->escape($msg) . '</strong> '
                    . '</div>';
            }

            foreach ($current->children as $type => $array) {
                $context[] = $current;
                $stack = array_merge($stack, array_reverse($array, true));
                // one context entry per child pushed
                foreach ($array as $unused) {
                    $context_stack[] = $context;
                }
            }
        }
    }

}
03955 
03956 
03957 
03958 
03959 
/**
 * A node in a tree of errors: it holds its own errors plus child nodes
 * grouped by type and identifier.
 */
class HTMLPurifier_ErrorStruct
{

    /**
     * Possible values of $type.
     */
    const TOKEN     = 0;
    const ATTR      = 1;
    const CSSPROP   = 2;

    /**
     * One of the type constants above.
     */
    public $type;

    /**
     * The item this node describes.
     */
    public $value;

    /**
     * Errors attached to this node, each an array(severity, message).
     */
    public $errors = array();

    /**
     * Child nodes, indexed as [type][id].
     */
    public $children = array();

    /**
     * Returns the child node for a type/id pair, creating it on first use.
     *
     * @param int   $type One of the type constants.
     * @param mixed $id   Key identifying the child within that type.
     * @return HTMLPurifier_ErrorStruct
     */
    public function getChild($type, $id) {
        if (isset($this->children[$type][$id])) {
            return $this->children[$type][$id];
        }
        $child = new HTMLPurifier_ErrorStruct();
        $child->type = $type;
        $this->children[$type][$id] = $child;
        return $child;
    }

    /**
     * Attaches an error to this node.
     *
     * @param int    $severity Error severity.
     * @param string $message  Error text.
     */
    public function addError($severity, $message) {
        $entry = array($severity, $message);
        $this->errors[] = $entry;
    }

}
04016 
04017 
04018 
04019 
04020 
/**
 * Exception type for the library; adds nothing to the built-in Exception.
 */
class HTMLPurifier_Exception extends Exception
{
}
04029 
04030 
04031 
04032 
04033 
/**
 * Base class for filters. Both hooks return their input unchanged, so
 * subclasses override only the hook they need.
 */
class HTMLPurifier_Filter
{

    /**
     * Name of the filter.
     */
    public $name;

    /**
     * Hook for transforming HTML before processing; the base version is a
     * pass-through.
     *
     * @param string $html    HTML to filter.
     * @param object $config  Unused here.
     * @param object $context Unused here.
     * @return string $html unchanged.
     */
    public function preFilter($html, $config, $context) {
        $filtered = $html;
        return $filtered;
    }

    /**
     * Hook for transforming HTML after processing; the base version is a
     * pass-through.
     *
     * @param string $html    HTML to filter.
     * @param object $config  Unused here.
     * @param object $context Unused here.
     * @return string $html unchanged.
     */
    public function postFilter($html, $config, $context) {
        $filtered = $html;
        return $filtered;
    }

}
04076 
04077 
04078 
04079 
04080 
/**
 * Turns token objects back into an HTML string.
 */
class HTMLPurifier_Generator
{

    /**
     * Whether output is XHTML; taken from the definition's doctype.
     */
    private $_xhtml = true;

    /**
     * Value of %Output.CommentScriptContents.
     */
    private $_scriptFix = false;

    /**
     * HTML definition obtained from the configuration.
     */
    private $_def;

    /**
     * Value of %Output.SortAttr.
     */
    private $_sortAttr;

    /**
     * Value of %Output.FlashCompat.
     */
    private $_flashCompat;

    /**
     * Value of %Output.FixInnerHTML.
     */
    private $_innerHTMLFix;

    /**
     * Records for the <object> start tags seen while flash compatibility
     * is enabled.
     */
    private $_flashStack = array();

    /**
     * Configuration this generator was built with.
     */
    protected $config;

    /**
     * @param object $config  Configuration to read the Output.* settings
     *                        and the HTML definition from.
     * @param object $context Not used by the constructor.
     */
    public function __construct($config, $context) {
        $this->config        = $config;
        $this->_scriptFix    = $config->get('Output.CommentScriptContents');
        $this->_innerHTMLFix = $config->get('Output.FixInnerHTML');
        $this->_sortAttr     = $config->get('Output.SortAttr');
        $this->_flashCompat  = $config->get('Output.FlashCompat');
        $this->_def          = $config->getHTMLDefinition();
        $this->_xhtml        = $this->_def->doctype->xml;
    }

    /**
     * Generates HTML from a list of tokens.
     *
     * @param array $tokens Token objects.
     * @return string The generated HTML; '' for an empty list.
     */
    public function generateFromTokens($tokens) {
        if (!$tokens) {
            return '';
        }

        // Basic algorithm
        $html  = '';
        $total = count($tokens);
        $pos   = 0;
        while ($pos < $total) {
            // script special case: the contents of the script block must be
            // ONE token for this to work.
            $script_with_body = $this->_scriptFix
                && $tokens[$pos]->name === 'script'
                && $pos + 2 < $total
                && $tokens[$pos + 2] instanceof HTMLPurifier_Token_End;
            if ($script_with_body) {
                $html .= $this->generateFromToken($tokens[$pos]);
                $html .= $this->generateScriptFromToken($tokens[$pos + 1]);
                $pos += 2;
            }
            $html .= $this->generateFromToken($tokens[$pos]);
            $pos++;
        }

        // Tidy cleanup
        if (extension_loaded('tidy') && $this->config->get('Output.TidyFormat')) {
            $tidy_options = array(
               'indent'=> true,
               'output-xhtml' => $this->_xhtml,
               'show-body-only' => true,
               'indent-spaces' => 2,
               'wrap' => 68,
            );
            $tidy = new Tidy;
            $tidy->parseString($html, $tidy_options, 'utf8');
            $tidy->cleanRepair();
            $html = (string) $tidy; // explicit cast necessary
        }

        // Normalize newlines to system defined value
        if ($this->config->get('Core.NormalizeNewlines')) {
            $nl = $this->config->get('Output.Newline');
            if ($nl === null) {
                $nl = PHP_EOL;
            }
            if ($nl !== "\n") {
                $html = str_replace("\n", $nl, $html);
            }
        }
        return $html;
    }

    /**
     * Generates HTML for a single token.
     *
     * @param object $token Token to render.
     * @return string The rendered token; '' (after a warning) when $token
     *         is not an HTMLPurifier_Token, and '' for token classes not
     *         handled here.
     */
    public function generateFromToken($token) {
        if (!$token instanceof HTMLPurifier_Token) {
            trigger_error('Cannot generate HTML from non-HTMLPurifier_Token object', E_USER_WARNING);
            return '';
        }

        if ($token instanceof HTMLPurifier_Token_Start) {
            $attr = $this->generateAttributes($token->attr, $token->name);
            if ($this->_flashCompat && $token->name == "object") {
                // remember the object so later <param> tokens can attach
                $flash = new stdclass();
                $flash->attr = $token->attr;
                $flash->param = array();
                $this->_flashStack[] = $flash;
            }
            return '<' . $token->name . ($attr ? ' ' : '') . $attr . '>';
        }

        if ($token instanceof HTMLPurifier_Token_End) {
            // Nothing extra is emitted for a closing </object> for now,
            // even when flash compatibility is enabled.
            return '</' . $token->name . '>';
        }

        if ($token instanceof HTMLPurifier_Token_Empty) {
            if ($this->_flashCompat && $token->name == "param" && !empty($this->_flashStack)) {
                $top = count($this->_flashStack) - 1;
                $this->_flashStack[$top]->param[$token->attr['name']] = $token->attr['value'];
            }
            $attr = $this->generateAttributes($token->attr, $token->name);
            $close = $this->_xhtml ? ' /' : ''; // <br /> v. <br>
            return '<' . $token->name . ($attr ? ' ' : '') . $attr . $close . '>';
        }

        if ($token instanceof HTMLPurifier_Token_Text) {
            return $this->escape($token->data, ENT_NOQUOTES);
        }

        if ($token instanceof HTMLPurifier_Token_Comment) {
            return '<!--' . $token->data . '-->';
        }

        return '';
    }

    /**
     * Renders the text inside a script element wrapped in comment/CDATA
     * markers. Non-text tokens fall back to generateFromToken().
     *
     * @param object $token Token holding the script contents.
     * @return string
     */
    public function generateScriptFromToken($token) {
        if (!$token instanceof HTMLPurifier_Token_Text) {
            return $this->generateFromToken($token);
        }
        // Thanks <http://lachy.id.au/log/2005/05/script-comments>
        $body = trim(preg_replace('#//\s*$#', '', $token->data));
        return '<!--//--><![CDATA[//><!--' . "\n" . $body . "\n" . '//--><!]]>';
    }

    /**
     * Renders an attribute array as the text placed inside a start tag.
     *
     * @param array       $assoc_array_of_attributes Attribute name => value.
     * @param string|bool $element Name of the owning element, used to look
     *        up minimized attributes; false to skip that lookup.
     * @return string Rendered attributes without surrounding whitespace.
     */
    public function generateAttributes($assoc_array_of_attributes, $element = false) {
        if ($this->_sortAttr) {
            ksort($assoc_array_of_attributes);
        }

        $html = '';
        foreach ($assoc_array_of_attributes as $key => $value) {
            if (!$this->_xhtml) {
                // Remove namespaced attributes
                if (strpos($key, ':') !== false) {
                    continue;
                }
                // Check if we should minimize the attribute: val="val" -> val
                if ($element && !empty($this->_def->info[$element]->attr[$key]->minimized)) {
                    $html .= $key . ' ';
                    continue;
                }
            }

            // Workaround for Internet Explorer's innerHTML bug. When IE
            // computes innerHTML it omits the quotes around a value that
            // has no angle brackets, quotes or spaces, but when parsing
            // HTML it treats backticks as quotes. So
            //      <img alt="``" />
            // is serialized as
            //      <img alt=`` />
            // and read back as
            //      <img alt='' />
            // Appending a space forces IE to quote the value. Trailing
            // whitespace in attribute values may be ignored by user agents
            // according to the W3C spec, so the extra space is barely
            // noticeable. This is configurable because it is unnecessary
            // when innerHTML is not used on user input or IE is not
            // supported.
            $needs_padding = $this->_innerHTMLFix
                && strpos($value, '`') !== false
                // only when nothing else would already trigger quoting
                && strcspn($value, '"\' <>') === strlen($value);
            if ($needs_padding) {
                $value .= ' ';
            }

            $html .= $key.'="'.$this->escape($value).'" ';
        }
        return rtrim($html);
    }

    /**
     * Escapes a string for HTML output via htmlspecialchars() in UTF-8.
     *
     * @param string   $string Text to escape.
     * @param int|null $quote  htmlspecialchars() quote flag; null selects
     *                         ENT_COMPAT.
     * @return string
     */
    public function escape($string, $quote = null) {
        // Workaround for APC bug on Mac Leopard reported by sidepodcast
        // http://htmlpurifier.org/phorum/read.php?3,4823,4846
        $flags = ($quote === null) ? ENT_COMPAT : $quote;
        return htmlspecialchars($string, $flags, 'UTF-8');
    }

}
04331 
04332 
04333 
04334 
04335 
/**
 * Definition of the HTML elements, attributes and transformations that
 * are in effect for one configuration.
 *
 * The public info* members are populated by doSetup(): first from the
 * modules known to the module manager (processModules()), then narrowed
 * according to the HTML.* configuration directives (setupConfigStuff()).
 * Before setup, userland code may customize the definition through the
 * "anonymous module" helpers (addAttribute(), addElement(), ...).
 */
class HTMLPurifier_HTMLDefinition extends HTMLPurifier_Definition
{

    // FULLY-PUBLIC VARIABLES ---------------------------------------------

    /**
     * Associative array of element name to element definition, as
     * returned by the module manager's getElements().
     */
    public $info = array();

    /**
     * Associative array keyed by global attribute name.
     */
    public $info_global_attr = array();

    /**
     * Name of the parent element (HTML.Parent directive); 'div' by default.
     */
    public $info_parent = 'div';

    /**
     * Element definition that belongs to $info_parent; looked up in
     * setupConfigStuff().
     */
    public $info_parent_def;

    /**
     * Name of the element used as block wrapper (HTML.BlockWrapper
     * directive); must be a member of the 'Block' content set.
     */
    public $info_block_wrapper = 'p';

    /**
     * Keyed array of tag transformations collected from the modules.
     */
    public $info_tag_transform = array();

    /**
     * Keyed array of attribute pre-transformations collected from the modules.
     */
    public $info_attr_transform_pre = array();

    /**
     * Keyed array of attribute post-transformations collected from the modules.
     */
    public $info_attr_transform_post = array();

    /**
     * Content set lookup table, copied from the module manager's
     * content sets object.
     */
    public $info_content_sets = array();

    /**
     * Keyed array of injectors collected from the modules; injectors whose
     * checkNeeded() reports a missing element/attribute are removed.
     */
    public $info_injector = array();

    /**
     * Doctype object produced by the module manager during setup.
     */
    public $doctype;



    // RAW CUSTOMIZATION STUFF --------------------------------------------

    /**
     * Adds a custom attribute to an element via the anonymous module.
     * A blank element is created in the anonymous module if it does not
     * know the element yet.
     * @param $element_name Name of the element to add the attribute to
     * @param $attr_name Name of the attribute
     * @param $def Attribute definition, stored as given
     */
    public function addAttribute($element_name, $attr_name, $def) {
        $module = $this->getAnonymousModule();
        if (!isset($module->info[$element_name])) {
            $element = $module->addBlankElement($element_name);
        } else {
            $element = $module->info[$element_name];
        }
        $element->attr[$attr_name] = $def;
    }

    /**
     * Adds a custom element via the anonymous module.
     * All arguments are forwarded to HTMLPurifier_HTMLModule::addElement().
     * @return The element definition created by the module
     */
    public function addElement($element_name, $type, $contents, $attr_collections, $attributes = array()) {
        $module = $this->getAnonymousModule();
        // assume that if the user is calling this, the element
        // is safe. This may not be a good idea
        $element = $module->addElement($element_name, $type, $contents, $attr_collections, $attributes);
        return $element;
    }

    /**
     * Adds a blank element via the anonymous module.
     * The argument is forwarded to HTMLPurifier_HTMLModule::addBlankElement().
     * @param $element_name Name of the element
     * @return The element definition returned by the module
     */
    public function addBlankElement($element_name) {
        $module  = $this->getAnonymousModule();
        $element = $module->addBlankElement($element_name);
        return $element;
    }

    /**
     * Retrieves the module named 'Anonymous' that collects userland
     * customizations, creating it on first use.
     * @return HTMLPurifier_HTMLModule instance
     */
    public function getAnonymousModule() {
        if (!$this->_anonModule) {
            $this->_anonModule = new HTMLPurifier_HTMLModule();
            $this->_anonModule->name = 'Anonymous';
        }
        return $this->_anonModule;
    }

    /**
     * Lazily created anonymous module; handed over to the module manager
     * and unset in processModules().
     */
    private $_anonModule = null;


    // PUBLIC BUT INTERNAL VARIABLES --------------------------------------

    /** Type identifier of this definition. */
    public $type = 'HTML';

    /** HTMLPurifier_HTMLModuleManager instance; unset once doSetup() has run. */
    public $manager;

    /**
     * Creates the module manager that drives setup.
     */
    public function __construct() {
        $this->manager = new HTMLPurifier_HTMLModuleManager();
    }

    /**
     * Performs the actual setup: processes modules, applies configuration
     * directives and then strips data that is only needed during setup.
     * @param $config Configuration object
     */
    protected function doSetup($config) {
        $this->processModules($config);
        $this->setupConfigStuff($config);
        unset($this->manager);

        // cleanup some of the element definitions
        foreach ($this->info as $k => $v) {
            unset($this->info[$k]->content_model);
            unset($this->info[$k]->content_model_type);
        }
    }

    /**
     * Sets up the module manager and copies doctype, transformations,
     * injectors, elements and content sets from it into this definition.
     * @param $config Configuration object
     */
    protected function processModules($config) {

        if ($this->_anonModule) {
            // for user specific changes
            // this is late-loaded so we don't have to deal with PHP4
            // reference wonky-ness
            $this->manager->addModule($this->_anonModule);
            unset($this->_anonModule);
        }

        $this->manager->setup($config);
        $this->doctype = $this->manager->doctype;

        // Every one of these module members is merged the same way: a value
        // of false removes the entry, anything else sets/overrides it.
        $merged = array(
            'info_tag_transform',
            'info_attr_transform_pre',
            'info_attr_transform_post',
            'info_injector',
        );
        foreach ($this->manager->modules as $module) {
            foreach ($merged as $member) {
                foreach ($module->$member as $k => $v) {
                    if ($v === false) unset($this->{$member}[$k]);
                    else $this->{$member}[$k] = $v;
                }
            }
        }

        $this->info = $this->manager->getElements();
        $this->info_content_sets = $this->manager->contentSets->lookup;

    }

    /**
     * Applies the HTML.BlockWrapper, HTML.Parent, HTML.Allowed*,
     * HTML.Forbidden* directives to the definition and drops injectors
     * that are not needed. Problems are reported through trigger_error().
     * @param $config Configuration object
     */
    protected function setupConfigStuff($config) {

        $block_wrapper = $config->get('HTML.BlockWrapper');
        if (isset($this->info_content_sets['Block'][$block_wrapper])) {
            $this->info_block_wrapper = $block_wrapper;
        } else {
            trigger_error('Cannot use non-block element as block wrapper',
                E_USER_ERROR);
        }

        $parent = $config->get('HTML.Parent');
        $def = $this->manager->getElement($parent, true);
        if ($def) {
            $this->info_parent = $parent;
            $this->info_parent_def = $def;
        } else {
            trigger_error('Cannot use unrecognized element as parent',
                E_USER_ERROR);
            // only reached when an error handler swallowed the error:
            // fall back to the definition of the default parent
            $this->info_parent_def = $this->manager->getElement($this->info_parent, true);
        }

        // support template text
        $support = "(for information on implementing this, see the ".
                   "support forums) ";

        // setup allowed elements -----------------------------------------

        $allowed_elements = $config->get('HTML.AllowedElements');
        $allowed_attributes = $config->get('HTML.AllowedAttributes'); // retrieve early

        // HTML.Allowed is only consulted when neither of the two more
        // specific directives is set
        if (!is_array($allowed_elements) && !is_array($allowed_attributes)) {
            $allowed = $config->get('HTML.Allowed');
            if (is_string($allowed)) {
                list($allowed_elements, $allowed_attributes) = $this->parseTinyMCEAllowedList($allowed);
            }
        }

        if (is_array($allowed_elements)) {
            foreach ($this->info as $name => $d) {
                if(!isset($allowed_elements[$name])) unset($this->info[$name]);
                unset($allowed_elements[$name]);
            }
            // emit errors: whatever is left was requested but is unknown
            foreach ($allowed_elements as $element => $d) {
                $element = htmlspecialchars($element); // PHP doesn't escape errors, be careful!
                trigger_error("Element '$element' is not supported $support", E_USER_WARNING);
            }
        }

        // setup allowed attributes ---------------------------------------

        // entries are removed from this copy as they are matched, so that
        // the leftovers can be reported as unsupported
        $allowed_attributes_mutable = $allowed_attributes; // by copy!
        if (is_array($allowed_attributes)) {

            // This actually doesn't do anything, since we went away from
            // global attributes. It's possible that userland code uses
            // it, but HTMLModuleManager doesn't!
            foreach ($this->info_global_attr as $attr => $x) {
                $keys = array($attr, "*@$attr", "*.$attr");
                $delete = true;
                foreach ($keys as $key) {
                    if ($delete && isset($allowed_attributes[$key])) {
                        $delete = false;
                    }
                    if (isset($allowed_attributes_mutable[$key])) {
                        unset($allowed_attributes_mutable[$key]);
                    }
                }
                if ($delete) unset($this->info_global_attr[$attr]);
            }

            foreach ($this->info as $tag => $info) {
                foreach ($info->attr as $attr => $x) {
                    $keys = array("$tag@$attr", $attr, "*@$attr", "$tag.$attr", "*.$attr");
                    $delete = true;
                    foreach ($keys as $key) {
                        if ($delete && isset($allowed_attributes[$key])) {
                            $delete = false;
                        }
                        if (isset($allowed_attributes_mutable[$key])) {
                            unset($allowed_attributes_mutable[$key]);
                        }
                    }
                    if ($delete) {
                        if ($this->info[$tag]->attr[$attr]->required) {
                            trigger_error("Required attribute '$attr' in element '$tag' was not allowed, which means '$tag' will not be allowed either", E_USER_WARNING);
                        }
                        unset($this->info[$tag]->attr[$attr]);
                    }
                }
            }
            // emit errors
            foreach ($allowed_attributes_mutable as $elattr => $d) {
                // yields array(attr) or array(element, attr)
                $bits = preg_split('/[.@]/', $elattr, 2);
                $c = count($bits);
                if ($c === 2 && $bits[0] !== '*') {
                    $element = htmlspecialchars($bits[0]);
                    $attribute = htmlspecialchars($bits[1]);
                    // look the element up by its raw name; the escaped
                    // form is only meant for the error message
                    if (!isset($this->info[$bits[0]])) {
                        // no explicit level: emitted with trigger_error()'s
                        // default (E_USER_NOTICE)
                        trigger_error("Cannot allow attribute '$attribute' if element '$element' is not allowed/supported $support");
                    } else {
                        trigger_error("Attribute '$attribute' in element '$element' not supported $support",
                            E_USER_WARNING);
                    }
                    continue;
                }
                // global attribute, written as "attr", "*.attr" or "*@attr":
                // the attribute name is always the last bit
                $attribute = htmlspecialchars($bits[$c - 1]);
                trigger_error("Global attribute '$attribute' is not ".
                    "supported in any elements $support",
                    E_USER_WARNING);
            }

        }

        // setup forbidden elements ---------------------------------------

        $forbidden_elements   = $config->get('HTML.ForbiddenElements');
        $forbidden_attributes = $config->get('HTML.ForbiddenAttributes');

        foreach ($this->info as $tag => $info) {
            if (isset($forbidden_elements[$tag])) {
                unset($this->info[$tag]);
                continue;
            }
            foreach ($info->attr as $attr => $x) {
                if (
                    isset($forbidden_attributes["$tag@$attr"]) ||
                    isset($forbidden_attributes["*@$attr"]) ||
                    isset($forbidden_attributes[$attr])
                ) {
                    unset($this->info[$tag]->attr[$attr]);
                    continue;
                } // this segment might get removed eventually
                elseif (isset($forbidden_attributes["$tag.$attr"])) {
                    // $tag.$attr are not user supplied, so no worries!
                    trigger_error("Error with $tag.$attr: tag.attr syntax not supported for HTML.ForbiddenAttributes; use tag@attr instead", E_USER_WARNING);
                }
            }
        }
        // warn about the unsupported "*.attr" spelling
        foreach ($forbidden_attributes as $key => $v) {
            if (strlen($key) < 2) continue;
            if ($key[0] != '*') continue;
            if ($key[1] == '.') {
                trigger_error("Error with $key: *.attr syntax not supported for HTML.ForbiddenAttributes; use attr instead", E_USER_WARNING);
            }
        }

        // setup injectors -----------------------------------------------------
        foreach ($this->info_injector as $i => $injector) {
            // checkNeeded() returns false when everything required is present
            if ($injector->checkNeeded($config) !== false) {
                // remove injector that does not have its required
                // elements/attributes present, and is thus not needed.
                unset($this->info_injector[$i]);
            }
        }
    }

    /**
     * Parses a TinyMCE-flavored allowed elements and attributes list into
     * separate lists for HTML.AllowedElements and HTML.AllowedAttributes.
     *
     * Entries are separated by commas or newlines and look like "a" or
     * "a[href|title]"; spaces and tabs are ignored. A missing closing
     * bracket is tolerated.
     *
     * @param $list String list to parse
     * @return array($elements, $attributes): lookup arrays with true as
     *         value, keyed by element name and by "element.attribute"
     */
    public function parseTinyMCEAllowedList($list) {

        $list = str_replace(array(' ', "\t"), '', $list);

        $elements = array();
        $attributes = array();

        $chunks = preg_split('/(,|[\n\r]+)/', $list);
        foreach ($chunks as $chunk) {
            if (empty($chunk)) continue;
            // remove TinyMCE element control characters
            $bracket = strpos($chunk, '[');
            if ($bracket === false || $bracket === 0) {
                // no attribute list; a chunk that *starts* with a bracket
                // has no element name and is taken verbatim
                $element = $chunk;
                $attr = false;
            } else {
                list($element, $attr) = explode('[', $chunk);
            }
            if ($element !== '*') $elements[$element] = true;
            if (!$attr) continue;
            // remove trailing ] -- but only if it is really there, so that
            // "a[href" does not lose the last letter of the attribute name
            if (substr($attr, -1) === ']') {
                $attr = substr($attr, 0, strlen($attr) - 1);
            }
            $attr = explode('|', $attr);
            foreach ($attr as $key) {
                $attributes["$element.$key"] = true;
            }
        }

        return array($elements, $attributes);

    }


}
04757 
04758 
04759 
04760 
04761 
/**
 * Represents a module: a bundle of element definitions, content sets,
 * transformations and injectors that the module manager can activate.
 * The convenience methods help subclasses and userland code to fill in
 * the public members.
 */
class HTMLPurifier_HTMLModule
{

    // -- Overloadable ----------------------------------------------------

    /**
     * Short unique string identifier of the module; the module manager
     * refuses to register a module without one.
     */
    public $name;

    /**
     * List of the names of the elements this module touches; appended to
     * by addElement() and addBlankElement().
     */
    public $elements = array();

    /**
     * Associative array of element name to element definition.
     */
    public $info = array();

    /**
     * Associative array of content set name to a string of element names
     * separated by ' | ' (see addElementToContentSet()).
     */
    public $content_sets = array();

    /**
     * Attribute collections of this module. Not used inside this class;
     * NOTE(review): presumably consumed by HTMLPurifier_AttrCollections.
     */
    public $attr_collections = array();

    /**
     * Keyed array of tag transformations; a value of false removes an
     * entry when the definition merges modules.
     */
    public $info_tag_transform = array();

    /**
     * Keyed array of attribute pre-transformations.
     */
    public $info_attr_transform_pre = array();

    /**
     * Keyed array of attribute post-transformations.
     */
    public $info_attr_transform_post = array();

    /**
     * Injectors supplied by this module. The module manager accepts
     * injector objects as well as strings, which it expands to the class
     * name HTMLPurifier_Injector_<string>.
     */
    public $info_injector = array();

    /**
     * Flag; false by default, matching the default getChildDef().
     * NOTE(review): presumably tells the content sets code whether
     * getChildDef() should be consulted -- confirm against its caller.
     */
    public $defines_child_def = false;

    /**
     * Whether the module is safe for untrusted input. The module manager
     * ignores modules with a false value unless trusted mode is on.
     */
    public $safe = true;

    /**
     * Hook for custom child definitions; this base implementation
     * always declines by returning false.
     * @param $def Element definition
     * @return false
     */
    public function getChildDef($def) {return false;}

    // -- Convenience -----------------------------------------------------

    /**
     * Convenience function that sets up a new element.
     * @param $element Name of element to add
     * @param $type Name of the content set the element is added to;
     *        a falsy value skips the content set registration
     * @param $contents Content model string (see parseContents()); a
     *        non-string value is assigned directly as the child definition
     * @param $attr_includes What attribute collections to register to
     *        element (string or array, see mergeInAttrIncludes())
     * @param $attr What unique attributes does the element define?
     * @return Created element definition object, so you
     *         can set advanced parameters
     */
    public function addElement($element, $type, $contents, $attr_includes = array(), $attr = array()) {
        $this->elements[] = $element;
        // parse content_model
        list($content_model_type, $content_model) = $this->parseContents($contents);
        // merge in attribute inclusions
        $this->mergeInAttrIncludes($attr, $attr_includes);
        // add element to content sets
        if ($type) $this->addElementToContentSet($element, $type);
        // create element
        $this->info[$element] = HTMLPurifier_ElementDef::create(
            $content_model, $content_model_type, $attr
        );
        // literal object $contents means direct child manipulation
        if (!is_string($contents)) $this->info[$element]->child = $contents;
        return $this->info[$element];
    }

    /**
     * Convenience function that creates a totally blank, non-standalone
     * element. If the module already has a definition for the element, an
     * error is triggered and the existing definition is returned.
     * @param $element Name of element to create
     * @return Element definition
     */
    public function addBlankElement($element) {
        if (!isset($this->info[$element])) {
            $this->elements[] = $element;
            $this->info[$element] = new HTMLPurifier_ElementDef();
            $this->info[$element]->standalone = false;
        } else {
            trigger_error("Definition for $element already exists in module, cannot redefine");
        }
        return $this->info[$element];
    }

    /**
     * Convenience function that registers an element to a content set.
     * @param $element Element to register
     * @param $type Name of the content set
     */
    public function addElementToContentSet($element, $type) {
        if (!isset($this->content_sets[$type])) $this->content_sets[$type] = '';
        else $this->content_sets[$type] .= ' | ';
        $this->content_sets[$type] .= $element;
    }

    /**
     * Convenience function that transforms a single-string contents
     * declaration into separate content model type and content model.
     * Recognizes the shorthands 'Empty', 'Inline' and 'Flow'; any other
     * string is split at the colon ("Type: model"). A string without a
     * colon yields an empty content model.
     * @param $contents Allowed children in form of:
     *        "$content_model_type: $content_model"
     * @return array($content_model_type, $content_model), or
     *         array(null, null) when $contents is not a string
     */
    public function parseContents($contents) {
        if (!is_string($contents)) return array(null, null); // defer
        switch ($contents) {
            // check for shorthand content model forms
            case 'Empty':
                return array('empty', '');
            case 'Inline':
                return array('optional', 'Inline | #PCDATA');
            case 'Flow':
                return array('optional', 'Flow | #PCDATA');
        }
        $parts = explode(':', $contents);
        $content_model_type = strtolower(trim($parts[0]));
        // the model is optional: do not read a missing second part
        $content_model = isset($parts[1]) ? trim($parts[1]) : '';
        return array($content_model_type, $content_model);
    }

    /**
     * Convenience function that merges a list of attribute includes into
     * an attribute array, storing them under index 0.
     * @param $attr Reference to attr array to modify
     * @param $attr_includes Array of includes, or a single include name;
     *        an empty non-array value counts as "no includes"
     */
    public function mergeInAttrIncludes(&$attr, $attr_includes) {
        if (!is_array($attr_includes)) {
            if (empty($attr_includes)) $attr_includes = array();
            else $attr_includes = array($attr_includes);
        }
        $attr[0] = $attr_includes;
    }

    /**
     * Convenience function that generates a lookup table with boolean
     * true as value.
     * @param $list List of values to turn into a lookup
     * @note You can also pass an arbitrary number of string arguments in
     *       place of the regular argument
     * @return Lookup array equivalent of list; null values are skipped
     */
    public function makeLookup($list) {
        if (is_string($list)) $list = func_get_args();
        $ret = array();
        foreach ($list as $value) {
            if (is_null($value)) continue;
            $ret[$value] = true;
        }
        return $ret;
    }

    /**
     * Hook that the module manager calls with the configuration object
     * when the module is activated; does nothing by default.
     * @param $config Configuration object
     */
    public function setup($config) {}

}
05002 
05003 
05004 
05005 
05006 
/**
 * Keeps track of the registered HTML modules, decides which of them are
 * active for a configuration and assembles element definitions from them.
 */
class HTMLPurifier_HTMLModuleManager
{

    /**
     * Instance of HTMLPurifier_DoctypeRegistry
     */
    public $doctypes;

    /**
     * Doctype object produced by the registry in setup()
     */
    public $doctype;

    /**
     * Instance of HTMLPurifier_AttrTypes
     */
    public $attrTypes;

    /**
     * Active modules, keyed by module name (see processModule()).
     */
    public $modules = array();

    /**
     * Module objects that were registered with registerModule(), keyed
     * by module name.
     */
    public $registeredModules = array();

    /**
     * List of names of modules added through addModule(); these are
     * appended to the doctype's modules in setup().
     */
    public $userModules = array();

    /**
     * Associative array of element name to the list of names of the
     * modules that define the element.
     */
    public $elementLookup = array();

    /** List of class name prefixes tried when a module is given by name */
    public $prefixes = array('HTMLPurifier_HTMLModule_');

    /** Instance of HTMLPurifier_ContentSets, created in setup() */
    public $contentSets;

    /** Instance of HTMLPurifier_AttrCollections, created in setup() */
    public $attrCollections;

    /** Value of the HTML.Trusted directive: if true, unsafe modules are used too */
    public $trusted = false;

    /**
     * Creates the attribute type and doctype registries and registers
     * the built-in doctypes along with their module lists.
     */
    public function __construct() {

        // editable internal objects
        $this->attrTypes = new HTMLPurifier_AttrTypes();
        $this->doctypes  = new HTMLPurifier_DoctypeRegistry();

        // setup basic modules
        $common = array(
            'CommonAttributes', 'Text', 'Hypertext', 'List',
            'Presentation', 'Edit', 'Bdo', 'Tables', 'Image',
            'StyleAttribute',
            // Unsafe:
            'Scripting', 'Object', 'Forms',
            // Sorta legacy, but present in strict:
            'Name',
        );
        $transitional = array('Legacy', 'Target', 'Iframe');
        $xml = array('XMLCommonAttributes');
        $non_xml = array('NonXMLCommonAttributes');

        // setup basic doctypes
        $this->doctypes->register(
            'HTML 4.01 Transitional', false,
            array_merge($common, $transitional, $non_xml),
            array('Tidy_Transitional', 'Tidy_Proprietary'),
            array(),
            '-//W3C//DTD HTML 4.01 Transitional//EN',
            'http://www.w3.org/TR/html4/loose.dtd'
        );

        $this->doctypes->register(
            'HTML 4.01 Strict', false,
            array_merge($common, $non_xml),
            array('Tidy_Strict', 'Tidy_Proprietary', 'Tidy_Name'),
            array(),
            '-//W3C//DTD HTML 4.01//EN',
            'http://www.w3.org/TR/html4/strict.dtd'
        );

        $this->doctypes->register(
            'XHTML 1.0 Transitional', true,
            array_merge($common, $transitional, $xml, $non_xml),
            array('Tidy_Transitional', 'Tidy_XHTML', 'Tidy_Proprietary', 'Tidy_Name'),
            array(),
            '-//W3C//DTD XHTML 1.0 Transitional//EN',
            'http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd'
        );

        // NOTE(review): 'Tidy_Strict' is listed twice here and in the
        // XHTML 1.1 list below, so setup() processes that module twice;
        // kept as-is, confirm whether the repetition is intentional.
        $this->doctypes->register(
            'XHTML 1.0 Strict', true,
            array_merge($common, $xml, $non_xml),
            array('Tidy_Strict', 'Tidy_XHTML', 'Tidy_Strict', 'Tidy_Proprietary', 'Tidy_Name'),
            array(),
            '-//W3C//DTD XHTML 1.0 Strict//EN',
            'http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd'
        );

        $this->doctypes->register(
            'XHTML 1.1', true,
            // Iframe is a real XHTML 1.1 module, despite being
            // "transitional"!
            array_merge($common, $xml, array('Ruby', 'Iframe')),
            array('Tidy_Strict', 'Tidy_XHTML', 'Tidy_Proprietary', 'Tidy_Strict', 'Tidy_Name'), // Tidy_XHTML1_1
            array(),
            '-//W3C//DTD XHTML 1.1//EN',
            'http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd'
        );

    }

    /**
     * Registers a module to the recognized module list, useful for
     * overloading pre-existing modules.
     * @param $module Mixed: string module name, with or without one of the
     *        registered prefixes, or instance of a module. A string is
     *        resolved to a class (prefixed names are tried first) which
     *        is then instantiated without arguments.
     * @param $overload Boolean whether or not to overload previous modules.
     *        If this is not set, and you do overload a module,
     *        a warning is triggered; the module is replaced regardless.
     */
    public function registerModule($module, $overload = false) {
        if (is_string($module)) {
            // attempt to load the module
            $original_module = $module;
            $ok = false;
            foreach ($this->prefixes as $prefix) {
                $module = $prefix . $original_module;
                if (class_exists($module)) {
                    $ok = true;
                    break;
                }
            }
            if (!$ok) {
                // last resort: the name as-is
                $module = $original_module;
                if (!class_exists($module)) {
                    trigger_error($original_module . ' module does not exist',
                        E_USER_ERROR);
                    return;
                }
            }
            $module = new $module();
        }
        if (empty($module->name)) {
            trigger_error('Module instance of ' . get_class($module) . ' must have name');
            return;
        }
        if (!$overload && isset($this->registeredModules[$module->name])) {
            trigger_error('Overloading ' . $module->name . ' without explicit overload parameter', E_USER_WARNING);
        }
        $this->registeredModules[$module->name] = $module;
    }

    /**
     * Adds a module to the current doctype by first registering it,
     * and then tacking it on to the active doctype
     * @param $module Module name or module instance
     */
    public function addModule($module) {
        $this->registerModule($module);
        if (is_object($module)) $module = $module->name;
        $this->userModules[] = $module;
    }

    /**
     * Adds a class prefix that registerModule() will use to resolve a
     * string name to a concrete class
     * @param $prefix Class name prefix
     */
    public function addPrefix($prefix) {
        $this->prefixes[] = $prefix;
    }

    /**
     * Performs processing on modules, after being called you may
     * use getElement() and getElements()
     * @param $config Instance of HTMLPurifier_Config
     */
    public function setup($config) {

        $this->trusted = $config->get('HTML.Trusted');

        // generate
        $this->doctype = $this->doctypes->make($config);
        $modules = $this->doctype->modules;

        // take out the default modules that aren't allowed
        $lookup = $config->get('HTML.AllowedModules');
        $special_cases = $config->get('HTML.CoreModules');

        if (is_array($lookup)) {
            foreach ($modules as $k => $m) {
                // core modules stay, no matter what the lookup says
                if (isset($special_cases[$m])) continue;
                if (!isset($lookup[$m])) unset($modules[$k]);
            }
        }

        // custom modules
        if ($config->get('HTML.Proprietary')) {
            $modules[] = 'Proprietary';
        }
        if ($config->get('HTML.SafeObject')) {
            $modules[] = 'SafeObject';
        }
        if ($config->get('HTML.SafeEmbed')) {
            $modules[] = 'SafeEmbed';
        }
        if ($config->get('HTML.Nofollow')) {
            $modules[] = 'Nofollow';
        }
        if ($config->get('HTML.TargetBlank')) {
            $modules[] = 'TargetBlank';
        }

        // merge in custom modules
        $modules = array_merge($modules, $this->userModules);

        foreach ($modules as $module) {
            $this->processModule($module);
            $this->modules[$module]->setup($config);
        }

        foreach ($this->doctype->tidyModules as $module) {
            $this->processModule($module);
            $this->modules[$module]->setup($config);
        }

        // prepare any injectors: instantiate those given by short name
        // and re-key all of them by injector name
        foreach ($this->modules as $module) {
            $n = array();
            foreach ($module->info_injector as $i => $injector) {
                if (!is_object($injector)) {
                    $class = "HTMLPurifier_Injector_$injector";
                    $injector = new $class;
                }
                $n[$injector->name] = $injector;
            }
            $module->info_injector = $n;
        }

        // setup lookup table based on all valid modules
        foreach ($this->modules as $module) {
            foreach ($module->info as $name => $def) {
                if (!isset($this->elementLookup[$name])) {
                    $this->elementLookup[$name] = array();
                }
                $this->elementLookup[$name][] = $module->name;
            }
        }

        // note the different choice
        $this->contentSets = new HTMLPurifier_ContentSets(
            // content set assembly deals with all possible modules,
            // not just ones deemed to be "safe"
            $this->modules
        );
        $this->attrCollections = new HTMLPurifier_AttrCollections(
            $this->attrTypes,
            // there is no way to directly disable a global attribute,
            // but using AllowedAttributes or simply not including
            // the module in your custom doctype should be sufficient
            $this->modules
        );
    }

    /**
     * Takes a module and adds it to the active module collection,
     * registering it if necessary.
     * @param $module Module name, or a module instance. An instance is
     *        always passed to registerModule() and afterwards addressed
     *        by its name.
     */
    public function processModule($module) {
        if (is_object($module)) {
            $this->registerModule($module);
            // array keys must be scalar: continue with the module's name
            $module = $module->name;
        } elseif (!isset($this->registeredModules[$module])) {
            $this->registerModule($module);
        }
        if (!isset($this->registeredModules[$module])) {
            // registration failed; registerModule() already reported why
            return;
        }
        $this->modules[$module] = $this->registeredModules[$module];
    }

    /**
     * Retrieves merged element definitions.
     * @return Array of HTMLPurifier_ElementDef, keyed by element name
     */
    public function getElements() {

        $elements = array();
        foreach ($this->modules as $module) {
            if (!$this->trusted && !$module->safe) continue;
            foreach ($module->info as $name => $v) {
                if (isset($elements[$name])) continue;
                $elements[$name] = $this->getElement($name);
            }
        }

        // remove dud elements, this happens when an element that
        // appeared to be safe actually wasn't
        foreach ($elements as $n => $v) {
            if ($v === false) unset($elements[$n]);
        }

        return $elements;

    }

    /**
     * Retrieves a single merged element definition
     * @param $name Name of element
     * @param $trusted Boolean trusted overriding parameter: set to true
     *        if you want the full version of an element; when null, the
     *        manager's own trusted flag is used
     * @return Merged HTMLPurifier_ElementDef, or false if the element is
     *         unknown or no standalone definition was found for it
     * @note You may notice that modules are getting iterated over twice (once
     *       in getElements() and once here). This
     *       is because
     */
    public function getElement($name, $trusted = null) {

        if (!isset($this->elementLookup[$name])) {
            return false;
        }

        // setup global state variables
        $def = false;
        if ($trusted === null) $trusted = $this->trusted;

        // iterate through each module that has registered itself to this
        // element
        foreach($this->elementLookup[$name] as $module_name) {

            $module = $this->modules[$module_name];

            // refuse to create/merge from a module that is deemed unsafe--
            // pretend the module doesn't exist--when trusted mode is not on.
            if (!$trusted && !$module->safe) {
                continue;
            }

            // clone is used because, ideally speaking, the original
            // definition should not be modified. Usually, this will
            // make no difference, but for consistency's sake
            $new_def = clone $module->info[$name];

            if (!$def && $new_def->standalone) {
                $def = $new_def;
            } elseif ($def) {
                // This will occur even if $new_def is standalone. In practice,
                // this will usually result in a full replacement.
                $def->mergeIn($new_def);
            } else {
                // :TODO:
                // non-standalone definitions that don't have a standalone
                // to merge into could be deferred to the end
                // HOWEVER, it is perfectly valid for a non-standalone
                // definition to lack a standalone definition, even
                // after all processing: this allows us to safely
                // specify extra attributes for elements that may not be
                // enabled all in one place.  In particular, this might
                // be the case for trusted elements.  WARNING: care must
                // be taken that the /extra/ definitions are all safe.
                continue;
            }

            // attribute value expansions
            $this->attrCollections->performInclusions($def->attr);
            $this->attrCollections->expandIdentifiers($def->attr, $this->attrTypes);

            // descendants_are_inline, for ChildDef_Chameleon
            if (is_string($def->content_model) &&
                strpos($def->content_model, 'Inline') !== false) {
                if ($name != 'del' && $name != 'ins') {
                    // this is for you, ins/del
                    $def->descendants_are_inline = true;
                }
            }

            $this->contentSets->generateChildDef($def, $module);
        }

        // This can occur if there is a blank definition, but no base to
        // mix it in with
        if (!$def) return false;

        // add information on required attributes
        foreach ($def->attr as $attr_name => $attr_def) {
            if ($attr_def->required) {
                $def->required_attr[] = $attr_name;
            }
        }

        return $def;

    }

}
05418 
05419 
05420 
05421 
05422 
/**
 * Keeps track of the IDs that have been seen, so that duplicates and
 * blacklisted IDs can be rejected.
 */
class HTMLPurifier_IDAccumulator
{

    /**
     * Lookup table of the IDs accumulated so far: the ID is the key,
     * the value is always true.
     */
    public $ids = array();

    /**
     * Builds an accumulator that is pre-loaded with the IDs from the
     * Attr.IDBlacklist directive.
     * @param $config Instance of HTMLPurifier_Config
     * @param $context Instance of HTMLPurifier_Context (not used here)
     * @return Fully initialized HTMLPurifier_IDAccumulator
     */
    public static function build($config, $context) {
        $accumulator = new self();
        $blacklist = $config->get('Attr.IDBlacklist');
        $accumulator->load($blacklist);
        return $accumulator;
    }

    /**
     * Adds an ID to the lookup table.
     * @param $id ID to be added.
     * @return Bool status, true if success, false if there's a dupe
     */
    public function add($id) {
        if (!isset($this->ids[$id])) {
            $this->ids[$id] = true;
            return true;
        }
        return false;
    }

    /**
     * Loads a list of IDs into the lookup table.
     * @param $array_of_ids Array of IDs to load
     * @note This function doesn't care about duplicates
     */
    public function load($array_of_ids) {
        foreach ($array_of_ids as $loaded_id) {
            $this->ids[$loaded_id] = true;
        }
    }

}
05472 
05473 
05474 
05475 
05476 
/**
 * Base class for injectors: objects that are handed tokens through the
 * handleText/handleElement/handleEnd hooks and may inspect the surrounding
 * token stream through the protected navigation helpers.
 */
abstract class HTMLPurifier_Injector
{

    /**
     * Name of the injector.
     */
    public $name;

    /**
     * HTML definition, fetched from the configuration in prepare().
     */
    protected $htmlDefinition;

    /**
     * Reference to the context's 'CurrentNesting' variable (bound in
     * prepare()); allowsElement() treats it as the stack of open elements.
     */
    protected $currentNesting;

    /**
     * Reference to the context's 'InputTokens' variable (bound in prepare()).
     */
    protected $inputTokens;

    /**
     * Reference to the context's 'InputIndex' variable (bound in prepare());
     * used as the starting position by forward(), backward() and current().
     */
    protected $inputIndex;

    /**
     * Elements and attributes this injector requires. Either a list of
     * element names, or a map of element name => list of attribute names.
     * Checked by checkNeeded().
     */
    public $needed = array();

    /**
     * Pending rewind index, or false when no rewind has been requested.
     */
    protected $rewind = false;

    /**
     * Requests a rewind to the given index; the request is stored until it
     * is collected with getRewind().
     * @param $index Index to rewind to
     */
    public function rewind($index)
    {
        $this->rewind = $index;
    }

    /**
     * Returns the pending rewind index and clears it.
     * @return The stored index, or false if none was pending
     */
    public function getRewind()
    {
        $pending = $this->rewind;
        $this->rewind = false;
        return $pending;
    }

    /**
     * Prepares the injector for use: stores the HTML definition, verifies
     * the requirements listed in $needed, and binds the context variables
     * by reference.
     * @param $config  Configuration object
     * @param $context Context object
     * @return false on success, otherwise the string returned by
     *         checkNeeded() naming what is missing (context variables are
     *         not bound in that case)
     */
    public function prepare($config, $context)
    {
        $this->htmlDefinition = $config->getHTMLDefinition();
        // checkNeeded() is deliberately callable on its own; it re-fetches
        // the definition from $config rather than relying on the line above.
        $missing = $this->checkNeeded($config);
        if ($missing === false) {
            $this->currentNesting =& $context->get('CurrentNesting');
            $this->inputTokens    =& $context->get('InputTokens');
            $this->inputIndex     =& $context->get('InputIndex');
        }
        return $missing;
    }

    /**
     * Checks that every element (and attribute) listed in $needed exists in
     * the HTML definition.
     * @param $config Configuration object
     * @return false if everything is available; otherwise the first missing
     *         item, as "element" or "element.attribute"
     */
    public function checkNeeded($config)
    {
        $definition = $config->getHTMLDefinition();
        foreach ($this->needed as $element => $attributes) {
            // list-style entry: the value is the element name itself
            if (is_int($element)) {
                $element = $attributes;
            }
            if (!isset($definition->info[$element])) {
                return $element;
            }
            if (is_array($attributes)) {
                foreach ($attributes as $attr_name) {
                    if (!isset($definition->info[$element]->attr[$attr_name])) {
                        return $element . '.' . $attr_name;
                    }
                }
            }
        }
        return false;
    }

    /**
     * Tests whether an element may appear at the current nesting position.
     * @param $name Name of the element to test
     * @return true if the parent's child definition lists the element and
     *         neither the parent nor any other open element excludes it
     */
    public function allowsElement($name)
    {
        if (empty($this->currentNesting)) {
            $parent = $this->htmlDefinition->info_parent_def;
        } else {
            // peek at the innermost open element by popping and re-pushing
            $innermost = array_pop($this->currentNesting);
            $this->currentNesting[] = $innermost;
            $parent = $this->htmlDefinition->info[$innermost->name];
        }
        $permitted = isset($parent->child->elements[$name]) && !isset($parent->excludes[$name]);
        if (!$permitted) {
            return false;
        }
        // the innermost element was handled above; walk the remaining ancestors
        $depth = count($this->currentNesting) - 2;
        while ($depth >= 0) {
            $ancestor     = $this->currentNesting[$depth];
            $ancestor_def = $this->htmlDefinition->info[$ancestor->name];
            if (isset($ancestor_def->excludes[$name])) {
                return false;
            }
            $depth--;
        }
        return true;
    }

    /**
     * Steps one token forward in the input stream.
     * @param $i       Iterator index; pass null on the first call to start
     *                 right after the current input index
     * @param $current Receives the token at the new index
     * @return true if a token exists at the new index, false otherwise
     *         ($current is left untouched in that case)
     */
    protected function forward(&$i, &$current)
    {
        if ($i !== null) {
            $i++;
        } else {
            $i = $this->inputIndex + 1;
        }
        if (isset($this->inputTokens[$i])) {
            $current = $this->inputTokens[$i];
            return true;
        }
        return false;
    }

    /**
     * Like forward(), but also keeps a nesting counter and stops once an
     * end token is met while the counter is at (or below) zero.
     * @param $i       Iterator index, see forward()
     * @param $current Receives the token at the new index
     * @param $nesting Nesting counter; null is initialised to 0
     * @return false when there are no more tokens or an unbalanced end token
     *         was reached, true otherwise
     */
    protected function forwardUntilEndToken(&$i, &$current, &$nesting)
    {
        if (!$this->forward($i, $current)) {
            return false;
        }
        if ($nesting === null) {
            $nesting = 0;
        }
        if ($current instanceof HTMLPurifier_Token_Start) {
            $nesting++;
            return true;
        }
        if ($current instanceof HTMLPurifier_Token_End) {
            if ($nesting <= 0) {
                return false;
            }
            $nesting--;
        }
        return true;
    }

    /**
     * Steps one token backward in the input stream.
     * @param $i       Iterator index; pass null on the first call to start
     *                 right before the current input index
     * @param $current Receives the token at the new index
     * @return true if the new index is non-negative, false otherwise
     */
    protected function backward(&$i, &$current)
    {
        if ($i !== null) {
            $i--;
        } else {
            $i = $this->inputIndex - 1;
        }
        if ($i < 0) {
            return false;
        }
        $current = $this->inputTokens[$i];
        return true;
    }

    /**
     * Initialises an iterator pair at the current position without moving.
     * @param $i       Iterator index; null is replaced by the input index
     * @param $current Receives the token at index $i
     */
    protected function current(&$i, &$current)
    {
        if ($i === null) {
            $i = $this->inputIndex;
        }
        $current = $this->inputTokens[$i];
    }

    /**
     * Hook for text tokens; does nothing by default.
     */
    public function handleText(&$token)
    {
    }

    /**
     * Hook for element tokens; does nothing by default.
     */
    public function handleElement(&$token)
    {
    }

    /**
     * Hook for end tokens; by default simply forwards to notifyEnd().
     */
    public function handleEnd(&$token)
    {
        $this->notifyEnd($token);
    }

    /**
     * Notification hook invoked by the default handleEnd(); does nothing by
     * default.
     */
    public function notifyEnd($token)
    {
    }

}
05712 
05713 
05714 
05715 
05716 
/**
 * Holds the messages of one language and formats them. The message tables
 * are filled lazily from HTMLPurifier_LanguageFactory's cache.
 */
class HTMLPurifier_Language
{

    /**
     * Language code of this object.
     */
    public $code = 'en';

    /**
     * Fallback language code (overwritten by load()).
     */
    public $fallback = false;

    /**
     * Map of message key => message text.
     */
    public $messages = array();

    /**
     * Map of error number => error name.
     */
    public $errorNames = array();

    /**
     * Error flag; HTMLPurifier_LanguageFactory::create() sets it when no
     * fallback could be determined for the requested language.
     */
    public $error = false;

    /**
     * Whether load() has already populated this object.
     */
    public $_loaded = false;

    /**
     * Configuration and context objects supplied to the constructor.
     */
    protected $config, $context;

    /**
     * @param $config  Configuration object
     * @param $context Context object; formatMessage() reads 'Generator' from it
     */
    public function __construct($config, $context)
    {
        $this->config  = $config;
        $this->context = $context;
    }

    /**
     * Populates this object from the language factory's cache, once. Every
     * key listed in the factory's $keys is copied onto the property of the
     * same name.
     */
    public function load()
    {
        if (!$this->_loaded) {
            $factory = HTMLPurifier_LanguageFactory::instance();
            $factory->loadLanguage($this->code);
            foreach ($factory->keys as $property) {
                $this->$property = $factory->cache[$this->code][$property];
            }
            $this->_loaded = true;
        }
    }

    /**
     * Looks up a message.
     * @param $key Message key
     * @return The message text, or "[$key]" when the key is unknown
     */
    public function getMessage($key)
    {
        if (!$this->_loaded) {
            $this->load();
        }
        return isset($this->messages[$key]) ? $this->messages[$key] : "[$key]";
    }

    /**
     * Looks up the name of an error number.
     * @param $int Error number
     * @return The error name, or "[Error: $int]" when the number is unknown
     */
    public function getErrorName($int)
    {
        if (!$this->_loaded) {
            $this->load();
        }
        return isset($this->errorNames[$int]) ? $this->errorNames[$int] : "[Error: $int]";
    }

    /**
     * Joins a list into one string, using the 'Item separator' message
     * between items and 'Item separator last' before the final item.
     * @param $array Zero-indexed list of strings
     * @return The joined string ('' for an empty list)
     */
    public function listify($array)
    {
        $sep      = $this->getMessage('Item separator');
        $sep_last = $this->getMessage('Item separator last');
        $total    = count($array);
        $final    = $total - 1;
        $joined   = '';
        for ($pos = 0; $pos < $total; $pos++) {
            if ($pos > 0) {
                $joined .= ($pos < $final) ? $sep : $sep_last;
            }
            $joined .= $array[$pos];
        }
        return $joined;
    }

    /**
     * Formats a message by substituting "$<index>" style placeholders.
     *
     * For an argument at index i the following placeholders are filled:
     *  - scalar:            $i
     *  - list array:        $i (items joined with listify())
     *  - associative array: $i.Keys and $i.Values (each joined with listify())
     *  - HTMLPurifier_Token: $i.Name, $i.Data (when set), $i.Serialized,
     *    $i.Compact (serialized without attributes) and $i.Line
     * Objects of any other class are ignored.
     *
     * @param $key  Message key
     * @param $args Map of index => argument
     * @return The formatted message, or "[$key]" when the key is unknown
     */
    public function formatMessage($key, $args = array())
    {
        if (!$this->_loaded) {
            $this->load();
        }
        if (!isset($this->messages[$key])) {
            return "[$key]";
        }
        $template  = $this->messages[$key];
        $subst     = array();
        $generator = false;
        foreach ($args as $i => $value) {
            $placeholder = '$' . $i;

            if (is_object($value)) {
                if ($value instanceof HTMLPurifier_Token) {
                    // the generator is only fetched when a token shows up
                    if (!$generator) {
                        $generator = $this->context->get('Generator');
                    }
                    if (isset($value->name)) {
                        $subst[$placeholder . '.Name'] = $value->name;
                    }
                    if (isset($value->data)) {
                        $subst[$placeholder . '.Data'] = $value->data;
                    }
                    $serialized = $generator->generateFromToken($value);
                    $subst[$placeholder . '.Serialized'] = $serialized;
                    $subst[$placeholder . '.Compact']    = $serialized;
                    // compact form: same token with its attributes stripped
                    if (!empty($value->attr)) {
                        $bare = clone $value;
                        $bare->attr = array();
                        $subst[$placeholder . '.Compact'] = $generator->generateFromToken($bare);
                    }
                    $subst[$placeholder . '.Line'] = $value->line ? $value->line : 'unknown';
                }
                continue;
            }

            if (is_array($value)) {
                $keys = array_keys($value);
                if (array_keys($keys) === $keys) {
                    // keys are 0..n-1: treat as a plain list
                    $subst[$placeholder] = $this->listify($value);
                } else {
                    // associative array: there is no plain "$i" form for it
                    $subst[$placeholder . '.Keys']   = $this->listify($keys);
                    $subst[$placeholder . '.Values'] = $this->listify(array_values($value));
                }
                continue;
            }

            $subst[$placeholder] = $value;
        }
        return strtr($template, $subst);
    }

}
05876 
05877 
05878 
05879 
05880 
/**
 * Creates HTMLPurifier_Language objects and caches the raw language data
 * (fallback, messages, error names) read from the language message files.
 */
class HTMLPurifier_LanguageFactory
{

    /**
     * Cache of loaded language data, indexed by language code; each entry
     * is an array indexed by the names listed in $keys.
     */
    public $cache;

    /**
     * Names of the variables a language message file may define; they are
     * also the keys of every cache entry.
     */
    public $keys = array('fallback', 'messages', 'errorNames');

    /**
     * Validator used on language codes; set by setup().
     */
    protected $validator;

    /**
     * Base directory under which the "Language" directory is looked up;
     * set by setup().
     */
    protected $dir;

    /**
     * Keys whose arrays are combined with the fallback via the + operator
     * (entries of the language itself win).
     */
    protected $mergeable_keys_map = array('messages' => true, 'errorNames' => true);

    /**
     * Keys whose arrays are combined with the fallback via array_merge().
     */
    protected $mergeable_keys_list = array();

    /**
     * Retrieves the shared factory instance.
     * @param $prototype Optional object to install as the shared instance,
     *                   or boolean true to replace the shared instance with
     *                   a freshly set-up default factory
     * @return The shared instance
     */
    public static function instance($prototype = null)
    {
        static $instance = null;
        // Previously the "true" case was tested with `$prototype == true`
        // inside a branch that is only reached when $prototype is null, so
        // instance(true) stored the boolean itself instead of resetting.
        if ($prototype !== null && $prototype !== true) {
            $instance = $prototype;
        } elseif ($instance === null || $prototype === true) {
            $instance = new HTMLPurifier_LanguageFactory();
            $instance->setup();
        }
        return $instance;
    }

    /**
     * Sets up the validator and the base directory. Kept separate from
     * construction; instance() calls it on every factory it creates.
     */
    public function setup()
    {
        $this->validator = new HTMLPurifier_AttrDef_Lang();
        $this->dir = HTMLPURIFIER_PREFIX . '/HTMLPurifier';
    }

    /**
     * Creates a language object.
     * @param $config  Configuration object
     * @param $context Context object
     * @param $code    Language code; when false, the Core.Language
     *                 directive is used instead
     * @return Language object whose $code is the validated code. When no
     *         class exists for the code, the object is built from the
     *         fallback language, and its $error flag is set if no fallback
     *         was recorded for the code.
     */
    public function create($config, $context, $code = false)
    {

        // validate language code
        if ($code === false) {
            $code = $this->validator->validate(
              $config->get('Core.Language'), $config, $context
            );
        } else {
            $code = $this->validator->validate($code, $config, $context);
        }
        if ($code === false) $code = 'en'; // malformed code becomes English

        $pcode = str_replace('-', '_', $code); // make valid PHP classname

        if ($code == 'en') {
            $lang = new HTMLPurifier_Language($config, $context);
        } else {
            $class = 'HTMLPurifier_Language_' . $pcode;
            $file  = $this->dir . '/Language/classes/' . $code . '.php';
            if (file_exists($file) || class_exists($class, false)) {
                $lang = new $class($config, $context);
            } else {
                // Go fallback
                $raw_fallback = $this->getFallbackFor($code);
                $fallback = $raw_fallback ? $raw_fallback : 'en';
                $lang = $this->create($config, $context, $fallback);
                if (!$raw_fallback) {
                    $lang->error = true;
                }
            }
        }

        // the object always reports the requested code, even when it was
        // built from a fallback language
        $lang->code = $code;

        return $lang;

    }

    /**
     * Returns the fallback language recorded for a language code, loading
     * the language data first if necessary.
     * @param $code Language code
     * @return The cached 'fallback' entry for the code
     */
    public function getFallbackFor($code)
    {
        $this->loadLanguage($code);
        return $this->cache[$code]['fallback'];
    }

    /**
     * Loads the data of a language (and, recursively, of its fallbacks)
     * into the cache. Does nothing when the code is already cached.
     *
     * A circular fallback chain raises an E_USER_ERROR; if the error
     * handler lets execution continue, English is used as the fallback.
     *
     * @param $code Language code
     */
    public function loadLanguage($code)
    {
        // Codes whose fallback chain is currently being resolved. An entry
        // only lives while the corresponding call is on the stack, so that
        // reloading a language later (e.g. from another factory instance,
        // which shares this static) is not mistaken for a cycle.
        static $languages_seen = array();

        // abort if we've already loaded it
        if (isset($this->cache[$code])) return;

        // generate filename
        $filename = $this->dir . '/Language/messages/' . $code . '.php';

        // default fallback : may be overwritten by the ensuing include
        $fallback = ($code != 'en') ? 'en' : false;

        // load primary localisation
        if (!file_exists($filename)) {
            // skip the include: will rely solely on fallback
            $cache = array();
        } else {
            include $filename;
            $cache = compact($this->keys);
        }

        // load fallback localisation
        if (!empty($fallback)) {

            // infinite recursion guard
            if (isset($languages_seen[$code])) {
                // Reset the guard before raising the error so that an
                // aborting error handler cannot leave stale entries behind.
                $languages_seen = array();
                trigger_error('Circular fallback reference in language ' .
                    $code, E_USER_ERROR);
                $fallback = 'en';
            }
            $languages_seen[$code] = true;

            // load the fallback recursively
            $this->loadLanguage($fallback);
            $fallback_cache = $this->cache[$fallback];

            // merge fallback with current language
            foreach ( $this->keys as $key ) {
                if (isset($cache[$key]) && isset($fallback_cache[$key])) {
                    if (isset($this->mergeable_keys_map[$key])) {
                        $cache[$key] = $cache[$key] + $fallback_cache[$key];
                    } elseif (isset($this->mergeable_keys_list[$key])) {
                        $cache[$key] = array_merge( $fallback_cache[$key], $cache[$key] );
                    }
                } else {
                    $cache[$key] = $fallback_cache[$key];
                }
            }

            // resolution of this code is complete
            unset($languages_seen[$code]);

        }

        // save to cache for later retrieval
        $this->cache[$code] = $cache;

        return;
    }

}
06075 
06076 
06077 
06078 
06079 
/**
 * Represents a measurable length: a numeric part plus an optional unit.
 * Validation is lazy and its result is memoised.
 */
class HTMLPurifier_Length
{

    /**
     * Numeric part, stored as a string.
     */
    protected $n;

    /**
     * Unit string, or false when there is no unit.
     */
    protected $unit;

    /**
     * Memoised validation result; null until isValid() has run.
     */
    protected $isValid;

    /**
     * Lookup table of accepted (lower-case) units.
     */
    protected static $allowedUnits = array(
        'em' => true, 'ex' => true, 'px' => true, 'in' => true,
        'cm' => true, 'mm' => true, 'pt' => true, 'pc' => true
    );

    /**
     * @param $n Numeric part; cast to string
     * @param $u Unit; cast to string unless it is false (no unit)
     */
    public function __construct($n = '0', $u = false)
    {
        $this->n = (string) $n;
        if ($u === false) {
            $this->unit = false;
        } else {
            $this->unit = (string) $u;
        }
    }

    /**
     * Splits a string such as "12px" into number and unit.
     * @param $s String to parse, or an HTMLPurifier_Length (returned as is)
     * @return HTMLPurifier_Length; not validated yet
     */
    static public function make($s)
    {
        if ($s instanceof HTMLPurifier_Length) {
            return $s;
        }
        // the numeric part is the leading run of digits, dot and sign characters
        $split  = strspn($s, '1234567890.+-');
        $number = substr($s, 0, $split);
        $suffix = substr($s, $split);
        return new HTMLPurifier_Length($number, $suffix === '' ? false : $suffix);
    }

    /**
     * Validates number and unit, normalising both in place (signed zero
     * becomes '0', the unit is lower-cased, the number is replaced by the
     * validator's result).
     * @return true if the length is valid, false otherwise
     */
    protected function validate()
    {
        // Special case: signed zero is plain zero
        if ($this->n === '+0' || $this->n === '-0') {
            $this->n = '0';
        }
        // a bare zero is the only length allowed without a unit
        if ($this->n === '0' && $this->unit === false) {
            return true;
        }
        if (!ctype_lower($this->unit)) {
            $this->unit = strtolower($this->unit);
        }
        if (!isset(self::$allowedUnits[$this->unit])) {
            return false;
        }
        // Hack: borrow the CSS number validator, without config or context
        $number_def = new HTMLPurifier_AttrDef_CSS_Number();
        $clean = $number_def->validate($this->n, false, false);
        if ($clean === false) {
            return false;
        }
        $this->n = $clean;
        return true;
    }

    /**
     * @return Number concatenated with unit, or false if the length is invalid
     */
    public function toString()
    {
        return $this->isValid() ? $this->n . $this->unit : false;
    }

    /**
     * @return The numeric part as a string
     */
    public function getN()
    {
        return $this->n;
    }

    /**
     * @return The unit string, or false when there is none
     */
    public function getUnit()
    {
        return $this->unit;
    }

    /**
     * @return Whether the length is valid; validation runs on the first call
     *         and the result is reused afterwards
     */
    public function isValid()
    {
        if ($this->isValid === null) {
            $this->isValid = $this->validate();
        }
        return $this->isValid;
    }

    /**
     * Compares this length with another one. If the units differ, the other
     * length is first converted to this length's unit.
     * @param $l Other HTMLPurifier_Length, or false
     * @return Difference of the numeric parts (this minus other), or false
     *         when $l is false or the conversion fails
     */
    public function compareTo($l)
    {
        if ($l === false) {
            return false;
        }
        if ($this->unit !== $l->unit) {
            $converter = new HTMLPurifier_UnitConverter();
            $l = $converter->convert($l, $this->unit);
            if ($l === false) {
                return false;
            }
        }
        return $this->n - $l->n;
    }

}
06191 
06192 
06193 
06194 
06195 
/**
 * Base lexer: provides the factory that selects a concrete lexer, plus
 * text helpers (entity handling, CDATA escaping, input normalisation)
 * shared by the implementations. tokenizeHTML() must be overridden.
 */
class HTMLPurifier_Lexer
{

    /**
     * Whether this lexer records line numbers. create() rejects a lexer
     * with this flag off when line tracking is required.
     */
    public $tracksLineNumbers = false;

    /**
     * Entity parser used by parseData() and normalize(); assigned in the
     * constructor. It used to be created as an undeclared (and therefore
     * public) property, which PHP 8.2 deprecates; it is declared public so
     * that its visibility is unchanged.
     */
    public $_entity_parser;

    // -- STATIC ----------------------------------------------------------

    /**
     * Returns a lexer for the given configuration.
     *
     * The lexer is taken from %Core.LexerImpl, which may be an object (used
     * as is), one of the names 'DOMLex', 'DirectLex' or 'PH5P', or null for
     * auto-detection: DirectLex when line tracking is needed or DOM support
     * is unavailable, DOMLex otherwise.
     *
     * @param $config HTMLPurifier_Config. Passing a lexer prototype (object
     *                or name) instead is deprecated and raises a warning;
     *                no line tracking requirement is applied in that case
     *                because there is no configuration to read it from.
     * @return The lexer instance
     * @throws HTMLPurifier_Exception for an unrecognised lexer name, or when
     *         line tracking is required but the lexer does not support it
     */
    public static function create($config)
    {

        if (!($config instanceof HTMLPurifier_Config)) {
            $lexer = $config;
            trigger_error("Passing a prototype to
              HTMLPurifier_Lexer::create() is deprecated, please instead
              use %Core.LexerImpl", E_USER_WARNING);
            // $config is the prototype here, not a configuration object, so
            // the directives below cannot be queried on it.
            $needs_tracking = false;
        } else {
            $lexer = $config->get('Core.LexerImpl');
            $needs_tracking =
                $config->get('Core.MaintainLineNumbers') ||
                $config->get('Core.CollectErrors');
        }

        $inst = null;
        if (is_object($lexer)) {
            $inst = $lexer;
        } else {

            if (is_null($lexer)) { do {
                // auto-detection algorithm

                if ($needs_tracking) {
                    $lexer = 'DirectLex';
                    break;
                }

                if (
                    class_exists('DOMDocument') &&
                    method_exists('DOMDocument', 'loadHTML') &&
                    !extension_loaded('domxml')
                ) {
                    // check for DOM support, because while it's part of the
                    // core, it can be disabled compile time. Also, the PECL
                    // domxml extension overrides the default DOM, and is
                    // not supported
                    $lexer = 'DOMLex';
                } else {
                    $lexer = 'DirectLex';
                }

            } while(0); } // do..while so we can break

            // instantiate recognized string names
            switch ($lexer) {
                case 'DOMLex':
                    $inst = new HTMLPurifier_Lexer_DOMLex();
                    break;
                case 'DirectLex':
                    $inst = new HTMLPurifier_Lexer_DirectLex();
                    break;
                case 'PH5P':
                    $inst = new HTMLPurifier_Lexer_PH5P();
                    break;
                default:
                    throw new HTMLPurifier_Exception("Cannot instantiate unrecognized Lexer type " . htmlspecialchars($lexer));
            }
        }

        if (!$inst) throw new HTMLPurifier_Exception('No lexer was instantiated');

        // once PHP DOM implements native line numbers, or we
        // hack out something using XSLT, remove this stipulation
        if ($needs_tracking && !$inst->tracksLineNumbers) {
            throw new HTMLPurifier_Exception('Cannot use lexer that does not support line numbers with Core.MaintainLineNumbers or Core.CollectErrors (use DirectLex instead)');
        }

        return $inst;

    }

    // -- CONVENIENCE MEMBERS ---------------------------------------------

    /**
     * Creates the entity parser used by parseData() and normalize().
     */
    public function __construct()
    {
        $this->_entity_parser = new HTMLPurifier_EntityParser();
    }

    /**
     * Most common entity to raw value conversion table for special entities.
     */
    protected $_special_entity2str =
            array(
                    '&quot;' => '"',
                    '&amp;'  => '&',
                    '&lt;'   => '<',
                    '&gt;'   => '>',
                    '&#39;'  => "'",
                    '&#039;' => "'",
                    '&#x27;' => "'"
            );

    /**
     * Converts the special entities in a string to their raw characters.
     *
     * The common entities are replaced with a single strtr() pass; the
     * entity parser is only invoked when ampersand counting indicates that
     * other entities may remain.
     *
     * @param $string String of character data
     * @return The string with special entities substituted
     */
    public function parseData($string)
    {

        // following functions require at least one character
        if ($string === '') return '';

        // subtracts amps that cannot possibly be escaped
        $num_amp = substr_count($string, '&') - substr_count($string, '& ') -
            ($string[strlen($string)-1] === '&' ? 1 : 0);

        if (!$num_amp) return $string; // abort if no entities
        $num_esc_amp = substr_count($string, '&amp;');
        $string = strtr($string, $this->_special_entity2str);

        // code duplication for sake of optimization, see above
        $num_amp_2 = substr_count($string, '&') - substr_count($string, '& ') -
            ($string[strlen($string)-1] === '&' ? 1 : 0);

        // every remaining ampersand is accounted for by a decoded &amp;
        if ($num_amp_2 <= $num_esc_amp) return $string;

        // hmm... now we have some uncommon entities. Use the callback.
        $string = $this->_entity_parser->substituteSpecialEntities($string);
        return $string;
    }

    /**
     * Lexes an HTML string into tokens. This base implementation only
     * raises an error; concrete lexers must override it.
     * @param $string  HTML to lex
     * @param $config  Configuration object
     * @param $context Context object
     */
    public function tokenizeHTML($string, $config, $context)
    {
        trigger_error('Call to abstract class', E_USER_ERROR);
    }

    /**
     * Replaces every CDATA section with its HTML-escaped contents.
     * @param $string HTML string to process
     * @return HTML with CDATA sections escaped
     */
    protected static function escapeCDATA($string)
    {
        return preg_replace_callback(
            '/<!\[CDATA\[(.+?)\]\]>/s',
            array('HTMLPurifier_Lexer', 'CDATACallback'),
            $string
        );
    }

    /**
     * Same as escapeCDATA(), for CDATA sections wrapped in the
     * "<!--//--><![CDATA[//><!-- ... //--><!]]>" comment idiom.
     * @param $string HTML string to process
     * @return HTML with such sections escaped
     */
    protected static function escapeCommentedCDATA($string)
    {
        return preg_replace_callback(
            '#<!--//--><!\[CDATA\[//><!--(.+?)//--><!\]\]>#s',
            array('HTMLPurifier_Lexer', 'CDATACallback'),
            $string
        );
    }

    /**
     * Removes Internet Explorer conditional comments, including their
     * contents.
     * @param $string HTML string to process
     * @return HTML without "<!--[if ...]> ... <![endif]-->" blocks
     */
    protected static function removeIEConditional($string)
    {
        return preg_replace(
            '#<!--\[if [^>]+\]>.*?<!\[endif\]-->#si', // probably should generalize for all strings
            '',
            $string
        );
    }

    /**
     * Callback for escapeCDATA() and escapeCommentedCDATA().
     * @param $matches PCRE match array; index 1 holds the CDATA contents
     * @return The contents, HTML-escaped
     */
    protected static function CDATACallback($matches)
    {
        // not exactly sure why the character set is needed, but whatever
        return htmlspecialchars($matches[1], ENT_COMPAT, 'UTF-8');
    }

    /**
     * Normalises raw HTML before lexing: newlines, CDATA sections, IE
     * conditional comments, optional body extraction, non-special entities,
     * UTF-8 cleaning and optional processing instruction removal, in that
     * order.
     * @param $html    HTML string
     * @param $config  Configuration object
     * @param $context Context object
     * @return The normalised HTML
     */
    public function normalize($html, $config, $context)
    {

        // normalize newlines to \n
        if ($config->get('Core.NormalizeNewlines')) {
            $html = str_replace("\r\n", "\n", $html);
            $html = str_replace("\r", "\n", $html);
        }

        if ($config->get('HTML.Trusted')) {
            // escape convoluted CDATA
            $html = $this->escapeCommentedCDATA($html);
        }

        // escape CDATA
        $html = $this->escapeCDATA($html);

        $html = $this->removeIEConditional($html);

        // extract body from document if applicable
        if ($config->get('Core.ConvertDocumentToFragment')) {
            $e = false;
            if ($config->get('Core.CollectErrors')) {
                $e =& $context->get('ErrorCollector');
            }
            $new_html = $this->extractBody($html);
            if ($e && $new_html != $html) {
                $e->send(E_WARNING, 'Lexer: Extracted body');
            }
            $html = $new_html;
        }

        // expand entities that aren't the big five
        $html = $this->_entity_parser->substituteNonSpecialEntities($html);

        // clean into wellformed UTF-8 string for an SGML context: this has
        // to be done after entity expansion because the entities sometimes
        // represent non-SGML characters
        $html = HTMLPurifier_Encoder::cleanUTF8($html);

        // if processing instructions are to be removed, remove them now
        if ($config->get('Core.RemoveProcessingInstructions')) {
            $html = preg_replace('#<\?.+?\?>#s', '', $html);
        }

        return $html;
    }

    /**
     * Returns the contents of the body element of a document.
     * @param $html HTML string
     * @return Everything between the first <body ...> tag and the last
     *         </body> tag, or the input unchanged when there is no such pair
     */
    public function extractBody($html)
    {
        $matches = array();
        $result = preg_match('!<body[^>]*>(.*)</body>!is', $html, $matches);
        if ($result) {
            return $matches[1];
        } else {
            return $html;
        }
    }

}
06518 
06519 
06520 
06521 
06522 
/**
 * Percent-encodes strings and normalises existing percent-encodings,
 * leaving a configurable set of characters untouched.
 */
class HTMLPurifier_PercentEncoder
{

    /**
     * Lookup table of byte values (keys) that are never percent-encoded.
     */
    protected $preserve = array();

    /**
     * @param $preserve String of additional characters to leave unencoded,
     *                  or false for none. Letters, digits, '-', '.', '_'
     *                  and '~' are always preserved.
     */
    public function __construct($preserve = false)
    {
        // unreserved characters: digits, upper-case, lower-case, - . _ ~
        $unreserved = array_merge(
            range(48, 57),
            range(65, 90),
            range(97, 122),
            array(45, 46, 95, 126)
        );
        foreach ($unreserved as $byte) {
            $this->preserve[$byte] = true;
        }

        // extra characters not to escape
        if ($preserve !== false) {
            $count = strlen($preserve);
            for ($pos = 0; $pos < $count; $pos++) {
                $this->preserve[ord($preserve[$pos])] = true;
            }
        }
    }

    /**
     * Percent-encodes every byte that is neither preserved nor '%'.
     * Because '%' is passed through unchanged, existing escape sequences
     * are not double-encoded (and stray '%' characters are not fixed;
     * normalize() handles those).
     * @param $string String to encode
     * @return Encoded string
     */
    public function encode($string)
    {
        $encoded = '';
        $length  = strlen($string);
        for ($pos = 0; $pos < $length; $pos++) {
            $char = $string[$pos];
            $byte = ord($char);
            if ($char === '%' || isset($this->preserve[$byte])) {
                $encoded .= $char;
            } else {
                $encoded .= '%' . sprintf('%02X', $byte);
            }
        }
        return $encoded;
    }

    /**
     * Normalises the percent-encodings of a string:
     *  - a '%' not followed by two hex digits becomes '%25'
     *  - an escape of a preserved character is decoded
     *  - any other escape gets upper-case hex digits
     * @param $string String to normalise
     * @return Normalised string
     */
    public function normalize($string)
    {
        if ($string == '') {
            return '';
        }
        $segments   = explode('%', $string);
        $normalized = array_shift($segments); // text before the first '%'
        foreach ($segments as $segment) {
            // too short to hold two hex digits: the '%' was a literal
            if (strlen($segment) < 2) {
                $normalized .= '%25' . $segment;
                continue;
            }
            $hex = substr($segment, 0, 2);
            if (!ctype_xdigit($hex)) {
                $normalized .= '%25' . $segment;
                continue;
            }
            $tail = substr($segment, 2);
            $byte = hexdec($hex);
            if (isset($this->preserve[$byte])) {
                $normalized .= chr($byte) . $tail;
            } else {
                $normalized .= '%' . strtoupper($hex) . $tail;
            }
        }
        return $normalized;
    }

}
06617 
06618 
06619 
06620 
06621 
/**
 * Key/value store with an optional parent list that is consulted for keys
 * this list does not define itself.
 */
class HTMLPurifier_PropertyList
{
    /**
     * Values defined directly on this list.
     */
    protected $data = array();

    /**
     * Parent list, or null.
     */
    protected $parent;

    /**
     * Result of the last squash(); null until squash() has run.
     */
    protected $cache;

    /**
     * @param $parent Optional parent property list
     */
    public function __construct($parent = null)
    {
        $this->parent = $parent;
    }

    /**
     * Retrieves a value, looking through the parent chain if necessary.
     * @param $name Key to look up
     * @return The value
     * @throws HTMLPurifier_Exception if neither this list nor any parent
     *         defines the key
     */
    public function get($name)
    {
        if ($this->has($name)) {
            return $this->data[$name];
        } elseif ($this->parent) {
            // possible performance bottleneck, convert to iterative if necessary
            return $this->parent->get($name);
        }
        throw new HTMLPurifier_Exception("Key '$name' not found");
    }

    /**
     * Sets a value on this list. A cached squash() result is not refreshed.
     * @param $name  Key
     * @param $value Value
     */
    public function set($name, $value)
    {
        $this->data[$name] = $value;
    }

    /**
     * Tells whether this list itself (parents excluded) defines a key;
     * a key holding null counts as defined.
     * @param $name Key
     */
    public function has($name)
    {
        return array_key_exists($name, $this->data);
    }

    /**
     * Removes one key from this list, or all of them. The argument is
     * compared loosely against null, so any value equal to null under ==
     * also clears everything.
     * @param $name Key to remove; null clears all keys of this list
     */
    public function reset($name = null)
    {
        if ($name == null) {
            $this->data = array();
        } else {
            unset($this->data[$name]);
        }
    }

    /**
     * Flattens this list and its parents into one array; values of a child
     * override those of its parent. The result is cached.
     * @param $force When true, the cache is ignored and rebuilt (the flag
     *               is passed on to the parents as well)
     * @return The flattened array
     */
    public function squash($force = false)
    {
        if (!$force && $this->cache !== null) {
            return $this->cache;
        }
        $flattened = $this->parent
            ? array_merge($this->parent->squash($force), $this->data)
            : $this->data;
        $this->cache = $flattened;
        return $flattened;
    }

    /**
     * @return The parent list, or null
     */
    public function getParent()
    {
        return $this->parent;
    }

    /**
     * @param $plist New parent list
     */
    public function setParent($plist)
    {
        $this->parent = $plist;
    }
}
06704 
06705 
06706 
06707 
06708 
/**
 * Filter iterator that only lets through the entries whose key starts with
 * a given prefix.
 */
class HTMLPurifier_PropertyListIterator extends FilterIterator
{

    /**
     * Length of the prefix, in bytes.
     */
    protected $l;

    /**
     * The prefix as passed to the constructor (may be null).
     */
    protected $filter;

    /**
     * @param $iterator Iterator to filter
     * @param $filter   Key prefix to match; null (the default) or an empty
     *                  string accepts every entry
     */
    public function __construct(Iterator $iterator, $filter = null)
    {
        parent::__construct($iterator);
        // Cast before measuring: handing null to strlen() is deprecated as
        // of PHP 8.1, and the default filter is null.
        $this->l = strlen((string) $filter);
        $this->filter = $filter;
    }

    /**
     * Accepts the current entry when its key begins with the prefix.
     * @return bool
     */
    #[\ReturnTypeWillChange]
    public function accept()
    {
        $key = $this->getInnerIterator()->key();
        // Both operands are cast because strncmp() no longer accepts null
        // without a deprecation notice (PHP 8.1); a zero-length comparison
        // always matches, which is how a null/empty prefix accepts all keys.
        return strncmp((string) $key, (string) $this->filter, $this->l) === 0;
    }

}
06737 
06738 
06739 
06740 
06741 
/**
 * Base class for strategies. A strategy exposes a single operation,
 * execute(), which is given a token list together with the configuration
 * and context objects.
 */
abstract class HTMLPurifier_Strategy
{

    /**
     * Runs the strategy on a list of tokens.
     *
     * @param $tokens  Tokens to process
     * @param $config  Configuration object
     * @param $context Context object
     * @return Defined by the implementing class
     *         (NOTE(review): presumably the processed token list; confirm
     *         against the concrete strategies)
     */
    abstract public function execute($tokens, $config, $context);

}
06764 
06765 
06766 
06767 
06768 
/**
 * ArrayObject that remembers which keys have been read through
 * offsetGet().
 */
class HTMLPurifier_StringHash extends ArrayObject
{
    /**
     * Lookup table of the keys read so far: key => true.
     */
    protected $accessed = array();

    /**
     * Retrieves a value and records the key as accessed. The key is
     * recorded before the lookup, so it is remembered even if the lookup
     * itself fails.
     *
     * The attribute below keeps the untyped signature compatible with
     * ArrayObject::offsetGet(), which has a tentative return type as of
     * PHP 8.1; older PHP versions read that line as a comment.
     *
     * @param $index Key to read
     * @return The stored value
     */
    #[\ReturnTypeWillChange]
    public function offsetGet($index)
    {
        $this->accessed[$index] = true;
        return parent::offsetGet($index);
    }

    /**
     * Returns the lookup table of accessed keys.
     * @return Array of key => true
     */
    public function getAccessed()
    {
        return $this->accessed;
    }

    /**
     * Forgets all recorded accesses.
     */
    public function resetAccessed()
    {
        $this->accessed = array();
    }
}
06804 
06805 
06806 
06807 
06808 
/**
 * Parses "string hash" files into associative arrays.
 *
 * Format, as implemented by parseHandle():
 *  - "KEY: value"       single-line entry (the value is trimmed)
 *  - a line without ':' single-line entry stored under the default key
 *  - "--KEY--"          starts a multi-line entry; each following line is
 *                       appended to it together with a newline
 *  - "--# ..."          comment, ignored
 *  - "----"             ends the current hash (see parseMultiFile())
 *  - blank lines are skipped while no multi-line entry is open
 */
class HTMLPurifier_StringHashParser
{

    /**
     * Key used for a single-line entry that has no "KEY:" part.
     */
    public $default = 'ID';

    /**
     * Parses a file holding a single hash.
     * @param $file Path of the file
     * @return The hash as an array, or false if the file does not exist or
     *         cannot be opened
     */
    public function parseFile($file)
    {
        if (!file_exists($file)) {
            return false;
        }
        $handle = fopen($file, 'r');
        if (!$handle) {
            return false;
        }
        $hash = $this->parseHandle($handle);
        fclose($handle);
        return $hash;
    }

    /**
     * Parses a file holding several hashes separated by "----" lines.
     * @param $file Path of the file
     * @return List of hashes, or false if the file does not exist or cannot
     *         be opened
     */
    public function parseMultiFile($file)
    {
        if (!file_exists($file)) {
            return false;
        }
        $hashes = array();
        $handle = fopen($file, 'r');
        if (!$handle) {
            return false;
        }
        while (!feof($handle)) {
            $hashes[] = $this->parseHandle($handle);
        }
        fclose($handle);
        return $hashes;
    }

    /**
     * Reads one hash from an open file handle, stopping at a "----" line
     * or at the end of the file. The handle is left positioned after the
     * last line read, so the method can be called again for the next hash.
     * @param $fh File handle positioned at the start of a hash
     * @return The hash as an array
     */
    protected function parseHandle($fh)
    {
        $key      = false; // key of the open multi-line entry, if any
        $one_line = false; // whether the current line is a single-line entry
        $hash     = array();
        do {
            $line = fgets($fh);
            if ($line === false) {
                break;
            }
            $line = rtrim($line, "\n\r");
            if ($line === '' && !$key) {
                continue;
            }
            if ($line === '----') {
                break;
            }
            if (strncmp('--#', $line, 3) === 0) {
                // Comment
                continue;
            }
            if (strncmp('--', $line, 2) === 0) {
                // Multiline declaration
                $key = trim($line, '- ');
                if (!isset($hash[$key])) {
                    $hash[$key] = '';
                }
                continue;
            }
            if (!$key) {
                $one_line = true;
                if (strpos($line, ':') === false) {
                    // Use default declaration
                    $key = $this->default;
                } else {
                    // Single-line declaration
                    list($key, $line) = explode(':', $line, 2);
                    $line = trim($line);
                }
            }
            if (!$one_line) {
                $hash[$key] .= "$line\n";
            } else {
                $hash[$key] = $line;
                // a single-line entry is complete: close it right away
                $one_line = false;
                $key      = false;
            }
        } while (!feof($fh));
        return $hash;
    }

}
06915 
06916 
06917 
06918 
06919 
/**
 * Base class for tag transformations.
 */
abstract class HTMLPurifier_TagTransform
{

    /**
     * Name of the tag this transform produces.
     * NOTE(review): not read inside this class; presumably set and used by
     * concrete subclasses -- confirm.
     */
    public $transform_to;

    /**
     * Performs the transformation; implemented by subclasses.
     *
     * @param mixed $tag     Tag to transform.
     * @param mixed $config  Configuration object.
     * @param mixed $context Context object.
     */
    abstract public function transform($tag, $config, $context);

    /**
     * Puts a CSS fragment in front of any existing inline style.
     *
     * @param array  $attr Attribute array, modified in place; a 'style'
     *                     entry is created if it was missing.
     * @param string $css  CSS text to prepend.
     */
    protected function prependCSS(&$attr, $css) {
        $current_style = isset($attr['style']) ? $attr['style'] : '';
        $attr['style'] = $css . $current_style;
    }

}
06952 
06953 
06954 
06955 
06956 
/**
 * Base class for tokens; carries source-position bookkeeping shared by all
 * token kinds.
 */
class HTMLPurifier_Token {
    /** Line number recorded by position() / rawPosition(). */
    public $line;
    /** Column number recorded by position() / rawPosition(). */
    public $col;

    /**
     * Array, empty by default.
     * NOTE(review): not used inside this class; meaning is defined by the
     * code that consumes tokens -- confirm there.
     */
    public $armor = array();

    /**
     * NOTE(review): $skip, $rewind and $carryover are not read or written
     * inside this class; presumably scratch fields for token-processing
     * code elsewhere -- confirm.
     */
    public $skip;
    public $rewind;
    public $carryover;

    /**
     * Backwards-compatibility shim for the removed "type" property.
     *
     * Reading $token->type raises an E_USER_NOTICE and returns a short type
     * name derived from the exact class of the token, or null when the class
     * is not one of the five known token classes. Reading any other
     * undefined property yields null.
     *
     * @param string $n Name of the property being read.
     * @return string|null
     */
    public function __get($n) {
      if ($n !== 'type') {
        return null;
      }
      trigger_error('Deprecated type property called; use instanceof', E_USER_NOTICE);
      // Exact class match on purpose: subclasses of these do not qualify.
      $legacy_names = array(
        'HTMLPurifier_Token_Start'   => 'start',
        'HTMLPurifier_Token_Empty'   => 'empty',
        'HTMLPurifier_Token_End'     => 'end',
        'HTMLPurifier_Token_Text'    => 'text',
        'HTMLPurifier_Token_Comment' => 'comment',
      );
      $class = get_class($this);
      return isset($legacy_names[$class]) ? $legacy_names[$class] : null;
    }

    /**
     * Records the token position verbatim.
     *
     * @param int|null $l Line number.
     * @param int|null $c Column number.
     */
    public function position($l = null, $c = null) {
        $this->col  = $c;
        $this->line = $l;
    }

    /**
     * Records the token position, bumping the line by one when the column
     * is -1.
     *
     * @param int $l Line number.
     * @param int $c Column number; -1 means "count this as the next line".
     */
    public function rawPosition($l, $c) {
        $this->line = ($c === -1) ? $l + 1 : $l;
        $this->col  = $c;
    }

}
07010 
07011 
07012 
07013 
07014 
/**
 * Factory that produces token objects by cloning one prototype per token
 * class and re-running its constructor with the caller's arguments.
 * NOTE(review): presumably done because clone + __construct is cheaper than
 * "new" for these classes -- the rationale is not visible here.
 */
class HTMLPurifier_TokenFactory
{

    /**
     * Prototype tokens, one per token class, that the create*() methods
     * clone.
     */
    private $p_start, $p_end, $p_empty, $p_text, $p_comment;

    /**
     * Builds the prototype instances.
     */
    public function __construct() {
        $this->p_start   = new HTMLPurifier_Token_Start('', array());
        $this->p_end     = new HTMLPurifier_Token_End('');
        $this->p_empty   = new HTMLPurifier_Token_Empty('', array());
        $this->p_text    = new HTMLPurifier_Token_Text('');
        $this->p_comment = new HTMLPurifier_Token_Comment('');
    }

    /**
     * Creates a start-tag token.
     *
     * @param string $name Tag name.
     * @param array  $attr Attribute array.
     * @return HTMLPurifier_Token_Start
     */
    public function createStart($name, $attr = array()) {
        $token = clone $this->p_start;
        $token->__construct($name, $attr);
        return $token;
    }

    /**
     * Creates an end-tag token.
     *
     * @param string $name Tag name.
     * @return HTMLPurifier_Token_End
     */
    public function createEnd($name) {
        $token = clone $this->p_end;
        $token->__construct($name);
        return $token;
    }

    /**
     * Creates an empty-tag token.
     *
     * @param string $name Tag name.
     * @param array  $attr Attribute array.
     * @return HTMLPurifier_Token_Empty
     */
    public function createEmpty($name, $attr = array()) {
        $token = clone $this->p_empty;
        $token->__construct($name, $attr);
        return $token;
    }

    /**
     * Creates a text token.
     *
     * @param string $data Text content.
     * @return HTMLPurifier_Token_Text
     */
    public function createText($data) {
        $token = clone $this->p_text;
        $token->__construct($data);
        return $token;
    }

    /**
     * Creates a comment token.
     *
     * @param string $data Comment content.
     * @return HTMLPurifier_Token_Comment
     */
    public function createComment($data) {
        $token = clone $this->p_comment;
        $token->__construct($data);
        return $token;
    }

}
07105 
07106 
07107 
07108 
07109 
/**
 * Holds the individual components of a URI and knows how to normalise,
 * validate and re-serialise them.
 *
 * A component that is absent is represented by null; note that an empty
 * string host ('') is distinct from a null host and serialises as an empty
 * authority ("scheme:///path").
 */
class HTMLPurifier_URI
{

    /**
     * URI components. $scheme is stored lower-cased and $port as an int
     * (see the constructor); each is null when the component is absent.
     */
    public $scheme, $userinfo, $host, $port, $path, $query, $fragment;

    /**
     * Stores the components, lower-casing the scheme and casting a non-null
     * port to int. No other validation happens here; see validate().
     *
     * @param string|null     $scheme
     * @param string|null     $userinfo
     * @param string|null     $host
     * @param int|string|null $port
     * @param string          $path
     * @param string|null     $query
     * @param string|null     $fragment
     */
    public function __construct($scheme, $userinfo, $host, $port, $path, $query, $fragment) {
        $this->scheme = is_null($scheme) || ctype_lower($scheme) ? $scheme : strtolower($scheme);
        $this->userinfo = $userinfo;
        $this->host = $host;
        $this->port = is_null($port) ? $port : (int) $port;
        $this->path = $path;
        $this->query = $query;
        $this->fragment = $fragment;
    }

    /**
     * Looks up the scheme object for this URI, falling back to the default
     * scheme of the URI definition when the URI has no scheme.
     *
     * @param object $config  Configuration object.
     * @param object $context Context object.
     * @return object|bool Scheme object, or false when no usable scheme
     *                     object could be obtained (a warning is triggered
     *                     if it was the default scheme that failed).
     */
    public function getSchemeObj($config, $context) {
        $registry = HTMLPurifier_URISchemeRegistry::instance();
        if ($this->scheme !== null) {
            $scheme_obj = $registry->getScheme($this->scheme, $config, $context);
            if (!$scheme_obj) return false; // invalid scheme, clean it out
        } else {
            // no scheme: retrieve the default one
            $def = $config->getDefinition('URI');
            $scheme_obj = $def->getDefaultScheme($config, $context);
            if (!$scheme_obj) {
                // something funky happened to the default scheme object
                trigger_error(
                    'Default scheme object "' . $def->defaultScheme . '" was not readable',
                    E_USER_WARNING
                );
                return false;
            }
        }
        return $scheme_obj;
    }

    /**
     * Scheme-independent validation: validates the host, drops a redundant
     * default scheme, nulls out-of-range ports and percent-encodes the
     * userinfo, path, query and fragment. Components are modified in place.
     *
     * @param object $config  Configuration object.
     * @param object $context Context object.
     * @return bool Always true in the current implementation.
     */
    public function validate($config, $context) {

        // ABNF definitions from RFC 3986
        $chars_sub_delims = '!$&\'()*+,;=';
        $chars_pchar = $chars_sub_delims . ':@';

        // validate host
        if (!is_null($this->host)) {
            $host_def = new HTMLPurifier_AttrDef_URI_Host();
            $this->host = $host_def->validate($this->host, $config, $context);
            if ($this->host === false) $this->host = null;
        }

        // validate scheme
        // NOTE: It's not appropriate to check whether or not this
        // scheme is in our registry, since a URIFilter may convert a
        // URI that we don't allow into one we do.  So instead, we just
        // check if the scheme can be dropped because there is no host
        // and it is our default scheme.
        //
        // The grouping is explicit: without the inner parentheses `&&`
        // binds tighter than `||`, so an empty-string host alone would
        // trigger the lookup even when there is no scheme to drop.
        if (!is_null($this->scheme) && (is_null($this->host) || $this->host === '')) {
            // support for relative paths is pretty abysmal when the
            // scheme is present, so axe it when possible
            $def = $config->getDefinition('URI');
            if ($def->defaultScheme === $this->scheme) {
                $this->scheme = null;
            }
        }

        // validate username
        if (!is_null($this->userinfo)) {
            $encoder = new HTMLPurifier_PercentEncoder($chars_sub_delims . ':');
            $this->userinfo = $encoder->encode($this->userinfo);
        }

        // validate port
        if (!is_null($this->port)) {
            if ($this->port < 1 || $this->port > 65535) $this->port = null;
        }

        // validate path
        $segments_encoder = new HTMLPurifier_PercentEncoder($chars_pchar . '/');
        if (!is_null($this->host)) { // this catches $this->host === ''
            // path-abempty (hier and relative)
            // http://www.example.com/my/path
            // //www.example.com/my/path (looks odd, but works, and
            //                            recognized by most browsers)
            // (this set is valid or invalid on a scheme by scheme
            // basis, so we'll deal with it later)
            // file:///my/path
            // ///my/path
            $this->path = $segments_encoder->encode($this->path);
        } elseif ($this->path !== '') {
            if ($this->path[0] === '/') {
                // path-absolute (hier and relative)
                // http:/my/path
                // /my/path
                if (strlen($this->path) >= 2 && $this->path[1] === '/') {
                    // This could happen if the host gets stripped out:
                    // http://my/path
                    // //my/path
                    // Emitting the path as-is would turn its first segment
                    // back into an authority, so discard it.
                    $this->path = '';
                } else {
                    $this->path = $segments_encoder->encode($this->path);
                }
            } elseif (!is_null($this->scheme)) {
                // path-rootless (hier)
                // http:my/path
                // Short circuit evaluation means we don't need to check nz
                $this->path = $segments_encoder->encode($this->path);
            } else {
                // path-noscheme (relative)
                // my/path
                // (once again, not checking nz)
                // The first segment must not contain ':' (it would read as
                // a scheme), hence the narrower encoder for that segment.
                $segment_nc_encoder = new HTMLPurifier_PercentEncoder($chars_sub_delims . '@');
                $c = strpos($this->path, '/');
                if ($c !== false) {
                    $this->path =
                        $segment_nc_encoder->encode(substr($this->path, 0, $c)) .
                        $segments_encoder->encode(substr($this->path, $c));
                } else {
                    $this->path = $segment_nc_encoder->encode($this->path);
                }
            }
        } else {
            // path-empty (hier and relative)
            $this->path = ''; // just to be safe
        }

        // qf = query and fragment
        $qf_encoder = new HTMLPurifier_PercentEncoder($chars_pchar . '/?');

        if (!is_null($this->query)) {
            $this->query = $qf_encoder->encode($this->query);
        }

        if (!is_null($this->fragment)) {
            $this->fragment = $qf_encoder->encode($this->fragment);
        }

        return true;

    }

    /**
     * Re-assembles the components into a URI string.
     *
     * @return string
     */
    public function toString() {
        // reconstruct authority
        $authority = null;
        // there is a rendering difference between a null authority
        // (http:foo-bar) and an empty string authority
        // (http:///foo-bar).
        if (!is_null($this->host)) {
            $authority = '';
            if(!is_null($this->userinfo)) $authority .= $this->userinfo . '@';
            $authority .= $this->host;
            if(!is_null($this->port))     $authority .= ':' . $this->port;
        }

        // Reconstruct the result
        // One might wonder about parsing quirks from browsers after
        // this reconstruction.  Unfortunately, parsing behavior depends
        // on what *scheme* was employed (file:///foo is handled *very*
        // differently than http:///foo), so unfortunately we have to
        // defer to the schemes to do the right thing.
        $result = '';
        if (!is_null($this->scheme))    $result .= $this->scheme . ':';
        if (!is_null($authority))       $result .=  '//' . $authority;
        $result .= $this->path;
        if (!is_null($this->query))     $result .= '?' . $this->query;
        if (!is_null($this->fragment))  $result .= '#' . $this->fragment;

        return $result;
    }

    /**
     * Tells whether the URI points at the local host: either it has no host
     * at all, or its host equals the host of the URI definition.
     *
     * @param object $config  Configuration object (only consulted when the
     *                        URI has a host).
     * @param object $context Context object (unused here).
     * @return bool
     */
    public function isLocal($config, $context) {
        if ($this->host === null) return true;
        $uri_def = $config->getDefinition('URI');
        if ($uri_def->host === $this->host) return true;
        return false;
    }

    /**
     * Tells whether the URI is local and does not downgrade security: when
     * the definition's default scheme is flagged secure, this URI's scheme
     * must be flagged secure as well.
     *
     * @param object $config  Configuration object.
     * @param object $context Context object.
     * @return bool
     */
    public function isBenign($config, $context) {
        if (!$this->isLocal($config, $context)) return false;

        $scheme_obj = $this->getSchemeObj($config, $context);
        if (!$scheme_obj) return false; // conservative approach

        $current_scheme_obj = $config->getDefinition('URI')->getDefaultScheme($config, $context);
        // The default scheme object may be unavailable (getDefaultScheme()
        // can come back empty); guard the property read so that case does
        // not raise a warning. The outcome is unchanged from before.
        if ($current_scheme_obj && $current_scheme_obj->secure) {
            if (!$scheme_obj->secure) {
                return false;
            }
        }
        return true;
    }

}
07348 
07349 
07350 
07351 
07352 
/**
 * Definition object describing how URIs are handled: the base URI, the
 * local host, the default scheme, and the set of active URI filters.
 */
class HTMLPurifier_URIDefinition extends HTMLPurifier_Definition
{

    /** Definition type identifier. */
    public $type = 'URI';

    /** Active filters applied by filter(), keyed by filter name. */
    protected $filters = array();

    /** Active filters applied by postFilter(), keyed by filter name. */
    protected $postFilters = array();

    /**
     * Filters that are available but not necessarily active, keyed by
     * filter name. Unset once setupFilters() has run.
     */
    protected $registeredFilters = array();

    /**
     * Parsed form of the URI.Base configuration value; stays unset when
     * that value is null.
     */
    public $base;

    /**
     * Value of URI.Host, or the host of the base URI when URI.Host is null.
     */
    public $host;

    /**
     * Scheme of the base URI, or URI.DefaultScheme when that is null.
     */
    public $defaultScheme;

    /**
     * Registers the built-in filters.
     */
    public function __construct() {
        $this->registerFilter(new HTMLPurifier_URIFilter_DisableExternal());
        $this->registerFilter(new HTMLPurifier_URIFilter_DisableExternalResources());
        $this->registerFilter(new HTMLPurifier_URIFilter_HostBlacklist());
        $this->registerFilter(new HTMLPurifier_URIFilter_SafeIframe());
        $this->registerFilter(new HTMLPurifier_URIFilter_MakeAbsolute());
        $this->registerFilter(new HTMLPurifier_URIFilter_Munge());
    }

    /**
     * Makes a filter available for activation during setup.
     *
     * @param object $filter Filter object; its $name is used as the key.
     */
    public function registerFilter($filter) {
        $this->registeredFilters[$filter->name] = $filter;
    }

    /**
     * Prepares a filter and, unless prepare() returns exactly false,
     * activates it in the pre- or post-filter list according to its $post
     * flag.
     *
     * @param object $filter Filter object.
     * @param object $config Configuration object passed to prepare().
     */
    public function addFilter($filter, $config) {
        // Only a strict false vetoes the filter; null is accepted for
        // backwards compatibility.
        if ($filter->prepare($config) === false) {
            return;
        }
        if ($filter->post) {
            $this->postFilters[$filter->name] = $filter;
            return;
        }
        $this->filters[$filter->name] = $filter;
    }

    /**
     * Populates the member variables and then activates filters.
     *
     * @param object $config Configuration object.
     */
    protected function doSetup($config) {
        $this->setupMemberVariables($config);
        $this->setupFilters($config);
    }

    /**
     * Activates every registered filter that is either flagged always_load
     * or whose "URI.<name>" directive is neither false nor null, then
     * discards the registration list.
     *
     * @param object $config Configuration object.
     */
    protected function setupFilters($config) {
        foreach ($this->registeredFilters as $name => $filter) {
            if (!$filter->always_load) {
                $setting = $config->get('URI.' . $name);
                if ($setting === false || $setting === null) {
                    continue;
                }
            }
            $this->addFilter($filter, $config);
        }
        unset($this->registeredFilters);
    }

    /**
     * Derives $host, $base and $defaultScheme from the configuration.
     *
     * @param object $config Configuration object.
     */
    protected function setupMemberVariables($config) {
        $this->host = $config->get('URI.Host');

        $base_uri = $config->get('URI.Base');
        if ($base_uri !== null) {
            $parser = new HTMLPurifier_URIParser();
            $this->base = $parser->parse($base_uri);
            $this->defaultScheme = $this->base->scheme;
            // An explicit URI.Host wins over the host of the base URI.
            if ($this->host === null) {
                $this->host = $this->base->host;
            }
        }

        if ($this->defaultScheme === null) {
            $this->defaultScheme = $config->get('URI.DefaultScheme');
        }
    }

    /**
     * Fetches the scheme object for $defaultScheme from the scheme registry.
     *
     * @param object $config  Configuration object.
     * @param object $context Context object.
     * @return mixed Whatever the registry's getScheme() returns.
     */
    public function getDefaultScheme($config, $context) {
        $registry = HTMLPurifier_URISchemeRegistry::instance();
        return $registry->getScheme($this->defaultScheme, $config, $context);
    }

    /**
     * Runs the URI through the active pre-filters, stopping at the first
     * one that returns a falsy result.
     *
     * @param object $uri     URI object, passed by reference to each filter.
     * @param object $config  Configuration object.
     * @param object $context Context object.
     * @return bool True if every filter accepted the URI.
     */
    public function filter(&$uri, $config, $context) {
        foreach ($this->filters as $active_filter) {
            if (!$active_filter->filter($uri, $config, $context)) {
                return false;
            }
        }
        return true;
    }

    /**
     * Runs the URI through the active post-filters, stopping at the first
     * one that returns a falsy result.
     *
     * @param object $uri     URI object, passed by reference to each filter.
     * @param object $config  Configuration object.
     * @param object $context Context object.
     * @return bool True if every filter accepted the URI.
     */
    public function postFilter(&$uri, $config, $context) {
        foreach ($this->postFilters as $active_filter) {
            if (!$active_filter->filter($uri, $config, $context)) {
                return false;
            }
        }
        return true;
    }

}
07451 
07452 
07453 
07454 
07455 
/**
 * Base class for URI filters.
 */
abstract class HTMLPurifier_URIFilter
{

    /**
     * Unique name of the filter; concrete filters are expected to set it.
     */
    public $name;

    /**
     * Whether the filter belongs to the post-filter stage rather than the
     * regular filter stage.
     */
    public $post = false;

    /**
     * Whether the filter should be loaded regardless of configuration.
     */
    public $always_load = false;

    /**
     * One-time setup hook. The default implementation does nothing and
     * reports success.
     *
     * @param object $config Configuration object.
     * @return bool Always true here; subclasses may return false.
     */
    public function prepare($config) {
        return true;
    }

    /**
     * Filters a URI; implemented by subclasses.
     *
     * @param object $uri     URI object, passed by reference.
     * @param object $config  Configuration object.
     * @param object $context Context object.
     */
    abstract public function filter(&$uri, $config, $context);

}
07519 
07520 
07521 
07522 
07523 
/**
 * Splits a URI string into its components and wraps them in an
 * HTMLPurifier_URI object.
 */
class HTMLPurifier_URIParser
{

    /**
     * Percent encoder used to normalise the input before it is parsed.
     */
    protected $percentEncoder;

    public function __construct() {
        $this->percentEncoder = new HTMLPurifier_PercentEncoder();
    }

    /**
     * Parses a URI.
     *
     * @param string $uri URI to parse.
     * @return HTMLPurifier_URI|bool The parsed URI, or false when the
     *                               top-level pattern does not match.
     */
    public function parse($uri) {

        $uri = $this->percentEncoder->normalize($uri);

        // Regexp is as per Appendix B.
        // Note that ["<>] are an addition to the RFC's recommended
        // characters, because they represent external delimiters.
        $pattern = '!'.
            '(([^:/?#"<>]+):)?'. // 2. Scheme
            '(//([^/?#"<>]*))?'. // 4. Authority
            '([^?#"<>]*)'.       // 5. Path
            '(\?([^#"<>]*))?'.   // 7. Query
            '(#([^"<>]*))?'.     // 8. Fragment
            '!';

        $parts = array();
        if (!preg_match($pattern, $uri, $parts)) {
            return false; // *really* invalid URI
        }

        // Separate out the parts. The outer group of each pair includes the
        // delimiter, so it is non-empty whenever the component is present,
        // even if the component itself is an empty string.
        $scheme    = empty($parts[1]) ? null : $parts[2];
        $authority = empty($parts[3]) ? null : $parts[4];
        $path      = $parts[5]; // always present, can be empty
        $query     = empty($parts[6]) ? null : $parts[7];
        $fragment  = empty($parts[8]) ? null : $parts[9];

        // Further parse the authority into userinfo, host and port.
        $userinfo = null;
        $host     = null;
        $port     = null;
        if ($authority !== null) {
            $r_authority = "/^((.+?)@)?(\[[^\]]+\]|[^:]*)(:(\d*))?/";
            $auth = array();
            preg_match($r_authority, $authority, $auth);
            $userinfo = empty($auth[1]) ? null : $auth[2];
            // An authority without a usable host yields '' rather than null.
            $host     = empty($auth[3]) ? '' : $auth[3];
            $port     = empty($auth[4]) ? null : (int) $auth[5];
        }

        return new HTMLPurifier_URI(
            $scheme, $userinfo, $host, $port, $path, $query, $fragment);
    }

}
07590 
07591 
07592 
07593 
07594 
/**
 * Base class for scheme-specific URI validators.
 */
abstract class HTMLPurifier_URIScheme
{

    /**
     * Default port of the scheme; a URI port equal to it is dropped by
     * validate(). Null means the scheme has no default port.
     */
    public $default_port = null;

    /**
     * Flag, false by default.
     * NOTE(review): not read inside this class; presumably marks schemes a
     * browser would load directly -- confirm against the callers.
     */
    public $browsable = false;

    /**
     * Flag, false by default. Not read inside this class.
     */
    public $secure = false;

    /**
     * Flag, false by default.
     * NOTE(review): not read inside this class; presumably marks schemes
     * with hierarchical paths -- confirm against the callers.
     */
    public $hierarchical = false;

    /**
     * Whether a URI of this scheme may legitimately lack a host. When
     * false, validate() tries to repair or rejects host-less URIs that
     * carry a scheme.
     */
    public $may_omit_host = false;

    /**
     * Scheme-specific validation; implemented by subclasses.
     *
     * @param object $uri     URI object, passed by reference.
     * @param object $config  Configuration object.
     * @param object $context Context object.
     */
    abstract public function doValidate(&$uri, $config, $context);

    /**
     * Public entry point: applies generic clean-up (default port removal,
     * missing/blank host handling) and then delegates to doValidate().
     *
     * @param object $uri     URI object, modified in place.
     * @param object $config  Configuration object; 'URI.Host' is read when
     *                        a host has to be substituted.
     * @param object $context Context object.
     * @return mixed False when the host problem cannot be repaired,
     *               otherwise the result of doValidate().
     */
    public function validate(&$uri, $config, $context) {
        if ($this->default_port == $uri->port) {
            $uri->port = null;
        }

        $no_scheme  = is_null($uri->scheme);
        $blank_host = ($uri->host === '');

        // kludge: browsers do funny things when the scheme but not the
        // authority is set.
        // If the scheme is present (and the scheme insists on a host), a
        // missing host is always in error.
        $host_required_but_absent =
            !$this->may_omit_host && !$no_scheme && ($blank_host || is_null($uri->host));
        // If the scheme is not present, a *blank* host is in error, since
        // this translates into '///path' which most browsers interpret as
        // being 'http://path'. This applies regardless of may_omit_host.
        $blank_host_without_scheme = $no_scheme && $blank_host;

        if ($host_required_but_absent || $blank_host_without_scheme) {
            if ($no_scheme && substr($uri->path, 0, 2) != '//') {
                // Safe to simply drop the empty authority.
                $uri->host = null;
            } else {
                // Either a scheme is present, or the URI is '////path' and
                // nullifying the host would change its meaning. Try to
                // insert the configured hostname instead.
                $fallback_host = $config->get('URI.Host');
                if (is_null($fallback_host)) {
                    // we can't do anything sensible, reject the URL.
                    return false;
                }
                $uri->host = $fallback_host;
            }
        }

        return $this->doValidate($uri, $config, $context);
    }

}
07686 
07687 
07688 
07689 
07690 
/**
 * Registry that hands out scheme validator objects, creating them on demand
 * and caching them by scheme name.
 */
class HTMLPurifier_URISchemeRegistry
{

    /**
     * Returns the shared registry instance, optionally replacing it first.
     *
     * @param mixed $prototype Null to fetch the current instance (created
     *                         lazily on first use); boolean true to discard
     *                         the current instance and start over with a
     *                         fresh default registry; any other value to
     *                         install that value as the shared instance.
     * @return HTMLPurifier_URISchemeRegistry
     */
    public static function instance($prototype = null) {
        static $instance = null;
        if ($prototype === true) {
            // Reset request. This must be tested before the generic
            // "override" branch, otherwise the literal true would itself be
            // installed as the registry.
            $instance = new HTMLPurifier_URISchemeRegistry();
        } elseif ($prototype !== null) {
            $instance = $prototype;
        } elseif ($instance === null) {
            $instance = new HTMLPurifier_URISchemeRegistry();
        }
        return $instance;
    }

    /**
     * Cache of scheme objects, keyed by scheme name.
     */
    protected $schemes = array();

    /**
     * Retrieves the validator object for a scheme.
     *
     * @param string $scheme  Scheme name.
     * @param object $config  Configuration object; a default one is created
     *                        when a falsy value is passed.
     * @param object $context Context object (unused here).
     * @return object|null Scheme object, or null when the scheme is not
     *                     allowed or no class exists for it.
     */
    public function getScheme($scheme, $config, $context) {
        if (!$config) $config = HTMLPurifier_Config::createDefault();

        // important, otherwise attacker could include arbitrary file
        $allowed_schemes = $config->get('URI.AllowedSchemes');
        if (!$config->get('URI.OverrideAllowedSchemes') &&
            !isset($allowed_schemes[$scheme])
        ) {
            return;
        }

        // With URI.OverrideAllowedSchemes on, an explicitly registered
        // scheme is served even if it is not in the allowed list...
        if (isset($this->schemes[$scheme])) return $this->schemes[$scheme];
        // ...but a class name is only ever built from an allowed scheme.
        if (!isset($allowed_schemes[$scheme])) return;

        $class = 'HTMLPurifier_URIScheme_' . $scheme;
        if (!class_exists($class)) return;
        $this->schemes[$scheme] = new $class();
        return $this->schemes[$scheme];
    }

    /**
     * Registers (or replaces) the object used for a scheme.
     *
     * @param string $scheme     Scheme name.
     * @param object $scheme_obj Scheme object to serve for that name.
     */
    public function register($scheme, $scheme_obj) {
        $this->schemes[$scheme] = $scheme_obj;
    }

}
07755 
07756 
07757 
07758 
07759 
/**
 * Converts lengths between units, within and across the unit systems listed
 * in $units, using BCMath when it is available and plain floating point
 * arithmetic otherwise.
 */
class HTMLPurifier_UnitConverter
{

    const ENGLISH = 1;
    const METRIC = 2;
    const DIGITAL = 3;

    /**
     * Conversion table, one sub-array per unit system.
     *
     * String keys are unit names mapped to their size relative to the other
     * units of the same system. An integer key names another system and maps
     * to a triple: (unit of this system to convert from, multiplier, unit of
     * the other system that the multiplied value is expressed in).
     */
    protected static $units = array(
        self::ENGLISH => array(
            'px' => 3, // This is as per CSS 2.1 and Firefox. Your mileage may vary
            'pt' => 4,
            'pc' => 48,
            'in' => 288,
            self::METRIC => array('pt', '0.352777778', 'mm'),
        ),
        self::METRIC => array(
            'mm' => 1,
            'cm' => 10,
            self::ENGLISH => array('mm', '2.83464567', 'pt'),
        ),
    );

    /**
     * Minimum number of significant figures kept in the output.
     */
    protected $outputPrecision;

    /**
     * Number of decimal places carried through intermediate calculations.
     */
    protected $internalPrecision;

    /**
     * Whether BCMath functions are used for the arithmetic.
     */
    private $bcmath;

    /**
     * @param int  $output_precision   Minimum significant figures of results.
     * @param int  $internal_precision Decimal places used internally.
     * @param bool $force_no_bcmath    Use native arithmetic even when BCMath
     *                                 is available.
     */
    public function __construct($output_precision = 4, $internal_precision = 10, $force_no_bcmath = false) {
        $this->outputPrecision   = $output_precision;
        $this->internalPrecision = $internal_precision;
        $this->bcmath            = !$force_no_bcmath && function_exists('bcmul');
    }

    /**
     * Converts a length to another unit.
     *
     * @param HTMLPurifier_Length $length  Length to convert.
     * @param string              $to_unit Target unit.
     * @return HTMLPurifier_Length|bool The converted length; a unit-less
     *         '0' length when the input is '0' or has no unit; false when
     *         the input is invalid or either unit is unknown.
     */
    public function convert($length, $to_unit) {

        if (!$length->isValid()) {
            return false;
        }

        $value = $length->getN();
        $from  = $length->getUnit();

        if ($value === '0' || $from === false) {
            return new HTMLPurifier_Length('0', false);
        }

        // Work out which system each of the two units belongs to.
        $system = false;
        $target_system = false;
        foreach (self::$units as $system_id => $table) {
            if (isset($table[$from])) {
                $system = $system_id;
            }
            if (isset($table[$to_unit])) {
                $target_system = $system_id;
            }
        }
        if (!$system || !$target_system) {
            return false;
        }

        // Precision of the input number, needed for the final rounding;
        // never go below the configured output precision.
        $sigfigs = $this->getSigFigs($value);
        if ($sigfigs < $this->outputPrecision) {
            $sigfigs = $this->outputPrecision;
        }

        // BCMath's scale counts decimals only. For numbers below one, widen
        // the scale by the number of leading decimal places so that the
        // count of guard digits never drops below internalPrecision.
        $magnitude = (int) floor(log(abs($value), 10));
        $guard = $this->internalPrecision;
        if ($magnitude < 0) {
            $guard = $this->internalPrecision - $magnitude;
        }

        // At most two passes: one inside the source system and, after a
        // system shift, one inside the target system.
        for ($pass = 0; $pass < 2; $pass++) {

            $same_system = ($target_system === $system);

            // Unit IN THIS SYSTEM to aim for: the target itself, or the
            // bridge unit from which the system shift is defined.
            if ($same_system) {
                $step_unit = $to_unit;
            } else {
                $step_unit = self::$units[$system][$target_system][0];
            }

            if ($step_unit !== $from) {
                $factor = $this->div(self::$units[$system][$from], self::$units[$system][$step_unit], $guard);
                $value  = $this->mul($value, $factor, $guard);
                $from   = $step_unit;
            }

            // Output was zero, so bail out early. Shouldn't ever happen.
            if ($value === '') {
                $value = '0';
                $from  = $to_unit;
                break;
            }

            // Conversion within a single system is finished at this point.
            if ($same_system) {
                break;
            }

            if ($pass !== 0) {
                // Still in the wrong system after a shift: the system we
                // forwarded to didn't have this unit. Should never happen.
                return false;
            }

            // First pass only: shift to the target system.
            $bridge = self::$units[$system][$target_system];
            $value  = $this->mul($value, $bridge[1], $guard);
            $from   = $bridge[2];
            $system = $target_system;

            // One more loop around to convert the unit in the new system.
        }

        // Post-condition: we must have arrived at the requested unit.
        if ($from !== $to_unit) {
            return false;
        }

        // Round to the significant figures, then strip trailing zeros after
        // the decimal point and a dangling decimal point.
        $value = $this->round($value, $sigfigs);
        if (strpos($value, '.') !== false) {
            $value = rtrim($value, '0');
        }
        $value = rtrim($value, '.');

        return new HTMLPurifier_Length($value, $from);
    }

    /**
     * Counts the significant figures of a decimal number string.
     *
     * @param string $n Decimal number.
     * @return int
     */
    public function getSigFigs($n) {
        $digits = ltrim($n, '0+-');
        $dot = strpos($digits, '.'); // decimal position
        if ($dot === false) {
            // Integer: trailing zeros are not significant.
            return strlen(rtrim($digits, '0'));
        }
        $count = strlen(ltrim($digits, '0.'));
        // When digits precede the decimal point, the point itself is still
        // in the string and must not be counted.
        return ($dot !== 0) ? $count - 1 : $count;
    }

    /**
     * Adds two numbers at the given scale.
     */
    private function add($s1, $s2, $scale) {
        return $this->bcmath
            ? bcadd($s1, $s2, $scale)
            : $this->scale($s1 + $s2, $scale);
    }

    /**
     * Multiplies two numbers at the given scale.
     */
    private function mul($s1, $s2, $scale) {
        return $this->bcmath
            ? bcmul($s1, $s2, $scale)
            : $this->scale($s1 * $s2, $scale);
    }

    /**
     * Divides two numbers at the given scale.
     */
    private function div($s1, $s2, $scale) {
        return $this->bcmath
            ? bcdiv($s1, $s2, $scale)
            : $this->scale($s1 / $s2, $scale);
    }

    /**
     * Rounds a number to the given count of significant figures.
     *
     * @param string|float $n       Number to round.
     * @param int          $sigfigs Significant figures to keep.
     * @return string
     */
    private function round($n, $sigfigs) {
        $magnitude = (int) floor(log(abs($n), 10)); // Number of digits left of decimal - 1
        $places    = $sigfigs - $magnitude - 1;     // Number of decimal places needed
        $sign      = $n < 0 ? '-' : '';             // Negative sign

        if (!$this->bcmath) {
            return $this->scale(round($n, $places), $places + 1);
        }

        if ($places >= 0) {
            // Add half a unit in the last kept place, then truncate.
            $n = bcadd($n, $sign . '0.' .  str_repeat('0', $places) . '5', $places + 1);
            return bcdiv($n, '1', $places);
        }

        // Rounding left of the decimal point. This partially depends on the
        // standardized form of numbers that comes out of bcmath.
        $n = bcadd($n, $sign . '5' . str_repeat('0', $magnitude - $sigfigs), 0);
        return substr($n, 0, $sigfigs + strlen($sign)) . str_repeat('0', $magnitude - $sigfigs + 1);
    }

    /**
     * Formats a float as a decimal string with the given number of decimal
     * places; a negative scale rounds to the left of the decimal point.
     *
     * @param float $r     Number to format.
     * @param int   $scale Decimal places (may be negative).
     * @return string
     */
    private function scale($r, $scale) {
        if ($scale >= 0) {
            return sprintf('%.' . $scale . 'f', (float) $r);
        }
        // The f sprintf type doesn't support negative precision, so handle
        // it manually. First get the integer part as a string.
        $r = sprintf('%.0f', (float) $r);
        // Due to floating point precision loss, $r will more than likely
        // look something like 4652999999999.9234. Keep one digit more than
        // needed and use it to round appropriately.
        $precise = (string) round(substr($r, 0, strlen($r) + $scale), -1);
        // Drop the zero that was rounded off and pad back to full width.
        return substr($precise, 0, -1) . str_repeat('0', -$scale + 1);
    }

}
08010 
08011 
08012 
08013 
08014 
/**
 * Converts a raw value into one of the variable types declared below and
 * sanity-checks whatever the concrete implementation produced.
 *
 * Subclasses customise the conversion by overriding parseImplementation();
 * the implementation in this class hands the value back untouched.
 */
class HTMLPurifier_VarParser
{

    const STRING    = 1;
    const ISTRING   = 2;
    const TEXT      = 3;
    const ITEXT     = 4;
    const INT       = 5;
    const FLOAT     = 6;
    const BOOL      = 7;
    const LOOKUP    = 8;
    const ALIST     = 9;
    const HASH      = 10;
    const MIXED     = 11;

    /**
     * Maps each textual type name to its type constant.
     */
    public static $types = array(
        'string'    => self::STRING,
        'istring'   => self::ISTRING,
        'text'      => self::TEXT,
        'itext'     => self::ITEXT,
        'int'       => self::INT,
        'float'     => self::FLOAT,
        'bool'      => self::BOOL,
        'lookup'    => self::LOOKUP,
        'list'      => self::ALIST,
        'hash'      => self::HASH,
        'mixed'     => self::MIXED
    );

    /**
     * Set (type constant => true) of the types that carry string data.
     */
    public static $stringTypes = array(
        self::STRING    => true,
        self::ISTRING   => true,
        self::TEXT      => true,
        self::ITEXT     => true,
    );

    /**
     * Parses $var as $type and verifies that the result has the PHP type
     * the requested type calls for.
     *
     * @param mixed      $var        Value to parse.
     * @param int|string $type       Type constant, or a name listed in $types.
     * @param bool       $allow_null When true, a null result is returned as-is.
     * @return mixed The parsed value; ISTRING and ITEXT results are lowercased.
     * @throws HTMLPurifier_VarParserException On an unknown type name or when
     *         the parsed value does not pass the check for its type.
     * @throws HTMLPurifier_Exception When $type matches no known constant.
     */
    final public function parse($var, $type, $allow_null = false)
    {
        // Resolve textual type names to their numeric constant first.
        if (is_string($type)) {
            if (isset(HTMLPurifier_VarParser::$types[$type])) {
                $type = HTMLPurifier_VarParser::$types[$type];
            } else {
                throw new HTMLPurifier_VarParserException("Invalid type '$type'");
            }
        }

        $var = $this->parseImplementation($var, $type, $allow_null);
        if ($var === null && $allow_null) {
            return null;
        }

        // Basic post-conditions: every branch either returns the value or
        // breaks out of the switch so the generic error below is raised.
        switch ($type) {
            case self::STRING:
            case self::ISTRING:
            case self::TEXT:
            case self::ITEXT:
                if (is_string($var)) {
                    $insensitive = ($type == self::ISTRING || $type == self::ITEXT);
                    return $insensitive ? strtolower($var) : $var;
                }
                break;
            case self::INT:
                if (is_int($var)) {
                    return $var;
                }
                break;
            case self::FLOAT:
                if (is_float($var)) {
                    return $var;
                }
                break;
            case self::BOOL:
                if (is_bool($var)) {
                    return $var;
                }
                break;
            case self::LOOKUP:
            case self::ALIST:
            case self::HASH:
                if (!is_array($var)) {
                    break;
                }
                if ($type === self::LOOKUP) {
                    // A lookup table may only contain the value true.
                    foreach ($var as $flag) {
                        if ($flag !== true) {
                            $this->error('Lookup table contains value other than true');
                        }
                    }
                } elseif ($type === self::ALIST) {
                    // A list must be keyed 0..n-1 in order.
                    $indices = array_keys($var);
                    if ($indices !== array_keys($indices)) {
                        $this->error('Indices for list are not uniform');
                    }
                }
                return $var;
            case self::MIXED:
                return $var;
            default:
                $this->errorInconsistent(get_class($this), $type);
        }

        $this->errorGeneric($var, $type);
    }

    /**
     * Conversion hook for subclasses; this implementation is the identity.
     *
     * @param mixed $var
     * @param int   $type
     * @param bool  $allow_null
     * @return mixed
     */
    protected function parseImplementation($var, $type, $allow_null)
    {
        return $var;
    }

    /**
     * Raises a parser exception carrying $msg.
     *
     * @param string $msg
     * @throws HTMLPurifier_VarParserException
     */
    protected function error($msg)
    {
        throw new HTMLPurifier_VarParserException($msg);
    }

    /**
     * Raises an exception reporting that $class does not implement $type.
     *
     * @param string $class Name of the parser class.
     * @param int    $type  Type constant that was requested.
     * @throws HTMLPurifier_Exception
     */
    protected function errorInconsistent($class, $type)
    {
        $name = HTMLPurifier_VarParser::getTypeName($type);
        throw new HTMLPurifier_Exception("Inconsistency in $class: ".$name." not implemented");
    }

    /**
     * Raises the generic "expected type X, got Y" error through error().
     *
     * @param mixed $var  Offending value.
     * @param int   $type Type constant that was expected.
     */
    protected function errorGeneric($var, $type)
    {
        $expected = HTMLPurifier_VarParser::getTypeName($type);
        $vtype    = gettype($var);
        $this->error("Expected type ".$expected.", got $vtype");
    }

    /**
     * Returns the textual name for a type constant, or 'unknown'.
     *
     * @param int $type
     * @return string
     */
    public static function getTypeName($type)
    {
        // Reverse of $types, built on first use and kept for later calls.
        static $names;
        if (!$names) {
            $names = array_flip(HTMLPurifier_VarParser::$types);
        }
        return isset($names[$type]) ? $names[$type] : 'unknown';
    }

}
08165 
08166 
08167 
08168 
08169 
/**
 * Exception raised by HTMLPurifier_VarParser when a type name is invalid
 * or a value fails the check for its type.
 */
class HTMLPurifier_VarParserException extends HTMLPurifier_Exception
{
}
08177 
08178 
08179 
08180 
08181 
/**
 * Validates a list of CSS declarations (such as the contents of a style
 * attribute) property by property against the configured CSS definition.
 */
class HTMLPurifier_AttrDef_CSS extends HTMLPurifier_AttrDef
{

    /**
     * @param string $css     Raw declaration list.
     * @param object $config  Configuration; must provide getCSSDefinition().
     * @param object $context Context; receives 'CurrentCSSProperty' while
     *                        the declarations are being validated.
     * @return string|false The accepted declarations re-serialised as
     *                      "prop:value;" pairs, or false if none survived.
     */
    public function validate($css, $config, $context)
    {
        $css        = $this->parseCDATA($css);
        $definition = $config->getCSSDefinition();

        // Splitting on ';' deliberately ignores the spec's escaping rules:
        // it is fast but can misfire when a semicolon appears inside a
        // value (for instance in a URI).
        $chunks = explode(';', $css);

        // Registered with the context so validators can tell which property
        // is current; it is reassigned on every iteration below.
        $current_property = false;
        $context->register('CurrentCSSProperty', $current_property);

        // Keyed by property name, so a later duplicate replaces an earlier one.
        $accepted = array();

        foreach ($chunks as $chunk) {
            // Skip empty chunks and chunks without a colon after position 0.
            if (!$chunk || !strpos($chunk, ':')) {
                continue;
            }

            list($current_property, $raw_value) = explode(':', $chunk, 2);
            $current_property = trim($current_property);
            $raw_value        = trim($raw_value);

            // Try the name as written, then once more in lowercase.
            $known = isset($definition->info[$current_property]);
            if (!$known && !ctype_lower($current_property)) {
                $current_property = strtolower($current_property);
                $known = isset($definition->info[$current_property]);
            }
            if (!$known) {
                continue;
            }

            if (strtolower(trim($raw_value)) === 'inherit') {
                // 'inherit' is accepted for every known property.
                $checked = 'inherit';
            } else {
                $checked = $definition->info[$current_property]->validate(
                    $raw_value,
                    $config,
                    $context
                );
            }

            if ($checked === false) {
                continue;
            }
            $accepted[$current_property] = $checked;
        }

        $context->destroy('CurrentCSSProperty');

        // Serialise only after the whole pass so duplicates are collapsed.
        $serialized = '';
        foreach ($accepted as $name => $val) {
            $serialized .= $name . ':' . $val . ';';
        }

        if ($serialized) {
            return $serialized;
        }
        return false;
    }

}
08265 
08266 
08267 
08268 
08269 
/**
 * Wraps another attribute definition: validate() delegates to the wrapped
 * definition and make() hands out a copy of it.
 */
class HTMLPurifier_AttrDef_Clone extends HTMLPurifier_AttrDef
{
    /**
     * The wrapped definition.
     */
    protected $clone;

    /**
     * @param object $clone Definition to wrap.
     */
    public function __construct($clone)
    {
        $this->clone = $clone;
    }

    /**
     * Forwards validation to the wrapped definition.
     *
     * @param string $v
     * @param object $config
     * @param object $context
     * @return mixed Whatever the wrapped definition returns.
     */
    public function validate($v, $config, $context)
    {
        $wrapped = $this->clone;
        return $wrapped->validate($v, $config, $context);
    }

    /**
     * Returns a clone of the wrapped definition; $string is not used.
     *
     * @param string $string
     * @return object
     */
    public function make($string)
    {
        $copy = clone $this->clone;
        return $copy;
    }

}
08294 
08295 
08296 
08297 
08298 
08299 // Enum = Enumerated
/**
 * Validates a value against a fixed set of permitted strings.
 */
class HTMLPurifier_AttrDef_Enum extends HTMLPurifier_AttrDef
{

    /**
     * Permitted values, stored as keys (value => original index) so that
     * membership can be tested with isset().
     */
    public $valid_values   = array();

    /**
     * Whether matching is case-sensitive.
     */
    protected $case_sensitive = false;

    /**
     * @param array $valid_values   List of permitted values.
     * @param bool  $case_sensitive Whether matching is case-sensitive.
     */
    public function __construct(
        $valid_values = array(),
        $case_sensitive = false
    ) {
        $this->case_sensitive = $case_sensitive;
        $this->valid_values   = array_flip($valid_values);
    }

    /**
     * @param string $string
     * @param object $config
     * @param object $context
     * @return string|false The trimmed (and, when case-insensitive,
     *                      lowercased) value if permitted, false otherwise.
     */
    public function validate($string, $config, $context)
    {
        $candidate = trim($string);
        if (!$this->case_sensitive && !ctype_lower($candidate)) {
            $candidate = strtolower($candidate);
        }
        if (isset($this->valid_values[$candidate])) {
            return $candidate;
        }
        return false;
    }

    /**
     * Builds an enumeration from a comma-separated list of values. A
     * leading "s:" marks the enumeration as case-sensitive.
     *
     * @param string $string
     * @return HTMLPurifier_AttrDef_Enum
     */
    public function make($string)
    {
        $sensitive = (strlen($string) > 2 && strncmp($string, 's:', 2) === 0);
        if ($sensitive) {
            $string = substr($string, 2);
        }
        return new HTMLPurifier_AttrDef_Enum(explode(',', $string), $sensitive);
    }

}
08360 
08361 
08362 
08363 
08364 
/**
 * Validates an integer written as an optional sign followed by decimal
 * digits, optionally restricted to negative, zero and/or positive values.
 */
class HTMLPurifier_AttrDef_Integer extends HTMLPurifier_AttrDef
{

    /**
     * Whether negative values are permitted.
     */
    protected $negative = true;

    /**
     * Whether zero is permitted.
     */
    protected $zero = true;

    /**
     * Whether positive values are permitted.
     */
    protected $positive = true;

    /**
     * @param bool $negative Permit negative values.
     * @param bool $zero     Permit zero.
     * @param bool $positive Permit positive values.
     */
    public function __construct(
        $negative = true,
        $zero = true,
        $positive = true
    ) {
        $this->positive = $positive;
        $this->zero     = $zero;
        $this->negative = $negative;
    }

    /**
     * @param string $integer
     * @param object $config
     * @param object $context
     * @return string|false The integer as a string (a leading '+' and the
     *                      sign of "-0" are removed), or false if rejected.
     */
    public function validate($integer, $config, $context)
    {
        $text = $this->parseCDATA($integer);
        if ($text === '') {
            return false;
        }

        // A plain integer cast is deliberately avoided: some inputs that
        // cast cleanly must still be rejected.
        $lead = $text[0];
        if ($this->negative && $lead === '-') {
            $digits = substr($text, 1);
            if ($digits === '0') {
                // "-0" is reported as plain zero.
                $text = '0';
            }
        } elseif ($this->positive && $lead === '+') {
            // The plus sign carries no information; drop it.
            $text   = substr($text, 1);
            $digits = $text;
        } else {
            $digits = $text;
        }

        if (!ctype_digit($digits)) {
            return false;
        }

        // Range restrictions.
        $rejected = (!$this->zero     && $text == 0)
                 || (!$this->positive && $text > 0)
                 || (!$this->negative && $text < 0);

        return $rejected ? false : $text;
    }

}
08434 
08435 
08436 
08437 
08438 
/**
 * Validates a hyphen-separated language code. Invalid trailing subtags
 * are cut off rather than invalidating the whole value.
 */
class HTMLPurifier_AttrDef_Lang extends HTMLPurifier_AttrDef
{

    /**
     * @param string $string
     * @param object $config
     * @param object $context
     * @return string|false The lowercased code up to (not including) the
     *                      first invalid subtag, or false when the primary
     *                      subtag itself is invalid.
     */
    public function validate($string, $config, $context)
    {
        $string = trim($string);
        if (!$string) {
            return false;
        }

        $subtags = explode('-', $string);
        $total   = count($subtags);

        // Primary subtag: a lone 'x' or 'i', or two to three letters.
        $primary     = $subtags[0];
        $primary_len = strlen($primary);
        if ($primary_len === 1) {
            if ($primary !== 'x' && $primary !== 'i') {
                return false;
            }
        } elseif ($primary_len === 2 || $primary_len === 3) {
            if (!ctype_alpha($primary)) {
                return false;
            }
            $primary = strtolower($primary);
        } else {
            return false;
        }

        $code = $primary;

        // Remaining subtags: one to eight alphanumerics each. The second
        // subtag additionally may not be a single character other than 'x'.
        // Stop at the first subtag that fails and keep what was accepted.
        for ($idx = 1; $idx < $total; $idx++) {
            $tag = $subtags[$idx];
            $len = strlen($tag);

            $invalid = ($len === 0 || $len > 8 || !ctype_alnum($tag));
            if (!$invalid && $idx === 1 && $len === 1 && $tag !== 'x') {
                $invalid = true;
            }
            if ($invalid) {
                break;
            }

            $code .= '-' . strtolower($tag);
        }

        return $code;
    }

}
08508 
08509 
08510 
08511 
08512 
/**
 * Chooses between two attribute definitions depending on whether the
 * current token in the context has a particular tag name.
 */
class HTMLPurifier_AttrDef_Switch
{

    /**
     * Tag name that selects $withTag.
     */
    protected $tag;

    /**
     * Definition used when the current token has the configured tag name.
     */
    protected $withTag;

    /**
     * Definition used in every other case.
     */
    protected $withoutTag;

    /**
     * @param string $tag         Tag name to switch on.
     * @param object $with_tag    Definition applied when the tag matches.
     * @param object $without_tag Definition applied otherwise.
     */
    public function __construct($tag, $with_tag, $without_tag)
    {
        $this->withoutTag = $without_tag;
        $this->withTag    = $with_tag;
        $this->tag        = $tag;
    }

    /**
     * Delegates to one of the two definitions based on 'CurrentToken'.
     *
     * @param string $string
     * @param object $config
     * @param object $context Queried for 'CurrentToken'.
     * @return mixed Result of the selected definition's validate().
     */
    public function validate($string, $config, $context)
    {
        $token   = $context->get('CurrentToken', true);
        $matches = ($token && $token->name === $this->tag);

        $chosen = $matches ? $this->withTag : $this->withoutTag;
        return $chosen->validate($string, $config, $context);
    }

}
08543 
08544 
08545 
08546 
08547 
/**
 * Accepts any text; the only processing applied is the inherited
 * parseCDATA() step.
 */
class HTMLPurifier_AttrDef_Text extends HTMLPurifier_AttrDef
{

    /**
     * @param string $string
     * @param object $config
     * @param object $context
     * @return mixed The value returned by parseCDATA().
     */
    public function validate($string, $config, $context)
    {
        $processed = $this->parseCDATA($string);
        return $processed;
    }

}
08559 
08560 
08561 
08562 
08563 
/**
 * Validates a URI by parsing it and running it through generic, filter,
 * scheme-specific and post-filter checks.
 */
class HTMLPurifier_AttrDef_URI extends HTMLPurifier_AttrDef
{

    /**
     * Parser that turns the URI string into a URI object.
     */
    protected $parser;

    /**
     * Whether the URI refers to a resource that gets embedded.
     */
    protected $embedsResource;

    /**
     * @param bool $embeds_resource Whether the URI points at an embedded
     *                              resource.
     */
    public function __construct($embeds_resource = false)
    {
        $this->embedsResource = (bool) $embeds_resource;
        $this->parser         = new HTMLPurifier_URIParser();
    }

    /**
     * Factory: the string 'embedded' yields an embedding validator.
     *
     * @param string $string
     * @return HTMLPurifier_AttrDef_URI
     */
    public function make($string)
    {
        return new HTMLPurifier_AttrDef_URI($string === 'embedded');
    }

    /**
     * @param string $uri
     * @param object $config  Read for 'URI.Disable' and the 'URI' definition.
     * @param object $context Receives 'EmbeddedURI' while the checks run.
     * @return string|false The URI re-serialised via toString(), or false.
     */
    public function validate($uri, $config, $context)
    {
        if ($config->get('URI.Disable')) {
            return false;
        }

        $uri = $this->parseCDATA($uri);

        // From here on $uri is the parsed object, not the string.
        $uri = $this->parser->parse($uri);
        if ($uri === false) {
            return false;
        }

        // Expose the embedded flag to the validators for the duration of
        // the checks.
        $context->register('EmbeddedURI', $this->embedsResource);

        $survived = false;

        // Generic validation.
        if ($uri->validate($config, $context)) {
            // Chained filtering.
            $uri_def = $config->getDefinition('URI');
            if ($uri_def->filter($uri, $config, $context)) {
                // Scheme-specific validation; an embedded resource
                // additionally needs a browsable scheme.
                $scheme_obj    = $uri->getSchemeObj($config, $context);
                $scheme_usable = $scheme_obj
                    && (!$this->embedsResource || $scheme_obj->browsable);
                if ($scheme_usable && $scheme_obj->validate($uri, $config, $context)) {
                    // Post chained filtering.
                    if ($uri_def->postFilter($uri, $config, $context)) {
                        $survived = true;
                    }
                }
            }
        }

        $context->destroy('EmbeddedURI');

        if ($survived) {
            // Back to string form.
            return $uri->toString();
        }
        return false;
    }

}
08637 
08638 
08639 
08640 
08641 
/**
 * Validates a CSS number: an optional sign, digits, and an optional
 * fractional part introduced by a period.
 */
class HTMLPurifier_AttrDef_CSS_Number extends HTMLPurifier_AttrDef
{

    /**
     * When true, values with a leading minus sign are rejected.
     */
    protected $non_negative = false;

    /**
     * @param bool $non_negative Reject values with a leading minus sign.
     */
    public function __construct($non_negative = false)
    {
        $this->non_negative = $non_negative;
    }

    /**
     * @param string $number
     * @param object $config
     * @param object $context
     * @return string|false The number with leading zeros of the integer
     *                      part and trailing zeros of the fraction removed
     *                      (a '+' sign is dropped), or false if invalid.
     */
    public function validate($number, $config, $context)
    {
        $number = $this->parseCDATA($number);

        if ($number === '') {
            return false;
        }
        if ($number === '0') {
            return '0';
        }

        // Peel off the sign. '+' is discarded; '-' is remembered.
        $sign  = '';
        $first = $number[0];
        if ($first === '-') {
            if ($this->non_negative) {
                return false;
            }
            $sign = '-';
        }
        if ($first === '-' || $first === '+') {
            $number = substr($number, 1);
        }

        // Pure integer.
        if (ctype_digit($number)) {
            $number = ltrim($number, '0');
            if ($number) {
                return $sign . $number;
            }
            return '0';
        }

        // Anything else must contain a period.
        if (strpos($number, '.') === false) {
            return false;
        }

        list($whole, $fraction) = explode('.', $number, 2);

        if ($whole === '' && $fraction === '') {
            return false;
        }
        if ($whole !== '' && !ctype_digit($whole)) {
            return false;
        }

        $whole    = ltrim($whole, '0');
        $fraction = rtrim($fraction, '0');

        if ($fraction === '') {
            // Nothing left after the period: report as an integer.
            if ($whole) {
                return $sign . $whole;
            }
            return '0';
        }
        if (!ctype_digit($fraction)) {
            return false;
        }

        return $sign . $whole . '.' . $fraction;
    }

}
08707 
08708 
08709 
08710 
08711 
/**
 * Validates a CSS number and clamps it into the range 0..1.
 */
class HTMLPurifier_AttrDef_CSS_AlphaValue extends HTMLPurifier_AttrDef_CSS_Number
{

    /**
     * Negative input is accepted by the number parser on purpose: it is
     * clamped to zero in validate() instead of being rejected.
     */
    public function __construct()
    {
        parent::__construct(false);
    }

    /**
     * @param string $number
     * @param object $config
     * @param object $context
     * @return string|false '0' for values below zero, '1' for values above
     *                      one, otherwise the parent's result unchanged.
     */
    public function validate($number, $config, $context)
    {
        $parsed = parent::validate($number, $config, $context);
        if ($parsed === false) {
            return $parsed;
        }

        $magnitude = (float) $parsed;
        if ($magnitude > 1.0) {
            return '1';
        }
        if ($magnitude < 0.0) {
            return '0';
        }
        return $parsed;
    }

}
08729 
08730 
08731 
08732 
08733 
/**
 * Validates the shorthand CSS property 'background' by distributing its
 * space-separated components over the validators of the individual
 * background-* properties.
 */
class HTMLPurifier_AttrDef_CSS_Background extends HTMLPurifier_AttrDef
{

    /**
     * Validators for the component properties, keyed by full property
     * name (e.g. 'background-color').
     */
    protected $info;

    /**
     * @param object $config Configuration; must provide getCSSDefinition().
     */
    public function __construct($config)
    {
        $def = $config->getCSSDefinition();
        $this->info['background-color'] = $def->info['background-color'];
        $this->info['background-image'] = $def->info['background-image'];
        $this->info['background-repeat'] = $def->info['background-repeat'];
        $this->info['background-attachment'] = $def->info['background-attachment'];
        $this->info['background-position'] = $def->info['background-position'];
    }

    /**
     * @param string $string
     * @param object $config
     * @param object $context
     * @return string|false The accepted components joined by single spaces
     *                      in the order color, image, repeat, attachment,
     *                      position; false when nothing was accepted.
     */
    public function validate($string, $config, $context)
    {
        // regular pre-processing
        $string = $this->parseCDATA($string);
        if ($string === '') {
            return false;
        }

        // munge rgb() decl if necessary
        $string = $this->mungeRgb($string);

        // assumes URI doesn't have spaces in it
        $bits = explode(' ', $string); // bits to process

        $caught = array();
        $caught['color']    = false;
        $caught['image']    = false;
        $caught['repeat']   = false;
        $caught['attachment'] = false;
        $caught['position'] = false;

        $i = 0; // number of catches

        foreach ($bits as $bit) {
            if ($bit === '') {
                continue;
            }

            // Keywords, colors and units are matched case-insensitively, so
            // they are lowercased. The contents of url(...) are left
            // untouched because URI paths are case-sensitive; only the
            // function name itself is normalised.
            if (strncasecmp($bit, 'url(', 4) === 0) {
                $bit = 'url(' . substr($bit, 4);
            } else {
                $bit = strtolower($bit);
            }

            foreach ($caught as $key => $status) {
                if ($key != 'position') {
                    // Each non-position component may be caught only once.
                    if ($status !== false) {
                        continue;
                    }
                    $r = $this->info['background-' . $key]->validate($bit, $config, $context);
                } else {
                    // Position is last in the list and takes every bit that
                    // nothing else claimed; it is validated as a whole below.
                    $r = $bit;
                }
                if ($r === false) {
                    continue;
                }
                if ($key == 'position') {
                    if ($caught[$key] === false) {
                        $caught[$key] = '';
                    }
                    $caught[$key] .= $r . ' ';
                } else {
                    $caught[$key] = $r;
                }
                $i++;
                break;
            }
        }

        if (!$i) {
            return false;
        }
        if ($caught['position'] !== false) {
            $caught['position'] = $this->info['background-position']->
                validate($caught['position'], $config, $context);
        }

        $ret = array();
        foreach ($caught as $value) {
            if ($value === false) {
                continue;
            }
            $ret[] = $value;
        }

        if (empty($ret)) {
            return false;
        }
        return implode(' ', $ret);
    }

}
08817 
08818 
08819 
08820 
08821 
08822 /* W3C says:
08823     [ // adjective and number must be in correct order, even if
08824       // you could switch them without introducing ambiguity.
08825       // some browsers support that syntax
08826         [
08827             <percentage> | <length> | left | center | right
08828         ]
08829         [
08830             <percentage> | <length> | top | center | bottom
08831         ]?
08832     ] |
08833     [ // this signifies that the vertical and horizontal adjectives
08834       // can be arbitrarily ordered, however, there can only be two,
08835       // one of each, or none at all
08836         [
08837             left | center | right
08838         ] ||
08839         [
08840             top | center | bottom
08841         ]
08842     ]
08843     top, left = 0%
08844     center, (none) = 50%
08845     bottom, right = 100%
08846 */
08847 
08848 /* QuirksMode says:
08849     keyword + length/percentage must be ordered correctly, as per W3C
08850 
08851     Internet Explorer and Opera, however, support arbitrary ordering. We
08852     should fix it up.
08853 
08854     Minor issue though, not strictly necessary.
08855 */
08856 
08857 // control freaks may appreciate the ability to convert these to
08858 // percentages or something, but it's not necessary
08859 
/**
 * Validates the value of background-position: up to two components, each
 * a keyword (left, right, top, bottom, center), a length or a percentage.
 * The output is ordered horizontal component first, vertical second.
 */
class HTMLPurifier_AttrDef_CSS_BackgroundPosition extends HTMLPurifier_AttrDef
{

    /**
     * Validator used for length components.
     */
    protected $length;

    /**
     * Validator used for percentage components.
     */
    protected $percentage;

    public function __construct()
    {
        $this->percentage = new HTMLPurifier_AttrDef_CSS_Percentage();
        $this->length     = new HTMLPurifier_AttrDef_CSS_Length();
    }

    /**
     * @param string $string
     * @param object $config
     * @param object $context
     * @return string|false One or two components joined by a space, or
     *                      false when no valid component was found.
     */
    public function validate($string, $config, $context)
    {
        $string = $this->parseCDATA($string);

        // Keyword slots: 'h' = left/right, 'v' = top/bottom, 'ch'/'cv' =
        // 'center' seen as the first / a later recognised component.
        $keywords = array(
            'h'  => false,
            'v'  => false,
            'ch' => false,
            'cv' => false,
        );
        $measures = array(); // lengths and percentages, in input order
        $hits     = 0;       // number of recognised components so far

        $lookup = array(
            'top' => 'v',
            'bottom' => 'v',
            'left' => 'h',
            'right' => 'h',
            'center' => 'c'
        );

        foreach (explode(' ', $string) as $bit) {
            if ($bit === '') {
                continue;
            }

            // Keyword?
            $lowered = ctype_lower($bit) ? $bit : strtolower($bit);
            if (isset($lookup[$lowered])) {
                $slot = $lookup[$lowered];
                if ($slot == 'c') {
                    $slot = ($hits == 0) ? 'ch' : 'cv';
                }
                $keywords[$slot] = $lowered;
                $hits++;
            }

            // Length?
            $as_length = $this->length->validate($bit, $config, $context);
            if ($as_length !== false) {
                $measures[] = $as_length;
                $hits++;
            }

            // Percentage?
            $as_percent = $this->percentage->validate($bit, $config, $context);
            if ($as_percent !== false) {
                $measures[] = $as_percent;
                $hits++;
            }
        }

        if (!$hits) {
            return false; // nothing valid was found
        }

        $ret = array();

        // Horizontal component.
        if ($keywords['h']) {
            $ret[] = $keywords['h'];
        } elseif ($keywords['ch']) {
            $ret[] = $keywords['ch'];
            // A 'center' used here must not be reused for the vertical slot.
            $keywords['cv'] = false;
        } elseif (count($measures)) {
            $ret[] = array_shift($measures);
        }

        // Vertical component.
        if ($keywords['v']) {
            $ret[] = $keywords['v'];
        } elseif ($keywords['cv']) {
            $ret[] = $keywords['cv'];
        } elseif (count($measures)) {
            $ret[] = array_shift($measures);
        }

        return empty($ret) ? false : implode(' ', $ret);
    }

}
08951 
08952 
08953 
08954 
08955 
/**
 * Validates the shorthand CSS property 'border' by matching each
 * space-separated component against the width, style and color validators.
 */
class HTMLPurifier_AttrDef_CSS_Border extends HTMLPurifier_AttrDef
{

    /**
     * Validators for the component properties, keyed by property name and
     * tried in insertion order.
     */
    protected $info = array();

    /**
     * @param object $config Configuration; must provide getCSSDefinition().
     */
    public function __construct($config)
    {
        $def = $config->getCSSDefinition();
        $this->info['border-width'] = $def->info['border-width'];
        $this->info['border-style'] = $def->info['border-style'];
        $this->info['border-top-color'] = $def->info['border-top-color'];
    }

    /**
     * @param string $string
     * @param object $config
     * @param object $context
     * @return string|false The accepted components joined by single
     *                      spaces in input order, or false when no
     *                      component was accepted.
     */
    public function validate($string, $config, $context)
    {
        $string = $this->parseCDATA($string);
        $string = $this->mungeRgb($string);
        $bits = explode(' ', $string);
        $done = array(); // component properties already matched
        $ret = ''; // return value
        foreach ($bits as $bit) {
            // Consecutive spaces produce empty bits; nothing to validate.
            if ($bit === '') {
                continue;
            }
            foreach ($this->info as $propname => $validator) {
                // Each component property may be matched at most once.
                if (isset($done[$propname])) {
                    continue;
                }
                $r = $validator->validate($bit, $config, $context);
                if ($r !== false) {
                    $ret .= $r . ' ';
                    $done[$propname] = true;
                    break;
                }
            }
        }
        $ret = rtrim($ret);
        // Callers test the result with "=== false"; an empty string would
        // otherwise be emitted as an empty declaration.
        return $ret === '' ? false : $ret;
    }

}
08995 
08996 
08997 
08998 
08999 
/**
 * Validates a CSS color given as a configured keyword, an rgb() literal,
 * or a three- or six-digit hexadecimal value (with or without '#').
 */
class HTMLPurifier_AttrDef_CSS_Color extends HTMLPurifier_AttrDef
{

    /**
     * @param string $color
     * @param object $config  Read for 'Core.ColorKeywords'.
     * @param object $context
     * @return string|false The keyword's mapped value, a normalised
     *                      "rgb(a,b,c)" literal with clamped components, a
     *                      hex color prefixed with '#', or false if invalid.
     */
    public function validate($color, $config, $context)
    {
        // NOTE: the keyword table is read from the first $config seen and
        // cached in a static for all later calls.
        static $colors = null;
        if ($colors === null) {
            $colors = $config->get('Core.ColorKeywords');
        }

        $color = trim($color);
        if ($color === '') {
            return false;
        }

        $lower = strtolower($color);
        if (isset($colors[$lower])) {
            return $colors[$lower];
        }

        // The triad is sliced from fixed offset 4 below, so 'rgb(' has to
        // be the very start of the value, not merely somewhere inside it.
        if (strpos($color, 'rgb(') === 0) {
            // rgb literal handling
            $length = strlen($color);
            // the first ')' must also be the last character
            if (strpos($color, ')') !== $length - 1) {
                return false;
            }
            $triad = substr($color, 4, $length - 4 - 1);
            $parts = explode(',', $triad);
            if (count($parts) !== 3) {
                return false;
            }
            $type = false; // to ensure that they're all the same type
            $new_parts = array();
            foreach ($parts as $part) {
                $part = trim($part);
                if ($part === '') {
                    return false;
                }
                $length = strlen($part);
                if ($part[$length - 1] === '%') {
                    // handle percents, clamped to 0..100
                    if (!$type) {
                        $type = 'percentage';
                    } elseif ($type !== 'percentage') {
                        return false;
                    }
                    $num = (float) substr($part, 0, $length - 1);
                    if ($num < 0) {
                        $num = 0;
                    }
                    if ($num > 100) {
                        $num = 100;
                    }
                    $new_parts[] = "$num%";
                } else {
                    // handle integers, clamped to 0..255
                    if (!$type) {
                        $type = 'integer';
                    } elseif ($type !== 'integer') {
                        return false;
                    }
                    $num = (int) $part;
                    if ($num < 0) {
                        $num = 0;
                    }
                    if ($num > 255) {
                        $num = 255;
                    }
                    $new_parts[] = (string) $num;
                }
            }
            $new_triad = implode(',', $new_parts);
            $color = "rgb($new_triad)";
        } else {
            // hexadecimal handling; a missing '#' is added
            if ($color[0] === '#') {
                $hex = substr($color, 1);
            } else {
                $hex = $color;
                $color = '#' . $color;
            }
            $length = strlen($hex);
            if ($length !== 3 && $length !== 6) {
                return false;
            }
            if (!ctype_xdigit($hex)) {
                return false;
            }
        }

        return $color;
    }

}
09074 
09075 
09076 
09077 
09078 
/**
 * Tries a list of attribute definitions in order and returns the result
 * of the first one that accepts the value.
 */
class HTMLPurifier_AttrDef_CSS_Composite extends HTMLPurifier_AttrDef
{

    /**
     * Definitions to try, in order.
     */
    public $defs;

    /**
     * @param array $defs Definitions to try, in order.
     */
    public function __construct($defs)
    {
        $this->defs = $defs;
    }

    /**
     * @param string $string
     * @param object $config
     * @param object $context
     * @return mixed The first result that is not false, or false when
     *               every definition rejects the value.
     */
    public function validate($string, $config, $context)
    {
        foreach ($this->defs as $candidate) {
            $outcome = $candidate->validate($string, $config, $context);
            if ($outcome === false) {
                continue;
            }
            return $outcome;
        }
        return false;
    }

}
09113 
09114 
09115 
09116 
09117 
/**
 * Decorator that rejects every value when the current token is a given
 * element and otherwise defers to the wrapped definition.
 */
class HTMLPurifier_AttrDef_CSS_DenyElementDecorator extends HTMLPurifier_AttrDef
{
    /**
     * The wrapped definition.
     */
    public $def;

    /**
     * Name of the element on which the value is denied.
     */
    public $element;

    /**
     * @param object $def     Definition to wrap.
     * @param string $element Element name to deny.
     */
    public function __construct($def, $element)
    {
        $this->element = $element;
        $this->def     = $def;
    }

    /**
     * @param string $string
     * @param object $config
     * @param object $context Queried for 'CurrentToken'.
     * @return mixed false on the denied element, otherwise the wrapped
     *               definition's result.
     */
    public function validate($string, $config, $context)
    {
        $token  = $context->get('CurrentToken', true);
        $denied = ($token && $token->name == $this->element);
        if ($denied) {
            return false;
        }
        return $this->def->validate($string, $config, $context);
    }
}
09142 
09143 
09144 
09145 
09146 
/**
 * Validates the 'filter' CSS property. Only 'none' and a single alpha
 * filter with an 'opacity' parameter are let through.
 */
class HTMLPurifier_AttrDef_CSS_Filter extends HTMLPurifier_AttrDef
{

    /**
     * Validator applied to the opacity value.
     */
    protected $intValidator;

    public function __construct()
    {
        $this->intValidator = new HTMLPurifier_AttrDef_Integer();
    }

    /**
     * @param string $value
     * @param object $config
     * @param object $context
     * @return string|false 'none', or "function(opacity=N)" with N clamped
     *                      to 0..100 (the parameter list is empty when no
     *                      valid opacity was given); false when the
     *                      function name is not a recognised alpha filter.
     */
    public function validate($value, $config, $context)
    {
        $value = $this->parseCDATA($value);
        if ($value === 'none') {
            return $value;
        }
        // if we looped this we could support multiple filters
        $function_length = strcspn($value, '(');
        $function = trim(substr($value, 0, $function_length));
        if ($function !== 'alpha' &&
            $function !== 'Alpha' &&
            $function !== 'progid:DXImageTransform.Microsoft.Alpha'
        ) {
            return false;
        }

        // Everything between '(' and the next ')'. When there is no '(' at
        // all the cursor lies past the end of the string; treat that as an
        // empty parameter list instead of passing an out-of-range offset
        // to strcspn()/substr().
        $cursor = $function_length + 1;
        if ($cursor < strlen($value)) {
            $parameters_length = strcspn($value, ')', $cursor);
            $parameters = (string) substr($value, $cursor, $parameters_length);
        } else {
            $parameters = '';
        }

        $ret_params = array();
        $lookup = array(); // keys already emitted
        foreach (explode(',', $parameters) as $param) {
            $pieces = explode('=', $param);
            if (!isset($pieces[1])) {
                // No '=' in this parameter: there is no value to validate.
                continue;
            }
            $key         = trim($pieces[0]);
            $param_value = trim($pieces[1]);
            if (isset($lookup[$key])) {
                continue;
            }
            if ($key !== 'opacity') {
                continue;
            }
            $param_value = $this->intValidator->validate($param_value, $config, $context);
            if ($param_value === false) {
                continue;
            }
            // clamp to 0..100
            $int = (int) $param_value;
            if ($int > 100) {
                $param_value = '100';
            }
            if ($int < 0) {
                $param_value = '0';
            }
            $ret_params[] = "$key=$param_value";
            $lookup[$key] = true;
        }
        $ret_parameters = implode(',', $ret_params);
        $ret_function = "$function($ret_parameters)";
        return $ret_function;
    }

}
09197 
09198 
09199 
09200 
09201 
/**
 * Validates the shorthand CSS property 'font' by delegating each component
 * to the validator registered for the corresponding longhand property.
 */
class HTMLPurifier_AttrDef_CSS_Font extends HTMLPurifier_AttrDef
{

    /**
     * Component validators, keyed by longhand property name.  Filled in the
     * constructor from the CSS definition.
     */
    protected $info = array();

    public function __construct($config) {
        $def = $config->getCSSDefinition();
        $properties = array(
            'font-style',
            'font-variant',
            'font-weight',
            'font-size',
            'line-height',
            'font-family',
        );
        foreach ($properties as $property) {
            $this->info[$property] = $def->info[$property];
        }
    }

    /**
     * Walks the space-separated pieces of the value in three phases:
     * optional style/variant/weight, mandatory size (optionally followed by
     * "/line-height"), then the font family taking everything that is left.
     *
     * @return Normalised value, or false if the value is not acceptable
     */
    public function validate($string, $config, $context) {

        static $system_fonts = array(
            'caption' => true,
            'icon' => true,
            'menu' => true,
            'message-box' => true,
            'small-caption' => true,
            'status-bar' => true
        );

        // usual pre-processing
        $string = $this->parseCDATA($string);
        if ($string === '') {
            return false;
        }

        // a system font keyword stands on its own
        $lowered = strtolower($string);
        if (isset($system_fonts[$lowered])) {
            return $lowered;
        }

        $words   = explode(' ', $string);
        $total   = count($words);
        $phase   = 0;       // which component we are currently looking for
        $seen    = array(); // phase 0 properties already matched
        $leading = array('font-style', 'font-variant', 'font-weight');
        $output  = '';

        for ($i = 0; $i < $total; $i++) {
            if ($words[$i] === '') {
                continue;
            }
            switch ($phase) {

                // phase 0: font-style, font-variant or font-weight
                case 0:
                    foreach ($leading as $property) {
                        if (isset($seen[$property])) {
                            continue;
                        }
                        $result = $this->info[$property]->validate(
                            $words[$i], $config, $context
                        );
                        if ($result !== false) {
                            $output .= $result . ' ';
                            $seen[$property] = true;
                            break;
                        }
                    }
                    // once all three are matched, move on
                    if (count($seen) >= 3) {
                        $phase = 1;
                    }
                    if ($result !== false) {
                        break;
                    }
                    // nothing matched: deliberately fall through and try
                    // this same word as a font-size

                // phase 1: font-size and possibly line-height
                case 1:
                    $slash_seen = false;
                    if (strpos($words[$i], '/') === false) {
                        $size_part   = $words[$i];
                        $height_part = false;
                    } else {
                        $halves      = explode('/', $words[$i]);
                        $size_part   = $halves[0];
                        $height_part = $halves[1];
                        if ($height_part === '') {
                            // slash is followed by a space
                            $height_part = false;
                            $slash_seen  = true;
                        }
                    }
                    $result = $this->info['font-size']->validate(
                        $size_part, $config, $context
                    );
                    if ($result === false) {
                        return false;
                    }
                    $output .= $result;
                    if ($height_part !== false) {
                        // size and height were glued together as "a/b"
                        $slash_seen = true;
                        $j = $i;
                    } else {
                        // look ahead for a detached slash and/or height
                        for ($j = $i + 1; $j < $total; $j++) {
                            if ($words[$j] === '') {
                                continue;
                            }
                            if ($words[$j] === '/') {
                                if ($slash_seen) {
                                    return false;
                                }
                                $slash_seen = true;
                                continue;
                            }
                            $height_part = $words[$j];
                            break;
                        }
                    }
                    if ($slash_seen) {
                        // consume everything up to and including the height
                        $i = $j;
                        $result = $this->info['line-height']->validate(
                            $height_part, $config, $context
                        );
                        if ($result !== false) {
                            $output .= '/' . $result;
                        }
                    }
                    $output .= ' ';
                    $phase = 2;
                    break;

                // phase 2: font-family swallows the remainder
                case 2:
                    $family = implode(' ', array_slice($words, $i, $total - $i));
                    $result = $this->info['font-family']->validate(
                        $family, $config, $context
                    );
                    if ($result === false) {
                        return false;
                    }
                    $output .= $result . ' ';
                    // done
                    return rtrim($output);
            }
        }
        return false;
    }

}
09347 
09348 
09349 
09350 
09351 
/**
 * Validates a comma-separated CSS font-family list.  Generic family names
 * pass through unquoted, purely alphanumeric names pass through unquoted,
 * and any other name is only kept (single-quoted) when it consists solely
 * of the characters collected in $mask.
 */
class HTMLPurifier_AttrDef_CSS_FontFamily extends HTMLPurifier_AttrDef
{

    /**
     * Set of bytes allowed in a non-trivial font name: dash, space, ASCII
     * letters and digits, and every byte from 0x80 to 0xFF.
     */
    protected $mask = null;

    public function __construct() {
        // The alphabet is spelled out literally.  Building it with
        // for ($c = 'a'; $c <= 'z'; $c++) does not stop at 'z': string
        // increment turns 'z' into 'aa', which still compares <= 'z', so
        // the loop runs on to 'yz' and bloats the mask with hundreds of
        // redundant characters.
        $this->mask = '- '
            . 'abcdefghijklmnopqrstuvwxyz'
            . 'ABCDEFGHIJKLMNOPQRSTUVWXYZ'
            . '0123456789';
        // special bytes used by UTF-8
        for ($i = 0x80; $i <= 0xFF; $i++) {
            // We don't bother excluding invalid bytes in this range,
            // because our restriction of well-formed UTF-8 will
            // prevent these from ever occurring.
            $this->mask .= chr($i);
        }

        /*
            PHP's internal strcspn implementation is
            O(length of string * length of mask), making it inefficient
            for large masks.  However, it's still faster than
            preg_match 8)
          for (p = s1;;) {
            spanp = s2;
            do {
              if (*spanp == c || p == s1_end) {
                return p - s1;
              }
            } while (spanp++ < (s2_end - 1));
            c = *++p;
          }
         */
        // possible optimization: invert the mask.
    }

    /**
     * @param $string  Raw font-family value
     * @param $config  Configuration object; CSS.AllowedFonts is read from it
     * @param $context Context object (not used here)
     * @return Comma-separated list of the accepted fonts, or false if none
     *         of the entries was accepted
     */
    public function validate($string, $config, $context) {
        static $generic_names = array(
            'serif' => true,
            'sans-serif' => true,
            'monospace' => true,
            'fantasy' => true,
            'cursive' => true
        );
        $allowed_fonts = $config->get('CSS.AllowedFonts');

        // assume that no font names contain commas in them
        $fonts = explode(',', $string);
        $final = '';
        foreach($fonts as $font) {
            $font = trim($font);
            if ($font === '') continue;
            // match a generic name
            if (isset($generic_names[$font])) {
                if ($allowed_fonts === null || isset($allowed_fonts[$font])) {
                    $final .= $font . ', ';
                }
                continue;
            }
            // match a quoted name
            if ($font[0] === '"' || $font[0] === "'") {
                $length = strlen($font);
                if ($length <= 2) continue;
                $quote = $font[0];
                if ($font[$length - 1] !== $quote) continue;
                $font = substr($font, 1, $length - 2);
            }

            $font = $this->expandCSSEscape($font);

            // $font is a pure representation of the font name

            if ($allowed_fonts !== null && !isset($allowed_fonts[$font])) {
                continue;
            }

            if (ctype_alnum($font) && $font !== '') {
                // very simple font, allow it in unharmed
                $final .= $font . ', ';
                continue;
            }

            // bugger out on whitespace.  form feed (0C) really
            // shouldn't show up regardless
            $font = str_replace(array("\n", "\t", "\r", "\x0C"), ' ', $font);

            // Here, there are various classes of characters which need
            // to be treated differently:
            //  - Alphanumeric characters are essentially safe.  We
            //    handled these above.
            //  - Spaces require quoting, though most parsers will do
            //    the right thing if there aren't any characters that
            //    can be misinterpreted
            //  - Dashes rarely occur, but they are fairly unproblematic
            //    for parsing/rendering purposes.
            //  The above characters cover the majority of Western font
            //  names.
            //  - Arbitrary Unicode characters not in ASCII.  Because
            //    most parsers give little thought to Unicode, treatment
            //    of these codepoints is basically uniform, even for
            //    punctuation-like codepoints.  These characters can
            //    show up in non-Western pages and are supported by most
            //    major browsers, for example: "MS 明朝" is a
            //    legitimate font-name
            //    <http://ja.wikipedia.org/wiki/MS_明朝>.  See
            //    the CSS3 spec for more examples:
            //    <http://www.w3.org/TR/2011/WD-css3-fonts-20110324/localizedfamilynames.png>
            //    You can see live samples of these on the Internet:
            //    <http://www.google.co.jp/search?q=font-family+MS+明朝|ゴシック>
            //    However, most of these fonts have ASCII equivalents:
            //    for example, 'MS Mincho', and it's considered
            //    professional to use ASCII font names instead of
            //    Unicode font names.  Thanks Takeshi Terada for
            //    providing this information.
            //  The following characters, to my knowledge, have not been
            //  used to name font names.
            //  - Single quote.  While theoretically you might find a
            //    font name that has a single quote in its name (serving
            //    as an apostrophe, e.g. Dave's Scribble), I haven't
            //    been able to find any actual examples of this.
            //    Internet Explorer's cssText translation (which I
            //    believe is invoked by innerHTML) normalizes any
            //    quoting to single quotes, and fails to escape single
            //    quotes.  (Note that this is not IE's behavior for all
            //    CSS properties, just some sort of special casing for
            //    font-family).  So a single quote *cannot* be used
            //    safely in the font-family context if there will be an
            //    innerHTML/cssText translation.  Note that Firefox 3.x
            //    does this too.
            //  - Double quote.  In IE, these get normalized to
            //    single-quotes, no matter what the encoding.  (Fun
            //    fact, in IE8, the 'content' CSS property gained
            //    support, where they special cased to preserve encoded
            //    double quotes, but still translate unadorned double
            //    quotes into single quotes.)  So, because their
            //    fixpoint behavior is identical to single quotes, they
            //    cannot be allowed either.  Firefox 3.x displays
            //    single-quote style behavior.
            //  - Backslashes are reduced by one (so \\ -> \) every
            //    iteration, so they cannot be used safely.  This shows
            //    up in IE7, IE8 and FF3
            //  - Semicolons, commas and backticks are handled properly.
            //  - The rest of the ASCII punctuation is handled properly.
            // We haven't checked what browsers do to unadorned
            // versions, but this is not important as long as the
            // browser doesn't /remove/ surrounding quotes (as IE does
            // for HTML).
            //
            // With these results in hand, we conclude that there are
            // various levels of safety:
            //  - Paranoid: alphanumeric, spaces and dashes(?)
            //  - International: Paranoid + non-ASCII Unicode
            //  - Edgy: Everything except quotes, backslashes
            //  - NoJS: Standards compliance, e.g. sod IE. Note that
            //    with some judicious character escaping (since certain
            //    types of escaping doesn't work) this is theoretically
            //    OK as long as innerHTML/cssText is not called.
            // We believe that international is a reasonable default
            // (that we will implement now), and once we do more
            // extensive research, we may feel comfortable with dropping
            // it down to edgy.

            // International: alphanumeric, spaces, dashes and Unicode.
            // Use of str(c)spn assumes that the string was already well
            // formed Unicode (which of course it is).
            if (strspn($font, $this->mask) !== strlen($font)) {
                continue;
            }

            // Historical:
            // In the absence of innerHTML/cssText, these ugly
            // transforms don't pose a security risk (as \\ and \"
            // might--these escapes are not supported by most browsers).
            // We could try to be clever and use single-quote wrapping
            // when there is a double quote present, but I have chosen
            // not to implement that.  (NOTE: you can reduce the amount
            // of escapes by one depending on what quoting style you use)
            // $font = str_replace('\\', '\\5C ', $font);
            // $font = str_replace('"',  '\\22 ', $font);
            // $font = str_replace("'",  '\\27 ', $font);

            // font possibly with spaces, requires quoting
            $final .= "'$font', ";
        }
        $final = rtrim($final, ', ');
        if ($final === '') return false;
        return $final;
    }

}
09545 
09546 
09547 
09548 
09549 
/**
 * Validates a single identifier: an optional leading dash, then a letter
 * or underscore, then any run of letters, digits, underscores and dashes.
 */
class HTMLPurifier_AttrDef_CSS_Ident extends HTMLPurifier_AttrDef
{

    /**
     * @return The trimmed identifier, or false if it does not match
     */
    public function validate($string, $config, $context) {

        $ident = trim($string);

        // '' and '0' both convert to false; neither is a valid identifier
        if (!$ident) {
            return false;
        }

        $matched = preg_match('/^(-?[A-Za-z_][A-Za-z_\-0-9]*)$/', $ident);
        return $matched ? $ident : false;

    }

}
09570 
09571 
09572 
09573 
09574 
/**
 * Decorator that strips a trailing "!important" from a value before
 * handing it to the wrapped definition, and optionally puts it back on
 * the validated result.
 */
class HTMLPurifier_AttrDef_CSS_ImportantDecorator extends HTMLPurifier_AttrDef
{
    public $def, $allow;

    /**
     * @param $def   Definition to wrap
     * @param $allow Whether "!important" is re-appended to valid values
     */
    public function __construct($def, $allow = false) {
        $this->def = $def;
        $this->allow = $allow;
    }

    /**
     * @return The wrapped definition's result, with ' !important' appended
     *         when it was present on input and is allowed; false whenever
     *         the wrapped definition rejects the value
     */
    public function validate($string, $config, $context) {
        // test for ! and important tokens
        $string = trim($string);
        $is_important = false;
        // :TODO: optimization: test directly for !important and ! important
        if (strlen($string) >= 9 && substr($string, -9) === 'important') {
            $temp = rtrim(substr($string, 0, -9));
            // use a temp, because we might want to restore important
            if (strlen($temp) >= 1 && substr($temp, -1) === '!') {
                $string = rtrim(substr($temp, 0, -1));
                $is_important = true;
            }
        }
        $string = $this->def->validate($string, $config, $context);
        // A rejected value must stay rejected: appending to false would
        // turn it into the string ' !important'.
        if ($string === false) return false;
        if ($this->allow && $is_important) $string .= ' !important';
        return $string;
    }
}
09611 
09612 
09613 
09614 
09615 
/**
 * Validates a CSS length, optionally constrained to lie between a minimum
 * and a maximum length.
 */
class HTMLPurifier_AttrDef_CSS_Length extends HTMLPurifier_AttrDef
{

    protected $min, $max;

    /**
     * @param $min Lower bound, converted with HTMLPurifier_Length::make();
     *             null for no lower bound
     * @param $max Upper bound, converted with HTMLPurifier_Length::make();
     *             null for no upper bound
     */
    public function __construct($min = null, $max = null) {
        $this->min = ($min === null) ? null : HTMLPurifier_Length::make($min);
        $this->max = ($max === null) ? null : HTMLPurifier_Length::make($max);
    }

    /**
     * @return String form of the length, or false if it is invalid, cannot
     *         be compared against a bound, or lies outside the bounds
     */
    public function validate($string, $config, $context) {
        $string = $this->parseCDATA($string);

        // Shortcuts that avoid building a length object
        if ($string === '') {
            return false;
        }
        if ($string === '0') {
            return '0';
        }
        if (strlen($string) === 1) {
            return false;
        }

        $length = HTMLPurifier_Length::make($string);
        if (!$length->isValid()) {
            return false;
        }

        if ($this->min) {
            $cmp = $length->compareTo($this->min);
            if ($cmp === false || $cmp < 0) {
                return false;
            }
        }
        if ($this->max) {
            $cmp = $length->compareTo($this->max);
            if ($cmp === false || $cmp > 0) {
                return false;
            }
        }

        return $length->toString();
    }

}
09659 
09660 
09661 
09662 
09663 
/**
 * Validates the shorthand CSS property 'list-style' by matching each
 * space-separated piece against the list-style-type, list-style-position
 * and list-style-image validators.
 */
class HTMLPurifier_AttrDef_CSS_ListStyle extends HTMLPurifier_AttrDef
{

    /**
     * Component validators, keyed by longhand property name.  Filled in the
     * constructor from the CSS definition.
     */
    protected $info;

    public function __construct($config) {
        $def = $config->getCSSDefinition();
        $this->info['list-style-type']     = $def->info['list-style-type'];
        $this->info['list-style-position'] = $def->info['list-style-position'];
        $this->info['list-style-image'] = $def->info['list-style-image'];
    }

    /**
     * @return The matched components joined in the order type, image,
     *         position; false if nothing usable was matched
     */
    public function validate($string, $config, $context) {

        // regular pre-processing
        $string = $this->parseCDATA($string);
        if ($string === '') return false;

        // assumes URI doesn't have spaces in it
        $bits = explode(' ', strtolower($string)); // bits to process

        $caught = array();
        $caught['type']     = false;
        $caught['position'] = false;
        $caught['image']    = false;

        $i = 0; // number of catches
        $none = false;

        foreach ($bits as $bit) {
            // All three components are filled, so nothing further can
            // match.  Leave the loop (a bare "return;" here would hand
            // null back to the caller instead of the validated value).
            if ($i >= 3) break;
            if ($bit === '') continue;
            foreach ($caught as $key => $status) {
                if ($status !== false) continue;
                $r = $this->info['list-style-' . $key]->validate($bit, $config, $context);
                if ($r === false) continue;
                if ($r === 'none') {
                    // only the first 'none' is recorded, and never as the image
                    if ($none) continue;
                    else $none = true;
                    if ($key == 'image') continue;
                }
                $caught[$key] = $r;
                $i++;
                break;
            }
        }

        if (!$i) return false;

        $ret = array();

        // construct type
        if ($caught['type']) $ret[] = $caught['type'];

        // construct image
        if ($caught['image']) $ret[] = $caught['image'];

        // construct position
        if ($caught['position']) $ret[] = $caught['position'];

        if (empty($ret)) return false;
        return implode(' ', $ret);

    }

}
09738 
09739 
09740 
09741 
09742 
/**
 * Applies one definition to each space-separated piece of a value and
 * keeps at most $max of the pieces that validate.
 */
class HTMLPurifier_AttrDef_CSS_Multiple extends HTMLPurifier_AttrDef
{

    /** Definition used to validate each individual piece. */
    public $single;

    /** Maximum number of validated pieces kept in the result. */
    public $max;

    /**
     * @param $single Definition applied to every piece
     * @param $max    Upper limit on the number of accepted pieces
     */
    public function __construct($single, $max = 4) {
        $this->max    = $max;
        $this->single = $single;
    }

    /**
     * @return Accepted pieces joined by single spaces, or false if no
     *         piece was accepted
     */
    public function validate($string, $config, $context) {
        $string = $this->parseCDATA($string);
        if ($string === '') {
            return false;
        }
        // parseCDATA replaced \r, \t and \n
        $pieces   = explode(' ', $string);
        $accepted = 0;
        $output   = '';
        foreach ($pieces as $piece) {
            if ($accepted >= $this->max) {
                break;
            }
            if (ctype_space($piece)) {
                continue;
            }
            $validated = $this->single->validate($piece, $config, $context);
            if ($validated === false) {
                continue;
            }
            $output .= $validated . ' ';
            $accepted++;
        }
        if ($output === '') {
            return false;
        }
        return rtrim($output);
    }

}
09797 
09798 
09799 
09800 
09801 
/**
 * Validates a CSS percentage: a number, as accepted by
 * HTMLPurifier_AttrDef_CSS_Number, immediately followed by '%'.
 */
class HTMLPurifier_AttrDef_CSS_Percentage extends HTMLPurifier_AttrDef
{

    /** Validator for the numeric part in front of the percent sign. */
    protected $number_def;

    /**
     * @param $non_negative Passed through to the number validator
     */
    public function __construct($non_negative = false) {
        $this->number_def = new HTMLPurifier_AttrDef_CSS_Number($non_negative);
    }

    /**
     * @return The validated number followed by '%', or false
     */
    public function validate($string, $config, $context) {

        $string = $this->parseCDATA($string);

        if ($string === '') {
            return false;
        }
        // a lone character cannot hold both a digit and the percent sign
        if (strlen($string) === 1) {
            return false;
        }
        if (substr($string, -1) !== '%') {
            return false;
        }

        $digits    = substr($string, 0, -1);
        $validated = $this->number_def->validate($digits, $config, $context);

        return ($validated === false) ? false : "$validated%";

    }

}
09838 
09839 
09840 
09841 
09842 
/**
 * Validates the CSS property 'text-decoration': either 'none', or any
 * space-separated mix of line-through, overline and underline.
 */
class HTMLPurifier_AttrDef_CSS_TextDecoration extends HTMLPurifier_AttrDef
{

    /**
     * @return 'none', the recognised keywords joined by spaces, or false
     *         if no keyword was recognised
     */
    public function validate($string, $config, $context) {

        static $allowed_values = array(
            'line-through' => true,
            'overline' => true,
            'underline' => true,
        );

        $string = strtolower($this->parseCDATA($string));

        if ($string === 'none') {
            return $string;
        }

        // keep recognised keywords, silently dropping everything else
        $kept = array();
        foreach (explode(' ', $string) as $word) {
            if (isset($allowed_values[$word])) {
                $kept[] = $word;
            }
        }

        if (!$kept) {
            return false;
        }
        return implode(' ', $kept);

    }

}
09877 
09878 
09879 
09880 
09881 
/**
 * Validates a CSS url(...) value: unwraps the URI, validates it with the
 * parent URI definition, and re-wraps the result as url("...").
 */
class HTMLPurifier_AttrDef_CSS_URI extends HTMLPurifier_AttrDef_URI
{

    public function __construct() {
        parent::__construct(true); // always embedded
    }

    /**
     * @param $uri_string Raw value, expected in the form url(...)
     * @param $config     Configuration object, forwarded to the parent
     * @param $context    Context object, forwarded to the parent
     * @return url("...") containing the validated URI, or false if the
     *         wrapper is malformed or the URI is rejected
     */
    public function validate($uri_string, $config, $context) {
        // parse the URI out of the string and then pass it onto
        // the parent object

        $uri_string = $this->parseCDATA($uri_string);
        if (strpos($uri_string, 'url(') !== 0) return false;
        $uri_string = (string) substr($uri_string, 4);
        // A bare "url(" leaves nothing to inspect; bail out instead of
        // indexing an empty string.
        if ($uri_string === '') return false;
        $new_length = strlen($uri_string) - 1;
        if ($uri_string[$new_length] !== ')') return false;
        $uri = trim((string) substr($uri_string, 0, $new_length));

        if ($uri !== '' && ($uri[0] === "'" || $uri[0] === '"')) {
            $quote = $uri[0];
            $new_length = strlen($uri) - 1;
            // The closing quote must be a different character from the
            // opening one, so a lone quote is not a matched pair.
            if ($new_length < 1 || $uri[$new_length] !== $quote) return false;
            $uri = (string) substr($uri, 1, $new_length - 1);
        }

        $uri = $this->expandCSSEscape($uri);

        $result = parent::validate($uri, $config, $context);

        if ($result === false) return false;

        // extra sanity check; should have been done by URI
        $result = str_replace(array('"', "\\", "\n", "\x0c", "\r"), "", $result);

        // suspicious characters are ()'; we're going to percent encode
        // them for safety.
        $result = str_replace(array('(', ')', "'"), array('%28', '%29', '%27'), $result);

        // there's an extra bug where ampersands lose their escaping on
        // an innerHTML cycle, so a very unlucky query parameter could
        // then change the meaning of the URL.  Unfortunately, there's
        // not much we can do about that...

        return "url(\"$result\")";

    }

}
09939 
09940 
09941 
09942 
09943 
/**
 * Validates a boolean attribute: any non-empty value is replaced by the
 * configured attribute name.
 */
class HTMLPurifier_AttrDef_HTML_Bool extends HTMLPurifier_AttrDef
{

    /** Value returned for every non-empty input. */
    protected $name;
    public $minimized = true;

    /**
     * @param $name Value that validate() returns on success
     */
    public function __construct($name = false) {
        $this->name = $name;
    }

    /**
     * @return The configured name, or false when $string is empty()
     */
    public function validate($string, $config, $context) {
        return empty($string) ? false : $this->name;
    }

    /**
     * @param $string Name for the new definition
     * @return New HTMLPurifier_AttrDef_HTML_Bool built with that name
     */
    public function make($string) {
        $instance = new HTMLPurifier_AttrDef_HTML_Bool($string);
        return $instance;
    }

}
09968 
09969 
09970 
09971 
09972 
/**
 * Validates a whitespace-separated list of name tokens.  Splitting and
 * filtering are separate protected steps so subclasses can override them.
 */
class HTMLPurifier_AttrDef_HTML_Nmtokens extends HTMLPurifier_AttrDef
{

    /**
     * @return Surviving tokens joined by single spaces, or false if the
     *         input is empty/'0' or no token survives
     */
    public function validate($string, $config, $context) {

        $string = trim($string);

        // '' and '0' both convert to false and are rejected up front
        if (!$string) {
            return false;
        }

        $tokens = $this->filter(
            $this->split($string, $config, $context),
            $config,
            $context
        );
        return empty($tokens) ? false : implode(' ', $tokens);

    }

    /**
     * Extracts the well-formed tokens from the string.
     * @return Array of matched tokens
     */
    protected function split($string, $config, $context) {
        // OPTIMIZABLE!
        // do the preg_match, capture all subpatterns for reformulation

        // we don't support U+00A1 and up codepoints or
        // escaping because I don't know how to do that with regexps
        // and plus it would complicate optimization efforts (you never
        // see that anyway).
        $starts_token = '/(?:(?<=\s)|\A)';  // preceded by space or string start
        $token_body   = '((?:--|-?[A-Za-z_])[A-Za-z_\-0-9]*)';
        $ends_token   = '(?:(?=\s)|\z)/';   // followed by space or string end
        preg_match_all($starts_token . $token_body . $ends_token, $string, $found);
        return $found[1];
    }

    /**
     * Hook for post-processing the token list; returns it unchanged here.
     */
    protected function filter($tokens, $config, $context) {
        return $tokens;
    }

}
10021 
10022 
10023 
10024 
10025 
/**
 * Name-token validator for the class attribute: splitting depends on the
 * doctype, and tokens are filtered through Attr.AllowedClasses and
 * Attr.ForbiddenClasses with duplicates removed.
 */
class HTMLPurifier_AttrDef_HTML_Class extends HTMLPurifier_AttrDef_HTML_Nmtokens
{
    /**
     * Uses the parent's pattern-based split for XHTML 1.1 / XHTML 2.0 and
     * a plain whitespace split for every other doctype.
     */
    protected function split($string, $config, $context) {
        // really, this twiddle should be lazy loaded
        $doctype = $config->getDefinition('HTML')->doctype->name;
        if ($doctype != "XHTML 1.1" && $doctype != "XHTML 2.0") {
            return preg_split('/\s+/', $string);
        }
        return parent::split($string, $config, $context);
    }

    /**
     * @return Tokens that are allowed, not forbidden, and not already
     *         present earlier in the list
     */
    protected function filter($tokens, $config, $context) {
        $allowed   = $config->get('Attr.AllowedClasses');
        $forbidden = $config->get('Attr.ForbiddenClasses');
        $unique = array();
        foreach ($tokens as $token) {
            if ($allowed !== null && !isset($allowed[$token])) {
                continue;
            }
            if (isset($forbidden[$token])) {
                continue;
            }
            // We need this O(n) check because of PHP's array
            // implementation that casts -0 to 0.
            if (in_array($token, $unique, true)) {
                continue;
            }
            $unique[] = $token;
        }
        return $unique;
    }
}
10058 
10059 
10060 
/**
 * Validates an HTML color attribute: either a keyword from
 * Core.ColorKeywords or a 3/6 digit hexadecimal color, with or without
 * the leading '#'.
 */
class HTMLPurifier_AttrDef_HTML_Color extends HTMLPurifier_AttrDef
{

    /**
     * @return The value stored for a matching keyword, or '#' followed by
     *         six hex digits; false if the input is neither
     */
    public function validate($string, $config, $context) {

        // keyword table is read from the config once and then reused
        static $colors = null;
        if ($colors === null) $colors = $config->get('Core.ColorKeywords');

        $string = trim($string);

        if (empty($string)) return false;
        // Look up and return with the same lower-cased key; indexing by
        // the original spelling misses for input such as "RED".
        $keyword = strtolower($string);
        if (isset($colors[$keyword])) return $colors[$keyword];
        if ($string[0] === '#') $hex = substr($string, 1);
        else $hex = $string;

        $length = strlen($hex);
        if ($length !== 3 && $length !== 6) return false;
        if (!ctype_xdigit($hex)) return false;
        // expand shorthand #abc to #aabbcc
        if ($length === 3) $hex = $hex[0].$hex[0].$hex[1].$hex[1].$hex[2].$hex[2];

        return "#$hex";

    }

}
10089 
10090 
10091 
10092 
10093 
/**
 * Enum validator for frame targets whose list of valid values is taken
 * from %Attr.AllowedFrameTargets the first time validate() runs.
 */
class HTMLPurifier_AttrDef_HTML_FrameTarget extends HTMLPurifier_AttrDef_Enum
{

    /** Allowed values; false until loaded from the configuration. */
    public $valid_values = false; // uninitialized value

    /** Flag inherited from the enum validator; set to false here. */
    protected $case_sensitive = false;

    /**
     * Deliberately empty: the parent constructor is not invoked, the
     * value list is filled in lazily by validate().
     */
    public function __construct() {}

    /**
     * @param string $string  Raw attribute value
     * @param object $config  Configuration object
     * @param object $context Context object
     * @return mixed Whatever the parent enum validator returns
     */
    public function validate($string, $config, $context) {
        if ($this->valid_values === false) {
            $this->valid_values = $config->get('Attr.AllowedFrameTargets');
        }
        return parent::validate($string, $config, $context);
    }

}
10111 
10112 
10113 
10114 
10115 
/**
 * Validates an ID value: applies the configured prefix, rejects IDs
 * already recorded in the context's IDAccumulator, checks the allowed
 * character set and applies %Attr.IDBlacklistRegexp.
 */
class HTMLPurifier_AttrDef_HTML_ID extends HTMLPurifier_AttrDef
{

    // selector is NOT a valid thing to use for IDREFs, because IDREFs
    // *must* target IDs that exist, whereas selector #ids do not.

    /**
     * When true, the %Attr.EnableID check and all IDAccumulator
     * bookkeeping are skipped.
     */
    protected $selector;

    /**
     * @param bool $selector See the $selector property
     */
    public function __construct($selector = false) {
        $this->selector = $selector;
    }

    /**
     * @param string $id      Raw ID value
     * @param object $config  Configuration object
     * @param object $context Context object; must provide
     *                        'IDAccumulator' unless in selector mode
     * @return string|bool The (possibly prefixed) ID, or false if invalid
     */
    public function validate($id, $config, $context) {

        $track_ids = !$this->selector;

        if ($track_ids && !$config->get('Attr.EnableID')) return false;

        $id = trim($id);
        if ($id === '') return false;

        $prefix = $config->get('Attr.IDPrefix');
        if ($prefix === '') {
            // a local prefix without a global one is a configuration error
            if ($config->get('Attr.IDPrefixLocal') !== '') {
                trigger_error('%Attr.IDPrefixLocal cannot be used unless '.
                    '%Attr.IDPrefix is set', E_USER_WARNING);
            }
        } else {
            $prefix .= $config->get('Attr.IDPrefixLocal');
            // only prepend when the ID does not already start with it
            if (strpos($id, $prefix) !== 0) {
                $id = $prefix . $id;
            }
        }

        if ($track_ids) {
            $id_accumulator =& $context->get('IDAccumulator');
            if (isset($id_accumulator->ids[$id])) return false;
        }

        // character checks are done without regular expressions
        if (ctype_alpha($id)) {
            $well_formed = true;
        } elseif (!ctype_alpha($id[0])) {
            // first character must be a letter
            return false;
        } else {
            // strip every permitted character from both ends; anything
            // left over means a disallowed character is present
            $leftover = trim($id, 'A..Za..z0..9:-._');
            $well_formed = ($leftover === '');
        }

        $blacklist = $config->get('Attr.IDBlacklistRegexp');
        if ($blacklist && preg_match($blacklist, $id)) {
            return false;
        }

        if ($track_ids && $well_formed) {
            $id_accumulator->add($id);
        }

        return $well_formed ? $id : false;

    }

}
10192 
10193 
10194 
10195 
10196 
/**
 * Validates a pixel count, optionally clamped to a maximum.
 */
class HTMLPurifier_AttrDef_HTML_Pixels extends HTMLPurifier_AttrDef
{

    /** Upper bound for the value, or null for no bound. */
    protected $max;

    /**
     * @param int|null $max Largest value to let through; null disables
     *                      the upper bound
     */
    public function __construct($max = null) {
        $this->max = $max;
    }

    /**
     * @param string $string  Raw attribute value, optionally ending in "px"
     * @param object $config  Configuration object (unused here)
     * @param object $context Context object (unused here)
     * @return string|bool Integer as a string, clamped to [0, max];
     *                     false when the value is empty or not numeric
     */
    public function validate($string, $config, $context) {

        $string = trim($string);
        if ($string === '0') {
            return $string;
        }
        if ($string === '') {
            return false;
        }

        // drop a trailing "px" unit
        $len = strlen($string);
        if (substr($string, $len - 2) == 'px') {
            $string = substr($string, 0, $len - 2);
        }
        if (!is_numeric($string)) {
            return false;
        }

        $pixels = (int) $string;
        if ($pixels < 0) {
            return '0';
        }

        // upper-bound value, extremely high values can
        // crash operating systems, see <http://ha.ckers.org/imagecrash.html>
        // WARNING, above link WILL crash you if you're using Windows
        if ($this->max !== null && $pixels > $this->max) {
            return (string) $this->max;
        }

        return (string) $pixels;

    }

    /**
     * Builds a new instance of this object's class from a string argument.
     *
     * @param string $string Maximum as a string; '' means no maximum
     * @return object New instance of the same class as $this
     */
    public function make($string) {
        $max = ($string === '') ? null : (int) $string;
        $class = get_class($this);
        return new $class($max);
    }

}
10241 
10242 
10243 
10244 
10245 
/**
 * Validates a length: anything the pixel validator accepts, or a
 * percentage clamped to the range 0%-100%.
 */
class HTMLPurifier_AttrDef_HTML_Length extends HTMLPurifier_AttrDef_HTML_Pixels
{

    /**
     * @param string $string  Raw attribute value
     * @param object $config  Configuration object
     * @param object $context Context object
     * @return string|bool Normalized length, or false if invalid
     */
    public function validate($string, $config, $context) {

        $string = trim($string);
        if ($string === '') {
            return false;
        }

        // pixel values are handled by the parent
        $as_pixels = parent::validate($string, $config, $context);
        if ($as_pixels !== false) {
            return $as_pixels;
        }

        // otherwise it has to be "<number>%"
        $last = strlen($string) - 1;
        if ($string[$last] !== '%') {
            return false;
        }

        $number = substr($string, 0, $last);
        if (!is_numeric($number)) {
            return false;
        }

        $percent = (int) $number;
        if ($percent < 0) {
            return '0%';
        }
        if ($percent > 100) {
            return '100%';
        }

        return ((string) $percent) . '%';

    }

}
10283 
10284 
10285 
10286 
10287 
/**
 * Validates a space-separated list of link types (rel / rev) against
 * the configured allow-list.
 */
class HTMLPurifier_AttrDef_HTML_LinkTypes extends HTMLPurifier_AttrDef
{

    /** Directive name within the Attr namespace: AllowedRel or AllowedRev. */
    protected $name;

    /**
     * @param string $name Attribute being validated: 'rel' or 'rev'.
     *                     Any other value triggers an E_USER_ERROR and
     *                     leaves $name unset.
     */
    public function __construct($name) {
        $directives = array(
            'rel' => 'AllowedRel',
            'rev' => 'AllowedRev'
        );
        if (isset($directives[$name])) {
            $this->name = $directives[$name];
            return;
        }
        trigger_error('Unrecognized attribute name for link '.
            'relationship.', E_USER_ERROR);
    }

    /**
     * @param string $string  Raw attribute value
     * @param object $config  Configuration object; the allow-list is
     *                        read as a lookup keyed by link type
     * @param object $context Context object (unused here)
     * @return string|bool Allowed link types, lower-cased, de-duplicated
     *                     and joined by single spaces; false if none
     *                     remain or nothing is allowed
     */
    public function validate($string, $config, $context) {

        $allowed = $config->get('Attr.' . $this->name);
        if (empty($allowed)) {
            return false;
        }

        // keys double as a "seen" set so duplicates collapse
        $accepted = array();
        foreach (explode(' ', $this->parseCDATA($string)) as $part) {
            $type = strtolower(trim($part));
            if (isset($allowed[$type])) {
                $accepted[$type] = true;
            }
        }

        if (empty($accepted)) {
            return false;
        }

        return implode(' ', array_keys($accepted));

    }

}
10337 
10338 
10339 
10340 
10341 
/**
 * Validates a multi-length: anything the length validator accepts, or
 * a relative length written as "<integer>*" (or just "*").
 */
class HTMLPurifier_AttrDef_HTML_MultiLength extends HTMLPurifier_AttrDef_HTML_Length
{

    /**
     * @param string $string  Raw attribute value
     * @param object $config  Configuration object
     * @param object $context Context object
     * @return string|bool Normalized multi-length, or false if invalid
     */
    public function validate($string, $config, $context) {

        $string = trim($string);
        if ($string === '') {
            return false;
        }

        // pixels and percentages are handled by the parent
        $as_length = parent::validate($string, $config, $context);
        if ($as_length !== false) {
            return $as_length;
        }

        // otherwise it has to end in '*'
        $last = strlen($string) - 1;
        if ($string[$last] !== '*') {
            return false;
        }

        $weight = substr($string, 0, $last);

        // a bare "*" is valid as-is
        if ($weight == '') {
            return '*';
        }
        if (!is_numeric($weight)) {
            return false;
        }

        $weight = (int) $weight;

        if ($weight < 0) {
            return false;
        }
        if ($weight == 0) {
            return '0';
        }
        // "1*" is normalized to the shorter "*"
        if ($weight == 1) {
            return '*';
        }
        return ((string) $weight) . '*';

    }

}
10379 
10380 
10381 
10382 
10383 
/**
 * Abstract base for e-mail address validators.
 */
abstract class HTMLPurifier_AttrDef_URI_Email extends HTMLPurifier_AttrDef
{

    /**
     * Placeholder: the body is empty, so calling it returns null.
     *
     * @param string $string Value to unpack (currently ignored)
     */
    public function unpack($string) {
        // needs to be implemented
    }

}
10395 
10396 // sub-implementations
10397 
10398 
10399 
10400 
10401 
/**
 * Validates the host component of a URI: an empty host, a bracketed
 * IPv6 literal, a dotted IPv4 address or a domain name.
 */
class HTMLPurifier_AttrDef_URI_Host extends HTMLPurifier_AttrDef
{

    /** IPv4 validator instance. */
    protected $ipv4;

    /** IPv6 validator instance. */
    protected $ipv6;

    public function __construct() {
        $this->ipv4 = new HTMLPurifier_AttrDef_URI_IPv4();
        $this->ipv6 = new HTMLPurifier_AttrDef_URI_IPv6();
    }

    /**
     * @param string $string  Host to validate
     * @param object $config  Configuration object; %Core.EnableIDNA
     *                        switches on the Punycode fallback
     * @param object $context Context object
     * @return string|bool The accepted host (possibly Punycode-encoded),
     *                     or false if invalid
     */
    public function validate($string, $config, $context) {
        // An empty hostname is OK; per the URI specification the default
        // host defined by the scheme applies when the host subcomponent
        // is undefined or the registered name is empty (zero length).
        if ($string === '') {
            return '';
        }

        $length = strlen($string);
        if ($length > 1 && $string[0] === '[' && $string[$length-1] === ']') {
            // bracketed literal: validate the inside as IPv6
            $inner = substr($string, 1, $length - 2);
            $address = $this->ipv6->validate($inner, $config, $context);
            return ($address === false) ? false : '['. $address . ']';
        }

        // need to do checks on unusual encodings too
        $as_ipv4 = $this->ipv4->validate($string, $config, $context);
        if ($as_ipv4 !== false) {
            return $as_ipv4;
        }

        // A regular domain name. This doesn't match I18N domain names;
        // without the IDNA fallback below, users must supply Punycode.
        $a   = '[a-z]';     // alpha
        $an  = '[a-z0-9]';  // alphanum
        $and = '[a-z0-9-]'; // alphanum | "-"
        // domainlabel = alphanum | alphanum *( alphanum | "-" ) alphanum
        $domainlabel   = "$an($and*$an)?";
        // toplabel    = alpha | alpha *( alphanum | "-" ) alphanum
        $toplabel      = "$a($and*$an)?";
        // hostname    = *( domainlabel "." ) toplabel [ "." ]
        $hostname_pattern = "/^($domainlabel\.)*$toplabel\.?$/i";

        if (preg_match($hostname_pattern, $string)) {
            return $string;
        }

        // With Net_IDNA2 enabled, labels containing high bytes are
        // Punycode-encoded and the result is checked again.
        if (!$config->get('Core.EnableIDNA')) {
            return false;
        }

        $idna = new Net_IDNA2(array('encoding' => 'utf8', 'overlong' => false, 'strict' => true));
        // each dot-separated label is encoded on its own
        $labels = explode('.', $string);
        try {
            $encoded = array();
            foreach ($labels as $label) {
                // a label is encoded only if it has a byte above 'z' (0x7a)
                $needs_encoding = false;
                $size = strlen($label);
                for ($pos = 0; $pos < $size; $pos++) {
                    if (ord($label[$pos]) > 0x7a) {
                        $needs_encoding = true;
                        break;
                    }
                }
                $encoded[] = $needs_encoding ? $idna->encode($label) : $label;
            }
            $string = implode('.', $encoded);
            if (preg_match($hostname_pattern, $string)) {
                return $string;
            }
        } catch (Exception $e) {
            // XXX error reporting
        }

        return false;
    }

}
10499 
10500 
10501 
10502 
10503 
/**
 * Validates a dotted-decimal IPv4 address.
 */
class HTMLPurifier_AttrDef_URI_IPv4 extends HTMLPurifier_AttrDef
{

    /** Regex fragment matching an IPv4 address; built on first use. */
    protected $ip4;

    /**
     * @param string $aIP     Address to validate
     * @param object $config  Configuration object (unused here)
     * @param object $context Context object (unused here)
     * @return string|bool The address unchanged, or false if invalid
     */
    public function validate($aIP, $config, $context) {

        if (!$this->ip4) {
            $this->_loadRegex();
        }

        $is_match = preg_match('#^' . $this->ip4 . '$#s', $aIP);

        return $is_match ? $aIP : false;

    }

    /**
     * Builds the IPv4 regex fragment and stores it in $this->ip4.
     */
    protected function _loadRegex() {
        $oct = '(?:25[0-5]|2[0-4][0-9]|1[0-9]{2}|[1-9][0-9]|[0-9])'; // 0-255
        // four octets joined by literal dots
        $this->ip4 = '(?:' . implode('\\.', array_fill(0, 4, $oct)) . ')';
    }

}
10539 
10540 
10541 
10542 
10543 
/**
 * Validates an IPv6 address, including an optional /prefix suffix, an
 * embedded trailing IPv4 address and "::" compression.
 */
class HTMLPurifier_AttrDef_URI_IPv6 extends HTMLPurifier_AttrDef_URI_IPv4
{

    /**
     * @param string $aIP     Address to validate (without brackets)
     * @param object $config  Configuration object (unused here)
     * @param object $context Context object (unused here)
     * @return string|bool The address exactly as passed in, or false
     *                     if it is not valid
     */
    public function validate($aIP, $config, $context) {

        if (!$this->ip4) {
            $this->_loadRegex();
        }

        // the untouched input is what gets returned on success
        $original = $aIP;

        $pre = '(?:/(?:12[0-8]|1[0-1][0-9]|[1-9][0-9]|[0-9]))';   // /0 - /128

        // prefix check: a '/' is only acceptable as a trailing /0../128
        if (strpos($aIP, '/') !== false) {
            if (!preg_match('#' . $pre . '$#s', $aIP, $found)) {
                return false;
            }
            $aIP = substr($aIP, 0, 0 - strlen($found[0]));
        }

        // IPv4-compatibility check: a trailing dotted quad preceded by
        // ':' is replaced with two colon-separated hex groups
        if (preg_match('#(?<=:)' . $this->ip4 . '$#s', $aIP, $found)) {
            $aIP = substr($aIP, 0, 0 - strlen($found[0]));
            $octets = array_map('dechex', explode('.', $found[0]));
            $aIP .= $octets[0] . $octets[1] . ':' . $octets[2] . $octets[3];
        }

        // compression check: at most one "::" may appear
        $halves = explode('::', $aIP);
        $half_count = count($halves);
        if ($half_count > 2) {
            return false;
        }

        if ($half_count == 2) {
            $head = explode(':', $halves[0]);
            $tail = explode(':', $halves[1]);

            if (count($head) + count($tail) > 8) {
                return false;
            }

            // fill up to eight groups with '0', then overwrite the end
            // with the groups that followed the "::"
            $head = array_pad($head, 8, '0');
            array_splice($head, 8 - count($tail), 8, $tail);
            $groups = $head;
        } else {
            $groups = explode(':', $halves[0]);
        }

        if (count($groups) != 8) {
            return false;
        }

        // every group, left-padded with zeros to four characters, must
        // be exactly four hex digits
        foreach ($groups as $group) {
            if (!preg_match('#^[0-9a-fA-F]{4}$#s', sprintf('%04s', $group))) {
                return false;
            }
        }

        return $original;

    }

}
10639 
10640 
10641 
10642 
10643 
/**
 * E-mail validator based on a single, simple regular expression.
 */
class HTMLPurifier_AttrDef_URI_Email_SimpleCheck extends HTMLPurifier_AttrDef_URI_Email
{

    /**
     * No support for named mailboxes, i.e. "Bob <bob@example.com>";
     * that would need more percent encoding to be done.
     *
     * @param string $string  Raw address
     * @param object $config  Configuration object (unused here)
     * @param object $context Context object (unused here)
     * @return string|bool The trimmed address, or false if it does not
     *                     match the pattern
     */
    public function validate($string, $config, $context) {
        if ($string == '') {
            return false;
        }
        $address = trim($string);
        if (preg_match('/^[A-Z0-9._%-]+@[A-Z0-9.-]+\.[A-Z]{2,4}$/i', $address)) {
            return $address;
        }
        return false;
    }

}
10661 
10662 
10663 
10664 
10665 
/**
 * Converts the background attribute into a CSS background-image
 * declaration.
 */
class HTMLPurifier_AttrTransform_Background extends HTMLPurifier_AttrTransform {

    /**
     * @param array  $attr    Attribute array
     * @param object $config  Configuration object (unused here)
     * @param object $context Context object (unused here)
     * @return array Attribute array after the transform
     */
    public function transform($attr, $config, $context) {

        if (isset($attr['background'])) {
            // NOTE(review): confiscateAttr() / prependCSS() are inherited
            // helpers; presumably they edit $attr by reference - confirm.
            $url = $this->confiscateAttr($attr, 'background');
            // some validation should happen here

            $this->prependCSS($attr, "background-image:url($url);");
        }

        return $attr;

    }

}
10685 
10686 
10687 
10688 
10689 
10690 // this MUST be placed in post, as it assumes that any value in dir is valid
10691 
/**
 * Supplies a dir attribute from %Attr.DefaultTextDir when none is set.
 */
class HTMLPurifier_AttrTransform_BdoDir extends HTMLPurifier_AttrTransform
{

    /**
     * @param array  $attr    Attribute array
     * @param object $config  Configuration object
     * @param object $context Context object (unused here)
     * @return array Attribute array with 'dir' guaranteed to be assigned
     */
    public function transform($attr, $config, $context) {
        if (!isset($attr['dir'])) {
            $attr['dir'] = $config->get('Attr.DefaultTextDir');
        }
        return $attr;
    }

}
10705 
10706 
10707 
10708 
10709 
/**
 * Converts the bgcolor attribute into a CSS background-color
 * declaration.
 */
class HTMLPurifier_AttrTransform_BgColor extends HTMLPurifier_AttrTransform {

    /**
     * @param array  $attr    Attribute array
     * @param object $config  Configuration object (unused here)
     * @param object $context Context object (unused here)
     * @return array Attribute array after the transform
     */
    public function transform($attr, $config, $context) {

        if (isset($attr['bgcolor'])) {
            // NOTE(review): confiscateAttr() / prependCSS() are inherited
            // helpers; presumably they edit $attr by reference - confirm.
            $color = $this->confiscateAttr($attr, 'bgcolor');
            // some validation should happen here

            $this->prependCSS($attr, "background-color:$color;");
        }

        return $attr;

    }

}
10729 
10730 
10731 
10732 
10733 
/**
 * Replaces a boolean attribute with a fixed CSS snippet.
 */
class HTMLPurifier_AttrTransform_BoolToCSS extends HTMLPurifier_AttrTransform {

    /** Name of the boolean attribute that triggers the transform. */
    protected $attr;

    /** CSS text passed to prependCSS() when the attribute is present. */
    protected $css;

    /**
     * @param string $attr Attribute name to look for
     * @param string $css  CSS to add when the attribute is present
     */
    public function __construct($attr, $css) {
        $this->attr = $attr;
        $this->css  = $css;
    }

    /**
     * @param array  $attr    Attribute array
     * @param object $config  Configuration object (unused here)
     * @param object $context Context object (unused here)
     * @return array Attribute array after the transform
     */
    public function transform($attr, $config, $context) {
        $name = $this->attr;
        if (isset($attr[$name])) {
            unset($attr[$name]);
            $this->prependCSS($attr, $this->css);
        }
        return $attr;
    }

}
10766 
10767 
10768 
10769 
10770 
/**
 * Converts the border attribute into a CSS border declaration.
 */
class HTMLPurifier_AttrTransform_Border extends HTMLPurifier_AttrTransform {

    /**
     * @param array  $attr    Attribute array
     * @param object $config  Configuration object (unused here)
     * @param object $context Context object (unused here)
     * @return array Attribute array after the transform
     */
    public function transform($attr, $config, $context) {
        if (isset($attr['border'])) {
            $width = $this->confiscateAttr($attr, 'border');
            // some validation should happen here
            $this->prependCSS($attr, "border:{$width}px solid;");
        }
        return $attr;
    }

}
10785 
10786 
10787 
10788 
10789 
/**
 * Replaces an enumerated attribute with the CSS snippet mapped to its
 * value.
 */
class HTMLPurifier_AttrTransform_EnumToCSS extends HTMLPurifier_AttrTransform {

    /** Name of the attribute to transform. */
    protected $attr;

    /** Lookup from attribute value to CSS text. */
    protected $enumToCSS = array();

    /**
     * Whether values are matched case-sensitively. When false the value
     * is lower-cased before lookup, so the lookup keys must be lower-case.
     */
    protected $caseSensitive = false;

    /**
     * @param string $attr           Attribute name to transform
     * @param array  $enum_to_css    Lookup from attribute value to CSS
     * @param bool   $case_sensitive Match values case-sensitively?
     */
    public function __construct($attr, $enum_to_css, $case_sensitive = false) {
        $this->attr = $attr;
        $this->enumToCSS = $enum_to_css;
        $this->caseSensitive = (bool) $case_sensitive;
    }

    /**
     * @param array  $attr    Attribute array
     * @param object $config  Configuration object (unused here)
     * @param object $context Context object (unused here)
     * @return array Attribute array with the attribute removed and, for
     *               a recognized value, its CSS added
     */
    public function transform($attr, $config, $context) {

        $name = $this->attr;
        if (!isset($attr[$name])) {
            return $attr;
        }

        $value = trim($attr[$name]);
        // the attribute is dropped whether or not its value is recognized
        unset($attr[$name]);

        if (!$this->caseSensitive) {
            $value = strtolower($value);
        }

        if (isset($this->enumToCSS[$value])) {
            $this->prependCSS($attr, $this->enumToCSS[$value]);
        }

        return $attr;

    }

}
10844 
10845 
10846 
10847 
10848 
10849 // must be called POST validation
10850 
/**
 * Fills in missing src and alt attributes on an image from configured
 * defaults.
 */
class HTMLPurifier_AttrTransform_ImgRequired extends HTMLPurifier_AttrTransform
{

    /**
     * @param array  $attr    Attribute array
     * @param object $config  Configuration object
     * @param object $context Context object (unused here)
     * @return array Attribute array; returned untouched when src is
     *               missing and %Core.RemoveInvalidImg is enabled
     */
    public function transform($attr, $config, $context) {

        $had_src = isset($attr['src']);
        if (!$had_src) {
            if ($config->get('Core.RemoveInvalidImg')) {
                return $attr;
            }
            $attr['src'] = $config->get('Attr.DefaultInvalidImage');
        }

        if (isset($attr['alt'])) {
            return $attr;
        }

        if (!$had_src) {
            // src had to be substituted, so use the "invalid image" text
            $attr['alt'] = $config->get('Attr.DefaultInvalidImageAlt');
            return $attr;
        }

        $default_alt = $config->get('Attr.DefaultImageAlt');
        if ($default_alt === null) {
            // derive alt from the file name, truncated to 40 bytes
            $attr['alt'] = substr(basename($attr['src']),0,40);
        } else {
            $attr['alt'] = $default_alt;
        }

        return $attr;

    }

}
10888 
10889 
10890 
10891 
10892 
/**
 * Converts the hspace / vspace attributes into CSS margin declarations.
 */
class HTMLPurifier_AttrTransform_ImgSpace extends HTMLPurifier_AttrTransform {

    /** Name of the attribute handled by this instance. */
    protected $attr;

    /** Margin sides affected by each supported attribute. */
    protected $css = array(
        'hspace' => array('left', 'right'),
        'vspace' => array('top', 'bottom')
    );

    /**
     * @param string $attr Attribute name; an error is triggered when it
     *                     is not a key of $css
     */
    public function __construct($attr) {
        $this->attr = $attr;
        if (!isset($this->css[$attr])) {
            trigger_error(htmlspecialchars($attr) . ' is not valid space attribute');
        }
    }

    /**
     * @param array  $attr    Attribute array
     * @param object $config  Configuration object (unused here)
     * @param object $context Context object (unused here)
     * @return array Attribute array after the transform
     */
    public function transform($attr, $config, $context) {

        $name = $this->attr;
        if (!isset($attr[$name])) {
            return $attr;
        }

        $width = $this->confiscateAttr($attr, $name);
        // some validation could happen here

        // unsupported attribute name: nothing to emit
        if (!isset($this->css[$name])) {
            return $attr;
        }

        $declarations = '';
        foreach ($this->css[$name] as $side) {
            $declarations .= "margin-$side:{$width}px;";
        }

        $this->prependCSS($attr, $declarations);

        return $attr;

    }

}
10933 
10934 
10935 
10936 
10937 
/**
 * Removes or normalizes input attributes that do not apply to the
 * element's type.
 */
class HTMLPurifier_AttrTransform_Input extends HTMLPurifier_AttrTransform {

    /** Pixel validator applied to size on non-text inputs. */
    protected $pixels;

    public function __construct() {
        $this->pixels = new HTMLPurifier_AttrDef_HTML_Pixels();
    }

    /**
     * @param array  $attr    Attribute array
     * @param object $config  Configuration object
     * @param object $context Context object
     * @return array Attribute array after the transform
     */
    public function transform($attr, $config, $context) {
        // a missing type is treated as 'text'
        $t = isset($attr['type']) ? strtolower($attr['type']) : 'text';

        $is_textual   = ($t === 'text'  || $t === 'password');
        $is_checkable = ($t === 'radio' || $t === 'checkbox');

        if (isset($attr['checked']) && !$is_checkable) {
            unset($attr['checked']);
        }
        if (isset($attr['maxlength']) && !$is_textual) {
            unset($attr['maxlength']);
        }
        if (isset($attr['size']) && !$is_textual) {
            // for non-text inputs size is validated as a pixel count
            $size = $this->pixels->validate($attr['size'], $config, $context);
            if ($size === false) {
                unset($attr['size']);
            } else {
                $attr['size'] = $size;
            }
        }
        if (isset($attr['src']) && $t !== 'image') {
            unset($attr['src']);
        }
        if (!isset($attr['value']) && $is_checkable) {
            $attr['value'] = '';
        }
        return $attr;
    }

}
10974 
10975 
10976 
10977 
10978 
/**
 * Keeps lang and xml:lang in sync; xml:lang wins when both are set.
 */
class HTMLPurifier_AttrTransform_Lang extends HTMLPurifier_AttrTransform
{

    /**
     * @param array  $attr    Attribute array
     * @param object $config  Configuration object (unused here)
     * @param object $context Context object (unused here)
     * @return array Attribute array after the transform
     */
    public function transform($attr, $config, $context) {

        $lang     = isset($attr['lang']) ? $attr['lang'] : false;
        $xml_lang = isset($attr['xml:lang']) ? $attr['xml:lang'] : false;

        if ($xml_lang !== false) {
            // xml:lang takes precedence and overwrites lang
            $attr['lang'] = $xml_lang;
        } elseif ($lang !== false) {
            $attr['xml:lang'] = $lang;
        }

        return $attr;

    }

}
11003 
11004 
11005 
11006 
11007 
/**
 * Converts a length attribute into a CSS declaration.
 */
class HTMLPurifier_AttrTransform_Length extends HTMLPurifier_AttrTransform
{

    /** Name of the HTML attribute to convert. */
    protected $name;

    /** Name of the CSS property to emit. */
    protected $cssName;

    /**
     * @param string      $name     Attribute name
     * @param string|null $css_name CSS property name; falls back to
     *                              $name when empty
     */
    public function __construct($name, $css_name = null) {
        $this->name = $name;
        if ($css_name) {
            $this->cssName = $css_name;
        } else {
            $this->cssName = $name;
        }
    }

    /**
     * @param array  $attr    Attribute array
     * @param object $config  Configuration object (unused here)
     * @param object $context Context object (unused here)
     * @return array Attribute array after the transform
     */
    public function transform($attr, $config, $context) {
        $name = $this->name;
        if (!isset($attr[$name])) {
            return $attr;
        }
        $length = $this->confiscateAttr($attr, $name);
        // values made up only of digits get a "px" unit
        if (ctype_digit($length)) {
            $length .= 'px';
        }
        $this->prependCSS($attr, $this->cssName . ":$length;");
        return $attr;
    }

}
11031 
11032 
11033 
11034 
11035 
/**
 * Moves the name attribute into id, unless an id is already present.
 */
class HTMLPurifier_AttrTransform_Name extends HTMLPurifier_AttrTransform
{

    /**
     * @param array  $attr    Attribute array
     * @param object $config  Configuration object
     * @param object $context Context object (unused here)
     * @return array Attribute array after the transform
     */
    public function transform($attr, $config, $context) {
        // Abort early if we're using relaxed definition of name
        if ($config->get('HTML.Attr.Name.UseCDATA')) {
            return $attr;
        }
        if (!isset($attr['name'])) {
            return $attr;
        }
        // name is taken out of $attr in either case below
        $name = $this->confiscateAttr($attr, 'name');
        if (!isset($attr['id'])) {
            $attr['id'] = $name;
        }
        return $attr;
    }

}
11053 
11054 
11055 
11056 
11057 
/**
 * Validates the name attribute with the ID validator, unless it is
 * identical to the element's id.
 */
class HTMLPurifier_AttrTransform_NameSync extends HTMLPurifier_AttrTransform
{

    /**
     * ID validator applied to the name attribute. Declared explicitly
     * so the constructor does not create a dynamic property (deprecated
     * as of PHP 8.2); kept public because a dynamically created
     * property was publicly accessible.
     */
    public $idDef;

    public function __construct() {
        $this->idDef = new HTMLPurifier_AttrDef_HTML_ID();
    }

    /**
     * @param array  $attr    Attribute array
     * @param object $config  Configuration object
     * @param object $context Context object
     * @return array Attribute array; name is removed when it fails ID
     *               validation, otherwise replaced by the validated value
     */
    public function transform($attr, $config, $context) {
        if (!isset($attr['name'])) return $attr;
        $name = $attr['name'];
        // a name equal to the id is left untouched and not validated
        if (isset($attr['id']) && $attr['id'] === $name) return $attr;
        $result = $this->idDef->validate($name, $config, $context);
        if ($result === false) unset($attr['name']);
        else $attr['name'] = $result;
        return $attr;
    }

}
11081 
11082 
11083 
11084 
11085 
11086 // must be called POST validation
11087 
/**
 * Adds rel="nofollow" to links whose scheme is browsable and whose
 * target is not local.
 */
class HTMLPurifier_AttrTransform_Nofollow extends HTMLPurifier_AttrTransform
{
    /** URI parser used to inspect href values. */
    private $parser;

    public function __construct() {
        $this->parser = new HTMLPurifier_URIParser();
    }

    /**
     * @param array  $attr    Attribute array
     * @param object $config  Configuration object
     * @param object $context Context object
     * @return array Attribute array; for qualifying links, rel contains
     *               'nofollow' in addition to any existing values
     */
    public function transform($attr, $config, $context) {

        if (!isset($attr['href'])) {
            return $attr;
        }

        // XXX Kind of inefficient
        $url = $this->parser->parse($attr['href']);
        $scheme = $url->getSchemeObj($config, $context);

        if ($scheme->browsable && !$url->isLocal($config, $context)) {
            if (isset($attr['rel'])) {
                // Split the existing rel VALUE; passing the whole $attr
                // array to explode() would discard the current rel tokens.
                $rels = explode(' ', $attr['rel']);
                if (!in_array('nofollow', $rels)) {
                    $rels[] = 'nofollow';
                }
                $attr['rel'] = implode(' ', $rels);
            } else {
                $attr['rel'] = 'nofollow';
            }
        }

        return $attr;

    }

}
11127 
11128 
11129 
11130 
11131 
/**
 * Forces the attributes of an embed element to fixed Flash settings.
 */
class HTMLPurifier_AttrTransform_SafeEmbed extends HTMLPurifier_AttrTransform
{
    public $name = "SafeEmbed";

    /**
     * @param array  $attr    Attribute array
     * @param object $config  Configuration object (unused here)
     * @param object $context Context object (unused here)
     * @return array Attribute array with the three forced values set,
     *               overwriting whatever was there
     */
    public function transform($attr, $config, $context) {
        $forced = array(
            'allowscriptaccess' => 'never',
            'allownetworking'   => 'internal',
            'type'              => 'application/x-shockwave-flash'
        );
        foreach ($forced as $key => $value) {
            $attr[$key] = $value;
        }
        return $attr;
    }
}
11143 
11144 
11145 
11146 
11147 
/**
 * Gives an object element the Flash MIME type when it has no type.
 */
class HTMLPurifier_AttrTransform_SafeObject extends HTMLPurifier_AttrTransform
{
    public $name = "SafeObject";

    /**
     * @param array  $attr    Attribute array
     * @param object $config  Configuration object (unused here)
     * @param object $context Context object (unused here)
     * @return array Attribute array with 'type' guaranteed to be set
     */
    public function transform($attr, $config, $context) {
        if (isset($attr['type'])) {
            return $attr;
        }
        $attr['type'] = 'application/x-shockwave-flash';
        return $attr;
    }
}
11160 
11161 
11162 
11163 
11164 
/**
 * Restricts param elements to a fixed set of Flash parameters, forcing
 * or validating their values; any other parameter has its name and
 * value set to null.
 */
class HTMLPurifier_AttrTransform_SafeParam extends HTMLPurifier_AttrTransform
{
    public $name = "SafeParam";

    /** URI validator, constructed with a true argument ("embedded"). */
    private $uri;

    /**
     * Enum validator for the wmode parameter. Declared explicitly so
     * the constructor does not create a dynamic property (deprecated as
     * of PHP 8.2); kept public because a dynamically created property
     * was publicly accessible.
     */
    public $wmode;

    public function __construct() {
        $this->uri = new HTMLPurifier_AttrDef_URI(true); // embedded
        $this->wmode = new HTMLPurifier_AttrDef_Enum(array('window', 'opaque', 'transparent'));
    }

    /**
     * @param array  $attr    Attribute array of a param element
     * @param object $config  Configuration object
     * @param object $context Context object
     * @return array Attribute array after the transform
     */
    public function transform($attr, $config, $context) {
        // A param without a name takes the default branch, exactly as
        // before, but without raising an undefined-index notice.
        $param = isset($attr['name']) ? $attr['name'] : null;

        // If we add support for other objects, we'll need to alter the
        // transforms.
        switch ($param) {
            // application/x-shockwave-flash
            // Keep this synchronized with Injector/SafeObject.php
            case 'allowScriptAccess':
                $attr['value'] = 'never';
                break;
            case 'allowNetworking':
                $attr['value'] = 'internal';
                break;
            case 'allowFullScreen':
                if ($config->get('HTML.FlashAllowFullScreen')) {
                    // anything other than 'true' becomes 'false'
                    $attr['value'] = ($attr['value'] == 'true') ? 'true' : 'false';
                } else {
                    $attr['value'] = 'false';
                }
                break;
            case 'wmode':
                $attr['value'] = $this->wmode->validate($attr['value'], $config, $context);
                break;
            case 'movie':
            case 'src':
                // both spellings are normalized to "movie"
                $attr['name'] = "movie";
                $attr['value'] = $this->uri->validate($attr['value'], $config, $context);
                break;
            case 'flashvars':
                // we're going to allow arbitrary inputs to the SWF, on
                // the reasoning that it could only hack the SWF, not us.
                break;
            // add other cases to support other param name/value pairs
            default:
                $attr['name'] = $attr['value'] = null;
        }
        return $attr;
    }
}
11225 
11226 
11227 
11228 
11229 
/**
 * Supplies type="text/javascript" when the type attribute is missing.
 */
class HTMLPurifier_AttrTransform_ScriptRequired extends HTMLPurifier_AttrTransform
{
    /**
     * @param array  $attr    Attribute array
     * @param object $config  Configuration object (unused here)
     * @param object $context Context object (unused here)
     * @return array Attribute array with 'type' guaranteed to be set
     */
    public function transform($attr, $config, $context) {
        if (isset($attr['type'])) {
            return $attr;
        }
        $attr['type'] = 'text/javascript';
        return $attr;
    }
}
11242 
11243 
11244 
11245 
11246 
11247 // must be called POST validation
11248 
/**
 * Adds target="_blank" to links whose scheme is browsable and whose
 * URI is not reported as benign.
 */
class HTMLPurifier_AttrTransform_TargetBlank extends HTMLPurifier_AttrTransform
{
    /** URI parser used to inspect href values. */
    private $parser;

    public function __construct() {
        $this->parser = new HTMLPurifier_URIParser();
    }

    /**
     * @param array  $attr    Attribute array
     * @param object $config  Configuration object
     * @param object $context Context object
     * @return array Attribute array, with target set for qualifying links
     */
    public function transform($attr, $config, $context) {

        if (!isset($attr['href'])) {
            return $attr;
        }

        // XXX Kind of inefficient
        $url = $this->parser->parse($attr['href']);
        $scheme = $url->getSchemeObj($config, $context);

        if ($scheme->browsable && !$url->isBenign($config, $context)) {
            // The reserved keyword is "_blank". A bare "blank" is an
            // ordinary window name, so every link would be loaded into
            // one shared window called "blank" instead of a new one.
            $attr['target'] = '_blank';
        }

        return $attr;

    }

}
11281 
11282 
11283 
11284 
11285 
/**
 * Supplies default cols and rows attributes for a textarea.
 */
class HTMLPurifier_AttrTransform_Textarea extends HTMLPurifier_AttrTransform
{

    /**
     * @param array  $attr    Attribute array
     * @param object $config  Configuration object (unused here)
     * @param object $context Context object (unused here)
     * @return array Attribute array with cols and rows guaranteed set
     */
    public function transform($attr, $config, $context) {
        // Calculated from Firefox
        $defaults = array('cols' => '22', 'rows' => '3');
        foreach ($defaults as $key => $value) {
            if (!isset($attr[$key])) {
                $attr[$key] = $value;
            }
        }
        return $attr;
    }

}
11300 
11301 
11302 
11303 
11304 
/**
 * Child definition that validates with one of two wrapped definitions,
 * chosen at validation time from the 'IsInline' value in the context.
 */
class HTMLPurifier_ChildDef_Chameleon extends HTMLPurifier_ChildDef
{

    /**
     * HTMLPurifier_ChildDef_Optional used unless 'IsInline' is exactly false.
     */
    public $inline;

    /**
     * HTMLPurifier_ChildDef_Optional used when 'IsInline' is exactly false.
     */
    public $block;

    public $type = 'chameleon';

    /**
     * @param mixed $inline Argument handed to the HTMLPurifier_ChildDef_Optional
     *                      that backs the inline case.
     * @param mixed $block  Argument handed to the HTMLPurifier_ChildDef_Optional
     *                      that backs the block case.
     */
    public function __construct($inline, $block) {
        $inline_def = new HTMLPurifier_ChildDef_Optional($inline);
        $block_def  = new HTMLPurifier_ChildDef_Optional($block);

        $this->inline = $inline_def;
        $this->block  = $block_def;
        // The advertised element set is taken from the block definition.
        $this->elements = $block_def->elements;
    }

    /**
     * Forwards validation to the block or inline definition.
     *
     * @param array $tokens_of_children Child tokens to validate.
     * @param mixed $config             Configuration object, passed through.
     * @param mixed $context            Context object; its 'IsInline' entry
     *                                  selects the definition.
     * @return mixed Whatever the selected definition's validateChildren()
     *               returns.
     */
    public function validateChildren($tokens_of_children, $config, $context) {
        // Only a strict boolean false selects the block definition; any
        // other value falls through to the inline one.
        $delegate = ($context->get('IsInline') === false)
            ? $this->block
            : $this->inline;

        return $delegate->validateChildren($tokens_of_children, $config, $context);
    }
}
11349 
11350 
11351 
11352 
11353 
/**
 * Child definition driven by a DTD-style content model string, which is
 * compiled once into a PCRE pattern and then matched against a
 * comma-separated list of the direct children's names.
 */
class HTMLPurifier_ChildDef_Custom extends HTMLPurifier_ChildDef
{
    public $type = 'custom';
    public $allow_empty = false;

    /**
     * The content model string exactly as passed to the constructor.
     */
    public $dtd_regex;

    /**
     * PCRE pattern body (no delimiters or anchors) compiled from $dtd_regex.
     */
    private $_pcre_regex;

    /**
     * @param string $dtd_regex DTD-style content model, e.g. "(a,b?,c*)".
     */
    public function __construct($dtd_regex) {
        $this->dtd_regex = $dtd_regex;
        $this->_compileRegex();
    }

    /**
     * Compiles $this->dtd_regex into $this->_pcre_regex and records every
     * element name it mentions in $this->elements.
     *
     * Worked example: "(a,b?,c*)" compiles to "((,a)(,b)?(,c)*)", so each
     * element consumes its own leading comma when matched against a list
     * such as ",a,b,c".
     */
    protected function _compileRegex() {
        $raw = str_replace(' ', '', $this->dtd_regex);
        // Wrap the whole expression in a group unless it already starts with
        // one. The empty-string test avoids reading offset 0 of an empty
        // string; an empty model still becomes "()", as before.
        if ($raw === '' || $raw[0] !== '(') {
            $raw = "($raw)";
        }
        $el = '[#a-zA-Z0-9_.-]+';
        $reg = $raw;

        // COMPLICATED! AND MIGHT BE BUGGY! I HAVE NO CLUE WHAT I'M
        // DOING! Seriously: if there's problems, please report them.

        // collect all elements into the $elements array
        preg_match_all("/$el/", $reg, $matches);
        foreach ($matches[0] as $match) {
            $this->elements[$match] = true;
        }

        // setup all elements as parentheticals with leading commas
        $reg = preg_replace("/$el/", '(,\\0)', $reg);

        // remove commas when they were not solicited
        $reg = preg_replace("/([^,(|]\(+),/", '\\1', $reg);

        // remove all non-parenthetical commas: they are handled by first regex
        $reg = preg_replace("/,\(/", '(', $reg);

        $this->_pcre_regex = $reg;
    }

    /**
     * Checks the direct children against the compiled content model.
     *
     * @param array $tokens_of_children Child tokens, including nested ones.
     * @param mixed $config             Configuration object (not used here).
     * @param mixed $context            Context object (not used here).
     * @return bool True when the list of direct children matches the model,
     *              false otherwise.
     */
    public function validateChildren($tokens_of_children, $config, $context) {
        $list_of_children = '';
        $nesting = 0; // depth into the nest
        foreach ($tokens_of_children as $token) {
            if (!empty($token->is_whitespace)) continue;

            // Decide before adjusting depth: a start tag seen at depth 0 is
            // itself a direct child, while its matching end tag is seen at
            // depth 1 and is therefore not listed.
            $is_child = ($nesting == 0); // direct

            if ($token instanceof HTMLPurifier_Token_Start) {
                $nesting++;
            } elseif ($token instanceof HTMLPurifier_Token_End) {
                $nesting--;
            }

            if ($is_child) {
                $list_of_children .= $token->name . ',';
            }
        }
        // add leading comma to deal with stray comma declarations
        $list_of_children = ',' . rtrim($list_of_children, ',');
        $okay =
            preg_match(
                '/^,?'.$this->_pcre_regex.'$/',
                $list_of_children
            );

        // preg_match() yields 1, 0 or false; collapse all three to a boolean.
        return (bool) $okay;
    }
}
11440 
11441 
11442 
11443 
11444 
/**
 * Child definition whose validation always answers with an empty array,
 * whatever children it is given.
 */
class HTMLPurifier_ChildDef_Empty extends HTMLPurifier_ChildDef
{
    public $allow_empty = true;
    public $type = 'empty';

    /**
     * Nothing to set up.
     */
    public function __construct() {
    }

    /**
     * @param array $tokens_of_children Child tokens (ignored).
     * @param mixed $config             Configuration object (ignored).
     * @param mixed $context            Context object (ignored).
     * @return array Always an empty array.
     */
    public function validateChildren($tokens_of_children, $config, $context) {
        $no_children = array();
        return $no_children;
    }
}
11461 
11462 
11463 
11464 
11465 
11469 class HTMLPurifier_ChildDef_List extends HTMLPurifier_ChildDef
11470 {
11471     public $type = 'list';
11472     // lying a little bit, so that we can handle ul and ol ourselves
11473     // XXX: This whole business with 'wrap' is all a bit unsatisfactory
11474     public $elements = array('li' => true, 'ul' => true, 'ol' => true);
11475     public function validateChildren($tokens_of_children, $config, $context) {
11476         // Flag for subclasses
11477         $this->whitespace = false;
11478 
11479         // if there are no tokens, delete parent node
11480         if (empty($tokens_of_children)) return false;
11