HTMLPurifier 4.4.0
/home/ezyang/Dev/htmlpurifier/library/HTMLPurifier.standalone.php
Go to the documentation of this file.
00001 <?php
00002 
00041 /*
00042     HTML Purifier 4.4.0 - Standards Compliant HTML Filtering
00043     Copyright (C) 2006-2008 Edward Z. Yang
00044 
00045     This library is free software; you can redistribute it and/or
00046     modify it under the terms of the GNU Lesser General Public
00047     License as published by the Free Software Foundation; either
00048     version 2.1 of the License, or (at your option) any later version.
00049 
00050     This library is distributed in the hope that it will be useful,
00051     but WITHOUT ANY WARRANTY; without even the implied warranty of
00052     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
00053     Lesser General Public License for more details.
00054 
00055     You should have received a copy of the GNU Lesser General Public
00056     License along with this library; if not, write to the Free Software
00057     Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA
00058  */
00059 
/**
 * Facade for the purification pipeline.
 *
 * Holds the default configuration, the core strategy and any filters added
 * through the deprecated addFilter(), and exposes purify() / purifyArray()
 * together with a shared-instance accessor.
 */
class HTMLPurifier
{

    /** Version of HTML Purifier, as an instance property. */
    public $version = '4.4.0';

    /** Version of HTML Purifier, as a class constant. */
    const VERSION = '4.4.0';

    /** Default configuration, built by the constructor through HTMLPurifier_Config::create(). */
    public $config;

    /** Filter objects registered through the deprecated addFilter(). */
    private $filters = array();

    /** Shared instance handed out by instance(). */
    private static $instance;

    /**
     * $strategy is the HTMLPurifier_Strategy_Core created by the constructor;
     * $generator is replaced with a fresh HTMLPurifier_Generator on every
     * purify() call.
     */
    protected $strategy, $generator;

    /**
     * After purify(): the HTMLPurifier_Context used by that call.
     * After purifyArray(): an array of contexts whose keys (and nesting)
     * mirror the array that was purified.
     */
    public $context;

    /**
     * Builds the default configuration and the core strategy.
     *
     * @param mixed $config Anything HTMLPurifier_Config::create() accepts;
     *                      null is passed through unchanged.
     */
    public function __construct($config = null) {

        $this->config = HTMLPurifier_Config::create($config);

        $this->strategy     = new HTMLPurifier_Strategy_Core();

    }

    /**
     * Registers an extra filter that runs after the configured ones.
     *
     * Emits an E_USER_WARNING on every call because the method is deprecated.
     *
     * @param object $filter Object exposing preFilter() and postFilter().
     */
    public function addFilter($filter) {
        trigger_error('HTMLPurifier->addFilter() is deprecated, use configuration directives in the Filter namespace or Filter.Custom', E_USER_WARNING);
        $this->filters[] = $filter;
    }

    /**
     * Purifies a string of HTML.
     *
     * The input is converted to UTF-8, passed through every filter's
     * preFilter(), tokenized, run through the core strategy, regenerated,
     * passed through every filter's postFilter() in reverse order and
     * finally converted back from UTF-8. The context used for the run is
     * left in $this->context.
     *
     * @param string $html   HTML to purify.
     * @param mixed  $config Optional configuration for this call only; when
     *                       given it replaces (does not merge with) the
     *                       default configuration.
     * @return string Purified HTML.
     */
    public function purify($html, $config = null) {

        // :TODO: make the config merge in, instead of replace
        $config = $config ? HTMLPurifier_Config::create($config) : $this->config;

        // implementation is partially environment dependent, partially
        // configuration dependent
        $lexer = HTMLPurifier_Lexer::create($config);

        $context = new HTMLPurifier_Context();

        // setup HTML generator
        $this->generator = new HTMLPurifier_Generator($config, $context);
        $context->register('Generator', $this->generator);

        // set up global context variables
        if ($config->get('Core.CollectErrors')) {
            // may get moved out if other facilities use it
            $language_factory = HTMLPurifier_LanguageFactory::instance();
            $language = $language_factory->create($config, $context);
            $context->register('Locale', $language);

            $error_collector = new HTMLPurifier_ErrorCollector($context);
            $context->register('ErrorCollector', $error_collector);
        }

        // setup id_accumulator context, necessary due to the fact that
        // AttrValidator can be called from many places
        $id_accumulator = HTMLPurifier_IDAccumulator::build($config, $context);
        $context->register('IDAccumulator', $id_accumulator);

        $html = HTMLPurifier_Encoder::convertToUTF8($html, $config, $context);

        // setup filters: enabled Filter.* flags first, then Filter.Custom,
        // then anything registered through addFilter()
        $filter_flags = $config->getBatch('Filter');
        $custom_filters = $filter_flags['Custom'];
        unset($filter_flags['Custom']);
        $filters = array();
        foreach ($filter_flags as $filter => $flag) {
            if (!$flag) continue;
            // dotted names are sub-directives, not filter switches
            if (strpos($filter, '.') !== false) continue;
            $class = "HTMLPurifier_Filter_$filter";
            $filters[] = new $class;
        }
        foreach ($custom_filters as $filter) {
            // maybe "HTMLPurifier_Filter_$filter", but be consistent with AutoFormat
            $filters[] = $filter;
        }
        $filters = array_merge($filters, $this->filters);
        // maybe prepare(), but later

        foreach ($filters as $filter) {
            $html = $filter->preFilter($html, $config, $context);
        }

        // purified HTML
        $html =
            $this->generator->generateFromTokens(
                // list of tokens
                $this->strategy->execute(
                    // list of un-purified tokens
                    $lexer->tokenizeHTML(
                        // un-purified HTML
                        $html, $config, $context
                    ),
                    $config, $context
                )
            );

        // post-filters run in the opposite order of the pre-filters
        foreach (array_reverse($filters) as $filter) {
            $html = $filter->postFilter($html, $config, $context);
        }

        $html = HTMLPurifier_Encoder::convertFromUTF8($html, $config, $context);
        $this->context =& $context;
        return $html;
    }

    /**
     * Purifies every string in an array, preserving its keys.
     *
     * Nested arrays are purified recursively instead of being handed to
     * purify() as if they were strings. $this->context ends up as an array
     * of contexts with the same keys and nesting as the input.
     *
     * @param array $array_of_html Strings (or nested arrays of strings) to purify.
     * @param mixed $config        Optional configuration, see purify().
     * @return array The input array with every string purified.
     */
    public function purifyArray($array_of_html, $config = null) {
        $context_array = array();
        foreach ($array_of_html as $key => $value) {
            if (is_array($value)) {
                $array_of_html[$key] = $this->purifyArray($value, $config);
            } else {
                $array_of_html[$key] = $this->purify($value, $config);
            }
            // after either call $this->context describes exactly this entry
            $context_array[$key] = $this->context;
        }
        $this->context = $context_array;
        return $array_of_html;
    }

    /**
     * Returns the shared HTMLPurifier instance.
     *
     * A truthy $prototype always replaces the shared instance: an
     * HTMLPurifier object is stored as-is, anything else is used as the
     * constructor's configuration argument.
     *
     * @param mixed $prototype Optional HTMLPurifier instance or configuration.
     * @return HTMLPurifier
     */
    public static function instance($prototype = null) {
        if (!self::$instance || $prototype) {
            if ($prototype instanceof HTMLPurifier) {
                self::$instance = $prototype;
            } elseif ($prototype) {
                self::$instance = new HTMLPurifier($prototype);
            } else {
                self::$instance = new HTMLPurifier();
            }
        }
        return self::$instance;
    }

    /**
     * Alias of instance().
     *
     * @param mixed $prototype See instance().
     * @return HTMLPurifier
     */
    public static function getInstance($prototype = null) {
        return HTMLPurifier::instance($prototype);
    }

}
00256 
00257 
00258 
00259 
00260 
/**
 * Collects the named attribute collections declared by modules and resolves
 * them into ready-to-use attribute definitions.
 */
class HTMLPurifier_AttrCollections
{

    /**
     * Attribute collections indexed by collection name. Inside a collection,
     * key 0 (when present) lists the names of other collections to include.
     */
    public $info = array();

    /**
     * Merges the collections of every module, then resolves inclusions and
     * string identifiers for each collection.
     *
     * @param object $attr_types Object whose get() turns a type string into
     *                           an attribute definition.
     * @param array  $modules    Objects exposing an $attr_collections array.
     */
    public function __construct($attr_types, $modules) {
        // gather every module's contribution
        foreach ($modules as $module) {
            foreach ($module->attr_collections as $collection_name => $collection) {
                if (!isset($this->info[$collection_name])) {
                    $this->info[$collection_name] = array();
                }
                foreach ($collection as $attr_name => $definition) {
                    $merging_includes = $attr_name === 0
                        && isset($this->info[$collection_name][$attr_name]);
                    if ($merging_includes) {
                        // include lists accumulate instead of overwriting
                        $this->info[$collection_name][$attr_name] = array_merge(
                            $this->info[$collection_name][$attr_name],
                            $definition
                        );
                    } else {
                        $this->info[$collection_name][$attr_name] = $definition;
                    }
                }
            }
        }
        // resolve each collection in place
        foreach (array_keys($this->info) as $collection_name) {
            // pull in the attributes of included collections
            $this->performInclusions($this->info[$collection_name]);
            // swap string identifiers for attribute definition objects
            $this->expandIdentifiers($this->info[$collection_name], $attr_types);
        }
    }

    /**
     * Copies the attributes of every included collection into $attr.
     *
     * Inclusions are followed transitively; each collection name is visited
     * at most once. Attributes already set in $attr are never overwritten.
     * The include list (key 0) is removed afterwards.
     *
     * @param array $attr Attribute collection, modified in place.
     */
    public function performInclusions(&$attr) {
        if (!isset($attr[0])) {
            return;
        }
        $pending = $attr[0];
        $visited = array(); // guards against circular inclusions
        $cursor  = 0;
        while (isset($pending[$cursor])) {
            $name = $pending[$cursor];
            $cursor++;
            if (isset($visited[$name])) {
                continue;
            }
            $visited[$name] = true;
            if (!isset($this->info[$name])) {
                continue;
            }
            $included = $this->info[$name];
            foreach ($included as $key => $value) {
                // existing keys win; this also keeps our own include list intact
                if (!isset($attr[$key])) {
                    $attr[$key] = $value;
                }
            }
            if (isset($included[0])) {
                // queue the inclusions of the inclusion
                $pending = array_merge($pending, $included[0]);
            }
        }
        unset($attr[0]);
    }

    /**
     * Replaces string type identifiers in $attr with definition objects.
     *
     * A '*' in an attribute name marks it as required; the asterisks are
     * trimmed from the key. Entries that are false, or whose type cannot be
     * resolved by $attr_types, are removed.
     *
     * @param array  $attr       Attribute collection, modified in place.
     * @param object $attr_types Object whose get() resolves a type string.
     */
    public function expandIdentifiers(&$attr, $attr_types) {

        // renamed keys are added to $attr while we iterate, so remember
        // which names have been handled already
        $handled = array();

        foreach ($attr as $name => $definition) {
            // key 0 is the include list, not an attribute
            if ($name === 0 || isset($handled[$name])) {
                continue;
            }

            $required = strpos($name, '*') !== false;
            if ($required) {
                // re-key the definition without the asterisks
                unset($attr[$name]);
                $name = trim($name, '*');
                $attr[$name] = $definition;
            }

            $handled[$name] = true;

            if (is_object($definition)) {
                // already a definition object: keep an earlier required flag
                $attr[$name]->required = ($required || $attr[$name]->required);
                continue;
            }

            if ($definition === false) {
                unset($attr[$name]);
                continue;
            }

            $resolved = $attr_types->get($definition);
            if ($resolved) {
                $attr[$name] = $resolved;
                $attr[$name]->required = $required;
            } else {
                unset($attr[$name]);
            }
        }

    }

}
00385 
00386 
00387 
00388 
00389 
/**
 * Base class for attribute definitions: objects that validate a single
 * attribute value and offer a few shared string helpers.
 */
abstract class HTMLPurifier_AttrDef
{

    /**
     * Defaults to false.
     * NOTE(review): presumably marks minimized (boolean) HTML attributes;
     * nothing in this file reads it - confirm against the generator.
     */
    public $minimized = false;

    /**
     * Whether the attribute is required. Set by
     * HTMLPurifier_AttrCollections::expandIdentifiers().
     */
    public $required = false;

    /**
     * Validates an attribute value.
     *
     * HTMLPurifier_AttrValidator treats a false or null result as "remove
     * the attribute" and a string result as the replacement value.
     *
     * @param string $string  Value to validate.
     * @param object $config  Configuration object.
     * @param object $context Context object.
     */
    abstract public function validate($string, $config, $context);

    /**
     * Trims the string and turns every newline, tab and carriage return
     * into a single space.
     *
     * @param string $string
     * @return string
     */
    public function parseCDATA($string) {
        $trimmed = trim($string);
        return str_replace(array("\n", "\t", "\r"), ' ', $trimmed);
    }

    /**
     * Factory hook; the default implementation ignores $string and returns
     * this very object (a flyweight). Subclasses whose behaviour depends on
     * $string should return a new or cloned instance instead.
     *
     * @param string $string Extra configuration text.
     * @return HTMLPurifier_AttrDef
     */
    public function make($string) {
        return $this;
    }

    /**
     * Strips the whitespace around the commas of integer rgb() triplets,
     * e.g. "rgb(1, 2, 3)" becomes "rgb(1,2,3)".
     *
     * @param string $string
     * @return string
     */
    protected function mungeRgb($string) {
        $pattern     = '/rgb\((\d+)\s*,\s*(\d+)\s*,\s*(\d+)\)/';
        $replacement = 'rgb(\1,\2,\3)';
        return preg_replace($pattern, $replacement, $string);
    }

    /**
     * Decodes CSS backslash escapes.
     *
     * A backslash followed by one to six hex digits becomes the matching
     * Unicode character (dropped when HTMLPurifier_Encoder::cleanUTF8()
     * rejects it); a backslash-newline pair disappears; any other escaped
     * character is kept literally; a trailing lone backslash is preserved.
     *
     * @param string $string
     * @return string
     */
    protected function expandCSSEscape($string) {
        $out = '';
        $len = strlen($string);
        for ($pos = 0; $pos < $len; $pos++) {
            if ($string[$pos] !== '\\') {
                $out .= $string[$pos];
                continue;
            }
            // step onto the character that follows the backslash
            $pos++;
            if ($pos >= $len) {
                // nothing to escape: keep the backslash itself
                $out .= '\\';
                break;
            }
            if (ctype_xdigit($string[$pos])) {
                // gather up to six hex digits
                $hex    = $string[$pos];
                $digits = 1;
                $pos++;
                while ($pos < $len && $digits < 6 && ctype_xdigit($string[$pos])) {
                    $hex .= $string[$pos];
                    $pos++;
                    $digits++;
                }
                // We have to be extremely careful when adding
                // new characters, to make sure we're not breaking
                // the encoding.
                $char = HTMLPurifier_Encoder::unichr(hexdec($hex));
                if (HTMLPurifier_Encoder::cleanUTF8($char) === '') {
                    continue;
                }
                $out .= $char;
                // a whitespace terminator is consumed by the loop increment;
                // anything else must be looked at again
                if ($pos < $len && trim($string[$pos]) !== '') {
                    $pos--;
                }
                continue;
            }
            if ($string[$pos] === "\n") {
                continue;
            }
            $out .= $string[$pos];
        }
        return $out;
    }

}
00509 
00510 
00511 
00512 
00513 
/**
 * Base class for attribute transformations: objects that take a whole
 * attribute array and return a rewritten one.
 */
abstract class HTMLPurifier_AttrTransform
{

    /**
     * Transforms an attribute array.
     *
     * @param array  $attr    Attributes, name => value.
     * @param object $config  Configuration object.
     * @param object $context Context object.
     * @return array The transformed attributes.
     */
    abstract public function transform($attr, $config, $context);

    /**
     * Puts $css in front of the style attribute, creating the attribute
     * when it does not exist yet.
     *
     * @param array  $attr Attributes, modified in place.
     * @param string $css  CSS text to prepend.
     */
    public function prependCSS(&$attr, $css) {
        $existing = '';
        if (isset($attr['style'])) {
            $existing = $attr['style'];
        }
        $attr['style'] = $css . $existing;
    }

    /**
     * Removes an attribute from the array and hands back its value.
     *
     * @param array  $attr Attributes, modified in place.
     * @param string $key  Name of the attribute to take.
     * @return mixed The removed value, or null when the attribute is not set.
     */
    public function confiscateAttr(&$attr, $key) {
        if (isset($attr[$key])) {
            $taken = $attr[$key];
            unset($attr[$key]);
            return $taken;
        }
        return null;
    }

}
00566 
00567 
00568 
00569 
00570 
/**
 * Registry that maps attribute type names to attribute definition objects.
 */
class HTMLPurifier_AttrTypes
{
    /**
     * Registered definitions, indexed by type name.
     */
    protected $info = array();

    /**
     * Registers the built-in attribute types.
     */
    public function __construct() {
        // XXX This is kind of poor, since we don't actually /clone/
        // instances; instead, we use the supplied make() method. So,
        // the underlying class must know how to deal with arguments.
        // With the old implementation of Enum, that ignored its
        // arguments when handling a make dispatch, the IAlign
        // definition wouldn't work.

        // types built by a plain, argument-less constructor call
        $plain_types = array(
            // pseudo-types, must be instantiated via shorthand
            'Enum'         => 'HTMLPurifier_AttrDef_Enum',
            'Bool'         => 'HTMLPurifier_AttrDef_HTML_Bool',

            'CDATA'        => 'HTMLPurifier_AttrDef_Text',
            'ID'           => 'HTMLPurifier_AttrDef_HTML_ID',
            'Length'       => 'HTMLPurifier_AttrDef_HTML_Length',
            'MultiLength'  => 'HTMLPurifier_AttrDef_HTML_MultiLength',
            'NMTOKENS'     => 'HTMLPurifier_AttrDef_HTML_Nmtokens',
            'Pixels'       => 'HTMLPurifier_AttrDef_HTML_Pixels',
            'Text'         => 'HTMLPurifier_AttrDef_Text',
            'URI'          => 'HTMLPurifier_AttrDef_URI',
            'LanguageCode' => 'HTMLPurifier_AttrDef_Lang',
            'Color'        => 'HTMLPurifier_AttrDef_HTML_Color',
        );
        foreach ($plain_types as $type_name => $class_name) {
            $this->info[$type_name] = new $class_name();
        }

        $this->info['IAlign']      = self::makeEnum('top,middle,bottom,left,right');
        $this->info['LAlign']      = self::makeEnum('top,bottom,left,right');
        $this->info['FrameTarget'] = new HTMLPurifier_AttrDef_HTML_FrameTarget();

        // unimplemented aliases, each backed by its own Text definition
        foreach (array('ContentType', 'ContentTypes', 'Charsets', 'Character') as $alias) {
            $this->info[$alias] = new HTMLPurifier_AttrDef_Text();
        }

        // "proprietary" types
        $this->info['Class'] = new HTMLPurifier_AttrDef_HTML_Class();

        // number is really a positive integer (one or more digits)
        // FIXME: ^^ not always, see start and value of list items
        $this->info['Number'] = new HTMLPurifier_AttrDef_Integer(false, false, true);
    }

    /**
     * Wraps an Enum over the comma-separated values in a Clone definition.
     *
     * @param string $in Comma-separated list of allowed values.
     * @return HTMLPurifier_AttrDef_Clone
     */
    private static function makeEnum($in) {
        $values = explode(',', $in);
        $enum   = new HTMLPurifier_AttrDef_Enum($values);
        return new HTMLPurifier_AttrDef_Clone($enum);
    }

    /**
     * Looks up a type and lets it build a definition.
     *
     * Anything after the first '#' in $type is passed to the registered
     * definition's make(); without a '#', make() receives an empty string.
     * An unknown type raises an E_USER_ERROR and yields no value.
     *
     * @param string $type Type name, optionally followed by "#extra".
     * @return mixed Whatever the registered definition's make() returns.
     */
    public function get($type) {

        // split off any extra info tacked on after the first '#'
        $pieces = explode('#', $type, 2);
        $type   = $pieces[0];
        $string = isset($pieces[1]) ? $pieces[1] : '';

        if (isset($this->info[$type])) {
            return $this->info[$type]->make($string);
        }

        trigger_error('Cannot retrieve undefined attribute type ' . $type, E_USER_ERROR);
        return;

    }

    /**
     * Registers (or replaces) the definition for a type name.
     *
     * @param string $type Type name.
     * @param object $impl Definition object; get() calls its make().
     */
    public function set($type, $impl) {
        $this->info[$type] = $impl;
    }
}
00658 
00659 
00660 
00661 
00662 
/**
 * Validates the attributes of a token against the HTML definition and
 * applies the definition's attribute transformations.
 */
class HTMLPurifier_AttrValidator
{

    /**
     * Validates (and transforms) the attributes of a token in place.
     *
     * Steps: global and element-local pre-transforms, per-attribute
     * validation against the element's definition (falling back to the
     * global attribute definitions), then global and element-local
     * post-transforms. $token->attr is only replaced at the very end.
     *
     * Tokens that are neither start nor empty tags are returned untouched.
     * That check now happens before CurrentToken is registered, so such a
     * call no longer leaves a CurrentToken entry behind in the context.
     *
     * @param object $token   Token to validate; its attr array is updated.
     * @param object $config  Configuration object.
     * @param object $context Context object.
     * @return mixed The token itself when it is not a start/empty tag
     *               (kept for backward compatibility), otherwise nothing.
     */
    public function validateToken(&$token, &$config, $context) {

        $definition = $config->getHTMLDefinition();
        $e =& $context->get('ErrorCollector', true);

        // initialize IDAccumulator if necessary
        $ok =& $context->get('IDAccumulator', true);
        if (!$ok) {
            $id_accumulator = HTMLPurifier_IDAccumulator::build($config, $context);
            $context->register('IDAccumulator', $id_accumulator);
        }

        // only start and empty tags are processed; bail out before
        // touching CurrentToken so nothing needs to be cleaned up
        if (
            !$token instanceof HTMLPurifier_Token_Start &&
            !$token instanceof HTMLPurifier_Token_Empty
        ) return $token;

        // initialize CurrentToken if necessary
        $current_token =& $context->get('CurrentToken', true);
        if (!$current_token) $context->register('CurrentToken', $token);

        // create alias to global definition array, see also $defs
        // DEFINITION CALL
        $d_defs = $definition->info_global_attr;

        // don't update token until the very end, to ensure an atomic update
        $attr = $token->attr;

        // do global transformations (pre)
        // nothing currently utilizes this
        $attr = $this->applyTransforms($definition->info_attr_transform_pre, $attr, $config, $context, $e);

        // do local transformations only applicable to this element (pre)
        // ex. <p align="right"> to <p style="text-align:right;">
        $attr = $this->applyTransforms($definition->info[$token->name]->attr_transform_pre, $attr, $config, $context, $e);

        // create alias to this element's attribute definition array, see
        // also $d_defs (global attribute definition array)
        // DEFINITION CALL
        $defs = $definition->info[$token->name]->attr;

        // registered by reference: the context follows $attr_key as the
        // loop below advances
        $attr_key = false;
        $context->register('CurrentAttr', $attr_key);

        // iterate through all the attribute keypairs
        // Watch out for name collisions: $key has previously been used
        foreach ($attr as $attr_key => $value) {

            // call the definition
            if ( isset($defs[$attr_key]) ) {
                // there is a local definition defined
                if ($defs[$attr_key] === false) {
                    // We've explicitly been told not to allow this element.
                    // This is usually when there's a global definition
                    // that must be overridden.
                    // Theoretically speaking, we could have a
                    // AttrDef_DenyAll, but this is faster!
                    $result = false;
                } else {
                    // validate according to the element's definition
                    $result = $defs[$attr_key]->validate(
                                    $value, $config, $context
                               );
                }
            } elseif ( isset($d_defs[$attr_key]) ) {
                // there is a global definition defined, validate according
                // to the global definition
                $result = $d_defs[$attr_key]->validate(
                                $value, $config, $context
                           );
            } else {
                // system never heard of the attribute? DELETE!
                $result = false;
            }

            // put the results into effect
            if ($result === false || $result === null) {
                // this is a generic error message that should be replaced
                // with more specific ones when possible
                if ($e) $e->send(E_ERROR, 'AttrValidator: Attribute removed');

                // remove the attribute
                unset($attr[$attr_key]);
            } elseif (is_string($result)) {
                // generally, if a substitution is happening, there
                // was some sort of implicit correction going on. We'll
                // delegate it to the attribute classes to say exactly what.

                // simple substitution
                $attr[$attr_key] = $result;
            } else {
                // nothing happens
            }

            // we'd also want slightly more complicated substitution
            // involving an array as the return value,
            // although we're not sure how colliding attributes would
            // resolve (certain ones would be completely overridden,
            // others would prepend themselves).
        }

        $context->destroy('CurrentAttr');

        // post transforms

        // global (error reporting untested)
        $attr = $this->applyTransforms($definition->info_attr_transform_post, $attr, $config, $context, $e);

        // local (error reporting untested)
        $attr = $this->applyTransforms($definition->info[$token->name]->attr_transform_post, $attr, $config, $context, $e);

        $token->attr = $attr;

        // destroy CurrentToken if we made it ourselves
        if (!$current_token) $context->destroy('CurrentToken');

    }

    /**
     * Runs a list of attribute transforms in order, reporting every
     * transform that changed the attributes to the error collector.
     *
     * @param array  $transforms Objects exposing transform($attr, $config, $context).
     * @param array  $attr       Attributes before the transforms.
     * @param object $config     Configuration object.
     * @param object $context    Context object.
     * @param mixed  $e          Error collector, or a falsy value to skip reporting.
     * @return array Attributes after all transforms.
     */
    private function applyTransforms($transforms, $attr, $config, $context, $e) {
        foreach ($transforms as $transform) {
            $before = $attr;
            $attr = $transform->transform($before, $config, $context);
            if ($e) {
                if ($attr != $before) $e->send(E_NOTICE, 'AttrValidator: Attributes transformed', $before, $attr);
            }
        }
        return $attr;
    }


}
00821 
00822 
00823 
00824 
00825 
// constants are slow, so we use as few as possible
// The first inclusion fixes the library prefix (the "standalone" directory
// next to this file) and puts it at the front of the include path.
if (!defined('HTMLPURIFIER_PREFIX')) {
    define('HTMLPURIFIER_PREFIX', dirname(__FILE__) . '/standalone');
    set_include_path(
        implode(PATH_SEPARATOR, array(HTMLPURIFIER_PREFIX, get_include_path()))
    );
}

// accommodations for versions earlier than 5.0.2
// borrowed from PHP_Compat, LGPL licensed, by Aidan Lister <aidan@php.net>
// PHP_EOL is chosen from the first three letters of the OS name.
if (!defined('PHP_EOL')) {
    if (strtoupper(substr(PHP_OS, 0, 3)) === 'WIN') {
        define('PHP_EOL', "\r\n");
    } elseif (strtoupper(substr(PHP_OS, 0, 3)) === 'DAR') {
        define('PHP_EOL', "\r");
    } else {
        define('PHP_EOL', "\n");
    }
}
00846 
/**
 * Bootstrap helpers: class-name to file-path mapping and autoloader
 * registration for the files below HTMLPURIFIER_PREFIX.
 */
class HTMLPurifier_Bootstrap
{

    /**
     * Autoload callback: requires the file that declares $class.
     *
     * @param string $class Class name.
     * @return bool True when a file was loaded, false when getPath()
     *              could not resolve the class.
     */
    public static function autoload($class) {
        $file = HTMLPurifier_Bootstrap::getPath($class);
        if (!$file) return false;
        // Technically speaking, it should be ok and more efficient to
        // just do 'require', but Antonio Parraga reports that with
        // Zend extensions such as Zend debugger and APC, this invariant
        // may be broken.  Since we have efficient alternatives, pay
        // the cost here and avoid the bug.
        require_once HTMLPURIFIER_PREFIX . '/' . $file;
        return true;
    }

    /**
     * Maps a class name to its file, relative to HTMLPURIFIER_PREFIX.
     *
     * Language classes live in HTMLPurifier/Language/classes/ with the
     * underscores of the language code turned into dashes; every other
     * class maps underscores to directory separators.
     *
     * @param string $class Class name.
     * @return string|false Relative path, or false when the name does not
     *                      start with "HTMLPurifier" or the file is missing.
     */
    public static function getPath($class) {
        if (strncmp('HTMLPurifier', $class, 12) !== 0) return false;
        // Custom implementations
        if (strncmp('HTMLPurifier_Language_', $class, 22) === 0) {
            $code = str_replace('_', '-', substr($class, 22));
            $file = 'HTMLPurifier/Language/classes/' . $code . '.php';
        } else {
            $file = str_replace('_', '/', $class) . '.php';
        }
        if (!file_exists(HTMLPURIFIER_PREFIX . '/' . $file)) return false;
        return $file;
    }

    /**
     * Registers autoload() in front of every autoloader already present.
     *
     * On PHP 5.3 and later the prepend flag of spl_autoload_register() does
     * this directly. Older runtimes fall back to unregistering the existing
     * autoloaders, registering ours and re-registering the rest.
     *
     * @throws Exception On the legacy path, when PHP is older than 5.2.11
     *                   and an existing autoloader is a non-static method.
     */
    public static function registerAutoload() {
        $autoload = array('HTMLPurifier_Bootstrap', 'autoload');
        if ( ($funcs = spl_autoload_functions()) === false ) {
            spl_autoload_register($autoload);
        } elseif (function_exists('spl_autoload_unregister')) {
            if (version_compare(PHP_VERSION, '5.3.0', '>=')) {
                // the prepend flag exists: no need to disturb the
                // autoloaders that are already registered
                spl_autoload_register($autoload, true, true);
                return;
            }
            $buggy  = version_compare(PHP_VERSION, '5.2.11', '<');
            $compat = version_compare(PHP_VERSION, '5.1.2', '<=') &&
                      version_compare(PHP_VERSION, '5.1.0', '>=');
            foreach ($funcs as $func) {
                if ($buggy && is_array($func)) {
                    // :TRICKY: There are some compatibility issues and some
                    // places where we need to error out
                    $reflector = new ReflectionMethod($func[0], $func[1]);
                    if (!$reflector->isStatic()) {
                        throw new Exception('
                            HTML Purifier autoloader registrar is not compatible
                            with non-static object methods due to PHP Bug #44144;
                            Please do not use HTMLPurifier.autoload.php (or any
                            file that includes this file); instead, place the code:
                            spl_autoload_register(array(\'HTMLPurifier_Bootstrap\', \'autoload\'))
                            after your own autoloaders.
                        ');
                    }
                    // Surprisingly, spl_autoload_register supports the
                    // Class::staticMethod callback format, although call_user_func doesn't
                    if ($compat) $func = implode('::', $func);
                }
                spl_autoload_unregister($func);
            }
            spl_autoload_register($autoload);
            foreach ($funcs as $func) spl_autoload_register($func);
        }
    }

}
00927 
00928 
00929 
00930 
00931 
/**
 * Base class for definitions that are set up lazily, at most once.
 */
abstract class HTMLPurifier_Definition
{

    /**
     * Whether setup() has already run for this definition.
     */
    public $setup = false;

    /**
     * Defaults to null; not read or written by this class.
     * NOTE(review): presumably records whether the definition was built
     * through an optimized code path - confirm against the callers.
     */
    public $optimized = null;

    /**
     * Identifier of the kind of definition; subclasses assign it
     * (HTMLPurifier_CSSDefinition uses 'CSS').
     */
    public $type;

    /**
     * Performs the actual setup work of the concrete definition.
     *
     * @param object $config Configuration object.
     */
    abstract protected function doSetup($config);

    /**
     * Runs doSetup() the first time it is called and does nothing on any
     * later call. The flag is raised before doSetup() starts.
     *
     * @param object $config Configuration object, forwarded to doSetup().
     */
    public function setup($config) {
        if (!$this->setup) {
            $this->setup = true;
            $this->doSetup($config);
        }
    }

}
00978 
00979 
00980 
00981 
00982 
00987 class HTMLPurifier_CSSDefinition extends HTMLPurifier_Definition
00988 {
00989 
00990     public $type = 'CSS';
00991 
00995     public $info = array();
00996 
01000     protected function doSetup($config) {
01001 
01002         $this->info['text-align'] = new HTMLPurifier_AttrDef_Enum(
01003             array('left', 'right', 'center', 'justify'), false);
01004 
01005         $border_style =
01006         $this->info['border-bottom-style'] =
01007         $this->info['border-right-style'] =
01008         $this->info['border-left-style'] =
01009         $this->info['border-top-style'] =  new HTMLPurifier_AttrDef_Enum(
01010             array('none', 'hidden', 'dotted', 'dashed', 'solid', 'double',
01011             'groove', 'ridge', 'inset', 'outset'), false);
01012 
01013         $this->info['border-style'] = new HTMLPurifier_AttrDef_CSS_Multiple($border_style);
01014 
01015         $this->info['clear'] = new HTMLPurifier_AttrDef_Enum(
01016             array('none', 'left', 'right', 'both'), false);
01017         $this->info['float'] = new HTMLPurifier_AttrDef_Enum(
01018             array('none', 'left', 'right'), false);
01019         $this->info['font-style'] = new HTMLPurifier_AttrDef_Enum(
01020             array('normal', 'italic', 'oblique'), false);
01021         $this->info['font-variant'] = new HTMLPurifier_AttrDef_Enum(
01022             array('normal', 'small-caps'), false);
01023 
01024         $uri_or_none = new HTMLPurifier_AttrDef_CSS_Composite(
01025             array(
01026                 new HTMLPurifier_AttrDef_Enum(array('none')),
01027                 new HTMLPurifier_AttrDef_CSS_URI()
01028             )
01029         );
01030 
01031         $this->info['list-style-position'] = new HTMLPurifier_AttrDef_Enum(
01032             array('inside', 'outside'), false);
01033         $this->info['list-style-type'] = new HTMLPurifier_AttrDef_Enum(
01034             array('disc', 'circle', 'square', 'decimal', 'lower-roman',
01035             'upper-roman', 'lower-alpha', 'upper-alpha', 'none'), false);
01036         $this->info['list-style-image'] = $uri_or_none;
01037 
01038         $this->info['list-style'] = new HTMLPurifier_AttrDef_CSS_ListStyle($config);
01039 
01040         $this->info['text-transform'] = new HTMLPurifier_AttrDef_Enum(
01041             array('capitalize', 'uppercase', 'lowercase', 'none'), false);
01042         $this->info['color'] = new HTMLPurifier_AttrDef_CSS_Color();
01043 
01044         $this->info['background-image'] = $uri_or_none;
01045         $this->info['background-repeat'] = new HTMLPurifier_AttrDef_Enum(
01046             array('repeat', 'repeat-x', 'repeat-y', 'no-repeat')
01047         );
01048         $this->info['background-attachment'] = new HTMLPurifier_AttrDef_Enum(
01049             array('scroll', 'fixed')
01050         );
01051         $this->info['background-position'] = new HTMLPurifier_AttrDef_CSS_BackgroundPosition();
01052 
01053         $border_color =
01054         $this->info['border-top-color'] =
01055         $this->info['border-bottom-color'] =
01056         $this->info['border-left-color'] =
01057         $this->info['border-right-color'] =
01058         $this->info['background-color'] = new HTMLPurifier_AttrDef_CSS_Composite(array(
01059             new HTMLPurifier_AttrDef_Enum(array('transparent')),
01060             new HTMLPurifier_AttrDef_CSS_Color()
01061         ));
01062 
01063         $this->info['background'] = new HTMLPurifier_AttrDef_CSS_Background($config);
01064 
01065         $this->info['border-color'] = new HTMLPurifier_AttrDef_CSS_Multiple($border_color);
01066 
01067         $border_width =
01068         $this->info['border-top-width'] =
01069         $this->info['border-bottom-width'] =
01070         $this->info['border-left-width'] =
01071         $this->info['border-right-width'] = new HTMLPurifier_AttrDef_CSS_Composite(array(
01072             new HTMLPurifier_AttrDef_Enum(array('thin', 'medium', 'thick')),
01073             new HTMLPurifier_AttrDef_CSS_Length('0') //disallow negative
01074         ));
01075 
01076         $this->info['border-width'] = new HTMLPurifier_AttrDef_CSS_Multiple($border_width);
01077 
01078         $this->info['letter-spacing'] = new HTMLPurifier_AttrDef_CSS_Composite(array(
01079             new HTMLPurifier_AttrDef_Enum(array('normal')),
01080             new HTMLPurifier_AttrDef_CSS_Length()
01081         ));
01082 
01083         $this->info['word-spacing'] = new HTMLPurifier_AttrDef_CSS_Composite(array(
01084             new HTMLPurifier_AttrDef_Enum(array('normal')),
01085             new HTMLPurifier_AttrDef_CSS_Length()
01086         ));
01087 
01088         $this->info['font-size'] = new HTMLPurifier_AttrDef_CSS_Composite(array(
01089             new HTMLPurifier_AttrDef_Enum(array('xx-small', 'x-small',
01090                 'small', 'medium', 'large', 'x-large', 'xx-large',
01091                 'larger', 'smaller')),
01092             new HTMLPurifier_AttrDef_CSS_Percentage(),
01093             new HTMLPurifier_AttrDef_CSS_Length()
01094         ));
01095 
01096         $this->info['line-height'] = new HTMLPurifier_AttrDef_CSS_Composite(array(
01097             new HTMLPurifier_AttrDef_Enum(array('normal')),
01098             new HTMLPurifier_AttrDef_CSS_Number(true), // no negatives
01099             new HTMLPurifier_AttrDef_CSS_Length('0'),
01100             new HTMLPurifier_AttrDef_CSS_Percentage(true)
01101         ));
01102 
01103         $margin =
01104         $this->info['margin-top'] =
01105         $this->info['margin-bottom'] =
01106         $this->info['margin-left'] =
01107         $this->info['margin-right'] = new HTMLPurifier_AttrDef_CSS_Composite(array(
01108             new HTMLPurifier_AttrDef_CSS_Length(),
01109             new HTMLPurifier_AttrDef_CSS_Percentage(),
01110             new HTMLPurifier_AttrDef_Enum(array('auto'))
01111         ));
01112 
01113         $this->info['margin'] = new HTMLPurifier_AttrDef_CSS_Multiple($margin);
01114 
01115         // non-negative
01116         $padding =
01117         $this->info['padding-top'] =
01118         $this->info['padding-bottom'] =
01119         $this->info['padding-left'] =
01120         $this->info['padding-right'] = new HTMLPurifier_AttrDef_CSS_Composite(array(
01121             new HTMLPurifier_AttrDef_CSS_Length('0'),
01122             new HTMLPurifier_AttrDef_CSS_Percentage(true)
01123         ));
01124 
01125         $this->info['padding'] = new HTMLPurifier_AttrDef_CSS_Multiple($padding);
01126 
01127         $this->info['text-indent'] = new HTMLPurifier_AttrDef_CSS_Composite(array(
01128             new HTMLPurifier_AttrDef_CSS_Length(),
01129             new HTMLPurifier_AttrDef_CSS_Percentage()
01130         ));
01131 
01132         $trusted_wh = new HTMLPurifier_AttrDef_CSS_Composite(array(
01133             new HTMLPurifier_AttrDef_CSS_Length('0'),
01134             new HTMLPurifier_AttrDef_CSS_Percentage(true),
01135             new HTMLPurifier_AttrDef_Enum(array('auto'))
01136         ));
01137         $max = $config->get('CSS.MaxImgLength');
01138 
01139         $this->info['width'] =
01140         $this->info['height'] =
01141             $max === null ?
01142             $trusted_wh :
01143             new HTMLPurifier_AttrDef_Switch('img',
01144                 // For img tags:
01145                 new HTMLPurifier_AttrDef_CSS_Composite(array(
01146                     new HTMLPurifier_AttrDef_CSS_Length('0', $max),
01147                     new HTMLPurifier_AttrDef_Enum(array('auto'))
01148                 )),
01149                 // For everyone else:
01150                 $trusted_wh
01151             );
01152 
01153         $this->info['text-decoration'] = new HTMLPurifier_AttrDef_CSS_TextDecoration();
01154 
01155         $this->info['font-family'] = new HTMLPurifier_AttrDef_CSS_FontFamily();
01156 
01157         // this could use specialized code
01158         $this->info['font-weight'] = new HTMLPurifier_AttrDef_Enum(
01159             array('normal', 'bold', 'bolder', 'lighter', '100', '200', '300',
01160             '400', '500', '600', '700', '800', '900'), false);
01161 
01162         // MUST be called after other font properties, as it references
01163         // a CSSDefinition object
01164         $this->info['font'] = new HTMLPurifier_AttrDef_CSS_Font($config);
01165 
01166         // same here
01167         $this->info['border'] =
01168         $this->info['border-bottom'] =
01169         $this->info['border-top'] =
01170         $this->info['border-left'] =
01171         $this->info['border-right'] = new HTMLPurifier_AttrDef_CSS_Border($config);
01172 
01173         $this->info['border-collapse'] = new HTMLPurifier_AttrDef_Enum(array(
01174             'collapse', 'separate'));
01175 
01176         $this->info['caption-side'] = new HTMLPurifier_AttrDef_Enum(array(
01177             'top', 'bottom'));
01178 
01179         $this->info['table-layout'] = new HTMLPurifier_AttrDef_Enum(array(
01180             'auto', 'fixed'));
01181 
01182         $this->info['vertical-align'] = new HTMLPurifier_AttrDef_CSS_Composite(array(
01183             new HTMLPurifier_AttrDef_Enum(array('baseline', 'sub', 'super',
01184                 'top', 'text-top', 'middle', 'bottom', 'text-bottom')),
01185             new HTMLPurifier_AttrDef_CSS_Length(),
01186             new HTMLPurifier_AttrDef_CSS_Percentage()
01187         ));
01188 
01189         $this->info['border-spacing'] = new HTMLPurifier_AttrDef_CSS_Multiple(new HTMLPurifier_AttrDef_CSS_Length(), 2);
01190 
01191         // partial support
01192         $this->info['white-space'] = new HTMLPurifier_AttrDef_Enum(array('nowrap'));
01193 
01194         if ($config->get('CSS.Proprietary')) {
01195             $this->doSetupProprietary($config);
01196         }
01197 
01198         if ($config->get('CSS.AllowTricky')) {
01199             $this->doSetupTricky($config);
01200         }
01201 
01202         if ($config->get('CSS.Trusted')) {
01203             $this->doSetupTrusted($config);
01204         }
01205 
01206         $allow_important = $config->get('CSS.AllowImportant');
01207         // wrap all attr-defs with decorator that handles !important
01208         foreach ($this->info as $k => $v) {
01209             $this->info[$k] = new HTMLPurifier_AttrDef_CSS_ImportantDecorator($v, $allow_important);
01210         }
01211 
01212         $this->setupConfigStuff($config);
01213     }
01214 
/**
 * Registers validators for proprietary or poorly supported CSS properties.
 *
 * Invoked by the definition setup code when the CSS.Proprietary directive
 * is enabled. Every property receives its own validator instance.
 *
 * @param object $config Configuration object (not read by this method).
 */
protected function doSetupProprietary($config)
{
    // Internet Explorer only scrollbar colors
    $scrollbar_parts = array('arrow', 'base', 'darkshadow', 'face', 'highlight', 'shadow');
    foreach ($scrollbar_parts as $part) {
        $this->info["scrollbar-{$part}-color"] = new HTMLPurifier_AttrDef_CSS_Color();
    }

    // technically not proprietary, but CSS3, and no one supports it
    $opacity_properties = array('opacity', '-moz-opacity', '-khtml-opacity');
    foreach ($opacity_properties as $property) {
        $this->info[$property] = new HTMLPurifier_AttrDef_CSS_AlphaValue();
    }

    // only opacity, for now
    $this->info['filter'] = new HTMLPurifier_AttrDef_CSS_Filter();
}
01233 
/**
 * Registers validators for the "tricky" layout properties display,
 * visibility and overflow.
 *
 * Invoked by the definition setup code when the CSS.AllowTricky directive
 * is enabled. Each property is a plain keyword enumeration.
 *
 * @param object $config Configuration object (not read by this method).
 */
protected function doSetupTricky($config)
{
    $display_keywords = array(
        'inline', 'block', 'list-item', 'run-in', 'compact',
        'marker', 'table', 'inline-table', 'table-row-group',
        'table-header-group', 'table-footer-group', 'table-row',
        'table-column-group', 'table-column', 'table-cell', 'table-caption', 'none'
    );
    $visibility_keywords = array('visible', 'hidden', 'collapse');
    $overflow_keywords = array('visible', 'hidden', 'auto', 'scroll');

    $this->info['display'] = new HTMLPurifier_AttrDef_Enum($display_keywords);
    $this->info['visibility'] = new HTMLPurifier_AttrDef_Enum($visibility_keywords);
    $this->info['overflow'] = new HTMLPurifier_AttrDef_Enum($overflow_keywords);
}
01246 
/**
 * Registers validators for the positioning properties position, top,
 * left, right, bottom and z-index.
 *
 * Invoked by the definition setup code when the CSS.Trusted directive is
 * enabled. The four offset properties share one validator instance.
 *
 * @param object $config Configuration object (not read by this method).
 */
protected function doSetupTrusted($config)
{
    $position_keywords = array('static', 'relative', 'absolute', 'fixed');
    $this->info['position'] = new HTMLPurifier_AttrDef_Enum($position_keywords);

    // A length, a percentage, or the keyword 'auto'.
    $offset = new HTMLPurifier_AttrDef_CSS_Composite(array(
        new HTMLPurifier_AttrDef_CSS_Length(),
        new HTMLPurifier_AttrDef_CSS_Percentage(),
        new HTMLPurifier_AttrDef_Enum(array('auto')),
    ));
    // Registered innermost-first so the key order matches a chained
    // assignment of top = left = right = bottom.
    foreach (array('bottom', 'right', 'left', 'top') as $side) {
        $this->info[$side] = $offset;
    }

    // An integer or the keyword 'auto'.
    $this->info['z-index'] = new HTMLPurifier_AttrDef_CSS_Composite(array(
        new HTMLPurifier_AttrDef_Integer(),
        new HTMLPurifier_AttrDef_Enum(array('auto')),
    ));
}
01264 
/**
 * Narrows the registered CSS properties according to the
 * CSS.AllowedProperties and CSS.ForbiddenProperties directives.
 *
 * When CSS.AllowedProperties is set, every registered property missing
 * from that lookup is dropped, and an E_USER_WARNING is raised for each
 * requested property that was never registered. When
 * CSS.ForbiddenProperties is set, every registered property present in
 * that lookup is dropped.
 *
 * @param object $config Configuration object; only get() is called on it.
 */
protected function setupConfigStuff($config)
{
    // whitelist: keep only the properties that were asked for
    $whitelist = $config->get('CSS.AllowedProperties');
    if ($whitelist !== null) {
        foreach (array_keys($this->info) as $property) {
            if (!isset($whitelist[$property])) {
                unset($this->info[$property]);
            }
            // Whatever survives in $whitelist was requested but is unknown.
            unset($whitelist[$property]);
        }

        // emit errors
        $support = "(for information on implementing this, see the support forums) ";
        foreach ($whitelist as $unsupported => $flag) {
            // :TODO: Is this htmlspecialchars() call really necessary?
            $escaped = htmlspecialchars($unsupported);
            trigger_error("Style attribute '$escaped' is not supported $support", E_USER_WARNING);
        }
    }

    // blacklist: drop anything explicitly forbidden
    $blacklist = $config->get('CSS.ForbiddenProperties');
    if ($blacklist === null) {
        return;
    }
    foreach (array_keys($this->info) as $property) {
        if (isset($blacklist[$property])) {
            unset($this->info[$property]);
        }
    }
}
01300 }
01301 
01302 
01303 
01304 
01305 
/**
 * Abstract base for child definitions: objects that validate the children
 * of an element through validateChildren().
 */
abstract class HTMLPurifier_ChildDef
{
    /**
     * Kind of child definition. Not assigned in this base class;
     * presumably set by concrete subclasses -- confirm against them.
     */
    public $type;

    /**
     * Not assigned in this base class. NOTE(review): the name suggests a
     * flag for whether an element may be left without children -- confirm.
     */
    public $allow_empty;

    /**
     * Elements handed back by getAllowedElements(); empty by default.
     */
    public $elements = array();

    /**
     * Returns the contents of $elements.
     *
     * @param object $config Configuration object; ignored by this base
     *                       implementation.
     * @return array
     */
    public function getAllowedElements($config)
    {
        return $this->elements;
    }

    /**
     * Validates the children of an element; concrete subclasses supply the
     * logic. The return contract is not visible from this declaration.
     *
     * @param mixed $tokens_of_children Child tokens to validate.
     * @param object $config Configuration object.
     * @param object $context Context object.
     */
    abstract public function validateChildren($tokens_of_children, $config, $context);
}
01350 
01351 
01352 
01353 
01354 
/**
 * Holds directive values and hands out the HTML, CSS and URI definition
 * objects built from them.
 *
 * Values are stored in an HTMLPurifier_PropertyList and validated against
 * the schema object passed to the constructor. Once the object is
 * finalized, set() and the load*() methods refuse further changes.
 */
class HTMLPurifier_Config
{

    /**
     * HTML Purifier's version.
     */
    public $version = '4.4.0';

    /**
     * When true, the first read (get(), getAll(), getDefinition(), ...)
     * finalizes the configuration; see autoFinalize().
     */
    public $autoFinalize = true;

    // protected member variables

    /**
     * Per-namespace md5 serials cached by getBatchSerial(); set() resets the
     * entry of the namespace it touched.
     */
    protected $serials = array();

    /**
     * md5 serial of the whole configuration, cached by getSerial().
     */
    protected $serial;

    /**
     * Variable parser used by set() to coerce values; dropped by finalize().
     */
    protected $parser = null;

    /**
     * Schema object this configuration is checked against. Its ->info and
     * ->defaultPlist members are read by this class.
     */
    public $def;

    /**
     * Definition objects created so far, indexed by type ('HTML', 'CSS',
     * 'URI').
     */
    protected $definitions;

    /**
     * Whether the configuration has been finalized.
     */
    protected $finalized = false;

    /**
     * HTMLPurifier_PropertyList holding the directive values.
     */
    protected $plist;

    /**
     * True while set() is re-dispatching an aliased directive; used to
     * reject aliases that point at other aliases.
     */
    private $aliasMode;

    /**
     * When true, triggerError() appends the calling file and line, and some
     * exception messages carry an extra hint.
     */
    public $chatty = true;

    /**
     * Namespace whose definition is currently being set up, or null. While
     * set, get() refuses directives from any other namespace.
     */
    private $lock;

    /**
     * @param object $definition Schema object providing ->info and
     *                           ->defaultPlist.
     * @param object $parent     Optional parent property list; defaults to
     *                           the schema's default property list.
     */
    public function __construct($definition, $parent = null)
    {
        $parent = $parent ? $parent : $definition->defaultPlist;
        $this->plist = new HTMLPurifier_PropertyList($parent);
        $this->def = $definition; // keep a copy around for checking
        $this->parser = new HTMLPurifier_VarParser_Flexible();
    }

    /**
     * Convenience factory.
     *
     * @param mixed  $config An HTMLPurifier_Config (returned untouched), a
     *                       string (passed to loadIni()), an array (passed
     *                       to loadArray()), or anything else for a default
     *                       configuration.
     * @param object $schema Optional schema; the default schema is used when
     *                       omitted.
     * @return HTMLPurifier_Config
     */
    public static function create($config, $schema = null)
    {
        if ($config instanceof HTMLPurifier_Config) {
            // pass-through
            return $config;
        }
        if (!$schema) {
            $ret = HTMLPurifier_Config::createDefault();
        } else {
            $ret = new HTMLPurifier_Config($schema);
        }
        if (is_string($config)) {
            $ret->loadIni($config);
        } elseif (is_array($config)) {
            $ret->loadArray($config);
        }
        return $ret;
    }

    /**
     * Creates a new configuration that uses $config's schema and has
     * $config's property list as its parent.
     *
     * @param HTMLPurifier_Config $config
     * @return HTMLPurifier_Config
     */
    public static function inherit(HTMLPurifier_Config $config)
    {
        return new HTMLPurifier_Config($config->def, $config->plist);
    }

    /**
     * Creates a configuration backed by the HTMLPurifier_ConfigSchema
     * singleton.
     *
     * @return HTMLPurifier_Config
     */
    public static function createDefault()
    {
        $definition = HTMLPurifier_ConfigSchema::instance();
        $config = new HTMLPurifier_Config($definition);
        return $config;
    }

    /**
     * Retrieves the value of a directive.
     *
     * @param string $key Directive name in "Namespace.Directive" form.
     * @param mixed  $a   Deprecated: directive name when $key carries only
     *                    the namespace.
     * @return mixed The stored value, or null (after raising an error) when
     *               the directive is undefined, is an alias, or lies outside
     *               the currently locked namespace.
     */
    public function get($key, $a = null)
    {
        if ($a !== null) {
            $this->triggerError("Using deprecated API: use \$config->get('$key.$a') instead", E_USER_WARNING);
            $key = "$key.$a";
        }
        if (!$this->finalized) {
            $this->autoFinalize();
        }
        if (!isset($this->def->info[$key])) {
            // can't add % due to SimpleTest bug
            $this->triggerError('Cannot retrieve value of undefined directive ' . htmlspecialchars($key),
                E_USER_WARNING);
            return;
        }
        if (isset($this->def->info[$key]->isAlias)) {
            $d = $this->def->info[$key];
            $this->triggerError('Cannot get value from aliased directive, use real name ' . $d->key,
                E_USER_ERROR);
            return;
        }
        if ($this->lock) {
            list($ns) = explode('.', $key);
            if ($ns !== $this->lock) {
                $this->triggerError('Cannot get value of namespace ' . $ns . ' when lock for ' . $this->lock . ' is active, this probably indicates a Definition setup method is accessing directives that are not within its namespace', E_USER_ERROR);
                return;
            }
        }
        return $this->plist->get($key);
    }

    /**
     * Retrieves all directives of one namespace.
     *
     * @param string $namespace
     * @return array|null Directive => value map, or null (after a warning)
     *                    when the namespace is unknown.
     */
    public function getBatch($namespace)
    {
        if (!$this->finalized) {
            $this->autoFinalize();
        }
        $full = $this->getAll();
        if (!isset($full[$namespace])) {
            $this->triggerError('Cannot retrieve undefined namespace ' . htmlspecialchars($namespace),
                E_USER_WARNING);
            return;
        }
        return $full[$namespace];
    }

    /**
     * Returns an md5 signature of one namespace's values, ignoring the
     * DefinitionRev directive. The result is cached per namespace.
     *
     * @param string $namespace
     * @return string
     */
    public function getBatchSerial($namespace)
    {
        if (empty($this->serials[$namespace])) {
            $batch = $this->getBatch($namespace);
            unset($batch['DefinitionRev']);
            $this->serials[$namespace] = md5(serialize($batch));
        }
        return $this->serials[$namespace];
    }

    /**
     * Returns an md5 signature of the whole configuration. The result is
     * cached until set() changes a value.
     *
     * @return string
     */
    public function getSerial()
    {
        if (empty($this->serial)) {
            $this->serial = md5(serialize($this->getAll()));
        }
        return $this->serial;
    }

    /**
     * Retrieves every directive, grouped as $ret[namespace][directive].
     *
     * @return array
     */
    public function getAll()
    {
        if (!$this->finalized) {
            $this->autoFinalize();
        }
        $ret = array();
        foreach ($this->plist->squash() as $name => $value) {
            list($ns, $key) = explode('.', $name, 2);
            $ret[$ns][$key] = $value;
        }
        return $ret;
    }

    /**
     * Sets the value of a directive.
     *
     * Errors (finalized configuration, undefined directive, wrong type,
     * unsupported value) are reported through triggerError() and leave the
     * stored value untouched.
     *
     * @param string $key   Directive name in "Namespace.Directive" form.
     * @param mixed  $value Value to store.
     * @param mixed  $a     Deprecated: the value, when $key is a bare
     *                      namespace and $value is the directive name.
     */
    public function set($key, $value, $a = null)
    {
        if (strpos($key, '.') === false) {
            $namespace = $key;
            $directive = $value;
            $value = $a;
            $key = "$key.$directive";
            $this->triggerError("Using deprecated API: use \$config->set('$key', ...) instead", E_USER_NOTICE);
        } else {
            list($namespace) = explode('.', $key);
        }
        if ($this->isFinalized('Cannot set directive after finalization')) {
            return;
        }
        if (!isset($this->def->info[$key])) {
            $this->triggerError('Cannot set undefined directive ' . htmlspecialchars($key) . ' to value',
                E_USER_WARNING);
            return;
        }
        $def = $this->def->info[$key];

        if (isset($def->isAlias)) {
            if ($this->aliasMode) {
                $this->triggerError('Double-aliases not allowed, please fix '.
                    'ConfigSchema bug with ' . $key, E_USER_ERROR);
                return;
            }
            $this->aliasMode = true;
            $this->set($def->key, $value);
            $this->aliasMode = false;
            $this->triggerError("$key is an alias, preferred directive name is {$def->key}", E_USER_NOTICE);
            return;
        }

        // Raw type might be negative when using the fully optimized form
        // of stdclass, which indicates allow_null == true
        $rtype = is_int($def) ? $def : $def->type;
        if ($rtype < 0) {
            $type = -$rtype;
            $allow_null = true;
        } else {
            $type = $rtype;
            $allow_null = isset($def->allow_null);
        }

        try {
            $value = $this->parser->parse($value, $type, $allow_null);
        } catch (HTMLPurifier_VarParserException $e) {
            $this->triggerError('Value for ' . $key . ' is of invalid type, should be ' . HTMLPurifier_VarParser::getTypeName($type), E_USER_WARNING);
            return;
        }
        if (is_string($value) && is_object($def)) {
            // resolve value alias if defined
            if (isset($def->aliases[$value])) {
                $value = $def->aliases[$value];
            }
            // check to see if the value is allowed
            if (isset($def->allowed) && !isset($def->allowed[$value])) {
                $this->triggerError('Value not supported, valid values are: ' .
                    $this->_listify($def->allowed), E_USER_WARNING);
                return;
            }
        }
        $this->plist->set($key, $value);

        // reset definitions if the directives they depend on changed
        // this is a very costly process, so it's discouraged
        // with finalization
        if ($namespace == 'HTML' || $namespace == 'CSS' || $namespace == 'URI') {
            $this->definitions[$namespace] = null;
        }

        // Invalidate both cached signatures. With autoFinalize disabled a
        // value can still change after getSerial() has cached its result.
        $this->serials[$namespace] = false;
        $this->serial = null;
    }

    /**
     * Joins the keys of a lookup array into a comma-separated string.
     *
     * @param array $lookup
     * @return string
     */
    private function _listify($lookup)
    {
        $list = array();
        foreach ($lookup as $name => $b) {
            $list[] = $name;
        }
        return implode(', ', $list);
    }

    /**
     * Shortcut for getDefinition('HTML', $raw, $optimized).
     */
    public function getHTMLDefinition($raw = false, $optimized = false)
    {
        return $this->getDefinition('HTML', $raw, $optimized);
    }

    /**
     * Shortcut for getDefinition('CSS', $raw, $optimized).
     */
    public function getCSSDefinition($raw = false, $optimized = false)
    {
        return $this->getDefinition('CSS', $raw, $optimized);
    }

    /**
     * Shortcut for getDefinition('URI', $raw, $optimized).
     */
    public function getURIDefinition($raw = false, $optimized = false)
    {
        return $this->getDefinition('URI', $raw, $optimized);
    }

    /**
     * Retrieves a definition object.
     *
     * With $raw false, returns the fully set-up definition, taken from
     * memory, then from the definition cache, and otherwise built, set up
     * and cached. With $raw true, returns a definition that has not been set
     * up yet so the caller can edit it, or null when $optimized is true and
     * a set-up or cached definition already exists.
     *
     * @param string $type      'HTML', 'CSS' or 'URI'.
     * @param bool   $raw       Whether to return the raw definition.
     * @param bool   $optimized Only valid with $raw; allows a null return
     *                          when no raw edits are needed.
     * @return object|null
     * @throws HTMLPurifier_Exception On inconsistent arguments or usage.
     */
    public function getDefinition($type, $raw = false, $optimized = false)
    {
        if ($optimized && !$raw) {
            throw new HTMLPurifier_Exception("Cannot set optimized = true when raw = false");
        }
        if (!$this->finalized) {
            $this->autoFinalize();
        }
        // temporarily suspend locks, so we can handle recursive definition calls
        $lock = $this->lock;
        $this->lock = null;
        $factory = HTMLPurifier_DefinitionCacheFactory::instance();
        $cache = $factory->create($type, $this);
        $this->lock = $lock;

        if (!$raw) {
            // full definition
            // ---------------
            // check if definition is in memory
            if (!empty($this->definitions[$type])) {
                $def = $this->definitions[$type];
                // check if the definition is setup
                if ($def->setup) {
                    return $def;
                }
                $def->setup($this);
                if ($def->optimized) {
                    $cache->add($def, $this);
                }
                return $def;
            }
            // check if definition is in cache
            $def = $cache->get($this);
            if ($def) {
                // definition in cache, save to memory and return it
                $this->definitions[$type] = $def;
                return $def;
            }
            // initialize it
            $def = $this->initDefinition($type);
            // set it up
            $this->lock = $type;
            $def->setup($this);
            $this->lock = null;
            // save in cache
            $cache->add($def, $this);
            // return it
            return $def;
        }

        // raw definition
        // --------------
        // check preconditions
        $def = null;
        if ($optimized) {
            if (is_null($this->get($type . '.DefinitionID'))) {
                // fatally error out if definition ID not set
                throw new HTMLPurifier_Exception("Cannot retrieve raw version without specifying %$type.DefinitionID");
            }
        }
        if (!empty($this->definitions[$type])) {
            $def = $this->definitions[$type];
            if ($def->setup && !$optimized) {
                $extra = $this->chatty ? " (try moving this code block earlier in your initialization)" : "";
                throw new HTMLPurifier_Exception("Cannot retrieve raw definition after it has already been setup" . $extra);
            }
            if ($def->optimized === null) {
                $extra = $this->chatty ? " (try flushing your cache)" : "";
                throw new HTMLPurifier_Exception("Optimization status of definition is unknown" . $extra);
            }
            if ($def->optimized !== $optimized) {
                $msg = $optimized ? "optimized" : "unoptimized";
                $extra = $this->chatty ? " (this backtrace is for the first inconsistent call, which was for a $msg raw definition)" : "";
                throw new HTMLPurifier_Exception("Inconsistent use of optimized and unoptimized raw definition retrievals" . $extra);
            }
        }
        // check if definition was in memory
        if ($def) {
            if ($def->setup) {
                // invariant: $optimized === true (checked above)
                return null;
            }
            return $def;
        }
        // if optimized, check if definition was in cache
        // (because we do the memory check first, this formulation
        // is prone to cache slamming, but I think
        // guaranteeing that either /all/ of the raw
        // setup code or /none/ of it is run is more important.)
        if ($optimized) {
            // This code path only gets run once; once we put
            // something in $definitions (which is guaranteed by the
            // trailing code), we always short-circuit above.
            $def = $cache->get($this);
            if ($def) {
                // save the full definition for later, but don't
                // return it yet
                $this->definitions[$type] = $def;
                return null;
            }
        }
        // check invariants for creation
        if (!$optimized) {
            if (!is_null($this->get($type . '.DefinitionID'))) {
                if ($this->chatty) {
                    $this->triggerError("Due to a documentation error in previous version of HTML Purifier, your definitions are not being cached.  If this is OK, you can remove the %$type.DefinitionRev and %$type.DefinitionID declaration.  Otherwise, modify your code to use maybeGetRawDefinition, and test if the returned value is null before making any edits (if it is null, that means that a cached version is available, and no raw operations are necessary).  See <a href='http://htmlpurifier.org/docs/enduser-customize.html#optimized'>Customize</a> for more details", E_USER_WARNING);
                } else {
                    $this->triggerError("Useless DefinitionID declaration", E_USER_WARNING);
                }
            }
        }
        // initialize it
        $def = $this->initDefinition($type);
        $def->optimized = $optimized;
        return $def;
    }

    /**
     * Creates a fresh definition object of the given type and remembers it
     * in $definitions.
     *
     * @param string $type 'HTML', 'CSS' or 'URI'.
     * @return object
     * @throws HTMLPurifier_Exception For any other type.
     */
    private function initDefinition($type)
    {
        // quick checks failed, let's create the object
        if ($type == 'HTML') {
            $def = new HTMLPurifier_HTMLDefinition();
        } elseif ($type == 'CSS') {
            $def = new HTMLPurifier_CSSDefinition();
        } elseif ($type == 'URI') {
            $def = new HTMLPurifier_URIDefinition();
        } else {
            throw new HTMLPurifier_Exception("Definition of $type type not supported");
        }
        $this->definitions[$type] = $def;
        return $def;
    }

    /**
     * Shortcut for getDefinition($name, true, true).
     */
    public function maybeGetRawDefinition($name)
    {
        return $this->getDefinition($name, true, true);
    }

    /**
     * Shortcut for getDefinition('HTML', true, true).
     */
    public function maybeGetRawHTMLDefinition()
    {
        return $this->getDefinition('HTML', true, true);
    }

    /**
     * Shortcut for getDefinition('CSS', true, true).
     */
    public function maybeGetRawCSSDefinition()
    {
        return $this->getDefinition('CSS', true, true);
    }

    /**
     * Shortcut for getDefinition('URI', true, true).
     */
    public function maybeGetRawURIDefinition()
    {
        return $this->getDefinition('URI', true, true);
    }

    /**
     * Loads directive values from an array.
     *
     * Keys are either full directive names (underscores are treated as
     * dots) mapped to a value, or bare namespaces mapped to an array of
     * directive => value pairs.
     *
     * @param array $config_array
     */
    public function loadArray($config_array)
    {
        if ($this->isFinalized('Cannot load directives after finalization')) {
            return;
        }
        foreach ($config_array as $key => $value) {
            $key = str_replace('_', '.', $key);
            if (strpos($key, '.') !== false) {
                $this->set($key, $value);
                continue;
            }
            $namespace = $key;
            // A bare namespace must map to a collection of directives;
            // report anything else rather than looping over a scalar.
            if (!is_array($value) && !($value instanceof Traversable)) {
                $this->triggerError('Cannot load directives for namespace ' . htmlspecialchars($namespace) .
                    ', expected an array of directive values', E_USER_WARNING);
                continue;
            }
            foreach ($value as $directive => $directive_value) {
                $this->set($namespace .'.'. $directive, $directive_value);
            }
        }
    }

    /**
     * Returns the directives that a form is allowed to modify.
     *
     * @param mixed  $allowed True for every directive, or a namespace /
     *                        directive name or a list of them. A directive
     *                        prefixed with '-' is excluded.
     * @param object $schema  Optional schema; defaults to the
     *                        HTMLPurifier_ConfigSchema singleton.
     * @return array List of array(namespace, directive) pairs. Aliases and
     *               the DefinitionID / DefinitionRev directives are always
     *               left out.
     */
    public static function getAllowedDirectivesForForm($allowed, $schema = null)
    {
        if (!$schema) {
            $schema = HTMLPurifier_ConfigSchema::instance();
        }
        $allowed_ns = array();
        $allowed_directives = array();
        $blacklisted_directives = array();
        if ($allowed !== true) {
            if (is_string($allowed)) {
                $allowed = array($allowed);
            }
            foreach ($allowed as $ns_or_directive) {
                if (strpos($ns_or_directive, '.') !== false) {
                    // directive
                    if ($ns_or_directive[0] == '-') {
                        $blacklisted_directives[substr($ns_or_directive, 1)] = true;
                    } else {
                        $allowed_directives[$ns_or_directive] = true;
                    }
                } else {
                    // namespace
                    $allowed_ns[$ns_or_directive] = true;
                }
            }
        }
        $ret = array();
        foreach ($schema->info as $key => $def) {
            list($ns, $directive) = explode('.', $key, 2);
            if ($allowed !== true) {
                if (isset($blacklisted_directives["$ns.$directive"])) {
                    continue;
                }
                if (!isset($allowed_directives["$ns.$directive"]) && !isset($allowed_ns[$ns])) {
                    continue;
                }
            }
            if (isset($def->isAlias)) {
                continue;
            }
            if ($directive == 'DefinitionID' || $directive == 'DefinitionRev') {
                continue;
            }
            $ret[] = array($ns, $directive);
        }
        return $ret;
    }

    /**
     * Builds a new configuration from submitted form data.
     *
     * @param array       $array   Form data.
     * @param string|bool $index   Key of $array holding the directives, or
     *                             false to use $array itself.
     * @param mixed       $allowed See getAllowedDirectivesForForm().
     * @param bool        $mq_fix  Whether to undo magic quotes if active.
     * @param object      $schema  Optional schema.
     * @return HTMLPurifier_Config
     */
    public static function loadArrayFromForm($array, $index = false, $allowed = true, $mq_fix = true, $schema = null)
    {
        $ret = HTMLPurifier_Config::prepareArrayFromForm($array, $index, $allowed, $mq_fix, $schema);
        $config = HTMLPurifier_Config::create($ret, $schema);
        return $config;
    }

    /**
     * Merges submitted form data into this configuration. Parameters are as
     * for loadArrayFromForm(), using this object's schema.
     */
    public function mergeArrayFromForm($array, $index = false, $allowed = true, $mq_fix = true)
    {
        $ret = HTMLPurifier_Config::prepareArrayFromForm($array, $index, $allowed, $mq_fix, $this->def);
        $this->loadArray($ret);
    }

    /**
     * Filters submitted form data down to the allowed directives and
     * reshapes it into the nested array accepted by loadArray().
     *
     * A truthy "Null_Namespace.Directive" entry sets that directive to null.
     * Parameters are as for loadArrayFromForm().
     *
     * @return array $ret[namespace][directive] = value
     */
    public static function prepareArrayFromForm($array, $index = false, $allowed = true, $mq_fix = true, $schema = null)
    {
        if ($index !== false) {
            $array = (isset($array[$index]) && is_array($array[$index])) ? $array[$index] : array();
        }
        // Magic quotes cannot be active from PHP 5.4 on, and merely calling
        // get_magic_quotes_gpc() is deprecated in 7.4, so skip it there.
        $mq = $mq_fix
            && version_compare(PHP_VERSION, '5.4.0', '<')
            && function_exists('get_magic_quotes_gpc')
            && get_magic_quotes_gpc();

        $allowed = HTMLPurifier_Config::getAllowedDirectivesForForm($allowed, $schema);
        $ret = array();
        foreach ($allowed as $key) {
            list($ns, $directive) = $key;
            $skey = "$ns.$directive";
            if (!empty($array["Null_$skey"])) {
                $ret[$ns][$directive] = null;
                continue;
            }
            if (!isset($array[$skey])) {
                continue;
            }
            $value = $mq ? stripslashes($array[$skey]) : $array[$skey];
            $ret[$ns][$directive] = $value;
        }
        return $ret;
    }

    /**
     * Loads directive values from an ini file parsed with sections.
     *
     * @param string $filename
     */
    public function loadIni($filename)
    {
        if ($this->isFinalized('Cannot load directives after finalization')) {
            return;
        }
        $array = parse_ini_file($filename, true);
        if ($array === false) {
            // parse_ini_file() signals failure with false; there is nothing
            // to load, so report it instead of iterating over a boolean.
            $this->triggerError('Cannot load directives from ini file ' . htmlspecialchars($filename),
                E_USER_WARNING);
            return;
        }
        $this->loadArray($array);
    }

    /**
     * Reports whether the configuration is finalized.
     *
     * @param string|bool $error Message to raise as E_USER_ERROR when the
     *                           configuration is finalized; false for none.
     * @return bool
     */
    public function isFinalized($error = false)
    {
        if ($this->finalized && $error) {
            $this->triggerError($error, E_USER_ERROR);
        }
        return $this->finalized;
    }

    /**
     * Finalizes the configuration if $autoFinalize is enabled; otherwise
     * only forces the property list to re-squash.
     */
    public function autoFinalize()
    {
        if ($this->autoFinalize) {
            $this->finalize();
        } else {
            $this->plist->squash(true);
        }
    }

    /**
     * Marks the configuration as finalized and releases the value parser.
     */
    public function finalize()
    {
        $this->finalized = true;
        $this->parser = null;
    }

    /**
     * Raises a PHP error. When $chatty is on, the message is suffixed with
     * the file and line of the first call that came from outside this class.
     *
     * @param string $msg Error message.
     * @param int    $no  Error level (an E_USER_* constant).
     */
    protected function triggerError($msg, $no)
    {
        // determine previous stack frame
        $extra = '';
        if ($this->chatty) {
            $trace = debug_backtrace();
            // zip(tail(trace), trace) -- but PHP is not Haskell har har
            for ($i = 0, $c = count($trace); $i < $c - 1; $i++) {
                // Frames of plain functions carry no 'class' entry.
                if (isset($trace[$i + 1]['class']) && $trace[$i + 1]['class'] === 'HTMLPurifier_Config') {
                    continue;
                }
                $frame = $trace[$i];
                // Frames entered from PHP internals carry no file or line.
                if (isset($frame['line'], $frame['file'])) {
                    $extra = " invoked on line {$frame['line']} in file {$frame['file']}";
                }
                break;
            }
        }
        trigger_error($msg . $extra, $no);
    }

    /**
     * Returns a serialized form of this object, after making sure the HTML,
     * CSS and URI definitions have been built.
     *
     * @return string
     */
    public function serialize()
    {
        $this->getDefinition('HTML');
        $this->getDefinition('CSS');
        $this->getDefinition('URI');
        return serialize($this);
    }

}
02060 
02061 
02062 
02063 
02064 
/**
 * Configuration schema: holds the default value and type metadata of every
 * configuration directive.
 */
class HTMLPurifier_ConfigSchema {

    /**
     * Default values, keyed by directive name.
     */
    public $defaults = array();

    /**
     * Property list populated with the same defaults as $defaults.
     */
    public $defaultPlist;

    /**
     * Directive metadata, keyed by directive name. Entries are stdClass
     * objects with ->type and optionally ->allow_null, ->aliases, ->allowed;
     * alias entries carry ->key and ->isAlias instead. After postProcess(),
     * simple entries are collapsed to an integer type (negative when null is
     * allowed).
     */
    public $info = array();

    /**
     * Shared schema instance managed by instance().
     */
    static protected $singleton;

    public function __construct() {
        $this->defaultPlist = new HTMLPurifier_PropertyList();
    }

    /**
     * Unserializes the schema shipped in ConfigSchema/schema.ser.
     *
     * @return The unserialized schema; raises E_USER_ERROR (and returns a
     *         false value if execution continues) when the file cannot be
     *         read or unserialized
     */
    public static function makeFromSerial() {
        $contents = file_get_contents(HTMLPURIFIER_PREFIX . '/HTMLPurifier/ConfigSchema/schema.ser');
        if ($contents === false) {
            // Distinguish "file unreadable" from "file corrupt"; hashing a
            // failed read would only report the SHA-1 of an empty string.
            trigger_error("Unserialization of configuration schema failed, could not read schema file", E_USER_ERROR);
            return false;
        }
        $r = unserialize($contents);
        if (!$r) {
            $hash = sha1($contents);
            trigger_error("Unserialization of configuration schema failed, sha1 of file was $hash", E_USER_ERROR);
        }
        return $r;
    }

    /**
     * Retrieves (and optionally replaces) the shared schema instance.
     *
     * @param $prototype null to fetch the current instance (created from the
     *                   serialized schema on first use), true to discard the
     *                   current instance and reload it from the serialized
     *                   schema, or any other value to install that value as
     *                   the shared instance
     * @return The shared schema instance
     */
    public static function instance($prototype = null) {
        // The reset case must be tested before the generic "prototype given"
        // case; otherwise boolean true itself would be stored as the schema.
        if ($prototype === true || ($prototype === null && HTMLPurifier_ConfigSchema::$singleton === null)) {
            HTMLPurifier_ConfigSchema::$singleton = HTMLPurifier_ConfigSchema::makeFromSerial();
        } elseif ($prototype !== null) {
            HTMLPurifier_ConfigSchema::$singleton = $prototype;
        }
        return HTMLPurifier_ConfigSchema::$singleton;
    }

    /**
     * Defines a directive.
     *
     * @param $key        Directive name
     * @param $default    Default value
     * @param $type       Integer type constant, or a type name looked up in
     *                    HTMLPurifier_VarParser::$types
     * @param $allow_null Whether null is an acceptable value
     */
    public function add($key, $default, $type, $allow_null) {
        $obj = new stdClass();
        $obj->type = is_int($type) ? $type : HTMLPurifier_VarParser::$types[$type];
        if ($allow_null) $obj->allow_null = true;
        $this->info[$key] = $obj;
        $this->defaults[$key] = $default;
        $this->defaultPlist->set($key, $default);
    }

    /**
     * Adds value aliases to an already defined directive; existing aliases
     * with the same name are overwritten.
     *
     * @param $key     Directive name
     * @param $aliases Map of alias value => real value
     */
    public function addValueAliases($key, $aliases) {
        if (!isset($this->info[$key]->aliases)) {
            $this->info[$key]->aliases = array();
        }
        foreach ($aliases as $alias => $real) {
            $this->info[$key]->aliases[$alias] = $real;
        }
    }

    /**
     * Sets (replaces) the allowed values of an already defined directive.
     *
     * @param $key     Directive name
     * @param $allowed Allowed values, stored as given
     */
    public function addAllowedValues($key, $allowed) {
        $this->info[$key]->allowed = $allowed;
    }

    /**
     * Registers $key as an alias of the directive $new_key.
     *
     * @param $key     Alias directive name
     * @param $new_key Directive name the alias points to
     */
    public function addAlias($key, $new_key) {
        $obj = new stdClass();
        $obj->key = $new_key;
        $obj->isAlias = true;
        $this->info[$key] = $obj;
    }

    /**
     * Compacts $info: entries that only carry a type become that integer,
     * and entries that carry exactly a type plus allow_null become the
     * negated integer. All other entries are left as objects.
     */
    public function postProcess() {
        foreach ($this->info as $key => $v) {
            $fields = count((array) $v);
            if ($fields == 1) {
                $this->info[$key] = $v->type;
            } elseif ($fields == 2 && isset($v->allow_null)) {
                $this->info[$key] = -$v->type;
            }
        }
    }

}
02225 
02226 
02227 
02228 
02229 
/**
 * Collects the content sets declared by modules, expands references between
 * them, and builds child definitions for element definitions.
 */
class HTMLPurifier_ContentSets
{

    /**
     * Expanded content sets as strings: set name => "a | b | c".
     */
    public $info = array();

    /**
     * Expanded content sets as lookups: set name => array(element => true).
     */
    public $lookup = array();

    /**
     * Names of all content sets (keys of $info).
     */
    protected $keys = array();
    /**
     * String expansions of all content sets (values of $info).
     */
    protected $values = array();

    /**
     * Merges the content sets of all given modules and fully expands them.
     *
     * @param $modules A module object or an array of them; each must expose
     *                 ->content_sets as name => "a | b" strings
     */
    public function __construct($modules) {
        if (!is_array($modules)) $modules = array($modules);
        // populate content_sets based on module hints
        // sorry, no way of overloading
        foreach ($modules as $module) {
            foreach ($module->content_sets as $key => $value) {
                $temp = $this->convertToLookup($value);
                if (isset($this->lookup[$key])) {
                    // add it into the existing content set
                    $this->lookup[$key] = array_merge($this->lookup[$key], $temp);
                } else {
                    $this->lookup[$key] = $temp;
                }
            }
        }
        // Replace references to other content sets by their members, and
        // repeat until a full pass changes nothing.
        $old_lookup = false;
        while ($old_lookup !== $this->lookup) {
            $old_lookup = $this->lookup;
            foreach ($this->lookup as $i => $set) {
                $add = array();
                foreach ($set as $element => $x) {
                    if (isset($this->lookup[$element])) {
                        $add += $this->lookup[$element];
                        unset($this->lookup[$i][$element]);
                    }
                }
                $this->lookup[$i] += $add;
            }
        }

        foreach ($this->lookup as $key => $lookup) {
            $this->info[$key] = implode(' | ', array_keys($lookup));
        }
        $this->keys   = array_keys($this->info);
        $this->values = array_values($this->info);
    }

    /**
     * Expands content-set names inside $def->content_model and assigns
     * $def->child. Does nothing when $def->child is already non-empty.
     *
     * @param $def    Element definition, modified in place
     * @param $module Module the definition belongs to
     */
    public function generateChildDef(&$def, $module) {
        if (!empty($def->child)) return; // already done!
        $content_model = $def->content_model;
        // With no content sets the alternation would be empty and match the
        // empty string at every word boundary, so skip the substitution.
        if (is_string($content_model) && !empty($this->keys)) {
            $names = array();
            foreach ($this->keys as $name) {
                // quote so a name can never be interpreted as regex syntax
                $names[] = preg_quote($name, '/');
            }
            $def->content_model = preg_replace_callback(
                '/\b(' . implode('|', $names) . ')\b/',
                array($this, 'generateChildDefCallback'),
                $content_model
            );
        }
        $def->child = $this->getChildDef($def, $module);
    }

    /**
     * preg_replace_callback() handler: returns the expansion of the matched
     * content-set name.
     */
    public function generateChildDefCallback($matches) {
        return $this->info[$matches[0]];
    }

    /**
     * Instantiates the child definition matching $def->content_model_type.
     *
     * Unknown types are delegated to $module->getChildDef() when the module
     * sets ->defines_child_def; if that yields nothing, E_USER_ERROR is
     * raised.
     *
     * @param $def    Element definition to read content_model(_type) from
     * @param $module Module the definition belongs to
     * @return Child definition object, or false on failure
     */
    public function getChildDef($def, $module) {
        $value = $def->content_model;
        if (is_object($value)) {
            trigger_error(
                'Literal object child definitions should be stored in '.
                'ElementDef->child not ElementDef->content_model',
                E_USER_NOTICE
            );
            return $value;
        }
        switch ($def->content_model_type) {
            case 'required':
                return new HTMLPurifier_ChildDef_Required($value);
            case 'optional':
                return new HTMLPurifier_ChildDef_Optional($value);
            case 'empty':
                return new HTMLPurifier_ChildDef_Empty();
            case 'custom':
                return new HTMLPurifier_ChildDef_Custom($value);
        }
        // defer to its module
        $return = false;
        if ($module->defines_child_def) { // save a func call
            $return = $module->getChildDef($def);
        }
        if ($return !== false) return $return;
        // error-out
        trigger_error(
            'Could not determine which ChildDef class to instantiate',
            E_USER_ERROR
        );
        return false;
    }

    /**
     * Converts "a | b | c" into array('a' => true, 'b' => true, 'c' => true).
     * All spaces are removed before splitting on '|'.
     *
     * @param $string Pipe-separated element list
     * @return Lookup array
     */
    protected function convertToLookup($string) {
        $array = explode('|', str_replace(' ', '', $string));
        $ret = array();
        foreach ($array as $k) {
            $ret[$k] = true;
        }
        return $ret;
    }

}
02381 
02382 
02383 
02384 
02385 
/**
 * Registry of named variables held by reference.
 */
class HTMLPurifier_Context
{

    /**
     * Registered variables: name => reference.
     */
    private $_storage = array();

    /**
     * Registers a variable under a name. Raises E_USER_ERROR if the name is
     * already taken.
     *
     * @param $name Name to register under
     * @param $ref  Variable to store, bound by reference
     */
    public function register($name, &$ref) {
        // array_key_exists(), not isset(): a variable whose current value is
        // null is still registered.
        if (array_key_exists($name, $this->_storage)) {
            trigger_error("Name $name produces collision, cannot re-register",
                          E_USER_ERROR);
            return;
        }
        $this->_storage[$name] =& $ref;
    }

    /**
     * Retrieves a registered variable by reference.
     *
     * @param $name         Name of the variable
     * @param $ignore_error When true, a missing name does not raise an error
     * @return Reference to the variable, or null if it is not registered
     */
    public function &get($name, $ignore_error = false) {
        if (!array_key_exists($name, $this->_storage)) {
            if (!$ignore_error) {
                trigger_error("Attempted to retrieve non-existent variable $name",
                              E_USER_ERROR);
            }
            $var = null; // so we can return by reference
            return $var;
        }
        return $this->_storage[$name];
    }

    /**
     * Removes a variable from the registry. Raises E_USER_ERROR if the name
     * is not registered.
     *
     * @param $name Name of the variable
     */
    public function destroy($name) {
        if (!array_key_exists($name, $this->_storage)) {
            trigger_error("Attempted to destroy non-existent variable $name",
                          E_USER_ERROR);
            return;
        }
        unset($this->_storage[$name]);
    }

    /**
     * Reports whether a name is registered, regardless of the variable's
     * current value.
     *
     * @param $name Name of the variable
     * @return bool
     */
    public function exists($name) {
        return array_key_exists($name, $this->_storage);
    }

    /**
     * Registers every entry of an array under its key.
     *
     * @param $context_array Map of name => value
     */
    public function loadArray($context_array) {
        foreach ($context_array as $key => $discard) {
            $this->register($key, $context_array[$key]);
        }
    }

}
02464 
02465 
02466 
02467 
02468 
/**
 * Abstract base for definition caches. Concrete subclasses supply the
 * storage operations; this class provides cache-key handling and type
 * checking.
 */
abstract class HTMLPurifier_DefinitionCache
{

    /**
     * Definition type this cache is responsible for.
     */
    public $type;

    /**
     * @param $type Definition type this cache handles
     */
    public function __construct($type) {
        $this->type = $type;
    }

    /**
     * Builds the cache key "version,batch-serial,definition-revision" for a
     * configuration.
     *
     * @param $config Configuration object
     * @return string
     */
    public function generateKey($config) {
        $segments = array(
            $config->version,
            $config->getBatchSerial($this->type),
            $config->get($this->type . '.DefinitionRev'),
        );
        return implode(',', $segments);
    }

    /**
     * Decides whether a cache key is out of date for a configuration.
     *
     * A key is old when it has fewer than three comma-separated parts, when
     * its version differs from the configuration's, or when its batch serial
     * matches but its revision is lower than the configured one.
     *
     * @param $key    Key as produced by generateKey()
     * @param $config Configuration object to compare against
     * @return bool
     */
    public function isOld($key, $config) {
        $segments = explode(',', $key, 3);
        if (count($segments) < 3) {
            return true;
        }
        list($version, $hash, $revision) = $segments;
        // version mismatch, is always old
        if (version_compare($version, $config->version) != 0) {
            return true;
        }
        // versions match: old only if ids match and the revision is behind
        return $hash == $config->getBatchSerial($this->type)
            && $revision < $config->get($this->type . '.DefinitionRev');
    }

    /**
     * Verifies that a definition has the type this cache handles; raises an
     * error (default level) when it does not.
     *
     * @param $def Definition object exposing ->type
     * @return bool true when the types are identical
     */
    public function checkDefType($def) {
        if ($def->type === $this->type) {
            return true;
        }
        trigger_error("Cannot use definition of type {$def->type} in cache for {$this->type}");
        return false;
    }

    /**
     * Adds a definition to the cache.
     */
    abstract public function add($def, $config);

    /**
     * Sets a definition in the cache.
     */
    abstract public function set($def, $config);

    /**
     * Replaces a definition in the cache.
     */
    abstract public function replace($def, $config);

    /**
     * Retrieves the definition for a configuration.
     */
    abstract public function get($config);

    /**
     * Removes the definition for a configuration.
     */
    abstract public function remove($config);

    /**
     * Flushes the cache.
     */
    abstract public function flush($config);

    /**
     * Cleans up the cache.
     */
    abstract public function cleanup($config);

}
02573 
02574 
02575 
02576 
02577 
/**
 * Creates, decorates and memoizes definition cache objects.
 */
class HTMLPurifier_DefinitionCacheFactory
{

    /**
     * Created caches: implementation name => definition type => cache.
     */
    protected $caches = array('Serializer' => array());
    /**
     * Registered implementations: short name => class name.
     */
    protected $implementations = array();
    /**
     * Decorators applied to every new cache: decorator name => object.
     */
    protected $decorators = array();

    /**
     * Initializes a freshly created factory with the default decorator.
     */
    public function setup() {
        $this->addDecorator('Cleanup');
    }

    /**
     * Retrieves (and optionally replaces) the shared factory instance.
     *
     * @param $prototype null to fetch the current instance (created on first
     *                   use), true to discard it and create a fresh default
     *                   factory, or any other value to install that value as
     *                   the shared instance
     * @return The shared factory instance
     */
    public static function instance($prototype = null) {
        static $instance;
        // The reset case must be tested before the generic "prototype given"
        // case; otherwise boolean true itself would be stored as the factory.
        if ($prototype === true || ($prototype === null && $instance === null)) {
            $instance = new HTMLPurifier_DefinitionCacheFactory();
            $instance->setup();
        } elseif ($prototype !== null) {
            $instance = $prototype;
        }
        return $instance;
    }

    /**
     * Registers a cache implementation.
     *
     * @param $short Short name, as used in the Cache.DefinitionImpl directive
     * @param $long  Class name implementing the cache
     */
    public function register($short, $long) {
        $this->implementations[$short] = $long;
    }

    /**
     * Returns the (decorated) cache for a definition type, creating and
     * memoizing it on first request.
     *
     * @param $type   Definition type
     * @param $config Configuration object; Cache.DefinitionImpl selects the
     *                implementation, null selecting the Null cache
     * @return Definition cache object
     */
    public function create($type, $config) {
        $method = $config->get('Cache.DefinitionImpl');
        if ($method === null) {
            return new HTMLPurifier_DefinitionCache_Null($type);
        }
        if (!empty($this->caches[$method][$type])) {
            return $this->caches[$method][$type];
        }
        if (
          isset($this->implementations[$method]) &&
          class_exists($class = $this->implementations[$method], false)
        ) {
            $cache = new $class($type);
        } else {
            if ($method != 'Serializer') {
                trigger_error("Unrecognized DefinitionCache $method, using Serializer instead", E_USER_WARNING);
            }
            $cache = new HTMLPurifier_DefinitionCache_Serializer($type);
        }
        foreach ($this->decorators as $decorator) {
            $new_cache = $decorator->decorate($cache);
            // Detach the name $cache before rebinding it, so that anything
            // still referring to the old variable is not redirected to the
            // decorator itself (the original guard against infinite recursion).
            unset($cache);
            $cache = $new_cache;
        }
        $this->caches[$method][$type] = $cache;
        return $this->caches[$method][$type];
    }

    /**
     * Registers a decorator to apply to every cache created afterwards.
     *
     * @param $decorator Decorator object, or the suffix of a
     *                   HTMLPurifier_DefinitionCache_Decorator_* class name
     */
    public function addDecorator($decorator) {
        if (is_string($decorator)) {
            $class = "HTMLPurifier_DefinitionCache_Decorator_$decorator";
            $decorator = new $class;
        }
        $this->decorators[$decorator->name] = $decorator;
    }

}
02665 
02666 
02667 
02668 
02669 
/**
 * Plain value object describing a document type.
 */
class HTMLPurifier_Doctype
{
    /**
     * Full name of the doctype.
     */
    public $name;

    /**
     * Modules associated with this doctype.
     */
    public $modules = array();

    /**
     * Tidy modules associated with this doctype.
     */
    public $tidyModules = array();

    /**
     * XML flag; defaults to true.
     */
    public $xml = true;

    /**
     * Alternative names for this doctype.
     */
    public $aliases = array();

    /**
     * Public DTD identifier.
     */
    public $dtdPublic;

    /**
     * System DTD identifier.
     */
    public $dtdSystem;

    /**
     * Stores every argument in the property of the corresponding name.
     */
    public function __construct(
        $name = null,
        $xml = true,
        $modules = array(),
        $tidyModules = array(),
        $aliases = array(),
        $dtd_public = null,
        $dtd_system = null
    ) {
        $this->name        = $name;
        $this->aliases     = $aliases;
        $this->xml         = $xml;
        $this->modules     = $modules;
        $this->tidyModules = $tidyModules;
        $this->dtdPublic   = $dtd_public;
        $this->dtdSystem   = $dtd_system;
    }
}
02726 
02727 
02728 
02729 
02730 
/**
 * Registry of doctype objects, addressable by name or alias.
 */
class HTMLPurifier_DoctypeRegistry
{

    /**
     * Registered doctypes: name => doctype object.
     */
    protected $doctypes;

    /**
     * Alias table: alias => doctype name.
     */
    protected $aliases;

    /**
     * Registers a doctype and hooks up its aliases.
     *
     * When $doctype is not an object, a HTMLPurifier_Doctype is built from
     * all arguments; otherwise the remaining arguments are ignored. Aliases
     * that coincide with a registered doctype name are skipped, and any
     * alias equal to the new doctype's name is dropped.
     *
     * @param $doctype      Doctype name, or a ready doctype object
     * @param $xml          XML flag
     * @param $modules      Module or array of modules
     * @param $tidy_modules Tidy module or array of tidy modules
     * @param $aliases      Alias or array of aliases
     * @param $dtd_public   Public DTD identifier
     * @param $dtd_system   System DTD identifier
     * @return The registered doctype object
     */
    public function register($doctype, $xml = true, $modules = array(),
        $tidy_modules = array(), $aliases = array(), $dtd_public = null, $dtd_system = null
    ) {
        $modules      = is_array($modules)      ? $modules      : array($modules);
        $tidy_modules = is_array($tidy_modules) ? $tidy_modules : array($tidy_modules);
        $aliases      = is_array($aliases)      ? $aliases      : array($aliases);
        if (!is_object($doctype)) {
            $doctype = new HTMLPurifier_Doctype(
                $doctype, $xml, $modules, $tidy_modules, $aliases, $dtd_public, $dtd_system
            );
        }
        $name = $doctype->name;
        $this->doctypes[$name] = $doctype;
        // hookup aliases, never shadowing a real doctype name
        foreach ($doctype->aliases as $alias) {
            if (!isset($this->doctypes[$alias])) {
                $this->aliases[$alias] = $name;
            }
        }
        // remove old aliases
        if (isset($this->aliases[$name])) {
            unset($this->aliases[$name]);
        }
        return $doctype;
    }

    /**
     * Looks up a doctype by name or alias.
     *
     * @param $doctype Name or alias
     * @return The registered doctype object; if none exists, E_USER_ERROR is
     *         raised and an otherwise empty HTMLPurifier_Doctype with that
     *         name is returned
     */
    public function get($doctype) {
        $resolved = isset($this->aliases[$doctype]) ? $this->aliases[$doctype] : $doctype;
        if (isset($this->doctypes[$resolved])) {
            return $this->doctypes[$resolved];
        }
        trigger_error('Doctype ' . htmlspecialchars($resolved) . ' does not exist', E_USER_ERROR);
        return new HTMLPurifier_Doctype($resolved);
    }

    /**
     * Returns a clone of the doctype selected by a configuration.
     *
     * @param $config Configuration object
     * @return Cloned doctype object
     */
    public function make($config) {
        $selected = $this->getDoctypeFromConfig($config);
        return clone $this->get($selected);
    }

    /**
     * Determines the doctype name a configuration asks for.
     *
     * HTML.Doctype wins, then HTML.CustomDoctype; otherwise the name is
     * composed from the HTML.XHTML and HTML.Strict flags.
     *
     * @param $config Configuration object
     * @return string Doctype name
     */
    public function getDoctypeFromConfig($config) {
        // recommended test
        foreach (array('HTML.Doctype', 'HTML.CustomDoctype') as $directive) {
            $explicit = $config->get($directive);
            if (!empty($explicit)) {
                return $explicit;
            }
        }
        // backwards-compatibility
        $family  = $config->get('HTML.XHTML')  ? 'XHTML 1.0' : 'HTML 4.01';
        $variant = $config->get('HTML.Strict') ? ' Strict'   : ' Transitional';
        return $family . $variant;
    }

}
02830 
02831 
02832 
02833 
02834 
/**
 * Definition of a single element: its attributes, attribute transforms,
 * content model and related flags. Definitions can be merged with mergeIn().
 */
class HTMLPurifier_ElementDef
{

    /**
     * Standalone flag; defaults to true.
     */
    public $standalone = true;

    /**
     * Attribute definitions, keyed by attribute name. Index 0 holds a list
     * of includes (see mergeIn()).
     */
    public $attr = array();

    /**
     * Attribute transforms keyed by name (pre).
     */
    public $attr_transform_pre = array();

    /**
     * Attribute transforms keyed by name (post).
     */
    public $attr_transform_post = array();

    /**
     * Child definition; false after mergeIn() changed the content model and
     * the child needs regenerating.
     */
    public $child;

    /**
     * Content model; in a definition being merged in, "#SUPER" stands for
     * the content model of the definition it is merged into.
     */
    public $content_model;

    /**
     * Content model type.
     */
    public $content_model_type;



    /**
     * Descendants-are-inline flag; defaults to false.
     */
    public $descendants_are_inline = false;

    /**
     * Required attributes.
     */
    public $required_attr = array();

    /**
     * Excluded elements, keyed by name.
     */
    public $excludes = array();

    /**
     * Autoclose list.
     */
    public $autoclose = array();

    /**
     * Wrap setting.
     */
    public $wrap;

    /**
     * Formatting flag.
     */
    public $formatting;

    /**
     * Convenience factory for the three most common properties.
     *
     * @param $content_model      Content model
     * @param $content_model_type Content model type
     * @param $attr               Attribute definitions
     * @return HTMLPurifier_ElementDef
     */
    public static function create($content_model, $content_model_type, $attr) {
        $element = new HTMLPurifier_ElementDef();
        $element->attr               = $attr;
        $element->content_model      = $content_model;
        $element->content_model_type = $content_model_type;
        return $element;
    }

    /**
     * Merges another definition into this one; values of $def win.
     *
     * Entries set to false in $def remove the corresponding entry here.
     * Includes (attr index 0) are appended, never replaced.
     *
     * @param $def Definition to merge in
     */
    public function mergeIn($def) {

        // later keys take precedence
        foreach ($def->attr as $name => $spec) {
            if ($name === 0) {
                // merge in the includes
                // sorry, no way to override an include
                foreach ($spec as $include) {
                    $this->attr[0][] = $include;
                }
            } elseif ($spec === false) {
                unset($this->attr[$name]);
            } else {
                $this->attr[$name] = $spec;
            }
        }
        $this->_mergeAssocArray($this->attr_transform_pre, $def->attr_transform_pre);
        $this->_mergeAssocArray($this->attr_transform_post, $def->attr_transform_post);
        $this->_mergeAssocArray($this->excludes, $def->excludes);

        if (!empty($def->content_model)) {
            // "#SUPER" in the incoming model expands to the current model
            $this->content_model =
                str_replace("#SUPER", $this->content_model, $def->content_model);
            $this->child = false;
        }
        if (!empty($def->content_model_type)) {
            $this->content_model_type = $def->content_model_type;
            $this->child = false;
        }
        if ($def->child !== null) {
            $this->child = $def->child;
        }
        if ($def->formatting !== null) {
            $this->formatting = $def->formatting;
        }
        if ($def->descendants_are_inline) {
            $this->descendants_are_inline = $def->descendants_are_inline;
        }

    }

    /**
     * Merges $a2 into $a1 in place: false values delete the key from $a1,
     * all other values overwrite or add it.
     *
     * @param $a1 Target array, modified by reference
     * @param $a2 Array to merge in
     */
    private function _mergeAssocArray(&$a1, $a2) {
        foreach ($a2 as $key => $value) {
            if ($value === false) {
                unset($a1[$key]);
            } else {
                $a1[$key] = $value;
            }
        }
    }

}
03014 
03015 
03016 
03017 
03018 
03023 class HTMLPurifier_Encoder
03024 {
03025 
03029     private function __construct() {
03030         trigger_error('Cannot instantiate encoder, call methods statically', E_USER_ERROR);
03031     }
03032 
03036     public static function muteErrorHandler() {}
03037 
03041     public static function unsafeIconv($in, $out, $text) {
03042         set_error_handler(array('HTMLPurifier_Encoder', 'muteErrorHandler'));
03043         $r = iconv($in, $out, $text);
03044         restore_error_handler();
03045         return $r;
03046     }
03047 
03051     public static function iconv($in, $out, $text, $max_chunk_size = 8000) {
03052         $code = self::testIconvTruncateBug();
03053         if ($code == self::ICONV_OK) {
03054             return self::unsafeIconv($in, $out, $text);
03055         } elseif ($code == self::ICONV_TRUNCATES) {
03056             // we can only work around this if the input character set
03057             // is utf-8
03058             if ($in == 'utf-8') {
03059                 if ($max_chunk_size < 4) {
03060                     trigger_error('max_chunk_size is too small', E_USER_WARNING);
03061                     return false;
03062                 }
03063                 // split into 8000 byte chunks, but be careful to handle
03064                 // multibyte boundaries properly
03065                 if (($c = strlen($text)) <= $max_chunk_size) {
03066                     return self::unsafeIconv($in, $out, $text);
03067                 }
03068                 $r = '';
03069                 $i = 0;
03070                 while (true) {
03071                     if ($i + $max_chunk_size >= $c) {
03072                         $r .= self::unsafeIconv($in, $out, substr($text, $i));
03073                         break;
03074                     }
03075                     // wibble the boundary
03076                     if (0x80 != (0xC0 & ord($text[$i + $max_chunk_size]))) {
03077                         $chunk_size = $max_chunk_size;
03078                     } elseif (0x80 != (0xC0 & ord($text[$i + $max_chunk_size - 1]))) {
03079                         $chunk_size = $max_chunk_size - 1;
03080                     } elseif (0x80 != (0xC0 & ord($text[$i + $max_chunk_size - 2]))) {
03081                         $chunk_size = $max_chunk_size - 2;
03082                     } elseif (0x80 != (0xC0 & ord($text[$i + $max_chunk_size - 3]))) {
03083                         $chunk_size = $max_chunk_size - 3;
03084                     } else {
03085                         return false; // rather confusing UTF-8...
03086                     }
03087                     $chunk = substr($text, $i, $chunk_size); // substr doesn't mind overlong lengths
03088                     $r .= self::unsafeIconv($in, $out, $chunk);
03089                     $i += $chunk_size;
03090                 }
03091                 return $r;
03092             } else {
03093                 return false;
03094             }
03095         } else {
03096             return false;
03097         }
03098     }
03099 
03125     public static function cleanUTF8($str, $force_php = false) {
03126 
03127         // UTF-8 validity is checked since PHP 4.3.5
03128         // This is an optimization: if the string is already valid UTF-8, no
03129         // need to do PHP stuff. 99% of the time, this will be the case.
03130         // The regexp matches the XML char production, as well as well as excluding
03131         // non-SGML codepoints U+007F to U+009F
03132         if (preg_match('/^[\x{9}\x{A}\x{D}\x{20}-\x{7E}\x{A0}-\x{D7FF}\x{E000}-\x{FFFD}\x{10000}-\x{10FFFF}]*$/Du', $str)) {
03133             return $str;
03134         }
03135 
03136         $mState = 0; // cached expected number of octets after the current octet
03137                      // until the beginning of the next UTF8 character sequence
03138         $mUcs4  = 0; // cached Unicode character
03139         $mBytes = 1; // cached expected number of octets in the current sequence
03140 
03141         // original code involved an $out that was an array of Unicode
03142         // codepoints.  Instead of having to convert back into UTF-8, we've
03143         // decided to directly append valid UTF-8 characters onto a string
03144         // $out once they're done.  $char accumulates raw bytes, while $mUcs4
03145         // turns into the Unicode code point, so there's some redundancy.
03146 
03147         $out = '';
03148         $char = '';
03149 
03150         $len = strlen($str);
03151         for($i = 0; $i < $len; $i++) {
03152             $in = ord($str{$i});
03153             $char .= $str[$i]; // append byte to char
03154             if (0 == $mState) {
03155                 // When mState is zero we expect either a US-ASCII character
03156                 // or a multi-octet sequence.
03157                 if (0 == (0x80 & ($in))) {
03158                     // US-ASCII, pass straight through.
03159                     if (($in <= 31 || $in == 127) &&
03160                         !($in == 9 || $in == 13 || $in == 10) // save \r\t\n
03161                     ) {
03162                         // control characters, remove
03163                     } else {
03164                         $out .= $char;
03165                     }
03166                     // reset
03167                     $char = '';
03168                     $mBytes = 1;
03169                 } elseif (0xC0 == (0xE0 & ($in))) {
03170                     // First octet of 2 octet sequence
03171                     $mUcs4 = ($in);
03172                     $mUcs4 = ($mUcs4 & 0x1F) << 6;
03173                     $mState = 1;
03174                     $mBytes = 2;
03175                 } elseif (0xE0 == (0xF0 & ($in))) {
03176                     // First octet of 3 octet sequence
03177                     $mUcs4 = ($in);
03178                     $mUcs4 = ($mUcs4 & 0x0F) << 12;
03179                     $mState = 2;
03180                     $mBytes = 3;
03181                 } elseif (0xF0 == (0xF8 & ($in))) {
03182                     // First octet of 4 octet sequence
03183                     $mUcs4 = ($in);
03184                     $mUcs4 = ($mUcs4 & 0x07) << 18;
03185                     $mState = 3;
03186                     $mBytes = 4;
03187                 } elseif (0xF8 == (0xFC & ($in))) {
03188                     // First octet of 5 octet sequence.
03189                     //
03190                     // This is illegal because the encoded codepoint must be
03191                     // either:
03192                     // (a) not the shortest form or
03193                     // (b) outside the Unicode range of 0-0x10FFFF.
03194                     // Rather than trying to resynchronize, we will carry on
03195                     // until the end of the sequence and let the later error
03196                     // handling code catch it.
03197                     $mUcs4 = ($in);
03198                     $mUcs4 = ($mUcs4 & 0x03) << 24;
03199                     $mState = 4;
03200                     $mBytes = 5;
03201                 } elseif (0xFC == (0xFE & ($in))) {
03202                     // First octet of 6 octet sequence, see comments for 5
03203                     // octet sequence.
03204                     $mUcs4 = ($in);
03205                     $mUcs4 = ($mUcs4 & 1) << 30;
03206                     $mState = 5;
03207                     $mBytes = 6;
03208                 } else {
03209                     // Current octet is neither in the US-ASCII range nor a
03210                     // legal first octet of a multi-octet sequence.
03211                     $mState = 0;
03212                     $mUcs4  = 0;
03213                     $mBytes = 1;
03214                     $char = '';
03215                 }
03216             } else {
03217                 // When mState is non-zero, we expect a continuation of the
03218                 // multi-octet sequence
03219                 if (0x80 == (0xC0 & ($in))) {
03220                     // Legal continuation.
03221                     $shift = ($mState - 1) * 6;
03222                     $tmp = $in;
03223                     $tmp = ($tmp & 0x0000003F) << $shift;
03224                     $mUcs4 |= $tmp;
03225 
03226                     if (0 == --$mState) {
03227                         // End of the multi-octet sequence. mUcs4 now contains
03228                         // the final Unicode codepoint to be output
03229 
03230                         // Check for illegal sequences and codepoints.
03231 
03232                         // From Unicode 3.1, non-shortest form is illegal
03233                         if (((2 == $mBytes) && ($mUcs4 < 0x0080)) ||
03234                             ((3 == $mBytes) && ($mUcs4 < 0x0800)) ||
03235                             ((4 == $mBytes) && ($mUcs4 < 0x10000)) ||
03236                             (4 < $mBytes) ||
03237                             // From Unicode 3.2, surrogate characters = illegal
03238                             (($mUcs4 & 0xFFFFF800) == 0xD800) ||
03239                             // Codepoints outside the Unicode range are illegal
03240                             ($mUcs4 > 0x10FFFF)
03241                         ) {
03242 
03243                         } elseif (0xFEFF != $mUcs4 && // omit BOM
03244                             // check for valid Char unicode codepoints
03245                             (
03246                                 0x9 == $mUcs4 ||
03247                                 0xA == $mUcs4 ||
03248                                 0xD == $mUcs4 ||
03249                                 (0x20 <= $mUcs4 && 0x7E >= $mUcs4) ||
03250                                 // 7F-9F is not strictly prohibited by XML,
03251                                 // but it is non-SGML, and thus we don't allow it
03252                                 (0xA0 <= $mUcs4 && 0xD7FF >= $mUcs4) ||
03253                                 (0x10000 <= $mUcs4 && 0x10FFFF >= $mUcs4)
03254                             )
03255                         ) {
03256                             $out .= $char;
03257                         }
03258                         // initialize UTF8 cache (reset)
03259                         $mState = 0;
03260                         $mUcs4  = 0;
03261                         $mBytes = 1;
03262                         $char = '';
03263                     }
03264                 } else {
03265                     // ((0xC0 & (*in) != 0x80) && (mState != 0))
03266                     // Incomplete multi-octet sequence.
03267                     // used to result in complete fail, but we'll reset
03268                     $mState = 0;
03269                     $mUcs4  = 0;
03270                     $mBytes = 1;
03271                     $char ='';
03272                 }
03273             }
03274         }
03275         return $out;
03276     }
03277 
03291     // +----------+----------+----------+----------+
03292     // | 33222222 | 22221111 | 111111   |          |
03293     // | 10987654 | 32109876 | 54321098 | 76543210 | bit
03294     // +----------+----------+----------+----------+
03295     // |          |          |          | 0xxxxxxx | 1 byte 0x00000000..0x0000007F
03296     // |          |          | 110yyyyy | 10xxxxxx | 2 byte 0x00000080..0x000007FF
03297     // |          | 1110zzzz | 10yyyyyy | 10xxxxxx | 3 byte 0x00000800..0x0000FFFF
03298     // | 11110www | 10wwzzzz | 10yyyyyy | 10xxxxxx | 4 byte 0x00010000..0x0010FFFF
03299     // +----------+----------+----------+----------+
03300     // | 00000000 | 00011111 | 11111111 | 11111111 | Theoretical upper limit of legal scalars: 2097151 (0x001FFFFF)
03301     // | 00000000 | 00010000 | 11111111 | 11111111 | Defined upper limit of legal scalar codes
03302     // +----------+----------+----------+----------+
03303 
03304     public static function unichr($code) {
03305         if($code > 1114111 or $code < 0 or
03306           ($code >= 55296 and $code <= 57343) ) {
03307             // bits are set outside the "valid" range as defined
03308             // by UNICODE 4.1.0
03309             return '';
03310         }
03311 
03312         $x = $y = $z = $w = 0;
03313         if ($code < 128) {
03314             // regular ASCII character
03315             $x = $code;
03316         } else {
03317             // set up bits for UTF-8
03318             $x = ($code & 63) | 128;
03319             if ($code < 2048) {
03320                 $y = (($code & 2047) >> 6) | 192;
03321             } else {
03322                 $y = (($code & 4032) >> 6) | 128;
03323                 if($code < 65536) {
03324                     $z = (($code >> 12) & 15) | 224;
03325                 } else {
03326                     $z = (($code >> 12) & 63) | 128;
03327                     $w = (($code >> 18) & 7)  | 240;
03328                 }
03329             }
03330         }
03331         // set up the actual character
03332         $ret = '';
03333         if($w) $ret .= chr($w);
03334         if($z) $ret .= chr($z);
03335         if($y) $ret .= chr($y);
03336         $ret .= chr($x);
03337 
03338         return $ret;
03339     }
03340 
03341     public static function iconvAvailable() {
03342         static $iconv = null;
03343         if ($iconv === null) {
03344             $iconv = function_exists('iconv') && self::testIconvTruncateBug() != self::ICONV_UNUSABLE;
03345         }
03346         return $iconv;
03347     }
03348 
03352     public static function convertToUTF8($str, $config, $context) {
03353         $encoding = $config->get('Core.Encoding');
03354         if ($encoding === 'utf-8') return $str;
03355         static $iconv = null;
03356         if ($iconv === null) $iconv = self::iconvAvailable();
03357         if ($iconv && !$config->get('Test.ForceNoIconv')) {
03358             // unaffected by bugs, since UTF-8 support all characters
03359             $str = self::unsafeIconv($encoding, 'utf-8//IGNORE', $str);
03360             if ($str === false) {
03361                 // $encoding is not a valid encoding
03362                 trigger_error('Invalid encoding ' . $encoding, E_USER_ERROR);
03363                 return '';
03364             }
03365             // If the string is bjorked by Shift_JIS or a similar encoding
03366             // that doesn't support all of ASCII, convert the naughty
03367             // characters to their true byte-wise ASCII/UTF-8 equivalents.
03368             $str = strtr($str, self::testEncodingSupportsASCII($encoding));
03369             return $str;
03370         } elseif ($encoding === 'iso-8859-1') {
03371             $str = utf8_encode($str);
03372             return $str;
03373         }
03374         trigger_error('Encoding not supported, please install iconv', E_USER_ERROR);
03375     }
03376 
03382     public static function convertFromUTF8($str, $config, $context) {
03383         $encoding = $config->get('Core.Encoding');
03384         if ($escape = $config->get('Core.EscapeNonASCIICharacters')) {
03385             $str = self::convertToASCIIDumbLossless($str);
03386         }
03387         if ($encoding === 'utf-8') return $str;
03388         static $iconv = null;
03389         if ($iconv === null) $iconv = self::iconvAvailable();
03390         if ($iconv && !$config->get('Test.ForceNoIconv')) {
03391             // Undo our previous fix in convertToUTF8, otherwise iconv will barf
03392             $ascii_fix = self::testEncodingSupportsASCII($encoding);
03393             if (!$escape && !empty($ascii_fix)) {
03394                 $clear_fix = array();
03395                 foreach ($ascii_fix as $utf8 => $native) $clear_fix[$utf8] = '';
03396                 $str = strtr($str, $clear_fix);
03397             }
03398             $str = strtr($str, array_flip($ascii_fix));
03399             // Normal stuff
03400             $str = self::iconv('utf-8', $encoding . '//IGNORE', $str);
03401             return $str;
03402         } elseif ($encoding === 'iso-8859-1') {
03403             $str = utf8_decode($str);
03404             return $str;
03405         }
03406         trigger_error('Encoding not supported', E_USER_ERROR);
03407         // You might be tempted to assume that the ASCII representation
03408         // might be OK, however, this is *not* universally true over all
03409         // encodings.  So we take the conservative route here, rather
03410         // than forcibly turn on %Core.EscapeNonASCIICharacters
03411     }
03412 
03429     public static function convertToASCIIDumbLossless($str) {
03430         $bytesleft = 0;
03431         $result = '';
03432         $working = 0;
03433         $len = strlen($str);
03434         for( $i = 0; $i < $len; $i++ ) {
03435             $bytevalue = ord( $str[$i] );
03436             if( $bytevalue <= 0x7F ) { //0xxx xxxx
03437                 $result .= chr( $bytevalue );
03438                 $bytesleft = 0;
03439             } elseif( $bytevalue <= 0xBF ) { //10xx xxxx
03440                 $working = $working << 6;
03441                 $working += ($bytevalue & 0x3F);
03442                 $bytesleft--;
03443                 if( $bytesleft <= 0 ) {
03444                     $result .= "&#" . $working . ";";
03445                 }
03446             } elseif( $bytevalue <= 0xDF ) { //110x xxxx
03447                 $working = $bytevalue & 0x1F;
03448                 $bytesleft = 1;
03449             } elseif( $bytevalue <= 0xEF ) { //1110 xxxx
03450                 $working = $bytevalue & 0x0F;
03451                 $bytesleft = 2;
03452             } else { //1111 0xxx
03453                 $working = $bytevalue & 0x07;
03454                 $bytesleft = 3;
03455             }
03456         }
03457         return $result;
03458     }
03459 
    /**
     * Result code of testIconvTruncateBug(): the probe conversion returned
     * output of the expected length.
     */
    const ICONV_OK = 0;

    /**
     * Result code of testIconvTruncateBug(): the probe conversion returned
     * fewer bytes than expected, i.e. iconv truncated its output.
     */
    const ICONV_TRUNCATES = 1;

    /**
     * Result code of testIconvTruncateBug(): the probe conversion failed
     * outright (returned false).
     */
    const ICONV_UNUSABLE = 2;
03470 
03485     public static function testIconvTruncateBug() {
03486         static $code = null;
03487         if ($code === null) {
03488             // better not use iconv, otherwise infinite loop!
03489             $r = self::unsafeIconv('utf-8', 'ascii//IGNORE', "\xCE\xB1" . str_repeat('a', 9000));
03490             if ($r === false) {
03491                 $code = self::ICONV_UNUSABLE;
03492             } elseif (($c = strlen($r)) < 9000) {
03493                 $code = self::ICONV_TRUNCATES;
03494             } elseif ($c > 9000) {
03495                 trigger_error('Your copy of iconv is extremely buggy. Please notify HTML Purifier maintainers: include your iconv version as per phpversion()', E_USER_ERROR);
03496             } else {
03497                 $code = self::ICONV_OK;
03498             }
03499         }
03500         return $code;
03501     }
03502 
    /**
     * Determines which printable ASCII characters (0x20..0x7E) the given
     * encoding cannot represent faithfully.
     *
     * @param $encoding Encoding name to test, as passed to iconv
     * @param $bypass   When true, skip the per-encoding cache lookup and the
     *                  hard-coded answers and always probe with iconv
     * @return Array mapping the UTF-8 string obtained by decoding the ASCII
     *         byte from $encoding to the ASCII character itself; an empty
     *         array when every character round-trips; false when iconv
     *         cannot convert to $encoding at all.
     */
    public static function testEncodingSupportsASCII($encoding, $bypass = false) {
        // All calls to iconv here are unsafe, proof by case analysis:
        // If ICONV_OK, no difference.
        // If ICONV_TRUNCATE, all calls involve one character inputs,
        // so bug is not triggered.
        // If ICONV_UNUSABLE, this call is irrelevant
        // Probed results are cached per encoding name for the request.
        static $encodings = array();
        if (!$bypass) {
            if (isset($encodings[$encoding])) return $encodings[$encoding];
            $lenc = strtolower($encoding);
            // Hard-coded answers for known encodings; these are returned
            // directly and are not stored in the cache.
            switch ($lenc) {
                case 'shift_jis':
                    return array("\xC2\xA5" => '\\', "\xE2\x80\xBE" => '~');
                case 'johab':
                    return array("\xE2\x82\xA9" => '\\');
            }
            // ISO-8859-* encodings are treated as fully ASCII compatible
            if (strpos($lenc, 'iso-8859-') === 0) return array();
        }
        $ret = array();
        // Bail out early if iconv does not know the encoding at all
        if (self::unsafeIconv('UTF-8', $encoding, 'a') === false) return false;
        for ($i = 0x20; $i <= 0x7E; $i++) { // all printable ASCII chars
            $c = chr($i); // UTF-8 char
            $r = self::unsafeIconv('UTF-8', "$encoding//IGNORE", $c); // initial conversion
            if (
                $r === '' ||
                // This line is needed for iconv implementations that do not
                // omit characters that do not exist in the target character set
                ($r === $c && self::unsafeIconv($encoding, 'UTF-8//IGNORE', $r) !== $c)
            ) {
                // Reverse engineer: what's the UTF-8 equiv of this byte
                // sequence? This assumes that there's no variable width
                // encoding that doesn't support ASCII.
                $ret[self::unsafeIconv($encoding, 'UTF-8//IGNORE', $c)] = $c;
            }
        }
        // The probed result is stored even when $bypass skipped the lookup
        $encodings[$encoding] = $ret;
        return $ret;
    }
03552 
03553 
03554 }
03555 
03556 
03557 
03558 
03559 
/**
 * Holds the table used to resolve named entities, loaded from a
 * serialized file.
 */
class HTMLPurifier_EntityLookup {

    /**
     * Lookup table, indexed by entity name; populated by setup().
     */
    public $table;

    /**
     * Loads the lookup table by unserializing the contents of a file.
     *
     * @param $file Path of the serialized table. When falsy, the bundled
     *              entities.ser below HTMLPURIFIER_PREFIX is used.
     */
    public function setup($file = false) {
        $path = $file
            ? $file
            : HTMLPURIFIER_PREFIX . '/HTMLPurifier/EntityLookup/entities.ser';
        $this->table = unserialize(file_get_contents($path));
    }

    /**
     * Returns the shared instance, creating and loading it on first use.
     *
     * @param $prototype Optional object that replaces the shared instance
     * @return The shared lookup object
     */
    public static function instance($prototype = false) {
        // no references, since PHP doesn't copy unless modified
        static $instance = null;
        if ($prototype) {
            // Caller supplied a replacement; it becomes the shared instance
            $instance = $prototype;
            return $instance;
        }
        if (!$instance) {
            $fresh = new HTMLPurifier_EntityLookup();
            $fresh->setup();
            $instance = $fresh;
        }
        return $instance;
    }

}
03600 
03601 
03602 
03603 
03604 
// If we want to implement error collecting here, we'll need to use some
// sort of global data (probably trigger_error), because it's impossible to
// pass $config or $context to the callback functions.
03608 
/**
 * Substitutes entity references (hexadecimal, decimal and named) in a
 * string with the characters they stand for.
 *
 * The five "special" characters (", &, ', < and >) are handled separately
 * from all other entities so that callers can decide when markup-significant
 * characters are decoded.
 */
class HTMLPurifier_EntityParser
{

    /**
     * Lazily loaded HTMLPurifier_EntityLookup used to resolve named entities.
     */
    protected $_entity_lookup;

    /**
     * Regex matching an entity reference; the trailing semicolon is optional.
     * Exactly one of the three capture groups participates in a match.
     */
    protected $_substituteEntitiesRegex =
'/&(?:[#]x([a-fA-F0-9]+)|[#]0*(\d+)|([A-Za-z_:][A-Za-z0-9.\-_:]*));?/';
//     1. hex             2. dec      3. string (XML style)


    /**
     * Decimal codepoint to character map for the special characters.
     */
    protected $_special_dec2str =
            array(
                    34 => '"',
                    38 => '&',
                    39 => "'",
                    60 => '<',
                    62 => '>'
            );

    /**
     * Entity name to decimal codepoint map for the special characters.
     * Every value here is also a key of $_special_dec2str.
     */
    protected $_special_ent2dec =
            array(
                    'quot' => 34,
                    'amp'  => 38,
                    'lt'   => 60,
                    'gt'   => 62
            );

    /**
     * Substitutes all entities except the special ones with their
     * character equivalents.
     *
     * @param $string String whose non-special entities are to be decoded
     * @return String with the substitutions applied
     */
    public function substituteNonSpecialEntities($string) {
        // it will try to detect missing semicolons, but don't rely on it
        return preg_replace_callback(
            $this->_substituteEntitiesRegex,
            array($this, 'nonSpecialEntityCallback'),
            $string
            );
    }

    /**
     * Callback for substituteNonSpecialEntities().
     *
     * @param $matches preg match array: 0 is the whole entity, 1 the hex
     *                 digits, 2 the decimal digits, 3 the entity name
     * @return Replacement text; the untouched entity when it is special or
     *         unknown
     */
    protected function nonSpecialEntityCallback($matches) {
        // replaces all but big five
        $entity = $matches[0];
        // The regex guarantees at least two characters for any match, and
        // at least three for a numeric one, so these offsets always exist.
        if ($entity[1] === '#') {
            $is_hex = ($entity[2] === 'x');
            $code = $is_hex ? hexdec($matches[1]) : (int) $matches[2];

            // abort for special characters
            if (isset($this->_special_dec2str[$code])) return $entity;

            return HTMLPurifier_Encoder::unichr($code);
        }

        $name = $matches[3];
        if (isset($this->_special_ent2dec[$name])) return $entity;
        if (!$this->_entity_lookup) {
            $this->_entity_lookup = HTMLPurifier_EntityLookup::instance();
        }
        if (isset($this->_entity_lookup->table[$name])) {
            return $this->_entity_lookup->table[$name];
        }
        return $entity;
    }

    /**
     * Substitutes only the special entities (numeric or named references
     * to ", &, ', < and >) with their character equivalents.
     *
     * @param $string String whose special entities are to be decoded
     * @return String with the substitutions applied
     */
    public function substituteSpecialEntities($string) {
        return preg_replace_callback(
            $this->_substituteEntitiesRegex,
            array($this, 'specialEntityCallback'),
            $string);
    }

    /**
     * Callback for substituteSpecialEntities().
     *
     * @param $matches preg match array: 0 is the whole entity, 1 the hex
     *                 digits, 2 the decimal digits, 3 the entity name
     * @return The special character, or the untouched entity when it does
     *         not denote one
     */
    protected function specialEntityCallback($matches) {
        $entity = $matches[0];
        if ($entity[1] === '#') {
            $is_hex = ($entity[2] === 'x');
            $int = $is_hex ? hexdec($matches[1]) : (int) $matches[2];
            return isset($this->_special_dec2str[$int]) ?
                $this->_special_dec2str[$int] :
                $entity;
        }

        $name = $matches[3];
        if (isset($this->_special_ent2dec[$name])) {
            // Map name -> codepoint -> character. Returning the codepoint
            // itself would turn "&amp;" into the text "38".
            return $this->_special_dec2str[$this->_special_ent2dec[$name]];
        }
        return $entity;
    }

}
03745 
03746 
03747 
03748 
03749 
/**
 * Collects errors reported during processing and renders them as HTML.
 *
 * Errors are recorded twice: as flat numerically indexed arrays (see the
 * LINENO..CHILDREN constants) and as HTMLPurifier_ErrorStruct objects
 * grouped by line and column in $lines.
 */
class HTMLPurifier_ErrorCollector
{

    /**
     * Indices of the fields of a flat error array as built by send().
     */
    const LINENO   = 0;
    const SEVERITY = 1;
    const MESSAGE  = 2;
    const CHILDREN = 3;

    // Flat error list; bound by reference to $_stacks[0] in the constructor
    protected $errors;
    // List that send() appends to; also bound to $_stacks[0]
    protected $_current;
    protected $_stacks = array(array());
    // 'Locale' object fetched from the context; supplies message texts
    protected $locale;
    // HTMLPurifier_Generator created in getHTMLFormatted(), used for escaping
    protected $generator;
    protected $context;

    // Error structs indexed as [$line][$col]; index -1 holds the struct for
    // errors without an integer line/column
    protected $lines = array();

    /**
     * @param $context Context object; 'Locale' is read from it immediately
     *                 and further values are read on every send()
     */
    public function __construct($context) {
        $this->locale    =& $context->get('Locale');
        $this->context   = $context;
        $this->_current  =& $this->_stacks[0];
        $this->errors    =& $this->_stacks[0];
    }

    /**
     * Records an error.
     *
     * @param $severity Severity value stored with the error
     * @param $msg      Message key passed to the locale for lookup
     * @param ...       Any further arguments are handed to the locale's
     *                  formatMessage(), indexed starting at 1
     */
    public function send($severity, $msg) {

        $args = array();
        if (func_num_args() > 2) {
            $args = func_get_args();
            // Drop $severity, then remove $msg while keeping the remaining
            // arguments at indices 1..n
            array_shift($args);
            unset($args[0]);
        }

        // Location comes from the current token when there is one,
        // otherwise from the CurrentLine/CurrentCol context values
        $token = $this->context->get('CurrentToken', true);
        $line  = $token ? $token->line : $this->context->get('CurrentLine', true);
        $col   = $token ? $token->col  : $this->context->get('CurrentCol',  true);
        $attr  = $this->context->get('CurrentAttr', true);

        // perform special substitutions, also add custom parameters
        $subst = array();
        if (!is_null($token)) {
            $args['CurrentToken'] = $token;
        }
        if (!is_null($attr)) {
            $subst['$CurrentAttr.Name'] = $attr;
            if (isset($token->attr[$attr])) $subst['$CurrentAttr.Value'] = $token->attr[$attr];
        }

        if (empty($args)) {
            $msg = $this->locale->getMessage($msg);
        } else {
            $msg = $this->locale->formatMessage($msg, $args);
        }

        if (!empty($subst)) $msg = strtr($msg, $subst);

        // (numerically indexed)
        $error = array(
            self::LINENO   => $line,
            self::SEVERITY => $severity,
            self::MESSAGE  => $msg,
            self::CHILDREN => array()
        );
        $this->_current[] = $error;


        // NEW CODE BELOW ...

        $struct = null;
        // Top-level errors are either:
        //  TOKEN type, if $value is set appropriately, or
        //  "syntax" type, if $value is null
        $new_struct = new HTMLPurifier_ErrorStruct();
        $new_struct->type = HTMLPurifier_ErrorStruct::TOKEN;
        if ($token) $new_struct->value = clone $token;
        if (is_int($line) && is_int($col)) {
            // Reuse the struct already registered at this position, if any
            if (isset($this->lines[$line][$col])) {
                $struct = $this->lines[$line][$col];
            } else {
                $struct = $this->lines[$line][$col] = $new_struct;
            }
            // These ksorts may present a performance problem
            ksort($this->lines[$line], SORT_NUMERIC);
        } else {
            // No usable position: collect under the -1 slot
            if (isset($this->lines[-1])) {
                $struct = $this->lines[-1];
            } else {
                $struct = $this->lines[-1] = $new_struct;
            }
        }
        ksort($this->lines, SORT_NUMERIC);

        // Now, check if we need to operate on a lower structure
        if (!empty($attr)) {
            $struct = $struct->getChild(HTMLPurifier_ErrorStruct::ATTR, $attr);
            if (!$struct->value) {
                $struct->value = array($attr, 'PUT VALUE HERE');
            }
        }
        // NOTE(review): $cssprop is never assigned in this method, so this
        // branch cannot currently run.
        if (!empty($cssprop)) {
            $struct = $struct->getChild(HTMLPurifier_ErrorStruct::CSSPROP, $cssprop);
            if (!$struct->value) {
                // if we tokenize CSS this might be a little more difficult to do
                $struct->value = array($cssprop, 'PUT VALUE HERE');
            }
        }

        // Ok, structs are all setup, now time to register the error
        $struct->addError($severity, $msg);
    }

    /**
     * Returns the flat list of errors recorded so far.
     *
     * @return Array of error arrays indexed by the LINENO, SEVERITY,
     *         MESSAGE and CHILDREN constants
     */
    public function getRaw() {
        return $this->errors;
    }

    /**
     * Renders the collected errors as an HTML list.
     *
     * @param $config Configuration object used to build the generator
     * @param $errors Optional error array; it is only consulted to decide
     *                whether the "no errors" message is returned. The list
     *                items themselves are always rendered from $lines.
     * @return HTML string: a paragraph when $errors is empty, otherwise an
     *         unordered list
     */
    public function getHTMLFormatted($config, $errors = null) {
        $ret = array();

        $this->generator = new HTMLPurifier_Generator($config, $this->context);
        if ($errors === null) $errors = $this->errors;

        // 'At line' message needs to be removed

        // generation code for new structure goes here. It needs to be recursive.
        foreach ($this->lines as $line => $col_array) {
            // The position-less slot is rendered last, below
            if ($line == -1) continue;
            foreach ($col_array as $col => $struct) {
                $this->_renderStruct($ret, $struct, $line, $col);
            }
        }
        if (isset($this->lines[-1])) {
            $this->_renderStruct($ret, $this->lines[-1]);
        }

        if (empty($errors)) {
            return '<p>' . $this->locale->getMessage('ErrorCollector: No errors') . '</p>';
        } else {
            return '<ul><li>' . implode('</li><li>', $ret) . '</li></ul>';
        }

    }

    /**
     * Appends one HTML fragment per error of $struct and of all its
     * descendants to $ret, walking the tree with an explicit stack.
     *
     * @param $ret    Array of HTML fragments, extended in place
     * @param $struct HTMLPurifier_ErrorStruct to start from
     * @param $line   Line shown in the location label, or null
     * @param $col    Column shown in the location label, or null; when
     *                either is null the label reads "End of Document"
     */
    private function _renderStruct(&$ret, $struct, $line = null, $col = null) {
        $stack = array($struct);
        // Parallel stack of ancestor lists; it is maintained here but not
        // read when building the output
        $context_stack = array(array());
        while ($current = array_pop($stack)) {
            $context = array_pop($context_stack);
            foreach ($current->errors as $error) {
                list($severity, $msg) = $error;
                $string = '';
                $string .= '<div>';
                // W3C uses an icon to indicate the severity of the error.
                $error = $this->locale->getErrorName($severity);
                $string .= "<span class=\"error e$severity\"><strong>$error</strong></span> ";
                if (!is_null($line) && !is_null($col)) {
                    $string .= "<em class=\"location\">Line $line, Column $col: </em> ";
                } else {
                    $string .= '<em class="location">End of Document: </em> ';
                }
                $string .= '<strong class="description">' . $this->generator->escape($msg) . '</strong> ';
                $string .= '</div>';
                // Here, have a marker for the character on the column appropriate.
                // Be sure to clip extremely long lines.
                //$string .= '<pre>';
                //$string .= '';
                //$string .= '</pre>';
                $ret[] = $string;
            }
            foreach ($current->children as $type => $array) {
                $context[] = $current;
                // Reversed so that array_pop() visits children in their
                // original order
                $stack = array_merge($stack, array_reverse($array, true));
                for ($i = count($array); $i > 0; $i--) {
                    $context_stack[] = $context;
                }
            }
        }
    }

}
03955 
03956 
03957 
03958 
03959 
/**
 * Node of a tree of errors: carries the errors attached to one item and
 * child nodes grouped by type and identifier.
 */
class HTMLPurifier_ErrorStruct
{

    /**
     * Possible values of $type.
     */
    const TOKEN     = 0;
    const ATTR      = 1;
    const CSSPROP   = 2;

    /**
     * Type of this struct; one of the constants above.
     */
    public $type;

    /**
     * Value this struct describes; its shape depends on $type.
     */
    public $value;

    /**
     * Errors registered on this struct, each an array($severity, $message).
     */
    public $errors = array();

    /**
     * Child structs, indexed as [$type][$id].
     */
    public $children = array();

    /**
     * Returns the child struct for the given type and id, creating it
     * (with its type set) when it does not exist yet.
     *
     * @param $type One of the type constants
     * @param $id   Identifier of the child within that type
     * @return HTMLPurifier_ErrorStruct
     */
    public function getChild($type, $id) {
        if (isset($this->children[$type][$id])) {
            return $this->children[$type][$id];
        }
        $child = new HTMLPurifier_ErrorStruct();
        $child->type = $type;
        $this->children[$type][$id] = $child;
        return $child;
    }

    /**
     * Registers an error on this struct.
     *
     * @param $severity Severity of the error
     * @param $message  Message text
     */
    public function addError($severity, $message) {
        $entry = array($severity, $message);
        $this->errors[] = $entry;
    }

}
04016 
04017 
04018 
04019 
04020 
/**
 * Exception type specific to HTML Purifier. It adds nothing to the
 * built-in Exception; it exists so the library's exceptions can be
 * caught by class.
 */
class HTMLPurifier_Exception extends Exception
{

}
04029 
04030 
04031 
04032 
04033 
/**
 * Base class for filters that see the HTML before and after the main
 * processing. Both hooks are pass-through here; subclasses override the
 * ones they need.
 */
class HTMLPurifier_Filter
{

    /**
     * Name of the filter; left unset by this base class.
     */
    public $name;

    /**
     * Hook applied to the HTML before processing.
     *
     * @param $html    HTML string
     * @param $config  Configuration object (unused here)
     * @param $context Context object (unused here)
     * @return The HTML, unchanged
     */
    public function preFilter($html, $config, $context)
    {
        return $html;
    }

    /**
     * Hook applied to the HTML after processing.
     *
     * @param $html    HTML string
     * @param $config  Configuration object (unused here)
     * @param $context Context object (unused here)
     * @return The HTML, unchanged
     */
    public function postFilter($html, $config, $context)
    {
        return $html;
    }

}
04076 
04077 
04078 
04079 
04080 
/**
 * Generates HTML text from a list of tokens.
 *
 * Output options are read once from the configuration in the constructor.
 */
class HTMLPurifier_Generator
{

    /**
     * Whether the output is XML-style (e.g. <br /> rather than <br>);
     * taken from the definition's doctype.
     */
    private $_xhtml = true;

    /**
     * Value of %Output.CommentScriptContents: wrap script contents in
     * comment/CDATA guards.
     */
    private $_scriptFix = false;

    /**
     * HTML definition obtained from the configuration.
     */
    private $_def;

    /**
     * Value of %Output.SortAttr: sort attributes by name before output.
     */
    private $_sortAttr;

    /**
     * Value of %Output.FlashCompat.
     */
    private $_flashCompat;

    /**
     * Value of %Output.FixInnerHTML: apply the backtick quoting workaround.
     */
    private $_innerHTMLFix;

    /**
     * Stack of stdclass records (attr, param), one pushed per <object>
     * start tag while %Output.FlashCompat is on.
     */
    private $_flashStack = array();

    /**
     * Configuration object this generator was built with.
     */
    protected $config;

    /**
     * @param $config  Configuration object
     * @param $context Context object (not used by the constructor)
     */
    public function __construct($config, $context) {
        $this->config = $config;
        $this->_scriptFix = $config->get('Output.CommentScriptContents');
        $this->_innerHTMLFix = $config->get('Output.FixInnerHTML');
        $this->_sortAttr = $config->get('Output.SortAttr');
        $this->_flashCompat = $config->get('Output.FlashCompat');
        $this->_def = $config->getHTMLDefinition();
        $this->_xhtml = $this->_def->doctype->xml;
    }

    /**
     * Generates HTML from an array of tokens.
     *
     * @param $tokens Array of tokens
     * @return Generated HTML; '' when $tokens is empty
     */
    public function generateFromTokens($tokens) {
        if (!$tokens) return '';

        // Basic algorithm
        $html = '';
        for ($i = 0, $size = count($tokens); $i < $size; $i++) {
            if ($this->_scriptFix && $tokens[$i]->name === 'script'
                && $i + 2 < $size && $tokens[$i+2] instanceof HTMLPurifier_Token_End) {
                // script special case
                // the contents of the script block must be ONE token
                // for this to work.
                // Both calls advance $i, so the generic call below emits
                // the end tag.
                $html .= $this->generateFromToken($tokens[$i++]);
                $html .= $this->generateScriptFromToken($tokens[$i++]);
            }
            $html .= $this->generateFromToken($tokens[$i]);
        }

        // Tidy cleanup
        if (extension_loaded('tidy') && $this->config->get('Output.TidyFormat')) {
            $tidy = new Tidy;
            $tidy->parseString($html, array(
               'indent'=> true,
               'output-xhtml' => $this->_xhtml,
               'show-body-only' => true,
               'indent-spaces' => 2,
               'wrap' => 68,
            ), 'utf8');
            $tidy->cleanRepair();
            $html = (string) $tidy; // explicit cast necessary
        }

        // Normalize newlines to system defined value
        if ($this->config->get('Core.NormalizeNewlines')) {
            $nl = $this->config->get('Output.Newline');
            if ($nl === null) $nl = PHP_EOL;
            if ($nl !== "\n") $html = str_replace("\n", $nl, $html);
        }
        return $html;
    }

    /**
     * Generates HTML from a single token.
     *
     * @param $token HTMLPurifier_Token object
     * @return Generated HTML; '' (with an E_USER_WARNING) when $token is
     *         not an HTMLPurifier_Token, and '' for unrecognised token types
     */
    public function generateFromToken($token) {
        if (!$token instanceof HTMLPurifier_Token) {
            trigger_error('Cannot generate HTML from non-HTMLPurifier_Token object', E_USER_WARNING);
            return '';

        } elseif ($token instanceof HTMLPurifier_Token_Start) {
            $attr = $this->generateAttributes($token->attr, $token->name);
            if ($this->_flashCompat) {
                if ($token->name == "object") {
                    // Remember the object's attributes; params are added as
                    // the nested <param> tokens are generated
                    $flash = new stdclass();
                    $flash->attr = $token->attr;
                    $flash->param = array();
                    $this->_flashStack[] = $flash;
                }
            }
            return '<' . $token->name . ($attr ? ' ' : '') . $attr . '>';

        } elseif ($token instanceof HTMLPurifier_Token_End) {
            $_extra = '';
            if ($this->_flashCompat) {
                if ($token->name == "object" && !empty($this->_flashStack)) {
                    // doesn't do anything for now
                }
            }
            return $_extra . '</' . $token->name . '>';

        } elseif ($token instanceof HTMLPurifier_Token_Empty) {
            if ($this->_flashCompat && $token->name == "param" && !empty($this->_flashStack)) {
                // Record the param on the innermost open object
                $this->_flashStack[count($this->_flashStack)-1]->param[$token->attr['name']] = $token->attr['value'];
            }
            $attr = $this->generateAttributes($token->attr, $token->name);
             return '<' . $token->name . ($attr ? ' ' : '') . $attr .
                ( $this->_xhtml ? ' /': '' ) // <br /> v. <br>
                . '>';

        } elseif ($token instanceof HTMLPurifier_Token_Text) {
            return $this->escape($token->data, ENT_NOQUOTES);

        } elseif ($token instanceof HTMLPurifier_Token_Comment) {
            return '<!--' . $token->data . '-->';
        } else {
            return '';

        }
    }

    /**
     * Generates the contents of a script element, wrapped in
     * comment/CDATA guards.
     *
     * @param $token Token holding the script contents; anything other than
     *               a text token is passed to generateFromToken()
     * @return Generated HTML
     */
    public function generateScriptFromToken($token) {
        if (!$token instanceof HTMLPurifier_Token_Text) return $this->generateFromToken($token);
        // Thanks <http://lachy.id.au/log/2005/05/script-comments>
        // Strip a trailing "//" so it does not double up with the closing guard
        $data = preg_replace('#//\s*$#', '', $token->data);
        return '<!--//--><![CDATA[//><!--' . "\n" . trim($data) . "\n" . '//--><!]]>';
    }

    /**
     * Generates the attribute portion of a tag from an associative array.
     *
     * @param $assoc_array_of_attributes Attribute name => value pairs
     * @param $element Name of the element the attributes belong to, used
     *                 to look up minimized attributes; false when unknown
     * @return Attribute string without leading or trailing whitespace
     */
    public function generateAttributes($assoc_array_of_attributes, $element = false) {
        $html = '';
        if ($this->_sortAttr) ksort($assoc_array_of_attributes);
        foreach ($assoc_array_of_attributes as $key => $value) {
            if (!$this->_xhtml) {
                // Remove namespaced attributes
                if (strpos($key, ':') !== false) continue;
                // Check if we should minimize the attribute: val="val" -> val
                if ($element && !empty($this->_def->info[$element]->attr[$key]->minimized)) {
                    $html .= $key . ' ';
                    continue;
                }
            }
            // Workaround for Internet Explorer innerHTML bug.
            // Essentially, Internet Explorer, when calculating
            // innerHTML, omits quotes if there are no instances of
            // angled brackets, quotes or spaces.  However, when parsing
            // HTML (for example, when you assign to innerHTML), it
            // treats backticks as quotes.  Thus,
            //      <img alt="``" />
            // becomes
            //      <img alt=`` />
            // becomes
            //      <img alt='' />
            // Fortunately, all we need to do is trigger an appropriate
            // quoting style, which we do by adding an extra space.
            // This also is consistent with the W3C spec, which states
            // that user agents may ignore leading or trailing
            // whitespace (in fact, most don't, at least for attributes
            // like alt, but an extra space at the end is barely
            // noticeable).  Still, we have a configuration knob for
            // this, since this transformation is not necessary if you
            // don't process user input with innerHTML or you don't plan
            // on supporting Internet Explorer.
            if ($this->_innerHTMLFix) {
                if (strpos($value, '`') !== false) {
                    // check if correct quoting style would not already be
                    // triggered
                    if (strcspn($value, '"\' <>') === strlen($value)) {
                        // protect!
                        $value .= ' ';
                    }
                }
            }
            $html .= $key.'="'.$this->escape($value).'" ';
        }
        return rtrim($html);
    }

    /**
     * Escapes a string for HTML output via htmlspecialchars() with the
     * UTF-8 charset.
     *
     * @param $string String to escape
     * @param $quote  Quote style flag for htmlspecialchars(); ENT_COMPAT
     *                is used when null
     * @return Escaped string
     */
    public function escape($string, $quote = null) {
        // Workaround for APC bug on Mac Leopard reported by sidepodcast
        // http://htmlpurifier.org/phorum/read.php?3,4823,4846
        if ($quote === null) $quote = ENT_COMPAT;
        return htmlspecialchars($string, $quote, 'UTF-8');
    }

}
04331 
04332 
04333 
04334 
04335 
04359 class HTMLPurifier_HTMLDefinition extends HTMLPurifier_Definition
04360 {
04361 
04362     // FULLY-PUBLIC VARIABLES ---------------------------------------------
04363 
04367     public $info = array();
04368 
04372     public $info_global_attr = array();
04373 
04377     public $info_parent = 'div';
04378 
04383     public $info_parent_def;
04384 
04389     public $info_block_wrapper = 'p';
04390 
04394     public $info_tag_transform = array();
04395 
04399     public $info_attr_transform_pre = array();
04400 
04404     public $info_attr_transform_post = array();
04405 
04410     public $info_content_sets = array();
04411 
04415     public $info_injector = array();
04416 
04420     public $doctype;
04421 
04422 
04423 
04424     // RAW CUSTOMIZATION STUFF --------------------------------------------
04425 
04435     public function addAttribute($element_name, $attr_name, $def) {
04436         $module = $this->getAnonymousModule();
04437         if (!isset($module->info[$element_name])) {
04438             $element = $module->addBlankElement($element_name);
04439         } else {
04440             $element = $module->info[$element_name];
04441         }
04442         $element->attr[$attr_name] = $def;
04443     }
04444 
04450     public function addElement($element_name, $type, $contents, $attr_collections, $attributes = array()) {
04451         $module = $this->getAnonymousModule();
04452         // assume that if the user is calling this, the element
04453         // is safe. This may not be a good idea
04454         $element = $module->addElement($element_name, $type, $contents, $attr_collections, $attributes);
04455         return $element;
04456     }
04457 
04464     public function addBlankElement($element_name) {
04465         $module  = $this->getAnonymousModule();
04466         $element = $module->addBlankElement($element_name);
04467         return $element;
04468     }
04469 
04475     public function getAnonymousModule() {
04476         if (!$this->_anonModule) {
04477             $this->_anonModule = new HTMLPurifier_HTMLModule();
04478             $this->_anonModule->name = 'Anonymous';
04479         }
04480         return $this->_anonModule;
04481     }
04482 
04483     private $_anonModule = null;
04484 
04485 
04486     // PUBLIC BUT INTERNAL VARIABLES --------------------------------------
04487 
04488     public $type = 'HTML';
04489     public $manager; 
04494     public function __construct() {
04495         $this->manager = new HTMLPurifier_HTMLModuleManager();
04496     }
04497 
04498     protected function doSetup($config) {
04499         $this->processModules($config);
04500         $this->setupConfigStuff($config);
04501         unset($this->manager);
04502 
04503         // cleanup some of the element definitions
04504         foreach ($this->info as $k => $v) {
04505             unset($this->info[$k]->content_model);
04506             unset($this->info[$k]->content_model_type);
04507         }
04508     }
04509 
04513     protected function processModules($config) {
04514 
04515         if ($this->_anonModule) {
04516             // for user specific changes
04517             // this is late-loaded so we don't have to deal with PHP4
04518             // reference wonky-ness
04519             $this->manager->addModule($this->_anonModule);
04520             unset($this->_anonModule);
04521         }
04522 
04523         $this->manager->setup($config);
04524         $this->doctype = $this->manager->doctype;
04525 
04526         foreach ($this->manager->modules as $module) {
04527             foreach($module->info_tag_transform as $k => $v) {
04528                 if ($v === false) unset($this->info_tag_transform[$k]);
04529                 else $this->info_tag_transform[$k] = $v;
04530             }
04531             foreach($module->info_attr_transform_pre as $k => $v) {
04532                 if ($v === false) unset($this->info_attr_transform_pre[$k]);
04533                 else $this->info_attr_transform_pre[$k] = $v;
04534             }
04535             foreach($module->info_attr_transform_post as $k => $v) {
04536                 if ($v === false) unset($this->info_attr_transform_post[$k]);
04537                 else $this->info_attr_transform_post[$k] = $v;
04538             }
04539             foreach ($module->info_injector as $k => $v) {
04540                 if ($v === false) unset($this->info_injector[$k]);
04541                 else $this->info_injector[$k] = $v;
04542             }
04543         }
04544 
04545         $this->info = $this->manager->getElements();
04546         $this->info_content_sets = $this->manager->contentSets->lookup;
04547 
04548     }
04549 
04553     protected function setupConfigStuff($config) {
04554 
04555         $block_wrapper = $config->get('HTML.BlockWrapper');
04556         if (isset($this->info_content_sets['Block'][$block_wrapper])) {
04557             $this->info_block_wrapper = $block_wrapper;
04558         } else {
04559             trigger_error('Cannot use non-block element as block wrapper',
04560                 E_USER_ERROR);
04561         }
04562 
04563         $parent = $config->get('HTML.Parent');
04564         $def = $this->manager->getElement($parent, true);
04565         if ($def) {
04566             $this->info_parent = $parent;
04567             $this->info_parent_def = $def;
04568         } else {
04569             trigger_error('Cannot use unrecognized element as parent',
04570                 E_USER_ERROR);
04571             $this->info_parent_def = $this->manager->getElement($this->info_parent, true);
04572         }
04573 
04574         // support template text
04575         $support = "(for information on implementing this, see the ".
04576                    "support forums) ";
04577 
04578         // setup allowed elements -----------------------------------------
04579 
04580         $allowed_elements = $config->get('HTML.AllowedElements');
04581         $allowed_attributes = $config->get('HTML.AllowedAttributes'); // retrieve early
04582 
04583         if (!is_array($allowed_elements) && !is_array($allowed_attributes)) {
04584             $allowed = $config->get('HTML.Allowed');
04585             if (is_string($allowed)) {
04586                 list($allowed_elements, $allowed_attributes) = $this->parseTinyMCEAllowedList($allowed);
04587             }
04588         }
04589 
04590         if (is_array($allowed_elements)) {
04591             foreach ($this->info as $name => $d) {
04592                 if(!isset($allowed_elements[$name])) unset($this->info[$name]);
04593                 unset($allowed_elements[$name]);
04594             }
04595             // emit errors
04596             foreach ($allowed_elements as $element => $d) {
04597                 $element = htmlspecialchars($element); // PHP doesn't escape errors, be careful!
04598                 trigger_error("Element '$element' is not supported $support", E_USER_WARNING);
04599             }
04600         }
04601 
04602         // setup allowed attributes ---------------------------------------
04603 
04604         $allowed_attributes_mutable = $allowed_attributes; // by copy!
04605         if (is_array($allowed_attributes)) {
04606 
04607             // This actually doesn't do anything, since we went away from
04608             // global attributes. It's possible that userland code uses
04609             // it, but HTMLModuleManager doesn't!
04610             foreach ($this->info_global_attr as $attr => $x) {
04611                 $keys = array($attr, "*@$attr", "*.$attr");
04612                 $delete = true;
04613                 foreach ($keys as $key) {
04614                     if ($delete && isset($allowed_attributes[$key])) {
04615                         $delete = false;
04616                     }
04617                     if (isset($allowed_attributes_mutable[$key])) {
04618                         unset($allowed_attributes_mutable[$key]);
04619                     }
04620                 }
04621                 if ($delete) unset($this->info_global_attr[$attr]);
04622             }
04623 
04624             foreach ($this->info as $tag => $info) {
04625                 foreach ($info->attr as $attr => $x) {
04626                     $keys = array("$tag@$attr", $attr, "*@$attr", "$tag.$attr", "*.$attr");
04627                     $delete = true;
04628                     foreach ($keys as $key) {
04629                         if ($delete && isset($allowed_attributes[$key])) {
04630                             $delete = false;
04631                         }
04632                         if (isset($allowed_attributes_mutable[$key])) {
04633                             unset($allowed_attributes_mutable[$key]);
04634                         }
04635                     }
04636                     if ($delete) {
04637                         if ($this->info[$tag]->attr[$attr]->required) {
04638                             trigger_error("Required attribute '$attr' in element '$tag' was not allowed, which means '$tag' will not be allowed either", E_USER_WARNING);
04639                         }
04640                         unset($this->info[$tag]->attr[$attr]);
04641                     }
04642                 }
04643             }
04644             // emit errors
04645             foreach ($allowed_attributes_mutable as $elattr => $d) {
04646                 $bits = preg_split('/[.@]/', $elattr, 2);
04647                 $c = count($bits);
04648                 switch ($c) {
04649                     case 2:
04650                         if ($bits[0] !== '*') {
04651                             $element = htmlspecialchars($bits[0]);
04652                             $attribute = htmlspecialchars($bits[1]);
04653                             if (!isset($this->info[$element])) {
04654                                 trigger_error("Cannot allow attribute '$attribute' if element '$element' is not allowed/supported $support");
04655                             } else {
04656                                 trigger_error("Attribute '$attribute' in element '$element' not supported $support",
04657                                     E_USER_WARNING);
04658                             }
04659                             break;
04660                         }
04661                         // otherwise fall through
04662                     case 1:
04663                         $attribute = htmlspecialchars($bits[0]);
04664                         trigger_error("Global attribute '$attribute' is not ".
04665                             "supported in any elements $support",
04666                             E_USER_WARNING);
04667                         break;
04668                 }
04669             }
04670 
04671         }
04672 
04673         // setup forbidden elements ---------------------------------------
04674 
04675         $forbidden_elements   = $config->get('HTML.ForbiddenElements');
04676         $forbidden_attributes = $config->get('HTML.ForbiddenAttributes');
04677 
04678         foreach ($this->info as $tag => $info) {
04679             if (isset($forbidden_elements[$tag])) {
04680                 unset($this->info[$tag]);
04681                 continue;
04682             }
04683             foreach ($info->attr as $attr => $x) {
04684                 if (
04685                     isset($forbidden_attributes["$tag@$attr"]) ||
04686                     isset($forbidden_attributes["*@$attr"]) ||
04687                     isset($forbidden_attributes[$attr])
04688                 ) {
04689                     unset($this->info[$tag]->attr[$attr]);
04690                     continue;
04691                 } // this segment might get removed eventually
04692                 elseif (isset($forbidden_attributes["$tag.$attr"])) {
04693                     // $tag.$attr are not user supplied, so no worries!
04694                     trigger_error("Error with $tag.$attr: tag.attr syntax not supported for HTML.ForbiddenAttributes; use tag@attr instead", E_USER_WARNING);
04695                 }
04696             }
04697         }
04698         foreach ($forbidden_attributes as $key => $v) {
04699             if (strlen($key) < 2) continue;
04700             if ($key[0] != '*') continue;
04701             if ($key[1] == '.') {
04702                 trigger_error("Error with $key: *.attr syntax not supported for HTML.ForbiddenAttributes; use attr instead", E_USER_WARNING);
04703             }
04704         }
04705 
04706         // setup injectors -----------------------------------------------------
04707         foreach ($this->info_injector as $i => $injector) {
04708             if ($injector->checkNeeded($config) !== false) {
04709                 // remove injector that does not have it's required
04710                 // elements/attributes present, and is thus not needed.
04711                 unset($this->info_injector[$i]);
04712             }
04713         }
04714     }
04715 
04725     public function parseTinyMCEAllowedList($list) {
04726 
04727         $list = str_replace(array(' ', "\t"), '', $list);
04728 
04729         $elements = array();
04730         $attributes = array();
04731 
04732         $chunks = preg_split('/(,|[\n\r]+)/', $list);
04733         foreach ($chunks as $chunk) {
04734             if (empty($chunk)) continue;
04735             // remove TinyMCE element control characters
04736             if (!strpos($chunk, '[')) {
04737                 $element = $chunk;
04738                 $attr = false;
04739             } else {
04740                 list($element, $attr) = explode('[', $chunk);
04741             }
04742             if ($element !== '*') $elements[$element] = true;
04743             if (!$attr) continue;
04744             $attr = substr($attr, 0, strlen($attr) - 1); // remove trailing ]
04745             $attr = explode('|', $attr);
04746             foreach ($attr as $key) {
04747                 $attributes["$element.$key"] = true;
04748             }
04749         }
04750 
04751         return array($elements, $attributes);
04752 
04753     }
04754 
04755 
04756 }
04757 
04758 
04759 
04760 
04761 
/**
 * A bundle of element definitions, content sets, transforms and injectors
 * that HTMLPurifier_HTMLModuleManager can register and activate.
 *
 * The public properties are meant to be filled in by subclasses or through
 * the convenience methods below.
 */
class HTMLPurifier_HTMLModule
{

    // -- Overloadable ----------------------------------------------------

    /**
     * Name of the module. The module manager refuses to register a module
     * whose name is empty and indexes registered modules by this value.
     */
    public $name;

    /**
     * List of element names added through addElement()/addBlankElement().
     */
    public $elements = array();

    /**
     * Lookup of element name => element definition object.
     */
    public $info = array();

    /**
     * Lookup of content set name => string of member element names joined
     * by ' | ' (see addElementToContentSet()).
     */
    public $content_sets = array();

    /**
     * Attribute collections contributed by this module. Not read by any
     * method of this class; presumably consumed by
     * HTMLPurifier_AttrCollections -- verify there.
     */
    public $attr_collections = array();

    /**
     * Tag transforms, merged into the HTML definition by key; a value of
     * false removes an entry added by an earlier module.
     */
    public $info_tag_transform = array();

    /**
     * Attribute transforms (pre), merged like $info_tag_transform.
     */
    public $info_attr_transform_pre = array();

    /**
     * Attribute transforms (post), merged like $info_tag_transform.
     */
    public $info_attr_transform_post = array();

    /**
     * Injectors used by this module: either injector objects or strings
     * that the module manager expands to HTMLPurifier_Injector_<string>.
     */
    public $info_injector = array();

    /**
     * Flag related to getChildDef(). Not read by any method of this
     * class; presumably tells the content set code to call getChildDef()
     * -- verify against HTMLPurifier_ContentSets.
     */
    public $defines_child_def = false;

    /**
     * Whether the module's elements are safe for untrusted input. The
     * module manager ignores modules with a false value here unless
     * trusted mode is enabled.
     */
    public $safe = true;

    /**
     * Hook for building a custom child definition from an element
     * definition. The base implementation declines by returning false.
     *
     * @param mixed $def Element definition.
     * @return bool Always false here.
     */
    public function getChildDef($def) {return false;}

    // -- Convenience -----------------------------------------------------

    /**
     * Defines an element in this module and returns its definition.
     *
     * @param string $element       Name of the element.
     * @param mixed  $type          Content set to add the element to;
     *                              a false-ish value adds it to none.
     * @param mixed  $contents      Content model string accepted by
     *                              parseContents(), or a non-string value
     *                              that is stored directly as the child
     *                              definition.
     * @param mixed  $attr_includes Attribute collection name(s); see
     *                              mergeInAttrIncludes().
     * @param array  $attr          Attribute name => definition map.
     * @return mixed The created element definition.
     */
    public function addElement($element, $type, $contents, $attr_includes = array(), $attr = array()) {
        $this->elements[] = $element;
        // parse content_model
        list($content_model_type, $content_model) = $this->parseContents($contents);
        // merge in attribute inclusions
        $this->mergeInAttrIncludes($attr, $attr_includes);
        // add element to content sets
        if ($type) $this->addElementToContentSet($element, $type);
        // create element
        $this->info[$element] = HTMLPurifier_ElementDef::create(
            $content_model, $content_model_type, $attr
        );
        // literal object $contents means direct child manipulation
        if (!is_string($contents)) $this->info[$element]->child = $contents;
        return $this->info[$element];
    }

    /**
     * Creates an empty, non-standalone element definition, meant for
     * attaching extras to an element defined elsewhere. If this module
     * already defines the element, an error is triggered and the existing
     * definition is returned.
     *
     * @param string $element Name of the element.
     * @return mixed The new (or already existing) element definition.
     */
    public function addBlankElement($element) {
        if (isset($this->info[$element])) {
            trigger_error("Definition for $element already exists in module, cannot redefine");
            return $this->info[$element];
        }
        $this->elements[] = $element;
        $this->info[$element] = new HTMLPurifier_ElementDef();
        $this->info[$element]->standalone = false;
        return $this->info[$element];
    }

    /**
     * Appends an element to a content set, creating the set if needed.
     * Members are separated by ' | '.
     *
     * @param string $element Name of the element.
     * @param string $type    Name of the content set.
     */
    public function addElementToContentSet($element, $type) {
        if (!isset($this->content_sets[$type])) $this->content_sets[$type] = '';
        else $this->content_sets[$type] .= ' | ';
        $this->content_sets[$type] .= $element;
    }

    /**
     * Splits a content model description into its type and model.
     *
     * Recognized shorthands are 'Empty', 'Inline' and 'Flow'; anything
     * else is expected in the form "Type: model". A string without a colon
     * yields the lower-cased type and an empty model.
     *
     * @param mixed $contents Content model string; non-strings are deferred.
     * @return array array(type, model), or array(null, null) for non-strings.
     */
    public function parseContents($contents) {
        if (!is_string($contents)) return array(null, null); // defer
        switch ($contents) {
            // check for shorthand content model forms
            case 'Empty':
                return array('empty', '');
            case 'Inline':
                return array('optional', 'Inline | #PCDATA');
            case 'Flow':
                return array('optional', 'Flow | #PCDATA');
        }
        $pieces = explode(':', $contents);
        $content_model_type = strtolower(trim($pieces[0]));
        // The model part may be absent; check instead of blindly
        // destructuring, which raised an undefined-offset notice.
        $content_model = isset($pieces[1]) ? trim($pieces[1]) : '';
        return array($content_model_type, $content_model);
    }

    /**
     * Stores the attribute collection includes in slot 0 of an attribute
     * array. A non-array include is wrapped in an array; an empty
     * non-array value becomes an empty array.
     *
     * @param array $attr          Attribute array, modified by reference.
     * @param mixed $attr_includes Collection name or array of names.
     */
    public function mergeInAttrIncludes(&$attr, $attr_includes) {
        if (!is_array($attr_includes)) {
            if (empty($attr_includes)) $attr_includes = array();
            else $attr_includes = array($attr_includes);
        }
        $attr[0] = $attr_includes;
    }

    /**
     * Turns a list of values into a lookup array (value => true). May be
     * called with a single array or with the values as separate string
     * arguments. Null entries are skipped.
     *
     * @param mixed $list Array of values, or the first of several strings.
     * @return array Lookup array.
     */
    public function makeLookup($list) {
        if (is_string($list)) $list = func_get_args();
        $ret = array();
        foreach ($list as $value) {
            if ($value === null) continue;
            $ret[$value] = true;
        }
        return $ret;
    }

    /**
     * Hook called by the module manager with the configuration for every
     * active module. The base implementation does nothing.
     *
     * @param mixed $config Configuration object.
     */
    public function setup($config) {}

}
05002 
05003 
05004 
05005 
05006 
05007 class HTMLPurifier_HTMLModuleManager
05008 {
05009 
05013     public $doctypes;
05014 
05018     public $doctype;
05019 
05023     public $attrTypes;
05024 
05029     public $modules = array();
05030 
05036     public $registeredModules = array();
05037 
05043     public $userModules = array();
05044 
05049     public $elementLookup = array();
05050 
05052     public $prefixes = array('HTMLPurifier_HTMLModule_');
05053 
05054     public $contentSets;     
05055     public $attrCollections; 
05058     public $trusted = false;
05059 
05060     public function __construct() {
05061 
05062         // editable internal objects
05063         $this->attrTypes = new HTMLPurifier_AttrTypes();
05064         $this->doctypes  = new HTMLPurifier_DoctypeRegistry();
05065 
05066         // setup basic modules
05067         $common = array(
05068             'CommonAttributes', 'Text', 'Hypertext', 'List',
05069             'Presentation', 'Edit', 'Bdo', 'Tables', 'Image',
05070             'StyleAttribute',
05071             // Unsafe:
05072             'Scripting', 'Object', 'Forms',
05073             // Sorta legacy, but present in strict:
05074             'Name',
05075         );
05076         $transitional = array('Legacy', 'Target', 'Iframe');
05077         $xml = array('XMLCommonAttributes');
05078         $non_xml = array('NonXMLCommonAttributes');
05079 
05080         // setup basic doctypes
05081         $this->doctypes->register(
05082             'HTML 4.01 Transitional', false,
05083             array_merge($common, $transitional, $non_xml),
05084             array('Tidy_Transitional', 'Tidy_Proprietary'),
05085             array(),
05086             '-//W3C//DTD HTML 4.01 Transitional//EN',
05087             'http://www.w3.org/TR/html4/loose.dtd'
05088         );
05089 
05090         $this->doctypes->register(
05091             'HTML 4.01 Strict', false,
05092             array_merge($common, $non_xml),
05093             array('Tidy_Strict', 'Tidy_Proprietary', 'Tidy_Name'),
05094             array(),
05095             '-//W3C//DTD HTML 4.01//EN',
05096             'http://www.w3.org/TR/html4/strict.dtd'
05097         );
05098 
05099         $this->doctypes->register(
05100             'XHTML 1.0 Transitional', true,
05101             array_merge($common, $transitional, $xml, $non_xml),
05102             array('Tidy_Transitional', 'Tidy_XHTML', 'Tidy_Proprietary', 'Tidy_Name'),
05103             array(),
05104             '-//W3C//DTD XHTML 1.0 Transitional//EN',
05105             'http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd'
05106         );
05107 
05108         $this->doctypes->register(
05109             'XHTML 1.0 Strict', true,
05110             array_merge($common, $xml, $non_xml),
05111             array('Tidy_Strict', 'Tidy_XHTML', 'Tidy_Strict', 'Tidy_Proprietary', 'Tidy_Name'),
05112             array(),
05113             '-//W3C//DTD XHTML 1.0 Strict//EN',
05114             'http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd'
05115         );
05116 
05117         $this->doctypes->register(
05118             'XHTML 1.1', true,
05119             // Iframe is a real XHTML 1.1 module, despite being
05120             // "transitional"!
05121             array_merge($common, $xml, array('Ruby', 'Iframe')),
05122             array('Tidy_Strict', 'Tidy_XHTML', 'Tidy_Proprietary', 'Tidy_Strict', 'Tidy_Name'), // Tidy_XHTML1_1
05123             array(),
05124             '-//W3C//DTD XHTML 1.1//EN',
05125             'http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd'
05126         );
05127 
05128     }
05129 
05151     public function registerModule($module, $overload = false) {
05152         if (is_string($module)) {
05153             // attempt to load the module
05154             $original_module = $module;
05155             $ok = false;
05156             foreach ($this->prefixes as $prefix) {
05157                 $module = $prefix . $original_module;
05158                 if (class_exists($module)) {
05159                     $ok = true;
05160                     break;
05161                 }
05162             }
05163             if (!$ok) {
05164                 $module = $original_module;
05165                 if (!class_exists($module)) {
05166                     trigger_error($original_module . ' module does not exist',
05167                         E_USER_ERROR);
05168                     return;
05169                 }
05170             }
05171             $module = new $module();
05172         }
05173         if (empty($module->name)) {
05174             trigger_error('Module instance of ' . get_class($module) . ' must have name');
05175             return;
05176         }
05177         if (!$overload && isset($this->registeredModules[$module->name])) {
05178             trigger_error('Overloading ' . $module->name . ' without explicit overload parameter', E_USER_WARNING);
05179         }
05180         $this->registeredModules[$module->name] = $module;
05181     }
05182 
05187     public function addModule($module) {
05188         $this->registerModule($module);
05189         if (is_object($module)) $module = $module->name;
05190         $this->userModules[] = $module;
05191     }
05192 
05197     public function addPrefix($prefix) {
05198         $this->prefixes[] = $prefix;
05199     }
05200 
05206     public function setup($config) {
05207 
05208         $this->trusted = $config->get('HTML.Trusted');
05209 
05210         // generate
05211         $this->doctype = $this->doctypes->make($config);
05212         $modules = $this->doctype->modules;
05213 
05214         // take out the default modules that aren't allowed
05215         $lookup = $config->get('HTML.AllowedModules');
05216         $special_cases = $config->get('HTML.CoreModules');
05217 
05218         if (is_array($lookup)) {
05219             foreach ($modules as $k => $m) {
05220                 if (isset($special_cases[$m])) continue;
05221                 if (!isset($lookup[$m])) unset($modules[$k]);
05222             }
05223         }
05224 
05225         // custom modules
05226         if ($config->get('HTML.Proprietary')) {
05227             $modules[] = 'Proprietary';
05228         }
05229         if ($config->get('HTML.SafeObject')) {
05230             $modules[] = 'SafeObject';
05231         }
05232         if ($config->get('HTML.SafeEmbed')) {
05233             $modules[] = 'SafeEmbed';
05234         }
05235         if ($config->get('HTML.Nofollow')) {
05236             $modules[] = 'Nofollow';
05237         }
05238         if ($config->get('HTML.TargetBlank')) {
05239             $modules[] = 'TargetBlank';
05240         }
05241 
05242         // merge in custom modules
05243         $modules = array_merge($modules, $this->userModules);
05244 
05245         foreach ($modules as $module) {
05246             $this->processModule($module);
05247             $this->modules[$module]->setup($config);
05248         }
05249 
05250         foreach ($this->doctype->tidyModules as $module) {
05251             $this->processModule($module);
05252             $this->modules[$module]->setup($config);
05253         }
05254 
05255         // prepare any injectors
05256         foreach ($this->modules as $module) {
05257             $n = array();
05258             foreach ($module->info_injector as $i => $injector) {
05259                 if (!is_object($injector)) {
05260                     $class = "HTMLPurifier_Injector_$injector";
05261                     $injector = new $class;
05262                 }
05263                 $n[$injector->name] = $injector;
05264             }
05265             $module->info_injector = $n;
05266         }
05267 
05268         // setup lookup table based on all valid modules
05269         foreach ($this->modules as $module) {
05270             foreach ($module->info as $name => $def) {
05271                 if (!isset($this->elementLookup[$name])) {
05272                     $this->elementLookup[$name] = array();
05273                 }
05274                 $this->elementLookup[$name][] = $module->name;
05275             }
05276         }
05277 
05278         // note the different choice
05279         $this->contentSets = new HTMLPurifier_ContentSets(
05280             // content set assembly deals with all possible modules,
05281             // not just ones deemed to be "safe"
05282             $this->modules
05283         );
05284         $this->attrCollections = new HTMLPurifier_AttrCollections(
05285             $this->attrTypes,
05286             // there is no way to directly disable a global attribute,
05287             // but using AllowedAttributes or simply not including
05288             // the module in your custom doctype should be sufficient
05289             $this->modules
05290         );
05291     }
05292 
05297     public function processModule($module) {
05298         if (!isset($this->registeredModules[$module]) || is_object($module)) {
05299             $this->registerModule($module);
05300         }
05301         $this->modules[$module] = $this->registeredModules[$module];
05302     }
05303 
05308     public function getElements() {
05309 
05310         $elements = array();
05311         foreach ($this->modules as $module) {
05312             if (!$this->trusted && !$module->safe) continue;
05313             foreach ($module->info as $name => $v) {
05314                 if (isset($elements[$name])) continue;
05315                 $elements[$name] = $this->getElement($name);
05316             }
05317         }
05318 
05319         // remove dud elements, this happens when an element that
05320         // appeared to be safe actually wasn't
05321         foreach ($elements as $n => $v) {
05322             if ($v === false) unset($elements[$n]);
05323         }
05324 
05325         return $elements;
05326 
05327     }
05328 
05339     public function getElement($name, $trusted = null) {
05340 
05341         if (!isset($this->elementLookup[$name])) {
05342             return false;
05343         }
05344 
05345         // setup global state variables
05346         $def = false;
05347         if ($trusted === null) $trusted = $this->trusted;
05348 
05349         // iterate through each module that has registered itself to this
05350         // element
05351         foreach($this->elementLookup[$name] as $module_name) {
05352 
05353             $module = $this->modules[$module_name];
05354 
05355             // refuse to create/merge from a module that is deemed unsafe--
05356             // pretend the module doesn't exist--when trusted mode is not on.
05357             if (!$trusted && !$module->safe) {
05358                 continue;
05359             }
05360 
05361             // clone is used because, ideally speaking, the original
05362             // definition should not be modified. Usually, this will
05363             // make no difference, but for consistency's sake
05364             $new_def = clone $module->info[$name];
05365 
05366             if (!$def && $new_def->standalone) {
05367                 $def = $new_def;
05368             } elseif ($def) {
05369                 // This will occur even if $new_def is standalone. In practice,
05370                 // this will usually result in a full replacement.
05371                 $def->mergeIn($new_def);
05372             } else {
05373                 // :TODO:
05374                 // non-standalone definitions that don't have a standalone
05375                 // to merge into could be deferred to the end
05376                 // HOWEVER, it is perfectly valid for a non-standalone
05377                 // definition to lack a standalone definition, even
05378                 // after all processing: this allows us to safely
05379                 // specify extra attributes for elements that may not be
05380                 // enabled all in one place.  In particular, this might
05381                 // be the case for trusted elements.  WARNING: care must
05382                 // be taken that the /extra/ definitions are all safe.
05383                 continue;
05384             }
05385 
05386             // attribute value expansions
05387             $this->attrCollections->performInclusions($def->attr);
05388             $this->attrCollections->expandIdentifiers($def->attr, $this->attrTypes);
05389 
05390             // descendants_are_inline, for ChildDef_Chameleon
05391             if (is_string($def->content_model) &&
05392                 strpos($def->content_model, 'Inline') !== false) {
05393                 if ($name != 'del' && $name != 'ins') {
05394                     // this is for you, ins/del
05395                     $def->descendants_are_inline = true;
05396                 }
05397             }
05398 
05399             $this->contentSets->generateChildDef($def, $module);
05400         }
05401 
05402         // This can occur if there is a blank definition, but no base to
05403         // mix it in with
05404         if (!$def) return false;
05405 
05406         // add information on required attributes
05407         foreach ($def->attr as $attr_name => $attr_def) {
05408             if ($attr_def->required) {
05409                 $def->required_attr[] = $attr_name;
05410             }
05411         }
05412 
05413         return $def;
05414 
05415     }
05416 
05417 }
05418 
05419 
05420 
05421 
05422 
/**
 * Keeps track of IDs that are already taken, so that duplicates can be
 * detected.
 */
class HTMLPurifier_IDAccumulator
{

    /**
     * Lookup of taken IDs: id => true.
     */
    public $ids = array();

    /**
     * Creates an accumulator pre-loaded with the IDs listed in the
     * Attr.IDBlacklist configuration directive.
     *
     * @param mixed $config  Configuration object providing get().
     * @param mixed $context Not used by this method.
     * @return HTMLPurifier_IDAccumulator
     */
    public static function build($config, $context) {
        $accumulator = new HTMLPurifier_IDAccumulator();
        $blacklist = $config->get('Attr.IDBlacklist');
        $accumulator->load($blacklist);
        return $accumulator;
    }

    /**
     * Records an ID unless it is already taken.
     *
     * @param string $id ID to record.
     * @return bool True if the ID was newly recorded, false if it was
     *              already present.
     */
    public function add($id) {
        $is_new = !isset($this->ids[$id]);
        if ($is_new) {
            $this->ids[$id] = true;
        }
        return $is_new;
    }

    /**
     * Marks every ID in the given list as taken.
     *
     * @param array $array_of_ids List of IDs.
     */
    public function load($array_of_ids) {
        foreach ($array_of_ids as $taken) {
            $this->ids[$taken] = true;
        }
    }

}
05472 
05473 
05474 
05475 
05476 
/**
 * Base class for injectors: objects that are notified of text, element and
 * end tokens (handleText / handleElement / handleEnd) and that can inspect
 * the surrounding token stream through the helpers defined here.
 */
abstract class HTMLPurifier_Injector
{

    /**
     * Name of the injector.
     */
    public $name;

    /**
     * HTML definition obtained from the configuration in prepare().
     */
    protected $htmlDefinition;

    /**
     * Reference to the context's 'CurrentNesting' value: the stack of
     * currently open tokens. Bound in prepare().
     */
    protected $currentNesting;

    /**
     * Reference to the context's 'InputTokens' value. Bound in prepare().
     */
    protected $inputTokens;

    /**
     * Reference to the context's 'InputIndex' value: the position in
     * $inputTokens currently being processed. Bound in prepare().
     */
    protected $inputIndex;

    /**
     * Elements (and optionally attributes) this injector requires. Entries
     * are either a plain element name, or 'element' => array('attr', ...).
     * Consumed by checkNeeded().
     */
    public $needed = array();

    /**
     * Pending rewind index, or false if no rewind has been requested.
     */
    protected $rewind = false;

    /**
     * Records a rewind request; it stays pending until getRewind() is
     * called.
     * @param $index Index to rewind to
     */
    public function rewind($index) {
        $this->rewind = $index;
    }

    /**
     * Returns the pending rewind request and clears it.
     * @return The index passed to rewind(), or false if none is pending
     */
    public function getRewind() {
        $pending = $this->rewind;
        $this->rewind = false;
        return $pending;
    }

    /**
     * Binds the injector to a configuration and context.
     * @param $config Configuration object supplying the HTML definition
     * @param $context Context object supplying CurrentNesting, InputTokens
     *        and InputIndex (bound by reference)
     * @return False on success; otherwise the name of the missing element
     *         or 'element.attribute' reported by checkNeeded(), in which
     *         case the context variables are not bound
     */
    public function prepare($config, $context) {
        $this->htmlDefinition = $config->getHTMLDefinition();
        // The definition is stored before the requirements check on
        // purpose: some unit tests ignore a failed check and go on to
        // exercise checkNeeded directly.
        $missing = $this->checkNeeded($config);
        if ($missing === false) {
            $this->currentNesting =& $context->get('CurrentNesting');
            $this->inputTokens    =& $context->get('InputTokens');
            $this->inputIndex     =& $context->get('InputIndex');
        }
        return $missing;
    }

    /**
     * Verifies that everything listed in $needed exists in the HTML
     * definition.
     * @param $config Configuration object supplying the HTML definition
     * @return False if every requirement is met; otherwise the first
     *         missing element name, or 'element.attribute' for a missing
     *         attribute
     */
    public function checkNeeded($config) {
        $definition = $config->getHTMLDefinition();
        foreach ($this->needed as $key => $value) {
            // Integer keys mean the value itself is the element name
            $element = is_int($key) ? $value : $key;
            if (!isset($definition->info[$element])) return $element;
            if (is_array($value)) {
                foreach ($value as $attribute) {
                    if (!isset($definition->info[$element]->attr[$attribute])) {
                        return "$element.$attribute";
                    }
                }
            }
        }
        return false;
    }

    /**
     * Tests whether an element may be placed at the current position.
     * @param $name Name of the element to test
     * @return Bool true if the immediate parent allows the element as a
     *         child and neither the parent nor any other open ancestor
     *         excludes it
     */
    public function allowsElement($name) {
        if (empty($this->currentNesting)) {
            $parent = $this->htmlDefinition->info_parent_def;
        } else {
            // Peek at the top of the stack by popping and pushing back
            $top = array_pop($this->currentNesting);
            $this->currentNesting[] = $top;
            $parent = $this->htmlDefinition->info[$top->name];
        }
        $permitted = isset($parent->child->elements[$name])
            && !isset($parent->excludes[$name]);
        if (!$permitted) return false;
        // The parent was handled above; walk the remaining ancestors
        // from the innermost outwards looking for an exclusion.
        $depth = count($this->currentNesting) - 2;
        while ($depth >= 0) {
            $ancestor     = $this->currentNesting[$depth];
            $ancestor_def = $this->htmlDefinition->info[$ancestor->name];
            if (isset($ancestor_def->excludes[$name])) return false;
            $depth--;
        }
        return true;
    }

    /**
     * Steps an iterator one token forward through the input.
     * @param $i Iterator index, by reference; pass null to start at the
     *        token after the current input index
     * @param $current Receives the token at the new position
     * @return Bool true if a token exists at the new position, false
     *         otherwise ($current is then left untouched)
     */
    protected function forward(&$i, &$current) {
        if ($i !== null) {
            $i++;
        } else {
            $i = $this->inputIndex + 1;
        }
        if (isset($this->inputTokens[$i])) {
            $current = $this->inputTokens[$i];
            return true;
        }
        return false;
    }

    /**
     * Like forward(), but tracks nesting and stops at the end token that
     * closes the level the iteration started on.
     * @param $i Iterator index, by reference (see forward())
     * @param $current Receives the token at the new position
     * @param $nesting Nesting counter, by reference; null is treated as 0
     * @return Bool false when the input is exhausted or an end token is
     *         met at nesting level 0; true otherwise
     */
    protected function forwardUntilEndToken(&$i, &$current, &$nesting) {
        if (!$this->forward($i, $current)) return false;
        if ($nesting === null) $nesting = 0;
        if ($current instanceof HTMLPurifier_Token_Start) {
            $nesting++;
            return true;
        }
        if ($current instanceof HTMLPurifier_Token_End) {
            if ($nesting <= 0) return false;
            $nesting--;
        }
        return true;
    }

    /**
     * Steps an iterator one token backward through the input.
     * @param $i Iterator index, by reference; pass null to start at the
     *        token before the current input index
     * @param $current Receives the token at the new position
     * @return Bool false once the index drops below zero, true otherwise
     */
    protected function backward(&$i, &$current) {
        if ($i !== null) {
            $i--;
        } else {
            $i = $this->inputIndex - 1;
        }
        if ($i < 0) return false;
        $current = $this->inputTokens[$i];
        return true;
    }

    /**
     * Initializes an iterator at the current input position.
     * @param $i Iterator index, by reference; set to the current input
     *        index when null, otherwise used as given
     * @param $current Receives the token at index $i
     */
    protected function current(&$i, &$current) {
        if ($i === null) {
            $i = $this->inputIndex;
        }
        $current = $this->inputTokens[$i];
    }

    /**
     * Handler for text tokens; does nothing by default.
     */
    public function handleText(&$token) {}

    /**
     * Handler for element tokens; does nothing by default.
     */
    public function handleElement(&$token) {}

    /**
     * Handler for end tokens; forwards to notifyEnd() by default.
     */
    public function handleEnd(&$token) {
        $this->notifyEnd($token);
    }

    /**
     * Notification that an end token was processed; does nothing by
     * default. Called from the default handleEnd() implementation.
     */
    public function notifyEnd($token) {}


}
05712 
05713 
05714 
05715 
05716 
/**
 * Holds the localised messages for one language and formats them with
 * substituted arguments.
 */
class HTMLPurifier_Language
{

    /**
     * Language code of this object.
     */
    public $code = 'en';

    /**
     * Fallback language code; filled from the language factory by load().
     */
    public $fallback = false;

    /**
     * Message table: message key => localised template.
     */
    public $messages = array();

    /**
     * Table of human-readable error names, keyed by error number.
     */
    public $errorNames = array();

    /**
     * Error flag; false by default.
     */
    public $error = false;

    /**
     * Whether load() has already populated this object.
     */
    public $_loaded = false;

    /**
     * Configuration and context objects supplied to the constructor.
     */
    protected $config, $context;

    /**
     * @param $config Configuration object
     * @param $context Context object; formatMessage() reads 'Generator'
     *        from it when a token argument has to be rendered
     */
    public function __construct($config, $context) {
        $this->config  = $config;
        $this->context = $context;
    }

    /**
     * Populates this object from the language factory's cache. Every key
     * listed in the factory's $keys is copied onto the property of the
     * same name. Does nothing if already loaded.
     */
    public function load() {
        if ($this->_loaded) {
            return;
        }
        $factory = HTMLPurifier_LanguageFactory::instance();
        $factory->loadLanguage($this->code);
        foreach ($factory->keys as $property) {
            $this->$property = $factory->cache[$this->code][$property];
        }
        $this->_loaded = true;
    }

    /**
     * Looks up a message without performing any substitution.
     * @param $key Message key
     * @return The message, or "[$key]" if the key is unknown
     */
    public function getMessage($key) {
        if (!$this->_loaded) $this->load();
        return isset($this->messages[$key]) ? $this->messages[$key] : "[$key]";
    }

    /**
     * Looks up the name of an error.
     * @param $int Error number
     * @return The error name, or "[Error: $int]" if the number is unknown
     */
    public function getErrorName($int) {
        if (!$this->_loaded) $this->load();
        return isset($this->errorNames[$int]) ? $this->errorNames[$int] : "[Error: $int]";
    }

    /**
     * Joins a list into a localised enumeration, using the 'Item separator'
     * message between items and 'Item separator last' before the final one.
     * @param $array Zero-indexed list of items
     * @return Joined string (empty string for an empty list)
     */
    public function listify($array) {
        $sep      = $this->getMessage('Item separator');
        $sep_last = $this->getMessage('Item separator last');
        $final    = count($array) - 1;
        $joined   = '';
        for ($pos = 0; $pos <= $final; $pos++) {
            if ($pos > 0) {
                $joined .= ($pos < $final) ? $sep : $sep_last;
            }
            $joined .= $array[$pos];
        }
        return $joined;
    }

    /**
     * Formats a message by replacing placeholders with arguments.
     *
     * For the argument at index N the following placeholders are filled:
     *  - scalar: $N
     *  - list array: $N (items joined with listify())
     *  - associative array: $N.Keys and $N.Values (each listified)
     *  - HTMLPurifier_Token: $N.Name, $N.Data (when set), $N.Serialized,
     *    $N.Compact (the token rendered without attributes) and $N.Line
     * Objects that are not tokens produce no placeholders.
     *
     * @param $key Message key
     * @param $args Array of arguments to substitute
     * @return The formatted message, or "[$key]" if the key is unknown
     */
    public function formatMessage($key, $args = array()) {
        if (!$this->_loaded) $this->load();
        if (!isset($this->messages[$key])) return "[$key]";
        $template     = $this->messages[$key];
        $replacements = array();
        $generator    = false; // fetched lazily, only if a token shows up
        foreach ($args as $index => $arg) {
            $slot = '$' . $index;

            if (is_object($arg)) {
                if ($arg instanceof HTMLPurifier_Token) {
                    if (!$generator) $generator = $this->context->get('Generator');
                    if (isset($arg->name)) $replacements[$slot . '.Name'] = $arg->name;
                    if (isset($arg->data)) $replacements[$slot . '.Data'] = $arg->data;
                    $serialized = $generator->generateFromToken($arg);
                    $replacements[$slot . '.Serialized'] = $serialized;
                    $replacements[$slot . '.Compact']    = $serialized;
                    // The compact form drops the attributes; when there
                    // are none it is the same as the serialized form.
                    if (!empty($arg->attr)) {
                        $bare = clone $arg;
                        $bare->attr = array();
                        $replacements[$slot . '.Compact'] = $generator->generateFromToken($bare);
                    }
                    $replacements[$slot . '.Line'] = $arg->line ? $arg->line : 'unknown';
                }
                continue;
            }

            if (is_array($arg)) {
                $keys = array_keys($arg);
                if (array_keys($keys) === $keys) {
                    // keys are exactly 0..n-1: a plain list
                    $replacements[$slot] = $this->listify($arg);
                } else {
                    // associative array: there is no bare $N placeholder
                    $replacements[$slot . '.Keys']   = $this->listify($keys);
                    $replacements[$slot . '.Values'] = $this->listify(array_values($arg));
                }
                continue;
            }

            $replacements[$slot] = $arg;
        }
        return strtr($template, $replacements);
    }

}
05876 
05877 
05878 
05879 
05880 
/**
 * Creates HTMLPurifier_Language objects and loads/caches the message
 * files that back them, resolving fallback languages along the way.
 */
class HTMLPurifier_LanguageFactory
{

    /**
     * Loaded language data: language code => array holding one entry per
     * name in $keys.
     */
    public $cache;

    /**
     * Names of the variables read from a language message file and copied
     * onto language objects.
     */
    public $keys = array('fallback', 'messages', 'errorNames');

    /**
     * Validator used to check language codes; created in setup().
     */
    protected $validator;

    /**
     * Base directory of the library's files; set in setup().
     */
    protected $dir;

    /**
     * Keys whose arrays are merged with the fallback's using the + operator
     * (the language's own entries win).
     */
    protected $mergeable_keys_map = array('messages' => true, 'errorNames' => true);

    /**
     * Keys whose arrays are merged with the fallback's using array_merge().
     */
    protected $mergeable_keys_list = array();

    /**
     * Retrieves the shared factory instance.
     * @param $prototype Optional: a factory object to install as the shared
     *        instance, or bool true to replace the shared instance with a
     *        freshly set-up default factory
     * @return The shared factory instance
     */
    public static function instance($prototype = null) {
        static $instance = null;
        if ($prototype === true) {
            // Explicit reset. This must be tested before the generic
            // "install prototype" branch (otherwise the literal true would
            // be stored as the instance) and must use ===, because any
            // object compares loosely equal to true.
            $instance = new HTMLPurifier_LanguageFactory();
            $instance->setup();
        } elseif ($prototype !== null) {
            $instance = $prototype;
        } elseif ($instance === null) {
            $instance = new HTMLPurifier_LanguageFactory();
            $instance->setup();
        }
        return $instance;
    }

    /**
     * Initializes the validator and the base directory. Kept separate from
     * construction; instance() calls it on factories it creates itself.
     */
    public function setup() {
        $this->validator = new HTMLPurifier_AttrDef_Lang();
        $this->dir = HTMLPURIFIER_PREFIX . '/HTMLPurifier';
    }

    /**
     * Creates a language object for a language code.
     * @param $config Configuration object; 'Core.Language' is used when no
     *        code is given
     * @param $context Context object
     * @param $code Language code, or false to take it from the configuration
     * @return Language object. If no class exists for the code, the object
     *         of its fallback language is returned (with ->code set to the
     *         requested code); ->error is set to true when the language
     *         defines no fallback of its own and English is used.
     */
    public function create($config, $context, $code = false) {

        // validate language code
        if ($code === false) {
            $code = $this->validator->validate(
              $config->get('Core.Language'), $config, $context
            );
        } else {
            $code = $this->validator->validate($code, $config, $context);
        }
        if ($code === false) $code = 'en'; // malformed code becomes English

        $pcode = str_replace('-', '_', $code); // make valid PHP classname

        if ($code == 'en') {
            $lang = new HTMLPurifier_Language($config, $context);
        } else {
            $class = 'HTMLPurifier_Language_' . $pcode;
            $file  = $this->dir . '/Language/classes/' . $code . '.php';
            if (file_exists($file) || class_exists($class, false)) {
                $lang = new $class($config, $context);
            } else {
                // No dedicated class: build the fallback language instead
                $raw_fallback = $this->getFallbackFor($code);
                $fallback = $raw_fallback ? $raw_fallback : 'en';
                $lang = $this->create($config, $context, $fallback);
                if (!$raw_fallback) {
                    $lang->error = true;
                }
            }
        }

        $lang->code = $code;

        return $lang;

    }

    /**
     * Returns the fallback language of a language, loading it if necessary.
     * @param $code Language code
     * @return The cached 'fallback' entry for that language
     */
    public function getFallbackFor($code) {
        $this->loadLanguage($code);
        return $this->cache[$code]['fallback'];
    }

    /**
     * Loads a language's message file into $cache, merged with the data of
     * its fallback language (which is loaded recursively).
     *
     * The message file is include'd in this method's scope and is expected
     * to assign the variables named in $keys; if the file does not exist
     * the language consists solely of its fallback's data.
     *
     * @param $code Language code
     */
    public function loadLanguage($code) {
        // Languages whose fallback chain is being resolved right now.
        // Entries are removed again once resolution finishes, so the guard
        // only trips on a genuine cycle and not on a later, independent
        // load (e.g. from another factory instance, which shares this
        // static).
        static $languages_seen = array();

        // abort if we've already loaded it
        if (isset($this->cache[$code])) return;

        // generate filename
        $filename = $this->dir . '/Language/messages/' . $code . '.php';

        // default fallback : may be overwritten by the ensuing include
        $fallback = ($code != 'en') ? 'en' : false;

        // load primary localisation
        if (!file_exists($filename)) {
            // skip the include: will rely solely on fallback
            $filename = $this->dir . '/Language/messages/en.php';
            $cache = array();
        } else {
            include $filename;
            $cache = compact($this->keys);
        }

        // load fallback localisation
        if (!empty($fallback)) {

            // infinite recursion guard: if this language is already being
            // resolved further up the call stack, its fallback chain loops
            // back on itself; break the loop by falling back to English
            $already_resolving = isset($languages_seen[$code]);
            if ($already_resolving) {
                trigger_error('Circular fallback reference in language ' .
                    $code, E_USER_ERROR);
                $fallback = 'en';
            }
            $languages_seen[$code] = true;

            // load the fallback recursively
            $this->loadLanguage($fallback);
            $fallback_cache = $this->cache[$fallback];

            // only the invocation that set the marker clears it
            if (!$already_resolving) unset($languages_seen[$code]);

            // merge fallback with current language
            foreach ( $this->keys as $key ) {
                $has_own      = isset($cache[$key]);
                $has_fallback = isset($fallback_cache[$key]);
                if ($has_own && $has_fallback) {
                    if (isset($this->mergeable_keys_map[$key])) {
                        $cache[$key] = $cache[$key] + $fallback_cache[$key];
                    } elseif (isset($this->mergeable_keys_list[$key])) {
                        $cache[$key] = array_merge( $fallback_cache[$key], $cache[$key] );
                    }
                } elseif (!$has_own) {
                    // inherit from the fallback; a value the language
                    // defines itself is never overwritten by a fallback
                    // that lacks the key
                    $cache[$key] = $has_fallback ? $fallback_cache[$key] : null;
                }
            }

        }

        // save to cache for later retrieval
        $this->cache[$code] = $cache;

        return;
    }

}
06075 
06076 
06077 
06078 
06079 
/**
 * A measurement made of a numeric part and a unit, e.g. "12px".
 */
class HTMLPurifier_Length
{

    /**
     * Numeric part, stored as a string.
     */
    protected $n;

    /**
     * Unit as a string, or false when there is none.
     */
    protected $unit;

    /**
     * Cached validation result; null until isValid() is first called.
     */
    protected $isValid;

    /**
     * Lookup table of recognised units.
     */
    protected static $allowedUnits = array(
        'em' => true, 'ex' => true, 'px' => true, 'in' => true,
        'cm' => true, 'mm' => true, 'pt' => true, 'pc' => true
    );

    /**
     * @param $n Numeric part
     * @param $u Unit, or false for none
     */
    public function __construct($n = '0', $u = false) {
        $this->n = (string) $n;
        if ($u === false) {
            $this->unit = false;
        } else {
            $this->unit = (string) $u;
        }
    }

    /**
     * Builds a length from its string form. The leading run of digits,
     * '.', '+' and '-' becomes the number; whatever follows is the unit.
     * @param $s String such as "1.5em", or an existing length object
     * @return HTMLPurifier_Length (the argument itself if it already is
     *         one); the result is not validated
     */
    static public function make($s) {
        if ($s instanceof HTMLPurifier_Length) return $s;
        $split  = strspn($s, '1234567890.+-');
        $number = substr($s, 0, $split);
        $suffix = substr($s, $split);
        return new HTMLPurifier_Length($number, $suffix === '' ? false : $suffix);
    }

    /**
     * Validates the number and unit, normalising both in place: a signed
     * zero becomes '0', the unit is lower-cased, and the number is replaced
     * by the validator's output.
     * @return Bool whether the length is valid
     */
    protected function validate() {
        // Special case: signed zero is plain zero, and a unitless zero
        // is valid as is
        if (in_array($this->n, array('+0', '-0'), true)) {
            $this->n = '0';
        }
        if ($this->unit === false && $this->n === '0') {
            return true;
        }
        if (!ctype_lower($this->unit)) {
            $this->unit = strtolower($this->unit);
        }
        if (!isset(self::$allowedUnits[$this->unit])) {
            return false;
        }
        // Hack: the CSS number validator is invoked without a real
        // config or context
        $checker = new HTMLPurifier_AttrDef_CSS_Number();
        $number  = $checker->validate($this->n, false, false);
        if ($number === false) {
            return false;
        }
        $this->n = $number;
        return true;
    }

    /**
     * @return String form (number followed by unit), or false if the
     *         length is invalid
     */
    public function toString() {
        return $this->isValid() ? $this->n . $this->unit : false;
    }

    /**
     * @return The numeric part as a string
     */
    public function getN() {return $this->n;}

    /**
     * @return The unit, or false if there is none
     */
    public function getUnit() {return $this->unit;}

    /**
     * @return Bool whether the length is valid; computed once and cached
     */
    public function isValid() {
        if ($this->isValid === null) {
            $this->isValid = $this->validate();
        }
        return $this->isValid;
    }

    /**
     * Compares this length with another, converting the other to this
     * length's unit first if the units differ.
     * @param $l Length to compare against, or false
     * @return The difference of the numeric parts (this minus other), so
     *         the sign gives the ordering; bool false if $l is false or
     *         the conversion fails
     */
    public function compareTo($l) {
        if ($l === false) return false;
        if ($this->unit !== $l->unit) {
            $converter = new HTMLPurifier_UnitConverter();
            $l = $converter->convert($l, $this->unit);
            if ($l === false) return false;
        }
        return $this->n - $l->n;
    }

}
06191 
06192 
06193 
06194 
06195 
/**
 * Base class for lexers, which turn an HTML string into tokens. It provides
 * the factory that selects a concrete lexer, plus entity decoding and input
 * normalisation helpers shared by the implementations.
 */
class HTMLPurifier_Lexer
{

    /**
     * Whether this lexer records line numbers. create() refuses lexers
     * without this capability when line tracking is required.
     */
    public $tracksLineNumbers = false;

    // -- STATIC ----------------------------------------------------------

    /**
     * Retrieves or instantiates a lexer.
     *
     * With a configuration object, %Core.LexerImpl decides: an object is
     * used as is, a recognised name ('DOMLex', 'DirectLex', 'PH5P') is
     * instantiated, and null triggers auto-detection (DirectLex when line
     * numbers must be tracked, otherwise DOMLex if PHP's DOM extension is
     * usable, otherwise DirectLex).
     *
     * @param $config HTMLPurifier_Config instance. Passing a lexer
     *        prototype (object or name) instead is deprecated and raises
     *        an E_USER_WARNING.
     * @return Lexer instance
     * @throws HTMLPurifier_Exception if the lexer name is not recognised,
     *         or if line tracking is required but the lexer does not
     *         support it
     */
    public static function create($config) {

        if (!($config instanceof HTMLPurifier_Config)) {
            $lexer = $config;
            trigger_error("Passing a prototype to
              HTMLPurifier_Lexer::create() is deprecated, please instead
              use %Core.LexerImpl", E_USER_WARNING);
            // There is no configuration to consult on this path, so no
            // line-tracking requirement can be known. (Calling get() on
            // the prototype itself would be a fatal error.)
            $needs_tracking = false;
        } else {
            $lexer = $config->get('Core.LexerImpl');
            $needs_tracking =
                $config->get('Core.MaintainLineNumbers') ||
                $config->get('Core.CollectErrors');
        }

        $inst = null;
        if (is_object($lexer)) {
            $inst = $lexer;
        } else {

            if (is_null($lexer)) { do {
                // auto-detection algorithm

                if ($needs_tracking) {
                    $lexer = 'DirectLex';
                    break;
                }

                if (
                    class_exists('DOMDocument') &&
                    method_exists('DOMDocument', 'loadHTML') &&
                    !extension_loaded('domxml')
                ) {
                    // check for DOM support, because while it's part of the
                    // core, it can be disabled compile time. Also, the PECL
                    // domxml extension overrides the default DOM, and is evil
                    // and nasty and we shan't bother to support it
                    $lexer = 'DOMLex';
                } else {
                    $lexer = 'DirectLex';
                }

            } while(0); } // do..while so we can break

            // instantiate recognized string names
            switch ($lexer) {
                case 'DOMLex':
                    $inst = new HTMLPurifier_Lexer_DOMLex();
                    break;
                case 'DirectLex':
                    $inst = new HTMLPurifier_Lexer_DirectLex();
                    break;
                case 'PH5P':
                    $inst = new HTMLPurifier_Lexer_PH5P();
                    break;
                default:
                    throw new HTMLPurifier_Exception("Cannot instantiate unrecognized Lexer type " . htmlspecialchars($lexer));
            }
        }

        if (!$inst) throw new HTMLPurifier_Exception('No lexer was instantiated');

        // once PHP DOM implements native line numbers, or we
        // hack out something using XSLT, remove this stipulation
        if ($needs_tracking && !$inst->tracksLineNumbers) {
            throw new HTMLPurifier_Exception('Cannot use lexer that does not support line numbers with Core.MaintainLineNumbers or Core.CollectErrors (use DirectLex instead)');
        }

        return $inst;

    }

    // -- CONVENIENCE MEMBERS ---------------------------------------------

    /**
     * Creates the entity parser used by parseData() and normalize().
     * NOTE(review): $_entity_parser is not declared in the visible part of
     * this class, so it is created as a dynamic property here.
     */
    public function __construct() {
        $this->_entity_parser = new HTMLPurifier_EntityParser();
    }

    /**
     * Translation table for the most common entities, so they can be
     * decoded with a single strtr() call.
     */
    protected $_special_entity2str =
            array(
                    '&quot;' => '"',
                    '&amp;'  => '&',
                    '&lt;'   => '<',
                    '&gt;'   => '>',
                    '&#39;'  => "'",
                    '&#039;' => "'",
                    '&#x27;' => "'"
            );

    /**
     * Decodes the special entities in a piece of character data.
     *
     * The common entities are handled by table lookup; the entity parser
     * is only invoked when ampersands remain that the table cannot account
     * for.
     *
     * @param $string String of character data
     * @return The string with special entities decoded
     */
    public function parseData($string) {

        // following functions require at least one character
        if ($string === '') return '';

        // subtracts amps that cannot possibly be escaped
        $num_amp = substr_count($string, '&') - substr_count($string, '& ') -
            ($string[strlen($string)-1] === '&' ? 1 : 0);

        if (!$num_amp) return $string; // abort if no entities
        $num_esc_amp = substr_count($string, '&amp;');
        $string = strtr($string, $this->_special_entity2str);

        // code duplication for sake of optimization, see above
        $num_amp_2 = substr_count($string, '&') - substr_count($string, '& ') -
            ($string[strlen($string)-1] === '&' ? 1 : 0);

        // every remaining candidate ampersand came from a decoded &amp;
        if ($num_amp_2 <= $num_esc_amp) return $string;

        // hmm... now we have some uncommon entities. Use the callback.
        $string = $this->_entity_parser->substituteSpecialEntities($string);
        return $string;
    }

    /**
     * Lexes an HTML string into tokens. This base implementation only
     * raises an E_USER_ERROR; concrete lexers are expected to override it.
     * @param $string String of HTML
     * @param $config Configuration object
     * @param $context Context object
     */
    public function tokenizeHTML($string, $config, $context) {
        trigger_error('Call to abstract class', E_USER_ERROR);
    }

    /**
     * Replaces CDATA sections with their HTML-escaped contents.
     * @param $string HTML string to process
     * @return HTML with CDATA sections escaped
     */
    protected static function escapeCDATA($string) {
        return preg_replace_callback(
            '/<!\[CDATA\[(.+?)\]\]>/s',
            array('HTMLPurifier_Lexer', 'CDATACallback'),
            $string
        );
    }

    /**
     * Same as escapeCDATA(), for CDATA sections wrapped in the
     * comment-guarded form matched by the pattern below.
     * @param $string HTML string to process
     * @return HTML with those sections escaped
     */
    protected static function escapeCommentedCDATA($string) {
        return preg_replace_callback(
            '#<!--//--><!\[CDATA\[//><!--(.+?)//--><!\]\]>#s',
            array('HTMLPurifier_Lexer', 'CDATACallback'),
            $string
        );
    }

    /**
     * Strips Internet Explorer conditional comments together with their
     * contents.
     * @param $string HTML string to process
     * @return HTML without conditional comments
     */
    protected static function removeIEConditional($string) {
        return preg_replace(
            '#<!--\[if [^>]+\]>.*?<!\[endif\]-->#si', // probably should generalize for all strings
            '',
            $string
        );
    }

    /**
     * Callback for escapeCDATA() and escapeCommentedCDATA().
     * @param $matches Match array from preg_replace_callback(); index 1
     *        holds the contents of the CDATA section
     * @return The contents, HTML-escaped
     */
    protected static function CDATACallback($matches) {
        // not exactly sure why the character set is needed, but whatever
        return htmlspecialchars($matches[1], ENT_COMPAT, 'UTF-8');
    }

    /**
     * Prepares raw HTML for lexing: newline normalisation, CDATA escaping,
     * conditional-comment removal, optional body extraction, expansion of
     * non-special entities, UTF-8 cleaning and optional removal of
     * processing instructions, in that order.
     * @param $html HTML string
     * @param $config Configuration object controlling the optional steps
     * @param $context Context object; 'ErrorCollector' is read from it when
     *        body extraction runs with %Core.CollectErrors on
     * @return Normalised HTML
     */
    public function normalize($html, $config, $context) {

        // normalize newlines to \n
        if ($config->get('Core.NormalizeNewlines')) {
            $html = str_replace("\r\n", "\n", $html);
            $html = str_replace("\r", "\n", $html);
        }

        if ($config->get('HTML.Trusted')) {
            // escape convoluted CDATA
            $html = $this->escapeCommentedCDATA($html);
        }

        // escape CDATA
        $html = $this->escapeCDATA($html);

        $html = $this->removeIEConditional($html);

        // extract body from document if applicable
        if ($config->get('Core.ConvertDocumentToFragment')) {
            $e = false;
            if ($config->get('Core.CollectErrors')) {
                $e =& $context->get('ErrorCollector');
            }
            $new_html = $this->extractBody($html);
            if ($e && $new_html != $html) {
                $e->send(E_WARNING, 'Lexer: Extracted body');
            }
            $html = $new_html;
        }

        // expand entities that aren't the big five
        $html = $this->_entity_parser->substituteNonSpecialEntities($html);

        // clean into wellformed UTF-8 string for an SGML context: this has
        // to be done after entity expansion because the entities sometimes
        // represent non-SGML characters (horror, horror!)
        $html = HTMLPurifier_Encoder::cleanUTF8($html);

        // if processing instructions are to be removed, remove them now
        if ($config->get('Core.RemoveProcessingInstructions')) {
            $html = preg_replace('#<\?.+?\?>#s', '', $html);
        }

        return $html;
    }

    /**
     * Extracts the contents of the <body> element from a full document.
     * @param $html HTML string
     * @return Everything between the body start tag and the last </body>,
     *         or the input unchanged if no body element is found
     */
    public function extractBody($html) {
        $matches = array();
        $result = preg_match('!<body[^>]*>(.*)</body>!is', $html, $matches);
        if ($result) {
            return $matches[1];
        } else {
            return $html;
        }
    }

}
06518 
06519 
06520 
06521 
06522 
/**
 * Percent-encodes strings and normalises existing percent-encodings,
 * leaving a configurable set of characters untouched.
 */
class HTMLPurifier_PercentEncoder
{

    /**
     * Lookup table of byte values that are never encoded: byte => true.
     */
    protected $preserve = array();

    /**
     * @param $preserve Optional string of additional characters that must
     *        not be encoded, on top of the unreserved set (letters, digits,
     *        '-', '.', '_' and '~'); false for none
     */
    public function __construct($preserve = false) {
        // unreserved characters: digits, upper-case, lower-case
        $ranges = array(array(48, 57), array(65, 90), array(97, 122));
        foreach ($ranges as $range) {
            for ($byte = $range[0]; $byte <= $range[1]; $byte++) {
                $this->preserve[$byte] = true;
            }
        }
        // dash, period, underscore, tilde
        foreach (array(45, 46, 95, 126) as $byte) {
            $this->preserve[$byte] = true;
        }

        // extra characters requested by the caller
        if ($preserve !== false) {
            $total = strlen($preserve);
            for ($pos = 0; $pos < $total; $pos++) {
                $this->preserve[ord($preserve[$pos])] = true;
            }
        }
    }

    /**
     * Percent-encodes every byte that is not in the preserved set. Percent
     * signs are passed through unchanged, so existing escape sequences are
     * not encoded a second time.
     * @param $string String to encode
     * @return Encoded string
     */
    public function encode($string) {
        $encoded = '';
        $total   = strlen($string);
        for ($pos = 0; $pos < $total; $pos++) {
            $char = $string[$pos];
            $byte = ord($char);
            if ($char === '%' || isset($this->preserve[$byte])) {
                $encoded .= $char;
            } else {
                $encoded .= '%' . sprintf('%02X', $byte);
            }
        }
        return $encoded;
    }

    /**
     * Normalises the percent-encodings in a string:
     *  - a '%' not followed by two hex digits becomes '%25';
     *  - an encoding of a preserved character is decoded;
     *  - every other encoding has its hex digits upper-cased.
     * @param $string String to normalise
     * @return Normalised string
     */
    public function normalize($string) {
        if ($string == '') return '';
        $segments   = explode('%', $string);
        $normalized = array_shift($segments); // text before the first '%'
        foreach ($segments as $segment) {
            $hex = substr($segment, 0, 2);
            if (strlen($segment) < 2 || !ctype_xdigit($hex)) {
                // stray percent sign: escape it, keep what follows
                $normalized .= '%25' . $segment;
                continue;
            }
            $tail = substr($segment, 2);
            $byte = hexdec($hex);
            if (isset($this->preserve[$byte])) {
                $normalized .= chr($byte) . $tail;
            } else {
                $normalized .= '%' . strtoupper($hex) . $tail;
            }
        }
        return $normalized;
    }

}
06617 
06618 
06619 
06620 
06621 
/**
 * A key/value store that can inherit values from a parent list.
 */
class HTMLPurifier_PropertyList
{
    /**
     * Values set directly on this list.
     */
    protected $data = array();

    /**
     * Parent list consulted for keys this list lacks, or null.
     */
    protected $parent;

    /**
     * Result of the last squash(); null until squash() has run.
     */
    protected $cache;

    /**
     * @param $parent Optional parent property list
     */
    public function __construct($parent = null) {
        $this->parent = $parent;
    }

    /**
     * Retrieves a value, searching this list first and then its ancestors.
     * @param $name Key to look up
     * @return The stored value
     * @throws HTMLPurifier_Exception if no list in the chain has the key
     */
    public function get($name) {
        if ($this->has($name)) {
            return $this->data[$name];
        }
        // possible performance bottleneck, convert to iterative if necessary
        if (!$this->parent) {
            throw new HTMLPurifier_Exception("Key '$name' not found");
        }
        return $this->parent->get($name);
    }

    /**
     * Stores a value on this list.
     * @param $name Key
     * @param $value Value
     */
    public function set($name, $value) {
        $this->data[$name] = $value;
    }

    /**
     * Tests whether a key is set on this list itself (parents are not
     * consulted). A key holding null still counts as set.
     * @param $name Key
     * @return Bool
     */
    public function has($name) {
        return array_key_exists($name, $this->data);
    }

    /**
     * Removes one key from this list, or every key when called with no
     * name.
     * @param $name Key to remove; if it compares loosely equal to null,
     *        the whole list is cleared
     */
    public function reset($name = null) {
        if ($name != null) {
            unset($this->data[$name]);
        } else {
            $this->data = array();
        }
    }

    /**
     * Flattens this list and its ancestors into one array, with values
     * nearer to this list taking precedence. The result is cached and the
     * cache is not refreshed by set() or reset().
     * @param $force If true, ignore the cache and rebuild (the parents are
     *        rebuilt as well)
     * @return Merged array
     */
    public function squash($force = false) {
        if (!$force && $this->cache !== null) {
            return $this->cache;
        }
        $merged = $this->data;
        if ($this->parent) {
            $merged = array_merge($this->parent->squash($force), $this->data);
        }
        $this->cache = $merged;
        return $merged;
    }

    /**
     * @return The parent list, or null
     */
    public function getParent() {
        return $this->parent;
    }

    /**
     * Replaces the parent list.
     * @param $plist New parent property list
     */
    public function setParent($plist) {
        $this->parent = $plist;
    }
}
06704 
06705 
06706 
06707 
06708 
/**
 * Iterator wrapper that only yields the entries whose key starts with a
 * given prefix.
 */
class HTMLPurifier_PropertyListIterator extends FilterIterator
{

    /**
     * Length of the prefix, in bytes.
     */
    protected $l;

    /**
     * Prefix that keys must start with, as passed to the constructor.
     */
    protected $filter;

    /**
     * @param $iterator Iterator to filter
     * @param $filter Key prefix to match; null (the default) or an empty
     *        string accepts every key
     */
    public function __construct(Iterator $iterator, $filter = null) {
        parent::__construct($iterator);
        // Cast before measuring: strlen(null) is deprecated on PHP 8.1+,
        // and the cast yields the same length of 0.
        $this->l = strlen((string) $filter);
        $this->filter = $filter;
    }

    /**
     * Accepts the current entry if its key begins with the prefix.
     * @return Bool
     */
    // The attribute below silences the PHP 8.1 tentative-return-type
    // deprecation; older PHP versions parse the line as a comment.
    #[\ReturnTypeWillChange]
    public function accept() {
        $key = $this->getInnerIterator()->key();
        // Both operands are cast so that a null prefix or a non-string key
        // is compared the same way as before without deprecation notices.
        return strncmp((string) $key, (string) $this->filter, $this->l) === 0;
    }

}
06737 
06738 
06739 
06740 
06741 
/**
 * Base class for strategies: objects that take a list of tokens and
 * process it in a single call.
 */
abstract class HTMLPurifier_Strategy
{

    /**
     * Runs the strategy on a list of tokens.
     * @param $tokens Array of tokens to process
     * @param $config Configuration object
     * @param $context Context object
     * @return Defined by the implementing strategy.
     *         NOTE(review): presumably the processed token array; confirm
     *         against the concrete strategies.
     */
    abstract public function execute($tokens, $config, $context);

}
06764 
06765 
06766 
06767 
06768 
/**
 * ArrayObject that remembers which keys have been read through
 * offsetGet(), so callers can find out which entries were actually used.
 */
class HTMLPurifier_StringHash extends ArrayObject
{
    /**
     * Keys read so far: key => true.
     */
    protected $accessed = array();

    /**
     * Retrieves a value and records that its key was accessed.
     * @param $index Key to read
     * @return The stored value
     */
    // The attribute below silences the PHP 8.1 tentative-return-type
    // deprecation for this override; older PHP versions parse the line as
    // a comment.
    #[\ReturnTypeWillChange]
    public function offsetGet($index) {
        $this->accessed[$index] = true;
        return parent::offsetGet($index);
    }

    /**
     * @return Lookup array of the keys read since construction or the last
     *         resetAccessed(): key => true
     */
    public function getAccessed() {
        return $this->accessed;
    }

    /**
     * Forgets all recorded accesses.
     */
    public function resetAccessed() {
        $this->accessed = array();
    }
}
06804 
06805 
06806 
06807 
06808 
/**
 * Parses "string hash" files into associative arrays.
 *
 * Format, as implemented by parseHandle():
 *  - "KEY: value" declares a single-line value (the value is trimmed);
 *  - a line without a colon is stored under the default key ($default);
 *  - "--KEY--" starts a multi-line value, to which each following line is
 *    appended with a trailing newline;
 *  - lines starting with "--#" are comments;
 *  - a line consisting of "----" ends the current hash;
 *  - blank lines outside a multi-line value are ignored.
 */
class HTMLPurifier_StringHashParser
{

    /**
     * Key used for a single line that carries no "KEY:" prefix.
     */
    public $default = 'ID';

    /**
     * Parses a file containing one string hash.
     * @param $file Path of the file
     * @return Associative array, or false if the file does not exist or
     *         cannot be opened
     */
    public function parseFile($file) {
        if (!file_exists($file)) return false;
        $handle = fopen($file, 'r');
        if (!$handle) return false;
        $hash = $this->parseHandle($handle);
        fclose($handle);
        return $hash;
    }

    /**
     * Parses a file containing several string hashes separated by "----"
     * lines.
     * @param $file Path of the file
     * @return List of associative arrays, or false if the file does not
     *         exist or cannot be opened
     */
    public function parseMultiFile($file) {
        if (!file_exists($file)) return false;
        $hashes = array();
        $handle = fopen($file, 'r');
        if (!$handle) return false;
        while (!feof($handle)) {
            $hashes[] = $this->parseHandle($handle);
        }
        fclose($handle);
        return $hashes;
    }

    /**
     * Reads one string hash from an open file handle, stopping at a "----"
     * line or at the end of the file. The handle is left positioned after
     * the last line read, so the method can be called repeatedly on the
     * same handle.
     * @param $fh File handle, opened for reading
     * @return Associative array of the parsed entries
     */
    protected function parseHandle($fh) {
        $section = false; // key of the multi-line value being filled, if any
        $parsed  = array();
        do {
            $line = fgets($fh);
            if ($line === false) break;
            $line = rtrim($line, "\n\r");
            if ($line === '' && !$section) continue;
            if ($line === '----') break;
            if (strncmp('--', $line, 2) === 0) {
                // Anything starting with "--#" is a comment; any other
                // "--" line opens a multi-line declaration
                if (strncmp('--#', $line, 3) !== 0) {
                    $section = trim($line, '- ');
                    if (!isset($parsed[$section])) $parsed[$section] = '';
                }
                continue;
            }
            if ($section) {
                // inside a multi-line value
                $parsed[$section] .= "$line\n";
                continue;
            }
            // single line: either "KEY: value" or a bare default value
            if (strpos($line, ':') !== false) {
                $pair = explode(':', $line, 2);
                $key  = $pair[0];
                $line = trim($pair[1]);
            } else {
                $key = $this->default;
            }
            $parsed[$key] = $line;
        } while (!feof($fh));
        return $parsed;
    }

}
06915 
06916 
06917 
06918 
06919 
/**
 * Base class for tag transformations. Concrete subclasses implement
 * transform(); this class only supplies a shared CSS helper.
 */
abstract class HTMLPurifier_TagTransform
{

    /**
     * Name of the tag this transform produces.
     * NOTE(review): not used inside this class; meaning taken from the
     * property name, confirm against the subclasses.
     */
    public $transform_to;

    /**
     * Transforms a tag token.
     * @param $tag Token to transform
     * @param $config Configuration object
     * @param $context Context object
     */
    abstract public function transform($tag, $config, $context);

    /**
     * Puts CSS declarations in front of whatever the 'style' attribute
     * already holds, creating the attribute when it is missing.
     * @param $attr Attribute array, modified in place
     * @param $css CSS text to prepend
     */
    protected function prependCSS(&$attr, $css) {
        $existing = isset($attr['style']) ? $attr['style'] : '';
        $attr['style'] = $css . $existing;
    }

}
06952 
06953 
06954 
06955 
06956 
/**
 * Base class for all tokens. Holds source position information and a
 * few bookkeeping properties shared by every token type.
 */
class HTMLPurifier_Token {
    /** Line number, as set by position() or rawPosition(). */
    public $line;
    /** Column number, as set by position() or rawPosition(). */
    public $col;

    /**
     * Array of flags/values attached to the token.
     * NOTE(review): never read in this class; consumers live elsewhere.
     */
    public $armor = array();

    /**
     * Bookkeeping slots; not referenced anywhere in this class.
     * NOTE(review): presumably used by code that processes token
     * streams; confirm before relying on them.
     */
    public $skip;
    public $rewind;
    public $carryover;

    /**
     * Backwards-compatibility shim for the removed 'type' property.
     * Reading $token->type raises an E_USER_NOTICE and returns a short
     * name derived from the concrete class; any other undefined property
     * yields null.
     */
    public function __get($n) {
      if ($n !== 'type') {
        return null;
      }
      trigger_error('Deprecated type property called; use instanceof', E_USER_NOTICE);
      $names = array(
        'HTMLPurifier_Token_Start'   => 'start',
        'HTMLPurifier_Token_Empty'   => 'empty',
        'HTMLPurifier_Token_End'     => 'end',
        'HTMLPurifier_Token_Text'    => 'text',
        'HTMLPurifier_Token_Comment' => 'comment',
      );
      $class = get_class($this);
      return isset($names[$class]) ? $names[$class] : null;
    }

    /**
     * Stores the token's position; calling it without arguments clears it.
     * @param $l Line number
     * @param $c Column number
     */
    public function position($l = null, $c = null) {
        $this->col  = $c;
        $this->line = $l;
    }

    /**
     * Stores the token's position, bumping the line number by one when
     * the column is exactly -1.
     * @param $l Line number
     * @param $c Column number
     */
    public function rawPosition($l, $c) {
        if ($c === -1) {
            $l++;
        }
        $this->col  = $c;
        $this->line = $l;
    }

}
07010 
07011 
07012 
07013 
07014 
/**
 * Creates tokens by cloning a pre-built prototype of each token class
 * and re-running its constructor on the clone, instead of instantiating
 * a new object with `new` for every token.
 */
class HTMLPurifier_TokenFactory
{

    /**
     * Prototype tokens ("p" stands for prototype), one per token class.
     */
    private $p_start, $p_end, $p_empty, $p_text, $p_comment;

    /**
     * Builds the prototypes that the create*() methods clone.
     */
    public function __construct() {
        $this->p_start   = new HTMLPurifier_Token_Start('', array());
        $this->p_end     = new HTMLPurifier_Token_End('');
        $this->p_empty   = new HTMLPurifier_Token_Empty('', array());
        $this->p_text    = new HTMLPurifier_Token_Text('');
        $this->p_comment = new HTMLPurifier_Token_Comment('');
    }

    /**
     * Creates a start-tag token.
     * @param $name Tag name
     * @param $attr Associative array of attributes
     * @return HTMLPurifier_Token_Start
     */
    public function createStart($name, $attr = array()) {
        $token = clone $this->p_start;
        $token->__construct($name, $attr);
        return $token;
    }

    /**
     * Creates an end-tag token.
     * @param $name Tag name
     * @return HTMLPurifier_Token_End
     */
    public function createEnd($name) {
        $token = clone $this->p_end;
        $token->__construct($name);
        return $token;
    }

    /**
     * Creates an empty-tag token.
     * @param $name Tag name
     * @param $attr Associative array of attributes
     * @return HTMLPurifier_Token_Empty
     */
    public function createEmpty($name, $attr = array()) {
        $token = clone $this->p_empty;
        $token->__construct($name, $attr);
        return $token;
    }

    /**
     * Creates a text token.
     * @param $data Text content
     * @return HTMLPurifier_Token_Text
     */
    public function createText($data) {
        $token = clone $this->p_text;
        $token->__construct($data);
        return $token;
    }

    /**
     * Creates a comment token.
     * @param $data Comment content
     * @return HTMLPurifier_Token_Comment
     */
    public function createComment($data) {
        $token = clone $this->p_comment;
        $token->__construct($data);
        return $token;
    }

}
07105 
07106 
07107 
07108 
07109 
/**
 * Holds the components of a URI (scheme, userinfo, host, port, path,
 * query, fragment) and provides generic, scheme-independent validation
 * and reconstruction. A component that is absent is represented by null;
 * the path is always a string (possibly empty).
 */
class HTMLPurifier_URI
{

    public $scheme, $userinfo, $host, $port, $path, $query, $fragment;

    /**
     * @note The scheme is lower-cased and a non-null port is cast to int;
     *       every other component is stored as given.
     */
    public function __construct($scheme, $userinfo, $host, $port, $path, $query, $fragment) {
        $this->scheme = is_null($scheme) || ctype_lower($scheme) ? $scheme : strtolower($scheme);
        $this->userinfo = $userinfo;
        $this->host = $host;
        $this->port = is_null($port) ? $port : (int) $port;
        $this->path = $path;
        $this->query = $query;
        $this->fragment = $fragment;
    }

    /**
     * Retrieves the scheme object for this URI, falling back to the
     * default scheme of the URI definition when no scheme is set.
     * @param $config Configuration object
     * @param $context Context object
     * @return Scheme object, or false if the scheme is not available.
     *         An E_USER_WARNING is raised when the default scheme cannot
     *         be loaded.
     */
    public function getSchemeObj($config, $context) {
        $registry = HTMLPurifier_URISchemeRegistry::instance();
        if ($this->scheme !== null) {
            $scheme_obj = $registry->getScheme($this->scheme, $config, $context);
            if (!$scheme_obj) return false; // invalid scheme, clean it out
        } else {
            // no scheme: retrieve the default one
            $def = $config->getDefinition('URI');
            $scheme_obj = $def->getDefaultScheme($config, $context);
            if (!$scheme_obj) {
                // something funky happened to the default scheme object
                trigger_error(
                    'Default scheme object "' . $def->defaultScheme . '" was not readable',
                    E_USER_WARNING
                );
                return false;
            }
        }
        return $scheme_obj;
    }

    /**
     * Generic validation: normalizes each component in place (invalid
     * host/port are dropped, the other components are percent-encoded).
     * Scheme-specific checks are not performed here.
     * @param $config Configuration object
     * @param $context Context object
     * @return Always true in the code below; components are repaired or
     *         removed rather than rejected
     */
    public function validate($config, $context) {

        // ABNF definitions from RFC 3986
        $chars_sub_delims = '!$&\'()*+,;=';
        $chars_pchar = $chars_sub_delims . ':@';

        // validate host
        if (!is_null($this->host)) {
            $host_def = new HTMLPurifier_AttrDef_URI_Host();
            $this->host = $host_def->validate($this->host, $config, $context);
            if ($this->host === false) $this->host = null;
        }

        // validate scheme
        // NOTE: It's not appropriate to check whether or not this
        // scheme is in our registry, since a URIFilter may convert a
        // URI that we don't allow into one we do.  So instead, we just
        // check if the scheme can be dropped because there is no host
        // and it is our default scheme.
        // The grouping below spells out how PHP has always evaluated this
        // condition (&& binds tighter than ||); it is deliberately kept
        // as-is so behavior does not change.
        if ((!is_null($this->scheme) && is_null($this->host)) || $this->host === '') {
            // support for relative paths is pretty abysmal when the
            // scheme is present, so axe it when possible
            $def = $config->getDefinition('URI');
            if ($def->defaultScheme === $this->scheme) {
                $this->scheme = null;
            }
        }

        // validate username
        if (!is_null($this->userinfo)) {
            $encoder = new HTMLPurifier_PercentEncoder($chars_sub_delims . ':');
            $this->userinfo = $encoder->encode($this->userinfo);
        }

        // validate port
        if (!is_null($this->port)) {
            if ($this->port < 1 || $this->port > 65535) $this->port = null;
        }

        // validate path
        $segments_encoder = new HTMLPurifier_PercentEncoder($chars_pchar . '/');
        if (!is_null($this->host)) { // this catches $this->host === ''
            // path-abempty (hier and relative)
            // http://www.example.com/my/path
            // //www.example.com/my/path (looks odd, but works, and
            //                            recognized by most browsers)
            // (this set is valid or invalid on a scheme by scheme
            // basis, so we'll deal with it later)
            // file:///my/path
            // ///my/path
            $this->path = $segments_encoder->encode($this->path);
        } elseif ($this->path !== '') {
            if ($this->path[0] === '/') {
                // path-absolute (hier and relative)
                // http:/my/path
                // /my/path
                if (strlen($this->path) >= 2 && $this->path[1] === '/') {
                    // This could happen if the host gets stripped out
                    // http://my/path
                    // //my/path
                    $this->path = '';
                } else {
                    $this->path = $segments_encoder->encode($this->path);
                }
            } elseif (!is_null($this->scheme)) {
                // path-rootless (hier)
                // http:my/path
                // Short circuit evaluation means we don't need to check nz
                $this->path = $segments_encoder->encode($this->path);
            } else {
                // path-noscheme (relative)
                // my/path
                // (once again, not checking nz)
                // The first segment may not contain ':', hence the
                // separate encoder without it in the preserved set.
                $segment_nc_encoder = new HTMLPurifier_PercentEncoder($chars_sub_delims . '@');
                $c = strpos($this->path, '/');
                if ($c !== false) {
                    $this->path =
                        $segment_nc_encoder->encode(substr($this->path, 0, $c)) .
                        $segments_encoder->encode(substr($this->path, $c));
                } else {
                    $this->path = $segment_nc_encoder->encode($this->path);
                }
            }
        } else {
            // path-empty (hier and relative)
            $this->path = ''; // just to be safe
        }

        // qf = query and fragment
        $qf_encoder = new HTMLPurifier_PercentEncoder($chars_pchar . '/?');

        if (!is_null($this->query)) {
            $this->query = $qf_encoder->encode($this->query);
        }

        if (!is_null($this->fragment)) {
            $this->fragment = $qf_encoder->encode($this->fragment);
        }

        return true;

    }

    /**
     * Converts the URI back into a string.
     * @return String URI assembled from the current components
     */
    public function toString() {
        // reconstruct authority
        $authority = null;
        // there is a rendering difference between a null authority
        // (http:foo-bar) and an empty string authority
        // (http:///foo-bar).
        if (!is_null($this->host)) {
            $authority = '';
            if(!is_null($this->userinfo)) $authority .= $this->userinfo . '@';
            $authority .= $this->host;
            if(!is_null($this->port))     $authority .= ':' . $this->port;
        }

        // Reconstruct the result
        // One might wonder about parsing quirks from browsers after
        // this reconstruction.  Unfortunately, parsing behavior depends
        // on what *scheme* was employed (file:///foo is handled *very*
        // differently than http:///foo), so unfortunately we have to
        // defer to the schemes to do the right thing.
        $result = '';
        if (!is_null($this->scheme))    $result .= $this->scheme . ':';
        if (!is_null($authority))       $result .=  '//' . $authority;
        $result .= $this->path;
        if (!is_null($this->query))     $result .= '?' . $this->query;
        if (!is_null($this->fragment))  $result .= '#' . $this->fragment;

        return $result;
    }

    /**
     * Tells whether the URI points at the same host as the configured
     * one: true when no host is set, or when the host equals the host of
     * the URI definition.
     * @param $config Configuration object (unused when host is null)
     * @param $context Context object
     * @return bool
     */
    public function isLocal($config, $context) {
        if ($this->host === null) return true;
        $uri_def = $config->getDefinition('URI');
        if ($uri_def->host === $this->host) return true;
        return false;
    }

    /**
     * Tells whether the URI is local, has a loadable scheme, and does not
     * step down from a secure default scheme to a non-secure one.
     * @param $config Configuration object
     * @param $context Context object
     * @return bool
     */
    public function isBenign($config, $context) {
        if (!$this->isLocal($config, $context)) return false;

        $scheme_obj = $this->getSchemeObj($config, $context);
        if (!$scheme_obj) return false; // conservative approach

        $current_scheme_obj = $config->getDefinition('URI')->getDefaultScheme($config, $context);
        // getDefaultScheme() may come back empty when the default scheme
        // cannot be loaded. Previously ->secure was read on that value,
        // raising a notice/warning and evaluating to "not secure"; the
        // guard keeps that outcome without the error.
        if ($current_scheme_obj && $current_scheme_obj->secure) {
            if (!$scheme_obj->secure) {
                return false;
            }
        }
        return true;
    }

}
07348 
07349 
07350 
07351 
07352 
/**
 * Definition object for URIs: holds the base URI, host and default
 * scheme taken from configuration, plus the URI filters that are
 * applied before and after scheme validation.
 */
class HTMLPurifier_URIDefinition extends HTMLPurifier_Definition
{

    public $type = 'URI';

    /** Active filters (those whose $post flag is falsy), keyed by name. */
    protected $filters = array();

    /** Active post-filters (those whose $post flag is truthy), keyed by name. */
    protected $postFilters = array();

    /** Candidate filters, keyed by name; discarded once setupFilters() ran. */
    protected $registeredFilters = array();

    /**
     * Parsed URI.Base (result of HTMLPurifier_URIParser::parse()), left
     * unset when URI.Base is null.
     */
    public $base;

    /**
     * Host name: URI.Host, or the host of the base URI when URI.Host is
     * null and a base URI is configured.
     */
    public $host;

    /**
     * Name of the default scheme: the scheme of the base URI when it has
     * one, otherwise URI.DefaultScheme.
     */
    public $defaultScheme;

    /**
     * Registers the built-in filters as candidates.
     */
    public function __construct() {
        $builtin = array(
            new HTMLPurifier_URIFilter_DisableExternal(),
            new HTMLPurifier_URIFilter_DisableExternalResources(),
            new HTMLPurifier_URIFilter_HostBlacklist(),
            new HTMLPurifier_URIFilter_SafeIframe(),
            new HTMLPurifier_URIFilter_MakeAbsolute(),
            new HTMLPurifier_URIFilter_Munge(),
        );
        foreach ($builtin as $filter) {
            $this->registerFilter($filter);
        }
    }

    /**
     * Records a filter as a candidate; it only becomes active if
     * setupFilters() later decides to add it.
     * @param $filter Filter object, stored under its $name
     */
    public function registerFilter($filter) {
        $this->registeredFilters[$filter->name] = $filter;
    }

    /**
     * Prepares a filter and activates it, unless prepare() returns
     * exactly false.
     * @param $filter Filter object
     * @param $config Configuration object passed to prepare()
     */
    public function addFilter($filter, $config) {
        // Only a strict false vetoes the filter; null is ok, for
        // backwards compat.
        if ($filter->prepare($config) === false) {
            return;
        }
        if (!$filter->post) {
            $this->filters[$filter->name] = $filter;
        } else {
            $this->postFilters[$filter->name] = $filter;
        }
    }

    /**
     * Performs setup: member variables first, then filters.
     */
    protected function doSetup($config) {
        $this->setupMemberVariables($config);
        $this->setupFilters($config);
    }

    /**
     * Activates every registered filter that is either flagged
     * always_load or switched on through its "URI.<name>" directive
     * (any value other than false and null), then drops the candidates.
     */
    protected function setupFilters($config) {
        foreach ($this->registeredFilters as $name => $filter) {
            $enabled = $filter->always_load;
            if (!$enabled) {
                $setting = $config->get('URI.' . $name);
                $enabled = ($setting !== false && $setting !== null);
            }
            if ($enabled) {
                $this->addFilter($filter, $config);
            }
        }
        unset($this->registeredFilters);
    }

    /**
     * Fills $host, $base and $defaultScheme from configuration.
     */
    protected function setupMemberVariables($config) {
        $this->host = $config->get('URI.Host');
        $baseSetting = $config->get('URI.Base');
        if (!is_null($baseSetting)) {
            $uriParser = new HTMLPurifier_URIParser();
            $this->base = $uriParser->parse($baseSetting);
            $this->defaultScheme = $this->base->scheme;
            if (is_null($this->host)) {
                $this->host = $this->base->host;
            }
        }
        if (is_null($this->defaultScheme)) {
            $this->defaultScheme = $config->get('URI.DefaultScheme');
        }
    }

    /**
     * Looks up the scheme object for $defaultScheme in the scheme
     * registry.
     * @return Whatever the registry's getScheme() returns
     */
    public function getDefaultScheme($config, $context) {
        $registry = HTMLPurifier_URISchemeRegistry::instance();
        return $registry->getScheme($this->defaultScheme, $config, $context);
    }

    /**
     * Runs the URI through every active filter, in insertion order.
     * @param $uri URI object, passed by reference to each filter
     * @return false as soon as one filter returns a falsy value,
     *         true otherwise
     */
    public function filter(&$uri, $config, $context) {
        foreach ($this->filters as $active) {
            if (!$active->filter($uri, $config, $context)) {
                return false;
            }
        }
        return true;
    }

    /**
     * Runs the URI through every active post-filter, in insertion order.
     * @param $uri URI object, passed by reference to each filter
     * @return false as soon as one filter returns a falsy value,
     *         true otherwise
     */
    public function postFilter(&$uri, $config, $context) {
        foreach ($this->postFilters as $active) {
            if (!$active->filter($uri, $config, $context)) {
                return false;
            }
        }
        return true;
    }

}
07451 
07452 
07453 
07454 
07455 
/**
 * Base class for URI filters: objects that inspect, and may modify or
 * veto, a URI via filter().
 */
abstract class HTMLPurifier_URIFilter
{

    /**
     * Unique name of the filter. HTMLPurifier_URIDefinition keys its
     * filter lists by it and looks up the "URI.<name>" directive to
     * decide whether the filter is enabled.
     */
    public $name;

    /**
     * When truthy, HTMLPurifier_URIDefinition stores the filter in its
     * post-filter list instead of its regular filter list.
     */
    public $post = false;

    /**
     * When truthy, HTMLPurifier_URIDefinition activates the filter
     * without consulting configuration.
     */
    public $always_load = false;

    /**
     * Hook called once before the filter is activated.
     * @param $config Configuration object
     * @return true by default; returning exactly false makes
     *         HTMLPurifier_URIDefinition::addFilter() skip the filter
     */
    public function prepare($config)
    {
        return true;
    }

    /**
     * Filters a URI.
     * @param $uri URI object, received by reference
     * @param $config Configuration object
     * @param $context Context object
     * @return Falsy to reject the URI (callers stop at the first falsy
     *         result), truthy to keep it
     */
    abstract public function filter(&$uri, $config, $context);

}
07519 
07520 
07521 
07522 
07523 
/**
 * Splits a URI string into its components and wraps them in an
 * HTMLPurifier_URI object. No validation is done beyond the split.
 */
class HTMLPurifier_URIParser
{

    /**
     * HTMLPurifier_PercentEncoder instance used to normalize the input
     * before it is split.
     */
    protected $percentEncoder;

    public function __construct() {
        $this->percentEncoder = new HTMLPurifier_PercentEncoder();
    }

    /**
     * Parses a URI.
     * @param $uri String URI to parse
     * @return HTMLPurifier_URI built from the extracted components, or
     *         false if the top-level pattern does not match
     */
    public function parse($uri) {

        $uri = $this->percentEncoder->normalize($uri);

        // The pattern follows RFC 3986, Appendix B. The characters
        // ["<>] are excluded on top of what the RFC recommends, since
        // they act as external delimiters.
        $r_URI = '!'.
            '(([^:/?#"<>]+):)?'. // 2. Scheme
            '(//([^/?#"<>]*))?'. // 4. Authority
            '([^?#"<>]*)'.       // 5. Path
            '(\?([^#"<>]*))?'.   // 7. Query
            '(#([^"<>]*))?'.     // 8. Fragment
            '!';

        $parts = array();
        if (!preg_match($r_URI, $uri, $parts)) {
            return false; // *really* invalid URI
        }

        // Separate out the parts: a component counts as present only
        // when its wrapping group (delimiter included) is non-empty.
        $scheme    = empty($parts[1]) ? null : $parts[2];
        $authority = empty($parts[3]) ? null : $parts[4];
        $path      = $parts[5]; // always present, can be empty
        $query     = empty($parts[6]) ? null : $parts[7];
        $fragment  = empty($parts[8]) ? null : $parts[9];

        // Without an authority there is no userinfo, host or port.
        $userinfo = null;
        $host     = null;
        $port     = null;

        if ($authority !== null) {
            // Break the authority down into userinfo@host:port
            $r_authority = "/^((.+?)@)?(\[[^\]]+\]|[^:]*)(:(\d*))?/";
            $pieces = array();
            preg_match($r_authority, $authority, $pieces);
            $userinfo = empty($pieces[1]) ? null : $pieces[2];
            $host     = empty($pieces[3]) ? '' : $pieces[3];
            $port     = empty($pieces[4]) ? null : (int) $pieces[5];
        }

        return new HTMLPurifier_URI(
            $scheme, $userinfo, $host, $port, $path, $query, $fragment);
    }

}
07590 
07591 
07592 
07593 
07594 
/**
 * Base class for scheme-specific URI validators. validate() performs
 * the checks common to all schemes and then hands over to the
 * scheme-specific doValidate().
 */
abstract class HTMLPurifier_URIScheme
{

    /**
     * Default port of the scheme. validate() clears the URI's port when
     * it loosely equals this value.
     */
    public $default_port = null;

    /**
     * NOTE(review): not read in this class; presumably tells whether a
     * browser would fetch such a URI on its own. Confirm with callers.
     */
    public $browsable = false;

    /**
     * NOTE(review): not read in this class; presumably marks schemes
     * that run over a secure channel. Confirm with callers.
     */
    public $secure = false;

    /**
     * NOTE(review): not read in this class; presumably tells whether the
     * scheme uses hierarchical paths. Confirm with callers.
     */
    public $hierarchical = false;

    /**
     * When true, validate() accepts a URI that names the scheme but has
     * no (or an empty) host.
     */
    public $may_omit_host = false;

    /**
     * Scheme-specific validation.
     * @param $uri URI object, received by reference
     * @param $config Configuration object
     * @param $context Context object
     * @return Result of the validation; returned unchanged by validate()
     */
    public abstract function doValidate(&$uri, $config, $context);

    /**
     * Validates a URI: strips a redundant default port, repairs or
     * rejects a missing/blank host, then delegates to doValidate().
     * @param $uri URI object, modified in place
     * @param $config Configuration object (consulted for URI.Host)
     * @param $context Context object
     * @return false when the host cannot be repaired, otherwise whatever
     *         doValidate() returns
     */
    public function validate(&$uri, $config, $context) {
        if ($this->default_port == $uri->port) {
            $uri->port = null;
        }

        $schemeMissing = is_null($uri->scheme);
        $hostBlank     = ($uri->host === '');

        // kludge: browsers do funny things when the scheme but not the
        // authority is set. With a scheme present, a missing host is an
        // error unless the scheme may omit it.
        $schemedWithoutHost = !$schemeMissing && ($hostBlank || is_null($uri->host));
        // Without a scheme, a *blank* host is an error, since this
        // translates into '///path' which most browsers interpret as
        // being 'http://path'. Note this case is checked regardless of
        // $may_omit_host.
        $needsRepair = (!$this->may_omit_host && $schemedWithoutHost)
            || ($schemeMissing && $hostBlank);

        if ($needsRepair) {
            if ($schemeMissing && substr($uri->path, 0, 2) != '//') {
                // Dropping the blank host keeps the meaning of the URI.
                $uri->host = null;
            } else {
                // Either a scheme is present, or the URI is '////path'
                // and nullifying the host would change its semantics:
                // try to insert the configured hostname instead.
                $host = $config->get('URI.Host');
                if (is_null($host)) {
                    // we can't do anything sensible, reject the URL.
                    return false;
                }
                $uri->host = $host;
            }
        }

        return $this->doValidate($uri, $config, $context);
    }

}
07686 
07687 
07688 
07689 
07690 
/**
 * Registry that hands out URI scheme validator objects by scheme name,
 * creating them on demand and caching them.
 */
class HTMLPurifier_URISchemeRegistry
{

    /**
     * Retrieves the shared registry instance.
     * @param $prototype Optional. Pass an object to make it the shared
     *        instance from now on, or boolean true to discard the current
     *        instance and start over with a fresh registry.
     * @return The shared registry instance
     * @note Previously the "true" case was unreachable: the prototype
     *       branch ran first and stored the boolean itself as the
     *       instance. It is now checked before the prototype branch.
     */
    public static function instance($prototype = null) {
        static $instance = null;
        if ($prototype === true) {
            // explicit reset request
            $instance = new HTMLPurifier_URISchemeRegistry();
        } elseif ($prototype !== null) {
            $instance = $prototype;
        } elseif ($instance === null) {
            // first use: lazily create the default registry
            $instance = new HTMLPurifier_URISchemeRegistry();
        }
        return $instance;
    }

    /**
     * Cache of scheme objects, keyed by scheme name.
     */
    protected $schemes = array();

    /**
     * Retrieves the validator object for a scheme.
     * @param $scheme String scheme name like http or mailto
     * @param $config Configuration object; a default configuration is
     *        created when a falsy value is passed
     * @param $context Context object
     * @return Scheme object, or null when the scheme is not permitted or
     *         no implementing class exists
     */
    public function getScheme($scheme, $config, $context) {
        if (!$config) $config = HTMLPurifier_Config::createDefault();

        // important, otherwise attacker could include arbitrary file
        $allowed_schemes = $config->get('URI.AllowedSchemes');
        if (!$config->get('URI.OverrideAllowedSchemes') &&
            !isset($allowed_schemes[$scheme])
        ) {
            return;
        }

        // A cached/registered scheme is returned even when it is not in
        // the allowed list, which can only happen when
        // URI.OverrideAllowedSchemes is on.
        if (isset($this->schemes[$scheme])) return $this->schemes[$scheme];
        // Never build a class name from a scheme that is not whitelisted.
        if (!isset($allowed_schemes[$scheme])) return;

        $class = 'HTMLPurifier_URIScheme_' . $scheme;
        if (!class_exists($class)) return;
        $this->schemes[$scheme] = new $class();
        return $this->schemes[$scheme];
    }

    /**
     * Registers a custom scheme object under a scheme name, replacing
     * any cached object of that name.
     * @param $scheme Scheme name
     * @param $scheme_obj Scheme object to return for that name
     */
    public function register($scheme, $scheme_obj) {
        $this->schemes[$scheme] = $scheme_obj;
    }

}
07755 
07756 
07757 
07758 
07759 
/**
 * Converts HTMLPurifier_Length objects between units, within one
 * measurement system or across two of them, using BCMath when it is
 * available and native floats otherwise.
 */
class HTMLPurifier_UnitConverter
{

    const ENGLISH = 1;
    const METRIC = 2;
    const DIGITAL = 3;

    /**
     * Conversion tables, one per measurement system.
     *
     * String keys are unit names mapped to their size relative to the
     * other units of the same system. An integer key names another
     * system and maps to array(unit to leave this system from,
     * multiplier, unit arrived at in the other system).
     */
    protected static $units = array(
        self::ENGLISH => array(
            'px' => 3, // This is as per CSS 2.1 and Firefox. Your mileage may vary
            'pt' => 4,
            'pc' => 48,
            'in' => 288,
            self::METRIC => array('pt', '0.352777778', 'mm'),
        ),
        self::METRIC => array(
            'mm' => 1,
            'cm' => 10,
            self::ENGLISH => array('mm', '2.83464567', 'pt'),
        ),
    );

    /**
     * Minimum number of significant figures kept in the output.
     */
    protected $outputPrecision;

    /**
     * Base scale (digits after the decimal point) used for
     * intermediate calculations.
     */
    protected $internalPrecision;

    /**
     * Whether the BCMath functions are used for arithmetic.
     */
    private $bcmath;

    /**
     * @param $output_precision Minimum significant figures of results
     * @param $internal_precision Base scale of intermediate results
     * @param $force_no_bcmath When true, native float arithmetic is used
     *        even if BCMath is installed
     */
    public function __construct($output_precision = 4, $internal_precision = 10, $force_no_bcmath = false) {
        $this->outputPrecision = $output_precision;
        $this->internalPrecision = $internal_precision;
        $this->bcmath = !$force_no_bcmath && function_exists('bcmul');
    }

    /**
     * Converts a length to another unit.
     * @param $length Object offering isValid(), getN() and getUnit()
     *        (an HTMLPurifier_Length)
     * @param $to_unit Name of the unit to convert to
     * @return HTMLPurifier_Length holding the converted value, or false
     *         when the length is invalid or either unit is unknown.
     *         A length of '0' or one without a unit yields a unitless '0'.
     */
    public function convert($length, $to_unit) {

        if (!$length->isValid()) {
            return false;
        }

        $value = $length->getN();
        $unit  = $length->getUnit();

        if ($value === '0' || $unit === false) {
            return new HTMLPurifier_Length('0', false);
        }

        // Find the system each unit belongs to.
        $system = false;
        $targetSystem = false;
        foreach (self::$units as $id => $table) {
            if (isset($table[$unit])) {
                $system = $id;
            }
            if (isset($table[$to_unit])) {
                $targetSystem = $id;
            }
        }
        if (!$system || !$targetSystem) {
            return false;
        }

        // Significant figures of the input (never fewer than the output
        // precision); needed for the final rounding.
        $sigfigs = $this->getSigFigs($value);
        if ($sigfigs < $this->outputPrecision) {
            $sigfigs = $this->outputPrecision;
        }

        // BCMath's scale only counts decimals, so for numbers below 1
        // the scale is widened by the number of leading decimal zeros;
        // that way there are always at least internalPrecision guard
        // digits.
        $magnitude = (int) floor(log(abs($value), 10));
        if ($magnitude < 0) {
            $scale = $this->internalPrecision - $magnitude;
        } else {
            $scale = $this->internalPrecision;
        }

        // At most two passes: one inside the source system, and, after a
        // system shift, one inside the destination system.
        for ($pass = 0; $pass < 2; $pass++) {

            $sameSystem = ($targetSystem === $system);

            // Unit to reach IN THIS SYSTEM: the final unit, or the
            // hand-over unit used for the shift to the other system.
            $stepUnit = $sameSystem ? $to_unit : self::$units[$system][$targetSystem][0];

            if ($stepUnit !== $unit) {
                $factor = $this->div(self::$units[$system][$unit], self::$units[$system][$stepUnit], $scale);
                $value = $this->mul($value, $factor, $scale);
                $unit = $stepUnit;
            }

            // Output was zero, so bail out early. Shouldn't ever happen.
            if ($value === '') {
                $value = '0';
                $unit = $to_unit;
                break;
            }

            // Conversion inside a single system is finished here.
            if ($sameSystem) {
                break;
            }

            if ($pass !== 0) {
                // A second shift would be needed: the system we moved to
                // did not contain the unit. This should never happen!
                return false;
            }

            // First pass only: cross over into the destination system,
            // then loop once more to reach the requested unit.
            $value = $this->mul($value, self::$units[$system][$targetSystem][1], $scale);
            $unit = self::$units[$system][$targetSystem][2];
            $system = $targetSystem;

        }

        // Post-condition: $unit == $to_unit
        if ($unit !== $to_unit) {
            return false;
        }

        // Round to the significant figures, then strip trailing decimal
        // zeros and a dangling decimal point.
        $value = $this->round($value, $sigfigs);
        if (strpos($value, '.') !== false) {
            $value = rtrim($value, '0');
        }
        $value = rtrim($value, '.');

        return new HTMLPurifier_Length($value, $unit);
    }

    /**
     * Counts the significant figures of a decimal number string.
     * @param $n Decimal number as a string
     * @return Integer count. Leading zeros and signs are ignored;
     *         trailing zeros count only when a decimal point is present.
     */
    public function getSigFigs($n) {
        $digits = ltrim($n, '0+-');
        $pointAt = strpos($digits, '.'); // decimal position
        if ($pointAt === false) {
            return strlen(rtrim($digits, '0'));
        }
        $count = strlen(ltrim($digits, '0.'));
        // When digits precede the point, the point itself is still part
        // of the string and must not be counted.
        if ($pointAt !== 0) {
            $count--;
        }
        return $count;
    }

    /**
     * Adds two numbers, using BCMath if available.
     */
    private function add($s1, $s2, $scale) {
        return $this->bcmath
            ? bcadd($s1, $s2, $scale)
            : $this->scale($s1 + $s2, $scale);
    }

    /**
     * Multiplies two numbers, using BCMath if available.
     */
    private function mul($s1, $s2, $scale) {
        return $this->bcmath
            ? bcmul($s1, $s2, $scale)
            : $this->scale($s1 * $s2, $scale);
    }

    /**
     * Divides two numbers, using BCMath if available.
     */
    private function div($s1, $s2, $scale) {
        return $this->bcmath
            ? bcdiv($s1, $s2, $scale)
            : $this->scale($s1 / $s2, $scale);
    }

    /**
     * Rounds a number to a given count of significant figures, using
     * BCMath if available.
     */
    private function round($n, $sigfigs) {
        $new_log = (int) floor(log(abs($n), 10)); // Number of digits left of decimal - 1
        $rp = $sigfigs - $new_log - 1; // Number of decimal places needed

        if (!$this->bcmath) {
            return $this->scale(round($n, $rp), $rp + 1);
        }

        $neg = $n < 0 ? '-' : ''; // Negative sign
        if ($rp >= 0) {
            // Add half a unit of the last kept decimal, then truncate.
            $n = bcadd($n, $neg . '0.' .  str_repeat('0', $rp) . '5', $rp + 1);
            return bcdiv($n, '1', $rp);
        }
        // Rounding left of the decimal point. This algorithm partially
        // depends on the standardized form of numbers that comes out of
        // bcmath.
        $n = bcadd($n, $neg . '5' . str_repeat('0', $new_log - $sigfigs), 0);
        return substr($n, 0, $sigfigs + strlen($neg)) . str_repeat('0', $new_log - $sigfigs + 1);
    }

    /**
     * Formats a float with a fixed number of decimals; a negative scale
     * rounds to the left of the decimal point.
     */
    private function scale($r, $scale) {
        if ($scale >= 0) {
            return sprintf('%.' . $scale . 'f', (float) $r);
        }
        // The f sprintf type doesn't support negative precision, so the
        // string is handled manually. First get the integer digits.
        $r = sprintf('%.0f', (float) $r);
        // Due to floating point precision loss, $r will more than likely
        // look something like 4652999999999.9234. Keep one digit more
        // than needed and use it to round appropriately.
        $precise = (string) round(substr($r, 0, strlen($r) + $scale), -1);
        // Drop the zero produced by that rounding and pad back to length.
        return substr($precise, 0, -1) . str_repeat('0', -$scale + 1);
    }

}
08010 
08011 
08012 
08013 
08014 
/**
 * Converts a raw value into the requested configuration type and performs
 * basic sanity checks on the result. The base implementation performs no
 * conversion of its own (see parseImplementation()).
 */
class HTMLPurifier_VarParser
{

    const STRING    = 1;
    const ISTRING   = 2;
    const TEXT      = 3;
    const ITEXT     = 4;
    const INT       = 5;
    const FLOAT     = 6;
    const BOOL      = 7;
    const LOOKUP    = 8;
    const ALIST     = 9;
    const HASH      = 10;
    const MIXED     = 11;

    /**
     * Map of human-readable type names to the integer type constants.
     */
    public static $types = array(
        'string'    => self::STRING,
        'istring'   => self::ISTRING,
        'text'      => self::TEXT,
        'itext'     => self::ITEXT,
        'int'       => self::INT,
        'float'     => self::FLOAT,
        'bool'      => self::BOOL,
        'lookup'    => self::LOOKUP,
        'list'      => self::ALIST,
        'hash'      => self::HASH,
        'mixed'     => self::MIXED
    );

    /**
     * Lookup table (type constant => true) of the types that are
     * string-like.
     */
    public static $stringTypes = array(
        self::STRING    => true,
        self::ISTRING   => true,
        self::TEXT      => true,
        self::ITEXT     => true,
    );

    /**
     * Parses a value as the given type and verifies that the outcome really
     * has that type.
     *
     * @param mixed      $var        Value to parse.
     * @param int|string $type       Type constant, or a type name found in
     *                               self::$types.
     * @param bool       $allow_null Whether a null result is acceptable.
     * @return mixed The parsed value; ISTRING/ITEXT results are lowercased.
     * @throws HTMLPurifier_VarParserException on an unknown type name or when
     *         the parsed value does not have the requested type.
     */
    final public function parse($var, $type, $allow_null = false)
    {
        // Translate a symbolic type name into its integer constant.
        if (is_string($type)) {
            if (!isset(self::$types[$type])) {
                throw new HTMLPurifier_VarParserException("Invalid type '$type'");
            }
            $type = self::$types[$type];
        }

        $parsed = $this->parseImplementation($var, $type, $allow_null);
        if ($allow_null && $parsed === null) {
            return null;
        }

        // Basic checks, to make sure nothing went horribly wrong inside
        // the implementation.
        switch ($type) {
            case self::STRING:
            case self::ISTRING:
            case self::TEXT:
            case self::ITEXT:
                if (is_string($parsed)) {
                    $fold_case = ($type == self::ISTRING || $type == self::ITEXT);
                    return $fold_case ? strtolower($parsed) : $parsed;
                }
                break;
            case self::INT:
                if (is_int($parsed)) {
                    return $parsed;
                }
                break;
            case self::FLOAT:
                if (is_float($parsed)) {
                    return $parsed;
                }
                break;
            case self::BOOL:
                if (is_bool($parsed)) {
                    return $parsed;
                }
                break;
            case self::LOOKUP:
            case self::ALIST:
            case self::HASH:
                if (!is_array($parsed)) {
                    break;
                }
                if ($type === self::LOOKUP) {
                    // every value of a lookup table must be exactly true
                    foreach ($parsed as $flag) {
                        if ($flag !== true) {
                            $this->error('Lookup table contains value other than true');
                        }
                    }
                } elseif ($type === self::ALIST) {
                    // a list must be indexed 0..n-1 in order
                    $keys = array_keys($parsed);
                    if (array_keys($keys) !== $keys) {
                        $this->error('Indices for list are not uniform');
                    }
                }
                return $parsed;
            case self::MIXED:
                return $parsed;
            default:
                $this->errorInconsistent(get_class($this), $type);
        }

        // Reached only when the parsed value failed its type check.
        $this->errorGeneric($parsed, $type);
    }

    /**
     * Performs the actual conversion. The base class hands the value back
     * untouched; presumably subclasses override this to do real parsing.
     */
    protected function parseImplementation($var, $type, $allow_null)
    {
        return $var;
    }

    /**
     * Raises a parsing error with the given message.
     *
     * @throws HTMLPurifier_VarParserException always
     */
    protected function error($msg)
    {
        throw new HTMLPurifier_VarParserException($msg);
    }

    /**
     * Raises an error for a type constant that parse() has no check for.
     *
     * @param string $class Name of the parser class reporting the problem.
     * @param int    $type  Offending type constant.
     * @throws HTMLPurifier_Exception always
     */
    protected function errorInconsistent($class, $type)
    {
        $type_name = self::getTypeName($type);
        throw new HTMLPurifier_Exception("Inconsistency in $class: " . $type_name . " not implemented");
    }

    /**
     * Raises the generic "expected type X, got Y" error.
     */
    protected function errorGeneric($var, $type)
    {
        $actual = gettype($var);
        $this->error("Expected type " . self::getTypeName($type) . ", got $actual");
    }

    /**
     * Returns the name registered for a type constant, or 'unknown' when the
     * constant is not present in self::$types.
     */
    public static function getTypeName($type)
    {
        static $names;
        if (!$names) {
            // Lazily build the reverse (constant => name) table.
            $names = array_flip(self::$types);
        }
        return isset($names[$type]) ? $names[$type] : 'unknown';
    }

}
08165 
08166 
08167 
08168 
08169 
/**
 * Exception type raised by HTMLPurifier_VarParser when a value cannot be
 * parsed as, or does not match, the requested type.
 */
class HTMLPurifier_VarParserException extends HTMLPurifier_Exception
{
}
08177 
08178 
08179 
08180 
08181 
/**
 * Validates an inline CSS declaration list (the contents of a style
 * attribute), keeping only the properties known to the CSS definition whose
 * values pass that property's validator.
 */
class HTMLPurifier_AttrDef_CSS extends HTMLPurifier_AttrDef
{

    /**
     * @param string $css     Raw declaration list, e.g. "color:red;margin:0".
     * @param object $config  Configuration; supplies the CSS definition.
     * @param object $context Context; 'CurrentCSSProperty' is registered for
     *                        the duration of the call.
     * @return string|bool Rebuilt "prop:value;" list, or false if nothing
     *                     survived validation.
     */
    public function validate($css, $config, $context)
    {
        $css = $this->parseCDATA($css);
        $definition = $config->getCSSDefinition();

        // The name of the property being processed is exposed through the
        // context. $property is handed over here and then reassigned in the
        // loop, so it must remain this very variable.
        $property = false;
        $context->register('CurrentCSSProperty', $property);

        // We break the spec and split on semicolons. Semicolons rarely
        // appear in escaped form, so this is flaky but fast. They MIGHT
        // appear in URIs; see HTMLPurifier_AttrDef_CSSURI for details.
        $accepted = array();
        foreach (explode(';', $css) as $declaration) {
            if (!$declaration || !strpos($declaration, ':')) {
                continue;
            }

            list($property, $value) = explode(':', $declaration, 2);
            $property = trim($property);
            $value    = trim($value);

            // Look the property up as written first; retry in lowercase
            // only when it is not already all-lowercase letters.
            if (!isset($definition->info[$property])) {
                if (ctype_lower($property)) {
                    continue;
                }
                $property = strtolower($property);
                if (!isset($definition->info[$property])) {
                    continue;
                }
            }

            // inherit works for everything (but only on the base property).
            // This check is inefficient, since the validator will trim and
            // lowercase again.
            $is_inherit = (strtolower(trim($value)) === 'inherit');
            $result = $is_inherit
                ? 'inherit'
                : $definition->info[$property]->validate($value, $config, $context);

            if ($result === false) {
                continue;
            }
            // keyed by property, so a later duplicate replaces an earlier one
            $accepted[$property] = $result;
        }

        $context->destroy('CurrentCSSProperty');

        // The output is not written while parsing; collecting first is
        // slightly inefficient but is the only way of getting rid of
        // duplicates.
        $serialized = '';
        foreach ($accepted as $name => $checked) {
            $serialized .= "$name:$checked;";
        }

        if (!$serialized) {
            return false;
        }
        return $serialized;
    }

}
08265 
08266 
08267 
08268 
08269 
/**
 * Wraps another attribute definition: validation is delegated to the wrapped
 * object, and make() hands out a copy of it.
 */
class HTMLPurifier_AttrDef_Clone extends HTMLPurifier_AttrDef
{
    /**
     * The wrapped attribute definition.
     */
    protected $clone;

    /**
     * @param object $clone Attribute definition to wrap.
     */
    public function __construct($clone)
    {
        $this->clone = $clone;
    }

    /**
     * Forwards validation to the wrapped definition.
     */
    public function validate($v, $config, $context)
    {
        $inner = $this->clone;
        return $inner->validate($v, $config, $context);
    }

    /**
     * Returns a clone of the wrapped definition; $string is ignored.
     */
    public function make($string)
    {
        $copy = clone $this->clone;
        return $copy;
    }

}
08294 
08295 
08296 
08297 
08298 
08299 // Enum = Enumerated
/**
 * Validates a value against a fixed set of allowed keywords.
 */
class HTMLPurifier_AttrDef_Enum extends HTMLPurifier_AttrDef
{

    /**
     * Lookup table of valid values, keyed by the value itself (the
     * constructor flips the list it is given).
     */
    public $valid_values   = array();

    /**
     * Whether values are matched case-sensitively.
     */
    protected $case_sensitive = false; // values according to W3C spec

    /**
     * @param array $valid_values   List of allowed values.
     * @param bool  $case_sensitive Whether matching is case-sensitive.
     */
    public function __construct($valid_values = array(), $case_sensitive = false)
    {
        $this->case_sensitive = $case_sensitive;
        $this->valid_values   = array_flip($valid_values);
    }

    /**
     * @return string|bool The trimmed (and, when case-insensitive,
     *                     lowercased) value, or false if it is not allowed.
     */
    public function validate($string, $config, $context)
    {
        $string = trim($string);
        // we may want to do full case-insensitive libraries
        if (!$this->case_sensitive && !ctype_lower($string)) {
            $string = strtolower($string);
        }
        if (isset($this->valid_values[$string])) {
            return $string;
        }
        return false;
    }

    /**
     * Builds an enum from a comma-separated list of values; an "s:" prefix
     * makes the resulting enum case-sensitive.
     */
    public function make($string)
    {
        $sensitive = (strlen($string) > 2 && $string[0] == 's' && $string[1] == ':');
        if ($sensitive) {
            $string = substr($string, 2);
        }
        return new HTMLPurifier_AttrDef_Enum(explode(',', $string), $sensitive);
    }

}
08360 
08361 
08362 
08363 
08364 
/**
 * Validates an integer written as an optional sign followed by decimal
 * digits, optionally restricted to negative, zero and/or positive values.
 */
class HTMLPurifier_AttrDef_Integer extends HTMLPurifier_AttrDef
{

    /**
     * Whether negative values are accepted.
     */
    protected $negative = true;

    /**
     * Whether zero is accepted.
     */
    protected $zero = true;

    /**
     * Whether positive values are accepted.
     */
    protected $positive = true;

    /**
     * @param bool $negative Accept negative integers.
     * @param bool $zero     Accept zero.
     * @param bool $positive Accept positive integers.
     */
    public function __construct($negative = true, $zero = true, $positive = true)
    {
        $this->negative = $negative;
        $this->zero     = $zero;
        $this->positive = $positive;
    }

    /**
     * @return string|bool The normalised integer string (leading "+" removed,
     *                     "-0" turned into "0"), or false when invalid.
     */
    public function validate($integer, $config, $context)
    {
        $integer = $this->parseCDATA($integer);
        if ($integer === '') {
            return false;
        }

        // A plain (int) cast is not enough: certain fringe cases must not
        // come back as an integer.

        // clip leading sign
        $lead = $integer[0];
        if ($this->negative && $lead === '-') {
            $digits = substr($integer, 1);
            if ($digits === '0') {
                $integer = '0'; // rm minus sign for zero
            }
        } elseif ($this->positive && $lead === '+') {
            $integer = substr($integer, 1); // rm unnecessary plus
            $digits  = $integer;
        } else {
            $digits = $integer;
        }

        // everything after the sign has to be digits
        if (!ctype_digit($digits)) {
            return false;
        }

        // perform scope tests
        $out_of_scope =
            (!$this->zero     && $integer == 0) ||
            (!$this->positive && $integer > 0)  ||
            (!$this->negative && $integer < 0);

        return $out_of_scope ? false : $integer;
    }

}
08434 
08435 
08436 
08437 
08438 
/**
 * Validates a language code made of hyphen-separated subtags. An invalid
 * primary subtag rejects the value; an invalid later subtag truncates the
 * result at that point.
 */
class HTMLPurifier_AttrDef_Lang extends HTMLPurifier_AttrDef
{

    /**
     * @return string|bool Lowercased code made of the leading valid subtags,
     *                     or false when the primary subtag is invalid.
     */
    public function validate($string, $config, $context)
    {
        $string = trim($string);
        if (!$string) {
            return false;
        }

        $subtags     = explode('-', $string);
        $num_subtags = count($subtags);
        if ($num_subtags == 0) {
            return false; // sanity check
        }

        // Primary subtag: either the single letter x or i, or two to three
        // alphabetic characters.
        $primary = $subtags[0];
        $length  = strlen($primary);
        if ($length == 1) {
            if ($primary != 'x' && $primary != 'i') {
                return false;
            }
        } elseif ($length == 2 || $length == 3) {
            if (!ctype_alpha($primary)) {
                return false;
            }
            if (!ctype_lower($primary)) {
                $primary = strtolower($primary);
            }
        } else {
            // empty, or longer than three characters
            return false;
        }

        $new_string = $primary;

        // Remaining subtags: 1-8 alphanumeric characters each. The second
        // subtag has one extra rule: if it is a single character it must
        // be x. Processing stops at the first subtag that fails.
        for ($i = 1; $i < $num_subtags; $i++) {
            $subtag = $subtags[$i];
            $length = strlen($subtag);

            $invalid = ($length == 0 || $length > 8 || !ctype_alnum($subtag));
            if ($i == 1 && $length == 1 && $subtag != 'x') {
                $invalid = true;
            }
            if ($invalid) {
                return $new_string;
            }

            if (!ctype_lower($subtag)) {
                $subtag = strtolower($subtag);
            }
            $new_string .= '-' . $subtag;
        }

        return $new_string;
    }

}
08508 
08509 
08510 
08511 
08512 
/**
 * Picks one of two attribute definitions depending on the name of the token
 * currently stored in the context.
 */
class HTMLPurifier_AttrDef_Switch
{

    /** Tag name that selects $withTag. */
    protected $tag;

    /** Definitions used when the current token is / is not $tag. */
    protected $withTag, $withoutTag;

    /**
     * @param string $tag         Tag name to switch on.
     * @param object $with_tag    Definition applied when the current token
     *                            has that name.
     * @param object $without_tag Definition applied otherwise.
     */
    public function __construct($tag, $with_tag, $without_tag)
    {
        $this->tag        = $tag;
        $this->withTag    = $with_tag;
        $this->withoutTag = $without_tag;
    }

    /**
     * Delegates to the definition selected by the context's 'CurrentToken';
     * a missing token counts as "not the tag".
     */
    public function validate($string, $config, $context)
    {
        $token = $context->get('CurrentToken', true);
        $chosen = ($token && $token->name === $this->tag)
            ? $this->withTag
            : $this->withoutTag;
        return $chosen->validate($string, $config, $context);
    }

}
08543 
08544 
08545 
08546 
08547 
/**
 * Accepts any text; the value is only passed through parseCDATA().
 */
class HTMLPurifier_AttrDef_Text extends HTMLPurifier_AttrDef
{

    /**
     * @return mixed Whatever parseCDATA() yields for $string.
     */
    public function validate($string, $config, $context)
    {
        $parsed = $this->parseCDATA($string);
        return $parsed;
    }

}
08559 
08560 
08561 
08562 
08563 
/**
 * Validates a URI by parsing it and running it through generic validation,
 * the URI definition's filters and scheme-specific validation.
 */
class HTMLPurifier_AttrDef_URI extends HTMLPurifier_AttrDef
{

    /** URI parser instance. */
    protected $parser;

    /** Whether the URI refers to an embedded resource. */
    protected $embedsResource;

    /**
     * @param bool $embeds_resource Whether the URI points to an embedded
     *                              resource.
     */
    public function __construct($embeds_resource = false)
    {
        $this->embedsResource = (bool) $embeds_resource;
        $this->parser = new HTMLPurifier_URIParser();
    }

    /**
     * Creates a validator; the string 'embedded' turns on the
     * embedded-resource flag.
     */
    public function make($string)
    {
        return new HTMLPurifier_AttrDef_URI($string === 'embedded');
    }

    /**
     * @return string|bool The URI rebuilt as a string, or false when URIs
     *                     are disabled or any validation stage fails.
     */
    public function validate($uri, $config, $context)
    {
        if ($config->get('URI.Disable')) {
            return false;
        }

        // parse the URI
        $uri = $this->parser->parse($this->parseCDATA($uri));
        if ($uri === false) {
            return false;
        }

        // add embedded flag to context for validators
        $context->register('EmbeddedURI', $this->embedsResource);

        // The gauntlet: each stage runs only if the previous one passed.
        $ok = false;
        // generic validation
        if ($uri->validate($config, $context)) {
            // chained filtering
            $uri_def = $config->getDefinition('URI');
            if ($uri_def->filter($uri, $config, $context)) {
                // scheme-specific validation; embedded resources require a
                // browsable scheme
                $scheme_obj = $uri->getSchemeObj($config, $context);
                if ($scheme_obj && (!$this->embedsResource || $scheme_obj->browsable)) {
                    if ($scheme_obj->validate($uri, $config, $context)) {
                        // post chained filtering
                        if ($uri_def->postFilter($uri, $config, $context)) {
                            $ok = true;
                        }
                    }
                }
            }
        }

        $context->destroy('EmbeddedURI');

        // back to string
        return $ok ? $uri->toString() : false;
    }

}
08637 
08638 
08639 
08640 
08641 
/**
 * Validates a CSS number: an optional sign, digits, and an optional decimal
 * part. Redundant zeros and a leading "+" are stripped.
 */
class HTMLPurifier_AttrDef_CSS_Number extends HTMLPurifier_AttrDef
{

    /**
     * Whether a leading minus sign is rejected.
     */
    protected $non_negative = false;

    /**
     * @param bool $non_negative Reject values written with a minus sign.
     */
    public function __construct($non_negative = false)
    {
        $this->non_negative = $non_negative;
    }

    /**
     * @return string|bool The normalised number, or false when invalid.
     */
    public function validate($number, $config, $context)
    {
        $number = $this->parseCDATA($number);

        if ($number === '') {
            return false;
        }
        if ($number === '0') {
            return '0';
        }

        // Peel off the sign; only a minus is carried into the output.
        $sign  = '';
        $first = $number[0];
        if ($first === '-') {
            if ($this->non_negative) {
                return false;
            }
            $sign = '-';
        }
        if ($first === '-' || $first === '+') {
            $number = substr($number, 1);
        }

        // Pure integer: drop leading zeros.
        if (ctype_digit($number)) {
            $number = ltrim($number, '0');
            if ($number) {
                return $sign . $number;
            }
            return '0';
        }

        // Period is the only non-numeric character allowed
        if (strpos($number, '.') === false) {
            return false;
        }

        list($whole, $fraction) = explode('.', $number, 2);

        if ($whole === '' && $fraction === '') {
            return false;
        }
        if ($whole !== '' && !ctype_digit($whole)) {
            return false;
        }

        $whole    = ltrim($whole, '0');
        $fraction = rtrim($fraction, '0');

        // Nothing significant after the period: return the integer part.
        if ($fraction === '') {
            return $whole ? $sign . $whole : '0';
        }
        if (!ctype_digit($fraction)) {
            return false;
        }

        return $sign . $whole . '.' . $fraction;
    }

}
08707 
08708 
08709 
08710 
08711 
/**
 * Validates a CSS number and clamps it to the range 0..1.
 */
class HTMLPurifier_AttrDef_CSS_AlphaValue extends HTMLPurifier_AttrDef_CSS_Number
{

    public function __construct()
    {
        // opacity is non-negative, but negative input is accepted here and
        // clamped in validate() instead of being rejected
        parent::__construct(false);
    }

    /**
     * @return string|bool '0' for values below 0, '1' for values above 1,
     *                     the normalised number otherwise, or false when the
     *                     input is not a valid number.
     */
    public function validate($number, $config, $context)
    {
        $result = parent::validate($number, $config, $context);
        if ($result === false) {
            return false;
        }

        $float = (float) $result;
        if ($float > 1.0) {
            return '1';
        }
        if ($float < 0.0) {
            return '0';
        }
        return $result;
    }

}
08729 
08730 
08731 
08732 
08733 
/**
 * Validates the shorthand CSS property background by splitting it on spaces
 * and offering each piece to the validators of the individual background-*
 * properties.
 */
class HTMLPurifier_AttrDef_CSS_Background extends HTMLPurifier_AttrDef
{

    /**
     * Validators for the sub-properties, keyed by full property name and
     * taken from the CSS definition.
     */
    protected $info;

    /**
     * @param object $config Configuration; supplies the CSS definition.
     */
    public function __construct($config) {
        $def = $config->getCSSDefinition();
        $this->info['background-color'] = $def->info['background-color'];
        $this->info['background-image'] = $def->info['background-image'];
        $this->info['background-repeat'] = $def->info['background-repeat'];
        $this->info['background-attachment'] = $def->info['background-attachment'];
        $this->info['background-position'] = $def->info['background-position'];
    }

    /**
     * @param string $string  Shorthand value, e.g. "#fff url(a.png) no-repeat".
     * @param object $config  Configuration, forwarded to the sub-validators.
     * @param object $context Context, forwarded to the sub-validators.
     * @return string|bool Space-separated validated parts in the order
     *                     color, image, repeat, attachment, position, or
     *                     false when nothing validated.
     */
    public function validate($string, $config, $context) {

        // regular pre-processing
        $string = $this->parseCDATA($string);
        if ($string === '') return false;

        // munge rgb() decl if necessary
        $string = $this->mungeRgb($string);

        // assumes URI doesn't have spaces in it
        $bits = explode(' ', $string); // bits to process

        $caught = array();
        $caught['color']    = false;
        $caught['image']    = false;
        $caught['repeat']   = false;
        $caught['attachment'] = false;
        $caught['position'] = false;

        $i = 0; // number of catches

        foreach ($bits as $bit) {
            if ($bit === '') continue;

            // Fold case so keywords match, but leave the payload of url(...)
            // alone: URI paths are case-sensitive, and lowercasing them
            // would make the declaration point at a different resource.
            if (strncasecmp($bit, 'url(', 4) === 0) {
                $bit = 'url(' . substr($bit, 4);
            } else {
                $bit = strtolower($bit);
            }

            foreach ($caught as $key => $status) {
                if ($key != 'position') {
                    // each non-position slot can be filled only once
                    if ($status !== false) continue;
                    $r = $this->info['background-' . $key]->validate($bit, $config, $context);
                } else {
                    // position is the catch-all: collect the raw bits now
                    // and validate them together after the loop
                    $r = $bit;
                }
                if ($r === false) continue;
                if ($key == 'position') {
                    if ($caught[$key] === false) $caught[$key] = '';
                    $caught[$key] .= $r . ' ';
                } else {
                    $caught[$key] = $r;
                }
                $i++;
                break;
            }
        }

        if (!$i) return false;
        if ($caught['position'] !== false) {
            $caught['position'] = $this->info['background-position']->
                validate($caught['position'], $config, $context);
        }

        $ret = array();
        foreach ($caught as $value) {
            if ($value === false) continue;
            $ret[] = $value;
        }

        if (empty($ret)) return false;
        return implode(' ', $ret);

    }

}
08817 
08818 
08819 
08820 
08821 
08822 /* W3C says:
08823     [ // adjective and number must be in correct order, even if
08824       // you could switch them without introducing ambiguity.
08825       // some browsers support that syntax
08826         [
08827             <percentage> | <length> | left | center | right
08828         ]
08829         [
08830             <percentage> | <length> | top | center | bottom
08831         ]?
08832     ] |
08833     [ // this signifies that the vertical and horizontal adjectives
08834       // can be arbitrarily ordered, however, there can only be two,
08835       // one of each, or none at all
08836         [
08837             left | center | right
08838         ] ||
08839         [
08840             top | center | bottom
08841         ]
08842     ]
08843     top, left = 0%
08844     center, (none) = 50%
08845     bottom, right = 100%
08846 */
08847 
08848 /* QuirksMode says:
08849     keyword + length/percentage must be ordered correctly, as per W3C
08850 
08851     Internet Explorer and Opera, however, support arbitrary ordering. We
08852     should fix it up.
08853 
08854     Minor issue though, not strictly necessary.
08855 */
08856 
08857 // control freaks may appreciate the ability to convert these to
08858 // percentages or something, but it's not necessary
08859 
/**
 * Validates the value of background-position: up to two components, each a
 * keyword (left, right, top, bottom, center), a length or a percentage.
 */
class HTMLPurifier_AttrDef_CSS_BackgroundPosition extends HTMLPurifier_AttrDef
{

    /** Validator for length components. */
    protected $length;

    /** Validator for percentage components. */
    protected $percentage;

    public function __construct()
    {
        $this->length     = new HTMLPurifier_AttrDef_CSS_Length();
        $this->percentage = new HTMLPurifier_AttrDef_CSS_Percentage();
    }

    /**
     * @return string|bool Horizontal component followed by the vertical
     *                     one (whichever were found), or false when nothing
     *                     was recognised.
     */
    public function validate($string, $config, $context)
    {
        $string = $this->parseCDATA($string);

        // keyword => slot; center is resolved to ch/cv in the loop
        $lookup = array(
            'top'    => 'v',
            'bottom' => 'v',
            'left'   => 'h',
            'right'  => 'h',
            'center' => 'c'
        );

        $keywords = array(
            'h'  => false, // left, right
            'v'  => false, // top, bottom
            'ch' => false, // center (first word)
            'cv' => false, // center (second word)
        );
        $measures = array();
        $i = 0; // number of recognised components so far

        foreach (explode(' ', $string) as $bit) {
            if ($bit === '') {
                continue;
            }

            // test for keyword
            $lbit = ctype_lower($bit) ? $bit : strtolower($bit);
            if (isset($lookup[$lbit])) {
                $slot = $lookup[$lbit];
                if ($slot == 'c') {
                    // center is horizontal when it comes first and
                    // vertical otherwise
                    $slot = ($i == 0) ? 'ch' : 'cv';
                }
                $keywords[$slot] = $lbit;
                $i++;
            }

            // test for length, then for percentage
            foreach (array($this->length, $this->percentage) as $validator) {
                $r = $validator->validate($bit, $config, $context);
                if ($r !== false) {
                    $measures[] = $r;
                    $i++;
                }
            }
        }

        if (!$i) {
            return false; // no valid values were caught
        }

        $ret = array();

        // first (horizontal) component
        if ($keywords['h']) {
            $ret[] = $keywords['h'];
        } elseif ($keywords['ch']) {
            $ret[] = $keywords['ch'];
            $keywords['cv'] = false; // prevent re-use: center = center center
        } elseif (count($measures)) {
            $ret[] = array_shift($measures);
        }

        // second (vertical) component
        if ($keywords['v']) {
            $ret[] = $keywords['v'];
        } elseif ($keywords['cv']) {
            $ret[] = $keywords['cv'];
        } elseif (count($measures)) {
            $ret[] = array_shift($measures);
        }

        if (!$ret) {
            return false;
        }
        return implode(' ', $ret);
    }

}
08951 
08952 
08953 
08954 
08955 
/**
 * Validates the shorthand CSS property border by offering each
 * space-separated piece to the width, style and color validators.
 */
class HTMLPurifier_AttrDef_CSS_Border extends HTMLPurifier_AttrDef
{

    /**
     * Validators for the sub-properties, keyed by property name and taken
     * from the CSS definition.
     */
    protected $info = array();

    /**
     * @param object $config Configuration; supplies the CSS definition.
     */
    public function __construct($config) {
        $def = $config->getCSSDefinition();
        $this->info['border-width'] = $def->info['border-width'];
        $this->info['border-style'] = $def->info['border-style'];
        $this->info['border-top-color'] = $def->info['border-top-color'];
    }

    /**
     * @param string $string  Shorthand value, e.g. "1px solid red".
     * @param object $config  Configuration, forwarded to the sub-validators.
     * @param object $context Context, forwarded to the sub-validators.
     * @return string|bool Space-separated validated parts in input order, or
     *                     false when no part validated.
     */
    public function validate($string, $config, $context) {
        $string = $this->parseCDATA($string);
        $string = $this->mungeRgb($string);
        $bits = explode(' ', $string);
        $done = array(); // segments we've finished
        $ret = ''; // return value
        foreach ($bits as $bit) {
            foreach ($this->info as $propname => $validator) {
                // each sub-property may be matched at most once
                if (isset($done[$propname])) continue;
                $r = $validator->validate($bit, $config, $context);
                if ($r !== false) {
                    $ret .= $r . ' ';
                    $done[$propname] = true;
                    break;
                }
            }
        }
        $ret = rtrim($ret);
        // Signal failure with false rather than an empty string: callers
        // such as HTMLPurifier_AttrDef_CSS only discard a result that is
        // === false, so '' would be emitted as an empty "border:;".
        if ($ret === '') return false;
        return $ret;
    }

}
08996 
08997 
08998 
08999 
/**
 * Validates a CSS color: a configured color keyword, an rgb() literal with
 * three integer or three percentage components, or a 3/6-digit hexadecimal
 * value (with or without the leading "#").
 */
class HTMLPurifier_AttrDef_CSS_Color extends HTMLPurifier_AttrDef
{

    /**
     * @param string $color   Color value to validate.
     * @param object $config  Configuration; 'Core.ColorKeywords' supplies
     *                        the keyword table.
     * @param object $context Unused here.
     * @return string|bool The mapped keyword value, a normalised rgb()
     *                     literal with clamped components, or a "#"-prefixed
     *                     hex color; false when invalid.
     */
    public function validate($color, $config, $context) {

        // NOTE: the keyword table is cached in a static, so it comes from
        // whichever config was passed on the first call.
        static $colors = null;
        if ($colors === null) $colors = $config->get('Core.ColorKeywords');

        $color = trim($color);
        if ($color === '') return false;

        $lower = strtolower($color);
        if (isset($colors[$lower])) return $colors[$lower];

        // The literal must START with "rgb(": the components are cut out at
        // a fixed offset of 4 below, so a match anywhere else (for example
        // "argb(1,2,3)") would slice the wrong substring and silently emit
        // a different color instead of rejecting the value.
        if (strpos($color, 'rgb(') === 0) {
            // rgb literal handling
            $length = strlen($color);
            // the first closing parenthesis must be the last character
            if (strpos($color, ')') !== $length - 1) return false;
            $triad = substr($color, 4, $length - 4 - 1);
            $parts = explode(',', $triad);
            if (count($parts) !== 3) return false;
            $type = false; // to ensure that they're all the same type
            $new_parts = array();
            foreach ($parts as $part) {
                $part = trim($part);
                if ($part === '') return false;
                $length = strlen($part);
                if ($part[$length - 1] === '%') {
                    // handle percents
                    if (!$type) {
                        $type = 'percentage';
                    } elseif ($type !== 'percentage') {
                        return false;
                    }
                    // clamp to 0..100
                    $num = (float) substr($part, 0, $length - 1);
                    if ($num < 0) $num = 0;
                    if ($num > 100) $num = 100;
                    $new_parts[] = "$num%";
                } else {
                    // handle integers
                    if (!$type) {
                        $type = 'integer';
                    } elseif ($type !== 'integer') {
                        return false;
                    }
                    // clamp to 0..255
                    $num = (int) $part;
                    if ($num < 0) $num = 0;
                    if ($num > 255) $num = 255;
                    $new_parts[] = (string) $num;
                }
            }
            $new_triad = implode(',', $new_parts);
            $color = "rgb($new_triad)";
        } else {
            // hexadecimal handling
            if ($color[0] === '#') {
                $hex = substr($color, 1);
            } else {
                // bare hex digits: add the missing hash to the output
                $hex = $color;
                $color = '#' . $color;
            }
            $length = strlen($hex);
            if ($length !== 3 && $length !== 6) return false;
            if (!ctype_xdigit($hex)) return false;
        }

        return $color;

    }

}
09074 
09075 
09076 
09077 
09078 
/**
 * Tries a list of attribute definitions in order and returns the result of
 * the first one that accepts the value.
 */
class HTMLPurifier_AttrDef_CSS_Composite extends HTMLPurifier_AttrDef
{

    /**
     * List of definitions to try, in order.
     */
    public $defs;

    /**
     * @param array $defs List of attribute definition objects.
     */
    public function __construct($defs)
    {
        $this->defs = $defs;
    }

    /**
     * @return mixed The first result that is not false, or false when every
     *               definition rejects the value.
     */
    public function validate($string, $config, $context)
    {
        foreach ($this->defs as $candidate) {
            $outcome = $candidate->validate($string, $config, $context);
            if ($outcome !== false) {
                return $outcome;
            }
        }
        return false;
    }

}
09113 
09114 
09115 
09116 
09117 
/**
 * Decorator that rejects every value while the current token is a given
 * element and otherwise defers to the wrapped definition.
 */
class HTMLPurifier_AttrDef_CSS_DenyElementDecorator extends HTMLPurifier_AttrDef
{
    /** Wrapped definition and the element name it is denied on. */
    public $def, $element;

    /**
     * @param object $def     Definition to wrap.
     * @param string $element Element name on which values are denied.
     */
    public function __construct($def, $element)
    {
        $this->element = $element;
        $this->def     = $def;
    }

    /**
     * @return mixed false when the context's 'CurrentToken' is the denied
     *               element; otherwise the wrapped definition's result.
     */
    public function validate($string, $config, $context)
    {
        $token  = $context->get('CurrentToken', true);
        $denied = ($token && $token->name == $this->element);
        if ($denied) {
            return false;
        }
        return $this->def->validate($string, $config, $context);
    }
}
09142 
09143 
09144 
09145 
09146 
/**
 * Validates the value of a filter declaration. Only "none" and a single
 * alpha filter with an opacity parameter are accepted.
 */
class HTMLPurifier_AttrDef_CSS_Filter extends HTMLPurifier_AttrDef
{

    /** Integer validator used for the opacity value. */
    protected $intValidator;

    public function __construct() {
        $this->intValidator = new HTMLPurifier_AttrDef_Integer();
    }

    /**
     * @param string $value   Filter value, e.g. "alpha(opacity=50)".
     * @param object $config  Configuration, forwarded to the int validator.
     * @param object $context Context, forwarded to the int validator.
     * @return string|bool "none", or the function rebuilt with its accepted
     *                     parameters (opacity clamped to 0..100); false when
     *                     the function name is not an allowed alpha filter.
     */
    public function validate($value, $config, $context) {
        $value = $this->parseCDATA($value);
        if ($value === 'none') return $value;
        // if we looped this we could support multiple filters
        $function_length = strcspn($value, '(');
        $function = trim(substr($value, 0, $function_length));
        if ($function !== 'alpha' &&
            $function !== 'Alpha' &&
            $function !== 'progid:DXImageTransform.Microsoft.Alpha'
            ) return false;
        // the parameter list is everything between "(" and the next ")"
        $cursor = $function_length + 1;
        $parameters_length = strcspn($value, ')', $cursor);
        $parameters = substr($value, $cursor, $parameters_length);
        $params = explode(',', $parameters);
        $ret_params = array();
        $lookup = array();
        foreach ($params as $param) {
            // A parameter without "=" (including the empty one produced by
            // "alpha()") has no value. Skip it instead of destructuring a
            // one-element array, which reads an undefined offset.
            if (strpos($param, '=') === false) continue;
            list($key, $amount) = explode('=', $param);
            $key    = trim($key);
            $amount = trim($amount);
            if (isset($lookup[$key])) continue; // first occurrence wins
            if ($key !== 'opacity') continue;
            $amount = $this->intValidator->validate($amount, $config, $context);
            if ($amount === false) continue;
            // clamp to 0..100
            $int = (int) $amount;
            if ($int > 100) $amount = '100';
            if ($int < 0) $amount = '0';
            $ret_params[] = "$key=$amount";
            $lookup[$key] = true;
        }
        $ret_parameters = implode(',', $ret_params);
        $ret_function = "$function($ret_parameters)";
        return $ret_function;
    }

}
09197 
09198 
09199 
09200 
09201 
/**
 * Validates the CSS shorthand property "font" by splitting it on spaces
 * and feeding the pieces to the validators of the individual font
 * properties.
 */
class HTMLPurifier_AttrDef_CSS_Font extends HTMLPurifier_AttrDef
{

    /**
     * Validators for the component properties, keyed by property name.
     * Copied from the CSS definition at construction time.
     */
    protected $info = array();

    public function __construct($config) {
        $css = $config->getCSSDefinition();
        $components = array(
            'font-style',
            'font-variant',
            'font-weight',
            'font-size',
            'line-height',
            'font-family',
        );
        foreach ($components as $component) {
            $this->info[$component] = $css->info[$component];
        }
    }

    /**
     * @return The lowercased keyword for a system font, the reassembled
     *         shorthand ("[style] [variant] [weight] size[/line-height]
     *         family"), or false when the font-size or font-family part
     *         is missing or invalid.
     */
    public function validate($string, $config, $context) {

        static $system_fonts = array(
            'caption' => true,
            'icon' => true,
            'menu' => true,
            'message-box' => true,
            'small-caption' => true,
            'status-bar' => true
        );

        // regular pre-processing
        $string = $this->parseCDATA($string);
        if ($string === '') return false;

        // a lone system font keyword is returned lowercased
        $lowered = strtolower($string);
        if (isset($system_fonts[$lowered])) {
            return $lowered;
        }

        $tokens   = explode(' ', $string);
        $total    = count($tokens);
        $optional = array('font-style', 'font-variant', 'font-weight');
        $seen     = array(); // optional properties matched so far
        $phase    = 0;       // 0: optional props, 1: size, 2: family
        $output   = '';

        for ($pos = 0; $pos < $total; $pos++) {
            if ($tokens[$pos] === '') continue;
            switch ($phase) {

                // phase 0: font-style, font-variant or font-weight
                case 0:
                    foreach ($optional as $property) {
                        if (isset($seen[$property])) continue;
                        $checked = $this->info[$property]->validate(
                            $tokens[$pos], $config, $context
                        );
                        if ($checked !== false) {
                            $output .= $checked . ' ';
                            $seen[$property] = true;
                            break;
                        }
                    }
                    // once all three are matched, skip this phase next time
                    if (count($seen) >= 3) $phase = 1;
                    if ($checked !== false) break;
                    // no match: deliberately fall through and try the
                    // same token as a font-size

                // phase 1: font-size, optionally followed by /line-height
                case 1:
                    $slash_seen = false;
                    $raw_height = false;
                    $raw_size   = $tokens[$pos];
                    if (strpos($raw_size, '/') !== false) {
                        $pieces     = explode('/', $raw_size);
                        $raw_size   = $pieces[0];
                        $raw_height = $pieces[1];
                        if ($raw_height === '') {
                            // the slash is followed by a space; the
                            // line-height lives in a later token
                            $raw_height = false;
                            $slash_seen = true;
                        }
                    }

                    $checked = $this->info['font-size']->validate(
                        $raw_size, $config, $context
                    );
                    if ($checked === false) return false;
                    $output .= $checked;

                    if ($raw_height !== false) {
                        // size/line-height were glued together in this token
                        $slash_seen = true;
                        $next = $pos;
                    } else {
                        // look ahead for a detached slash and line-height
                        for ($next = $pos + 1; $next < $total; $next++) {
                            if ($tokens[$next] === '') continue;
                            if ($tokens[$next] === '/') {
                                if ($slash_seen) return false; // two slashes
                                $slash_seen = true;
                                continue;
                            }
                            $raw_height = $tokens[$next];
                            break;
                        }
                    }

                    if ($slash_seen) {
                        // consume the tokens that made up the line-height
                        $pos = $next;
                        $checked = $this->info['line-height']->validate(
                            $raw_height, $config, $context
                        );
                        if ($checked !== false) {
                            $output .= '/' . $checked;
                        }
                    }
                    $output .= ' ';
                    $phase = 2;
                    break;

                // phase 2: everything that remains is the font-family
                case 2:
                    $family = implode(' ', array_slice($tokens, $pos));
                    $checked = $this->info['font-family']->validate(
                        $family, $config, $context
                    );
                    if ($checked === false) return false;
                    // processing completed successfully
                    return rtrim($output . $checked . ' ');
            }
        }
        // ran out of tokens before a font-family was found
        return false;
    }

}
09347 
09348 
09349 
09350 
09351 
/**
 * Validates a CSS font-family list: a comma separated sequence of generic
 * names and (optionally quoted) font names.
 */
class HTMLPurifier_AttrDef_CSS_FontFamily extends HTMLPurifier_AttrDef
{

    /**
     * Set of bytes permitted in a non-trivial font name, as a string
     * suitable for strspn(): dash, space, ASCII letters and digits, and
     * every byte from 0x80 to 0xFF.
     */
    protected $mask = null;

    public function __construct() {
        // Built with range() rather than a string-increment loop:
        // 'z'++ yields 'aa', which still compares <= 'z', so a loop of the
        // form for ($c = 'a'; $c <= 'z'; $c++) keeps going up to 'yz' and
        // pads the mask with a couple of thousand redundant bytes.
        $this->mask = '- '
            . implode('', range('a', 'z'))
            . implode('', range('A', 'Z'))
            . implode('', range('0', '9'));
        // special bytes used by UTF-8
        for ($i = 0x80; $i <= 0xFF; $i++) {
            // We don't bother excluding invalid bytes in this range,
            // because our restriction to well-formed UTF-8 will
            // prevent these from ever occurring.
            $this->mask .= chr($i);
        }

        /*
            PHP's internal strcspn implementation is
            O(length of string * length of mask), making it inefficient
            for large masks.  However, it's still faster than
            preg_match 8)
          for (p = s1;;) {
            spanp = s2;
            do {
              if (*spanp == c || p == s1_end) {
                return p - s1;
              }
            } while (spanp++ < (s2_end - 1));
            c = *++p;
          }
         */
        // possible optimization: invert the mask.
    }

    /**
     * @param $string  Raw font-family value
     * @param $config  Configuration object; %CSS.AllowedFonts is read from it
     * @param $context Context object (not used by this method)
     * @return Comma separated list of the accepted fonts (names that are
     *         not purely alphanumeric are wrapped in single quotes), or
     *         false if no font survived.
     */
    public function validate($string, $config, $context) {
        static $generic_names = array(
            'serif' => true,
            'sans-serif' => true,
            'monospace' => true,
            'fantasy' => true,
            'cursive' => true
        );
        $allowed_fonts = $config->get('CSS.AllowedFonts');

        // assume that no font names contain commas in them
        $fonts = explode(',', $string);
        $final = '';
        foreach($fonts as $font) {
            $font = trim($font);
            if ($font === '') continue;
            // match a generic name
            if (isset($generic_names[$font])) {
                if ($allowed_fonts === null || isset($allowed_fonts[$font])) {
                    $final .= $font . ', ';
                }
                continue;
            }
            // match a quoted name
            if ($font[0] === '"' || $font[0] === "'") {
                $length = strlen($font);
                if ($length <= 2) continue;
                $quote = $font[0];
                if ($font[$length - 1] !== $quote) continue;
                $font = substr($font, 1, $length - 2);
            }

            $font = $this->expandCSSEscape($font);

            // $font is a pure representation of the font name

            if ($allowed_fonts !== null && !isset($allowed_fonts[$font])) {
                continue;
            }

            if (ctype_alnum($font) && $font !== '') {
                // very simple font, allow it in unharmed
                $final .= $font . ', ';
                continue;
            }

            // bugger out on whitespace.  form feed (0C) really
            // shouldn't show up regardless
            $font = str_replace(array("\n", "\t", "\r", "\x0C"), ' ', $font);

            // Here, there are various classes of characters which need
            // to be treated differently:
            //  - Alphanumeric characters are essentially safe.  We
            //    handled these above.
            //  - Spaces require quoting, though most parsers will do
            //    the right thing if there aren't any characters that
            //    can be misinterpreted
            //  - Dashes rarely occur, but they are fairly unproblematic
            //    for parsing/rendering purposes.
            //  The above characters cover the majority of Western font
            //  names.
            //  - Arbitrary Unicode characters not in ASCII.  Because
            //    most parsers give little thought to Unicode, treatment
            //    of these codepoints is basically uniform, even for
            //    punctuation-like codepoints.  These characters can
            //    show up in non-Western pages and are supported by most
            //    major browsers, for example: "MS 明朝" is a
            //    legitimate font-name
            //    <http://ja.wikipedia.org/wiki/MS_明朝>.  See
            //    the CSS3 spec for more examples:
            //    <http://www.w3.org/TR/2011/WD-css3-fonts-20110324/localizedfamilynames.png>
            //    You can see live samples of these on the Internet:
            //    <http://www.google.co.jp/search?q=font-family+MS+明朝|ゴシック>
            //    However, most of these fonts have ASCII equivalents:
            //    for example, 'MS Mincho', and it's considered
            //    professional to use ASCII font names instead of
            //    Unicode font names.  Thanks Takeshi Terada for
            //    providing this information.
            //  The following characters, to my knowledge, have not been
            //  used to name font names.
            //  - Single quote.  While theoretically you might find a
            //    font name that has a single quote in its name (serving
            //    as an apostrophe, e.g. Dave's Scribble), I haven't
            //    been able to find any actual examples of this.
            //    Internet Explorer's cssText translation (which I
            //    believe is invoked by innerHTML) normalizes any
            //    quoting to single quotes, and fails to escape single
            //    quotes.  (Note that this is not IE's behavior for all
            //    CSS properties, just some sort of special casing for
            //    font-family).  So a single quote *cannot* be used
            //    safely in the font-family context if there will be an
            //    innerHTML/cssText translation.  Note that Firefox 3.x
            //    does this too.
            //  - Double quote.  In IE, these get normalized to
            //    single-quotes, no matter what the encoding.  (Fun
            //    fact, in IE8, the 'content' CSS property gained
            //    support, where they special cased to preserve encoded
            //    double quotes, but still translate unadorned double
            //    quotes into single quotes.)  So, because their
            //    fixpoint behavior is identical to single quotes, they
            //    cannot be allowed either.  Firefox 3.x displays
            //    single-quote style behavior.
            //  - Backslashes are reduced by one (so \\ -> \) every
            //    iteration, so they cannot be used safely.  This shows
            //    up in IE7, IE8 and FF3
            //  - Semicolons, commas and backticks are handled properly.
            //  - The rest of the ASCII punctuation is handled properly.
            // We haven't checked what browsers do to unadorned
            // versions, but this is not important as long as the
            // browser doesn't /remove/ surrounding quotes (as IE does
            // for HTML).
            //
            // With these results in hand, we conclude that there are
            // various levels of safety:
            //  - Paranoid: alphanumeric, spaces and dashes(?)
            //  - International: Paranoid + non-ASCII Unicode
            //  - Edgy: Everything except quotes, backslashes
            //  - NoJS: Standards compliance, e.g. sod IE. Note that
            //    with some judicious character escaping (since certain
            //    types of escaping doesn't work) this is theoretically
            //    OK as long as innerHTML/cssText is not called.
            // We believe that international is a reasonable default
            // (that we will implement now), and once we do more
            // extensive research, we may feel comfortable with dropping
            // it down to edgy.

            // International: alphanumeric, spaces, dashes and non-ASCII
            // Unicode.  Use of str(c)spn assumes that the string was
            // already well formed Unicode (which of course it is).
            if (strspn($font, $this->mask) !== strlen($font)) {
                continue;
            }

            // Historical:
            // In the absence of innerHTML/cssText, these ugly
            // transforms don't pose a security risk (as \\ and \"
            // might--these escapes are not supported by most browsers).
            // We could try to be clever and use single-quote wrapping
            // when there is a double quote present, but I have chosen
            // not to implement that.  (NOTE: you can reduce the amount
            // of escapes by one depending on what quoting style you use)
            // $font = str_replace('\\', '\\5C ', $font);
            // $font = str_replace('"',  '\\22 ', $font);
            // $font = str_replace("'",  '\\27 ', $font);

            // font possibly with spaces, requires quoting
            $final .= "'$font', ";
        }
        $final = rtrim($final, ', ');
        if ($final === '') return false;
        return $final;
    }

}
09545 
09546 
09547 
09548 
09549 
/**
 * Validates a CSS identifier made of ASCII letters, digits, underscores
 * and dashes (no escapes, no non-ASCII characters).
 */
class HTMLPurifier_AttrDef_CSS_Ident extends HTMLPurifier_AttrDef
{

    /**
     * @return The trimmed identifier, or false if it is empty, is the
     *         string '0', or does not match the identifier pattern.
     */
    public function validate($string, $config, $context) {

        $ident = trim($string);

        // '' and '0' are the two strings PHP treats as false; both are
        // rejected up front
        if ($ident === '' || $ident === '0') return false;

        // optional leading dash, then a letter or underscore, then any
        // number of letters, digits, underscores or dashes
        $matched = preg_match('/^(-?[A-Za-z_][A-Za-z_\-0-9]*)$/', $ident);
        return $matched ? $ident : false;

    }

}
09570 
09571 
09572 
09573 
09574 
/**
 * Decorator which strips a trailing "!important" from a CSS value before
 * handing it to the wrapped definition, and optionally puts it back on
 * the validated result.
 */
class HTMLPurifier_AttrDef_CSS_ImportantDecorator extends HTMLPurifier_AttrDef
{
    /** Wrapped definition that validates the value proper. */
    public $def;

    /** Whether "!important" is re-appended to a valid result. */
    public $allow;

    /**
     * @param $def   Definition to wrap
     * @param $allow Whether or not to allow !important
     */
    public function __construct($def, $allow = false) {
        $this->def = $def;
        $this->allow = $allow;
    }

    /**
     * Intercepts and removes the !important marker, then validates the
     * remainder with the wrapped definition.
     *
     * @return The wrapped definition's result, with ' !important' appended
     *         when the marker was present and $allow is set; false when
     *         the wrapped definition rejects the value.
     */
    public function validate($string, $config, $context) {
        // test for ! and important tokens
        $string = trim($string);
        $is_important = false;
        // :TODO: optimization: test directly for !important and ! important
        if (strlen($string) >= 9 && substr($string, -9) === 'important') {
            $temp = rtrim(substr($string, 0, -9));
            // use a temp, because we might want to restore important
            if (strlen($temp) >= 1 && substr($temp, -1) === '!') {
                $string = rtrim(substr($temp, 0, -1));
                $is_important = true;
            }
        }
        $string = $this->def->validate($string, $config, $context);
        // An invalid value must stay invalid: appending to false would
        // turn it into the truthy string ' !important'.
        if ($string === false) return false;
        if ($this->allow && $is_important) $string .= ' !important';
        return $string;
    }
}
09611 
09612 
09613 
09614 
09615 
/**
 * Validates a CSS length, optionally constrained to lie between a
 * minimum and a maximum length.
 */
class HTMLPurifier_AttrDef_CSS_Length extends HTMLPurifier_AttrDef
{

    /** Lower and upper bounds as built by HTMLPurifier_Length::make(), or null. */
    protected $min, $max;

    /**
     * @param $min Minimum length, or null for no bound; passed to
     *             HTMLPurifier_Length::make()
     * @param $max Maximum length, or null for no bound; passed to
     *             HTMLPurifier_Length::make()
     */
    public function __construct($min = null, $max = null) {
        $this->min = null;
        $this->max = null;
        if ($min !== null) $this->min = HTMLPurifier_Length::make($min);
        if ($max !== null) $this->max = HTMLPurifier_Length::make($max);
    }

    /**
     * @return The length's toString() form, '0' for a bare zero, or false
     *         if the value is invalid, out of bounds, or cannot be
     *         compared against a bound.
     */
    public function validate($string, $config, $context) {
        $string = $this->parseCDATA($string);

        // Shortcuts: a bare zero is accepted as is; any other string of
        // at most one character is rejected
        if ($string === '0') return '0';
        if (strlen($string) <= 1) return false;

        $parsed = HTMLPurifier_Length::make($string);
        if (!$parsed->isValid()) return false;

        if ($this->min) {
            $order = $parsed->compareTo($this->min);
            if ($order === false || $order < 0) return false;
        }
        if ($this->max) {
            $order = $parsed->compareTo($this->max);
            if ($order === false || $order > 0) return false;
        }

        return $parsed->toString();
    }

}
09659 
09660 
09661 
09662 
09663 
/**
 * Validates the CSS shorthand property "list-style" by matching each
 * space separated piece against the list-style-type, list-style-position
 * and list-style-image validators.
 */
class HTMLPurifier_AttrDef_CSS_ListStyle extends HTMLPurifier_AttrDef
{

    /**
     * Validators for the component properties, keyed by property name.
     * Copied from the CSS definition at construction time.
     */
    protected $info;

    public function __construct($config) {
        $def = $config->getCSSDefinition();
        $this->info['list-style-type']     = $def->info['list-style-type'];
        $this->info['list-style-position'] = $def->info['list-style-position'];
        $this->info['list-style-image'] = $def->info['list-style-image'];
    }

    /**
     * @return The caught components joined by spaces in the order type,
     *         image, position, or false if nothing was caught.
     */
    public function validate($string, $config, $context) {

        // regular pre-processing
        $string = $this->parseCDATA($string);
        if ($string === '') return false;

        // assumes URI doesn't have spaces in it
        // NOTE(review): the whole value, including any url(...), is
        // lowercased here.
        $bits = explode(' ', strtolower($string)); // bits to process

        $caught = array();
        $caught['type']     = false;
        $caught['position'] = false;
        $caught['image']    = false;

        $i = 0; // number of catches
        $none = false;

        foreach ($bits as $bit) {
            // All three components are caught: stop scanning and build the
            // result from what we have.  (A bare "return" here would hand
            // null back to the caller, which is neither false nor a value.)
            if ($i >= 3) break;
            if ($bit === '') continue;
            foreach ($caught as $key => $status) {
                if ($status !== false) continue;
                $r = $this->info['list-style-' . $key]->validate($bit, $config, $context);
                if ($r === false) continue;
                if ($r === 'none') {
                    // only the first 'none' is honoured, and it is never
                    // recorded for the image component
                    if ($none) continue;
                    else $none = true;
                    if ($key == 'image') continue;
                }
                $caught[$key] = $r;
                $i++;
                break;
            }
        }

        if (!$i) return false;

        $ret = array();

        // construct type
        if ($caught['type']) $ret[] = $caught['type'];

        // construct image
        if ($caught['image']) $ret[] = $caught['image'];

        // construct position
        if ($caught['position']) $ret[] = $caught['position'];

        if (empty($ret)) return false;
        return implode(' ', $ret);

    }

}
09738 
09739 
09740 
09741 
09742 
/**
 * Validates a space separated list of values, each of which is checked
 * by the same wrapped definition, keeping at most a fixed number of them.
 */
class HTMLPurifier_AttrDef_CSS_Multiple extends HTMLPurifier_AttrDef
{

    /** Definition used to validate each individual value. */
    public $single;

    /** Maximum number of values kept in the result. */
    public $max;

    /**
     * @param $single Definition to validate each value with
     * @param $max    Maximum number of values to accept (default 4)
     */
    public function __construct($single, $max = 4) {
        $this->single = $single;
        $this->max = $max;
    }

    /**
     * @return The accepted values joined by single spaces (invalid ones
     *         are skipped and do not count towards the maximum), or false
     *         if none was accepted.
     */
    public function validate($string, $config, $context) {
        $string = $this->parseCDATA($string);
        if ($string === '') return false;

        $joined   = '';
        $accepted = 0;
        // parseCDATA replaced \r, \t and \n
        foreach (explode(' ', $string) as $piece) {
            if ($accepted >= $this->max) break;
            if (ctype_space($piece)) continue;
            $checked = $this->single->validate($piece, $config, $context);
            if ($checked === false) continue;
            $joined .= $checked . ' ';
            $accepted++;
        }

        return $joined === '' ? false : rtrim($joined);
    }

}
09797 
09798 
09799 
09800 
09801 
/**
 * Validates a CSS percentage: a number, checked by the CSS number
 * definition, followed by a percent sign.
 */
class HTMLPurifier_AttrDef_CSS_Percentage extends HTMLPurifier_AttrDef
{

    /** HTMLPurifier_AttrDef_CSS_Number instance used for the numeric part. */
    protected $number_def;

    /**
     * @param $non_negative Forwarded unchanged to the
     *                      HTMLPurifier_AttrDef_CSS_Number constructor
     */
    public function __construct($non_negative = false) {
        $this->number_def = new HTMLPurifier_AttrDef_CSS_Number($non_negative);
    }

    /**
     * @return The validated number with '%' appended, or false if the
     *         value is shorter than two characters, does not end in '%',
     *         or has an invalid numeric part.
     */
    public function validate($string, $config, $context) {

        $string = $this->parseCDATA($string);

        // needs at least one character for the number plus the sign
        if (strlen($string) < 2) return false;
        if (substr($string, -1) !== '%') return false;

        $digits  = substr($string, 0, -1);
        $checked = $this->number_def->validate($digits, $config, $context);

        return $checked === false ? false : "$checked%";

    }

}
09838 
09839 
09840 
09841 
09842 
/**
 * Validates the value of the CSS property text-decoration: either 'none'
 * or a space separated combination of line-through, overline and
 * underline.
 */
class HTMLPurifier_AttrDef_CSS_TextDecoration extends HTMLPurifier_AttrDef
{

    /**
     * @return 'none', the recognised keywords (lowercased, in input order)
     *         joined by spaces, or false if no keyword was recognised.
     */
    public function validate($string, $config, $context) {

        static $allowed_values = array(
            'line-through' => true,
            'overline' => true,
            'underline' => true,
        );

        $normalized = strtolower($this->parseCDATA($string));
        if ($normalized === 'none') return $normalized;

        // keep only the recognised keywords; anything else is dropped
        $kept = array();
        foreach (explode(' ', $normalized) as $word) {
            if (isset($allowed_values[$word])) {
                $kept[] = $word;
            }
        }

        if (empty($kept)) return false;
        return implode(' ', $kept);

    }

}
09877 
09878 
09879 
09880 
09881 
/**
 * Validates a URI in CSS syntax, i.e. wrapped in url(...), by unwrapping
 * it and delegating to the regular URI definition.
 */
class HTMLPurifier_AttrDef_CSS_URI extends HTMLPurifier_AttrDef_URI
{

    public function __construct() {
        parent::__construct(true); // always embedded
    }

    /**
     * @param $uri_string Raw CSS value, expected to look like url(...)
     * @return The validated URI re-wrapped as url("...") with parentheses
     *         and single quotes percent-encoded, or false if the wrapper
     *         is malformed or the URI itself is rejected.
     */
    public function validate($uri_string, $config, $context) {
        // parse the URI out of the string and then pass it onto
        // the parent object

        $uri_string = $this->parseCDATA($uri_string);
        if (strpos($uri_string, 'url(') !== 0) return false;
        $uri_string = substr($uri_string, 4);
        // Nothing after "url(": there is no closing parenthesis to find,
        // and indexing the last character of an empty string is an error.
        if ($uri_string === false || $uri_string === '') return false;
        $new_length = strlen($uri_string) - 1;
        if ($uri_string[$new_length] != ')') return false;
        $uri = trim(substr($uri_string, 0, $new_length));

        if (!empty($uri) && ($uri[0] == "'" || $uri[0] == '"')) {
            $quote = $uri[0];
            $new_length = strlen($uri) - 1;
            // A lone quote character would otherwise be taken as both the
            // opening and the closing quote; require a real pair.
            if ($new_length < 1) return false;
            if ($uri[$new_length] !== $quote) return false;
            $uri = substr($uri, 1, $new_length - 1);
        }

        $uri = $this->expandCSSEscape($uri);

        $result = parent::validate($uri, $config, $context);

        if ($result === false) return false;

        // extra sanity check; should have been done by URI
        $result = str_replace(array('"', "\\", "\n", "\x0c", "\r"), "", $result);

        // suspicious characters are ()'; we're going to percent encode
        // them for safety.
        $result = str_replace(array('(', ')', "'"), array('%28', '%29', '%27'), $result);

        // there's an extra bug where ampersands lose their escaping on
        // an innerHTML cycle, so a very unlucky query parameter could
        // then change the meaning of the URL.  Unfortunately, there's
        // not much we can do about that...

        return "url(\"$result\")";

    }

}
09939 
09940 
09941 
09942 
09943 
/**
 * Validates a boolean attribute: any non-empty value is replaced by the
 * attribute's own name.
 */
class HTMLPurifier_AttrDef_HTML_Bool extends HTMLPurifier_AttrDef
{

    /** Value returned for a non-empty attribute value. */
    protected $name;

    public $minimized = true;

    /**
     * @param $name Attribute name to return from validate()
     */
    public function __construct($name = false) {
        $this->name = $name;
    }

    /**
     * @return The configured name, or false when $string is empty in the
     *         PHP sense (which includes '' and '0').
     */
    public function validate($string, $config, $context) {
        return empty($string) ? false : $this->name;
    }

    /**
     * @param $string Name of the attribute
     * @return A new HTMLPurifier_AttrDef_HTML_Bool for that name
     */
    public function make($string) {
        return new self($string);
    }

}
09968 
09969 
09970 
09971 
09972 
/**
 * Validates a space separated list of name tokens.  Splitting and
 * filtering are separate protected steps so subclasses can override them.
 */
class HTMLPurifier_AttrDef_HTML_Nmtokens extends HTMLPurifier_AttrDef
{

    /**
     * @return The surviving tokens joined by single spaces, or false if
     *         the input is empty, is '0', or yields no tokens.
     */
    public function validate($string, $config, $context) {

        $string = trim($string);

        // early abort: '' and '0' (strings that convert to false) are invalid
        if (!$string) return false;

        $candidates = $this->split($string, $config, $context);
        $survivors  = $this->filter($candidates, $config, $context);

        return empty($survivors) ? false : implode(' ', $survivors);

    }

    /**
     * Splits a space separated list into its well-formed tokens;
     * anything that does not match the token pattern is discarded.
     */
    protected function split($string, $config, $context) {
        // OPTIMIZABLE!
        // do the preg_match, capture all subpatterns for reformulation

        // we don't support U+00A1 and up codepoints or
        // escaping because I don't know how to do that with regexps
        // and plus it would complicate optimization efforts (you never
        // see that anyway).
        $starts_token = '(?:(?<=\s)|\A)'; // look behind for space or string start
        $token_body   = '((?:--|-?[A-Za-z_])[A-Za-z_\-0-9]*)';
        $ends_token   = '(?:(?=\s)|\z)';  // look ahead for space or string end

        $found = array();
        preg_match_all('/' . $starts_token . $token_body . $ends_token . '/', $string, $found);
        return $found[1];
    }

    /**
     * Hook for subclasses to drop or alter tokens; this implementation
     * returns the list unchanged.
     */
    protected function filter($tokens, $config, $context) {
        return $tokens;
    }

}
10021 
10022 
10023 
10024 
10025 
/**
 * Validates the class attribute: a token list that is additionally
 * filtered against %Attr.AllowedClasses and %Attr.ForbiddenClasses and
 * stripped of duplicates.
 */
class HTMLPurifier_AttrDef_HTML_Class extends HTMLPurifier_AttrDef_HTML_Nmtokens
{
    /**
     * Uses the parent's pattern-based splitting for the XHTML 1.1 and
     * XHTML 2.0 doctypes, and a plain whitespace split for every other
     * doctype.
     */
    protected function split($string, $config, $context) {
        // really, this twiddle should be lazy loaded
        $doctype = $config->getDefinition('HTML')->doctype->name;
        $use_parent = ($doctype == "XHTML 1.1" || $doctype == "XHTML 2.0");
        if (!$use_parent) {
            return preg_split('/\s+/', $string);
        }
        return parent::split($string, $config, $context);
    }

    /**
     * Keeps a token only if it is allowed (when an allow list is
     * configured), not forbidden, and not already kept.
     */
    protected function filter($tokens, $config, $context) {
        $allowed   = $config->get('Attr.AllowedClasses');
        $forbidden = $config->get('Attr.ForbiddenClasses');
        $kept = array();
        foreach ($tokens as $token) {
            if ($allowed !== null && !isset($allowed[$token])) continue;
            if (isset($forbidden[$token])) continue;
            // We need this O(n) check because of PHP's array
            // implementation that casts -0 to 0.
            if (in_array($token, $kept, true)) continue;
            $kept[] = $token;
        }
        return $kept;
    }
}
10058 
10059 
10060 
/**
 * Validates a color as used in legacy HTML attributes: a keyword from
 * %Core.ColorKeywords or a 3 or 6 digit hexadecimal value with an
 * optional leading '#'.
 */
class HTMLPurifier_AttrDef_HTML_Color extends HTMLPurifier_AttrDef
{

    /**
     * @return The configured value for a color keyword, a six digit
     *         "#rrggbb" string for hexadecimal input, or false if the
     *         value is neither.
     */
    public function validate($string, $config, $context) {

        // keyword table is read from the first config seen and then
        // reused for the lifetime of the request
        static $colors = null;
        if ($colors === null) $colors = $config->get('Core.ColorKeywords');

        $string = trim($string);

        if (empty($string)) return false;
        // Look the keyword up under the same lowercased key that was
        // tested, so that "RED" resolves like "red" instead of reading an
        // undefined index.
        $lower = strtolower($string);
        if (isset($colors[$lower])) return $colors[$lower];
        if ($string[0] === '#') $hex = substr($string, 1);
        else $hex = $string;

        $length = strlen($hex);
        if ($length !== 3 && $length !== 6) return false;
        if (!ctype_xdigit($hex)) return false;
        // expand shorthand #rgb to #rrggbb
        if ($length === 3) $hex = $hex[0].$hex[0].$hex[1].$hex[1].$hex[2].$hex[2];

        return "#$hex";

    }

}
10089 
10090 
10091 
10092 
10093 
/**
 * Enum validator for frame targets whose list of valid values is pulled
 * from %Attr.AllowedFrameTargets the first time validate() runs.
 */
class HTMLPurifier_AttrDef_HTML_FrameTarget extends HTMLPurifier_AttrDef_Enum
{

    /** False until the allowed targets have been loaded from the config. */
    public $valid_values = false; // uninitialized value

    /** Matching is case-insensitive. */
    protected $case_sensitive = false;

    /** Intentionally empty: values are loaded lazily in validate(). */
    public function __construct() {}

    /**
     * @param string $string  Raw attribute value.
     * @param object $config  Configuration object.
     * @param object $context Context object.
     * @return mixed Whatever the parent enum validator returns.
     */
    public function validate($string, $config, $context) {
        $needsLoading = ($this->valid_values === false);
        if ($needsLoading) {
            $this->valid_values = $config->get('Attr.AllowedFrameTargets');
        }
        return parent::validate($string, $config, $context);
    }

}
10111 
10112 
10113 
10114 
10115 
/**
 * Validates the value of an ID attribute, applying the configured prefix,
 * the uniqueness accumulator and the ID blacklist.
 */
class HTMLPurifier_AttrDef_HTML_ID extends HTMLPurifier_AttrDef
{

    // selector is NOT a valid thing to use for IDREFs, because IDREFs
    // *must* target IDs that exist, whereas selector #ids do not.

    /**
     * When true, the ID is treated as a selector: the %Attr.EnableID check
     * and the uniqueness bookkeeping are both skipped.
     */
    protected $selector;

    /**
     * @param bool $selector Whether this definition validates selector IDs.
     */
    public function __construct($selector = false) {
        $this->selector = $selector;
    }

    /**
     * @param string $id      Raw ID value.
     * @param object $config  Configuration object.
     * @param object $context Context object; 'IDAccumulator' is read from it
     *                        unless in selector mode.
     * @return string|bool The trimmed (and possibly prefixed) ID, or false
     *                     when the ID is disabled, empty, duplicate, malformed
     *                     or blacklisted.
     */
    public function validate($id, $config, $context) {

        if (!$this->selector && !$config->get('Attr.EnableID')) {
            return false;
        }

        // trim it first
        $id = trim($id);
        if ($id === '') {
            return false;
        }

        $prefix = $config->get('Attr.IDPrefix');
        if ($prefix !== '') {
            $prefix .= $config->get('Attr.IDPrefixLocal');
            // prevent re-appending the prefix
            if (strpos($id, $prefix) !== 0) {
                $id = $prefix . $id;
            }
        } elseif ($config->get('Attr.IDPrefixLocal') !== '') {
            trigger_error('%Attr.IDPrefixLocal cannot be used unless '.
                '%Attr.IDPrefix is set', E_USER_WARNING);
        }

        if (!$this->selector) {
            $id_accumulator =& $context->get('IDAccumulator');
            // an ID that has already been recorded is a duplicate
            if (isset($id_accumulator->ids[$id])) {
                return false;
            }
        }

        // we purposely avoid using regex, hopefully this is faster

        if (!ctype_alpha($id)) {
            // first character must be a letter
            if (!ctype_alpha(@$id[0])) {
                return false;
            }
            // primitive style of regexps, I suppose: strip every permitted
            // character from both ends; anything left over is illegal
            $leftover = trim($id, 'A..Za..z0..9:-._');
            $wellFormed = ($leftover === '');
        } else {
            $wellFormed = true;
        }

        $blacklist = $config->get('Attr.IDBlacklistRegexp');
        if ($blacklist && preg_match($blacklist, $id)) {
            return false;
        }

        if (!$wellFormed) {
            return false;
        }

        // record the accepted ID so later duplicates are rejected
        if (!$this->selector) {
            $id_accumulator->add($id);
        }

        return $id;

    }

}
10192 
10193 
10194 
10195 
10196 
/**
 * Validates an integer pixel count as used by HTML attributes, with an
 * optional upper bound.
 */
class HTMLPurifier_AttrDef_HTML_Pixels extends HTMLPurifier_AttrDef
{

    /** Largest value allowed, or null for no limit. */
    protected $max;

    /**
     * @param int|null $max Upper bound to clamp to; null disables clamping.
     */
    public function __construct($max = null) {
        $this->max = $max;
    }

    /**
     * @param string $string  Raw attribute value; a trailing "px" is tolerated.
     * @param object $config  Configuration object (unused here).
     * @param object $context Context object (unused here).
     * @return string|bool The integer as a string (negative values become '0',
     *                     values above the maximum become the maximum), or
     *                     false when the input is empty or not numeric.
     */
    public function validate($string, $config, $context) {

        $value = trim($string);
        if ($value === '0') {
            return $value;
        }
        if ($value === '') {
            return false;
        }

        // drop a trailing "px" unit, if there is one
        $unitOffset = strlen($value) - 2;
        if (substr($value, $unitOffset) == 'px') {
            $value = substr($value, 0, $unitOffset);
        }

        if (!is_numeric($value)) {
            return false;
        }

        $pixels = (int) $value;
        if ($pixels < 0) {
            return '0';
        }

        // upper-bound value, extremely high values can
        // crash operating systems, see <http://ha.ckers.org/imagecrash.html>
        // WARNING, above link WILL crash you if you're using Windows
        $exceedsMax = ($this->max !== null && $pixels > $this->max);

        return $exceedsMax ? (string) $this->max : (string) $pixels;

    }

    /**
     * Builds a new instance of the same concrete class from a string
     * argument: '' means no maximum, anything else is cast to int.
     *
     * @param string $string Maximum as a string, or ''.
     * @return object New instance of the runtime class of $this.
     */
    public function make($string) {
        $max = ($string === '') ? null : (int) $string;
        $class = get_class($this);
        return new $class($max);
    }

}
10241 
10242 
10243 
10244 
10245 
/**
 * Validates an HTML length: either a pixel count (delegated to the parent
 * class) or an integer percentage.
 */
class HTMLPurifier_AttrDef_HTML_Length extends HTMLPurifier_AttrDef_HTML_Pixels
{

    /**
     * @param string $string  Raw attribute value.
     * @param object $config  Configuration object, forwarded to the parent.
     * @param object $context Context object, forwarded to the parent.
     * @return string|bool A pixel string from the parent, or a percentage
     *                     clamped to the range '0%'..'100%', or false.
     */
    public function validate($string, $config, $context) {

        $string = trim($string);
        if ($string === '') {
            return false;
        }

        // pixel values win if the parent accepts the input
        $asPixels = parent::validate($string, $config, $context);
        if ($asPixels !== false) {
            return $asPixels;
        }

        $lastIndex = strlen($string) - 1;
        if ($string[$lastIndex] !== '%') {
            return false;
        }

        $number = substr($string, 0, $lastIndex);
        if (!is_numeric($number)) {
            return false;
        }

        $percent = (int) $number;
        if ($percent < 0) {
            return '0%';
        }
        if ($percent > 100) {
            return '100%';
        }

        return ((string) $percent) . '%';

    }

}
10283 
10284 
10285 
10286 
10287 
/**
 * Validates the space-separated link types of a rel or rev attribute
 * against the corresponding %Attr.AllowedRel / %Attr.AllowedRev lookup.
 */
class HTMLPurifier_AttrDef_HTML_LinkTypes extends HTMLPurifier_AttrDef
{

    /** Name of the config directive (without namespace) to consult. */
    protected $name;

    /**
     * @param string $name Attribute name, 'rel' or 'rev'. Any other value
     *                     raises an E_USER_ERROR and leaves $name unset.
     */
    public function __construct($name) {
        $configLookup = array(
            'rel' => 'AllowedRel',
            'rev' => 'AllowedRev'
        );
        if (isset($configLookup[$name])) {
            $this->name = $configLookup[$name];
            return;
        }
        trigger_error('Unrecognized attribute name for link '.
            'relationship.', E_USER_ERROR);
    }

    /**
     * @param string $string  Raw attribute value.
     * @param object $config  Configuration object.
     * @param object $context Context object (unused here).
     * @return string|bool The allowed link types, lowercased, de-duplicated
     *                     and joined by single spaces; false when nothing is
     *                     allowed or nothing survives.
     */
    public function validate($string, $config, $context) {

        $allowed = $config->get('Attr.' . $this->name);
        if (empty($allowed)) {
            return false;
        }

        $string = $this->parseCDATA($string);

        // keyed by link type so duplicates collapse automatically
        $accepted = array();
        foreach (explode(' ', $string) as $part) {
            $part = strtolower(trim($part));
            if (isset($allowed[$part])) {
                $accepted[$part] = true;
            }
        }

        if (empty($accepted)) {
            return false;
        }

        return implode(' ', array_keys($accepted));

    }

}
10337 
10338 
10339 
10340 
10341 
/**
 * Validates an HTML MultiLength: a pixel count or percentage (delegated to
 * the parent class) or a relative length written as "<integer>*".
 */
class HTMLPurifier_AttrDef_HTML_MultiLength extends HTMLPurifier_AttrDef_HTML_Length
{

    /**
     * @param string $string  Raw attribute value.
     * @param object $config  Configuration object, forwarded to the parent.
     * @param object $context Context object, forwarded to the parent.
     * @return string|bool The parent's result when it accepts the value;
     *                     otherwise a normalized relative length ('*', '0' or
     *                     'N*'), or false.
     */
    public function validate($string, $config, $context) {

        $string = trim($string);
        if ($string === '') {
            return false;
        }

        $asLength = parent::validate($string, $config, $context);
        if ($asLength !== false) {
            return $asLength;
        }

        $lastIndex = strlen($string) - 1;
        if ($string[$lastIndex] !== '*') {
            return false;
        }

        $weight = substr($string, 0, $lastIndex);

        // a bare "*" is already in canonical form
        if ($weight == '') {
            return '*';
        }
        if (!is_numeric($weight)) {
            return false;
        }

        $weight = (int) $weight;

        if ($weight < 0) {
            return false;
        }
        if ($weight == 0) {
            return '0';
        }
        // "1*" is written as plain "*"
        if ($weight == 1) {
            return '*';
        }
        return ((string) $weight) . '*';

    }

}
10379 
10380 
10381 
10382 
10383 
/**
 * Abstract base for e-mail address validators; concrete strategies
 * subclass this.
 */
abstract class HTMLPurifier_AttrDef_URI_Email extends HTMLPurifier_AttrDef
{

    /**
     * Placeholder with an empty body; it currently returns null for any input.
     *
     * @param string $string Value to unpack (currently ignored).
     */
    public function unpack($string) {
        // needs to be implemented
    }

}
10395 
10396 // sub-implementations
10397 
10398 
10399 
10400 
10401 
/**
 * Validates the host component of a URI: a bracketed IPv6 literal, an IPv4
 * address, or a domain name (optionally punycoded through Net_IDNA2).
 */
class HTMLPurifier_AttrDef_URI_Host extends HTMLPurifier_AttrDef
{

    /** IPv4 sub-validator. */
    protected $ipv4;

    /** IPv6 sub-validator. */
    protected $ipv6;

    public function __construct() {
        $this->ipv4 = new HTMLPurifier_AttrDef_URI_IPv4();
        $this->ipv6 = new HTMLPurifier_AttrDef_URI_IPv6();
    }

    /**
     * @param string $string  Host to validate.
     * @param object $config  Configuration object; %Core.EnableIDNA gates the
     *                        punycode fallback.
     * @param object $context Context object, forwarded to the IP validators.
     * @return string|bool The accepted host ('' is accepted as-is), or false.
     */
    public function validate($string, $config, $context) {
        // empty hostname is OK; it's usually semantically equivalent:
        // the default host as defined by a URI scheme is used:
        //
        //      If the URI scheme defines a default for host, then that
        //      default applies when the host subcomponent is undefined
        //      or when the registered name is empty (zero length).
        if ($string === '') {
            return '';
        }

        $length = strlen($string);
        $isBracketed = $length > 1 && $string[0] === '[' && $string[$length-1] === ']';
        if ($isBracketed) {
            // IPv6 literal: validate what is between the brackets
            $inner = substr($string, 1, $length - 2);
            $valid = $this->ipv6->validate($inner, $config, $context);
            return ($valid === false) ? false : '['. $valid . ']';
        }

        // need to do checks on unusual encodings too
        $ipv4 = $this->ipv4->validate($string, $config, $context);
        if ($ipv4 !== false) {
            return $ipv4;
        }

        // A regular domain name.

        // This doesn't match I18N domain names, but we don't have proper IRI support,
        // so force users to insert Punycode.

        // The productions describing this are:
        $a   = '[a-z]';     // alpha
        $an  = '[a-z0-9]';  // alphanum
        $and = '[a-z0-9-]'; // alphanum | "-"
        // domainlabel = alphanum | alphanum *( alphanum | "-" ) alphanum
        $domainlabel   = "$an($and*$an)?";
        // toplabel    = alpha | alpha *( alphanum | "-" ) alphanum
        $toplabel      = "$a($and*$an)?";
        // hostname    = *( domainlabel "." ) toplabel [ "." ]
        $hostnamePattern = "/^($domainlabel\.)*$toplabel\.?$/i";

        if (preg_match($hostnamePattern, $string)) {
            return $string;
        }

        // If we have Net_IDNA2 support, we can support IRIs by
        // punycoding them. (This is the most portable thing to do,
        // since otherwise we have to assume browsers support IRIs.
        // NOTE(review): the original comment was cut off mid-sentence;
        // the ending above is a best guess.)

        if (!$config->get('Core.EnableIDNA')) {
            return false;
        }

        $idna = new Net_IDNA2(array('encoding' => 'utf8', 'overlong' => false, 'strict' => true));
        try {
            // we need to encode each period separately
            $encoded = array();
            foreach (explode('.', $string) as $label) {
                // only labels holding a byte above 'z' (0x7a) get encoded
                if (preg_match('/[\x7b-\xff]/', $label)) {
                    $encoded[] = $idna->encode($label);
                } else {
                    $encoded[] = $label;
                }
            }
            $string = implode('.', $encoded);
            if (preg_match($hostnamePattern, $string)) {
                return $string;
            }
        } catch (Exception $e) {
            // XXX error reporting
        }

        return false;
    }

}
10499 
10500 
10501 
10502 
10503 
/**
 * Validates a dotted-quad IPv4 address.
 */
class HTMLPurifier_AttrDef_URI_IPv4 extends HTMLPurifier_AttrDef
{

    /** Regex fragment matching an IPv4 address; built lazily by _loadRegex(). */
    protected $ip4;

    /**
     * @param string $aIP     Candidate address.
     * @param object $config  Configuration object (unused here).
     * @param object $context Context object (unused here).
     * @return string|bool The address unchanged when it matches, else false.
     */
    public function validate($aIP, $config, $context) {

        if (!$this->ip4) {
            $this->_loadRegex();
        }

        $matches = preg_match('#^' . $this->ip4 . '$#s', $aIP);

        return $matches ? $aIP : false;

    }

    /**
     * Builds the IPv4 regex fragment and stores it in $this->ip4.
     */
    protected function _loadRegex() {
        $oct = '(?:25[0-5]|2[0-4][0-9]|1[0-9]{2}|[1-9][0-9]|[0-9])'; // 0-255
        // four octets joined by literal dots
        $this->ip4 = '(?:' . implode('\\.', array_fill(0, 4, $oct)) . ')';
    }

}
10539 
10540 
10541 
10542 
10543 
/**
 * Validates an IPv6 address, including an optional "/prefix" suffix, an
 * embedded trailing IPv4 address and "::" compression.
 */
class HTMLPurifier_AttrDef_URI_IPv6 extends HTMLPurifier_AttrDef_URI_IPv4
{

    /**
     * @param string $aIP     Candidate address (without surrounding brackets).
     * @param object $config  Configuration object (unused here).
     * @param object $context Context object (unused here).
     * @return string|bool The original input when it is well formed, else false.
     */
    public function validate($aIP, $config, $context) {

        if (!$this->ip4) {
            $this->_loadRegex();
        }

        $original = $aIP;

        $pre = '(?:/(?:12[0-8]|1[0-1][0-9]|[1-9][0-9]|[0-9]))';   // /0 - /128

        //      prefix check: a slash must introduce a valid prefix length,
        //      which is then stripped off
        if (strpos($aIP, '/') !== false) {
            if (!preg_match('#' . $pre . '$#s', $aIP, $find)) {
                return false;
            }
            $aIP = substr($aIP, 0, 0-strlen($find[0]));
            unset($find);
        }

        //      IPv4-compatiblity check: a trailing dotted quad preceded by a
        //      colon is rewritten as two hexadecimal groups
        if (preg_match('#(?<=:)' . $this->ip4 . '$#s', $aIP, $find)) {
            $aIP = substr($aIP, 0, 0-strlen($find[0]));
            $octets = array_map('dechex', explode('.', $find[0]));
            $aIP .= $octets[0] . $octets[1] . ':' . $octets[2] . $octets[3];
            unset($find, $octets);
        }

        //      compression check: at most one "::" is permitted
        $halves = explode('::', $aIP);
        $halfCount = count($halves);
        if ($halfCount > 2) {
            return false;
        }

        if ($halfCount == 2) {
            list($first, $second) = $halves;
            $first = explode(':', $first);
            $second = explode(':', $second);

            if (count($first) + count($second) > 8) {
                return false;
            }

            // pad the head out to eight groups with zeros, then overwrite
            // the tail with the groups that followed the "::"
            $first = array_pad($first, 8, '0');
            array_splice($first, 8 - count($second), 8, $second);
            $groups = $first;
            unset($first, $second);
        } else {
            $groups = explode(':', $halves[0]);
        }

        if (count($groups) != 8) {
            return false;
        }

        //      All the pieces should be 16-bit hex strings. Are they?
        foreach ($groups as $group) {
            // left-pad with zeros so short groups are checked as four digits
            if (!preg_match('#^[0-9a-fA-F]{4}$#s', sprintf('%04s', $group))) {
                return false;
            }
        }

        return $original;

    }

}
10639 
10640 
10641 
10642 
10643 
/**
 * E-mail validator that applies a single regular-expression check.
 */
class HTMLPurifier_AttrDef_URI_Email_SimpleCheck extends HTMLPurifier_AttrDef_URI_Email
{

    /**
     * @param string $string  Candidate address.
     * @param object $config  Configuration object (unused here).
     * @param object $context Context object (unused here).
     * @return string|bool The trimmed address when it matches, else false.
     */
    public function validate($string, $config, $context) {
        // no support for named mailboxes i.e. "Bob <bob@example.com>"
        // that needs more percent encoding to be done
        if ($string == '') {
            return false;
        }
        $candidate = trim($string);
        if (!preg_match('/^[A-Z0-9._%-]+@[A-Z0-9.-]+\.[A-Z]{2,4}$/i', $candidate)) {
            return false;
        }
        return $candidate;
    }

}
10661 
10662 
10663 
10664 
10665 
/**
 * Converts a 'background' attribute into a background-image CSS declaration.
 */
class HTMLPurifier_AttrTransform_Background extends HTMLPurifier_AttrTransform {

    /**
     * @param array  $attr    Attribute array.
     * @param object $config  Configuration object (unused here).
     * @param object $context Context object (unused here).
     * @return array Attribute array; when 'background' was present it is taken
     *               out via confiscateAttr() and a declaration is added via
     *               prependCSS() (both presumably modify $attr by reference).
     */
    public function transform($attr, $config, $context) {

        if (isset($attr['background'])) {
            $imageUrl = $this->confiscateAttr($attr, 'background');
            // some validation should happen here
            $this->prependCSS($attr, "background-image:url($imageUrl);");
        }

        return $attr;

    }

}
10685 
10686 
10687 
10688 
10689 
10690 // this MUST be placed in post, as it assumes that any value in dir is valid
10691 
/**
 * Supplies a default 'dir' attribute (from %Attr.DefaultTextDir) when none
 * is present.
 */
class HTMLPurifier_AttrTransform_BdoDir extends HTMLPurifier_AttrTransform
{

    /**
     * @param array  $attr    Attribute array.
     * @param object $config  Configuration object.
     * @param object $context Context object (unused here).
     * @return array Attribute array with 'dir' guaranteed to be set.
     */
    public function transform($attr, $config, $context) {
        if (!isset($attr['dir'])) {
            $attr['dir'] = $config->get('Attr.DefaultTextDir');
        }
        return $attr;
    }

}
10705 
10706 
10707 
10708 
10709 
/**
 * Converts a 'bgcolor' attribute into a background-color CSS declaration.
 */
class HTMLPurifier_AttrTransform_BgColor extends HTMLPurifier_AttrTransform {

    /**
     * @param array  $attr    Attribute array.
     * @param object $config  Configuration object (unused here).
     * @param object $context Context object (unused here).
     * @return array Attribute array; when 'bgcolor' was present it is taken
     *               out via confiscateAttr() and a declaration is added via
     *               prependCSS() (both presumably modify $attr by reference).
     */
    public function transform($attr, $config, $context) {

        if (isset($attr['bgcolor'])) {
            $color = $this->confiscateAttr($attr, 'bgcolor');
            // some validation should happen here
            $this->prependCSS($attr, "background-color:$color;");
        }

        return $attr;

    }

}
10729 
10730 
10731 
10732 
10733 
/**
 * Replaces a boolean attribute with a fixed CSS declaration.
 */
class HTMLPurifier_AttrTransform_BoolToCSS extends HTMLPurifier_AttrTransform {

    /** Name of the boolean attribute to look for. */
    protected $attr;

    /** CSS declarations to add when that attribute is present. */
    protected $css;

    /**
     * @param string $attr Attribute name to transform.
     * @param string $css  CSS declarations to add when it is present.
     */
    public function __construct($attr, $css) {
        $this->attr = $attr;
        $this->css  = $css;
    }

    /**
     * @param array  $attr    Attribute array.
     * @param object $config  Configuration object (unused here).
     * @param object $context Context object (unused here).
     * @return array Attribute array with the boolean attribute removed and
     *               the CSS added, or unchanged when it was absent.
     */
    public function transform($attr, $config, $context) {
        $name = $this->attr;
        if (isset($attr[$name])) {
            unset($attr[$name]);
            $this->prependCSS($attr, $this->css);
        }
        return $attr;
    }

}
10766 
10767 
10768 
10769 
10770 
/**
 * Converts a 'border' attribute into a "border:<n>px solid;" declaration.
 */
class HTMLPurifier_AttrTransform_Border extends HTMLPurifier_AttrTransform {

    /**
     * @param array  $attr    Attribute array.
     * @param object $config  Configuration object (unused here).
     * @param object $context Context object (unused here).
     * @return array Attribute array; when 'border' was present it is taken
     *               out via confiscateAttr() and a declaration is added via
     *               prependCSS() (both presumably modify $attr by reference).
     */
    public function transform($attr, $config, $context) {
        if (isset($attr['border'])) {
            $width = $this->confiscateAttr($attr, 'border');
            // some validation should happen here
            $this->prependCSS($attr, "border:{$width}px solid;");
        }
        return $attr;
    }

}
10785 
10786 
10787 
10788 
10789 
/**
 * Replaces an enumerated attribute with the CSS declaration mapped to its
 * value.
 */
class HTMLPurifier_AttrTransform_EnumToCSS extends HTMLPurifier_AttrTransform {

    /** Name of the attribute to transform. */
    protected $attr;

    /** Lookup from attribute value to CSS declarations. */
    protected $enumToCSS = array();

    /**
     * Whether values are matched case-sensitively. When false the value is
     * lowercased before the lookup.
     */
    protected $caseSensitive = false;

    /**
     * @param string $attr           Attribute name to transform.
     * @param array  $enum_to_css    Lookup from attribute value to CSS.
     * @param bool   $case_sensitive Whether matching is case-sensitive.
     */
    public function __construct($attr, $enum_to_css, $case_sensitive = false) {
        $this->attr = $attr;
        $this->enumToCSS = $enum_to_css;
        $this->caseSensitive = (bool) $case_sensitive;
    }

    /**
     * @param array  $attr    Attribute array.
     * @param object $config  Configuration object (unused here).
     * @param object $context Context object (unused here).
     * @return array Attribute array with the attribute removed; CSS is added
     *               only when its value has a mapping.
     */
    public function transform($attr, $config, $context) {

        $name = $this->attr;
        if (!isset($attr[$name])) {
            return $attr;
        }

        $key = trim($attr[$name]);
        // the attribute is dropped even if its value turns out to be unknown
        unset($attr[$name]);

        if (!$this->caseSensitive) {
            $key = strtolower($key);
        }

        if (isset($this->enumToCSS[$key])) {
            $this->prependCSS($attr, $this->enumToCSS[$key]);
        }

        return $attr;

    }

}
10844 
10845 
10846 
10847 
10848 
10849 // must be called POST validation
10850 
/**
 * Fills in the 'src' and 'alt' attributes of an image when they are
 * missing, using configured defaults.
 */
class HTMLPurifier_AttrTransform_ImgRequired extends HTMLPurifier_AttrTransform
{

    /**
     * @param array  $attr    Attribute array.
     * @param object $config  Configuration object.
     * @param object $context Context object (unused here).
     * @return array Attribute array; returned untouched when 'src' is missing
     *               and %Core.RemoveInvalidImg is enabled.
     */
    public function transform($attr, $config, $context) {

        $hadSrc = isset($attr['src']);
        if (!$hadSrc) {
            if ($config->get('Core.RemoveInvalidImg')) {
                return $attr;
            }
            $attr['src'] = $config->get('Attr.DefaultInvalidImage');
        }

        if (isset($attr['alt'])) {
            return $attr;
        }

        if (!$hadSrc) {
            // the image itself was a placeholder, so use the placeholder alt
            $attr['alt'] = $config->get('Attr.DefaultInvalidImageAlt');
            return $attr;
        }

        $defaultAlt = $config->get('Attr.DefaultImageAlt');
        if ($defaultAlt !== null) {
            $attr['alt'] = $defaultAlt;
        } else {
            // fall back to the file name; truncate if the alt is too long
            $attr['alt'] = substr(basename($attr['src']),0,40);
        }

        return $attr;

    }

}
10888 
10889 
10890 
10891 
10892 
/**
 * Converts the 'hspace' / 'vspace' attributes into margin CSS declarations.
 */
class HTMLPurifier_AttrTransform_ImgSpace extends HTMLPurifier_AttrTransform {

    /** Name of the attribute handled by this instance. */
    protected $attr;

    /** Margin sides affected by each supported attribute. */
    protected $css = array(
        'hspace' => array('left', 'right'),
        'vspace' => array('top', 'bottom')
    );

    /**
     * @param string $attr Attribute name; an error is triggered (but the
     *                     object is still constructed) when it is not a key
     *                     of $css.
     */
    public function __construct($attr) {
        $this->attr = $attr;
        $isSupported = isset($this->css[$attr]);
        if (!$isSupported) {
            trigger_error(htmlspecialchars($attr) . ' is not valid space attribute');
        }
    }

    /**
     * @param array  $attr    Attribute array.
     * @param object $config  Configuration object (unused here).
     * @param object $context Context object (unused here).
     * @return array Attribute array with the space attribute taken out via
     *               confiscateAttr() and the matching margins added via
     *               prependCSS().
     */
    public function transform($attr, $config, $context) {

        $name = $this->attr;
        if (!isset($attr[$name])) {
            return $attr;
        }

        // taken out before the support check, so it goes away either way
        $width = $this->confiscateAttr($attr, $name);
        // some validation could happen here

        if (!isset($this->css[$name])) {
            return $attr;
        }

        $declarations = '';
        foreach ($this->css[$name] as $side) {
            $declarations .= "margin-$side:{$width}px;";
        }

        $this->prependCSS($attr, $declarations);

        return $attr;

    }

}
10933 
10934 
10935 
10936 
10937 
/**
 * Removes or normalizes input attributes that do not apply to the input's
 * type.
 */
class HTMLPurifier_AttrTransform_Input extends HTMLPurifier_AttrTransform {

    /** Pixel validator used for 'size' on non-text inputs. */
    protected $pixels;

    public function __construct() {
        $this->pixels = new HTMLPurifier_AttrDef_HTML_Pixels();
    }

    /**
     * @param array  $attr    Attribute array.
     * @param object $config  Configuration object, forwarded to the pixel validator.
     * @param object $context Context object, forwarded to the pixel validator.
     * @return array Cleaned attribute array.
     */
    public function transform($attr, $config, $context) {
        // a missing type is treated as a text input
        $t = isset($attr['type']) ? strtolower($attr['type']) : 'text';

        $isToggle  = ($t === 'radio' || $t === 'checkbox');
        $isTextual = ($t === 'text' || $t === 'password');

        if (!$isToggle && isset($attr['checked'])) {
            unset($attr['checked']);
        }
        if (!$isTextual && isset($attr['maxlength'])) {
            unset($attr['maxlength']);
        }
        if (!$isTextual && isset($attr['size'])) {
            // outside text/password inputs, size is validated as pixels
            $size = $this->pixels->validate($attr['size'], $config, $context);
            if ($size === false) {
                unset($attr['size']);
            } else {
                $attr['size'] = $size;
            }
        }
        if ($t !== 'image' && isset($attr['src'])) {
            unset($attr['src']);
        }
        // radio buttons and checkboxes always get a value attribute
        if ($isToggle && !isset($attr['value'])) {
            $attr['value'] = '';
        }
        return $attr;
    }

}
10974 
10975 
10976 
10977 
10978 
/**
 * Keeps 'lang' and 'xml:lang' in sync; 'xml:lang' wins when both are set.
 */
class HTMLPurifier_AttrTransform_Lang extends HTMLPurifier_AttrTransform
{

    /**
     * @param array  $attr    Attribute array.
     * @param object $config  Configuration object (unused here).
     * @param object $context Context object (unused here).
     * @return array Attribute array with whichever language attribute was
     *               missing copied from the other.
     */
    public function transform($attr, $config, $context) {

        $hasLang    = isset($attr['lang']);
        $hasXmlLang = isset($attr['xml:lang']);

        if ($hasXmlLang) {
            // xml:lang takes precedence, overwriting lang if present
            $attr['lang'] = $attr['xml:lang'];
        } elseif ($hasLang) {
            $attr['xml:lang'] = $attr['lang'];
        }

        return $attr;

    }

}
11003 
11004 
11005 
11006 
11007 
/**
 * Converts a length attribute (for example width or height) into the
 * corresponding CSS declaration.
 */
class HTMLPurifier_AttrTransform_Length extends HTMLPurifier_AttrTransform
{

    /** Attribute name to read. */
    protected $name;

    /** CSS property to write; defaults to the attribute name. */
    protected $cssName;

    /**
     * @param string      $name     Attribute name.
     * @param string|null $css_name CSS property name; any falsy value falls
     *                              back to $name.
     */
    public function __construct($name, $css_name = null) {
        $this->name = $name;
        $this->cssName = $css_name ? $css_name : $name;
    }

    /**
     * @param array  $attr    Attribute array.
     * @param object $config  Configuration object (unused here).
     * @param object $context Context object (unused here).
     * @return array Attribute array with the attribute taken out via
     *               confiscateAttr() and the CSS added via prependCSS().
     */
    public function transform($attr, $config, $context) {
        if (!isset($attr[$this->name])) {
            return $attr;
        }
        $value = $this->confiscateAttr($attr, $this->name);
        // values made only of digits are unitless and get a px unit
        if (ctype_digit($value)) {
            $value .= 'px';
        }
        $this->prependCSS($attr, $this->cssName . ":$value;");
        return $attr;
    }

}
11031 
11032 
11033 
11034 
11035 
/**
 * Moves the value of a 'name' attribute to 'id' when no 'id' exists.
 */
class HTMLPurifier_AttrTransform_Name extends HTMLPurifier_AttrTransform
{

    /**
     * @param array  $attr    Attribute array.
     * @param object $config  Configuration object.
     * @param object $context Context object (unused here).
     * @return array Attribute array. Unless %HTML.Attr.Name.UseCDATA is on,
     *               'name' is taken out via confiscateAttr() and its value
     *               becomes 'id' if none was already set.
     */
    public function transform($attr, $config, $context) {
        // Abort early if we're using relaxed definition of name
        $relaxed = $config->get('HTML.Attr.Name.UseCDATA');
        if ($relaxed || !isset($attr['name'])) {
            return $attr;
        }
        $name = $this->confiscateAttr($attr, 'name');
        // an existing id is never overwritten
        if (!isset($attr['id'])) {
            $attr['id'] = $name;
        }
        return $attr;
    }

}
11053 
11054 
11055 
11056 
11057 
/**
 * Validates a 'name' attribute with the ID rules, unless it is identical
 * to the element's 'id'.
 */
class HTMLPurifier_AttrTransform_NameSync extends HTMLPurifier_AttrTransform
{

    /**
     * ID validator applied to 'name' values.
     *
     * Declared explicitly so the constructor does not create a dynamic
     * property (deprecated as of PHP 8.2). It is public because a dynamic
     * property is public, so existing access keeps working.
     */
    public $idDef;

    public function __construct() {
        $this->idDef = new HTMLPurifier_AttrDef_HTML_ID();
    }

    /**
     * @param array  $attr    Attribute array.
     * @param object $config  Configuration object, forwarded to the ID validator.
     * @param object $context Context object, forwarded to the ID validator.
     * @return array Attribute array with 'name' replaced by its validated
     *               form, or removed when validation fails.
     */
    public function transform($attr, $config, $context) {
        if (!isset($attr['name'])) return $attr;
        $name = $attr['name'];
        // a name identical to the id is left alone and not re-validated
        if (isset($attr['id']) && $attr['id'] === $name) return $attr;
        $result = $this->idDef->validate($name, $config, $context);
        if ($result === false) unset($attr['name']);
        else $attr['name'] = $result;
        return $attr;
    }

}
11081 
11082 
11083 
11084 
11085 
11086 // must be called POST validation
11087 
/**
 * Adds rel="nofollow" to links whose scheme is browsable and whose target
 * is not local.
 */
class HTMLPurifier_AttrTransform_Nofollow extends HTMLPurifier_AttrTransform
{
    /** URI parser used to inspect the href. */
    private $parser;

    public function __construct() {
        $this->parser = new HTMLPurifier_URIParser();
    }

    /**
     * @param array  $attr    Attribute array.
     * @param object $config  Configuration object.
     * @param object $context Context object.
     * @return array Attribute array; for qualifying links 'rel' contains
     *               'nofollow', with existing rel tokens preserved.
     */
    public function transform($attr, $config, $context) {

        if (!isset($attr['href'])) {
            return $attr;
        }

        // XXX Kind of inefficient
        $url = $this->parser->parse($attr['href']);
        $scheme = $url->getSchemeObj($config, $context);

        if ($scheme->browsable && !$url->isLocal($config, $context)) {
            if (isset($attr['rel'])) {
                // Split the existing rel VALUE. Passing the whole attribute
                // array to explode() would discard the existing tokens (and
                // raises a TypeError on PHP 8).
                $rels = explode(' ', $attr['rel']);
                if (!in_array('nofollow', $rels, true)) {
                    $rels[] = 'nofollow';
                }
                $attr['rel'] = implode(' ', $rels);
            } else {
                $attr['rel'] = 'nofollow';
            }
        }

        return $attr;

    }

}
11127 
11128 
11129 
11130 
11131 
/**
 * Forces fixed Flash-related attributes onto an embed element.
 */
class HTMLPurifier_AttrTransform_SafeEmbed extends HTMLPurifier_AttrTransform
{
    public $name = "SafeEmbed";

    /**
     * @param array  $attr    Attribute array.
     * @param object $config  Configuration object (unused here).
     * @param object $context Context object (unused here).
     * @return array Attribute array with allowscriptaccess, allownetworking
     *               and type overwritten unconditionally.
     */
    public function transform($attr, $config, $context) {
        $forced = array(
            'allowscriptaccess' => 'never',
            'allownetworking'   => 'internal',
            'type'              => 'application/x-shockwave-flash',
        );
        foreach ($forced as $key => $value) {
            $attr[$key] = $value;
        }
        return $attr;
    }
}
11143 
11144 
11145 
11146 
11147 
/**
 * Gives an object element the Flash MIME type when it has no type.
 */
class HTMLPurifier_AttrTransform_SafeObject extends HTMLPurifier_AttrTransform
{
    public $name = "SafeObject";

    /**
     * @param array  $attr    Attribute array.
     * @param object $config  Configuration object (unused here).
     * @param object $context Context object (unused here).
     * @return array Attribute array with 'type' guaranteed to be set.
     */
    public function transform($attr, $config, $context) {
        if (isset($attr['type'])) {
            return $attr;
        }
        $attr['type'] = 'application/x-shockwave-flash';
        return $attr;
    }
}
11160 
11161 
11162 
11163 
11164 
/**
 * Restricts param elements to a known set of Flash parameters, forcing or
 * validating each value; unknown parameters have name and value set to null.
 */
class HTMLPurifier_AttrTransform_SafeParam extends HTMLPurifier_AttrTransform
{
    public $name = "SafeParam";

    /** URI validator (constructed in embedded mode) for movie/src values. */
    private $uri;

    /**
     * Enum validator for the 'wmode' parameter.
     *
     * Declared explicitly so the constructor does not create a dynamic
     * property (deprecated as of PHP 8.2). It is public because a dynamic
     * property is public, so existing access keeps working.
     */
    public $wmode;

    public function __construct() {
        $this->uri = new HTMLPurifier_AttrDef_URI(true); // embedded
        $this->wmode = new HTMLPurifier_AttrDef_Enum(array('window', 'opaque', 'transparent'));
    }

    /**
     * @param array  $attr    Attribute array of a param element.
     * @param object $config  Configuration object.
     * @param object $context Context object, forwarded to the validators.
     * @return array Attribute array with sanitized 'name' and 'value'.
     */
    public function transform($attr, $config, $context) {
        // A param without a name takes the default branch, exactly as
        // before, but without reading an undefined array key.
        $paramName = isset($attr['name']) ? $attr['name'] : null;

        // If we add support for other objects, we'll need to alter the
        // transforms.
        switch ($paramName) {
            // application/x-shockwave-flash
            // Keep this synchronized with Injector/SafeObject.php
            case 'allowScriptAccess':
                $attr['value'] = 'never';
                break;
            case 'allowNetworking':
                $attr['value'] = 'internal';
                break;
            case 'allowFullScreen':
                if ($config->get('HTML.FlashAllowFullScreen')) {
                    // anything other than 'true' is normalized to 'false'
                    $attr['value'] = ($attr['value'] == 'true') ? 'true' : 'false';
                } else {
                    $attr['value'] = 'false';
                }
                break;
            case 'wmode':
                $attr['value'] = $this->wmode->validate($attr['value'], $config, $context);
                break;
            case 'movie':
            case 'src':
                // both spellings are normalized to "movie"
                $attr['name'] = "movie";
                $attr['value'] = $this->uri->validate($attr['value'], $config, $context);
                break;
            case 'flashvars':
                // we're going to allow arbitrary inputs to the SWF, on
                // the reasoning that it could only hack the SWF, not us.
                break;
            // add other cases to support other param name/value pairs
            default:
                $attr['name'] = $attr['value'] = null;
        }
        return $attr;
    }
}
11225 
11226 
11227 
11228 
11229 
/**
 * Supplies the default 'type' attribute for script elements.
 */
class HTMLPurifier_AttrTransform_ScriptRequired extends HTMLPurifier_AttrTransform
{
    /**
     * @param array  $attr    Attribute array.
     * @param object $config  Configuration object (unused here).
     * @param object $context Context object (unused here).
     * @return array Attribute array with 'type' guaranteed to be set.
     */
    public function transform($attr, $config, $context) {
        if (isset($attr['type'])) {
            return $attr;
        }
        $attr['type'] = 'text/javascript';
        return $attr;
    }
}
11242 
11243 
11244 
11245 
11246 
11247 // must be called POST validation
11248 
/**
 * Makes links open in a new window when their scheme is browsable and the
 * URI is not benign.
 */
class HTMLPurifier_AttrTransform_TargetBlank extends HTMLPurifier_AttrTransform
{
    /** URI parser used to inspect the href. */
    private $parser;

    public function __construct() {
        $this->parser = new HTMLPurifier_URIParser();
    }

    /**
     * @param array  $attr    Attribute array.
     * @param object $config  Configuration object.
     * @param object $context Context object.
     * @return array Attribute array; qualifying links get target="_blank".
     */
    public function transform($attr, $config, $context) {

        if (!isset($attr['href'])) {
            return $attr;
        }

        // XXX Kind of inefficient
        $url = $this->parser->parse($attr['href']);
        $scheme = $url->getSchemeObj($config, $context);

        if ($scheme->browsable && !$url->isBenign($config, $context)) {
            // The keyword needs its leading underscore: plain "blank" is
            // just a browsing context named "blank", which every such link
            // would then share instead of opening a new window.
            $attr['target'] = '_blank';
        }

        return $attr;

    }

}
11281 
11282 
11283 
11284 
11285 
/**
 * Supplies default 'cols' and 'rows' attributes for textarea elements.
 */
class HTMLPurifier_AttrTransform_Textarea extends HTMLPurifier_AttrTransform
{

    /**
     * @param array  $attr    Attribute array.
     * @param object $config  Configuration object (unused here).
     * @param object $context Context object (unused here).
     * @return array Attribute array with 'cols' and 'rows' guaranteed to be set.
     */
    public function transform($attr, $config, $context) {
        // Calculated from Firefox
        $defaults = array('cols' => '22', 'rows' => '3');
        foreach ($defaults as $key => $fallback) {
            if (!isset($attr[$key])) {
                $attr[$key] = $fallback;
            }
        }
        return $attr;
    }

}
11300 
11301 
11302 
11303 
11304 
/**
 * Child definition that validates against one of two content models,
 * chosen at validation time from the 'IsInline' context variable.
 */
class HTMLPurifier_ChildDef_Chameleon extends HTMLPurifier_ChildDef
{

    /** HTMLPurifier_ChildDef_Optional used unless IsInline is exactly false. */
    public $inline;

    /** HTMLPurifier_ChildDef_Optional used when IsInline is exactly false. */
    public $block;

    public $type = 'chameleon';

    /**
     * @param mixed $inline Element list for the inline content model,
     *                      passed to HTMLPurifier_ChildDef_Optional.
     * @param mixed $block  Element list for the block content model,
     *                      passed to HTMLPurifier_ChildDef_Optional.
     */
    public function __construct($inline, $block)
    {
        $this->inline = new HTMLPurifier_ChildDef_Optional($inline);
        $this->block  = new HTMLPurifier_ChildDef_Optional($block);
        // advertise the block model's element set as our own
        $this->elements = $this->block->elements;
    }

    /**
     * Delegates validation to the block or inline definition.
     *
     * @return mixed Whatever the selected definition's validateChildren()
     *               returns.
     */
    public function validateChildren($tokens_of_children, $config, $context)
    {
        $delegate = ($context->get('IsInline') === false)
            ? $this->block
            : $this->inline;
        return $delegate->validateChildren($tokens_of_children, $config, $context);
    }
}
11349 
11350 
11351 
11352 
11353 
/**
 * Child definition driven by a DTD-style content model expression, which
 * is compiled into a PCRE pattern and matched against a comma-separated
 * list of the direct children's names.
 */
class HTMLPurifier_ChildDef_Custom extends HTMLPurifier_ChildDef
{
    public $type = 'custom';
    public $allow_empty = false;
    /** Content model expression exactly as passed to the constructor. */
    public $dtd_regex;
    /** PCRE fragment compiled from $dtd_regex by _compileRegex(). */
    private $_pcre_regex;
    /**
     * @param string $dtd_regex DTD-style content model expression.
     */
    public function __construct($dtd_regex) {
        $this->dtd_regex = $dtd_regex;
        $this->_compileRegex();
    }
    /**
     * Compiles $dtd_regex into $_pcre_regex and records every element name
     * found in the expression in $this->elements.
     */
    protected function _compileRegex() {
        $raw = str_replace(' ', '', $this->dtd_regex);
        // Square-bracket offset: the curly-brace form ($raw{0}) is
        // deprecated in PHP 7.4 and a parse error in PHP 8. The empty-string
        // check avoids reading an uninitialized offset; the outcome for an
        // empty expression is still "()".
        if ($raw === '' || $raw[0] != '(') {
            $raw = "($raw)";
        }
        $el = '[#a-zA-Z0-9_.-]+';
        $reg = $raw;

        // COMPLICATED! AND MIGHT BE BUGGY! I HAVE NO CLUE WHAT I'M
        // DOING! Seriously: if there's problems, please report them.

        // collect all elements into the $elements array
        preg_match_all("/$el/", $reg, $matches);
        foreach ($matches[0] as $match) {
            $this->elements[$match] = true;
        }

        // setup all elements as parentheticals with leading commas
        $reg = preg_replace("/$el/", '(,\\0)', $reg);

        // remove commas when they were not solicited
        $reg = preg_replace("/([^,(|]\(+),/", '\\1', $reg);

        // remove all non-paranthetical commas: they are handled by first regex
        $reg = preg_replace("/,\(/", '(', $reg);

        $this->_pcre_regex = $reg;
    }
    /**
     * Checks the direct children against the compiled content model.
     *
     * @param array $tokens_of_children Tokens of the child nodes.
     * @return bool True when the names of the direct, non-whitespace
     *              children match the pattern, false otherwise.
     */
    public function validateChildren($tokens_of_children, $config, $context) {
        $list_of_children = '';
        $nesting = 0; // depth into the nest
        foreach ($tokens_of_children as $token) {
            if (!empty($token->is_whitespace)) continue;

            // depth is sampled before this token adjusts it, so a start
            // tag at depth 0 still counts as a direct child
            $is_child = ($nesting == 0); // direct

            if ($token instanceof HTMLPurifier_Token_Start) {
                $nesting++;
            } elseif ($token instanceof HTMLPurifier_Token_End) {
                $nesting--;
            }

            if ($is_child) {
                $list_of_children .= $token->name . ',';
            }
        }
        // add leading comma to deal with stray comma declarations
        $list_of_children = ',' . rtrim($list_of_children, ',');
        $okay =
            preg_match(
                '/^,?'.$this->_pcre_regex.'$/',
                $list_of_children
            );

        return (bool) $okay;
    }
}
11440 
11441 
11442 
11443 
11444 
/**
 * Child definition for elements that may not contain anything.
 */
class HTMLPurifier_ChildDef_Empty extends HTMLPurifier_ChildDef
{
    public $allow_empty = true;
    public $type = 'empty';

    public function __construct()
    {
    }

    /**
     * Discards every child.
     *
     * @return array Always an empty array, whatever tokens are passed in.
     */
    public function validateChildren($tokens_of_children, $config, $context)
    {
        $no_children = array();
        return $no_children;
    }
}
11461 
11462 
11463 
11464 
11465 
/**
 * Child definition for lists. Direct children that are not li elements are
 * wrapped in (or tucked into) an li so the result contains only li
 * children at the top level.
 */
class HTMLPurifier_ChildDef_List extends HTMLPurifier_ChildDef
{
    public $type = 'list';
    // lying a little bit, so that we can handle ul and ol ourselves
    // XXX: This whole business with 'wrap' is all a bit unsatisfactory
    public $elements = array('li' => true, 'ul' => true, 'ol' => true);
    /**
     * Flag reset at the start of every validateChildren() call. Declared
     * explicitly so that the assignment does not create a dynamic property
     * (deprecated as of PHP 8.2); public to match the visibility the
     * dynamic property had.
     */
    public $whitespace = false;
    /**
     * Validates and, where needed, repairs the children of a list.
     *
     * @param array $tokens_of_children Tokens of the child nodes.
     * @return mixed False when there are no children, only whitespace, or
     *               an internal invariant is violated; true when no change
     *               was necessary; otherwise the repaired token array.
     */
    public function validateChildren($tokens_of_children, $config, $context) {
        // Flag for subclasses
        $this->whitespace = false;

        // if there are no tokens, delete parent node
        if (empty($tokens_of_children)) return false;

        // the new set of children
        $result = array();

        // current depth into the nest
        $nesting = 0;

        // a little sanity check to make sure it's not ALL whitespace
        $all_whitespace = true;

        $seen_li = false;
        $need_close_li = false;

        foreach ($tokens_of_children as $token) {
            if (!empty($token->is_whitespace)) {
                $result[] = $token;
                continue;
            }
            $all_whitespace = false; // phew, we're not talking about whitespace

            // we are back at the level of an li that we opened ourselves:
            // close it before looking at the next child
            if ($nesting == 1 && $need_close_li) {
                $result[] = new HTMLPurifier_Token_End('li');
                $nesting--;
                $need_close_li = false;
            }

            $is_child = ($nesting == 0);

            if ($token instanceof HTMLPurifier_Token_Start) {
                $nesting++;
            } elseif ($token instanceof HTMLPurifier_Token_End) {
                $nesting--;
            }

            if ($is_child) {
                if ($token->name === 'li') {
                    // good
                    $seen_li = true;
                } elseif ($token->name === 'ul' || $token->name === 'ol') {
                    // we want to tuck this into the previous li
                    $need_close_li = true;
                    // account for the li that will enclose this nested list
                    $nesting++;
                    if (!$seen_li) {
                        // create a new li element
                        $result[] = new HTMLPurifier_Token_Start('li');
                    } else {
                        // backtrack until </li> found
                        while(true) {
                            $t = array_pop($result);
                            if ($t instanceof HTMLPurifier_Token_End) {
                                // XXX actually, these invariants could very plausibly be violated
                                // if we are doing silly things with modifying the set of allowed elements.
                                // FORTUNATELY, it doesn't make a difference, since the allowed
                                // elements are hard-coded here!
                                if ($t->name !== 'li') {
                                    trigger_error("Only li present invariant violated in List ChildDef", E_USER_ERROR);
                                    return false;
                                }
                                break;
                            } elseif ($t instanceof HTMLPurifier_Token_Empty) { // bleagh
                                if ($t->name !== 'li') {
                                    trigger_error("Only li present invariant violated in List ChildDef", E_USER_ERROR);
                                    return false;
                                }
                                // XXX this should have a helper for it...
                                $result[] = new HTMLPurifier_Token_Start('li', $t->attr, $t->line, $t->col, $t->armor);
                                break;
                            } else {
                                // empty() instead of a bare property read:
                                // $t is null once $result is exhausted, and
                                // non-text tokens may lack is_whitespace;
                                // both must reach the error path without
                                // an extra notice/warning.
                                if (empty($t->is_whitespace)) {
                                    trigger_error("Only whitespace present invariant violated in List ChildDef", E_USER_ERROR);
                                    return false;
                                }
                            }
                        }
                    }
                } else {
                    // start wrapping (this doesn't precisely mimic
                    // browser behavior, but what browsers do is kind of
                    // hard to mimic in a standards compliant way
                    // XXX Actually, this has no impact in practice,
                    // because this gets handled earlier. Arguably,
                    // we should rip out all of that processing
                    $result[] = new HTMLPurifier_Token_Start('li');
                    $nesting++;
                    $seen_li = true;
                    $need_close_li = true;
                }
            }
            $result[] = $token;
        }
        if ($need_close_li) {
            $result[] = new HTMLPurifier_Token_End('li');
        }
        if (empty($result)) return false;
        if ($all_whitespace) {
            return false;
        }
        if ($tokens_of_children == $result) return true;
        return $result;
    }
}
11582 
11583 
11584 
11585 
11586 
/**
 * Child definition that accepts any number of children drawn from a fixed
 * set of element names, but requires at least one non-whitespace child.
 */
class HTMLPurifier_ChildDef_Required extends HTMLPurifier_ChildDef
{
    /** Lookup table of allowed child names (name => true). */
    public $elements = array();
    /**
     * Set to true by validateChildren() when the children consisted of
     * whitespace only; reset to false at the start of each call.
     */
    protected $whitespace = false;

    /**
     * @param mixed $elements Either a '|'-separated string of names, a
     *                        list of names, or a ready-made lookup table.
     */
    public function __construct($elements)
    {
        if (is_string($elements)) {
            $elements = explode('|', str_replace(' ', '', $elements));
        }
        $keys = array_keys($elements);
        if ($keys == array_keys($keys)) {
            // sequentially indexed list: turn it into a name => true
            // lookup, leaving out blank names
            $lookup = array();
            foreach (array_flip($elements) as $name => $unused) {
                if (empty($name)) {
                    continue;
                }
                $lookup[$name] = true;
            }
            $elements = $lookup;
        }
        $this->elements = $elements;
    }

    public $allow_empty = false;
    public $type = 'required';

    /**
     * Filters the children down to the allowed set.
     *
     * @param array $tokens_of_children Tokens of the child nodes.
     * @param mixed $config  Configuration; 'Core.EscapeInvalidChildren' is
     *                       read from it.
     * @param mixed $context Context object, handed to the generator.
     * @return mixed False when nothing (or only whitespace) remains; true
     *               when no change was necessary; otherwise the filtered
     *               token array.
     */
    public function validateChildren($tokens_of_children, $config, $context)
    {
        // Flag for subclasses
        $this->whitespace = false;

        // no tokens at all: the parent node is to be deleted
        if (empty($tokens_of_children)) {
            return false;
        }

        $result = array();          // the new set of children
        $nesting = 0;               // current depth into the nest
        $is_deleting = false;       // inside a disallowed top-level node?
        $all_whitespace = true;     // nothing but whitespace seen so far?

        // when character data is allowed, disallowed tags may be kept as
        // escaped text instead of being dropped silently
        $pcdata_allowed = isset($this->elements['#PCDATA']);
        $escape_invalid_children = $config->get('Core.EscapeInvalidChildren');

        $gen = new HTMLPurifier_Generator($config, $context);

        foreach ($tokens_of_children as $token) {
            if (!empty($token->is_whitespace)) {
                $result[] = $token;
                continue;
            }
            $all_whitespace = false;

            // depth is sampled before this token adjusts it
            $at_top_level = ($nesting == 0);

            if ($token instanceof HTMLPurifier_Token_Start) {
                $nesting++;
            } elseif ($token instanceof HTMLPurifier_Token_End) {
                $nesting--;
            }

            if ($at_top_level) {
                // every direct child decides afresh whether we are
                // deleting; descendants inherit the decision
                $is_deleting = !isset($this->elements[$token->name]);
            }

            $is_text = $token instanceof HTMLPurifier_Token_Text;
            if (!$is_deleting || ($pcdata_allowed && $is_text)) {
                $result[] = $token;
            } elseif ($pcdata_allowed && $escape_invalid_children) {
                $result[] = new HTMLPurifier_Token_Text(
                    $gen->generateFromToken($token)
                );
            }
            // anything else is dropped silently
        }

        if (empty($result)) {
            return false;
        }
        if ($all_whitespace) {
            $this->whitespace = true;
            return false;
        }
        return ($tokens_of_children == $result) ? true : $result;
    }
}
11700 
11701 
11702 
11703 
11704 
/**
 * Variant of HTMLPurifier_ChildDef_Required that never asks for the
 * parent node to be removed: a false result from the parent class is
 * converted into a non-false one.
 */
class HTMLPurifier_ChildDef_Optional extends HTMLPurifier_ChildDef_Required
{
    public $allow_empty = true;
    public $type = 'optional';

    /**
     * @param array $tokens_of_children Tokens of the child nodes.
     * @return mixed The parent's result unless it is false. In that case:
     *               true for an empty input, the unchanged input when it
     *               was whitespace only, and an empty array otherwise.
     */
    public function validateChildren($tokens_of_children, $config, $context)
    {
        $result = parent::validateChildren($tokens_of_children, $config, $context);
        if ($result !== false) {
            return $result;
        }
        // we assume that $tokens_of_children is not modified
        if (empty($tokens_of_children)) {
            return true;
        }
        if ($this->whitespace) {
            return $tokens_of_children;
        }
        return array();
    }
}
11727 
11728 
11729 
11730 
11731 
/**
 * Child definition for blockquote under a strict content model: children
 * are first filtered against the 'Flow' content set, then runs of content
 * that are not in the real element set are wrapped in the definition's
 * block wrapper element.
 */
class HTMLPurifier_ChildDef_StrictBlockquote extends HTMLPurifier_ChildDef_Required
{
    /** The element set this definition was constructed with. */
    protected $real_elements;
    /** The 'Flow' content set plus #PCDATA, used while filtering. */
    protected $fake_elements;
    public $allow_empty = true;
    public $type = 'strictblockquote';
    /** Whether init() has already run. */
    protected $init = false;

    /**
     * @return array The widened (fake) element set.
     */
    public function getAllowedElements($config)
    {
        $this->init($config);
        return $this->fake_elements;
    }

    /**
     * Filters the children and wraps stray inline content.
     *
     * @param array $tokens_of_children Tokens of the child nodes.
     * @return array The resulting tokens; an empty array when the parent
     *               class rejects the children outright.
     */
    public function validateChildren($tokens_of_children, $config, $context)
    {
        $this->init($config);

        // trick the parent class into thinking it allows more
        $this->elements = $this->fake_elements;
        $filtered = parent::validateChildren($tokens_of_children, $config, $context);
        $this->elements = $this->real_elements;

        if ($filtered === false) {
            return array();
        }
        if ($filtered === true) {
            $filtered = $tokens_of_children;
        }

        $wrapper_name = $config->getHTMLDefinition()->info_block_wrapper;
        $wrap_open  = new HTMLPurifier_Token_Start($wrapper_name);
        $wrap_close = new HTMLPurifier_Token_End($wrapper_name);

        $wrapping = false; // currently inside a wrapper we opened?
        $depth = 0;
        $ret = array();

        // assuming that there are no comment tokens
        foreach ($filtered as $token) {
            // wrappers are only opened or closed between top-level nodes
            if (!$depth) {
                if ($wrapping) {
                    // a real (block) element ends the inline run
                    $starts_node = $token instanceof HTMLPurifier_Token_Start
                        || $token instanceof HTMLPurifier_Token_Empty;
                    if ($starts_node && isset($this->elements[$token->name])) {
                        $ret[] = $wrap_close;
                        $wrapping = false;
                    }
                } else {
                    // non-whitespace text, or a tag outside the real
                    // element set, begins an inline run
                    if ($token instanceof HTMLPurifier_Token_Text) {
                        $needs_wrap = !$token->is_whitespace;
                    } else {
                        $needs_wrap = !isset($this->elements[$token->name]);
                    }
                    if ($needs_wrap) {
                        $wrapping = true;
                        $ret[] = $wrap_open;
                    }
                }
            }
            $ret[] = $token;
            if ($token instanceof HTMLPurifier_Token_Start) {
                $depth++;
            }
            if ($token instanceof HTMLPurifier_Token_End) {
                $depth--;
            }
        }
        if ($wrapping) {
            $ret[] = $wrap_close;
        }
        return $ret;
    }

    /**
     * Captures the real element set and builds the fake one on first use.
     */
    private function init($config)
    {
        if ($this->init) {
            return;
        }
        $def = $config->getHTMLDefinition();
        // allow all inline elements
        $this->real_elements = $this->elements;
        $this->fake_elements = $def->info_content_sets['Flow'];
        $this->fake_elements['#PCDATA'] = true;
        $this->init = true;
    }
}
11816 
11817 
11818 
11819 
11820 
/**
 * Child definition for tables. Groups the direct children into caption,
 * column definitions, thead, tfoot and body content, and emits them in
 * that order.
 */
class HTMLPurifier_ChildDef_Table extends HTMLPurifier_ChildDef
{
    public $allow_empty = false;
    public $type = 'table';
    public $elements = array('tr' => true, 'tbody' => true, 'thead' => true,
        'tfoot' => true, 'caption' => true, 'colgroup' => true, 'col' => true);
    public function __construct() {}
    /**
     * Validates and reorders the children of a table.
     *
     * @param array $tokens_of_children Tokens of the child nodes.
     * @return mixed False when there are no children or no tr/tbody
     *               content was found; true when the reordered result is
     *               identical to the input; otherwise the new token array.
     */
    public function validateChildren($tokens_of_children, $config, $context) {
        if (empty($tokens_of_children)) return false;

        // this ensures that the loop gets run one last time before closing
        // up. It's a little bit of a hack, but it works! Just make sure you
        // get rid of the token later.
        $tokens_of_children[] = false;

        // only one of these elements is allowed in a table
        $caption = false;
        $thead   = false;
        $tfoot   = false;

        // as many of these as you want
        $cols    = array();
        $content = array();

        $nesting = 0; // current depth so we can determine nodes
        $is_collecting = false; // are we globbing together tokens to package
                                // into one of the collectors?
        $collection = array(); // collected nodes
        $tag_index = 0; // the first node might be whitespace,
                            // so this tells us where the start tag is
        $tbody_mode = false; // if true, then we need to wrap any stray
                             // <tr>s with a <tbody>.

        foreach ($tokens_of_children as $token) {
            $is_child = ($nesting == 0);

            if ($token === false) {
                // terminating sequence started
            } elseif ($token instanceof HTMLPurifier_Token_Start) {
                $nesting++;
            } elseif ($token instanceof HTMLPurifier_Token_End) {
                $nesting--;
            }

            // handle node collection
            if ($is_collecting) {
                if ($is_child) {
                    // okay, let's stash the tokens away
                    // first token tells us the type of the collection
                    switch ($collection[$tag_index]->name) {
                        case 'tbody':
                            $tbody_mode = true;
                            // intentional fall-through: a tbody is
                            // stored as content exactly like a tr
                        case 'tr':
                            $content[] = $collection;
                            break;
                        case 'caption':
                            if ($caption !== false) break;
                            $caption = $collection;
                            break;
                        case 'thead':
                        case 'tfoot':
                            $tbody_mode = true;
                            // XXX This breaks rendering properties with
                            // Firefox, which never floats a <thead> to
                            // the top. Ever. (Our scheme will float the
                            // first <thead> to the top.)  So maybe
                            // <thead>s that are not first should be
                            // turned into <tbody>? Very tricky, indeed.

                            // access the appropriate variable, $thead or $tfoot
                            $var = $collection[$tag_index]->name;
                            if ($$var === false) {
                                $$var = $collection;
                            } else {
                                // Oops, there's a second one! What
                                // should we do?  Current behavior is to
                                // transmutate the first and last entries into
                                // tbody tags, and then put into content.
                                // Maybe a better idea is to *attach
                                // it* to the existing thead or tfoot?
                                // We don't do this, because Firefox
                                // doesn't float an extra tfoot to the
                                // bottom like it does for the first one.
                                $collection[$tag_index]->name = 'tbody';
                                $collection[count($collection)-1]->name = 'tbody';
                                $content[] = $collection;
                            }
                            break;
                         case 'colgroup':
                            $cols[] = $collection;
                            break;
                    }
                    $collection = array();
                    $is_collecting = false;
                    $tag_index = 0;
                } else {
                    // add the node to the collection
                    $collection[] = $token;
                }
            }

            // terminate
            if ($token === false) break;

            if ($is_child) {
                // determine what we're dealing with
                if ($token->name == 'col') {
                    // the only empty tag in the possie, we can handle it
                    // immediately
                    $cols[] = array_merge($collection, array($token));
                    $collection = array();
                    $tag_index = 0;
                    continue;
                }
                // The switch is the last statement of the loop body, so
                // leaving it with "break" moves on to the next token.
                // ("continue" inside a switch behaves like "break" anyway,
                // but raises an E_WARNING as of PHP 7.3.)
                switch($token->name) {
                    case 'caption':
                    case 'colgroup':
                    case 'thead':
                    case 'tfoot':
                    case 'tbody':
                    case 'tr':
                        $is_collecting = true;
                        $collection[] = $token;
                        break;
                    default:
                        if (!empty($token->is_whitespace)) {
                            $collection[] = $token;
                            $tag_index++;
                        }
                        break;
                }
            }
        }

        if (empty($content)) return false;

        $ret = array();
        if ($caption !== false) $ret = array_merge($ret, $caption);
        // $cols is always an array (possibly empty), so no guard is needed
        foreach ($cols as $token_array) $ret = array_merge($ret, $token_array);
        if ($thead !== false)   $ret = array_merge($ret, $thead);
        if ($tfoot !== false)   $ret = array_merge($ret, $tfoot);

        if ($tbody_mode) {
            // a little tricky, since the start of the collection may be
            // whitespace
            $inside_tbody = false;
            foreach ($content as $token_array) {
                // find the starting token
                foreach ($token_array as $t) {
                    if ($t->name === 'tr' || $t->name === 'tbody') {
                        break;
                    }
                } // iterator variable carries over
                if ($t->name === 'tr') {
                    if ($inside_tbody) {
                        $ret = array_merge($ret, $token_array);
                    } else {
                        $ret[] = new HTMLPurifier_Token_Start('tbody');
                        $ret = array_merge($ret, $token_array);
                        $inside_tbody = true;
                    }
                } elseif ($t->name === 'tbody') {
                    if ($inside_tbody) {
                        $ret[] = new HTMLPurifier_Token_End('tbody');
                        $inside_tbody = false;
                        $ret = array_merge($ret, $token_array);
                    } else {
                        $ret = array_merge($ret, $token_array);
                    }
                } else {
                    trigger_error("tr/tbody in content invariant failed in Table ChildDef", E_USER_ERROR);
                }
            }
            if ($inside_tbody) {
                $ret[] = new HTMLPurifier_Token_End('tbody');
            }
        } else {
            foreach ($content as $token_array) {
                // invariant: everything in here is <tr>s
                $ret = array_merge($ret, $token_array);
            }
        }

        if (!empty($collection) && $is_collecting == false){
            // grab the trailing space
            $ret = array_merge($ret, $collection);
        }

        array_pop($tokens_of_children); // remove phantom token

        return ($ret === $tokens_of_children) ? true : $ret;

    }
}
12044 
12045 
12046 
12047 
12048 
/**
 * Definition cache that wraps another cache and forwards every operation
 * to it unchanged.
 */
class HTMLPurifier_DefinitionCache_Decorator extends HTMLPurifier_DefinitionCache
{

    /** The wrapped cache that all operations are forwarded to. */
    public $cache;

    public function __construct()
    {
    }

    /**
     * Creates a copy of this decorator wrapped around the given cache.
     *
     * @param object $cache Cache to wrap (taken by reference).
     * @return HTMLPurifier_DefinitionCache_Decorator The new decorator,
     *         carrying the wrapped cache's type.
     */
    public function decorate(&$cache)
    {
        $wrapper = $this->copy();
        // reference is necessary for mocks in PHP 4
        $wrapper->cache =& $cache;
        $wrapper->type  = $cache->type;
        return $wrapper;
    }

    /**
     * @return HTMLPurifier_DefinitionCache_Decorator A fresh, unbound
     *         decorator instance.
     */
    public function copy()
    {
        return new HTMLPurifier_DefinitionCache_Decorator();
    }

    /** Forwards to the wrapped cache's add(). */
    public function add($def, $config)
    {
        return $this->cache->add($def, $config);
    }

    /** Forwards to the wrapped cache's set(). */
    public function set($def, $config)
    {
        return $this->cache->set($def, $config);
    }

    /** Forwards to the wrapped cache's replace(). */
    public function replace($def, $config)
    {
        return $this->cache->replace($def, $config);
    }

    /** Forwards to the wrapped cache's get(). */
    public function get($config)
    {
        return $this->cache->get($config);
    }

    /** Forwards to the wrapped cache's remove(). */
    public function remove($config)
    {
        return $this->cache->remove($config);
    }

    /** Forwards to the wrapped cache's flush(). */
    public function flush($config)
    {
        return $this->cache->flush($config);
    }

    /** Forwards to the wrapped cache's cleanup(). */
    public function cleanup($config)
    {
        return $this->cache->cleanup($config);
    }

}
12107 
12108 
12109 
12110 
12111 
/**
 * Definition cache that stores nothing: every operation reports false.
 */
class HTMLPurifier_DefinitionCache_Null extends HTMLPurifier_DefinitionCache
{

    /** @return bool Always false. */
    public function add($def, $config)
    {
        return false;
    }

    /** @return bool Always false. */
    public function set($def, $config)
    {
        return false;
    }

    /** @return bool Always false. */
    public function replace($def, $config)
    {
        return false;
    }

    /** @return bool Always false. */
    public function remove($config)
    {
        return false;
    }

    /** @return bool Always false. */
    public function get($config)
    {
        return false;
    }

    /** @return bool Always false. */
    public function flush($config)
    {
        return false;
    }

    /** @return bool Always false. */
    public function cleanup($config)
    {
        return false;
    }

}
12147 
12148 
12149 
12150 
12151 
12152 class HTMLPurifier_DefinitionCache_Serializer extends
12153       HTMLPurifier_DefinitionCache
12154 {
12155 
12156     public function add($def, $config) {
12157         if (!$this->checkDefType($def)) return;
12158         $file = $this->generateFilePath($config);
12159         if (file_exists($file)) return false;
12160         if (!$this->_prepareDir($config)) return false;
12161         return $this->_write($file, serialize($def), $config);
12162     }
12163 
12164     public function set($def, $config) {
12165         if (!$this->checkDefType($def)) return;
12166         $file = $this->generateFilePath($config);
12167         if (!$this->_prepareDir($config)) return false;
12168         return $this->_write($file, serialize($def), $config);
12169     }
12170 
12171     public function replace($def, $config) {
12172         if (!$this->checkDefType($def)) return;
12173         $file = $this->generateFilePath($config);
12174         if (!file_exists($file)) return false;
12175         if (!$this->_prepareDir($config)) return false;
12176         return $this->_write($file, serialize($def), $config);
12177     }
12178 
12179     public function get($config) {
12180         $file = $this->generateFilePath($config);
12181         if (!file_exists($file)) return false;
12182         return unserialize(file_get_contents($file));
12183     }
12184 
12185     public function remove($config) {
12186         $file = $this->generateFilePath($config);
12187         if (!file_exists($file)) return false;
12188         return unlink($file);
12189     }
12190 
12191     public function flush($config) {
12192         if (!$this->_prepareDir($config)) return false;
12193         $dir = $this->generateDirectoryPath($config);
12194         $dh  = opendir($dir);
12195         while (false !== ($filename = readdir($dh))) {
12196             if (empty($filename)) continue;
12197             if ($filename[0] === '.') continue;
12198             unlink($dir . '/' . $filename);
12199         }
12200     }
12201 
/**
 * Deletes cache files in this definition type's directory whose key is
 * reported as old by isOld().
 *
 * @param mixed $config Configuration object used to derive the directory
 *                      and passed to isOld().
 * @return bool False when the directory cannot be prepared or opened,
 *              true once the directory has been walked.
 */
public function cleanup($config) {
    if (!$this->_prepareDir($config)) return false;
    $dir = $this->generateDirectoryPath($config);
    $dh  = opendir($dir);
    // opendir() returns false on failure; handing that to readdir() would
    // at best warn on every iteration and at worst never terminate.
    if ($dh === false) return false;
    while (false !== ($filename = readdir($dh))) {
        if (empty($filename)) continue;
        // skips '.', '..' and any hidden files
        if ($filename[0] === '.') continue;
        // drop the trailing four characters (the '.ser' suffix that
        // generateFilePath() appends) to recover the cache key
        $key = substr($filename, 0, strlen($filename) - 4);
        if ($this->isOld($key, $config)) unlink($dir . '/' . $filename);
    }
    // release the directory handle instead of leaking it
    closedir($dh);
    return true;
}
12213 
/**
 * Builds the full path of the cache file for a configuration:
 * the type directory, a slash, the generated key and a '.ser' suffix.
 *
 * @param mixed $config Configuration object passed to generateKey() and
 *                      generateDirectoryPath().
 * @return string Path of the serialized definition file.
 */
public function generateFilePath($config) {
    $cacheKey  = $this->generateKey($config);
    $directory = $this->generateDirectoryPath($config);
    return $directory . '/' . $cacheKey . '.ser';
}
12223 
/**
 * Builds the directory that holds cache files for this definition type:
 * the base directory followed by a slash and $this->type.
 *
 * @param mixed $config Configuration object passed to
 *                      generateBaseDirectoryPath().
 * @return string Directory path without a trailing slash.
 */
public function generateDirectoryPath($config) {
    return $this->generateBaseDirectoryPath($config) . '/' . $this->type;
}
12233 
/**
 * Determines the root directory of the serializer cache.
 *
 * @param mixed $config Configuration object; %Cache.SerializerPath is read.
 * @return mixed The configured path, or the bundled
 *               HTMLPurifier/DefinitionCache/Serializer directory under
 *               HTMLPURIFIER_PREFIX when the directive is null.
 */
public function generateBaseDirectoryPath($config) {
    $configured = $config->get('Cache.SerializerPath');
    if ($configured === null) {
        return HTMLPURIFIER_PREFIX . '/HTMLPurifier/DefinitionCache/Serializer';
    }
    return $configured;
}
12244 
/**
 * Writes data to a file and then adjusts the file's permissions.
 *
 * @param string $file   Path of the file to write.
 * @param string $data   Contents to store.
 * @param mixed  $config Configuration object; %Cache.SerializerPermissions
 *                       is read.
 * @return int|false Result of file_put_contents().
 */
private function _write($file, $data, $config) {
    $written = file_put_contents($file, $data);
    if ($written === false) {
        return $written;
    }
    $mode = $config->get('Cache.SerializerPermissions');
    if (!$mode) {
        $mode = 0644; // invalid config or simpletest
    }
    // mask out the execute bits: cache files are data only
    chmod($file, $mode & 0666);
    return $written;
}
12265 
/**
 * Makes sure the cache directory for this definition type exists and is
 * writable, creating it under the base directory when necessary.
 *
 * @param mixed $config Configuration object; %Cache.SerializerPermissions
 *                      is read, and the object is passed on to the path
 *                      generators.
 * @return bool True when the directory is ready for use, false otherwise
 *              (a warning has been triggered in that case).
 */
private function _prepareDir($config) {
    $directory = $this->generateDirectoryPath($config);
    $chmod = $config->get('Cache.SerializerPermissions');
    if (!$chmod) {
        $chmod = 0755; // invalid config or simpletest
    }
    if (!is_dir($directory)) {
        $base = $this->generateBaseDirectoryPath($config);
        if (!is_dir($base)) {
            trigger_error('Base directory ' . $base . " does not exist,\n" .
                '                    please create or change using %Cache.SerializerPath',
                E_USER_WARNING);
            return false;
        } elseif (!$this->_testPermissions($base, $chmod)) {
            return false;
        }
        // clear the umask so the directory gets exactly the requested mode
        $old = umask(0000);
        $created = mkdir($directory, $chmod);
        umask($old);
        // A failed mkdir() is only acceptable when the directory exists
        // anyway (e.g. another process created it in the meantime);
        // otherwise report the failure instead of claiming success.
        if (!$created && !is_dir($directory)) {
            trigger_error('Could not create directory ' . $directory,
                E_USER_WARNING);
            return false;
        }
    } elseif (!$this->_testPermissions($directory, $chmod)) {
        return false;
    }
    return true;
}
12295 
/**
 * Checks that a directory is writable and, when it is not, either fixes
 * the permissions (if this process owns the directory) or triggers a
 * warning describing what to change.
 *
 * @param string $dir   Directory to test.
 * @param int    $chmod Permission bits used as the starting point for the
 *                      suggested or applied mode.
 * @return bool True when the directory is writable or was made so via
 *              chmod(), false otherwise.
 */
private function _testPermissions($dir, $chmod) {
    // nothing to do when the directory is already writable
    if (is_writable($dir)) {
        return true;
    }
    if (!is_dir($dir)) {
        // callers should normally catch this earlier so that a more
        // specific message can be shown
        trigger_error('Directory '.$dir.' does not exist',
            E_USER_WARNING);
        return false;
    }
    if (!function_exists('posix_getuid')) {
        // no POSIX functions available: only generic advice is possible
        trigger_error('Directory '.$dir.' not writable, '.
            'please alter file permissions',
            E_USER_WARNING);
        return false;
    }
    // POSIX system: work out which permission bits are missing
    if (fileowner($dir) === posix_getuid()) {
        // this process owns the directory, so try to fix it directly
        $chmod = $chmod | 0700;
        if (chmod($dir, $chmod)) {
            return true;
        }
    } elseif (filegroup($dir) === posix_getgid()) {
        $chmod = $chmod | 0070;
    } else {
        // PHP is probably running as nobody, so global permissions
        // are required
        $chmod = $chmod | 0777;
    }
    trigger_error('Directory '.$dir.' not writable, '.
        'please chmod to ' . decoct($chmod),
        E_USER_WARNING);
    return false;
}
12337 
12338 }
12339 
12340 
12341 
12342 
12343 
/**
 * Definition cache decorator that triggers the parent's cleanup() whenever
 * one of the parent's add/set/replace/get operations reports a falsy result.
 */
class HTMLPurifier_DefinitionCache_Decorator_Cleanup extends HTMLPurifier_DefinitionCache_Decorator
{

    /** Name of this decorator. */
    public $name = 'Cleanup';

    /**
     * Creates a new, independent instance of this decorator class.
     */
    public function copy() {
        $fresh = new HTMLPurifier_DefinitionCache_Decorator_Cleanup();
        return $fresh;
    }

    /**
     * Delegates to the parent add(); runs cleanup when that result is falsy.
     */
    public function add($def, $config) {
        $result = parent::add($def, $config);
        if ($result) {
            return $result;
        }
        parent::cleanup($config);
        return $result;
    }

    /**
     * Delegates to the parent set(); runs cleanup when that result is falsy.
     */
    public function set($def, $config) {
        $result = parent::set($def, $config);
        if ($result) {
            return $result;
        }
        parent::cleanup($config);
        return $result;
    }

    /**
     * Delegates to the parent replace(); runs cleanup when that result is falsy.
     */
    public function replace($def, $config) {
        $result = parent::replace($def, $config);
        if ($result) {
            return $result;
        }
        parent::cleanup($config);
        return $result;
    }

    /**
     * Delegates to the parent get(); runs cleanup when nothing truthy came back.
     */
    public function get($config) {
        $found = parent::get($config);
        if ($found) {
            return $found;
        }
        parent::cleanup($config);
        return $found;
    }

}
12383 
12384 
12385 
12386 
12387 
/**
 * Definition cache decorator that keeps definitions in an in-memory array,
 * keyed by generateKey(), in addition to delegating to the parent decorator.
 */
class HTMLPurifier_DefinitionCache_Decorator_Memory extends HTMLPurifier_DefinitionCache_Decorator
{

    /** Definitions remembered during this request, indexed by cache key. */
    protected $definitions;

    /** Name of this decorator. */
    public $name = 'Memory';

    /**
     * Creates a new, independent instance of this decorator class.
     */
    public function copy() {
        $fresh = new HTMLPurifier_DefinitionCache_Decorator_Memory();
        return $fresh;
    }

    /**
     * Delegates to the parent add() and remembers the definition on success.
     */
    public function add($def, $config) {
        $result = parent::add($def, $config);
        if (!$result) {
            return $result;
        }
        $this->definitions[$this->generateKey($config)] = $def;
        return $result;
    }

    /**
     * Delegates to the parent set() and remembers the definition on success.
     */
    public function set($def, $config) {
        $result = parent::set($def, $config);
        if (!$result) {
            return $result;
        }
        $this->definitions[$this->generateKey($config)] = $def;
        return $result;
    }

    /**
     * Delegates to the parent replace() and remembers the definition on success.
     */
    public function replace($def, $config) {
        $result = parent::replace($def, $config);
        if (!$result) {
            return $result;
        }
        $this->definitions[$this->generateKey($config)] = $def;
        return $result;
    }

    /**
     * Returns the remembered entry for this configuration, asking the
     * parent (and remembering its answer) when nothing is stored yet.
     */
    public function get($config) {
        $cacheKey = $this->generateKey($config);
        if (!isset($this->definitions[$cacheKey])) {
            $this->definitions[$cacheKey] = parent::get($config);
        }
        return $this->definitions[$cacheKey];
    }

}
12430 
12431 
12432 
12433 
12434 
/**
 * HTML module that registers the bdo element and supplies the 'dir'
 * attribute for the I18N attribute collection.
 */
class HTMLPurifier_HTMLModule_Bdo extends HTMLPurifier_HTMLModule
{

    public $name = 'Bdo';

    /** The I18N 'dir' entry starts out as false and is filled in by setup(). */
    public $attr_collections = array(
        'I18N' => array('dir' => false)
    );

    /**
     * Registers bdo and attaches the transform keyed 'required-dir'.
     */
    public function setup($config) {
        // The Abstract Module specification has the attribute
        // inclusions wrong for bdo: bdo allows Lang
        $includes = array('Core', 'Lang');
        $attributes = array(
            'dir' => 'Enum#ltr,rtl', // required
        );
        $element = $this->addElement('bdo', 'Inline', 'Inline', $includes, $attributes);
        $element->attr_transform_post['required-dir'] = new HTMLPurifier_AttrTransform_BdoDir();

        $this->attr_collections['I18N']['dir'] = 'Enum#ltr,rtl';
    }

}
12462 
12463 
12464 
12465 
12466 
/**
 * HTML module that only declares attribute collections: Core, Lang, I18N
 * and Common. It registers no elements.
 */
class HTMLPurifier_HTMLModule_CommonAttributes extends HTMLPurifier_HTMLModule
{
    public $name = 'CommonAttributes';

    /**
     * Attribute collections. Index 0 of each collection lists the other
     * collections it includes.
     */
    public $attr_collections = array(
        'Core'   => array(
            0       => array('Style'),
            // 'xml:space' => false,
            'class' => 'Class',
            'id'    => 'ID',
            'title' => 'CDATA',
        ),
        // starts out empty
        'Lang'   => array(),
        'I18N'   => array(
            0 => array('Lang'), // proprietary, for xml:lang/lang
        ),
        'Common' => array(
            0 => array('Core', 'I18N')
        )
    );

}
12489 
12490 
12491 
12492 
12493 
/**
 * HTML module that registers the del and ins elements with a "chameleon"
 * content model.
 *
 * HTML 4.01 specifies that ins/del must not contain block elements when
 * used in an inline context; the chameleon content model is a workaround
 * to achieve this effect. The content model string has the form
 * "inline context ! block context", with the exclamation mark acting as
 * the separator that getChildDef() splits on.
 */
class HTMLPurifier_HTMLModule_Edit extends HTMLPurifier_HTMLModule
{

    public $name = 'Edit';

    /** Signals that this module provides getChildDef(). */
    public $defines_child_def = true;

    /**
     * Registers del and ins with identical content model and attributes.
     */
    public function setup($config) {
        $model = 'Chameleon: #PCDATA | Inline ! #PCDATA | Flow';
        $attributes = array(
            'cite' => 'URI',
            // 'datetime' => 'Datetime', // not implemented
        );
        foreach (array('del', 'ins') as $tag) {
            $this->addElement($tag, 'Inline', $model, 'Common', $attributes);
        }
    }

    /**
     * Builds the chameleon child definition for an element definition.
     *
     * @return mixed False when the definition's content model type is not
     *               'chameleon', otherwise a HTMLPurifier_ChildDef_Chameleon
     *               built from the two halves of the content model.
     */
    public function getChildDef($def) {
        if ($def->content_model_type == 'chameleon') {
            $halves = explode('!', $def->content_model);
            return new HTMLPurifier_ChildDef_Chameleon($halves[0], $halves[1]);
        }
        return false;
    }

}
12528 
12529 
12530 
12531 
12532 
/**
 * HTML module that registers form-related elements: form, input, select,
 * option, textarea, button, fieldset, label, legend and optgroup.
 * The module is flagged as not safe.
 */
class HTMLPurifier_HTMLModule_Forms extends HTMLPurifier_HTMLModule
{
    public $name = 'Forms';
    public $safe = false;

    /** Adds Form to the Block content set and Formctrl to the Inline one. */
    public $content_sets = array(
        'Block' => 'Form',
        'Inline' => 'Formctrl',
    );

    /**
     * Registers every element of the module along with its attributes,
     * exclusions and attribute transforms.
     */
    public function setup($config) {
        // <form>
        $formAttr = array(
            'accept' => 'ContentTypes',
            'accept-charset' => 'Charsets',
            'action*' => 'URI',
            'method' => 'Enum#get,post',
            // really ContentType, but these two are the only ones used today
            'enctype' => 'Enum#application/x-www-form-urlencoded,multipart/form-data',
        );
        $formEl = $this->addElement(
            'form', 'Form', 'Required: Heading | List | Block | fieldset', 'Common', $formAttr
        );
        $formEl->excludes = array('form' => true);

        // <input>
        $inputAttr = array(
            'accept' => 'ContentTypes',
            'accesskey' => 'Character',
            'alt' => 'Text',
            'checked' => 'Bool#checked',
            'disabled' => 'Bool#disabled',
            'maxlength' => 'Number',
            'name' => 'CDATA',
            'readonly' => 'Bool#readonly',
            'size' => 'Number',
            'src' => 'URI#embedded',
            'tabindex' => 'Number',
            'type' => 'Enum#text,password,checkbox,button,radio,submit,reset,file,hidden,image',
            'value' => 'CDATA',
        );
        $inputEl = $this->addElement('input', 'Formctrl', 'Empty', 'Common', $inputAttr);
        $inputEl->attr_transform_post[] = new HTMLPurifier_AttrTransform_Input();

        // <select>
        $selectAttr = array(
            'disabled' => 'Bool#disabled',
            'multiple' => 'Bool#multiple',
            'name' => 'CDATA',
            'size' => 'Number',
            'tabindex' => 'Number',
        );
        $this->addElement('select', 'Formctrl', 'Required: optgroup | option', 'Common', $selectAttr);

        // <option>
        // It's illegal for there to be more than one selected, but not
        // be multiple. Also, no selected means undefined behavior. This might
        // be difficult to implement; perhaps an injector, or a context variable.
        $optionAttr = array(
            'disabled' => 'Bool#disabled',
            'label' => 'Text',
            'selected' => 'Bool#selected',
            'value' => 'CDATA',
        );
        $this->addElement('option', false, 'Optional: #PCDATA', 'Common', $optionAttr);

        // <textarea>
        $textareaAttr = array(
            'accesskey' => 'Character',
            'cols*' => 'Number',
            'disabled' => 'Bool#disabled',
            'name' => 'CDATA',
            'readonly' => 'Bool#readonly',
            'rows*' => 'Number',
            'tabindex' => 'Number',
        );
        $textareaEl = $this->addElement('textarea', 'Formctrl', 'Optional: #PCDATA', 'Common', $textareaAttr);
        $textareaEl->attr_transform_pre[] = new HTMLPurifier_AttrTransform_Textarea();

        // <button>
        $buttonAttr = array(
            'accesskey' => 'Character',
            'disabled' => 'Bool#disabled',
            'name' => 'CDATA',
            'tabindex' => 'Number',
            'type' => 'Enum#button,submit,reset',
            'value' => 'CDATA',
        );
        $buttonEl = $this->addElement(
            'button', 'Formctrl', 'Optional: #PCDATA | Heading | List | Block | Inline', 'Common', $buttonAttr
        );

        // For exclusions, ideally we'd specify content sets, not literal elements
        $buttonEl->excludes = $this->makeLookup(
            'form', 'fieldset', // Form
            'input', 'select', 'textarea', 'label', 'button', // Formctrl
            'a', // as per HTML 4.01 spec, this is omitted by modularization
            'isindex', 'iframe' // legacy items
        );

        // Extra exclusion: img usemap="" is not permitted within this element.
        // We'll omit this for now, since we don't have any good way of
        // indicating it yet.

        // <fieldset>
        // This is HIGHLY user-unfriendly; we need a custom child-def for this
        $this->addElement('fieldset', 'Form', 'Custom: (#WS?,legend,(Flow|#PCDATA)*)', 'Common');

        // <label>
        $labelAttr = array(
            'accesskey' => 'Character',
            // 'for' => 'IDREF', // IDREF not implemented, cannot allow
        );
        $labelEl = $this->addElement('label', 'Formctrl', 'Optional: #PCDATA | Inline', 'Common', $labelAttr);
        $labelEl->excludes = array('label' => true);

        // <legend>
        $legendAttr = array(
            'accesskey' => 'Character',
        );
        $this->addElement('legend', false, 'Optional: #PCDATA | Inline', 'Common', $legendAttr);

        // <optgroup>
        $optgroupAttr = array(
            'disabled' => 'Bool#disabled',
            'label*' => 'Text',
        );
        $this->addElement('optgroup', false, 'Required: option', 'Common', $optgroupAttr);

        // Don't forget an injector for <isindex>. This one's a little complex
        // because it maps to multiple elements.

    }
}
12648 
12649 
12650 
12651 
12652 
/**
 * HTML module that registers the a (anchor) element.
 */
class HTMLPurifier_HTMLModule_Hypertext extends HTMLPurifier_HTMLModule
{

    public $name = 'Hypertext';

    /**
     * Registers a as an inline formatting element that may not contain
     * another a element.
     */
    public function setup($config) {
        $attributes = array(
            // 'accesskey' => 'Character',
            // 'charset' => 'Charset',
            'href' => 'URI',
            // 'hreflang' => 'LanguageCode',
            'rel' => new HTMLPurifier_AttrDef_HTML_LinkTypes('rel'),
            'rev' => new HTMLPurifier_AttrDef_HTML_LinkTypes('rev'),
            // 'tabindex' => 'Number',
            // 'type' => 'ContentType',
        );
        $anchor = $this->addElement('a', 'Inline', 'Inline', 'Common', $attributes);
        $anchor->formatting = true;
        $anchor->excludes = array('a' => true);
    }

}
12680 
12681 
12682 
12683 
12684 
/**
 * HTML module that registers the iframe element. The module is flagged as
 * not safe unless %HTML.SafeIframe is enabled.
 */
class HTMLPurifier_HTMLModule_Iframe extends HTMLPurifier_HTMLModule
{

    public $name = 'Iframe';
    public $safe = false;

    /**
     * Marks the module safe when %HTML.SafeIframe is truthy, then
     * registers iframe with its attributes.
     */
    public function setup($config) {
        if ($config->get('HTML.SafeIframe')) {
            $this->safe = true;
        }
        $attributes = array(
            'src' => 'URI#embedded',
            'width' => 'Length',
            'height' => 'Length',
            'name' => 'ID',
            'scrolling' => 'Enum#yes,no,auto',
            'frameborder' => 'Enum#0,1',
            'longdesc' => 'URI',
            'marginheight' => 'Pixels',
            'marginwidth' => 'Pixels',
        );
        $this->addElement('iframe', 'Inline', 'Flow', 'Common', $attributes);
    }

}
12719 
12720 
12721 
12722 
12723 
/**
 * HTML module that registers the img element.
 */
class HTMLPurifier_HTMLModule_Image extends HTMLPurifier_HTMLModule
{

    public $name = 'Image';

    /**
     * Registers img. Width and height are limited to pixel values capped
     * by %HTML.MaxImgLength, unless that directive is null or
     * %HTML.Trusted is on, in which case they become 'Length'.
     */
    public function setup($config) {
        $limit = $config->get('HTML.MaxImgLength');
        $attributes = array(
            'alt*' => 'Text',
            // According to the spec, it's Length, but percents can
            // be abused, so we allow only Pixels.
            'height' => 'Pixels#' . $limit,
            'width'  => 'Pixels#' . $limit,
            'longdesc' => 'URI',
            'src*' => new HTMLPurifier_AttrDef_URI(true), // embedded
        );
        $image = $this->addElement('img', 'Inline', 'Empty', 'Common', $attributes);

        if ($limit === null || $config->get('HTML.Trusted')) {
            $image->attr['width'] = 'Length';
            $image->attr['height'] = 'Length';
        }

        // kind of strange, but splitting things up would be inefficient:
        // one transform instance is registered both as post and as pre
        $required = new HTMLPurifier_AttrTransform_ImgRequired();
        $image->attr_transform_post[] = $required;
        $image->attr_transform_pre[] = $required;
    }

}
12760 
12761 
12762 
12763 
12764 
/**
 * HTML module that registers legacy elements (basefont, center, dir, font,
 * menu, s, strike, u) and adds legacy presentational attributes to
 * elements via addBlankElement().
 */
class HTMLPurifier_HTMLModule_Legacy extends HTMLPurifier_HTMLModule
{

    public $name = 'Legacy';

    /**
     * Registers the legacy elements first, then applies the attribute and
     * content model modifications to other elements.
     */
    public function setup($config) {

        // ---- elements introduced by this module ----

        $compactOnly = array(
            'compact' => 'Bool#compact'
        );

        $this->addElement('basefont', 'Inline', 'Empty', false, array(
            'color' => 'Color',
            'face' => 'Text', // extremely broad, we should
            'size' => 'Text', // tighten it
            'id' => 'ID'
        ));
        $this->addElement('center', 'Block', 'Flow', 'Common');
        $this->addElement('dir', 'Block', 'Required: li', 'Common', $compactOnly);
        $this->addElement('font', 'Inline', 'Inline', array('Core', 'I18N'), array(
            'color' => 'Color',
            'face' => 'Text', // extremely broad, we should
            'size' => 'Text', // tighten it
        ));
        $this->addElement('menu', 'Block', 'Required: li', 'Common', $compactOnly);

        foreach (array('s', 'strike', 'u') as $tag) {
            $inline = $this->addElement($tag, 'Inline', 'Inline', 'Common');
            $inline->formatting = true;
        }

        // ---- modifications to old elements ----

        $align = 'Enum#left,right,center,justify';

        $relaxedModels = array(
            'address'    => 'Inline | #PCDATA | p',
            'blockquote' => 'Flow | #PCDATA',
        );
        foreach ($relaxedModels as $tag => $model) {
            $element = $this->addBlankElement($tag);
            $element->content_model = $model;
            $element->content_model_type = 'optional';
            $element->child = false;
        }

        $br = $this->addBlankElement('br');
        $br->attr['clear'] = 'Enum#left,all,right,none';

        $caption = $this->addBlankElement('caption');
        $caption->attr['align'] = 'Enum#top,bottom,left,right';

        $div = $this->addBlankElement('div');
        $div->attr['align'] = $align;

        $dl = $this->addBlankElement('dl');
        $dl->attr['compact'] = 'Bool#compact';

        // h1 through h6
        foreach (range(1, 6) as $level) {
            $heading = $this->addBlankElement('h' . $level);
            $heading->attr['align'] = $align;
        }

        $hr = $this->addBlankElement('hr');
        $hr->attr['align'] = $align;
        $hr->attr['noshade'] = 'Bool#noshade';
        $hr->attr['size'] = 'Pixels';
        $hr->attr['width'] = 'Length';

        $img = $this->addBlankElement('img');
        $img->attr['align'] = 'IAlign';
        $img->attr['border'] = 'Pixels';
        $img->attr['hspace'] = 'Pixels';
        $img->attr['vspace'] = 'Pixels';

        // figure out this integer business

        $li = $this->addBlankElement('li');
        $li->attr['value'] = new HTMLPurifier_AttrDef_Integer();
        $li->attr['type']  = 'Enum#s:1,i,I,a,A,disc,square,circle';

        $ol = $this->addBlankElement('ol');
        $ol->attr['compact'] = 'Bool#compact';
        $ol->attr['start'] = new HTMLPurifier_AttrDef_Integer();
        $ol->attr['type'] = 'Enum#s:1,i,I,a,A';

        $p = $this->addBlankElement('p');
        $p->attr['align'] = $align;

        $pre = $this->addBlankElement('pre');
        $pre->attr['width'] = 'Number';

        // script omitted

        $table = $this->addBlankElement('table');
        $table->attr['align'] = 'Enum#left,center,right';
        $table->attr['bgcolor'] = 'Color';

        $tr = $this->addBlankElement('tr');
        $tr->attr['bgcolor'] = 'Color';

        // th and td receive the same four attributes
        foreach (array('th', 'td') as $tag) {
            $cell = $this->addBlankElement($tag);
            $cell->attr['bgcolor'] = 'Color';
            $cell->attr['height'] = 'Length';
            $cell->attr['nowrap'] = 'Bool#nowrap';
            $cell->attr['width'] = 'Length';
        }

        $ul = $this->addBlankElement('ul');
        $ul->attr['compact'] = 'Bool#compact';
        $ul->attr['type'] = 'Enum#square,disc,circle';

        // "safe" modifications to "unsafe" elements
        // WARNING: If you want to add support for an unsafe, legacy
        // attribute, make a new TrustedLegacy module with the trusted
        // bit set appropriately

        $form = $this->addBlankElement('form');
        $form->content_model = 'Flow | #PCDATA';
        $form->content_model_type = 'optional';
        $form->attr['target'] = 'FrameTarget';

        $input = $this->addBlankElement('input');
        $input->attr['align'] = 'IAlign';

        $legend = $this->addBlankElement('legend');
        $legend->attr['align'] = 'LAlign';

    }

}
12920 
12921 
12922 
12923 
12924 
/**
 * HTML module that registers the list elements ol, ul, dl, li, dd and dt.
 */
class HTMLPurifier_HTMLModule_List extends HTMLPurifier_HTMLModule
{

    public $name = 'List';

    // According to the abstract schema, the List content set is a fully formed
    // one or more expr, but it invariably occurs in an optional declaration
    // so we're not going to do that subtlety. It might cause trouble
    // if a user defines "List" and expects that multiple lists are
    // allowed to be specified, but then again, that's not very intuitive.
    // Furthermore, the actual XML Schema may disagree. Regardless,
    // we don't have support for such nested expressions without using
    // the incredibly inefficient and draconic Custom ChildDef.

    /** Adds List to the Flow content set. */
    public $content_sets = array('Flow' => 'List');

    /**
     * Registers the list containers and their item elements.
     */
    public function setup($config) {
        $ordered = $this->addElement('ol', 'List', new HTMLPurifier_ChildDef_List(), 'Common');
        $unordered = $this->addElement('ul', 'List', new HTMLPurifier_ChildDef_List(), 'Common');

        // XXX The wrap attribute is handled by MakeWellFormed.  This is all
        // quite unsatisfactory, because we generated this
        // *specifically* for lists, and now a big chunk of the handling
        // is done properly by the List ChildDef.  So actually, we just
        // want enough information to make autoclosing work properly,
        // and then hand off the tricky stuff to the ChildDef.
        $ordered->wrap = 'li';
        $unordered->wrap = 'li';

        $this->addElement('dl', 'List', 'Required: dt | dd', 'Common');

        // item elements belong to no content set of their own
        $this->addElement('li', false, 'Flow', 'Common');

        $this->addElement('dd', false, 'Flow', 'Common');
        $this->addElement('dt', false, 'Inline', 'Common');
    }

}
12964 
12965 
12966 
12967 
12968 
/**
 * HTML module that adds a 'name' attribute to a, applet, form, frame,
 * iframe, img and map.
 */
class HTMLPurifier_HTMLModule_Name extends HTMLPurifier_HTMLModule
{

    public $name = 'Name';

    /**
     * Declares 'name' as CDATA on each element and, unless
     * %HTML.Attr.Name.UseCDATA is truthy, attaches the transform keyed
     * 'NameSync'.
     */
    public function setup($config) {
        $targets = array('a', 'applet', 'form', 'frame', 'iframe', 'img', 'map');
        foreach ($targets as $tag) {
            $blank = $this->addBlankElement($tag);
            $blank->attr['name'] = 'CDATA';
            if ($config->get('HTML.Attr.Name.UseCDATA')) {
                continue;
            }
            $blank->attr_transform_post['NameSync'] = new HTMLPurifier_AttrTransform_NameSync();
        }
    }

}
12986 
12987 
12988 
12989 
12990 
/**
 * HTML module that attaches HTMLPurifier_AttrTransform_Nofollow to the
 * a element as a post transform.
 */
class HTMLPurifier_HTMLModule_Nofollow extends HTMLPurifier_HTMLModule
{

    public $name = 'Nofollow';

    /**
     * Appends the Nofollow transform to the a element's post transforms.
     */
    public function setup($config) {
        $anchor = $this->addBlankElement('a');
        $anchor->attr_transform_post[] = new HTMLPurifier_AttrTransform_Nofollow();
    }

}
13006 
13007 
13008 
13009 
13010 
/**
 * HTML module that contributes the 'lang' attribute to the Lang attribute
 * collection. It registers no elements.
 */
class HTMLPurifier_HTMLModule_NonXMLCommonAttributes extends HTMLPurifier_HTMLModule
{
    public $name = 'NonXMLCommonAttributes';

    /** Lang collection: 'lang' is declared as a LanguageCode. */
    public $attr_collections = array(
        'Lang' => array('lang' => 'LanguageCode')
    );
}
13021 
13022 
13023 
13024 
13025 
/**
 * HTML module that registers the object and param elements.
 * The module is flagged as not safe.
 */
class HTMLPurifier_HTMLModule_Object extends HTMLPurifier_HTMLModule
{

    public $name = 'Object';
    public $safe = false;

    /**
     * Registers object (inline) and its param child element.
     */
    public function setup($config) {

        $objectAttr = array(
            'archive' => 'URI',
            'classid' => 'URI',
            'codebase' => 'URI',
            'codetype' => 'Text',
            'data' => 'URI',
            'declare' => 'Bool#declare',
            'height' => 'Length',
            'name' => 'CDATA',
            'standby' => 'Text',
            'tabindex' => 'Number',
            'type' => 'ContentType',
            'width' => 'Length'
        );
        $this->addElement('object', 'Inline', 'Optional: #PCDATA | Flow | param', 'Common', $objectAttr);

        // param takes no attribute collections, only the attributes below
        $paramAttr = array(
            'id' => 'ID',
            'name*' => 'Text',
            'type' => 'Text',
            'value' => 'Text',
            'valuetype' => 'Enum#data,ref,object'
        );
        $this->addElement('param', false, 'Empty', false, $paramAttr);

    }

}
13069 
13070 
13071 
13072 
13073 
/**
 * HTML module that registers presentational elements: hr, sub, sup, b,
 * big, i, small and tt.
 */
class HTMLPurifier_HTMLModule_Presentation extends HTMLPurifier_HTMLModule
{

    public $name = 'Presentation';

    /**
     * Registers the elements; b, big, i, small and tt are additionally
     * flagged as formatting elements.
     */
    public function setup($config) {
        $this->addElement('hr', 'Block', 'Empty', 'Common');

        foreach (array('sub', 'sup') as $tag) {
            $this->addElement($tag, 'Inline', 'Inline', 'Common');
        }

        foreach (array('b', 'big', 'i', 'small', 'tt') as $tag) {
            $element = $this->addElement($tag, 'Inline', 'Inline', 'Common');
            $element->formatting = true;
        }
    }

}
13106 
13107 
13108 
13109 
13110 
/**
 * HTML module that registers the proprietary marquee element.
 */
class HTMLPurifier_HTMLModule_Proprietary extends HTMLPurifier_HTMLModule
{

    public $name = 'Proprietary';

    /**
     * Registers marquee as an inline element with Flow content.
     */
    public function setup($config) {

        $marqueeAttr = array(
            'direction' => 'Enum#left,right,up,down',
            'behavior' => 'Enum#alternate',
            'width' => 'Length',
            'height' => 'Length',
            'scrolldelay' => 'Number',
            'scrollamount' => 'Number',
            'loop' => 'Number',
            'bgcolor' => 'Color',
            'hspace' => 'Pixels',
            'vspace' => 'Pixels',
        );
        $this->addElement('marquee', 'Inline', 'Flow', 'Common', $marqueeAttr);

    }

}
13140 
13141 
13142 
13143 
13144 
/**
 * HTML module that registers the ruby annotation elements: ruby, rbc,
 * rtc, rb, rt and rp.
 */
class HTMLPurifier_HTMLModule_Ruby extends HTMLPurifier_HTMLModule
{

    public $name = 'Ruby';

    /**
     * Registers the ruby container and its parts; rb and rt may not
     * contain a nested ruby element.
     */
    public function setup($config) {
        $rubyModel = 'Custom: ((rb, (rt | (rp, rt, rp))) | (rbc, rtc, rtc?))';
        $this->addElement('ruby', 'Inline', $rubyModel, 'Common');

        $this->addElement('rbc', false, 'Required: rb', 'Common');
        $this->addElement('rtc', false, 'Required: rt', 'Common');

        $noNestedRuby = array('ruby' => true);

        $base = $this->addElement('rb', false, 'Inline', 'Common');
        $base->excludes = $noNestedRuby;

        $text = $this->addElement('rt', false, 'Inline', 'Common', array('rbspan' => 'Number'));
        $text->excludes = $noNestedRuby;

        $this->addElement('rp', false, 'Optional: #PCDATA', 'Common');
    }

}
13168 
13169 
13170 
13171 
13172 
/**
 * HTML module that registers the embed element with a restricted set of
 * attribute values and the SafeEmbed attribute transform.
 */
class HTMLPurifier_HTMLModule_SafeEmbed extends HTMLPurifier_HTMLModule
{

    public $name = 'SafeEmbed';

    /**
     * Registers embed; width and height are pixel values capped by
     * %HTML.MaxImgLength.
     */
    public function setup($config) {

        $limit = $config->get('HTML.MaxImgLength');
        $attributes = array(
            'src*' => 'URI#embedded',
            'type' => 'Enum#application/x-shockwave-flash',
            'width' => 'Pixels#' . $limit,
            'height' => 'Pixels#' . $limit,
            'allowscriptaccess' => 'Enum#never',
            'allownetworking' => 'Enum#internal',
            'flashvars' => 'Text',
            'wmode' => 'Enum#window,transparent,opaque',
            'name' => 'ID',
        );
        $element = $this->addElement('embed', 'Inline', 'Empty', 'Common', $attributes);
        $element->attr_transform_post[] = new HTMLPurifier_AttrTransform_SafeEmbed();

    }

}
13203 
13204 
13205 
13206 
13207 
/**
 * HTML module that registers restricted object and param elements and
 * requests the SafeObject injector.
 *
 * The element definitions on their own do not make these elements safe;
 * the attached attribute transforms are an essential part of that.
 */
class HTMLPurifier_HTMLModule_SafeObject extends HTMLPurifier_HTMLModule
{

    public $name = 'SafeObject';

    /**
     * Registers object and param, their post attribute transforms, and the
     * SafeObject injector.
     * @param $config Configuration object; HTML.MaxImgLength is read from it
     *                and used as the parameter of the width/height definitions
     */
    public function setup($config) {

        $max_length = $config->get('HTML.MaxImgLength');
        $dimension  = 'Pixels#' . $max_length;

        // Only a single, fixed codebase URL is accepted.
        $codebase = new HTMLPurifier_AttrDef_Enum(array(
            'http://download.macromedia.com/pub/shockwave/cabs/flash/swflash.cab#version=6,0,40,0'
        ));

        $object_attr = array(
            // The spec does not require this attribute, but the only
            // value accepted here is Flash.
            'type'     => 'Enum#application/x-shockwave-flash',
            'width'    => $dimension,
            'height'   => $dimension,
            'data'     => 'URI#embedded',
            'codebase' => $codebase,
        );
        $object_def = $this->addElement(
            'object',
            'Inline',
            'Optional: param | Flow | #PCDATA',
            'Common',
            $object_attr
        );
        $object_def->attr_transform_post[] = new HTMLPurifier_AttrTransform_SafeObject();

        $param_attr = array(
            'id'    => 'ID',
            'name*' => 'Text',
            'value' => 'Text'
        );
        $param_def = $this->addElement('param', false, 'Empty', false, $param_attr);
        $param_def->attr_transform_post[] = new HTMLPurifier_AttrTransform_SafeParam();

        $this->info_injector[] = 'SafeObject';

    }

}
13256 
13257 
13258 
13259 
13260 
13261 /*
13262 
13263 WARNING: THIS MODULE IS EXTREMELY DANGEROUS AS IT ENABLES INLINE SCRIPTING
13264 INSIDE HTML PURIFIER DOCUMENTS. USE ONLY WITH TRUSTED USER INPUT!!!
13265 
13266 */
13267 
/**
 * HTML module that defines the script and noscript elements.
 *
 * The module is flagged as unsafe ($safe = false): it permits inline
 * scripting and must only be enabled for trusted input.
 */
class HTMLPurifier_HTMLModule_Scripting extends HTMLPurifier_HTMLModule
{
    public $name = 'Scripting';
    public $elements = array('script', 'noscript');
    public $content_sets = array('Block' => 'script | noscript', 'Inline' => 'script | noscript');
    public $safe = false;

    /**
     * Builds the element definitions for noscript and script by hand.
     * @param $config Configuration object; not consulted by this module
     */
    public function setup($config) {
        // TODO: create custom child-definition for noscript that
        // auto-wraps stray #PCDATA in a similar manner to
        // blockquote's custom definition (we would use it but
        // blockquote's contents are optional while noscript's contents
        // are required)

        // TODO: convert this to new syntax, main problem is getting
        // both content sets working

        // noscript: in theory this could be safe, but there is no
        // apparent reason to allow it.
        $noscript = $this->info['noscript'] = new HTMLPurifier_ElementDef();
        $noscript->attr = array( 0 => array('Common') );
        $noscript->content_model = 'Heading | List | Block';
        $noscript->content_model_type = 'required';

        // script: only defer, src and type are accepted as attributes
        $script = $this->info['script'] = new HTMLPurifier_ElementDef();
        $script->attr = array(
            'defer' => new HTMLPurifier_AttrDef_Enum(array('defer')),
            'src'   => new HTMLPurifier_AttrDef_URI(true),
            'type'  => new HTMLPurifier_AttrDef_Enum(array('text/javascript'))
        );
        $script->content_model = '#PCDATA';
        $script->content_model_type = 'optional';

        // One shared transform instance runs both before and after
        // attribute validation, keyed by 'type'.
        $script_required = new HTMLPurifier_AttrTransform_ScriptRequired();
        $script->attr_transform_post['type'] = $script_required;
        $script->attr_transform_pre['type']  = $script_required;
    }
}
13311 
13312 
13313 
13314 
13315 
/**
 * HTML module that contributes the style attribute through a 'Style'
 * attribute collection, which is included from the 'Core' collection.
 */
class HTMLPurifier_HTMLModule_StyleAttribute extends HTMLPurifier_HTMLModule
{

    public $name = 'StyleAttribute';

    /**
     * Attribute collections contributed by this module.
     *
     * The inclusion routine differs from the Abstract Modules but is in
     * line with the DTD and XML Schemas.
     */
    public $attr_collections = array(
        // placeholder value; setup() replaces it with the real definition
        'Style' => array('style' => false),
        'Core'  => array(array('Style'))
    );

    /**
     * Swaps the placeholder for the style attribute with a CSS attribute
     * definition object.
     * @param $config Configuration object; not consulted by this module
     */
    public function setup($config) {
        $css_def = new HTMLPurifier_AttrDef_CSS();
        $this->attr_collections['Style']['style'] = $css_def;
    }

}
13336 
13337 
13338 
13339 
13340 
/**
 * HTML module that registers the table elements: caption, table, td, th,
 * tr, col, colgroup, tbody, thead and tfoot.
 */
class HTMLPurifier_HTMLModule_Tables extends HTMLPurifier_HTMLModule
{

    public $name = 'Tables';

    /**
     * Registers every table element together with its attributes.
     * @param $config Configuration object; not consulted by this module
     */
    public function setup($config) {

        $this->addElement('caption', false, 'Inline', 'Common');

        $table_attr = array(
            'border' => 'Pixels',
            'cellpadding' => 'Length',
            'cellspacing' => 'Length',
            'frame' => 'Enum#void,above,below,hsides,lhs,rhs,vsides,box,border',
            'rules' => 'Enum#none,groups,rows,cols,all',
            'summary' => 'Text',
            'width' => 'Length'
        );
        $this->addElement(
            'table', 'Block', new HTMLPurifier_ChildDef_Table(), 'Common', $table_attr
        );

        // alignment attributes shared by cells, rows, columns and row groups
        $alignment = array(
            'align' => 'Enum#left,center,right,justify,char',
            'charoff' => 'Length',
            'valign' => 'Enum#top,middle,bottom,baseline',
        );

        // td / th: cell-specific attributes followed by the alignment set
        $cell_specific = array(
            'abbr'    => 'Text',
            'colspan' => 'Number',
            'rowspan' => 'Number',
            // Apparently, as of HTML5 this attribute only applies
            // to 'th' elements.
            'scope'   => 'Enum#row,col,rowgroup,colgroup',
        );
        $cell_attr = array_merge($cell_specific, $alignment);
        foreach (array('td', 'th') as $cell) {
            $this->addElement($cell, false, 'Flow', 'Common', $cell_attr);
        }

        $this->addElement('tr', false, 'Required: td | th', 'Common', $alignment);

        // col / colgroup: column-specific attributes followed by the alignment set
        $column_specific = array(
            'span'  => 'Number',
            'width' => 'MultiLength',
        );
        $column_attr = array_merge($column_specific, $alignment);
        $this->addElement('col',      false, 'Empty',         'Common', $column_attr);
        $this->addElement('colgroup', false, 'Optional: col', 'Common', $column_attr);

        // row groups all share one content model and attribute set
        foreach (array('tbody', 'thead', 'tfoot') as $row_group) {
            $this->addElement($row_group, false, 'Required: tr', 'Common', $alignment);
        }

    }

}
13406 
13407 
13408 
13409 
13410 
/**
 * HTML module that adds a target attribute, validated as a frame target,
 * to the a element.
 */
class HTMLPurifier_HTMLModule_Target extends HTMLPurifier_HTMLModule
{

    public $name = 'Target';

    /**
     * Adds the target attribute to each listed element.
     * @param $config Configuration object; not consulted by this module
     */
    public function setup($config) {
        // elements that receive the attribute; currently only links
        $targetable = array('a');
        foreach ($targetable as $tag) {
            $definition = $this->addBlankElement($tag);
            $frame_target = new HTMLPurifier_AttrDef_HTML_FrameTarget();
            $definition->attr = array('target' => $frame_target);
        }
    }

}
13430 
13431 
13432 
13433 
13434 
/**
 * HTML module that attaches the TargetBlank attribute transform to the
 * a element as a post transform.
 */
class HTMLPurifier_HTMLModule_TargetBlank extends HTMLPurifier_HTMLModule
{

    public $name = 'TargetBlank';

    /**
     * Registers the post attribute transform on a.
     * @param $config Configuration object; not consulted by this module
     */
    public function setup($config) {
        $anchor = $this->addBlankElement('a');
        $transform = new HTMLPurifier_AttrTransform_TargetBlank();
        $anchor->attr_transform_post[] = $transform;
    }

}
13450 
13451 
13452 
13453 
13454 
/**
 * HTML module that registers the basic text elements: inline phrasal and
 * structural elements, block phrasal elements, headings, p and div. It
 * also defines the Flow content set.
 */
class HTMLPurifier_HTMLModule_Text extends HTMLPurifier_HTMLModule
{

    public $name = 'Text';
    public $content_sets = array(
        'Flow' => 'Heading | Block | Inline'
    );

    /**
     * Registers all text elements. Elements are added in a fixed order,
     * which is kept the same as the element list below reads.
     * @param $config Configuration object; not consulted by this module
     */
    public function setup($config) {

        // Inline Phrasal -------------------------------------------------
        foreach (array('abbr', 'acronym', 'cite', 'dfn', 'kbd') as $tag) {
            $this->addElement($tag, 'Inline', 'Inline', 'Common');
        }
        $this->addElement('q', 'Inline', 'Inline', 'Common', array('cite' => 'URI'));
        foreach (array('samp', 'var') as $tag) {
            $this->addElement($tag, 'Inline', 'Inline', 'Common');
        }

        // These phrasal elements are additionally flagged as formatting
        foreach (array('em', 'strong', 'code') as $tag) {
            $formatting_def = $this->addElement($tag, 'Inline', 'Inline', 'Common');
            $formatting_def->formatting = true;
        }

        // Inline Structural ----------------------------------------------
        $this->addElement('span', 'Inline', 'Inline', 'Common');
        $this->addElement('br',   'Inline', 'Empty',  'Core');

        // Block Phrasal --------------------------------------------------
        $this->addElement('address', 'Block', 'Inline', 'Common');
        $this->addElement(
            'blockquote',
            'Block',
            'Optional: Heading | Block | List',
            'Common',
            array('cite' => 'URI')
        );

        $preformatted = $this->addElement('pre', 'Block', 'Inline', 'Common');
        $preformatted->excludes = $this->makeLookup(
            'img', 'big', 'small', 'object', 'applet', 'font', 'basefont'
        );

        // h1 through h6
        for ($rank = 1; $rank <= 6; $rank++) {
            $this->addElement('h' . $rank, 'Heading', 'Inline', 'Common');
        }

        // Block Structural -----------------------------------------------
        $paragraph = $this->addElement('p', 'Block', 'Inline', 'Common');
        $closes_paragraph = array(
            "address", "blockquote", "center", "dir", "div",
            "dl", "fieldset", "ol", "p", "ul"
        );
        $paragraph->autoclose = array_flip($closes_paragraph);

        $this->addElement('div', 'Block', 'Flow', 'Common');

    }

}
13522 
13523 
13524 
13525 
13526 
/**
 * Base class for "Tidy" modules: modules that contribute fixes (attribute
 * transforms, tag transforms, child definitions or content model types)
 * grouped into levels. Subclasses override makeFixes() to supply the fixes
 * and usually set $defaultLevel.
 */
class HTMLPurifier_HTMLModule_Tidy extends HTMLPurifier_HTMLModule
{

    /**
     * Known level names, in order. Index 0 means "no fixes"; every other
     * level activates its own fixes plus those of all levels listed before
     * it (see getFixesForLevel()).
     */
    public $levels = array(0 => 'none', 'light', 'medium', 'heavy');

    /**
     * Level that receives every fix returned by makeFixes(), or null to
     * leave $fixesForLevel untouched.
     */
    public $defaultLevel = null;

    /**
     * Map of level name to the list of fix names that belong to that level.
     */
    public $fixesForLevel = array(
        'light'  => array(),
        'medium' => array(),
        'heavy'  => array()
    );

    /**
     * Builds the fixes, selects the ones that are active under the current
     * configuration, and installs them on this module.
     *
     * A fix is kept when it is not named in HTML.TidyRemove and is either
     * named in HTML.TidyAdd or belongs to the configured HTML.TidyLevel.
     *
     * @param $config Configuration object; HTML.TidyLevel, HTML.TidyAdd and
     *                HTML.TidyRemove are read from it
     */
    public function setup($config) {

        // create fixes, initialize fixesForLevel
        $fixes = $this->makeFixes();
        $this->makeFixesForLevel($fixes);

        // figure out which fixes to use
        $level = $config->get('HTML.TidyLevel');
        $fixes_lookup = $this->getFixesForLevel($level);

        // get custom fix declarations: these need namespace processing
        $add_fixes    = $config->get('HTML.TidyAdd');
        $remove_fixes = $config->get('HTML.TidyRemove');

        foreach ($fixes as $name => $fix) {
            // needs to be refactored a little to implement globbing
            if (
                isset($remove_fixes[$name]) ||
                (!isset($add_fixes[$name]) && !isset($fixes_lookup[$name]))
            ) {
                unset($fixes[$name]);
            }
        }

        // populate this module with necessary fixes
        $this->populate($fixes);

    }

    /**
     * Retrieves the names of all fixes active at a level, i.e. the fixes of
     * that level and of every level that precedes it in $levels.
     *
     * An unrecognized level raises an E_USER_WARNING and yields no fixes.
     *
     * @param $level String level identifier, see $levels
     * @return Lookup array mapping fix name to true
     */
    public function getFixesForLevel($level) {
        if ($level == $this->levels[0]) {
            return array();
        }
        $activated_levels = array();
        for ($i = 1, $c = count($this->levels); $i < $c; $i++) {
            $activated_levels[] = $this->levels[$i];
            if ($this->levels[$i] == $level) break;
        }
        // the loop ran to completion without a break: no level matched
        if ($i == $c) {
            trigger_error(
                'Tidy level ' . htmlspecialchars($level) . ' not recognized',
                E_USER_WARNING
            );
            return array();
        }
        $ret = array();
        // separate loop variable so the $level parameter is not overwritten
        foreach ($activated_levels as $activated_level) {
            foreach ($this->fixesForLevel[$activated_level] as $fix) {
                $ret[$fix] = true;
            }
        }
        return $ret;
    }

    /**
     * Assigns every fix to $defaultLevel, replacing whatever was listed for
     * that level in $fixesForLevel. Does nothing when $defaultLevel is not
     * set; raises an E_USER_ERROR when it names an unknown level.
     *
     * @param $fixes Associative array of fix name to fix implementation
     */
    public function makeFixesForLevel($fixes) {
        if (!isset($this->defaultLevel)) return;
        if (!isset($this->fixesForLevel[$this->defaultLevel])) {
            trigger_error(
                'Default level ' . $this->defaultLevel . ' does not exist',
                E_USER_ERROR
            );
            return;
        }
        $this->fixesForLevel[$this->defaultLevel] = array_keys($fixes);
    }

    /**
     * Installs the given fixes on this module. The fix name decides where
     * each fix is stored, see getFixType(). An unsupported fix type raises
     * an E_USER_ERROR.
     *
     * @param $fixes Associative array of fix name to fix implementation
     */
    public function populate($fixes) {
        foreach ($fixes as $name => $fix) {
            // determine what the fix is for
            list($type, $params) = $this->getFixType($name);
            switch ($type) {
                case 'attr_transform_pre':
                case 'attr_transform_post':
                    $attr = $params['attr'];
                    if (isset($params['element'])) {
                        $element = $params['element'];
                        if (empty($this->info[$element])) {
                            $e = $this->addBlankElement($element);
                        } else {
                            $e = $this->info[$element];
                        }
                    } else {
                        // no element given: the transform is stored on the
                        // module itself, in info_attr_transform_pre/post
                        $type = "info_$type";
                        $e = $this;
                    }
                    // PHP does some weird parsing when I do
                    // $e->$type[$attr], so I have to assign a ref.
                    $f =& $e->$type;
                    $f[$attr] = $fix;
                    break;
                case 'tag_transform':
                    $this->info_tag_transform[$params['element']] = $fix;
                    break;
                case 'child':
                case 'content_model_type':
                    $element = $params['element'];
                    if (empty($this->info[$element])) {
                        $e = $this->addBlankElement($element);
                    } else {
                        $e = $this->info[$element];
                    }
                    $e->$type = $fix;
                    break;
                default:
                    trigger_error("Fix type $type not supported", E_USER_ERROR);
                    break;
            }
        }
    }

    /**
     * Parses a fix name of the form element@attr#property, where every
     * part is optional, into a fix type and its parameters.
     *
     * - With an attribute, the type is 'attr_transform_' followed by the
     *   property, which defaults to 'pre'.
     * - Without an attribute and without a property, the type is
     *   'tag_transform'.
     * - Otherwise the property itself is the type.
     *
     * @param $name String name of the fix
     * @return array(string $fix_type, array $fix_parameters), where the
     *         parameters may contain the keys 'element' and 'attr'
     */
    public function getFixType($name) {
        // parse it
        $property = $attr = null;
        if (strpos($name, '#') !== false) list($name, $property) = explode('#', $name);
        if (strpos($name, '@') !== false) list($name, $attr)     = explode('@', $name);

        // figure out the parameters
        $params = array();
        if ($name !== '')    $params['element'] = $name;
        if (!is_null($attr)) $params['attr'] = $attr;

        // special case: attribute transform
        if (!is_null($attr)) {
            if (is_null($property)) $property = 'pre';
            $type = 'attr_transform_' . $property;
            return array($type, $params);
        }

        // special case: tag transform
        if (is_null($property)) {
            return array('tag_transform', $params);
        }

        return array($property, $params);

    }

    /**
     * Defines all fixes the module will perform; meant to be overridden.
     *
     * The base implementation returns an empty array rather than nothing,
     * so that setup(), makeFixesForLevel() and populate() always receive
     * an array even when a subclass does not override this method.
     *
     * @return Associative array of fix name to fix implementation
     */
    public function makeFixes() {
        return array();
    }

}
13730 
13731 
13732 
13733 
13734 
/**
 * HTML module that contributes the xml:lang attribute, validated as a
 * language code, to the 'Lang' attribute collection.
 */
class HTMLPurifier_HTMLModule_XMLCommonAttributes extends HTMLPurifier_HTMLModule
{

    public $name = 'XMLCommonAttributes';

    /**
     * Attribute collections contributed by this module.
     */
    public $attr_collections = array(
        'Lang' => array('xml:lang' => 'LanguageCode'),
    );

}
13745 
13746 
13747 
13748 
13749 
/**
 * Tidy module that applies the Name attribute transform to the name
 * attribute of img and a. All of its fixes belong to the 'heavy' level.
 */
class HTMLPurifier_HTMLModule_Tidy_Name extends HTMLPurifier_HTMLModule_Tidy
{

    public $name = 'Tidy_Name';
    public $defaultLevel = 'heavy';

    /**
     * Builds the fixes for this module.
     * @return Associative array of fix name to fix implementation
     */
    public function makeFixes() {

        $fixes = array();

        // @name for img, a -----------------------------------------------
        // The attribute is technically still permitted on strict
        // doctypes, so authors may use it; it is, however, deprecated
        // in future versions of XHTML. Both elements share one
        // transform instance.
        $fixes['img@name'] =
        $fixes['a@name'] = new HTMLPurifier_AttrTransform_Name();

        return $fixes;
    }

}
13770 
13771 
13772 
13773 
13774 
/**
 * Tidy module that registers attribute transforms for the background
 * attribute on table elements and for the height attribute on table.
 * All of its fixes belong to the 'light' level.
 */
class HTMLPurifier_HTMLModule_Tidy_Proprietary extends HTMLPurifier_HTMLModule_Tidy
{

    public $name = 'Tidy_Proprietary';
    public $defaultLevel = 'light';

    /**
     * Builds the fixes for this module.
     * @return Associative array of fix name to fix implementation
     */
    public function makeFixes() {
        $fixes = array();

        // every element gets its own Background transform instance
        $with_background = array('table', 'td', 'th', 'tr', 'thead', 'tfoot', 'tbody');
        foreach ($with_background as $element) {
            $fixes[$element . '@background'] = new HTMLPurifier_AttrTransform_Background();
        }

        $fixes['table@height'] = new HTMLPurifier_AttrTransform_Length('height');

        return $fixes;
    }

}
13795 
13796 
13797 
13798 
13799 
/**
 * Tidy module holding the fixes shared by its subclasses: tag transforms
 * for deprecated elements and attribute transforms that turn deprecated
 * presentational attributes into CSS. It sets neither a name nor a
 * default level itself.
 */
class HTMLPurifier_HTMLModule_Tidy_XHTMLAndHTML4 extends HTMLPurifier_HTMLModule_Tidy
{

    /**
     * Builds the fixes for deprecated tags and attributes.
     * @return Associative array of fix name to fix implementation
     */
    public function makeFixes() {

        $r = array();

        // == deprecated tag transforms ===================================

        $r['font']   = new HTMLPurifier_TagTransform_Font();
        $r['menu']   = new HTMLPurifier_TagTransform_Simple('ul');
        $r['dir']    = new HTMLPurifier_TagTransform_Simple('ul');
        $r['center'] = new HTMLPurifier_TagTransform_Simple('div',  'text-align:center;');
        $r['u']      = new HTMLPurifier_TagTransform_Simple('span', 'text-decoration:underline;');
        $r['s']      = new HTMLPurifier_TagTransform_Simple('span', 'text-decoration:line-through;');
        $r['strike'] = new HTMLPurifier_TagTransform_Simple('span', 'text-decoration:line-through;');

        // == deprecated attribute transforms =============================

        $r['caption@align'] =
            new HTMLPurifier_AttrTransform_EnumToCSS('align', array(
                // we're following IE's behavior, not Firefox's, due
                // to the fact that no one supports caption-side:right,
                // W3C included (with CSS 2.1). This is a slightly
                // unreasonable attribute!
                'left'   => 'text-align:left;',
                'right'  => 'text-align:right;',
                'top'    => 'caption-side:top;',
                'bottom' => 'caption-side:bottom;' // not supported by IE
            ));

        // @align for img -------------------------------------------------
        $r['img@align'] =
            new HTMLPurifier_AttrTransform_EnumToCSS('align', array(
                'left'   => 'float:left;',
                'right'  => 'float:right;',
                'top'    => 'vertical-align:top;',
                'middle' => 'vertical-align:middle;',
                'bottom' => 'vertical-align:baseline;',
            ));

        // @align for table -----------------------------------------------
        $r['table@align'] =
            new HTMLPurifier_AttrTransform_EnumToCSS('align', array(
                'left'   => 'float:left;',
                'center' => 'margin-left:auto;margin-right:auto;',
                'right'  => 'float:right;'
            ));

        // @align for hr -----------------------------------------------
        $r['hr@align'] =
            new HTMLPurifier_AttrTransform_EnumToCSS('align', array(
                // we use both text-align and margin because these work
                // for different browsers (IE and Firefox, respectively)
                // and the melange makes for a pretty cross-compatible
                // solution
                'left'   => 'margin-left:0;margin-right:auto;text-align:left;',
                'center' => 'margin-left:auto;margin-right:auto;text-align:center;',
                'right'  => 'margin-left:auto;margin-right:0;text-align:right;'
            ));

        // @align for h1, h2, h3, h4, h5, h6, p, div ----------------------
        // {{{
            $align_lookup = array();
            $align_values = array('left', 'right', 'center', 'justify');
            foreach ($align_values as $v) $align_lookup[$v] = "text-align:$v;";
        // }}}
        $r['h1@align'] =
        $r['h2@align'] =
        $r['h3@align'] =
        $r['h4@align'] =
        $r['h5@align'] =
        $r['h6@align'] =
        $r['p@align']  =
        $r['div@align'] =
            new HTMLPurifier_AttrTransform_EnumToCSS('align', $align_lookup);

        // @bgcolor for table, tr, td, th ---------------------------------
        // tr is part of this group so that bgcolor on a table row is
        // converted like it is on the table and its cells.
        $r['table@bgcolor'] =
        $r['tr@bgcolor'] =
        $r['td@bgcolor'] =
        $r['th@bgcolor'] =
            new HTMLPurifier_AttrTransform_BgColor();

        // @border for img ------------------------------------------------
        $r['img@border'] = new HTMLPurifier_AttrTransform_Border();

        // @clear for br --------------------------------------------------
        $r['br@clear'] =
            new HTMLPurifier_AttrTransform_EnumToCSS('clear', array(
                'left'  => 'clear:left;',
                'right' => 'clear:right;',
                'all'   => 'clear:both;',
                'none'  => 'clear:none;',
            ));

        // @height for td, th ---------------------------------------------
        $r['td@height'] =
        $r['th@height'] =
            new HTMLPurifier_AttrTransform_Length('height');

        // @hspace for img ------------------------------------------------
        $r['img@hspace'] = new HTMLPurifier_AttrTransform_ImgSpace('hspace');

        // @noshade for hr ------------------------------------------------
        // this transformation is not precise but often good enough.
        // different browsers use different styles to designate noshade
        $r['hr@noshade'] =
            new HTMLPurifier_AttrTransform_BoolToCSS(
                'noshade',
                'color:#808080;background-color:#808080;border:0;'
            );

        // @nowrap for td, th ---------------------------------------------
        $r['td@nowrap'] =
        $r['th@nowrap'] =
            new HTMLPurifier_AttrTransform_BoolToCSS(
                'nowrap',
                'white-space:nowrap;'
            );

        // @size for hr  --------------------------------------------------
        $r['hr@size'] = new HTMLPurifier_AttrTransform_Length('size', 'height');

        // @type for li, ol, ul -------------------------------------------
        // {{{
            $ul_types = array(
                'disc'   => 'list-style-type:disc;',
                'square' => 'list-style-type:square;',
                'circle' => 'list-style-type:circle;'
            );
            $ol_types = array(
                '1'   => 'list-style-type:decimal;',
                'i'   => 'list-style-type:lower-roman;',
                'I'   => 'list-style-type:upper-roman;',
                'a'   => 'list-style-type:lower-alpha;',
                'A'   => 'list-style-type:upper-alpha;'
            );
            // li accepts the values of both list kinds
            $li_types = $ul_types + $ol_types;
        // }}}

        $r['ul@type'] = new HTMLPurifier_AttrTransform_EnumToCSS('type', $ul_types);
        $r['ol@type'] = new HTMLPurifier_AttrTransform_EnumToCSS('type', $ol_types, true);
        $r['li@type'] = new HTMLPurifier_AttrTransform_EnumToCSS('type', $li_types, true);

        // @vspace for img ------------------------------------------------
        $r['img@vspace'] = new HTMLPurifier_AttrTransform_ImgSpace('vspace');

        // @width for hr, td, th ------------------------------------------
        $r['td@width'] =
        $r['th@width'] =
        $r['hr@width'] = new HTMLPurifier_AttrTransform_Length('width');

        return $r;

    }

}
13957 
13958 
13959 
13960 
13961 
/**
 * Tidy module that extends the shared XHTML/HTML4 fixes with a
 * 'strictblockquote' content model type for blockquote, and supplies the
 * child definition for that type. All of its fixes belong to the 'light'
 * level.
 */
class HTMLPurifier_HTMLModule_Tidy_Strict extends HTMLPurifier_HTMLModule_Tidy_XHTMLAndHTML4
{

    public $name = 'Tidy_Strict';
    public $defaultLevel = 'light';

    /**
     * Set to true alongside the getChildDef() implementation below.
     */
    public $defines_child_def = true;

    /**
     * Builds the inherited fixes plus the blockquote content model fix.
     * @return Associative array of fix name to fix implementation
     */
    public function makeFixes() {
        $fixes = parent::makeFixes();
        $fixes['blockquote#content_model_type'] = 'strictblockquote';
        return $fixes;
    }

    /**
     * Creates the child definition for elements whose content model type
     * is 'strictblockquote'; every other type is passed to the parent.
     * @param $def Element definition; content_model_type and content_model
     *             are read from it
     * @return HTMLPurifier_ChildDef_StrictBlockquote for the strict type,
     *         otherwise whatever the parent implementation returns
     */
    public function getChildDef($def) {
        if ($def->content_model_type == 'strictblockquote') {
            return new HTMLPurifier_ChildDef_StrictBlockquote($def->content_model);
        }
        return parent::getChildDef($def);
    }

}
13979 
13980 
13981 
13982 
13983 
/**
 * Tidy module that reuses the shared XHTML/HTML4 fixes unchanged and
 * assigns all of them to the 'heavy' level.
 */
class HTMLPurifier_HTMLModule_Tidy_Transitional extends HTMLPurifier_HTMLModule_Tidy_XHTMLAndHTML4
{

    public $name = 'Tidy_Transitional';

    public $defaultLevel = 'heavy';

}
13989 
13990 
13991 
13992 
13993 
/**
 * Tidy module that registers the Lang attribute transform for the lang
 * attribute on all elements. Its single fix belongs to the 'medium'
 * level.
 */
class HTMLPurifier_HTMLModule_Tidy_XHTML extends HTMLPurifier_HTMLModule_Tidy
{

    public $name = 'Tidy_XHTML';
    public $defaultLevel = 'medium';

    /**
     * Builds the fixes for this module.
     * @return Associative array of fix name to fix implementation
     */
    public function makeFixes() {
        // '@lang' names no element, so the transform is not tied to one
        return array(
            '@lang' => new HTMLPurifier_AttrTransform_Lang(),
        );
    }

}
14007 
14008 
14009 
14010 
14011 
14018 class HTMLPurifier_Injector_AutoParagraph extends HTMLPurifier_Injector
14019 {
14020 
14021     public $name = 'AutoParagraph';
14022     public $needed = array('p');
14023 
14024     private function _pStart() {
14025         $par = new HTMLPurifier_Token_Start('p');
14026         $par->armor['MakeWellFormed_TagClosedError'] = true;
14027         return $par;
14028     }
14029 
14030     public function handleText(&$token) {
14031         $text = $token->data;
14032         // Does the current parent allow <p> tags?
14033         if ($this->allowsElement('p')) {
14034             if (empty($this->currentNesting) || strpos($text, "\n\n") !== false) {
14035                 // Note that we have differing behavior when dealing with text
14036                 // in the anonymous root node, or a node inside the document.
14037                 // If the text as a double-newline, the treatment is the same;
14038                 // if it doesn't, see the next if-block if you're in the document.
14039 
14040                 $i = $nesting = null;
14041                 if (!$this->forwardUntilEndToken($i, $current, $nesting) && $token->is_whitespace) {
14042                     // State 1.1: ...    ^ (whitespace, then document end)
14043                     //               ----
14044                     // This is a degenerate case
14045                 } else {
14046                     if (!$token->is_whitespace || $this->_isInline($current)) {
14047                         // State 1.2: PAR1
14048                         //            ----
14049 
14050                         // State 1.3: PAR1\n\nPAR2
14051                         //            ------------
14052 
14053                         // State 1.4: <div>PAR1\n\nPAR2 (see State 2)
14054                         //                 ------------
14055                         $token = array($this->_pStart());
14056                         $this->_splitText($text, $token);
14057                     } else {
14058                         // State 1.5: \n<hr />
14059                         //            --
14060                     }
14061                 }
14062             } else {
14063                 // State 2:   <div>PAR1... (similar to 1.4)
14064                 //                 ----
14065 
14066                 // We're in an element that allows paragraph tags, but we're not
14067                 // sure if we're going to need them.
14068                 if ($this->_pLookAhead()) {
14069                     // State 2.1: <div>PAR1<b>PAR1\n\nPAR2
14070                     //                 ----
14071                     // Note: This will always be the first child, since any
14072                     // previous inline element would have triggered this very
14073                     // same routine, and found the double newline. One possible
14074                     // exception would be a comment.
14075                     $token = array($this->_pStart(), $token);
14076                 } else {
14077                     // State 2.2.1: <div>PAR1<div>
14078                     //                   ----
14079 
14080                     // State 2.2.2: <div>PAR1<b>PAR1</b></div>
14081                     //                   ----
14082                 }
14083             }
14084         // Is the current parent a <p> tag?
14085         } elseif (
14086             !empty($this->currentNesting) &&
14087             $this->currentNesting[count($this->currentNesting)-1]->name == 'p'
14088         ) {
14089             // State 3.1: ...<p>PAR1
14090             //                  ----
14091 
14092             // State 3.2: ...<p>PAR1\n\nPAR2
14093             //                  ------------
14094             $token = array();
14095             $this->_splitText($text, $token);
14096         // Abort!
14097         } else {
14098             // State 4.1: ...<b>PAR1
14099             //                  ----
14100 
14101             // State 4.2: ...<b>PAR1\n\nPAR2
14102             //                  ------------
14103         }
14104     }
14105 
14106     public function handleElement(&$token) {
14107         // We don't have to check if we're already in a <p> tag for block
14108         // tokens, because the tag would have been autoclosed by MakeWellFormed.
14109         if ($this->allowsElement('p')) {
14110             if (!empty($this->currentNesting)) {
14111                 if ($this->_isInline($token)) {
14112                     // State 1: <div>...<b>
14113                     //                  ---
14114 
14115                     // Check if this token is adjacent to the parent token
14116                     // (seek backwards until token isn't whitespace)
14117                     $i = null;
14118                     $this->backward($i, $prev);
14119 
14120                     if (!$prev instanceof HTMLPurifier_Token_Start) {
14121                         // Token wasn't adjacent
14122 
14123                         if (
14124                             $prev instanceof HTMLPurifier_Token_Text &&
14125                             substr($prev->data, -2) === "\n\n"
14126                         ) {
14127                             // State 1.1.4: <div><p>PAR1</p>\n\n<b>
14128                             //                                  ---
14129 
14130                             // Quite frankly, this should be handled by splitText
14131                             $token = array($this->_pStart(), $token);
14132                         } else {
14133                             // State 1.1.1: <div><p>PAR1</p><b>
14134                             //                              ---
14135 
14136                             // State 1.1.2: <div><br /><b>
14137                             //                         ---
14138 
14139                             // State 1.1.3: <div>PAR<b>
14140                             //                      ---
14141                         }
14142 
14143                     } else {
14144                         // State 1.2.1: <div><b>
14145                         //                   ---
14146 
14147                         // Lookahead to see if <p> is needed.
14148                         if ($this->_pLookAhead()) {
14149                             // State 1.3.1: <div><b>PAR1\n\nPAR2
14150                             //                   ---
14151                             $token = array($this->_pStart(), $token);
14152                         } else {
14153                             // State 1.3.2: <div><b>PAR1</b></div>
14154                             //                   ---
14155 
14156                             // State 1.3.3: <div><b>PAR1</b><div></div>\n\n</div>
14157                             //                   ---
14158                         }
14159                     }
14160                 } else {
14161                     // State 2.3: ...<div>
14162                     //               -----
14163                 }
14164             } else {
14165                 if ($this->_isInline($token)) {
14166                     // State 3.1: <b>
14167                     //            ---
14168                     // This is where the {p} tag is inserted, not reflected in
14169                     // inputTokens yet, however.
14170                     $token = array($this->_pStart(), $token);
14171                 } else {
14172                     // State 3.2: <div>
14173                     //            -----
14174                 }
14175 
14176                 $i = null;
14177                 if ($this->backward($i, $prev)) {
14178                     if (
14179                         !$prev instanceof HTMLPurifier_Token_Text
14180                     ) {
14181                         // State 3.1.1: ...</p>{p}<b>
14182                         //                        ---
14183 
14184                         // State 3.2.1: ...</p><div>
14185                         //                     -----
14186 
14187                         if (!is_array($token)) $token = array($token);
14188                         array_unshift($token, new HTMLPurifier_Token_Text("\n\n"));
14189                     } else {
14190                         // State 3.1.2: ...</p>\n\n{p}<b>
14191                         //                            ---
14192 
14193                         // State 3.2.2: ...</p>\n\n<div>
14194                         //                         -----
14195 
14196                         // Note: PAR<ELEM> cannot occur because PAR would have been
14197                         // wrapped in <p> tags.
14198                     }
14199                 }
14200             }
14201         } else {
14202             // State 2.2: <ul><li>
14203             //                ----
14204 
14205             // State 2.4: <p><b>
14206             //               ---
14207         }
14208     }
14209 
14220     private function _splitText($data, &$result) {
14221         $raw_paragraphs = explode("\n\n", $data);
14222         $paragraphs  = array(); // without empty paragraphs
14223         $needs_start = false;
14224         $needs_end   = false;
14225 
14226         $c = count($raw_paragraphs);
14227         if ($c == 1) {
14228             // There were no double-newlines, abort quickly. In theory this
14229             // should never happen.
14230             $result[] = new HTMLPurifier_Token_Text($data);
14231             return;
14232         }
14233         for ($i = 0; $i < $c; $i++) {
14234             $par = $raw_paragraphs[$i];
14235             if (trim($par) !== '') {
14236                 $paragraphs[] = $par;
14237             } else {
14238                 if ($i == 0) {
14239                     // Double newline at the front
14240                     if (empty($result)) {
14241                         // The empty result indicates that the AutoParagraph
14242                         // injector did not add any start paragraph tokens.
14243                         // This means that we have been in a paragraph for
14244                         // a while, and the newline means we should start a new one.
14245                         $result[] = new HTMLPurifier_Token_End('p');
14246                         $result[] = new HTMLPurifier_Token_Text("\n\n");
14247                         // However, the start token should only be added if
14248                         // there is more processing to be done (i.e. there are
14249                         // real paragraphs in here). If there are none, the
14250                         // next start paragraph tag will be handled by the
14251                         // next call to the injector
14252                         $needs_start = true;
14253                     } else {
14254                         // We just started a new paragraph!
14255                         // Reinstate a double-newline for presentation's sake, since
14256                         // it was in the source code.
14257                         array_unshift($result, new HTMLPurifier_Token_Text("\n\n"));
14258                     }
14259                 } elseif ($i + 1 == $c) {
14260                     // Double newline at the end
14261                     // There should be a trailing </p> when we're finally done.
14262                     $needs_end = true;
14263                 }
14264             }
14265         }
14266 
14267         // Check if this was just a giant blob of whitespace. Move this earlier,
14268         // perhaps?
14269         if (empty($paragraphs)) {
14270             return;
14271         }
14272 
14273         // Add the start tag indicated by \n\n at the beginning of $data
14274         if ($needs_start) {
14275             $result[] = $this->_pStart();
14276         }
14277 
14278         // Append the paragraphs onto the result
14279         foreach ($paragraphs as $par) {
14280             $result[] = new HTMLPurifier_Token_Text($par);
14281             $result[] = new HTMLPurifier_Token_End('p');
14282             $result[] = new HTMLPurifier_Token_Text("\n\n");
14283             $result[] = $this->_pStart();
14284         }
14285 
14286         // Remove trailing start token; Injector will handle this later if
14287         // it was indeed needed. This prevents from needing to do a lookahead,
14288         // at the cost of a lookbehind later.
14289         array_pop($result);
14290 
14291         // If there is no need for an end tag, remove all of it and let
14292         // MakeWellFormed close it later.
14293         if (!$needs_end) {
14294             array_pop($result); // removes \n\n
14295             array_pop($result); // removes </p>
14296         }
14297 
14298     }
14299 
14304     private function _isInline($token) {
14305         return isset($this->htmlDefinition->info['p']->child->elements[$token->name]);
14306     }
14307 
14312     private function _pLookAhead() {
14313         $this->current($i, $current);
14314         if ($current instanceof HTMLPurifier_Token_Start) $nesting = 1;
14315         else $nesting = 0;
14316         $ok = false;
14317         while ($this->forwardUntilEndToken($i, $current, $nesting)) {
14318             $result = $this->_checkNeedsP($current);
14319             if ($result !== null) {
14320                 $ok = $result;
14321                 break;
14322             }
14323         }
14324         return $ok;
14325     }
14326 
14331     private function _checkNeedsP($current) {
14332         if ($current instanceof HTMLPurifier_Token_Start){
14333             if (!$this->_isInline($current)) {
14334                 // <div>PAR1<div>
14335                 //      ----
14336                 // Terminate early, since we hit a block element
14337                 return false;
14338             }
14339         } elseif ($current instanceof HTMLPurifier_Token_Text) {
14340             if (strpos($current->data, "\n\n") !== false) {
14341                 // <div>PAR1<b>PAR1\n\nPAR2
14342                 //      ----
14343                 return true;
14344             } else {
14345                 // <div>PAR1<b>PAR1...
14346                 //      ----
14347             }
14348         }
14349         return null;
14350     }
14351 
14352 }
14353 
14354 
14355 
14356 
14357 
/**
 * Injector that moves the href of a link out of the tag and into visible
 * text placed right after the closing tag.
 */
class HTMLPurifier_Injector_DisplayLinkURI extends HTMLPurifier_Injector
{

    public $name = 'DisplayLinkURI';
    public $needed = array('a');

    /**
     * Deliberately does nothing; all work happens in handleEnd().
     */
    public function handleElement(&$token) {
    }

    /**
     * If the start tag paired with this end token carries an href, removes
     * that attribute and appends " (url)" as a text token after the end tag.
     *
     * @param HTMLPurifier_Token $token End token, passed by reference; it is
     *        replaced by array(end token, text token) when an href is found.
     */
    public function handleEnd(&$token) {
        if (!isset($token->start->attr['href'])) {
            // nothing to display
            return;
        }

        $opening = $token->start;
        $url = $opening->attr['href'];
        unset($opening->attr['href']);

        $token = array($token, new HTMLPurifier_Token_Text(" ($url)"));
    }
}
14380 
14381 
14382 
14383 
14384 
/**
 * Injector that turns bare http, https and ftp URLs found in text into
 * hyperlinks.
 */
class HTMLPurifier_Injector_Linkify extends HTMLPurifier_Injector
{

    public $name = 'Linkify';
    public $needed = array('a' => array('href'));

    /**
     * Splits a text token around every URL it contains and replaces it with
     * a sequence of text tokens and <a href="URL">URL</a> triples.
     *
     * The token is left untouched when links are not allowed here, when the
     * text holds no URL, or when the regular expression engine reports a
     * failure.
     *
     * @param HTMLPurifier_Token_Text $token Text token, passed by reference;
     *        replaced by an array of tokens when at least one URL is found.
     */
    public function handleText(&$token) {
        if (!$this->allowsElement('a')) return;

        if (strpos($token->data, '://') === false) {
            // our really quick heuristic failed, abort
            // this may not work so well if we want to match things like
            // "google.com", but then again, most people don't
            return;
        }

        // there may be URL(s). Let's split the string:
        // Note: this regex is extremely permissive
        $bits = preg_split('#((?:https?|ftp)://[^\s\'"<>()]+)#S', $token->data, -1, PREG_SPLIT_DELIM_CAPTURE);

        // With PREG_SPLIT_DELIM_CAPTURE a successful match always yields at
        // least three pieces (text, URL, text). Fewer means "://" was present
        // but no supported URL was; false means PCRE failed. Either way keep
        // the original token instead of swapping in a rebuilt copy.
        if ($bits === false || count($bits) < 3) {
            return;
        }

        $replacement = array();

        // Pieces alternate: even offsets are plain text, odd offsets are URLs.
        foreach ($bits as $i => $bit) {
            if ($i % 2 === 0) {
                if ($bit === '') continue;
                $replacement[] = new HTMLPurifier_Token_Text($bit);
            } else {
                $replacement[] = new HTMLPurifier_Token_Start('a', array('href' => $bit));
                $replacement[] = new HTMLPurifier_Token_Text($bit);
                $replacement[] = new HTMLPurifier_Token_End('a');
            }
        }

        $token = $replacement;
    }

}
14427 
14428 
14429 
14430 
14431 
/**
 * Injector that links %Namespace.Directive style references found in text
 * to a documentation URL built from AutoFormat.PurifierLinkify.DocURL.
 */
class HTMLPurifier_Injector_PurifierLinkify extends HTMLPurifier_Injector
{

    public $name = 'PurifierLinkify';
    /** URL template read from configuration; "%s" is replaced by the directive name. */
    public $docURL;
    public $needed = array('a' => array('href'));

    /**
     * Reads the documentation URL template, then defers to the parent.
     *
     * @return mixed Whatever the parent prepare() returns.
     */
    public function prepare($config, $context) {
        $this->docURL = $config->get('AutoFormat.PurifierLinkify.DocURL');
        return parent::prepare($config, $context);
    }

    /**
     * Splits a text token around every %word.word reference and replaces it
     * with text tokens and <a href="...">%word.word</a> triples.
     *
     * The token is left untouched when links are not allowed here, when no
     * reference is found, or when the regular expression engine reports a
     * failure.
     *
     * @param HTMLPurifier_Token_Text $token Text token, passed by reference;
     *        replaced by an array of tokens when at least one reference is
     *        found.
     */
    public function handleText(&$token) {
        if (!$this->allowsElement('a')) return;
        if (strpos($token->data, '%') === false) return;

        $bits = preg_split('#%([a-z0-9]+\.[a-z0-9]+)#Si', $token->data, -1, PREG_SPLIT_DELIM_CAPTURE);

        // A successful match always yields at least three pieces (text,
        // directive, text). Fewer means a "%" was present without a directive
        // reference; false means PCRE failed. Either way keep the original
        // token instead of swapping in a rebuilt copy.
        if ($bits === false || count($bits) < 3) {
            return;
        }

        $replacement = array();

        // Pieces alternate: even offsets are plain text, odd offsets are the
        // captured directive names (without the leading "%").
        foreach ($bits as $i => $bit) {
            if ($i % 2 === 0) {
                if ($bit === '') continue;
                $replacement[] = new HTMLPurifier_Token_Text($bit);
            } else {
                $replacement[] = new HTMLPurifier_Token_Start('a',
                    array('href' => str_replace('%s', $bit, $this->docURL)));
                $replacement[] = new HTMLPurifier_Token_Text('%' . $bit);
                $replacement[] = new HTMLPurifier_Token_End('a');
            }
        }

        $token = $replacement;
    }

}
14473 
14474 
14475 
14476 
14477 
/**
 * Injector that removes start/end tag pairs with nothing but whitespace
 * (and, optionally, non-breaking spaces) between them.
 */
class HTMLPurifier_Injector_RemoveEmpty extends HTMLPurifier_Injector
{

    private $context, $config, $attrValidator, $removeNbsp, $removeNbspExceptions;

    /**
     * Stores the configuration and context, reads the RemoveNbsp settings
     * and creates the attribute validator used by handleElement().
     */
    public function prepare($config, $context) {
        parent::prepare($config, $context);
        $this->config  = $config;
        $this->context = $context;
        $this->attrValidator        = new HTMLPurifier_AttrValidator();
        $this->removeNbsp           = $config->get('AutoFormat.RemoveEmpty.RemoveNbsp');
        $this->removeNbspExceptions = $config->get('AutoFormat.RemoveEmpty.RemoveNbsp.Exceptions');
    }

    /**
     * Looks past ignorable text after a start tag; when the matching end tag
     * comes next (or nothing follows at all), replaces the token with the
     * number of tokens to delete and rewinds the stream.
     *
     * @param HTMLPurifier_Token $token Token under inspection, passed by
     *        reference; overwritten with an integer when the element is to
     *        be removed.
     */
    public function handleElement(&$token) {
        if (!($token instanceof HTMLPurifier_Token_Start)) {
            return;
        }

        // Walk forward over text that does not count as content.
        $next  = false;
        $i     = $this->inputIndex + 1;
        $total = count($this->inputTokens);
        while ($i < $total) {
            $next = $this->inputTokens[$i];
            if (!($next instanceof HTMLPurifier_Token_Text)) {
                break;
            }
            if (!$next->is_whitespace) {
                // Non-whitespace text is only skippable when nbsp removal is
                // on for this element and the text is nothing but nbsp and
                // whitespace.
                $nbspIgnored = $this->removeNbsp
                    && !isset($this->removeNbspExceptions[$token->name]);
                if (!$nbspIgnored) {
                    break;
                }
                $plain = str_replace("\xC2\xA0", "", $next->data);
                if ($plain !== '' && !ctype_space($plain)) {
                    break;
                }
            }
            $i++;
        }

        $closesToken = $next instanceof HTMLPurifier_Token_End
            && $next->name == $token->name;
        if ($next && !$closesToken) {
            // Something other than our own end tag follows: not empty.
            return;
        }

        if ($token->name == 'colgroup') {
            return;
        }

        $this->attrValidator->validateToken($token, $this->config, $this->context);
        $token->armor['ValidateAttributes'] = true;
        if (isset($token->attr['id']) || isset($token->attr['name'])) {
            // Elements carrying an id or name are kept even when empty.
            return;
        }

        // Number of tokens to delete, counted from the start tag.
        $token = $i - $this->inputIndex + 1;

        // Step back over whitespace-only text in front of the removed run.
        $b = $this->inputIndex - 1;
        while ($b > 0) {
            $prev = $this->inputTokens[$b];
            if (!($prev instanceof HTMLPurifier_Token_Text && $prev->is_whitespace)) {
                break;
            }
            $b--;
        }

        // This is safe because we removed the token that triggered this.
        $this->rewind($b - 1);
    }

}
14525 
14526 
14527 
14528 
14529 
/**
 * Injector that removes <span> tags left without any attributes after
 * attribute validation, keeping their contents.
 */
class HTMLPurifier_Injector_RemoveSpansWithoutAttributes extends HTMLPurifier_Injector
{
    public $name = 'RemoveSpansWithoutAttributes';
    public $needed = array('span');

    private $attrValidator;

    /** Configuration and context, kept for the attribute validator. */
    private $config;
    private $context;

    /**
     * Closing span tokens whose opening tag was removed and which must be
     * removed in turn once handleEnd() reaches them. Kept here instead of as
     * an undeclared property written onto the token objects themselves.
     *
     * @var SplObjectStorage|null
     */
    private $markForDeletion;

    /**
     * Stores configuration and context, creates the attribute validator,
     * resets the set of marked end tokens and defers to the parent.
     *
     * @return mixed Whatever the parent prepare() returns.
     */
    public function prepare($config, $context) {
        $this->attrValidator = new HTMLPurifier_AttrValidator();
        $this->config = $config;
        $this->context = $context;
        $this->markForDeletion = new SplObjectStorage();
        return parent::prepare($config, $context);
    }

    /**
     * Drops an opening <span> that has no attributes left after validation
     * and marks the end token found by the forward scan for removal.
     *
     * @param HTMLPurifier_Token $token Token under inspection, passed by
     *        reference; set to false when the span is to be removed.
     */
    public function handleElement(&$token) {
        if ($token->name !== 'span' || !$token instanceof HTMLPurifier_Token_Start) {
            return;
        }

        // We need to validate the attributes now since this doesn't normally
        // happen until after MakeWellFormed. If all the attributes are removed
        // the span needs to be removed too.
        $this->attrValidator->validateToken($token, $this->config, $this->context);
        $token->armor['ValidateAttributes'] = true;

        if (!empty($token->attr)) {
            return;
        }

        // Run the forward scan to its end; $current is then the last token
        // it visited.
        $i = null;
        $current = null;
        $nesting = 0;
        while ($this->forwardUntilEndToken($i, $current, $nesting)) {}

        if ($current instanceof HTMLPurifier_Token_End && $current->name === 'span') {
            // Mark closing span tag for deletion
            $this->markedTokens()->attach($current);
            // Delete open span tag
            $token = false;
        }
    }

    /**
     * Removes an end token that handleElement() marked earlier.
     *
     * @param HTMLPurifier_Token $token End token, passed by reference; set
     *        to false when it was marked.
     */
    public function handleEnd(&$token) {
        $marked = $this->markedTokens();
        if ($marked->contains($token)) {
            // Forget the token so the storage does not keep it alive.
            $marked->detach($token);
            $token = false;
        }
    }

    /**
     * Returns the storage of marked end tokens, creating it on first use so
     * the handlers also work if prepare() has not run.
     *
     * @return SplObjectStorage
     */
    private function markedTokens() {
        if ($this->markForDeletion === null) {
            $this->markForDeletion = new SplObjectStorage();
        }
        return $this->markForDeletion;
    }
}
14586 
14587 
14588 
14589 
14590 
/**
 * Injector that adds fixed <param> elements to every <object> and filters
 * the <param> elements supplied in the input against a whitelist.
 */
class HTMLPurifier_Injector_SafeObject extends HTMLPurifier_Injector
{
    public $name = 'SafeObject';
    public $needed = array('object', 'param');

    /** Stack of the <object> tokens currently open. */
    protected $objectStack = array();
    /** Parallel stack: per open object, the forced param names already seen. */
    protected $paramStack  = array();

    // Keep this synchronized with AttrTransform/SafeParam.php
    /** Params injected right after every opening <object>, name => value. */
    protected $addParam = array(
        'allowScriptAccess' => 'never',
        'allowNetworking' => 'internal',
    );
    /** Param names from the input that are allowed to pass through. */
    protected $allowedParam = array(
        'wmode' => true,
        'movie' => true,
        'flashvars' => true,
        'src' => true,
        'allowFullScreen' => true, // if omitted, assume to be 'false'
    );

    /**
     * Defers entirely to the parent implementation.
     */
    public function prepare($config, $context) {
        parent::prepare($config, $context);
    }

    /**
     * For <object>: pushes the token onto the stacks and appends the forced
     * params after it. For <param>: keeps it only when it sits directly
     * inside an <object>, has a name, and that name is acceptable; otherwise
     * the token is removed.
     *
     * @param HTMLPurifier_Token $token Token under inspection, passed by
     *        reference; replaced by an array (object plus params) or set to
     *        false (param rejected).
     */
    public function handleElement(&$token) {
        if ($token->name == 'object') {
            $this->objectStack[] = $token;
            $this->paramStack[] = array();
            $new = array($token);
            foreach ($this->addParam as $name => $value) {
                $new[] = new HTMLPurifier_Token_Empty('param', array('name' => $name, 'value' => $value));
            }
            $token = $new;
        } elseif ($token->name == 'param') {
            $nest = count($this->currentNesting) - 1;
            if ($nest >= 0 && $this->currentNesting[$nest]->name === 'object') {
                $i = count($this->objectStack) - 1;
                if (!isset($token->attr['name'])) {
                    $token = false;
                    return;
                }
                $n = $token->attr['name'];
                // We need this fix because YouTube doesn't supply a data
                // attribute, which we need if a type is specified. This is
                // *very* Flash specific.
                // Only copy when the param actually has a value; a missing
                // value used to raise a notice and store null as the data.
                if (!isset($this->objectStack[$i]->attr['data']) &&
                    ($n == 'movie' || $n == 'src') &&
                    isset($token->attr['value'])) {
                    $this->objectStack[$i]->attr['data'] = $token->attr['value'];
                }
                // Check if the parameter is the correct value but has not
                // already been added
                // NOTE(review): this compares the param *name* with the
                // forced *value*, so with the current $addParam table the
                // branch cannot match and input copies of the forced params
                // are always dropped (the injected ones remain). Comparing
                // attr['value'] looks like the intent, but would change the
                // emitted markup, so it is left as is - confirm before
                // changing.
                if (
                    !isset($this->paramStack[$i][$n]) &&
                    isset($this->addParam[$n]) &&
                    $token->attr['name'] === $this->addParam[$n]
                ) {
                    // keep token, and add to param stack
                    $this->paramStack[$i][$n] = true;
                } elseif (isset($this->allowedParam[$n])) {
                    // keep token, don't do anything to it
                    // (could possibly check for duplicates here)
                } else {
                    $token = false;
                }
            } else {
                // not directly inside an object, DENY!
                $token = false;
            }
        }
    }

    /**
     * Pops both stacks when an <object> is closed.
     *
     * @param HTMLPurifier_Token $token End token being processed.
     */
    public function handleEnd(&$token) {
        // This is the WRONG way of handling the object and param stacks;
        // we should be inserting them directly on the relevant object tokens
        // so that the global stack handling handles it.
        if ($token->name == 'object') {
            array_pop($this->objectStack);
            array_pop($this->paramStack);
        }
    }

}
14678 
14679 
14680 
14681 
14682 
/**
 * Lexer that lets PHP's DOM extension parse the HTML and then flattens the
 * resulting tree back into a list of tokens.
 */
class HTMLPurifier_Lexer_DOMLex extends HTMLPurifier_Lexer
{

    /** Factory used to build every token this lexer emits. */
    private $factory;

    public function __construct() {
        // setup the factory
        parent::__construct();
        $this->factory = new HTMLPurifier_TokenFactory();
    }

    /**
     * Turns an HTML fragment into a flat list of tokens.
     *
     * The fragment is wrapped in a full document whose body holds a single
     * <div> (see wrapHTML()), parsed with DOMDocument::loadHTML(), and the
     * children of that <div> are tokenized. If the fragment contained a
     * stray </div>, the parser closes the wrapper early and the rest of the
     * fragment ends up as siblings of the wrapper; those siblings are
     * tokenized as well instead of being dropped.
     *
     * @param string $html    HTML fragment.
     * @param object $config  Configuration object (provides get() and
     *                        getDefinition()).
     * @param object $context Context object, passed through to helpers.
     * @return array List of tokens; empty when the parsed document lacks the
     *               expected wrapper elements.
     */
    public function tokenizeHTML($html, $config, $context) {

        $html = $this->normalize($html, $config, $context);

        // attempt to armor stray angled brackets that cannot possibly
        // form tags and thus are probably being used as emoticons
        if ($config->get('Core.AggressivelyFixLt')) {
            $char = '[^a-z!\/]';
            $comment = "/<!--(.*?)(-->|\z)/is";
            // Escape ampersands inside comments first so that the undo step
            // below can tell our own &lt; substitutions from original text.
            $html = preg_replace_callback($comment, array($this, 'callbackArmorCommentEntities'), $html);
            do {
                $old = $html;
                $html = preg_replace("/<($char)/i", '&lt;\\1', $html);
            } while ($html !== $old);
            $html = preg_replace_callback($comment, array($this, 'callbackUndoCommentSubst'), $html); // fix comments
        }

        // preprocess html, essential for UTF-8
        $html = $this->wrapHTML($html, $config, $context);

        $doc = new DOMDocument();
        $doc->encoding = 'UTF-8'; // theoretically, the above has this covered

        // Parser warnings about malformed input are expected; silence them.
        set_error_handler(array($this, 'muteErrorHandler'));
        $doc->loadHTML($html);
        restore_error_handler();

        $tokens = array();

        $root = $doc->getElementsByTagName('html')->item(0);           // <html>
        $body = $root ? $root->getElementsByTagName('body')->item(0)   //   <body>
                      : null;
        if (!$body) {
            // The wrapper did not survive parsing; nothing to tokenize.
            return $tokens;
        }

        $div = $body->getElementsByTagName('div')->item(0);            //     <div>
        if (!$div) {
            return $tokens;
        }

        $this->tokenizeDOM($div, $tokens);

        // If the wrapper has a sibling, we tripped across a premature </div>
        // in the input. Remove the part already tokenized and tokenize
        // whatever else landed in <body>.
        if ($div->nextSibling) {
            $body->removeChild($div);
            $this->tokenizeDOM($body, $tokens);
        }

        return $tokens;
    }

    /**
     * Walks a DOM subtree iteratively and appends the matching tokens.
     *
     * The root node itself produces no start or end token; only its
     * descendants do.
     *
     * @param DOMNode $node   Root of the subtree.
     * @param array   $tokens Token list to append to, passed by reference.
     */
    protected function tokenizeDOM($node, &$tokens) {

        $level = 0;
        // Pending nodes per depth, and nodes per depth still awaiting an end tag.
        $nodes = array($level => array($node));
        $closingNodes = array();
        do {
            while (!empty($nodes[$level])) {
                $node = array_shift($nodes[$level]); // FIFO
                $collect = $level > 0;               // skip the root itself
                $needEndingTag = $this->createStartNode($node, $tokens, $collect);
                if ($needEndingTag) {
                    $closingNodes[$level][] = $node;
                }
                if ($node->childNodes && $node->childNodes->length) {
                    // Descend: the children become the queue one level down.
                    $level++;
                    $nodes[$level] = array();
                    foreach ($node->childNodes as $childNode) {
                        array_push($nodes[$level], $childNode);
                    }
                }
            }
            $level--;
            // Level 0 is the root, which never gets an end token.
            if ($level && isset($closingNodes[$level])) {
                while($node = array_pop($closingNodes[$level])) {
                    $this->createEndNode($node, $tokens);
                }
            }
        } while ($level > 0);
    }

    /**
     * Appends the token that opens (or fully represents) a DOM node.
     *
     * @param DOMNode $node    Node to convert.
     * @param array   $tokens  Token list to append to, passed by reference.
     * @param bool    $collect Whether element nodes should emit a token at
     *                         all; text, CDATA and comment nodes always do.
     * @return bool true when the node is an element with children, i.e. an
     *              end token has to follow later; false otherwise.
     */
    protected function createStartNode($node, &$tokens, $collect) {
        // intercept non element nodes. WE MUST catch all of them,
        // but we're not getting the character reference nodes because
        // those should have been preprocessed
        if ($node->nodeType === XML_TEXT_NODE) {
            $tokens[] = $this->factory->createText($node->data);
            return false;
        } elseif ($node->nodeType === XML_CDATA_SECTION_NODE) {
            // undo libxml's special treatment of <script> and <style> tags
            $last = end($tokens);
            $data = $node->data;
            // (note $node->tagname is already normalized)
            if ($last instanceof HTMLPurifier_Token_Start && ($last->name == 'script' || $last->name == 'style')) {
                // Strip an enclosing <!-- ... --> wrapper from the contents.
                $new_data = trim($data);
                if (substr($new_data, 0, 4) === '<!--') {
                    $data = substr($new_data, 4);
                    if (substr($data, -3) === '-->') {
                        $data = substr($data, 0, -3);
                    } else {
                        // Highly suspicious! Not sure what to do...
                    }
                }
            }
            $tokens[] = $this->factory->createText($this->parseData($data));
            return false;
        } elseif ($node->nodeType === XML_COMMENT_NODE) {
            // this code is only invoked for comments in script/style in versions
            // of libxml pre-2.6.28 (regular comments, of course, are still
            // handled regularly)
            $tokens[] = $this->factory->createComment($node->data);
            return false;
        } elseif (
            // not-well tested: there may be other nodes we have to grab
            $node->nodeType !== XML_ELEMENT_NODE
        ) {
            return false;
        }

        $attr = $node->hasAttributes() ? $this->transformAttrToAssoc($node->attributes) : array();

        // We still have to make sure that the element actually IS empty
        if (!$node->childNodes->length) {
            if ($collect) {
                $tokens[] = $this->factory->createEmpty($node->tagName, $attr);
            }
            return false;
        } else {
            if ($collect) {
                $tokens[] = $this->factory->createStart(
                    $tag_name = $node->tagName, // somehow, it gets dropped
                    $attr
                );
            }
            return true;
        }
    }

    /**
     * Appends the end token for an element node.
     *
     * @param DOMNode $node   Element being closed.
     * @param array   $tokens Token list to append to, passed by reference.
     */
    protected function createEndNode($node, &$tokens) {
        $tokens[] = $this->factory->createEnd($node->tagName);
    }


    /**
     * Converts a DOM attribute map into a plain name => value array.
     *
     * @param DOMNamedNodeMap $node_map Attributes of an element.
     * @return array Associative array of attribute values keyed by name.
     */
    protected function transformAttrToAssoc($node_map) {
        // NamedNodeMap is not documented very well, so we're using
        // undocumented features, namely, the fact that it implements
        // Iterator and has a ->length attribute
        if ($node_map->length === 0) return array();
        $array = array();
        foreach ($node_map as $attr) {
            $array[$attr->name] = $attr->value;
        }
        return $array;
    }

    /**
     * Error handler that discards everything; installed around loadHTML().
     */
    public function muteErrorHandler($errno, $errstr) {}

    /**
     * preg_replace_callback handler: restores "&" and "<" inside a comment
     * after the stray-bracket armoring pass.
     *
     * @param array $matches [1] is the comment body, [2] its terminator.
     * @return string Rebuilt comment.
     */
    public function callbackUndoCommentSubst($matches) {
        return '<!--' . strtr($matches[1], array('&amp;'=>'&','&lt;'=>'<')) . $matches[2];
    }

    /**
     * preg_replace_callback handler: escapes "&" inside a comment before the
     * stray-bracket armoring pass.
     *
     * @param array $matches [1] is the comment body, [2] its terminator.
     * @return string Rebuilt comment.
     */
    public function callbackArmorCommentEntities($matches) {
        return '<!--' . str_replace('&', '&amp;', $matches[1]) . $matches[2];
    }

    /**
     * Wraps an HTML fragment into a complete document with a UTF-8 meta tag
     * and, when the HTML definition names one, a DOCTYPE.
     *
     * @param string $html    HTML fragment.
     * @param object $config  Configuration object (provides getDefinition()).
     * @param object $context Context object (unused here).
     * @return string Complete HTML document with the fragment inside
     *                <body><div>...</div></body>.
     */
    protected function wrapHTML($html, $config, $context) {
        $def = $config->getDefinition('HTML');
        $ret = '';

        if (!empty($def->doctype->dtdPublic) || !empty($def->doctype->dtdSystem)) {
            $ret .= '<!DOCTYPE html ';
            if (!empty($def->doctype->dtdPublic)) $ret .= 'PUBLIC "' . $def->doctype->dtdPublic . '" ';
            if (!empty($def->doctype->dtdSystem)) $ret .= '"' . $def->doctype->dtdSystem . '" ';
            $ret .= '>';
        }

        $ret .= '<html><head>';
        $ret .= '<meta http-equiv="Content-Type" content="text/html; charset=utf-8" />';
        // A stray </div> in $html closes the wrapper early; tokenizeHTML()
        // picks up whatever follows it from the remaining <body> children.
        $ret .= '</head><body><div>'.$html.'</div></body></html>';
        return $ret;
    }

}
14922 
14923 
14924 
14925 
14926 
/**
 * Lexer that tokenizes HTML by scanning the raw string directly.
 *
 * The string is walked with plain string functions (strpos, strcspn, ...)
 * looking for '<' and '>' and is turned into a flat array of
 * HTMLPurifier_Token_Text, _Comment, _Start, _End and _Empty objects.
 * Malformed input never aborts the scan: anything that cannot be read as
 * a tag is emitted as text.
 */
class HTMLPurifier_Lexer_DirectLex extends HTMLPurifier_Lexer
{

    /**
     * Always true for this lexer.
     * NOTE(review): presumably tells consumers that tokens produced here can
     * carry line/column data (see the rawPosition() calls below) -- confirm
     * against HTMLPurifier_Lexer.
     */
    public $tracksLineNumbers = true;

    /**
     * Characters treated as whitespace when separating a tag name from its
     * attributes and attributes from each other: space, tab, CR and LF.
     */
    protected $_whitespace = "\x20\x09\x0D\x0A";

    /**
     * Callback for preg_replace_callback() in tokenizeHTML().
     *
     * @param array $matches [1] opening script tag, [2] script contents,
     *                       [3] closing script tag
     * @return string the same element with its contents HTML-escaped
     */
    protected function scriptCallback($matches) {
        return $matches[1] . htmlspecialchars($matches[2], ENT_COMPAT, 'UTF-8') . $matches[3];
    }

    /**
     * Splits an HTML string into an array of tokens.
     *
     * @param string $html    HTML to tokenize
     * @param object $config  configuration; the directives HTML.Trusted,
     *                        Core.MaintainLineNumbers, Core.CollectErrors and
     *                        Core.DirectLexLineNumberSyncInterval are read
     * @param object $context context; 'CurrentLine' and 'CurrentCol' are
     *                        registered for the duration of the call and
     *                        'ErrorCollector' is fetched when error
     *                        collection is on
     * @return array list of HTMLPurifier_Token_* objects in document order
     */
    public function tokenizeHTML($html, $config, $context) {

        // special normalization for script tags without any armor
        // our "armor" heuristic is a < sign any number of whitespaces after
        // the first script tag
        if ($config->get('HTML.Trusted')) {
            $html = preg_replace_callback('#(<script[^>]*>)(\s*[^<].+?)(</script>)#si',
                array($this, 'scriptCallback'), $html);
        }

        $html = $this->normalize($html, $config, $context);

        $cursor = 0; // our location in the text
        $inside_tag = false; // whether or not we're parsing the inside of a tag
        $array = array(); // result array

        // This is also treated to mean maintain *column* numbers too
        $maintain_line_numbers = $config->get('Core.MaintainLineNumbers');

        if ($maintain_line_numbers === null) {
            // automatically determine line numbering by checking
            // if error collection is on
            $maintain_line_numbers = $config->get('Core.CollectErrors');
        }

        if ($maintain_line_numbers) {
            $current_line = 1;
            $current_col  = 0;
            $length = strlen($html);
        } else {
            $current_line = false;
            $current_col  = false;
            $length = false;
        }
        $context->register('CurrentLine', $current_line);
        $context->register('CurrentCol',  $current_col);
        $nl = "\n";
        // how often to manually recalculate. This will ALWAYS be right,
        // but it's pretty wasteful. Set to 0 to turn off
        $synchronize_interval = $config->get('Core.DirectLexLineNumberSyncInterval');

        $e = false;
        if ($config->get('Core.CollectErrors')) {
            $e =& $context->get('ErrorCollector');
        }

        // for testing synchronization
        $loops = 0;

        while(++$loops) {

            // $cursor is either at the start of a token, or inside of
            // a tag (i.e. there was a < immediately before it), as indicated
            // by $inside_tag

            if ($maintain_line_numbers) {

                // $rcursor, however, is always at the start of a token.
                $rcursor = $cursor - (int) $inside_tag;

                // Column number is cheap, so we calculate it every round.
                // We're interested at the *end* of the newline string, so
                // we need to add strlen($nl) == 1 to $nl_pos before subtracting it
                // from our "rcursor" position.
                $nl_pos = strrpos($html, $nl, $rcursor - $length);
                $current_col = $rcursor - (is_bool($nl_pos) ? 0 : $nl_pos + 1);

                // recalculate lines
                if (
                    $synchronize_interval &&  // synchronization is on
                    $cursor > 0 &&            // cursor is further than zero
                    $loops % $synchronize_interval === 0 // time to synchronize!
                ) {
                    $current_line = 1 + $this->substrCount($html, $nl, 0, $cursor);
                }

            }

            $position_next_lt = strpos($html, '<', $cursor);
            $position_next_gt = strpos($html, '>', $cursor);

            // triggers on "<b>asdf</b>" but not "asdf <b></b>"
            // special case to set up context
            if ($position_next_lt === $cursor) {
                $inside_tag = true;
                $cursor++;
            }

            if (!$inside_tag && $position_next_lt !== false) {
                // We are not inside tag and there still is another tag to parse
                $token = new
                    HTMLPurifier_Token_Text(
                        $this->parseData(
                            substr(
                                $html, $cursor, $position_next_lt - $cursor
                            )
                        )
                    );
                if ($maintain_line_numbers) {
                    $token->rawPosition($current_line, $current_col);
                    $current_line += $this->substrCount($html, $nl, $cursor, $position_next_lt - $cursor);
                }
                $array[] = $token;
                $cursor  = $position_next_lt + 1;
                $inside_tag = true;
                continue;
            } elseif (!$inside_tag) {
                // We are not inside tag but there are no more tags
                // If we're already at the end, break
                if ($cursor === strlen($html)) break;
                // Create Text of rest of string
                $token = new
                    HTMLPurifier_Token_Text(
                        $this->parseData(
                            substr(
                                $html, $cursor
                            )
                        )
                    );
                if ($maintain_line_numbers) $token->rawPosition($current_line, $current_col);
                $array[] = $token;
                break;
            } elseif ($inside_tag && $position_next_gt !== false) {
                // We are in tag and it is well formed
                // Grab the internals of the tag
                $strlen_segment = $position_next_gt - $cursor;

                if ($strlen_segment < 1) {
                    // "<>": there is no tag name at all. Emit the stray '<'
                    // as text and leave tag context WITHOUT advancing, so
                    // the '>' under the cursor is lexed as ordinary text on
                    // the next round. (Previously the token was built but
                    // never stored, the '>' was skipped and the lexer stayed
                    // in tag context, corrupting whatever followed.)
                    if ($e) $e->send(E_NOTICE, 'Lexer: Unescaped lt');
                    $token = new HTMLPurifier_Token_Text('<');
                    if ($maintain_line_numbers) {
                        $token->rawPosition($current_line, $current_col);
                    }
                    $array[] = $token;
                    $inside_tag = false;
                    continue;
                }

                $segment = substr($html, $cursor, $strlen_segment);

                if ($segment === false) {
                    // somehow, we attempted to access beyond the end of
                    // the string, defense-in-depth, reported by Nate Abele
                    break;
                }

                // Check if it's a comment
                if (
                    substr($segment, 0, 3) === '!--'
                ) {
                    // re-determine segment length, looking for -->
                    $position_comment_end = strpos($html, '-->', $cursor);
                    if ($position_comment_end === false) {
                        // uh oh, we have a comment that extends to
                        // infinity. Can't be helped: set comment
                        // end position to end of string
                        if ($e) $e->send(E_WARNING, 'Lexer: Unclosed comment');
                        $position_comment_end = strlen($html);
                        $end = true;
                    } else {
                        $end = false;
                    }
                    $strlen_segment = $position_comment_end - $cursor;
                    $segment = substr($html, $cursor, $strlen_segment);
                    // skip the leading "!--" to get the comment text
                    $token = new
                        HTMLPurifier_Token_Comment(
                            substr(
                                $segment, 3, $strlen_segment - 3
                            )
                        );
                    if ($maintain_line_numbers) {
                        $token->rawPosition($current_line, $current_col);
                        $current_line += $this->substrCount($html, $nl, $cursor, $strlen_segment);
                    }
                    $array[] = $token;
                    // step over the closing "-->" unless we ran off the end
                    $cursor = $end ? $position_comment_end : $position_comment_end + 3;
                    $inside_tag = false;
                    continue;
                }

                // Check if it's an end tag
                $is_end_tag = (strpos($segment,'/') === 0);
                if ($is_end_tag) {
                    $type = substr($segment, 1);
                    $token = new HTMLPurifier_Token_End($type);
                    if ($maintain_line_numbers) {
                        $token->rawPosition($current_line, $current_col);
                        $current_line += $this->substrCount($html, $nl, $cursor, $position_next_gt - $cursor);
                    }
                    $array[] = $token;
                    $inside_tag = false;
                    $cursor = $position_next_gt + 1;
                    continue;
                }

                // Check leading character is alphabetic, if not, we may
                // have accidentally grabbed an emoticon. Translate into
                // text and go our merry way. The cursor is deliberately not
                // moved: everything after the '<' is lexed as text next round.
                if (!ctype_alpha($segment[0])) {
                    // XML:  $segment[0] !== '_' && $segment[0] !== ':'
                    if ($e) $e->send(E_NOTICE, 'Lexer: Unescaped lt');
                    $token = new HTMLPurifier_Token_Text('<');
                    if ($maintain_line_numbers) {
                        $token->rawPosition($current_line, $current_col);
                        $current_line += $this->substrCount($html, $nl, $cursor, $position_next_gt - $cursor);
                    }
                    $array[] = $token;
                    $inside_tag = false;
                    continue;
                }

                // Check if it is explicitly self closing, if so, remove
                // trailing slash. Remember, we could have a tag like <br>, so
                // any later token processing scripts must convert improperly
                // classified EmptyTags from StartTags.
                $is_self_closing = (strrpos($segment,'/') === $strlen_segment-1);
                if ($is_self_closing) {
                    $strlen_segment--;
                    $segment = substr($segment, 0, $strlen_segment);
                }

                // Check if there are any attributes
                $position_first_space = strcspn($segment, $this->_whitespace);

                if ($position_first_space >= $strlen_segment) {
                    // no whitespace at all: the whole segment is the tag name
                    if ($is_self_closing) {
                        $token = new HTMLPurifier_Token_Empty($segment);
                    } else {
                        $token = new HTMLPurifier_Token_Start($segment);
                    }
                    if ($maintain_line_numbers) {
                        $token->rawPosition($current_line, $current_col);
                        $current_line += $this->substrCount($html, $nl, $cursor, $position_next_gt - $cursor);
                    }
                    $array[] = $token;
                    $inside_tag = false;
                    $cursor = $position_next_gt + 1;
                    continue;
                }

                // Grab out all the data
                $type = substr($segment, 0, $position_first_space);
                $attribute_string =
                    trim(
                        substr(
                            $segment, $position_first_space
                        )
                    );
                if ($attribute_string) {
                    $attr = $this->parseAttributeString(
                                    $attribute_string
                                  , $config, $context
                              );
                } else {
                    $attr = array();
                }

                if ($is_self_closing) {
                    $token = new HTMLPurifier_Token_Empty($type, $attr);
                } else {
                    $token = new HTMLPurifier_Token_Start($type, $attr);
                }
                if ($maintain_line_numbers) {
                    $token->rawPosition($current_line, $current_col);
                    $current_line += $this->substrCount($html, $nl, $cursor, $position_next_gt - $cursor);
                }
                $array[] = $token;
                $cursor = $position_next_gt + 1;
                $inside_tag = false;
                continue;
            } else {
                // inside tag, but there's no ending > sign
                if ($e) $e->send(E_WARNING, 'Lexer: Missing gt');
                $token = new
                    HTMLPurifier_Token_Text(
                        '<' .
                        $this->parseData(
                            substr($html, $cursor)
                        )
                    );
                if ($maintain_line_numbers) $token->rawPosition($current_line, $current_col);
                // no cursor scroll? Hmm...
                $array[] = $token;
                break;
            }
            // not reached: every branch above either continues or breaks
            break;
        }

        $context->destroy('CurrentLine');
        $context->destroy('CurrentCol');
        return $array;
    }

    /**
     * Counts occurrences of $needle inside a slice of $haystack.
     *
     * Wrapper around substr_count(): on PHP older than 5.1 the slice is cut
     * out with substr() first, otherwise the offset and length are passed
     * straight to substr_count().
     *
     * @param string $haystack string to search in
     * @param string $needle   substring to count
     * @param int    $offset   start of the slice
     * @param int    $length   length of the slice
     * @return int number of occurrences within the slice
     */
    protected function substrCount($haystack, $needle, $offset, $length) {
        static $oldVersion;
        if ($oldVersion === null) {
            $oldVersion = version_compare(PHP_VERSION, '5.1', '<');
        }
        if ($oldVersion) {
            $haystack = substr($haystack, $offset, $length);
            return substr_count($haystack, $needle);
        } else {
            return substr_count($haystack, $needle, $offset, $length);
        }
    }

    /**
     * Parses the attribute part of a tag into an associative array.
     *
     * Handles key="value", key='value', unquoted key=value and bare boolean
     * attributes (which map to their own name). Values are passed through
     * parseData(). Problems are reported to the error collector when
     * Core.CollectErrors is on.
     *
     * @param string $string  attribute string, i.e. the inside of a tag
     *                        after the tag name
     * @param object $config  configuration; Core.CollectErrors is read
     * @param object $context context; 'ErrorCollector' is fetched when
     *                        error collection is on
     * @return array attribute name => value
     */
    public function parseAttributeString($string, $config, $context) {
        $string = (string) $string; // quick typecast

        if ($string == '') return array(); // no attributes

        $e = false;
        if ($config->get('Core.CollectErrors')) {
            $e =& $context->get('ErrorCollector');
        }

        // let's see if we can abort as quickly as possible
        // one equal sign, no whitespace => one attribute
        $num_equal = substr_count($string, '=');
        // Look for ANY of our whitespace characters, not just ' ': attributes
        // separated by a tab or newline must go through the full parser too.
        $has_space = strcspn($string, $this->_whitespace) < strlen($string);
        if ($num_equal === 0 && !$has_space) {
            // bool attribute
            return array($string => $string);
        } elseif ($num_equal === 1 && !$has_space) {
            // only one attribute
            list($key, $quoted_value) = explode('=', $string);
            $quoted_value = trim($quoted_value);
            if (!$key) {
                if ($e) $e->send(E_ERROR, 'Lexer: Missing attribute key');
                return array();
            }
            // strict comparison: the value "0" (e.g. border=0) is a real
            // value and must not be mistaken for an empty one
            if ($quoted_value === '') return array($key => '');
            $first_char = @$quoted_value[0];
            $last_char  = @$quoted_value[strlen($quoted_value)-1];

            $same_quote = ($first_char == $last_char);
            $open_quote = ($first_char == '"' || $first_char == "'");

            if ( $same_quote && $open_quote) {
                // well behaved
                $value = substr($quoted_value, 1, strlen($quoted_value) - 2);
            } else {
                // not well behaved
                if ($open_quote) {
                    if ($e) $e->send(E_ERROR, 'Lexer: Missing end quote');
                    $value = substr($quoted_value, 1);
                } else {
                    $value = $quoted_value;
                }
            }
            if ($value === false) $value = '';
            return array($key => $this->parseData($value));
        }

        // setup loop environment
        $array  = array(); // return assoc array of attributes
        $cursor = 0; // current position in string (moves forward)
        $size   = strlen($string); // size of the string (stays the same)

        // if we have unquoted attributes, the parser expects a terminating
        // space, so let's guarantee that there's always a terminating space.
        $string .= ' ';

        while(true) {

            if ($cursor >= $size) {
                break;
            }

            // skip leading whitespace
            $cursor += strspn($string, $this->_whitespace, $cursor);
            // grab the key

            $key_begin = $cursor; //we're currently at the start of the key

            // scroll past all characters that are the key (not whitespace or =)
            $cursor += strcspn($string, $this->_whitespace . '=', $cursor);

            $key_end = $cursor; // now at the end of the key

            $key = substr($string, $key_begin, $key_end - $key_begin);

            if (!$key) {
                if ($e) $e->send(E_ERROR, 'Lexer: Missing attribute key');
                $cursor += strcspn($string, $this->_whitespace, $cursor + 1); // prevent infinite loop
                continue; // empty key
            }

            // scroll past all whitespace
            $cursor += strspn($string, $this->_whitespace, $cursor);

            if ($cursor >= $size) {
                $array[$key] = $key;
                break;
            }

            // if the next character is an equal sign, we've got a regular
            // pair, otherwise, it's a bool attribute
            $first_char = @$string[$cursor];

            if ($first_char == '=') {
                // key="value"

                $cursor++;
                $cursor += strspn($string, $this->_whitespace, $cursor);

                // we might be in front of a quote right now
                // (the offset can lie past the end of the string, hence the @)

                $char = @$string[$cursor];

                if ($char == '"' || $char == "'") {
                    // it's quoted, end bound is $char
                    $cursor++;
                    $value_begin = $cursor;
                    $cursor = strpos($string, $char, $cursor);
                    $value_end = $cursor;
                } else {
                    // it's not quoted, end bound is whitespace
                    $value_begin = $cursor;
                    $cursor += strcspn($string, $this->_whitespace, $cursor);
                    $value_end = $cursor;
                }

                // we reached a premature end (no closing quote was found)
                if ($cursor === false) {
                    $cursor = $size;
                    $value_end = $cursor;
                }

                $value = substr($string, $value_begin, $value_end - $value_begin);
                if ($value === false) $value = '';
                $array[$key] = $this->parseData($value);
                $cursor++;

            } else {
                // boolattr
                if ($key !== '') {
                    $array[$key] = $key;
                } else {
                    // purely theoretical
                    if ($e) $e->send(E_ERROR, 'Lexer: Missing attribute key');
                }

            }
        }
        return $array;
    }

}
15413 
15414 
15415 
15416 
15417 
/**
 * Strategy built from an ordered list of sub-strategies that are applied
 * one after another.
 */
abstract class HTMLPurifier_Strategy_Composite extends HTMLPurifier_Strategy
{

    /**
     * Sub-strategies, in the order execute() runs them.
     */
    protected $strategies = array();

    /**
     * Runs every sub-strategy in turn; the tokens returned by one strategy
     * are the input of the next.
     *
     * @param array  $tokens  tokens to process
     * @param object $config  passed through to each sub-strategy
     * @param object $context passed through to each sub-strategy
     * @return array tokens returned by the last sub-strategy, or $tokens
     *               unchanged when there are none
     */
    public function execute($tokens, $config, $context) {
        $pipeline = array_values($this->strategies);
        for ($step = 0, $total = count($pipeline); $step < $total; $step++) {
            $tokens = $pipeline[$step]->execute($tokens, $config, $context);
        }
        return $tokens;
    }

}
15437 
15438 
15439 
15440 
15441 
/**
 * Composite strategy that chains four strategies: remove foreign elements,
 * make well-formed, fix nesting, then validate attributes.
 */
class HTMLPurifier_Strategy_Core extends HTMLPurifier_Strategy_Composite
{

    /**
     * Appends the four sub-strategies in the order they are executed.
     */
    public function __construct() {
        array_push(
            $this->strategies,
            new HTMLPurifier_Strategy_RemoveForeignElements(),
            new HTMLPurifier_Strategy_MakeWellFormed(),
            new HTMLPurifier_Strategy_FixNesting(),
            new HTMLPurifier_Strategy_ValidateAttributes()
        );
    }

}
15456 
15457 
15458 
15459 
15460 
15489 class HTMLPurifier_Strategy_FixNesting extends HTMLPurifier_Strategy
15490 {
15491 
15492     public function execute($tokens, $config, $context) {
15493         //####################################################################//
15494         // Pre-processing
15495 
15496         // get a copy of the HTML definition
15497         $definition = $config->getHTMLDefinition();
15498 
15499         // insert implicit "parent" node, will be removed at end.
15500         // DEFINITION CALL
15501         $parent_name = $definition->info_parent;
15502         array_unshift($tokens, new HTMLPurifier_Token_Start($parent_name));
15503         $tokens[] = new HTMLPurifier_Token_End($parent_name);
15504 
15505         // setup the context variable 'IsInline', for chameleon processing
15506         // is 'false' when we are not inline, 'true' when it must always
15507         // be inline, and an integer when it is inline for a certain
15508         // branch of the document tree
15509         $is_inline = $definition->info_parent_def->descendants_are_inline;
15510         $context->register('IsInline', $is_inline);
15511 
15512         // setup error collector
15513         $e =& $context->get('ErrorCollector', true);
15514 
15515         //####################################################################//
15516         // Loop initialization
15517 
15518         // stack that contains the indexes of all parents,
15519         // $stack[count($stack)-1] being the current parent
15520         $stack = array();
15521 
15522         // stack that contains all elements that are excluded
15523         // it is organized by parent elements, similar to $stack,
15524         // but it is only populated when an element with exclusions is
15525         // processed, i.e. there won't be empty exclusions.
15526         $exclude_stack = array();
15527 
15528         // variable that contains the start token while we are processing
15529         // nodes. This enables error reporting to do its job
15530         $start_token = false;
15531         $context->register('CurrentToken', $start_token);
15532 
15533         //####################################################################//
15534         // Loop
15535 
15536         // iterate through all start nodes. Determining the start node
15537         // is complicated so it has been omitted from the loop construct
15538         for ($i = 0, $size = count($tokens) ; $i < $size; ) {
15539 
15540             //################################################################//
15541             // Gather information on children
15542 
15543             // child token accumulator
15544             $child_tokens = array();
15545 
15546             // scroll to the end of this node, report number, and collect
15547             // all children
15548             for ($j = $i, $depth = 0; ; $j++) {
15549                 if ($tokens[$j] instanceof HTMLPurifier_Token_Start) {
15550                     $depth++;
15551                     // skip token assignment on first iteration, this is the
15552                     // token we currently are on
15553                     if ($depth == 1) continue;
15554                 } elseif ($tokens[$j] instanceof HTMLPurifier_Token_End) {
15555                     $depth--;
15556                     // skip token assignment on last iteration, this is the
15557                     // end token of the token we're currently on
15558                     if ($depth == 0) break;
15559                 }
15560                 $child_tokens[] = $tokens[$j];
15561             }
15562 
15563             // $i is index of start token
15564             // $j is index of end token
15565 
15566             $start_token = $tokens[$i]; // to make token available via CurrentToken
15567 
15568             //################################################################//
15569             // Gather information on parent
15570 
15571             // calculate parent information
15572             if ($count = count($stack)) {
15573                 $parent_index = $stack[$count-1];
15574                 $parent_name  = $tokens[$parent_index]->name;
15575                 if ($parent_index == 0) {
15576                     $parent_def   = $definition->info_parent_def;
15577                 } else {
15578                     $parent_def   = $definition->info[$parent_name];
15579                 }
15580             } else {
15581                 // processing as if the parent were the "root" node
15582                 // unknown info, it won't be used anyway, in the future,
15583                 // we may want to enforce one element only (this is
15584                 // necessary for HTML Purifier to clean entire documents
15585                 $parent_index = $parent_name = $parent_def = null;
15586             }
15587 
15588             // calculate context
15589             if ($is_inline === false) {
15590                 // check if conditions make it inline
15591                 if (!empty($parent_def) && $parent_def->descendants_are_inline) {
15592                     $is_inline = $count - 1;
15593                 }
15594             } else {
15595                 // check if we're out of inline
15596                 if ($count === $is_inline) {
15597                     $is_inline = false;
15598                 }
15599             }
15600 
15601             //################################################################//
15602             // Determine whether element is explicitly excluded SGML-style
15603 
15604             // determine whether or not element is excluded by checking all
15605             // parent exclusions. The array should not be very large, two
15606             // elements at most.
15607             $excluded = false;
15608             if (!empty($exclude_stack)) {
15609                 foreach ($exclude_stack as $lookup) {
15610                     if (isset($lookup[$tokens[$i]->name])) {
15611                         $excluded = true;
15612                         // no need to continue processing
15613                         break;
15614                     }
15615                 }
15616             }
15617 
15618             //################################################################//
15619             // Perform child validation
15620 
15621             if ($excluded) {
15622                 // there is an exclusion, remove the entire node
15623                 $result = false;
15624                 $excludes = array(); // not used, but good to initialize anyway
15625             } else {
15626                 // DEFINITION CALL
15627                 if ($i === 0) {
15628                     // special processing for the first node
15629                     $def = $definition->info_parent_def;
15630                 } else {
15631                     $def = $definition->info[$tokens[$i]->name];
15632 
15633                 }
15634 
15635                 if (!empty($def->child)) {
15636                     // have DTD child def validate children
15637                     $result = $def->child->validateChildren(
15638                         $child_tokens, $config, $context);
15639                 } else {
15640                     // weird, no child definition, get rid of everything
15641                     $result = false;
15642                 }
15643 
15644                 // determine whether or not this element has any exclusions
15645                 $excludes = $def->excludes;
15646             }
15647 
15648             // $result is now a bool or array
15649 
15650             //################################################################//
15651             // Process result by interpreting $result
15652 
15653             if ($result === true || $child_tokens === $result) {
15654                 // leave the node as is
15655 
15656                 // register start token as a parental node start
15657                 $stack[] = $i;
15658 
15659                 // register exclusions if there are any
15660                 if (!empty($excludes)) $exclude_stack[] = $excludes;
15661 
15662                 // move cursor to next possible start node
15663                 $i++;
15664 
15665             } elseif($result === false) {
15666                 // remove entire node
15667 
15668                 if ($e) {
15669                     if ($excluded) {
15670                         $e->send(E_ERROR, 'Strategy_FixNesting: Node excluded');
15671                     } else {
15672                         $e->send(E_ERROR, 'Strategy_FixNesting: Node removed');
15673                     }
15674                 }
15675 
15676                 // calculate length of inner tokens and current tokens
15677                 $length = $j - $i + 1;
15678 
15679                 // perform removal
15680                 array_splice($tokens, $i, $length);
15681 
15682                 // update size
15683                 $size -= $length;
15684 
15685                 // there is no start token to register,
15686                 // current node is now the next possible start node
15687                 // unless it turns out that we need to do a double-check
15688 
15689                 // this is a rought heuristic that covers 100% of HTML's
15690                 // cases and 99% of all other cases. A child definition
15691                 // that would be tricked by this would be something like:
15692                 // ( | a b c) where it's all or nothing. Fortunately,
15693                 // our current implementation claims that that case would
15694                 // not allow empty, even if it did
15695                 if (!$parent_def->child->allow_empty) {
15696                     // we need to do a double-check
15697                     $i = $parent_index;
15698                     array_pop($stack);
15699                 }
15700 
15701                 // PROJECTED OPTIMIZATION: Process all children elements before
15702                 // reprocessing parent node.
15703 
15704             } else {
15705                 // replace node with $result
15706 
15707                 // calculate length of inner tokens
15708                 $length = $j - $i - 1;
15709 
15710                 if ($e) {
15711                     if (empty($result) && $length) {
15712                         $e->send(E_ERROR, 'Strategy_FixNesting: Node contents removed');
15713                     } else {
15714                         $e->send(E_WARNING, 'Strategy_FixNesting: Node reorganized');
15715                     }
15716                 }
15717 
15718                 // perform replacement
15719                 array_splice($tokens, $i + 1, $length, $result);
15720 
15721                 // update size
15722                 $size -= $length;
15723                 $size += count($result);
15724 
15725                 // register start token as a parental node start
15726                 $stack[] = $i;
15727 
15728                 // register exclusions if there are any
15729                 if (!empty($excludes)) $exclude_stack[] = $excludes;
15730 
15731                 // move cursor to next possible start node
15732                 $i++;
15733 
15734             }
15735 
15736             //################################################################//
15737             // Scroll to next start node
15738 
15739             // We assume, at this point, that $i is the index of the token
15740             // that is the first possible new start point for a node.
15741 
15742             // Test if the token indeed is a start tag, if not, move forward
15743             // and test again.
15744             $size = count($tokens);
15745             while ($i < $size and !$tokens[$i] instanceof HTMLPurifier_Token_Start) {
15746                 if ($tokens[$i] instanceof HTMLPurifier_Token_End) {
15747                     // pop a token index off the stack if we ended a node
15748                     array_pop($stack);
15749                     // pop an exclusion lookup off exclusion stack if
15750                     // we ended node and that node had exclusions
15751                     if ($i == 0 || $i == $size - 1) {
15752                         // use specialized var if it's the super-parent
15753                         $s_excludes = $definition->info_parent_def->excludes;
15754                     } else {
15755                         $s_excludes = $definition->info[$tokens[$i]->name]->excludes;
15756                     }
15757                     if ($s_excludes) {
15758                         array_pop($exclude_stack);
15759                     }
15760                 }
15761                 $i++;
15762             }
15763 
15764         }
15765 
15766         //####################################################################//
15767         // Post-processing
15768 
15769         // remove implicit parent tokens at the beginning and end
15770         array_shift($tokens);
15771         array_pop($tokens);
15772 
15773         // remove context variables
15774         $context->destroy('IsInline');
15775         $context->destroy('CurrentToken');
15776 
15777         //####################################################################//
15778         // Return
15779 
15780         return $tokens;
15781 
15782     }
15783 
15784 }
15785 
15786 
15787 
15788 
15789 
15801 class HTMLPurifier_Strategy_MakeWellFormed extends HTMLPurifier_Strategy
15802 {
15803 
15807     protected $tokens;
15808 
15812     protected $t;
15813 
15817     protected $stack;
15818 
15822     protected $injectors;
15823 
15827     protected $config;
15828 
15832     protected $context;
15833 
15834     public function execute($tokens, $config, $context) {
              /*
               * Repairs the nesting of a token stream: start tags that are
               * never closed get an end tag, end tags with nothing to close are
               * removed (or turned into text), and misnested tags are closed in
               * order. The configured injectors are given each text, element and
               * end token along the way and may replace it.
               *
               * Parameters:
               *   $tokens   array of token objects to repair
               *   $config   configuration object; read for the HTML definition,
               *             Core.EscapeInvalidTags and the AutoFormat batch
               *   $context  context object; CurrentNesting, InputIndex,
               *             InputTokens and CurrentToken are registered on it
               *             for the duration of the call and destroyed at the end
               *
               * Returns the repaired token array.
               *
               * Throws HTMLPurifier_Exception when a tag token of an unexpected
               * kind shows up where it should be impossible.
               */
15835
15836         $definition = $config->getHTMLDefinition();
15837
15838         // local variables
15839         $generator = new HTMLPurifier_Generator($config, $context);
15840         $escape_invalid_tags = $config->get('Core.EscapeInvalidTags');
15841         // used for autoclose early abortion
15842         $global_parent_allowed_elements = array();
15843         if (isset($definition->info[$definition->info_parent])) {
15844             // may be unset under testing circumstances
15845             $global_parent_allowed_elements = $definition->info[$definition->info_parent]->child->getAllowedElements($config);
15846         }
15847         $e = $context->get('ErrorCollector', true);
15848         $t = false; // token index
15849         $i = false; // injector index
15850         $token      = false; // the current token
15851         $reprocess  = false; // whether or not to reprocess the same token
15852         $stack = array();
15853
15854         // member variables
              // (bound by reference so the helper methods see the live locals)
15855         $this->stack   =& $stack;
15856         $this->t       =& $t;
15857         $this->tokens  =& $tokens;
15858         $this->config  = $config;
15859         $this->context = $context;
15860
15861         // context variables
15862         $context->register('CurrentNesting', $stack);
15863         $context->register('InputIndex',     $t);
15864         $context->register('InputTokens',    $tokens);
15865         $context->register('CurrentToken',   $token);
15866
15867         // -- begin INJECTOR --
15868
15869         $this->injectors = array();
15870
15871         $injectors = $config->getBatch('AutoFormat');
15872         $def_injectors = $definition->info_injector;
15873         $custom_injectors = $injectors['Custom'];
15874         unset($injectors['Custom']); // special case
15875         foreach ($injectors as $injector => $b) {
15876             // XXX: Fix with a legitimate lookup table of enabled filters
15877             if (strpos($injector, '.') !== false) continue;
15878             $injector = "HTMLPurifier_Injector_$injector";
15879             if (!$b) continue;
15880             $this->injectors[] = new $injector;
15881         }
15882         foreach ($def_injectors as $injector) {
15883             // assumed to be objects
15884             $this->injectors[] = $injector;
15885         }
15886         foreach ($custom_injectors as $injector) {
15887             if (!$injector) continue;
15888             if (is_string($injector)) {
15889                 $injector = "HTMLPurifier_Injector_$injector";
15890                 $injector = new $injector;
15891             }
15892             $this->injectors[] = $injector;
15893         }
15894
15895         // give the injectors references to the definition and context
15896         // variables for performance reasons
15897         foreach ($this->injectors as $ix => $injector) {
15898             $error = $injector->prepare($config, $context);
15899             if (!$error) continue;
                  // Remove by key, not by position: this loop walks a snapshot
                  // of the list, so splicing here would shift the later entries
                  // and a second failing injector would remove the wrong one.
15900             unset($this->injectors[$ix]); // rm the injector
15901             trigger_error("Cannot enable {$injector->name} injector because $error is not allowed", E_USER_WARNING);
15902         }
              // re-pack so injector indexes are contiguous again; they are used
              // below as identifiers in the skip and rewind bookkeeping
              $this->injectors = array_values($this->injectors);
15903
15904         // -- end INJECTOR --
15905
15906         // a note on reprocessing:
15907         //      In order to reduce code duplication, whenever some code needs
15908         //      to make HTML changes in order to make things "correct", the
15909         //      new HTML gets sent through the purifier, regardless of its
15910         //      status. This means that if we add a start token, because it
15911         //      was totally necessary, we don't have to update nesting; we just
15912         //      punt ($reprocess = true; continue;) and it does that for us.
15913
15914         // isset is in loop because $tokens size changes during loop exec
15915         for (
15916             $t = 0;
15917             $t == 0 || isset($tokens[$t - 1]);
15918             // only increment if we don't need to reprocess
15919             $reprocess ? $reprocess = false : $t++
15920         ) {
15921
15922             // check for a rewind
                  // ($i still holds the index of the injector that handled the
                  // previous token, or false when no injector ran)
15923             if (is_int($i) && $i >= 0) {
15924                 // possibility: disable rewinding if the current token has a
15925                 // rewind set on it already. This would offer protection from
15926                 // infinite loop, but might hinder some advanced rewinding.
15927                 $rewind_to = $this->injectors[$i]->getRewind();
15928                 if (is_int($rewind_to) && $rewind_to < $t) {
15929                     if ($rewind_to < 0) $rewind_to = 0;
15930                     while ($t > $rewind_to) {
15931                         $t--;
15932                         $prev = $tokens[$t];
15933                         // indicate that other injectors should not process this token,
15934                         // but we need to reprocess it
15935                         unset($prev->skip[$i]);
15936                         $prev->rewind = $i;
                              // walking backwards, so undo the effect each tag had on the stack
15937                         if ($prev instanceof HTMLPurifier_Token_Start) array_pop($this->stack);
15938                         elseif ($prev instanceof HTMLPurifier_Token_End) $this->stack[] = $prev->start;
15939                     }
15940                 }
15941                 $i = false;
15942             }
15943
15944             // handle case of document end
15945             if (!isset($tokens[$t])) {
15946                 // kill processing if stack is empty
15947                 if (empty($this->stack)) break;
15948
15949                 // peek
15950                 $top_nesting = array_pop($this->stack);
15951                 $this->stack[] = $top_nesting;
15952
15953                 // send error [TagClosedSuppress]
15954                 if ($e && !isset($top_nesting->armor['MakeWellFormed_TagClosedError'])) {
15955                     $e->send(E_NOTICE, 'Strategy_MakeWellFormed: Tag closed by document end', $top_nesting);
15956                 }
15957
15958                 // append, don't splice, since this is the end
15959                 $tokens[] = new HTMLPurifier_Token_End($top_nesting->name);
15960
15961                 // punt!
15962                 $reprocess = true;
15963                 continue;
15964             }
15965
15966             $token = $tokens[$t];
15967
15968             //echo '<br>'; printTokens($tokens, $t); printTokens($this->stack);
15969             //flush();
15970
15971             // quick-check: if it's not a tag, no need to process
15972             if (empty($token->is_tag)) {
15973                 if ($token instanceof HTMLPurifier_Token_Text) {
                          // the first injector that has not yet seen this token handles
                          // it; the result is spliced in and processed again
15974                     foreach ($this->injectors as $i => $injector) {
15975                         if (isset($token->skip[$i])) continue;
15976                         if ($token->rewind !== null && $token->rewind !== $i) continue;
15977                         $injector->handleText($token);
15978                         $this->processToken($token, $i);
15979                         $reprocess = true;
15980                         break;
15981                     }
15982                 }
15983                 // another possibility is a comment
15984                 continue;
15985             }
15986
15987             if (isset($definition->info[$token->name])) {
15988                 $type = $definition->info[$token->name]->child->type;
15989             } else {
15990                 $type = false; // Type is unknown, treat accordingly
15991             }
15992
15993             // quick tag checks: anything that's *not* an end tag
15994             $ok = false;
15995             if ($type === 'empty' && $token instanceof HTMLPurifier_Token_Start) {
15996                 // claims to be a start tag but is empty
15997                 $token = new HTMLPurifier_Token_Empty($token->name, $token->attr, $token->line, $token->col, $token->armor);
15998                 $ok = true;
15999             } elseif ($type && $type !== 'empty' && $token instanceof HTMLPurifier_Token_Empty) {
16000                 // claims to be empty but really is a start tag
16001                 $this->swap(new HTMLPurifier_Token_End($token->name));
16002                 $this->insertBefore(new HTMLPurifier_Token_Start($token->name, $token->attr, $token->line, $token->col, $token->armor));
16003                 // punt (since we had to modify the input stream in a non-trivial way)
16004                 $reprocess = true;
16005                 continue;
16006             } elseif ($token instanceof HTMLPurifier_Token_Empty) {
16007                 // real empty token
16008                 $ok = true;
16009             } elseif ($token instanceof HTMLPurifier_Token_Start) {
16010                 // start tag
16011
16012                 // ...unless they also have to close their parent
16013                 if (!empty($this->stack)) {
16014
16015                     // Performance note: you might think that it's rather
16016                     // inefficient, recalculating the autoclose information
16017                     // for every tag that a token closes (since when we
16018                     // do an autoclose, we push a new token into the
16019                     // stream and then /process/ that, before
16020                     // re-processing this token.)  But this is
16021                     // necessary, because an injector can make
16022                     // arbitrary transformations to the autoclosing
16023                     // tokens we introduce, so things may have changed
16024                     // in the meantime.  Also, doing the inefficient thing is
16025                     // "easy" to reason about (for certain perverse definitions
16026                     // of "easy")
16027
                          // peek at the innermost open element
16028                     $parent = array_pop($this->stack);
16029                     $this->stack[] = $parent;
16030
16031                     if (isset($definition->info[$parent->name])) {
16032                         $elements = $definition->info[$parent->name]->child->getAllowedElements($config);
16033                         $autoclose = !isset($elements[$token->name]);
16034                     } else {
16035                         $autoclose = false;
16036                     }
16037
16038                     if ($autoclose && $definition->info[$token->name]->wrap) {
16039                         // Check if an element can be wrapped by another
16040                         // element to make it valid in a context (for
16041                         // example, <ul><ul> needs a <li> in between)
16042                         $wrapname = $definition->info[$token->name]->wrap;
16043                         $wrapdef = $definition->info[$wrapname];
16044                         $elements = $wrapdef->child->getAllowedElements($config);
16045                         $parent_elements = $definition->info[$parent->name]->child->getAllowedElements($config);
16046                         if (isset($elements[$token->name]) && isset($parent_elements[$wrapname])) {
16047                             $newtoken = new HTMLPurifier_Token_Start($wrapname);
16048                             $this->insertBefore($newtoken);
16049                             $reprocess = true;
16050                             continue;
16051                         }
16052                     }
16053
                          // a formatting parent that gets auto-closed is re-opened
                          // after the new element (see the clone below)
16054                     $carryover = false;
16055                     if ($autoclose && $definition->info[$parent->name]->formatting) {
16056                         $carryover = true;
16057                     }
16058
16059                     if ($autoclose) {
16060                         // check if this autoclose is doomed to fail
16061                         // (this rechecks $parent, which is harmless)
16062                         $autoclose_ok = isset($global_parent_allowed_elements[$token->name]);
16063                         if (!$autoclose_ok) {
16064                             foreach ($this->stack as $ancestor) {
16065                                 $elements = $definition->info[$ancestor->name]->child->getAllowedElements($config);
16066                                 if (isset($elements[$token->name])) {
16067                                     $autoclose_ok = true;
16068                                     break;
16069                                 }
16070                                 if ($definition->info[$token->name]->wrap) {
16071                                     $wrapname = $definition->info[$token->name]->wrap;
16072                                     $wrapdef = $definition->info[$wrapname];
16073                                     $wrap_elements = $wrapdef->child->getAllowedElements($config);
16074                                     if (isset($wrap_elements[$token->name]) && isset($elements[$wrapname])) {
16075                                         $autoclose_ok = true;
16076                                         break;
16077                                     }
16078                                 }
16079                             }
16080                         }
16081                         if ($autoclose_ok) {
16082                             // errors need to be updated
16083                             $new_token = new HTMLPurifier_Token_End($parent->name);
16084                             $new_token->start = $parent;
16085                             if ($carryover) {
16086                                 $element = clone $parent;
16087                                 // [TagClosedAuto]
16088                                 $element->armor['MakeWellFormed_TagClosedError'] = true;
16089                                 $element->carryover = true;
16090                                 $this->processToken(array($new_token, $token, $element));
16091                             } else {
16092                                 $this->insertBefore($new_token);
16093                             }
16094                             // [TagClosedSuppress]
16095                             if ($e && !isset($parent->armor['MakeWellFormed_TagClosedError'])) {
16096                                 if (!$carryover) {
16097                                     $e->send(E_NOTICE, 'Strategy_MakeWellFormed: Tag auto closed', $parent);
16098                                 } else {
16099                                     $e->send(E_NOTICE, 'Strategy_MakeWellFormed: Tag carryover', $parent);
16100                                 }
16101                             }
16102                         } else {
                                  // no open element could ever contain this tag; drop it
16103                             $this->remove();
16104                         }
16105                         $reprocess = true;
16106                         continue;
16107                     }
16108
16109                 }
16110                 $ok = true;
16111             }
16112
16113             if ($ok) {
16114                 foreach ($this->injectors as $i => $injector) {
16115                     if (isset($token->skip[$i])) continue;
16116                     if ($token->rewind !== null && $token->rewind !== $i) continue;
16117                     $injector->handleElement($token);
16118                     $this->processToken($token, $i);
16119                     $reprocess = true;
16120                     break;
16121                 }
16122                 if (!$reprocess) {
16123                     // ah, nothing interesting happened; do normal processing
16124                     $this->swap($token);
16125                     if ($token instanceof HTMLPurifier_Token_Start) {
16126                         $this->stack[] = $token;
16127                     } elseif ($token instanceof HTMLPurifier_Token_End) {
16128                         throw new HTMLPurifier_Exception('Improper handling of end tag in start code; possible error in MakeWellFormed');
16129                     }
16130                 }
16131                 continue;
16132             }
16133
16134             // sanity check: we should be dealing with a closing tag
16135             if (!$token instanceof HTMLPurifier_Token_End) {
16136                 throw new HTMLPurifier_Exception('Unaccounted for tag token in input stream, bug in HTML Purifier');
16137             }
16138
16139             // make sure that we have something open
16140             if (empty($this->stack)) {
16141                 if ($escape_invalid_tags) {
16142                     if ($e) $e->send(E_WARNING, 'Strategy_MakeWellFormed: Unnecessary end tag to text');
16143                     $this->swap(new HTMLPurifier_Token_Text(
16144                         $generator->generateFromToken($token)
16145                     ));
16146                 } else {
16147                     $this->remove();
16148                     if ($e) $e->send(E_WARNING, 'Strategy_MakeWellFormed: Unnecessary end tag removed');
16149                 }
16150                 $reprocess = true;
16151                 continue;
16152             }
16153
16154             // first, check for the simplest case: everything closes neatly.
16155             // Eventually, everything passes through here; if there are problems
16156             // we modify the input stream accordingly and then punt, so that
16157             // the tokens get processed again.
16158             $current_parent = array_pop($this->stack);
16159             if ($current_parent->name == $token->name) {
16160                 $token->start = $current_parent;
16161                 foreach ($this->injectors as $i => $injector) {
16162                     if (isset($token->skip[$i])) continue;
16163                     if ($token->rewind !== null && $token->rewind !== $i) continue;
16164                     $injector->handleEnd($token);
16165                     $this->processToken($token, $i);
                          // the end tag will be processed again, so its parent
                          // has to be back on the stack for that pass
16166                     $this->stack[] = $current_parent;
16167                     $reprocess = true;
16168                     break;
16169                 }
16170                 continue;
16171             }
16172
16173             // okay, so we're trying to close the wrong tag
16174
16175             // undo the previous pop
16176             $this->stack[] = $current_parent;
16177
16178             // scroll back the entire nest, trying to find our tag.
16179             // (feature could be to specify how far you'd like to go)
16180             $size = count($this->stack);
16181             // -2 because -1 is the last element, but we already checked that
16182             $skipped_tags = false;
16183             for ($j = $size - 2; $j >= 0; $j--) {
16184                 if ($this->stack[$j]->name == $token->name) {
16185                     $skipped_tags = array_slice($this->stack, $j);
16186                     break;
16187                 }
16188             }
16189
16190             // we didn't find the tag, so remove
16191             if ($skipped_tags === false) {
16192                 if ($escape_invalid_tags) {
16193                     $this->swap(new HTMLPurifier_Token_Text(
16194                         $generator->generateFromToken($token)
16195                     ));
16196                     if ($e) $e->send(E_WARNING, 'Strategy_MakeWellFormed: Stray end tag to text');
16197                 } else {
16198                     $this->remove();
16199                     if ($e) $e->send(E_WARNING, 'Strategy_MakeWellFormed: Stray end tag removed');
16200                 }
16201                 $reprocess = true;
16202                 continue;
16203             }
16204
16205             // do errors, in REVERSE $j order: a,b,c with </a></b></c>
16206             $c = count($skipped_tags);
16207             if ($e) {
16208                 for ($j = $c - 1; $j > 0; $j--) {
16209                     // notice we exclude $j == 0, i.e. the current ending tag, from
16210                     // the errors... [TagClosedSuppress]
16211                     if (!isset($skipped_tags[$j]->armor['MakeWellFormed_TagClosedError'])) {
16212                         $e->send(E_NOTICE, 'Strategy_MakeWellFormed: Tag closed by element end', $skipped_tags[$j]);
16213                     }
16214                 }
16215             }
16216
16217             // insert tags, in FORWARD $j order: c,b,a with </a></b></c>
16218             $replace = array($token);
16219             for ($j = 1; $j < $c; $j++) {
16220                 // ...as well as from the insertions
16221                 $new_token = new HTMLPurifier_Token_End($skipped_tags[$j]->name);
16222                 $new_token->start = $skipped_tags[$j];
16223                 array_unshift($replace, $new_token);
16224                 if (isset($definition->info[$new_token->name]) && $definition->info[$new_token->name]->formatting) {
16225                     // [TagClosedAuto]
                          // formatting elements are re-opened after the end tag
16226                     $element = clone $skipped_tags[$j];
16227                     $element->carryover = true;
16228                     $element->armor['MakeWellFormed_TagClosedError'] = true;
16229                     $replace[] = $element;
16230                 }
16231             }
16232             $this->processToken($replace);
16233             $reprocess = true;
16234             continue;
16235         }
16236
16237         $context->destroy('CurrentNesting');
16238         $context->destroy('InputTokens');
16239         $context->destroy('InputIndex');
16240         $context->destroy('CurrentToken');
16241
              // drop the per-run state (including the reference bindings made above)
16242         unset($this->injectors, $this->stack, $this->tokens, $this->t);
16243         return $tokens;
16244     }
16245 
16266     protected function processToken($token, $injector = -1) {
              /*
               * Replaces the token(s) at the current index $this->t with the
               * given replacement.
               *
               * Accepted forms of $token:
               *   object            replace the current token with this one
               *   integer n         delete n tokens, insert nothing
               *   false             delete the current token
               *   array(n, t1, ...) delete n tokens, insert t1, ...
               *   array(t1, ...)    delete one token, insert t1, ...
               *
               * $injector is the index of the injector that produced the
               * replacement, or -1 when no injector was involved. When it is
               * given, every inserted token inherits the skip list of the first
               * replaced token and is additionally marked as seen by that
               * injector.
               *
               * Throws HTMLPurifier_Exception when $token has none of the forms
               * above or asks for zero tokens to be deleted.
               */
16267
16268         // normalize forms of token
16269         if (is_object($token)) $token = array(1, $token);
16270         if (is_int($token))    $token = array($token);
16271         if ($token === false)  $token = array(1);
16272         if (!is_array($token)) throw new HTMLPurifier_Exception('Invalid token type from injector');
              // isset() keeps an empty replacement list from raising an
              // undefined-offset diagnostic; it is still normalized to array(1)
16273         if (!isset($token[0]) || !is_int($token[0])) array_unshift($token, 1);
16274         if ($token[0] === 0) throw new HTMLPurifier_Exception('Deleting zero tokens is not valid');
16275
16276         // $token is now an array with the following form:
16277         // array(number nodes to delete, new node 1, new node 2, ...)
16278
16279         $delete = array_shift($token);
16280         $old = array_splice($this->tokens, $this->t, $delete, $token);
16281
16282         if ($injector > -1) {
16283             // determine appropriate skips
16284             $oldskip = isset($old[0]) ? $old[0]->skip : array();
16285             foreach ($token as $object) {
16286                 $object->skip = $oldskip;
16287                 $object->skip[$injector] = true;
16288             }
16289         }
16290
16291     }
16292 
16297     private function insertBefore($token) {
              // Splice a one-element list in at the current index without
              // removing anything, so the token that was at $this->t ends up
              // directly after the new one.
              $insertion = array($token);
16298         array_splice($this->tokens, $this->t, 0, $insertion);
16299     }
16300 
16305     private function remove() {
              // Cut exactly one token out of the stream at the current index;
              // array_splice closes the gap.
              $cursor = $this->t;
16306         array_splice($this->tokens, $cursor, 1);
16307     }
16308 
16313     private function swap($token) {
              // Overwrite the token at the current index in place; the length
              // of the stream does not change.
              $cursor = $this->t;
16314         $this->tokens[$cursor] = $token;
16315     }
16316 
16317 }
16318 
16319 
16320 
16321 
16322 
16331 class HTMLPurifier_Strategy_RemoveForeignElements extends HTMLPurifier_Strategy
16332 {
16333 
16334     public function execute($tokens, $config, $context) {
16335         $definition = $config->getHTMLDefinition();
16336         $generator = new HTMLPurifier_Generator($config, $context);
16337         $result = array();
16338 
16339         $escape_invalid_tags = $config->get('Core.EscapeInvalidTags');
16340         $remove_invalid_img  = $config->get('Core.RemoveInvalidImg');
16341 
16342         // currently only used to determine if comments should be kept
16343         $trusted = $config->get('HTML.Trusted');
16344         $comment_lookup = $config->get('HTML.AllowedComments');
16345         $comment_regexp = $config->get('HTML.AllowedCommentsRegexp');
16346         $check_comments = $comment_lookup !== array() || $comment_regexp !== null;
16347 
16348         $remove_script_contents = $config->get('Core.RemoveScriptContents');
16349         $hidden_elements     = $config->get('Core.HiddenElements');
16350 
16351         // remove script contents compatibility
16352         if ($remove_script_contents === true) {
16353             $hidden_elements['script'] = true;
16354         } elseif ($remove_script_contents === false && isset($hidden_elements['script'])) {
16355             unset($hidden_elements['script']);
16356         }
16357 
16358         $attr_validator = new HTMLPurifier_AttrValidator();
16359 
16360         // removes tokens until it reaches a closing tag with its value
16361         $remove_until = false;
16362 
16363         // converts comments into text tokens when this is equal to a tag name
16364         $textify_comments = false;
16365 
16366         $token = false;
16367         $context->register('CurrentToken', $token);
16368 
16369         $e = false;
16370         if ($config->get('Core.CollectErrors')) {
16371             $e =& $context->get('ErrorCollector');
16372         }
16373 
16374         foreach($tokens as $token) {
16375             if ($remove_until) {
16376                 if (empty($token->is_tag) || $token->name !== $remove_until) {
16377                     continue;
16378                 }
16379             }
16380             if (!empty( $token->is_tag )) {
16381                 // DEFINITION CALL
16382 
16383                 // before any processing, try to transform the element
16384                 if (
16385                     isset($definition->info_tag_transform[$token->name])
16386                 ) {
16387                     $original_name = $token->name;
16388                     // there is a transformation for this tag
16389                     // DEFINITION CALL
16390                     $token = $definition->
16391                                 info_tag_transform[$token->name]->
16392                                     transform($token, $config, $context);
16393                     if ($e) $e->send(E_NOTICE, 'Strategy_RemoveForeignElements: Tag transform', $original_name);
16394                 }
16395 
16396                 if (isset($definition->info[$token->name])) {
16397 
16398                     // mostly everything's good, but
16399                     // we need to make sure required attributes are in order
16400                     if (
16401                         ($token instanceof HTMLPurifier_Token_Start || $token instanceof HTMLPurifier_Token_Empty) &&
16402                         $definition->info[$token->name]->required_attr &&
16403                         ($token->name != 'img' || $remove_invalid_img) // ensure config option still works
16404                     ) {
16405                         $attr_validator->validateToken($token, $config, $context);
16406                         $ok = true;
16407                         foreach ($definition->info[$token->name]->required_attr as $name) {
16408                             if (!isset($token->attr[$name])) {
16409                                 $ok = false;
16410                                 break;
16411                             }
16412                         }
16413                         if (!$ok) {
16414                             if ($e) $e->send(E_ERROR, 'Strategy_RemoveForeignElements: Missing required attribute', $name);
16415                             continue;
16416                         }
16417                         $token->armor['ValidateAttributes'] = true;
16418                     }
16419 
16420                     if (isset($hidden_elements[$token->name]) && $token instanceof HTMLPurifier_Token_Start) {
16421                         $textify_comments = $token->name;
16422                     } elseif ($token->name === $textify_comments && $token instanceof HTMLPurifier_Token_End) {
16423                         $textify_comments = false;
16424                     }
16425 
16426                 } elseif ($escape_invalid_tags) {
16427                     // invalid tag, generate HTML representation and insert in
16428                     if ($e) $e->send(E_WARNING, 'Strategy_RemoveForeignElements: Foreign element to text');
16429                     $token = new HTMLPurifier_Token_Text(
16430                         $generator->generateFromToken($token)
16431                     );
16432                 } else {
16433                     // check if we need to destroy all of the tag's children
16434                     // CAN BE GENERICIZED
16435                     if (isset($hidden_elements[$token->name])) {
16436                         if ($token instanceof HTMLPurifier_Token_Start) {
16437                             $remove_until = $token->name;
16438                         } elseif ($token instanceof HTMLPurifier_Token_Empty) {
16439                             // do nothing: we're still looking
16440                         } else {
16441                             $remove_until = false;
16442                         }
16443                         if ($e) $e->send(E_ERROR, 'Strategy_RemoveForeignElements: Foreign meta element removed');
16444                     } else {
16445                         if ($e) $e->send(E_ERROR, 'Strategy_RemoveForeignElements: Foreign element removed');
16446                     }
16447                     continue;
16448                 }
16449             } elseif ($token instanceof HTMLPurifier_Token_Comment) {
16450                 // textify comments in script tags when they are allowed
16451                 if ($textify_comments !== false) {
16452                     $data = $token->data;
16453                     $token = new HTMLPurifier_Token_Text($data);
16454                 } elseif ($trusted || $check_comments) {
16455                     // always cleanup comments
16456                     $trailing_hyphen = false;
16457                     if ($e) {
16458                         // perform check whether or not there's a trailing hyphen
16459                         if (substr($token->data, -1) == '-') {
16460                             $trailing_hyphen = true;
16461                         }
16462                     }
16463                     $token->data = rtrim($token->data, '-');
16464                     $found_double_hyphen = false;
16465                     while (strpos($token->data, '--') !== false) {
16466                         $found_double_hyphen = true;
16467                         $token->data = str_replace('--', '-', $token->data);
16468                     }
16469                     if ($trusted || !empty($comment_lookup[trim($token->data)]) || ($comment_regexp !== NULL && preg_match($comment_regexp, trim($token->data)))) {
16470                         // OK good
16471                         if ($e) {
16472                             if ($trailing_hyphen) {
16473                                 $e->send(E_NOTICE, 'Strategy_RemoveForeignElements: Trailing hyphen in comment removed');
16474                             }
16475                             if ($found_double_hyphen) {
16476                                 $e->send(E_NOTICE, 'Strategy_RemoveForeignElements: Hyphens in comment collapsed');
16477                             }
16478                         }
16479                     } else {
16480                         if ($e) {
16481                             $e->send(E_NOTICE, 'Strategy_RemoveForeignElements: Comment removed');
16482                         }
16483                         continue;
16484                     }
16485                 } else {
16486                     // strip comments
16487                     if ($e) $e->send(E_NOTICE, 'Strategy_RemoveForeignElements: Comment removed');
16488                     continue;
16489                 }
16490             } elseif ($token instanceof HTMLPurifier_Token_Text) {
16491             } else {
16492                 continue;
16493             }
16494             $result[] = $token;
16495         }
16496         if ($remove_until && $e) {
16497             // we removed tokens until the end, throw error
16498             $e->send(E_ERROR, 'Strategy_RemoveForeignElements: Token removed to end', $remove_until);
16499         }
16500 
16501         $context->destroy('CurrentToken');
16502 
16503         return $result;
16504     }
16505 
16506 }
16507 
16508 
16509 
16510 
16511 
/**
 * Strategy that runs the attribute validator over every token that can
 * carry attributes.
 */
class HTMLPurifier_Strategy_ValidateAttributes extends HTMLPurifier_Strategy
{

    /**
     * Validates the attributes of each start and empty tag in the stream.
     *
     * @param array $tokens Token list to process.
     * @param object $config Configuration object, passed through to the validator.
     * @param object $context Context object; 'CurrentToken' is registered
     *        for the duration of the run and destroyed afterwards.
     * @return array The token list, with validated tokens written back in place.
     */
    public function execute($tokens, $config, $context) {

        $validator = new HTMLPurifier_AttrValidator();

        // The loop variable is deliberately the same variable that is
        // registered with the context (presumably by reference -- confirm
        // against HTMLPurifier_Context::register), so do not rename one
        // without the other.
        $token = false;
        $context->register('CurrentToken', $token);

        foreach ($tokens as $key => $token) {

            // only start and empty tags have attributes
            $has_attributes =
                $token instanceof HTMLPurifier_Token_Start ||
                $token instanceof HTMLPurifier_Token_Empty;
            if (!$has_attributes) {
                continue;
            }

            // armored tokens are skipped
            if (!empty($token->armor['ValidateAttributes'])) {
                continue;
            }

            // note that we have no facilities here for removing tokens
            $validator->validateToken($token, $config, $context);

            $tokens[$key] = $token; // for PHP 4
        }

        $context->destroy('CurrentToken');

        return $tokens;
    }

}
16547 
16548 
16549 
16550 
16551 
/**
 * Transforms a font tag into a span tag, converting the color, face and
 * size attributes into an inline style.
 */
class HTMLPurifier_TagTransform_Font extends HTMLPurifier_TagTransform
{

    /** Name of the element the tag is renamed to. */
    public $transform_to = 'span';

    /**
     * Maps (normalized) size attribute values to CSS font-size values.
     */
    protected $_size_lookup = array(
        '0' => 'xx-small',
        '1' => 'xx-small',
        '2' => 'small',
        '3' => 'medium',
        '4' => 'large',
        '5' => 'x-large',
        '6' => 'xx-large',
        '7' => '300%',
        '-1' => 'smaller',
        '-2' => '60%',
        '+1' => 'larger',
        '+2' => '150%',
        '+3' => '200%',
        '+4' => '300%'
    );

    /**
     * Returns a renamed clone of the tag; for non-end tags the color, face
     * and size attributes are removed and prepended to the style attribute.
     *
     * @param object $tag Tag token to transform (the original is not modified).
     * @param object $config Unused here.
     * @param object $context Unused here.
     * @return object The transformed clone.
     */
    public function transform($tag, $config, $context) {

        // end tags carry no attributes: only the name changes
        if ($tag instanceof HTMLPurifier_Token_End) {
            $new_tag = clone $tag;
            $new_tag->name = $this->transform_to;
            return $new_tag;
        }

        $attr = $tag->attr;
        $prepend_style = '';

        // handle color transform
        if (isset($attr['color'])) {
            $prepend_style .= 'color:' . $attr['color'] . ';';
            unset($attr['color']);
        }

        // handle face transform
        if (isset($attr['face'])) {
            $prepend_style .= 'font-family:' . $attr['face'] . ';';
            unset($attr['face']);
        }

        // handle size transform
        if (isset($attr['size'])) {
            // normalize large numbers so that they hit the lookup table
            if ($attr['size'] !== '') {
                // Square-bracket offset: the curly-brace form ($str{0}) was
                // deprecated in PHP 7.4 and is a parse error in PHP 8.
                $first_char = $attr['size'][0];
                $size = (int) $attr['size'];
                if ($first_char == '+' || $first_char == '-') {
                    // relative size: clamp to the range -2 .. +4
                    if ($size < -2) $attr['size'] = '-2';
                    if ($size > 4)  $attr['size'] = '+4';
                } else {
                    // absolute size: clamp to at most 7
                    if ($size > 7) $attr['size'] = '7';
                }
            }
            // sizes without a lookup entry are silently dropped
            if (isset($this->_size_lookup[$attr['size']])) {
                $prepend_style .= 'font-size:' .
                  $this->_size_lookup[$attr['size']] . ';';
            }
            unset($attr['size']);
        }

        if ($prepend_style) {
            $attr['style'] = isset($attr['style']) ?
                $prepend_style . $attr['style'] :
                $prepend_style;
        }

        $new_tag = clone $tag;
        $new_tag->name = $this->transform_to;
        $new_tag->attr = $attr;

        return $new_tag;

    }
}
16646 
16647 
16648 
16649 
16650 
/**
 * Tag transform that renames a tag and optionally prepends a fixed piece
 * of CSS to its style.
 */
class HTMLPurifier_TagTransform_Simple extends HTMLPurifier_TagTransform
{

    /** CSS to prepend to the transformed tag, or null for none. */
    protected $style;

    /**
     * @param string $transform_to Name of the tag to transform into.
     * @param string|null $style CSS to prepend on transformed tags, if any.
     */
    public function __construct($transform_to, $style = null) {
        $this->transform_to = $transform_to;
        $this->style = $style;
    }

    /**
     * Returns a renamed clone of the tag, with the configured CSS
     * prepended when the tag is a start or empty tag.
     *
     * @param object $tag Tag token to transform (the original is not modified).
     * @param object $config Unused here.
     * @param object $context Unused here.
     * @return object The transformed clone.
     */
    public function transform($tag, $config, $context) {
        $new_tag = clone $tag;
        $new_tag->name = $this->transform_to;

        if (is_null($this->style)) {
            return $new_tag;
        }

        $takes_attributes =
            $new_tag instanceof HTMLPurifier_Token_Start ||
            $new_tag instanceof HTMLPurifier_Token_Empty;
        if ($takes_attributes) {
            $this->prependCSS($new_tag->attr, $this->style);
        }

        return $new_tag;
    }

}
16682 
16683 
16684 
16685 
16686 
/**
 * Token representing a comment.
 */
class HTMLPurifier_Token_Comment extends HTMLPurifier_Token
{
    /** Character data of the comment. */
    public $data;

    /** Comments are always flagged as whitespace. */
    public $is_whitespace = true;

    /**
     * @param string $data Contents of the comment.
     * @param int|null $line Line position, if known.
     * @param int|null $col Column position, if known.
     */
    public function __construct($data, $line = null, $col = null) {
        $this->data = $data;
        $this->col  = $col;
        $this->line = $line;
    }
}
16705 
16706 
16707 
16708 
16709 
/**
 * Base class for tag tokens (start, end and empty); holds the tag name and
 * its attributes.
 */
class HTMLPurifier_Token_Tag extends HTMLPurifier_Token
{
    /** Marks this token as a tag. */
    public $is_tag = true;

    /** Tag name, stored in lowercase. */
    public $name;

    /** Associative array of attribute name => value, names in lowercase. */
    public $attr = array();

    /**
     * Stores the tag, lowercasing its name and attribute names.
     *
     * @param string $name Tag name.
     * @param array $attr Associative array of attributes.
     * @param int|null $line Line position, if known.
     * @param int|null $col Column position, if known.
     * @param array $armor Armor flags for this token.
     */
    public function __construct($name, $attr = array(), $line = null, $col = null, $armor = array()) {
        $this->name = ctype_lower($name) ? $name : strtolower($name);

        foreach ($attr as $key => $value) {
            // keys that are already purely lowercase letters need no work
            if (ctype_lower($key)) {
                continue;
            }
            $lowered = strtolower($key);
            // an existing lowercase entry wins over a mixed-case duplicate
            if (!isset($attr[$lowered])) {
                $attr[$lowered] = $attr[$key];
            }
            // drop the original spelling when lowercasing changed it
            if ($lowered !== $key) {
                unset($attr[$key]);
            }
        }

        $this->attr  = $attr;
        $this->line  = $line;
        $this->col   = $col;
        $this->armor = $armor;
    }
}
16763 
16764 
16765 
16766 
16767 
/**
 * Tag token for an empty tag; adds nothing to HTMLPurifier_Token_Tag
 * beyond its type.
 */
class HTMLPurifier_Token_Empty extends HTMLPurifier_Token_Tag
{
}
16775 
16776 
16777 
16778 
16779 
/**
 * Tag token for an end tag.
 */
class HTMLPurifier_Token_End extends HTMLPurifier_Token_Tag
{
    /**
     * NOTE(review): presumably the start token this end tag closes --
     * confirm against the code that assigns it.
     */
    public $start;
}
16795 
16796 
16797 
16798 
16799 
/**
 * Tag token for a start tag; adds nothing to HTMLPurifier_Token_Tag
 * beyond its type.
 */
class HTMLPurifier_Token_Start extends HTMLPurifier_Token_Tag
{
}
16807 
16808 
16809 
16810 
16811 
/**
 * Token representing character data (text).
 */
class HTMLPurifier_Token_Text extends HTMLPurifier_Token
{

    /** Pseudo tag name used for text tokens. */
    public $name = '#PCDATA';

    /** The text itself. */
    public $data;

    /** Whether $data consists solely of whitespace (per ctype_space). */
    public $is_whitespace;

    /**
     * @param string $data Text of the token.
     * @param int|null $line Line position, if known.
     * @param int|null $col Column position, if known.
     */
    public function __construct($data, $line = null, $col = null) {
        $this->data = $data;
        $this->line = $line;
        $this->col  = $col;
        $this->is_whitespace = ctype_space($data);
    }

}
16841 
16842 
16843 
16844 
16845 
/**
 * URI filter that rejects URIs pointing to a host other than the
 * configured one (or one of its subdomains).
 */
class HTMLPurifier_URIFilter_DisableExternal extends HTMLPurifier_URIFilter
{
    public $name = 'DisableExternal';

    /**
     * Labels of our own host in reverse order (e.g. array('com', 'example')),
     * or false when no host is configured.
     */
    protected $ourHostParts = false;

    /**
     * Reads the host from the URI definition and caches its reversed labels.
     *
     * @param object $config Configuration object.
     */
    public function prepare($config) {
        $our_host = $config->getDefinition('URI')->host;
        if ($our_host !== null) $this->ourHostParts = array_reverse(explode('.', $our_host));
    }

    /**
     * @param object $uri URI to check (not modified).
     * @param object $config Unused here.
     * @param object $context Unused here.
     * @return bool True if the URI has no host or its host ends with our
     *         host's labels; false otherwise, including when no host of
     *         our own is configured.
     */
    public function filter(&$uri, $config, $context) {
        if (is_null($uri->host)) return true;
        if ($this->ourHostParts === false) return false;
        $host_parts = array_reverse(explode('.', $uri->host));
        foreach ($this->ourHostParts as $i => $our_part) {
            if (!isset($host_parts[$i])) return false;
            // Strict comparison: both sides are strings from explode(), and
            // a loose comparison would treat numeric-looking labels such as
            // '1e3' and '1000' (or '01' and '1') as the same label.
            if ($host_parts[$i] !== $our_part) return false;
        }
        return true;
    }
}
16865 
16866 
16867 
16868 
16869 
/**
 * Variant of the DisableExternal filter that is only applied when the
 * context's 'EmbeddedURI' value is set.
 */
class HTMLPurifier_URIFilter_DisableExternalResources extends HTMLPurifier_URIFilter_DisableExternal
{
    public $name = 'DisableExternalResources';

    /**
     * @param object $uri URI to check.
     * @param object $config Configuration object.
     * @param object $context Context object; 'EmbeddedURI' is consulted.
     * @return bool True when 'EmbeddedURI' is not set; otherwise the
     *         result of the parent filter.
     */
    public function filter(&$uri, $config, $context) {
        if ($context->get('EmbeddedURI', true)) {
            return parent::filter($uri, $config, $context);
        }
        return true;
    }
}
16878 
16879 
16880 
16881 
16882 
/**
 * URI filter that rejects every URI for which the context's 'EmbeddedURI'
 * value is set.
 */
class HTMLPurifier_URIFilter_DisableResources extends HTMLPurifier_URIFilter
{
    public $name = 'DisableResources';

    /**
     * @param object $uri Unused here.
     * @param object $config Unused here.
     * @param object $context Context object; 'EmbeddedURI' is consulted.
     * @return bool False when 'EmbeddedURI' is set, true otherwise.
     */
    public function filter(&$uri, $config, $context) {
        $is_embedded = $context->get('EmbeddedURI', true);
        return !$is_embedded;
    }
}
16890 
16891 
16892 
16893 
16894 
16895 // It's not clear to me whether or not Punycode means that hostnames
16896 // do not have canonical forms anymore. As far as I can tell, it's
16897 // not a problem (punycoding should be identity when no Unicode
16898 // points are involved), but I'm not 100% sure
/**
 * URI filter that rejects URIs whose host contains any of the fragments
 * listed in URI.HostBlacklist.
 */
class HTMLPurifier_URIFilter_HostBlacklist extends HTMLPurifier_URIFilter
{
    public $name = 'HostBlacklist';

    /** List of blacklisted host fragments. */
    protected $blacklist = array();

    /**
     * Loads the blacklist from the URI.HostBlacklist directive.
     *
     * @param object $config Configuration object.
     * @return bool Always true.
     */
    public function prepare($config) {
        $this->blacklist = $config->get('URI.HostBlacklist');
        return true;
    }

    /**
     * @param object $uri URI to check (not modified).
     * @param object $config Unused here.
     * @param object $context Unused here.
     * @return bool False if the host contains a blacklisted fragment,
     *         true otherwise (including when the URI has no host).
     */
    public function filter(&$uri, $config, $context) {
        // A URI without a host cannot match any fragment; bail out before
        // handing null to strpos(), which is deprecated as of PHP 8.1.
        if ($uri->host === null) {
            return true;
        }
        foreach($this->blacklist as $blacklisted_host_fragment) {
            if (strpos($uri->host, $blacklisted_host_fragment) !== false) {
                return false;
            }
        }
        return true;
    }
}
16916 
16917 
16918 
16919 
16920 
16921 // does not support network paths
16922 
/**
 * URI filter that resolves relative URIs against the configured base URI.
 */
class HTMLPurifier_URIFilter_MakeAbsolute extends HTMLPurifier_URIFilter
{
    public $name = 'MakeAbsolute';

    /** Base URI taken from the URI definition. */
    protected $base;

    /** Collapsed path segments of the base URI, minus its last segment. */
    protected $basePathStack = array();

    /**
     * Reads the base URI and pre-computes its path stack.
     *
     * @param object $config Configuration object.
     * @return bool False (after a warning) when no base is available,
     *         true otherwise.
     */
    public function prepare($config) {
        $this->base = $config->getDefinition('URI')->base;
        if (is_null($this->base)) {
            trigger_error('URI.MakeAbsolute is being ignored due to lack of value for URI.Base configuration', E_USER_WARNING);
            return false;
        }
        // fragment is invalid for base URI
        $this->base->fragment = null;
        $segments = explode('/', $this->base->path);
        // discard last segment
        array_pop($segments);
        // do pre-parsing
        $this->basePathStack = $this->_collapseStack($segments);
        return true;
    }

    /**
     * Rewrites $uri in place into an absolute URI where applicable.
     *
     * @param object $uri URI to resolve; may be modified or replaced.
     * @param object $config Configuration object, used for scheme lookup.
     * @param object $context Context object, used for scheme lookup.
     * @return bool False when the URI has an unrecognized scheme and no
     *         host; true in every other case.
     */
    public function filter(&$uri, $config, $context) {
        // abort early
        if (is_null($this->base)) {
            return true;
        }

        $is_empty_reference =
            $uri->path === '' && is_null($uri->scheme) &&
            is_null($uri->host) && is_null($uri->query) && is_null($uri->fragment);
        if ($is_empty_reference) {
            // reference to current document
            $uri = clone $this->base;
            return true;
        }

        if (!is_null($uri->scheme)) {
            // absolute URI already: don't change
            if (!is_null($uri->host)) {
                return true;
            }
            $scheme_obj = $uri->getSchemeObj($config, $context);
            if (!$scheme_obj) {
                // scheme not recognized
                return false;
            }
            if (!$scheme_obj->hierarchical) {
                // non-hierarchal URI with explicit scheme, don't change
                return true;
            }
            // special case: had a scheme but always is hierarchical and had no authority
        }

        if (!is_null($uri->host)) {
            // network path, don't bother
            return true;
        }

        if ($uri->path === '') {
            $uri->path = $this->base->path;
        } elseif ($uri->path[0] === '/') {
            // absolute path, but still we should collapse
            $segments = $this->_collapseStack(explode('/', $uri->path));
            $uri->path = implode('/', $segments);
        } else {
            // relative path, needs more complicated processing
            $segments = array_merge($this->basePathStack, explode('/', $uri->path));
            if ($segments[0] !== '' && !is_null($this->base->host)) {
                array_unshift($segments, '');
            }
            $uri->path = implode('/', $this->_collapseStack($segments));
        }

        // re-combine
        $uri->scheme = $this->base->scheme;
        if (is_null($uri->userinfo)) {
            $uri->userinfo = $this->base->userinfo;
        }
        if (is_null($uri->host)) {
            $uri->host = $this->base->host;
        }
        if (is_null($uri->port)) {
            $uri->port = $this->base->port;
        }
        return true;
    }

    /**
     * Resolves dots and double-dots in a path stack.
     *
     * @param array $stack List of path segments.
     * @return array Collapsed list of path segments.
     */
    private function _collapseStack($stack) {
        $result = array();
        $is_folder = false;
        for ($i = 0; isset($stack[$i]); $i++) {
            $part = $stack[$i];
            $is_folder = false;

            // absorb an internally duplicated slash
            if ($part == '' && $i && isset($stack[$i+1])) {
                continue;
            }

            if ($part == '..') {
                $is_folder = true;
                if (empty($result)) {
                    // relative path, preserve the double-dots
                    $result[] = '..';
                    continue;
                }
                $popped = array_pop($result);
                if ($popped === '' && empty($result)) {
                    // error case: attempted to back out too far:
                    // restore the leading slash
                    $result[] = '';
                } elseif ($popped === '..') {
                    // cannot remove .. with ..
                    $result[] = '..';
                }
                continue;
            }

            if ($part == '.') {
                // silently absorb
                $is_folder = true;
                continue;
            }

            $result[] = $part;
        }
        // a trailing '.' or '..' means the path refers to a folder
        if ($is_folder) {
            $result[] = '';
        }
        return $result;
    }
}
17031 
17032 
17033 
17034 
17035 
/**
 * URI filter that rewrites browsable, non-benign URIs into the target
 * pattern configured in the URI.Munge directive.
 */
class HTMLPurifier_URIFilter_Munge extends HTMLPurifier_URIFilter
{
    public $name = 'Munge';
    public $post = true;
    private $target, $parser, $doEmbed, $secretKey;

    /** Map of placeholder (e.g. '%s') => replacement value. */
    protected $replace = array();

    /**
     * Loads the target pattern, the embed flag and the secret key.
     *
     * @param object $config Configuration object.
     * @return bool Always true.
     */
    public function prepare($config) {
        $this->target    = $config->get('URI.' . $this->name);
        $this->parser    = new HTMLPurifier_URIParser();
        $this->doEmbed   = $config->get('URI.MungeResources');
        $this->secretKey = $config->get('URI.MungeSecretKey');
        return true;
    }

    /**
     * Replaces $uri with the munged URI where applicable.
     *
     * @param object $uri URI to munge; may be replaced.
     * @param object $config Configuration object.
     * @param object $context Context object.
     * @return bool Always true.
     */
    public function filter(&$uri, $config, $context) {
        if ($context->get('EmbeddedURI', true) && !$this->doEmbed) return true;

        $scheme_obj = $uri->getSchemeObj($config, $context);
        if (!$scheme_obj) return true; // ignore unknown schemes, maybe another postfilter did it
        if (!$scheme_obj->browsable) return true; // ignore non-browseable schemes, since we can't munge those in a reasonable way
        if ($uri->isBenign($config, $context)) return true; // don't redirect if a benign URL

        $this->makeReplace($uri, $config, $context);
        // Replacement values may be null or boolean when the corresponding
        // context value is absent.  Cast explicitly before encoding: the
        // result is the same ('' for null/false, '1' for true), but passing
        // null to rawurlencode() is deprecated as of PHP 8.1.
        foreach ($this->replace as $placeholder => $value) {
            $this->replace[$placeholder] = rawurlencode((string) $value);
        }

        $new_uri = strtr($this->target, $this->replace);
        $new_uri = $this->parser->parse($new_uri);
        // don't redirect if the target host is the same as the
        // starting host
        if ($uri->host === $new_uri->host) return true;
        $uri = $new_uri; // overwrite
        return true;
    }

    /**
     * Fills $this->replace with the raw (unencoded) placeholder values:
     * %s URI string, %r EmbeddedURI, %n current token name, %m CurrentAttr,
     * %p CurrentCSSProperty and, when a secret key is set, %t the SHA-1 of
     * "key:uri".
     *
     * @param object $uri URI being munged.
     * @param object $config Unused here.
     * @param object $context Context object the values are read from.
     */
    protected function makeReplace($uri, $config, $context) {
        $string = $uri->toString();
        // always available
        $this->replace['%s'] = $string;
        $this->replace['%r'] = $context->get('EmbeddedURI', true);
        $token = $context->get('CurrentToken', true);
        $this->replace['%n'] = $token ? $token->name : null;
        $this->replace['%m'] = $context->get('CurrentAttr', true);
        $this->replace['%p'] = $context->get('CurrentCSSProperty', true);
        // not always available
        if ($this->secretKey) $this->replace['%t'] = sha1($this->secretKey . ':' . $string);
    }

}
17085 
17086 
17087 
17088 
17089 
/**
 * URI filter that checks the URIs of iframe elements against the regular
 * expression in URI.SafeIframeRegexp when HTML.SafeIframe is enabled.
 */
class HTMLPurifier_URIFilter_SafeIframe extends HTMLPurifier_URIFilter
{
    public $name = 'SafeIframe';
    public $always_load = true;

    /** Whitelist regular expression, or null when none is configured. */
    protected $regexp = NULL;

    // XXX: The not so good bit about how this is all setup now is we
    // can't check HTML.SafeIframe in the 'prepare' step: we have to
    // defer till the actual filtering.

    /**
     * Loads the whitelist regular expression.
     *
     * @param object $config Configuration object.
     * @return bool Always true.
     */
    public function prepare($config) {
        $this->regexp = $config->get('URI.SafeIframeRegexp');
        return true;
    }

    /**
     * @param object $uri URI to check (not modified).
     * @param object $config Configuration object.
     * @param object $context Context object.
     * @return mixed True when the filter does not apply, false when it
     *         applies but no regexp is configured, otherwise the raw
     *         preg_match() result.
     */
    public function filter(&$uri, $config, $context) {
        // check if filter not applicable
        if (!$config->get('HTML.SafeIframe')) {
            return true;
        }
        // check if the filter should actually trigger
        if (!$context->get('EmbeddedURI', true)) {
            return true;
        }
        $token = $context->get('CurrentToken', true);
        $is_iframe = $token && $token->name == 'iframe';
        if (!$is_iframe) {
            return true;
        }
        // check if we actually have some whitelists enabled
        if ($this->regexp === null) {
            return false;
        }
        // actually check the whitelists
        return preg_match($this->regexp, $uri->toString());
    }
}
17121 
17122 
17123 
17124 
17125 
/**
 * Validates data: URIs that embed an image of one of the allowed types.
 */
class HTMLPurifier_URIScheme_data extends HTMLPurifier_URIScheme {

    public $browsable = true;

    /** Lookup of content types that may be embedded. */
    public $allowed_types = array(
        // you better write validation code for other types if you
        // decide to allow them
        'image/jpeg' => true,
        'image/gif' => true,
        'image/png' => true,
        );
    // this is actually irrelevant since we only write out the path
    // component
    public $may_omit_host = true;

    /**
     * Checks that the payload really is an image of an allowed type and
     * rewrites the URI to "<content type>;base64,<data>".
     *
     * The payload is written to a temporary file so that it can be sniffed
     * with exif_imagetype() or getimagesize(); the file is removed again
     * on every path out of this method.
     *
     * @param object $uri URI to validate; rewritten in place on success.
     * @param object $config Unused here.
     * @param object $context Unused here.
     * @return bool True if the URI is acceptable, false otherwise.
     */
    public function doValidate(&$uri, $config, $context) {
        $result = explode(',', $uri->path, 2);
        $is_base64 = false;
        $charset = null;
        $content_type = null;
        if (count($result) == 2) {
            list($metadata, $data) = $result;
            // do some legwork on the metadata
            $metas = explode(';', $metadata);
            while(!empty($metas)) {
                $cur = array_shift($metas);
                if ($cur == 'base64') {
                    $is_base64 = true;
                    break;
                }
                if (substr($cur, 0, 8) == 'charset=') {
                    // doesn't match if there are arbitrary spaces, but
                    // whatever dude
                    if ($charset !== null) continue; // garbage
                    // not used further; tracked only so that a charset
                    // parameter is not mistaken for the content type
                    $charset = substr($cur, 8);
                } else {
                    if ($content_type !== null) continue; // garbage
                    $content_type = $cur;
                }
            }
        } else {
            $data = $result[0];
        }
        if ($content_type !== null && empty($this->allowed_types[$content_type])) {
            return false;
        }
        $data = rawurldecode($data);
        if ($is_base64) {
            $raw_data = base64_decode($data);
        } else {
            $raw_data = $data;
        }
        // XXX probably want to refactor this into a general mechanism
        // for filtering arbitrary content types
        $file = tempnam("/tmp", "");
        if ($file === false) {
            // could not create a temporary file, so nothing can be verified
            return false;
        }
        file_put_contents($file, $raw_data);
        if (function_exists('exif_imagetype')) {
            $image_code = exif_imagetype($file);
            unlink($file);
        } elseif (function_exists('getimagesize')) {
            set_error_handler(array($this, 'muteErrorHandler'));
            $info = getimagesize($file);
            restore_error_handler();
            unlink($file);
            if ($info == false) return false;
            $image_code = $info[2];
        } else {
            unlink($file);
            trigger_error("could not find exif_imagetype or getimagesize functions", E_USER_ERROR);
            // only reached when an error handler lets execution continue;
            // without an image type there is nothing to validate against
            return false;
        }
        $real_content_type = image_type_to_mime_type($image_code);
        if ($real_content_type != $content_type) {
            // we're nice guys; if the content type is something else we
            // support, change it over
            if (empty($this->allowed_types[$real_content_type])) return false;
            $content_type = $real_content_type;
        }
        // ok, it's kosher, rewrite what we need
        $uri->userinfo = null;
        $uri->host = null;
        $uri->port = null;
        $uri->fragment = null;
        $uri->query = null;
        $uri->path = "$content_type;base64," . base64_encode($raw_data);
        return true;
    }

    /**
     * No-op error handler, installed around getimagesize() to silence it.
     */
    public function muteErrorHandler($errno, $errstr) {}

}
17219 
17220 
17221 
17222 
/**
 * Validates file: URIs.
 */
class HTMLPurifier_URIScheme_file extends HTMLPurifier_URIScheme {

    // Generally file:// URLs are not accessible from most
    // machines, so placing them as an img src is incorrect.
    public $browsable = false;

    // Basically the *only* URI scheme for which this is true, since
    // accessing files on the local machine is very common.  In fact,
    // browsers on some operating systems don't understand the
    // authority, though I hear it is used on Windows to refer to
    // network shares.
    public $may_omit_host = true;

    /**
     * Strips the components that have no meaning for file URIs.
     *
     * @param object $uri URI to validate; modified in place.
     * @param object $config Unused here.
     * @param object $context Unused here.
     * @return bool Always true.
     */
    public function doValidate(&$uri, $config, $context) {
        // While it seems to work on Firefox, the querystring has
        // no possible effect and is thus stripped.
        $uri->query = null;
        // file:// makes no provisions for accessing the resource
        $uri->port = null;
        // Authentication method is not supported
        $uri->userinfo = null;
        return true;
    }

}
17251 
17252 
17253 
17254 
17255 
/**
 * Validates ftp: URIs.
 */
class HTMLPurifier_URIScheme_ftp extends HTMLPurifier_URIScheme {

    public $default_port = 21;
    public $browsable = true; // usually
    public $hierarchical = true;

    /**
     * Drops the query and normalizes a trailing ";type=<a|i|d>" typecode;
     * every other semicolon in the path is percent-encoded.
     *
     * @param object $uri URI to validate; modified in place.
     * @param object $config Unused here.
     * @param object $context Unused here.
     * @return bool Always true.
     */
    public function doValidate(&$uri, $config, $context) {
        $uri->query = null;

        // typecode check: look at whatever follows the last semicolon
        $semicolon_pos = strrpos($uri->path, ';');
        if ($semicolon_pos === false) {
            return true;
        }

        $type = substr($uri->path, $semicolon_pos + 1); // no semicolon
        $uri->path = substr($uri->path, 0, $semicolon_pos);
        $type_ret = '';

        if (strpos($type, '=') === false) {
            // not a key=value pair at all, tack it back on encoded
            $uri->path .= '%3B' . $type;
        } else {
            // figure out whether or not the declaration is correct
            list($key, $typecode) = explode('=', $type, 2);
            if ($key !== 'type') {
                // invalid key, tack it back on encoded
                $uri->path .= '%3B' . $type;
            } elseif (in_array($typecode, array('a', 'i', 'd'), true)) {
                $type_ret = ";type=$typecode";
            }
            // a 'type' key with any other typecode is dropped
        }

        $uri->path = str_replace(';', '%3B', $uri->path) . $type_ret;

        return true;
    }

}
17294 
17295 
17296 
17297 
17298 
/**
 * Validates http: URIs.
 */
class HTMLPurifier_URIScheme_http extends HTMLPurifier_URIScheme {

    public $default_port = 80;
    public $browsable = true;
    public $hierarchical = true;

    /**
     * Removes the userinfo component; everything else is left untouched.
     *
     * @param object $uri URI to validate; modified in place.
     * @param object $config Unused here.
     * @param object $context Unused here.
     * @return bool Always true.
     */
    public function doValidate(&$uri, $config, $context) {
        $uri->userinfo = null;

        return true;
    }

}
17314 
17315 
17316 
17317 
17318 
/**
 * Validates https: URIs; identical to the http scheme apart from the
 * default port and the secure flag.
 */
class HTMLPurifier_URIScheme_https extends HTMLPurifier_URIScheme_http {

    public $default_port = 443;

    public $secure = true;

}
17328 
17329 
17330 
17331 
17332 
17333 // VERY RELAXED! Shouldn't cause problems, not even Firefox checks if the
17334 // email is valid, but be careful!
17335 
/**
 * Validates mailto: URIs. The path (the address itself) is not checked.
 */
class HTMLPurifier_URIScheme_mailto extends HTMLPurifier_URIScheme {

    public $browsable = false;
    public $may_omit_host = true;

    /**
     * Clears the authority components (userinfo, host, port).
     *
     * @param object $uri URI to validate; modified in place.
     * @param object $config Unused here.
     * @param object $context Unused here.
     * @return bool Always true.
     */
    public function doValidate(&$uri, $config, $context) {
        $uri->port     = null;
        $uri->host     = null;
        $uri->userinfo = null;
        // we need to validate path against RFC 2368's addr-spec
        return true;
    }

}
17356 
17357 
17358 
17359 
17360 
/**
 * Validates news: URIs. The path is not checked.
 */
class HTMLPurifier_URIScheme_news extends HTMLPurifier_URIScheme {

    public $browsable = false;
    public $may_omit_host = true;

    /**
     * Clears the authority components and the query.
     *
     * @param object $uri URI to validate; modified in place.
     * @param object $config Unused here.
     * @param object $context Unused here.
     * @return bool Always true.
     */
    public function doValidate(&$uri, $config, $context) {
        $uri->query    = null;
        $uri->port     = null;
        $uri->host     = null;
        $uri->userinfo = null;
        // typecode check needed on path
        return true;
    }

}
17379 
17380 
17381 
17382 
17383 
/**
 * Validates nntp: URIs.
 */
class HTMLPurifier_URIScheme_nntp extends HTMLPurifier_URIScheme {

    public $default_port = 119;
    public $browsable = false;

    /**
     * Clears the userinfo and the query.
     *
     * @param object $uri URI to validate; modified in place.
     * @param object $config Unused here.
     * @param object $context Unused here.
     * @return bool Always true.
     */
    public function doValidate(&$uri, $config, $context) {
        $uri->query    = null;
        $uri->userinfo = null;
        return true;
    }

}
17399 
17400 
17401 
17402 
17403 
/**
 * Variable parser that converts loosely-typed input (typically strings)
 * into the representation used for a given type constant.
 *
 * Scalar values that this parser does not know how to convert are handed
 * back unchanged.
 * NOTE(review): presumably the caller in HTMLPurifier_VarParser re-checks
 * the returned value against the requested type -- confirm.
 */
class HTMLPurifier_VarParser_Flexible extends HTMLPurifier_VarParser
{

    /**
     * Converts $var into the representation used for $type.
     *
     * Conversions performed:
     *  - INT:    strings of digits, optionally preceded by '-', become ints
     *  - FLOAT:  numeric strings and ints become floats
     *  - BOOL:   ints 0/1 and the strings on/true/1 and off/false/0
     *            become booleans
     *  - ALIST, HASH, LOOKUP: strings are split on commas (and on line
     *            breaks when present) and each item is trimmed; HASH
     *            items are further split on the first ':' into key and
     *            value; LOOKUP lists are flipped into key => true maps
     *
     * @param $var Value to convert
     * @param $type Type constant (self::MIXED, self::INT, ...)
     * @param $allow_null When true, a null $var is returned as null
     * @return The converted value, or $var unchanged when no conversion
     *         applies to it
     * @throws HTMLPurifier_VarParserException when a string passed for
     *         BOOL is not one of the recognized spellings
     */
    protected function parseImplementation($var, $type, $allow_null) {
        if ($allow_null && $var === null) return null;
        switch ($type) {
            // Note: if code "breaks" from the switch, it triggers a generic
            // exception to be thrown. Specific errors can be specifically
            // done here.
            case self::MIXED :
            case self::ISTRING :
            case self::STRING :
            case self::TEXT :
            case self::ITEXT :
                // no conversion for these types
                return $var;
            case self::INT :
                // Accept an optional leading minus sign so that negative
                // integers supplied as strings are converted too; the FLOAT
                // case below already accepts signed numeric strings.
                // \z (not $) is used so that a trailing newline is still
                // rejected, exactly as ctype_digit() rejected it.
                if (is_string($var) && preg_match('/^-?[0-9]+\z/', $var)) $var = (int) $var;
                return $var;
            case self::FLOAT :
                if ((is_string($var) && is_numeric($var)) || is_int($var)) $var = (float) $var;
                return $var;
            case self::BOOL :
                if (is_int($var) && ($var === 0 || $var === 1)) {
                    $var = (bool) $var;
                } elseif (is_string($var)) {
                    if ($var == 'on' || $var == 'true' || $var == '1') {
                        $var = true;
                    } elseif ($var == 'off' || $var == 'false' || $var == '0') {
                        $var = false;
                    } else {
                        throw new HTMLPurifier_VarParserException("Unrecognized value '$var' for $type");
                    }
                }
                // anything else (including real booleans) passes through
                return $var;
            case self::ALIST :
            case self::HASH :
            case self::LOOKUP :
                if (is_string($var)) {
                    // special case: technically, this is an array with
                    // a single empty string item, but having an empty
                    // array is more intuitive
                    if ($var == '') return array();
                    if (strpos($var, "\n") === false && strpos($var, "\r") === false) {
                        // simplistic string to array method that only works
                        // for simple lists of tag names or alphanumeric characters
                        $var = explode(',',$var);
                    } else {
                        // multi-line input: commas and runs of line breaks
                        // both act as separators
                        $var = preg_split('/(,|[\n\r]+)/', $var);
                    }
                    // remove spaces
                    foreach ($var as $i => $j) $var[$i] = trim($j);
                    if ($type === self::HASH) {
                        // key:value,key2:value2
                        $nvar = array();
                        foreach ($var as $keypair) {
                            // limit of 2 keeps any further ':' in the value
                            $c = explode(':', $keypair, 2);
                            // items without a ':' are silently dropped
                            if (!isset($c[1])) continue;
                            $nvar[trim($c[0])] = trim($c[1]);
                        }
                        $var = $nvar;
                    }
                }
                if (!is_array($var)) break;
                $keys = array_keys($var);
                // true when the keys are exactly 0..n-1, i.e. a plain list
                if ($keys === array_keys($keys)) {
                    if ($type == self::ALIST) return $var;
                    elseif ($type == self::LOOKUP) {
                        // flip the list values into key => true
                        $new = array();
                        foreach ($var as $key) {
                            $new[$key] = true;
                        }
                        return $new;
                    } else break; // HASH given as a plain list: generic error
                }
                if ($type === self::ALIST) {
                    trigger_error("Array list did not have consecutive integer indexes", E_USER_WARNING);
                    return array_values($var);
                }
                if ($type === self::LOOKUP) {
                    // keyed array: warn about, then overwrite, non-true values
                    foreach ($var as $key => $value) {
                        if ($value !== true) {
                            trigger_error("Lookup array has non-true value at key '$key'; maybe your input array was not indexed numerically", E_USER_WARNING);
                        }
                        $var[$key] = true;
                    }
                }
                return $var;
            default:
                // type constant this parser has no case for
                $this->errorInconsistent(__CLASS__, $type);
        }
        $this->errorGeneric($var, $type);
    }

}
17503 
17504 
17505 
17506 
17507 
/**
 * Variable parser that interprets its input as a PHP expression and
 * evaluates it with eval().
 *
 * SECURITY: the input is executed as PHP code. This parser must never be
 * given untrusted data.
 */
class HTMLPurifier_VarParser_Native extends HTMLPurifier_VarParser
{

    /**
     * Evaluates $var as a PHP expression and returns the result.
     * @param $var String containing a PHP expression
     * @param $type Not used by this implementation
     * @param $allow_null Not used by this implementation
     * @return Value the expression evaluated to
     * @throws HTMLPurifier_VarParserException if the expression cannot
     *         be parsed
     */
    protected function parseImplementation($var, $type, $allow_null) {
        return $this->evalExpression($var);
    }

    /**
     * Runs "$var = <expr>;" through eval() and returns the assigned value.
     * @param $expr String containing a PHP expression
     * @return Value the expression evaluated to
     * @throws HTMLPurifier_VarParserException if the expression cannot
     *         be parsed
     */
    protected function evalExpression($expr) {
        $var = null;
        try {
            // NOTE(security): executes $expr as PHP code; callers must
            // only pass trusted input.
            $result = eval("\$var = $expr;");
        } catch (ParseError $e) {
            // As of PHP 7, eval() throws ParseError on a syntax error
            // instead of returning false. Route it to the same failure
            // path. On PHP 5 the ParseError class does not exist and this
            // catch simply never matches.
            $result = false;
        }
        if ($result === false) {
            throw new HTMLPurifier_VarParserException("Fatal error in evaluated code");
        }
        return $var;
    }

}
17530 
17531 
17532