HTMLPurifier 4.4.0
|
00001 <?php 00002 00041 /* 00042 HTML Purifier 4.4.0 - Standards Compliant HTML Filtering 00043 Copyright (C) 2006-2008 Edward Z. Yang 00044 00045 This library is free software; you can redistribute it and/or 00046 modify it under the terms of the GNU Lesser General Public 00047 License as published by the Free Software Foundation; either 00048 version 2.1 of the License, or (at your option) any later version. 00049 00050 This library is distributed in the hope that it will be useful, 00051 but WITHOUT ANY WARRANTY; without even the implied warranty of 00052 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 00053 Lesser General Public License for more details. 00054 00055 You should have received a copy of the GNU Lesser General Public 00056 License along with this library; if not, write to the Free Software 00057 Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA 00058 */ 00059 00074 class HTMLPurifier 00075 { 00076 00078 public $version = '4.4.0'; 00079 00081 const VERSION = '4.4.0'; 00082 00084 public $config; 00085 00087 private $filters = array(); 00088 00090 private static $instance; 00091 00092 protected $strategy, $generator; 00093 00098 public $context; 00099 00108 public function __construct($config = null) { 00109 00110 $this->config = HTMLPurifier_Config::create($config); 00111 00112 $this->strategy = new HTMLPurifier_Strategy_Core(); 00113 00114 } 00115 00120 public function addFilter($filter) { 00121 trigger_error('HTMLPurifier->addFilter() is deprecated, use configuration directives in the Filter namespace or Filter.Custom', E_USER_WARNING); 00122 $this->filters[] = $filter; 00123 } 00124 00135 public function purify($html, $config = null) { 00136 00137 // :TODO: make the config merge in, instead of replace 00138 $config = $config ? HTMLPurifier_Config::create($config) : $this->config; 00139 00140 // implementation is partially environment dependant, partially 00141 // configuration dependant 00142 $lexer = HTMLPurifier_Lexer::create($config); 00143 00144 $context = new HTMLPurifier_Context(); 00145 00146 // setup HTML generator 00147 $this->generator = new HTMLPurifier_Generator($config, $context); 00148 $context->register('Generator', $this->generator); 00149 00150 // set up global context variables 00151 if ($config->get('Core.CollectErrors')) { 00152 // may get moved out if other facilities use it 00153 $language_factory = HTMLPurifier_LanguageFactory::instance(); 00154 $language = $language_factory->create($config, $context); 00155 $context->register('Locale', $language); 00156 00157 $error_collector = new HTMLPurifier_ErrorCollector($context); 00158 $context->register('ErrorCollector', $error_collector); 00159 } 00160 00161 // setup id_accumulator context, necessary due to the fact that 00162 // AttrValidator can be called from many places 00163 $id_accumulator = HTMLPurifier_IDAccumulator::build($config, $context); 00164 $context->register('IDAccumulator', $id_accumulator); 00165 00166 $html = HTMLPurifier_Encoder::convertToUTF8($html, $config, $context); 00167 00168 // setup filters 00169 $filter_flags = $config->getBatch('Filter'); 00170 $custom_filters = $filter_flags['Custom']; 00171 unset($filter_flags['Custom']); 00172 $filters = array(); 00173 foreach ($filter_flags as $filter => $flag) { 00174 if (!$flag) continue; 00175 if (strpos($filter, '.') !== false) continue; 00176 $class = "HTMLPurifier_Filter_$filter"; 00177 $filters[] = new $class; 00178 } 00179 foreach ($custom_filters as $filter) { 00180 // maybe "HTMLPurifier_Filter_$filter", but be consistent with AutoFormat 00181 $filters[] = $filter; 00182 } 00183 $filters = array_merge($filters, $this->filters); 00184 // maybe prepare(), but later 00185 00186 for ($i = 0, $filter_size = count($filters); $i < $filter_size; $i++) { 00187 $html = $filters[$i]->preFilter($html, $config, $context); 00188 } 00189 00190 // purified HTML 00191 $html = 00192 $this->generator->generateFromTokens( 00193 // list of tokens 00194 $this->strategy->execute( 00195 // list of un-purified tokens 00196 $lexer->tokenizeHTML( 00197 // un-purified HTML 00198 $html, $config, $context 00199 ), 00200 $config, $context 00201 ) 00202 ); 00203 00204 for ($i = $filter_size - 1; $i >= 0; $i--) { 00205 $html = $filters[$i]->postFilter($html, $config, $context); 00206 } 00207 00208 $html = HTMLPurifier_Encoder::convertFromUTF8($html, $config, $context); 00209 $this->context =& $context; 00210 return $html; 00211 } 00212 00219 public function purifyArray($array_of_html, $config = null) { 00220 $context_array = array(); 00221 foreach ($array_of_html as $key => $html) { 00222 $array_of_html[$key] = $this->purify($html, $config); 00223 $context_array[$key] = $this->context; 00224 } 00225 $this->context = $context_array; 00226 return $array_of_html; 00227 } 00228 00235 public static function instance($prototype = null) { 00236 if (!self::$instance || $prototype) { 00237 if ($prototype instanceof HTMLPurifier) { 00238 self::$instance = $prototype; 00239 } elseif ($prototype) { 00240 self::$instance = new HTMLPurifier($prototype); 00241 } else { 00242 self::$instance = new HTMLPurifier(); 00243 } 00244 } 00245 return self::$instance; 00246 } 00247 00251 public static function getInstance($prototype = null) { 00252 return HTMLPurifier::instance($prototype); 00253 } 00254 00255 } 00256 00257 00258 00259 00260 00265 class HTMLPurifier_AttrCollections 00266 { 00267 00271 public $info = array(); 00272 00280 public function __construct($attr_types, $modules) { 00281 // load extensions from the modules 00282 foreach ($modules as $module) { 00283 foreach ($module->attr_collections as $coll_i => $coll) { 00284 if (!isset($this->info[$coll_i])) { 00285 $this->info[$coll_i] = array(); 00286 } 00287 foreach ($coll as $attr_i => $attr) { 00288 if ($attr_i === 0 && isset($this->info[$coll_i][$attr_i])) { 00289 // merge in includes 00290 $this->info[$coll_i][$attr_i] = array_merge( 00291 $this->info[$coll_i][$attr_i], $attr); 00292 continue; 00293 } 00294 $this->info[$coll_i][$attr_i] = $attr; 00295 } 00296 } 00297 } 00298 // perform internal expansions and inclusions 00299 foreach ($this->info as $name => $attr) { 00300 // merge attribute collections that include others 00301 $this->performInclusions($this->info[$name]); 00302 // replace string identifiers with actual attribute objects 00303 $this->expandIdentifiers($this->info[$name], $attr_types); 00304 } 00305 } 00306 00312 public function performInclusions(&$attr) { 00313 if (!isset($attr[0])) return; 00314 $merge = $attr[0]; 00315 $seen = array(); // recursion guard 00316 // loop through all the inclusions 00317 for ($i = 0; isset($merge[$i]); $i++) { 00318 if (isset($seen[$merge[$i]])) continue; 00319 $seen[$merge[$i]] = true; 00320 // foreach attribute of the inclusion, copy it over 00321 if (!isset($this->info[$merge[$i]])) continue; 00322 foreach ($this->info[$merge[$i]] as $key => $value) { 00323 if (isset($attr[$key])) continue; // also catches more inclusions 00324 $attr[$key] = $value; 00325 } 00326 if (isset($this->info[$merge[$i]][0])) { 00327 // recursion 00328 $merge = array_merge($merge, $this->info[$merge[$i]][0]); 00329 } 00330 } 00331 unset($attr[0]); 00332 } 00333 00340 public function expandIdentifiers(&$attr, $attr_types) { 00341 00342 // because foreach will process new elements we add, make sure we 00343 // skip duplicates 00344 $processed = array(); 00345 00346 foreach ($attr as $def_i => $def) { 00347 // skip inclusions 00348 if ($def_i === 0) continue; 00349 00350 if (isset($processed[$def_i])) continue; 00351 00352 // determine whether or not attribute is required 00353 if ($required = (strpos($def_i, '*') !== false)) { 00354 // rename the definition 00355 unset($attr[$def_i]); 00356 $def_i = trim($def_i, '*'); 00357 $attr[$def_i] = $def; 00358 } 00359 00360 $processed[$def_i] = true; 00361 00362 // if we've already got a literal object, move on 00363 if (is_object($def)) { 00364 // preserve previous required 00365 $attr[$def_i]->required = ($required || $attr[$def_i]->required); 00366 continue; 00367 } 00368 00369 if ($def === false) { 00370 unset($attr[$def_i]); 00371 continue; 00372 } 00373 00374 if ($t = $attr_types->get($def)) { 00375 $attr[$def_i] = $t; 00376 $attr[$def_i]->required = $required; 00377 } else { 00378 unset($attr[$def_i]); 00379 } 00380 } 00381 00382 } 00383 00384 } 00385 00386 00387 00388 00389 00400 abstract class HTMLPurifier_AttrDef 00401 { 00402 00407 public $minimized = false; 00408 00413 public $required = false; 00414 00422 abstract public function validate($string, $config, $context); 00423 00445 public function parseCDATA($string) { 00446 $string = trim($string); 00447 $string = str_replace(array("\n", "\t", "\r"), ' ', $string); 00448 return $string; 00449 } 00450 00456 public function make($string) { 00457 // default implementation, return a flyweight of this object. 00458 // If $string has an effect on the returned object (i.e. you 00459 // need to overload this method), it is best 00460 // to clone or instantiate new copies. (Instantiation is safer.) 00461 return $this; 00462 } 00463 00468 protected function mungeRgb($string) { 00469 return preg_replace('/rgb\((\d+)\s*,\s*(\d+)\s*,\s*(\d+)\)/', 'rgb(\1,\2,\3)', $string); 00470 } 00471 00476 protected function expandCSSEscape($string) { 00477 // flexibly parse it 00478 $ret = ''; 00479 for ($i = 0, $c = strlen($string); $i < $c; $i++) { 00480 if ($string[$i] === '\\') { 00481 $i++; 00482 if ($i >= $c) { 00483 $ret .= '\\'; 00484 break; 00485 } 00486 if (ctype_xdigit($string[$i])) { 00487 $code = $string[$i]; 00488 for ($a = 1, $i++; $i < $c && $a < 6; $i++, $a++) { 00489 if (!ctype_xdigit($string[$i])) break; 00490 $code .= $string[$i]; 00491 } 00492 // We have to be extremely careful when adding 00493 // new characters, to make sure we're not breaking 00494 // the encoding. 00495 $char = HTMLPurifier_Encoder::unichr(hexdec($code)); 00496 if (HTMLPurifier_Encoder::cleanUTF8($char) === '') continue; 00497 $ret .= $char; 00498 if ($i < $c && trim($string[$i]) !== '') $i--; 00499 continue; 00500 } 00501 if ($string[$i] === "\n") continue; 00502 } 00503 $ret .= $string[$i]; 00504 } 00505 return $ret; 00506 } 00507 00508 } 00509 00510 00511 00512 00513 00528 abstract class HTMLPurifier_AttrTransform 00529 { 00530 00540 abstract public function transform($attr, $config, $context); 00541 00548 public function prependCSS(&$attr, $css) { 00549 $attr['style'] = isset($attr['style']) ? $attr['style'] : ''; 00550 $attr['style'] = $css . $attr['style']; 00551 } 00552 00558 public function confiscateAttr(&$attr, $key) { 00559 if (!isset($attr[$key])) return null; 00560 $value = $attr[$key]; 00561 unset($attr[$key]); 00562 return $value; 00563 } 00564 00565 } 00566 00567 00568 00569 00570 00574 class HTMLPurifier_AttrTypes 00575 { 00579 protected $info = array(); 00580 00585 public function __construct() { 00586 // XXX This is kind of poor, since we don't actually /clone/ 00587 // instances; instead, we use the supplied make() attribute. So, 00588 // the underlying class must know how to deal with arguments. 00589 // With the old implementation of Enum, that ignored its 00590 // arguments when handling a make dispatch, the IAlign 00591 // definition wouldn't work. 00592 00593 // pseudo-types, must be instantiated via shorthand 00594 $this->info['Enum'] = new HTMLPurifier_AttrDef_Enum(); 00595 $this->info['Bool'] = new HTMLPurifier_AttrDef_HTML_Bool(); 00596 00597 $this->info['CDATA'] = new HTMLPurifier_AttrDef_Text(); 00598 $this->info['ID'] = new HTMLPurifier_AttrDef_HTML_ID(); 00599 $this->info['Length'] = new HTMLPurifier_AttrDef_HTML_Length(); 00600 $this->info['MultiLength'] = new HTMLPurifier_AttrDef_HTML_MultiLength(); 00601 $this->info['NMTOKENS'] = new HTMLPurifier_AttrDef_HTML_Nmtokens(); 00602 $this->info['Pixels'] = new HTMLPurifier_AttrDef_HTML_Pixels(); 00603 $this->info['Text'] = new HTMLPurifier_AttrDef_Text(); 00604 $this->info['URI'] = new HTMLPurifier_AttrDef_URI(); 00605 $this->info['LanguageCode'] = new HTMLPurifier_AttrDef_Lang(); 00606 $this->info['Color'] = new HTMLPurifier_AttrDef_HTML_Color(); 00607 $this->info['IAlign'] = self::makeEnum('top,middle,bottom,left,right'); 00608 $this->info['LAlign'] = self::makeEnum('top,bottom,left,right'); 00609 $this->info['FrameTarget'] = new HTMLPurifier_AttrDef_HTML_FrameTarget(); 00610 00611 // unimplemented aliases 00612 $this->info['ContentType'] = new HTMLPurifier_AttrDef_Text(); 00613 $this->info['ContentTypes'] = new HTMLPurifier_AttrDef_Text(); 00614 $this->info['Charsets'] = new HTMLPurifier_AttrDef_Text(); 00615 $this->info['Character'] = new HTMLPurifier_AttrDef_Text(); 00616 00617 // "proprietary" types 00618 $this->info['Class'] = new HTMLPurifier_AttrDef_HTML_Class(); 00619 00620 // number is really a positive integer (one or more digits) 00621 // FIXME: ^^ not always, see start and value of list items 00622 $this->info['Number'] = new HTMLPurifier_AttrDef_Integer(false, false, true); 00623 } 00624 00625 private static function makeEnum($in) { 00626 return new HTMLPurifier_AttrDef_Clone(new HTMLPurifier_AttrDef_Enum(explode(',', $in))); 00627 } 00628 00634 public function get($type) { 00635 00636 // determine if there is any extra info tacked on 00637 if (strpos($type, '#') !== false) list($type, $string) = explode('#', $type, 2); 00638 else $string = ''; 00639 00640 if (!isset($this->info[$type])) { 00641 trigger_error('Cannot retrieve undefined attribute type ' . $type, E_USER_ERROR); 00642 return; 00643 } 00644 00645 return $this->info[$type]->make($string); 00646 00647 } 00648 00654 public function set($type, $impl) { 00655 $this->info[$type] = $impl; 00656 } 00657 } 00658 00659 00660 00661 00662 00668 class HTMLPurifier_AttrValidator 00669 { 00670 00681 public function validateToken(&$token, &$config, $context) { 00682 00683 $definition = $config->getHTMLDefinition(); 00684 $e =& $context->get('ErrorCollector', true); 00685 00686 // initialize IDAccumulator if necessary 00687 $ok =& $context->get('IDAccumulator', true); 00688 if (!$ok) { 00689 $id_accumulator = HTMLPurifier_IDAccumulator::build($config, $context); 00690 $context->register('IDAccumulator', $id_accumulator); 00691 } 00692 00693 // initialize CurrentToken if necessary 00694 $current_token =& $context->get('CurrentToken', true); 00695 if (!$current_token) $context->register('CurrentToken', $token); 00696 00697 if ( 00698 !$token instanceof HTMLPurifier_Token_Start && 00699 !$token instanceof HTMLPurifier_Token_Empty 00700 ) return $token; 00701 00702 // create alias to global definition array, see also $defs 00703 // DEFINITION CALL 00704 $d_defs = $definition->info_global_attr; 00705 00706 // don't update token until the very end, to ensure an atomic update 00707 $attr = $token->attr; 00708 00709 // do global transformations (pre) 00710 // nothing currently utilizes this 00711 foreach ($definition->info_attr_transform_pre as $transform) { 00712 $attr = $transform->transform($o = $attr, $config, $context); 00713 if ($e) { 00714 if ($attr != $o) $e->send(E_NOTICE, 'AttrValidator: Attributes transformed', $o, $attr); 00715 } 00716 } 00717 00718 // do local transformations only applicable to this element (pre) 00719 // ex. <p align="right"> to <p style="text-align:right;"> 00720 foreach ($definition->info[$token->name]->attr_transform_pre as $transform) { 00721 $attr = $transform->transform($o = $attr, $config, $context); 00722 if ($e) { 00723 if ($attr != $o) $e->send(E_NOTICE, 'AttrValidator: Attributes transformed', $o, $attr); 00724 } 00725 } 00726 00727 // create alias to this element's attribute definition array, see 00728 // also $d_defs (global attribute definition array) 00729 // DEFINITION CALL 00730 $defs = $definition->info[$token->name]->attr; 00731 00732 $attr_key = false; 00733 $context->register('CurrentAttr', $attr_key); 00734 00735 // iterate through all the attribute keypairs 00736 // Watch out for name collisions: $key has previously been used 00737 foreach ($attr as $attr_key => $value) { 00738 00739 // call the definition 00740 if ( isset($defs[$attr_key]) ) { 00741 // there is a local definition defined 00742 if ($defs[$attr_key] === false) { 00743 // We've explicitly been told not to allow this element. 00744 // This is usually when there's a global definition 00745 // that must be overridden. 00746 // Theoretically speaking, we could have a 00747 // AttrDef_DenyAll, but this is faster! 00748 $result = false; 00749 } else { 00750 // validate according to the element's definition 00751 $result = $defs[$attr_key]->validate( 00752 $value, $config, $context 00753 ); 00754 } 00755 } elseif ( isset($d_defs[$attr_key]) ) { 00756 // there is a global definition defined, validate according 00757 // to the global definition 00758 $result = $d_defs[$attr_key]->validate( 00759 $value, $config, $context 00760 ); 00761 } else { 00762 // system never heard of the attribute? DELETE! 00763 $result = false; 00764 } 00765 00766 // put the results into effect 00767 if ($result === false || $result === null) { 00768 // this is a generic error message that should replaced 00769 // with more specific ones when possible 00770 if ($e) $e->send(E_ERROR, 'AttrValidator: Attribute removed'); 00771 00772 // remove the attribute 00773 unset($attr[$attr_key]); 00774 } elseif (is_string($result)) { 00775 // generally, if a substitution is happening, there 00776 // was some sort of implicit correction going on. We'll 00777 // delegate it to the attribute classes to say exactly what. 00778 00779 // simple substitution 00780 $attr[$attr_key] = $result; 00781 } else { 00782 // nothing happens 00783 } 00784 00785 // we'd also want slightly more complicated substitution 00786 // involving an array as the return value, 00787 // although we're not sure how colliding attributes would 00788 // resolve (certain ones would be completely overriden, 00789 // others would prepend themselves). 00790 } 00791 00792 $context->destroy('CurrentAttr'); 00793 00794 // post transforms 00795 00796 // global (error reporting untested) 00797 foreach ($definition->info_attr_transform_post as $transform) { 00798 $attr = $transform->transform($o = $attr, $config, $context); 00799 if ($e) { 00800 if ($attr != $o) $e->send(E_NOTICE, 'AttrValidator: Attributes transformed', $o, $attr); 00801 } 00802 } 00803 00804 // local (error reporting untested) 00805 foreach ($definition->info[$token->name]->attr_transform_post as $transform) { 00806 $attr = $transform->transform($o = $attr, $config, $context); 00807 if ($e) { 00808 if ($attr != $o) $e->send(E_NOTICE, 'AttrValidator: Attributes transformed', $o, $attr); 00809 } 00810 } 00811 00812 $token->attr = $attr; 00813 00814 // destroy CurrentToken if we made it ourselves 00815 if (!$current_token) $context->destroy('CurrentToken'); 00816 00817 } 00818 00819 00820 } 00821 00822 00823 00824 00825 00826 // constants are slow, so we use as few as possible 00827 if (!defined('HTMLPURIFIER_PREFIX')) { 00828 define('HTMLPURIFIER_PREFIX', dirname(__FILE__) . '/standalone'); 00829 set_include_path(HTMLPURIFIER_PREFIX . PATH_SEPARATOR . get_include_path()); 00830 } 00831 00832 // accomodations for versions earlier than 5.0.2 00833 // borrowed from PHP_Compat, LGPL licensed, by Aidan Lister <aidan@php.net> 00834 if (!defined('PHP_EOL')) { 00835 switch (strtoupper(substr(PHP_OS, 0, 3))) { 00836 case 'WIN': 00837 define('PHP_EOL', "\r\n"); 00838 break; 00839 case 'DAR': 00840 define('PHP_EOL', "\r"); 00841 break; 00842 default: 00843 define('PHP_EOL', "\n"); 00844 } 00845 } 00846 00854 class HTMLPurifier_Bootstrap 00855 { 00856 00861 public static function autoload($class) { 00862 $file = HTMLPurifier_Bootstrap::getPath($class); 00863 if (!$file) return false; 00864 // Technically speaking, it should be ok and more efficient to 00865 // just do 'require', but Antonio Parraga reports that with 00866 // Zend extensions such as Zend debugger and APC, this invariant 00867 // may be broken. Since we have efficient alternatives, pay 00868 // the cost here and avoid the bug. 00869 require_once HTMLPURIFIER_PREFIX . '/' . $file; 00870 return true; 00871 } 00872 00876 public static function getPath($class) { 00877 if (strncmp('HTMLPurifier', $class, 12) !== 0) return false; 00878 // Custom implementations 00879 if (strncmp('HTMLPurifier_Language_', $class, 22) === 0) { 00880 $code = str_replace('_', '-', substr($class, 22)); 00881 $file = 'HTMLPurifier/Language/classes/' . $code . '.php'; 00882 } else { 00883 $file = str_replace('_', '/', $class) . '.php'; 00884 } 00885 if (!file_exists(HTMLPURIFIER_PREFIX . '/' . $file)) return false; 00886 return $file; 00887 } 00888 00892 public static function registerAutoload() { 00893 $autoload = array('HTMLPurifier_Bootstrap', 'autoload'); 00894 if ( ($funcs = spl_autoload_functions()) === false ) { 00895 spl_autoload_register($autoload); 00896 } elseif (function_exists('spl_autoload_unregister')) { 00897 $buggy = version_compare(PHP_VERSION, '5.2.11', '<'); 00898 $compat = version_compare(PHP_VERSION, '5.1.2', '<=') && 00899 version_compare(PHP_VERSION, '5.1.0', '>='); 00900 foreach ($funcs as $func) { 00901 if ($buggy && is_array($func)) { 00902 // :TRICKY: There are some compatibility issues and some 00903 // places where we need to error out 00904 $reflector = new ReflectionMethod($func[0], $func[1]); 00905 if (!$reflector->isStatic()) { 00906 throw new Exception(' 00907 HTML Purifier autoloader registrar is not compatible 00908 with non-static object methods due to PHP Bug #44144; 00909 Please do not use HTMLPurifier.autoload.php (or any 00910 file that includes this file); instead, place the code: 00911 spl_autoload_register(array(\'HTMLPurifier_Bootstrap\', \'autoload\')) 00912 after your own autoloaders. 00913 '); 00914 } 00915 // Suprisingly, spl_autoload_register supports the 00916 // Class::staticMethod callback format, although call_user_func doesn't 00917 if ($compat) $func = implode('::', $func); 00918 } 00919 spl_autoload_unregister($func); 00920 } 00921 spl_autoload_register($autoload); 00922 foreach ($funcs as $func) spl_autoload_register($func); 00923 } 00924 } 00925 00926 } 00927 00928 00929 00930 00931 00936 abstract class HTMLPurifier_Definition 00937 { 00938 00942 public $setup = false; 00943 00953 public $optimized = null; 00954 00958 public $type; 00959 00965 abstract protected function doSetup($config); 00966 00971 public function setup($config) { 00972 if ($this->setup) return; 00973 $this->setup = true; 00974 $this->doSetup($config); 00975 } 00976 00977 } 00978 00979 00980 00981 00982 00987 class HTMLPurifier_CSSDefinition extends HTMLPurifier_Definition 00988 { 00989 00990 public $type = 'CSS'; 00991 00995 public $info = array(); 00996 01000 protected function doSetup($config) { 01001 01002 $this->info['text-align'] = new HTMLPurifier_AttrDef_Enum( 01003 array('left', 'right', 'center', 'justify'), false); 01004 01005 $border_style = 01006 $this->info['border-bottom-style'] = 01007 $this->info['border-right-style'] = 01008 $this->info['border-left-style'] = 01009 $this->info['border-top-style'] = new HTMLPurifier_AttrDef_Enum( 01010 array('none', 'hidden', 'dotted', 'dashed', 'solid', 'double', 01011 'groove', 'ridge', 'inset', 'outset'), false); 01012 01013 $this->info['border-style'] = new HTMLPurifier_AttrDef_CSS_Multiple($border_style); 01014 01015 $this->info['clear'] = new HTMLPurifier_AttrDef_Enum( 01016 array('none', 'left', 'right', 'both'), false); 01017 $this->info['float'] = new HTMLPurifier_AttrDef_Enum( 01018 array('none', 'left', 'right'), false); 01019 $this->info['font-style'] = new HTMLPurifier_AttrDef_Enum( 01020 array('normal', 'italic', 'oblique'), false); 01021 $this->info['font-variant'] = new HTMLPurifier_AttrDef_Enum( 01022 array('normal', 'small-caps'), false); 01023 01024 $uri_or_none = new HTMLPurifier_AttrDef_CSS_Composite( 01025 array( 01026 new HTMLPurifier_AttrDef_Enum(array('none')), 01027 new HTMLPurifier_AttrDef_CSS_URI() 01028 ) 01029 ); 01030 01031 $this->info['list-style-position'] = new HTMLPurifier_AttrDef_Enum( 01032 array('inside', 'outside'), false); 01033 $this->info['list-style-type'] = new HTMLPurifier_AttrDef_Enum( 01034 array('disc', 'circle', 'square', 'decimal', 'lower-roman', 01035 'upper-roman', 'lower-alpha', 'upper-alpha', 'none'), false); 01036 $this->info['list-style-image'] = $uri_or_none; 01037 01038 $this->info['list-style'] = new HTMLPurifier_AttrDef_CSS_ListStyle($config); 01039 01040 $this->info['text-transform'] = new HTMLPurifier_AttrDef_Enum( 01041 array('capitalize', 'uppercase', 'lowercase', 'none'), false); 01042 $this->info['color'] = new HTMLPurifier_AttrDef_CSS_Color(); 01043 01044 $this->info['background-image'] = $uri_or_none; 01045 $this->info['background-repeat'] = new HTMLPurifier_AttrDef_Enum( 01046 array('repeat', 'repeat-x', 'repeat-y', 'no-repeat') 01047 ); 01048 $this->info['background-attachment'] = new HTMLPurifier_AttrDef_Enum( 01049 array('scroll', 'fixed') 01050 ); 01051 $this->info['background-position'] = new HTMLPurifier_AttrDef_CSS_BackgroundPosition(); 01052 01053 $border_color = 01054 $this->info['border-top-color'] = 01055 $this->info['border-bottom-color'] = 01056 $this->info['border-left-color'] = 01057 $this->info['border-right-color'] = 01058 $this->info['background-color'] = new HTMLPurifier_AttrDef_CSS_Composite(array( 01059 new HTMLPurifier_AttrDef_Enum(array('transparent')), 01060 new HTMLPurifier_AttrDef_CSS_Color() 01061 )); 01062 01063 $this->info['background'] = new HTMLPurifier_AttrDef_CSS_Background($config); 01064 01065 $this->info['border-color'] = new HTMLPurifier_AttrDef_CSS_Multiple($border_color); 01066 01067 $border_width = 01068 $this->info['border-top-width'] = 01069 $this->info['border-bottom-width'] = 01070 $this->info['border-left-width'] = 01071 $this->info['border-right-width'] = new HTMLPurifier_AttrDef_CSS_Composite(array( 01072 new HTMLPurifier_AttrDef_Enum(array('thin', 'medium', 'thick')), 01073 new HTMLPurifier_AttrDef_CSS_Length('0') //disallow negative 01074 )); 01075 01076 $this->info['border-width'] = new HTMLPurifier_AttrDef_CSS_Multiple($border_width); 01077 01078 $this->info['letter-spacing'] = new HTMLPurifier_AttrDef_CSS_Composite(array( 01079 new HTMLPurifier_AttrDef_Enum(array('normal')), 01080 new HTMLPurifier_AttrDef_CSS_Length() 01081 )); 01082 01083 $this->info['word-spacing'] = new HTMLPurifier_AttrDef_CSS_Composite(array( 01084 new HTMLPurifier_AttrDef_Enum(array('normal')), 01085 new HTMLPurifier_AttrDef_CSS_Length() 01086 )); 01087 01088 $this->info['font-size'] = new HTMLPurifier_AttrDef_CSS_Composite(array( 01089 new HTMLPurifier_AttrDef_Enum(array('xx-small', 'x-small', 01090 'small', 'medium', 'large', 'x-large', 'xx-large', 01091 'larger', 'smaller')), 01092 new HTMLPurifier_AttrDef_CSS_Percentage(), 01093 new HTMLPurifier_AttrDef_CSS_Length() 01094 )); 01095 01096 $this->info['line-height'] = new HTMLPurifier_AttrDef_CSS_Composite(array( 01097 new HTMLPurifier_AttrDef_Enum(array('normal')), 01098 new HTMLPurifier_AttrDef_CSS_Number(true), // no negatives 01099 new HTMLPurifier_AttrDef_CSS_Length('0'), 01100 new HTMLPurifier_AttrDef_CSS_Percentage(true) 01101 )); 01102 01103 $margin = 01104 $this->info['margin-top'] = 01105 $this->info['margin-bottom'] = 01106 $this->info['margin-left'] = 01107 $this->info['margin-right'] = new HTMLPurifier_AttrDef_CSS_Composite(array( 01108 new HTMLPurifier_AttrDef_CSS_Length(), 01109 new HTMLPurifier_AttrDef_CSS_Percentage(), 01110 new HTMLPurifier_AttrDef_Enum(array('auto')) 01111 )); 01112 01113 $this->info['margin'] = new HTMLPurifier_AttrDef_CSS_Multiple($margin); 01114 01115 // non-negative 01116 $padding = 01117 $this->info['padding-top'] = 01118 $this->info['padding-bottom'] = 01119 $this->info['padding-left'] = 01120 $this->info['padding-right'] = new HTMLPurifier_AttrDef_CSS_Composite(array( 01121 new HTMLPurifier_AttrDef_CSS_Length('0'), 01122 new HTMLPurifier_AttrDef_CSS_Percentage(true) 01123 )); 01124 01125 $this->info['padding'] = new HTMLPurifier_AttrDef_CSS_Multiple($padding); 01126 01127 $this->info['text-indent'] = new HTMLPurifier_AttrDef_CSS_Composite(array( 01128 new HTMLPurifier_AttrDef_CSS_Length(), 01129 new HTMLPurifier_AttrDef_CSS_Percentage() 01130 )); 01131 01132 $trusted_wh = new HTMLPurifier_AttrDef_CSS_Composite(array( 01133 new HTMLPurifier_AttrDef_CSS_Length('0'), 01134 new HTMLPurifier_AttrDef_CSS_Percentage(true), 01135 new HTMLPurifier_AttrDef_Enum(array('auto')) 01136 )); 01137 $max = $config->get('CSS.MaxImgLength'); 01138 01139 $this->info['width'] = 01140 $this->info['height'] = 01141 $max === null ? 01142 $trusted_wh : 01143 new HTMLPurifier_AttrDef_Switch('img', 01144 // For img tags: 01145 new HTMLPurifier_AttrDef_CSS_Composite(array( 01146 new HTMLPurifier_AttrDef_CSS_Length('0', $max), 01147 new HTMLPurifier_AttrDef_Enum(array('auto')) 01148 )), 01149 // For everyone else: 01150 $trusted_wh 01151 ); 01152 01153 $this->info['text-decoration'] = new HTMLPurifier_AttrDef_CSS_TextDecoration(); 01154 01155 $this->info['font-family'] = new HTMLPurifier_AttrDef_CSS_FontFamily(); 01156 01157 // this could use specialized code 01158 $this->info['font-weight'] = new HTMLPurifier_AttrDef_Enum( 01159 array('normal', 'bold', 'bolder', 'lighter', '100', '200', '300', 01160 '400', '500', '600', '700', '800', '900'), false); 01161 01162 // MUST be called after other font properties, as it references 01163 // a CSSDefinition object 01164 $this->info['font'] = new HTMLPurifier_AttrDef_CSS_Font($config); 01165 01166 // same here 01167 $this->info['border'] = 01168 $this->info['border-bottom'] = 01169 $this->info['border-top'] = 01170 $this->info['border-left'] = 01171 $this->info['border-right'] = new HTMLPurifier_AttrDef_CSS_Border($config); 01172 01173 $this->info['border-collapse'] = new HTMLPurifier_AttrDef_Enum(array( 01174 'collapse', 'separate')); 01175 01176 $this->info['caption-side'] = new HTMLPurifier_AttrDef_Enum(array( 01177 'top', 'bottom')); 01178 01179 $this->info['table-layout'] = new HTMLPurifier_AttrDef_Enum(array( 01180 'auto', 'fixed')); 01181 01182 $this->info['vertical-align'] = new HTMLPurifier_AttrDef_CSS_Composite(array( 01183 new HTMLPurifier_AttrDef_Enum(array('baseline', 'sub', 'super', 01184 'top', 'text-top', 'middle', 'bottom', 'text-bottom')), 01185 new HTMLPurifier_AttrDef_CSS_Length(), 01186 new HTMLPurifier_AttrDef_CSS_Percentage() 01187 )); 01188 01189 $this->info['border-spacing'] = new HTMLPurifier_AttrDef_CSS_Multiple(new HTMLPurifier_AttrDef_CSS_Length(), 2); 01190 01191 // partial support 01192 $this->info['white-space'] = new HTMLPurifier_AttrDef_Enum(array('nowrap')); 01193 01194 if ($config->get('CSS.Proprietary')) { 01195 $this->doSetupProprietary($config); 01196 } 01197 01198 if ($config->get('CSS.AllowTricky')) { 01199 $this->doSetupTricky($config); 01200 } 01201 01202 if ($config->get('CSS.Trusted')) { 01203 $this->doSetupTrusted($config); 01204 } 01205 01206 $allow_important = $config->get('CSS.AllowImportant'); 01207 // wrap all attr-defs with decorator that handles !important 01208 foreach ($this->info as $k => $v) { 01209 $this->info[$k] = new HTMLPurifier_AttrDef_CSS_ImportantDecorator($v, $allow_important); 01210 } 01211 01212 $this->setupConfigStuff($config); 01213 } 01214 01215 protected function doSetupProprietary($config) { 01216 // Internet Explorer only scrollbar colors 01217 $this->info['scrollbar-arrow-color'] = new HTMLPurifier_AttrDef_CSS_Color(); 01218 $this->info['scrollbar-base-color'] = new HTMLPurifier_AttrDef_CSS_Color(); 01219 $this->info['scrollbar-darkshadow-color'] = new HTMLPurifier_AttrDef_CSS_Color(); 01220 $this->info['scrollbar-face-color'] = new HTMLPurifier_AttrDef_CSS_Color(); 01221 $this->info['scrollbar-highlight-color'] = new HTMLPurifier_AttrDef_CSS_Color(); 01222 $this->info['scrollbar-shadow-color'] = new HTMLPurifier_AttrDef_CSS_Color(); 01223 01224 // technically not proprietary, but CSS3, and no one supports it 01225 $this->info['opacity'] = new HTMLPurifier_AttrDef_CSS_AlphaValue(); 01226 $this->info['-moz-opacity'] = new HTMLPurifier_AttrDef_CSS_AlphaValue(); 01227 $this->info['-khtml-opacity'] = new HTMLPurifier_AttrDef_CSS_AlphaValue(); 01228 01229 // only opacity, for now 01230 $this->info['filter'] = new HTMLPurifier_AttrDef_CSS_Filter(); 01231 01232 } 01233 01234 protected function doSetupTricky($config) { 01235 $this->info['display'] = new HTMLPurifier_AttrDef_Enum(array( 01236 'inline', 'block', 'list-item', 'run-in', 'compact', 01237 'marker', 'table', 'inline-table', 'table-row-group', 01238 'table-header-group', 'table-footer-group', 'table-row', 01239 'table-column-group', 'table-column', 'table-cell', 'table-caption', 'none' 01240 )); 01241 $this->info['visibility'] = new HTMLPurifier_AttrDef_Enum(array( 01242 'visible', 'hidden', 'collapse' 01243 )); 01244 $this->info['overflow'] = new HTMLPurifier_AttrDef_Enum(array('visible', 'hidden', 'auto', 'scroll')); 01245 } 01246 01247 protected function doSetupTrusted($config) { 01248 $this->info['position'] = new HTMLPurifier_AttrDef_Enum(array( 01249 'static', 'relative', 'absolute', 'fixed' 01250 )); 01251 $this->info['top'] = 01252 $this->info['left'] = 01253 $this->info['right'] = 01254 $this->info['bottom'] = new HTMLPurifier_AttrDef_CSS_Composite(array( 01255 new HTMLPurifier_AttrDef_CSS_Length(), 01256 new HTMLPurifier_AttrDef_CSS_Percentage(), 01257 new HTMLPurifier_AttrDef_Enum(array('auto')), 01258 )); 01259 $this->info['z-index'] = new HTMLPurifier_AttrDef_CSS_Composite(array( 01260 new HTMLPurifier_AttrDef_Integer(), 01261 new HTMLPurifier_AttrDef_Enum(array('auto')), 01262 )); 01263 } 01264 01271 protected function setupConfigStuff($config) { 01272 01273 // setup allowed elements 01274 $support = "(for information on implementing this, see the ". 01275 "support forums) "; 01276 $allowed_properties = $config->get('CSS.AllowedProperties'); 01277 if ($allowed_properties !== null) { 01278 foreach ($this->info as $name => $d) { 01279 if(!isset($allowed_properties[$name])) unset($this->info[$name]); 01280 unset($allowed_properties[$name]); 01281 } 01282 // emit errors 01283 foreach ($allowed_properties as $name => $d) { 01284 // :TODO: Is this htmlspecialchars() call really necessary? 01285 $name = htmlspecialchars($name); 01286 trigger_error("Style attribute '$name' is not supported $support", E_USER_WARNING); 01287 } 01288 } 01289 01290 $forbidden_properties = $config->get('CSS.ForbiddenProperties'); 01291 if ($forbidden_properties !== null) { 01292 foreach ($this->info as $name => $d) { 01293 if (isset($forbidden_properties[$name])) { 01294 unset($this->info[$name]); 01295 } 01296 } 01297 } 01298 01299 } 01300 } 01301 01302 01303 01304 01305 01309 abstract class HTMLPurifier_ChildDef 01310 { 01315 public $type; 01316 01323 public $allow_empty; 01324 01328 public $elements = array(); 01329 01334 public function getAllowedElements($config) { 01335 return $this->elements; 01336 } 01337 01348 abstract public function validateChildren($tokens_of_children, $config, $context); 01349 } 01350 01351 01352 01353 01354 01369 class HTMLPurifier_Config 01370 { 01371 01375 public $version = '4.4.0'; 01376 01381 public $autoFinalize = true; 01382 01383 // protected member variables 01384 01389 protected $serials = array(); 01390 01394 protected $serial; 01395 01399 protected $parser = null; 01400 01406 public $def; 01407 01411 protected $definitions; 01412 01416 protected $finalized = false; 01417 01421 protected $plist; 01422 01427 private $aliasMode; 01428 01434 public $chatty = true; 01435 01439 private $lock; 01440 01445 public function __construct($definition, $parent = null) { 01446 $parent = $parent ? $parent : $definition->defaultPlist; 01447 $this->plist = new HTMLPurifier_PropertyList($parent); 01448 $this->def = $definition; // keep a copy around for checking 01449 $this->parser = new HTMLPurifier_VarParser_Flexible(); 01450 } 01451 01461 public static function create($config, $schema = null) { 01462 if ($config instanceof HTMLPurifier_Config) { 01463 // pass-through 01464 return $config; 01465 } 01466 if (!$schema) { 01467 $ret = HTMLPurifier_Config::createDefault(); 01468 } else { 01469 $ret = new HTMLPurifier_Config($schema); 01470 } 01471 if (is_string($config)) $ret->loadIni($config); 01472 elseif (is_array($config)) $ret->loadArray($config); 01473 return $ret; 01474 } 01475 01482 public static function inherit(HTMLPurifier_Config $config) { 01483 return new HTMLPurifier_Config($config->def, $config->plist); 01484 } 01485 01490 public static function createDefault() { 01491 $definition = HTMLPurifier_ConfigSchema::instance(); 01492 $config = new HTMLPurifier_Config($definition); 01493 return $config; 01494 } 01495 01500 public function get($key, $a = null) { 01501 if ($a !== null) { 01502 $this->triggerError("Using deprecated API: use \$config->get('$key.$a') instead", E_USER_WARNING); 01503 $key = "$key.$a"; 01504 } 01505 if (!$this->finalized) $this->autoFinalize(); 01506 if (!isset($this->def->info[$key])) { 01507 // can't add % due to SimpleTest bug 01508 $this->triggerError('Cannot retrieve value of undefined directive ' . htmlspecialchars($key), 01509 E_USER_WARNING); 01510 return; 01511 } 01512 if (isset($this->def->info[$key]->isAlias)) { 01513 $d = $this->def->info[$key]; 01514 $this->triggerError('Cannot get value from aliased directive, use real name ' . $d->key, 01515 E_USER_ERROR); 01516 return; 01517 } 01518 if ($this->lock) { 01519 list($ns) = explode('.', $key); 01520 if ($ns !== $this->lock) { 01521 $this->triggerError('Cannot get value of namespace ' . $ns . ' when lock for ' . $this->lock . ' is active, this probably indicates a Definition setup method is accessing directives that are not within its namespace', E_USER_ERROR); 01522 return; 01523 } 01524 } 01525 return $this->plist->get($key); 01526 } 01527 01532 public function getBatch($namespace) { 01533 if (!$this->finalized) $this->autoFinalize(); 01534 $full = $this->getAll(); 01535 if (!isset($full[$namespace])) { 01536 $this->triggerError('Cannot retrieve undefined namespace ' . htmlspecialchars($namespace), 01537 E_USER_WARNING); 01538 return; 01539 } 01540 return $full[$namespace]; 01541 } 01542 01550 public function getBatchSerial($namespace) { 01551 if (empty($this->serials[$namespace])) { 01552 $batch = $this->getBatch($namespace); 01553 unset($batch['DefinitionRev']); 01554 $this->serials[$namespace] = md5(serialize($batch)); 01555 } 01556 return $this->serials[$namespace]; 01557 } 01558 01563 public function getSerial() { 01564 if (empty($this->serial)) { 01565 $this->serial = md5(serialize($this->getAll())); 01566 } 01567 return $this->serial; 01568 } 01569 01574 public function getAll() { 01575 if (!$this->finalized) $this->autoFinalize(); 01576 $ret = array(); 01577 foreach ($this->plist->squash() as $name => $value) { 01578 list($ns, $key) = explode('.', $name, 2); 01579 $ret[$ns][$key] = $value; 01580 } 01581 return $ret; 01582 } 01583 01589 public function set($key, $value, $a = null) { 01590 if (strpos($key, '.') === false) { 01591 $namespace = $key; 01592 $directive = $value; 01593 $value = $a; 01594 $key = "$key.$directive"; 01595 $this->triggerError("Using deprecated API: use \$config->set('$key', ...) instead", E_USER_NOTICE); 01596 } else { 01597 list($namespace) = explode('.', $key); 01598 } 01599 if ($this->isFinalized('Cannot set directive after finalization')) return; 01600 if (!isset($this->def->info[$key])) { 01601 $this->triggerError('Cannot set undefined directive ' . htmlspecialchars($key) . ' to value', 01602 E_USER_WARNING); 01603 return; 01604 } 01605 $def = $this->def->info[$key]; 01606 01607 if (isset($def->isAlias)) { 01608 if ($this->aliasMode) { 01609 $this->triggerError('Double-aliases not allowed, please fix '. 01610 'ConfigSchema bug with' . $key, E_USER_ERROR); 01611 return; 01612 } 01613 $this->aliasMode = true; 01614 $this->set($def->key, $value); 01615 $this->aliasMode = false; 01616 $this->triggerError("$key is an alias, preferred directive name is {$def->key}", E_USER_NOTICE); 01617 return; 01618 } 01619 01620 // Raw type might be negative when using the fully optimized form 01621 // of stdclass, which indicates allow_null == true 01622 $rtype = is_int($def) ? $def : $def->type; 01623 if ($rtype < 0) { 01624 $type = -$rtype; 01625 $allow_null = true; 01626 } else { 01627 $type = $rtype; 01628 $allow_null = isset($def->allow_null); 01629 } 01630 01631 try { 01632 $value = $this->parser->parse($value, $type, $allow_null); 01633 } catch (HTMLPurifier_VarParserException $e) { 01634 $this->triggerError('Value for ' . $key . ' is of invalid type, should be ' . HTMLPurifier_VarParser::getTypeName($type), E_USER_WARNING); 01635 return; 01636 } 01637 if (is_string($value) && is_object($def)) { 01638 // resolve value alias if defined 01639 if (isset($def->aliases[$value])) { 01640 $value = $def->aliases[$value]; 01641 } 01642 // check to see if the value is allowed 01643 if (isset($def->allowed) && !isset($def->allowed[$value])) { 01644 $this->triggerError('Value not supported, valid values are: ' . 01645 $this->_listify($def->allowed), E_USER_WARNING); 01646 return; 01647 } 01648 } 01649 $this->plist->set($key, $value); 01650 01651 // reset definitions if the directives they depend on changed 01652 // this is a very costly process, so it's discouraged 01653 // with finalization 01654 if ($namespace == 'HTML' || $namespace == 'CSS' || $namespace == 'URI') { 01655 $this->definitions[$namespace] = null; 01656 } 01657 01658 $this->serials[$namespace] = false; 01659 } 01660 01664 private function _listify($lookup) { 01665 $list = array(); 01666 foreach ($lookup as $name => $b) $list[] = $name; 01667 return implode(', ', $list); 01668 } 01669 01681 public function getHTMLDefinition($raw = false, $optimized = false) { 01682 return $this->getDefinition('HTML', $raw, $optimized); 01683 } 01684 01696 public function getCSSDefinition($raw = false, $optimized = false) { 01697 return $this->getDefinition('CSS', $raw, $optimized); 01698 } 01699 01711 public function getURIDefinition($raw = false, $optimized = false) { 01712 return $this->getDefinition('URI', $raw, $optimized); 01713 } 01714 01728 public function getDefinition($type, $raw = false, $optimized = false) { 01729 if ($optimized && !$raw) { 01730 throw new HTMLPurifier_Exception("Cannot set optimized = true when raw = false"); 01731 } 01732 if (!$this->finalized) $this->autoFinalize(); 01733 // temporarily suspend locks, so we can handle recursive definition calls 01734 $lock = $this->lock; 01735 $this->lock = null; 01736 $factory = HTMLPurifier_DefinitionCacheFactory::instance(); 01737 $cache = $factory->create($type, $this); 01738 $this->lock = $lock; 01739 if (!$raw) { 01740 // full definition 01741 // --------------- 01742 // check if definition is in memory 01743 if (!empty($this->definitions[$type])) { 01744 $def = $this->definitions[$type]; 01745 // check if the definition is setup 01746 if ($def->setup) { 01747 return $def; 01748 } else { 01749 $def->setup($this); 01750 if ($def->optimized) $cache->add($def, $this); 01751 return $def; 01752 } 01753 } 01754 // check if definition is in cache 01755 $def = $cache->get($this); 01756 if ($def) { 01757 // definition in cache, save to memory and return it 01758 $this->definitions[$type] = $def; 01759 return $def; 01760 } 01761 // initialize it 01762 $def = $this->initDefinition($type); 01763 // set it up 01764 $this->lock = $type; 01765 $def->setup($this); 01766 $this->lock = null; 01767 // save in cache 01768 $cache->add($def, $this); 01769 // return it 01770 return $def; 01771 } else { 01772 // raw definition 01773 // -------------- 01774 // check preconditions 01775 $def = null; 01776 if ($optimized) { 01777 if (is_null($this->get($type . '.DefinitionID'))) { 01778 // fatally error out if definition ID not set 01779 throw new HTMLPurifier_Exception("Cannot retrieve raw version without specifying %$type.DefinitionID"); 01780 } 01781 } 01782 if (!empty($this->definitions[$type])) { 01783 $def = $this->definitions[$type]; 01784 if ($def->setup && !$optimized) { 01785 $extra = $this->chatty ? " (try moving this code block earlier in your initialization)" : ""; 01786 throw new HTMLPurifier_Exception("Cannot retrieve raw definition after it has already been setup" . $extra); 01787 } 01788 if ($def->optimized === null) { 01789 $extra = $this->chatty ? " (try flushing your cache)" : ""; 01790 throw new HTMLPurifier_Exception("Optimization status of definition is unknown" . $extra); 01791 } 01792 if ($def->optimized !== $optimized) { 01793 $msg = $optimized ? "optimized" : "unoptimized"; 01794 $extra = $this->chatty ? " (this backtrace is for the first inconsistent call, which was for a $msg raw definition)" : ""; 01795 throw new HTMLPurifier_Exception("Inconsistent use of optimized and unoptimized raw definition retrievals" . $extra); 01796 } 01797 } 01798 // check if definition was in memory 01799 if ($def) { 01800 if ($def->setup) { 01801 // invariant: $optimized === true (checked above) 01802 return null; 01803 } else { 01804 return $def; 01805 } 01806 } 01807 // if optimized, check if definition was in cache 01808 // (because we do the memory check first, this formulation 01809 // is prone to cache slamming, but I think 01810 // guaranteeing that either /all/ of the raw 01811 // setup code or /none/ of it is run is more important.) 01812 if ($optimized) { 01813 // This code path only gets run once; once we put 01814 // something in $definitions (which is guaranteed by the 01815 // trailing code), we always short-circuit above. 01816 $def = $cache->get($this); 01817 if ($def) { 01818 // save the full definition for later, but don't 01819 // return it yet 01820 $this->definitions[$type] = $def; 01821 return null; 01822 } 01823 } 01824 // check invariants for creation 01825 if (!$optimized) { 01826 if (!is_null($this->get($type . '.DefinitionID'))) { 01827 if ($this->chatty) { 01828 $this->triggerError("Due to a documentation error in previous version of HTML Purifier, your definitions are not being cached. If this is OK, you can remove the %$type.DefinitionRev and %$type.DefinitionID declaration. Otherwise, modify your code to use maybeGetRawDefinition, and test if the returned value is null before making any edits (if it is null, that means that a cached version is available, and no raw operations are necessary). See <a href='http://htmlpurifier.org/docs/enduser-customize.html#optimized'>Customize</a> for more details", E_USER_WARNING); 01829 } else { 01830 $this->triggerError("Useless DefinitionID declaration", E_USER_WARNING); 01831 } 01832 } 01833 } 01834 // initialize it 01835 $def = $this->initDefinition($type); 01836 $def->optimized = $optimized; 01837 return $def; 01838 } 01839 throw new HTMLPurifier_Exception("The impossible happened!"); 01840 } 01841 01842 private function initDefinition($type) { 01843 // quick checks failed, let's create the object 01844 if ($type == 'HTML') { 01845 $def = new HTMLPurifier_HTMLDefinition(); 01846 } elseif ($type == 'CSS') { 01847 $def = new HTMLPurifier_CSSDefinition(); 01848 } elseif ($type == 'URI') { 01849 $def = new HTMLPurifier_URIDefinition(); 01850 } else { 01851 throw new HTMLPurifier_Exception("Definition of $type type not supported"); 01852 } 01853 $this->definitions[$type] = $def; 01854 return $def; 01855 } 01856 01857 public function maybeGetRawDefinition($name) { 01858 return $this->getDefinition($name, true, true); 01859 } 01860 01861 public function maybeGetRawHTMLDefinition() { 01862 return $this->getDefinition('HTML', true, true); 01863 } 01864 01865 public function maybeGetRawCSSDefinition() { 01866 return $this->getDefinition('CSS', true, true); 01867 } 01868 01869 public function maybeGetRawURIDefinition() { 01870 return $this->getDefinition('URI', true, true); 01871 } 01872 01878 public function loadArray($config_array) { 01879 if ($this->isFinalized('Cannot load directives after finalization')) return; 01880 foreach ($config_array as $key => $value) { 01881 $key = str_replace('_', '.', $key); 01882 if (strpos($key, '.') !== false) { 01883 $this->set($key, $value); 01884 } else { 01885 $namespace = $key; 01886 $namespace_values = $value; 01887 foreach ($namespace_values as $directive => $value) { 01888 $this->set($namespace .'.'. $directive, $value); 01889 } 01890 } 01891 } 01892 } 01893 01900 public static function getAllowedDirectivesForForm($allowed, $schema = null) { 01901 if (!$schema) { 01902 $schema = HTMLPurifier_ConfigSchema::instance(); 01903 } 01904 if ($allowed !== true) { 01905 if (is_string($allowed)) $allowed = array($allowed); 01906 $allowed_ns = array(); 01907 $allowed_directives = array(); 01908 $blacklisted_directives = array(); 01909 foreach ($allowed as $ns_or_directive) { 01910 if (strpos($ns_or_directive, '.') !== false) { 01911 // directive 01912 if ($ns_or_directive[0] == '-') { 01913 $blacklisted_directives[substr($ns_or_directive, 1)] = true; 01914 } else { 01915 $allowed_directives[$ns_or_directive] = true; 01916 } 01917 } else { 01918 // namespace 01919 $allowed_ns[$ns_or_directive] = true; 01920 } 01921 } 01922 } 01923 $ret = array(); 01924 foreach ($schema->info as $key => $def) { 01925 list($ns, $directive) = explode('.', $key, 2); 01926 if ($allowed !== true) { 01927 if (isset($blacklisted_directives["$ns.$directive"])) continue; 01928 if (!isset($allowed_directives["$ns.$directive"]) && !isset($allowed_ns[$ns])) continue; 01929 } 01930 if (isset($def->isAlias)) continue; 01931 if ($directive == 'DefinitionID' || $directive == 'DefinitionRev') continue; 01932 $ret[] = array($ns, $directive); 01933 } 01934 return $ret; 01935 } 01936 01946 public static function loadArrayFromForm($array, $index = false, $allowed = true, $mq_fix = true, $schema = null) { 01947 $ret = HTMLPurifier_Config::prepareArrayFromForm($array, $index, $allowed, $mq_fix, $schema); 01948 $config = HTMLPurifier_Config::create($ret, $schema); 01949 return $config; 01950 } 01951 01956 public function mergeArrayFromForm($array, $index = false, $allowed = true, $mq_fix = true) { 01957 $ret = HTMLPurifier_Config::prepareArrayFromForm($array, $index, $allowed, $mq_fix, $this->def); 01958 $this->loadArray($ret); 01959 } 01960 01965 public static function prepareArrayFromForm($array, $index = false, $allowed = true, $mq_fix = true, $schema = null) { 01966 if ($index !== false) $array = (isset($array[$index]) && is_array($array[$index])) ? $array[$index] : array(); 01967 $mq = $mq_fix && function_exists('get_magic_quotes_gpc') && get_magic_quotes_gpc(); 01968 01969 $allowed = HTMLPurifier_Config::getAllowedDirectivesForForm($allowed, $schema); 01970 $ret = array(); 01971 foreach ($allowed as $key) { 01972 list($ns, $directive) = $key; 01973 $skey = "$ns.$directive"; 01974 if (!empty($array["Null_$skey"])) { 01975 $ret[$ns][$directive] = null; 01976 continue; 01977 } 01978 if (!isset($array[$skey])) continue; 01979 $value = $mq ? stripslashes($array[$skey]) : $array[$skey]; 01980 $ret[$ns][$directive] = $value; 01981 } 01982 return $ret; 01983 } 01984 01989 public function loadIni($filename) { 01990 if ($this->isFinalized('Cannot load directives after finalization')) return; 01991 $array = parse_ini_file($filename, true); 01992 $this->loadArray($array); 01993 } 01994 01999 public function isFinalized($error = false) { 02000 if ($this->finalized && $error) { 02001 $this->triggerError($error, E_USER_ERROR); 02002 } 02003 return $this->finalized; 02004 } 02005 02010 public function autoFinalize() { 02011 if ($this->autoFinalize) { 02012 $this->finalize(); 02013 } else { 02014 $this->plist->squash(true); 02015 } 02016 } 02017 02021 public function finalize() { 02022 $this->finalized = true; 02023 $this->parser = null; 02024 } 02025 02030 protected function triggerError($msg, $no) { 02031 // determine previous stack frame 02032 $extra = ''; 02033 if ($this->chatty) { 02034 $trace = debug_backtrace(); 02035 // zip(tail(trace), trace) -- but PHP is not Haskell har har 02036 for ($i = 0, $c = count($trace); $i < $c - 1; $i++) { 02037 if ($trace[$i + 1]['class'] === 'HTMLPurifier_Config') { 02038 continue; 02039 } 02040 $frame = $trace[$i]; 02041 $extra = " invoked on line {$frame['line']} in file {$frame['file']}"; 02042 break; 02043 } 02044 } 02045 trigger_error($msg . $extra, $no); 02046 } 02047 02052 public function serialize() { 02053 $this->getDefinition('HTML'); 02054 $this->getDefinition('CSS'); 02055 $this->getDefinition('URI'); 02056 return serialize($this); 02057 } 02058 02059 } 02060 02061 02062 02063 02064 02068 class HTMLPurifier_ConfigSchema { 02069 02074 public $defaults = array(); 02075 02079 public $defaultPlist; 02080 02110 public $info = array(); 02111 02115 static protected $singleton; 02116 02117 public function __construct() { 02118 $this->defaultPlist = new HTMLPurifier_PropertyList(); 02119 } 02120 02124 public static function makeFromSerial() { 02125 $contents = file_get_contents(HTMLPURIFIER_PREFIX . '/HTMLPurifier/ConfigSchema/schema.ser'); 02126 $r = unserialize($contents); 02127 if (!$r) { 02128 $hash = sha1($contents); 02129 trigger_error("Unserialization of configuration schema failed, sha1 of file was $hash", E_USER_ERROR); 02130 } 02131 return $r; 02132 } 02133 02137 public static function instance($prototype = null) { 02138 if ($prototype !== null) { 02139 HTMLPurifier_ConfigSchema::$singleton = $prototype; 02140 } elseif (HTMLPurifier_ConfigSchema::$singleton === null || $prototype === true) { 02141 HTMLPurifier_ConfigSchema::$singleton = HTMLPurifier_ConfigSchema::makeFromSerial(); 02142 } 02143 return HTMLPurifier_ConfigSchema::$singleton; 02144 } 02145 02158 public function add($key, $default, $type, $allow_null) { 02159 $obj = new stdclass(); 02160 $obj->type = is_int($type) ? $type : HTMLPurifier_VarParser::$types[$type]; 02161 if ($allow_null) $obj->allow_null = true; 02162 $this->info[$key] = $obj; 02163 $this->defaults[$key] = $default; 02164 $this->defaultPlist->set($key, $default); 02165 } 02166 02176 public function addValueAliases($key, $aliases) { 02177 if (!isset($this->info[$key]->aliases)) { 02178 $this->info[$key]->aliases = array(); 02179 } 02180 foreach ($aliases as $alias => $real) { 02181 $this->info[$key]->aliases[$alias] = $real; 02182 } 02183 } 02184 02193 public function addAllowedValues($key, $allowed) { 02194 $this->info[$key]->allowed = $allowed; 02195 } 02196 02204 public function addAlias($key, $new_key) { 02205 $obj = new stdclass; 02206 $obj->key = $new_key; 02207 $obj->isAlias = true; 02208 $this->info[$key] = $obj; 02209 } 02210 02214 public function postProcess() { 02215 foreach ($this->info as $key => $v) { 02216 if (count((array) $v) == 1) { 02217 $this->info[$key] = $v->type; 02218 } elseif (count((array) $v) == 2 && isset($v->allow_null)) { 02219 $this->info[$key] = -$v->type; 02220 } 02221 } 02222 } 02223 02224 } 02225 02226 02227 02228 02229 02233 class HTMLPurifier_ContentSets 02234 { 02235 02239 public $info = array(); 02240 02245 public $lookup = array(); 02246 02250 protected $keys = array(); 02254 protected $values = array(); 02255 02261 public function __construct($modules) { 02262 if (!is_array($modules)) $modules = array($modules); 02263 // populate content_sets based on module hints 02264 // sorry, no way of overloading 02265 foreach ($modules as $module_i => $module) { 02266 foreach ($module->content_sets as $key => $value) { 02267 $temp = $this->convertToLookup($value); 02268 if (isset($this->lookup[$key])) { 02269 // add it into the existing content set 02270 $this->lookup[$key] = array_merge($this->lookup[$key], $temp); 02271 } else { 02272 $this->lookup[$key] = $temp; 02273 } 02274 } 02275 } 02276 $old_lookup = false; 02277 while ($old_lookup !== $this->lookup) { 02278 $old_lookup = $this->lookup; 02279 foreach ($this->lookup as $i => $set) { 02280 $add = array(); 02281 foreach ($set as $element => $x) { 02282 if (isset($this->lookup[$element])) { 02283 $add += $this->lookup[$element]; 02284 unset($this->lookup[$i][$element]); 02285 } 02286 } 02287 $this->lookup[$i] += $add; 02288 } 02289 } 02290 02291 foreach ($this->lookup as $key => $lookup) { 02292 $this->info[$key] = implode(' | ', array_keys($lookup)); 02293 } 02294 $this->keys = array_keys($this->info); 02295 $this->values = array_values($this->info); 02296 } 02297 02303 public function generateChildDef(&$def, $module) { 02304 if (!empty($def->child)) return; // already done! 02305 $content_model = $def->content_model; 02306 if (is_string($content_model)) { 02307 // Assume that $this->keys is alphanumeric 02308 $def->content_model = preg_replace_callback( 02309 '/\b(' . implode('|', $this->keys) . ')\b/', 02310 array($this, 'generateChildDefCallback'), 02311 $content_model 02312 ); 02313 //$def->content_model = str_replace( 02314 // $this->keys, $this->values, $content_model); 02315 } 02316 $def->child = $this->getChildDef($def, $module); 02317 } 02318 02319 public function generateChildDefCallback($matches) { 02320 return $this->info[$matches[0]]; 02321 } 02322 02331 public function getChildDef($def, $module) { 02332 $value = $def->content_model; 02333 if (is_object($value)) { 02334 trigger_error( 02335 'Literal object child definitions should be stored in '. 02336 'ElementDef->child not ElementDef->content_model', 02337 E_USER_NOTICE 02338 ); 02339 return $value; 02340 } 02341 switch ($def->content_model_type) { 02342 case 'required': 02343 return new HTMLPurifier_ChildDef_Required($value); 02344 case 'optional': 02345 return new HTMLPurifier_ChildDef_Optional($value); 02346 case 'empty': 02347 return new HTMLPurifier_ChildDef_Empty(); 02348 case 'custom': 02349 return new HTMLPurifier_ChildDef_Custom($value); 02350 } 02351 // defer to its module 02352 $return = false; 02353 if ($module->defines_child_def) { // save a func call 02354 $return = $module->getChildDef($def); 02355 } 02356 if ($return !== false) return $return; 02357 // error-out 02358 trigger_error( 02359 'Could not determine which ChildDef class to instantiate', 02360 E_USER_ERROR 02361 ); 02362 return false; 02363 } 02364 02371 protected function convertToLookup($string) { 02372 $array = explode('|', str_replace(' ', '', $string)); 02373 $ret = array(); 02374 foreach ($array as $i => $k) { 02375 $ret[$k] = true; 02376 } 02377 return $ret; 02378 } 02379 02380 } 02381 02382 02383 02384 02385 02393 class HTMLPurifier_Context 02394 { 02395 02399 private $_storage = array(); 02400 02406 public function register($name, &$ref) { 02407 if (isset($this->_storage[$name])) { 02408 trigger_error("Name $name produces collision, cannot re-register", 02409 E_USER_ERROR); 02410 return; 02411 } 02412 $this->_storage[$name] =& $ref; 02413 } 02414 02420 public function &get($name, $ignore_error = false) { 02421 if (!isset($this->_storage[$name])) { 02422 if (!$ignore_error) { 02423 trigger_error("Attempted to retrieve non-existent variable $name", 02424 E_USER_ERROR); 02425 } 02426 $var = null; // so we can return by reference 02427 return $var; 02428 } 02429 return $this->_storage[$name]; 02430 } 02431 02436 public function destroy($name) { 02437 if (!isset($this->_storage[$name])) { 02438 trigger_error("Attempted to destroy non-existent variable $name", 02439 E_USER_ERROR); 02440 return; 02441 } 02442 unset($this->_storage[$name]); 02443 } 02444 02449 public function exists($name) { 02450 return isset($this->_storage[$name]); 02451 } 02452 02457 public function loadArray($context_array) { 02458 foreach ($context_array as $key => $discard) { 02459 $this->register($key, $context_array[$key]); 02460 } 02461 } 02462 02463 } 02464 02465 02466 02467 02468 02477 abstract class HTMLPurifier_DefinitionCache 02478 { 02479 02480 public $type; 02481 02486 public function __construct($type) { 02487 $this->type = $type; 02488 } 02489 02494 public function generateKey($config) { 02495 return $config->version . ',' . // possibly replace with function calls 02496 $config->getBatchSerial($this->type) . ',' . 02497 $config->get($this->type . '.DefinitionRev'); 02498 } 02499 02506 public function isOld($key, $config) { 02507 if (substr_count($key, ',') < 2) return true; 02508 list($version, $hash, $revision) = explode(',', $key, 3); 02509 $compare = version_compare($version, $config->version); 02510 // version mismatch, is always old 02511 if ($compare != 0) return true; 02512 // versions match, ids match, check revision number 02513 if ( 02514 $hash == $config->getBatchSerial($this->type) && 02515 $revision < $config->get($this->type . '.DefinitionRev') 02516 ) return true; 02517 return false; 02518 } 02519 02526 public function checkDefType($def) { 02527 if ($def->type !== $this->type) { 02528 trigger_error("Cannot use definition of type {$def->type} in cache for {$this->type}"); 02529 return false; 02530 } 02531 return true; 02532 } 02533 02537 abstract public function add($def, $config); 02538 02542 abstract public function set($def, $config); 02543 02547 abstract public function replace($def, $config); 02548 02552 abstract public function get($config); 02553 02557 abstract public function remove($config); 02558 02562 abstract public function flush($config); 02563 02570 abstract public function cleanup($config); 02571 02572 } 02573 02574 02575 02576 02577 02581 class HTMLPurifier_DefinitionCacheFactory 02582 { 02583 02584 protected $caches = array('Serializer' => array()); 02585 protected $implementations = array(); 02586 protected $decorators = array(); 02587 02591 public function setup() { 02592 $this->addDecorator('Cleanup'); 02593 } 02594 02598 public static function instance($prototype = null) { 02599 static $instance; 02600 if ($prototype !== null) { 02601 $instance = $prototype; 02602 } elseif ($instance === null || $prototype === true) { 02603 $instance = new HTMLPurifier_DefinitionCacheFactory(); 02604 $instance->setup(); 02605 } 02606 return $instance; 02607 } 02608 02614 public function register($short, $long) { 02615 $this->implementations[$short] = $long; 02616 } 02617 02623 public function create($type, $config) { 02624 $method = $config->get('Cache.DefinitionImpl'); 02625 if ($method === null) { 02626 return new HTMLPurifier_DefinitionCache_Null($type); 02627 } 02628 if (!empty($this->caches[$method][$type])) { 02629 return $this->caches[$method][$type]; 02630 } 02631 if ( 02632 isset($this->implementations[$method]) && 02633 class_exists($class = $this->implementations[$method], false) 02634 ) { 02635 $cache = new $class($type); 02636 } else { 02637 if ($method != 'Serializer') { 02638 trigger_error("Unrecognized DefinitionCache $method, using Serializer instead", E_USER_WARNING); 02639 } 02640 $cache = new HTMLPurifier_DefinitionCache_Serializer($type); 02641 } 02642 foreach ($this->decorators as $decorator) { 02643 $new_cache = $decorator->decorate($cache); 02644 // prevent infinite recursion in PHP 4 02645 unset($cache); 02646 $cache = $new_cache; 02647 } 02648 $this->caches[$method][$type] = $cache; 02649 return $this->caches[$method][$type]; 02650 } 02651 02656 public function addDecorator($decorator) { 02657 if (is_string($decorator)) { 02658 $class = "HTMLPurifier_DefinitionCache_Decorator_$decorator"; 02659 $decorator = new $class; 02660 } 02661 $this->decorators[$decorator->name] = $decorator; 02662 } 02663 02664 } 02665 02666 02667 02668 02669 02676 class HTMLPurifier_Doctype 02677 { 02681 public $name; 02682 02687 public $modules = array(); 02688 02692 public $tidyModules = array(); 02693 02697 public $xml = true; 02698 02702 public $aliases = array(); 02703 02707 public $dtdPublic; 02708 02712 public $dtdSystem; 02713 02714 public function __construct($name = null, $xml = true, $modules = array(), 02715 $tidyModules = array(), $aliases = array(), $dtd_public = null, $dtd_system = null 02716 ) { 02717 $this->name = $name; 02718 $this->xml = $xml; 02719 $this->modules = $modules; 02720 $this->tidyModules = $tidyModules; 02721 $this->aliases = $aliases; 02722 $this->dtdPublic = $dtd_public; 02723 $this->dtdSystem = $dtd_system; 02724 } 02725 } 02726 02727 02728 02729 02730 02731 class HTMLPurifier_DoctypeRegistry 02732 { 02733 02737 protected $doctypes; 02738 02742 protected $aliases; 02743 02754 public function register($doctype, $xml = true, $modules = array(), 02755 $tidy_modules = array(), $aliases = array(), $dtd_public = null, $dtd_system = null 02756 ) { 02757 if (!is_array($modules)) $modules = array($modules); 02758 if (!is_array($tidy_modules)) $tidy_modules = array($tidy_modules); 02759 if (!is_array($aliases)) $aliases = array($aliases); 02760 if (!is_object($doctype)) { 02761 $doctype = new HTMLPurifier_Doctype( 02762 $doctype, $xml, $modules, $tidy_modules, $aliases, $dtd_public, $dtd_system 02763 ); 02764 } 02765 $this->doctypes[$doctype->name] = $doctype; 02766 $name = $doctype->name; 02767 // hookup aliases 02768 foreach ($doctype->aliases as $alias) { 02769 if (isset($this->doctypes[$alias])) continue; 02770 $this->aliases[$alias] = $name; 02771 } 02772 // remove old aliases 02773 if (isset($this->aliases[$name])) unset($this->aliases[$name]); 02774 return $doctype; 02775 } 02776 02784 public function get($doctype) { 02785 if (isset($this->aliases[$doctype])) $doctype = $this->aliases[$doctype]; 02786 if (!isset($this->doctypes[$doctype])) { 02787 trigger_error('Doctype ' . htmlspecialchars($doctype) . ' does not exist', E_USER_ERROR); 02788 $anon = new HTMLPurifier_Doctype($doctype); 02789 return $anon; 02790 } 02791 return $this->doctypes[$doctype]; 02792 } 02793 02802 public function make($config) { 02803 return clone $this->get($this->getDoctypeFromConfig($config)); 02804 } 02805 02809 public function getDoctypeFromConfig($config) { 02810 // recommended test 02811 $doctype = $config->get('HTML.Doctype'); 02812 if (!empty($doctype)) return $doctype; 02813 $doctype = $config->get('HTML.CustomDoctype'); 02814 if (!empty($doctype)) return $doctype; 02815 // backwards-compatibility 02816 if ($config->get('HTML.XHTML')) { 02817 $doctype = 'XHTML 1.0'; 02818 } else { 02819 $doctype = 'HTML 4.01'; 02820 } 02821 if ($config->get('HTML.Strict')) { 02822 $doctype .= ' Strict'; 02823 } else { 02824 $doctype .= ' Transitional'; 02825 } 02826 return $doctype; 02827 } 02828 02829 } 02830 02831 02832 02833 02834 02843 class HTMLPurifier_ElementDef 02844 { 02845 02850 public $standalone = true; 02851 02863 public $attr = array(); 02864 02868 public $attr_transform_pre = array(); 02869 02873 public $attr_transform_post = array(); 02874 02878 public $child; 02879 02887 public $content_model; 02888 02896 public $content_model_type; 02897 02898 02899 02906 public $descendants_are_inline = false; 02907 02912 public $required_attr = array(); 02913 02925 public $excludes = array(); 02926 02930 public $autoclose = array(); 02931 02937 public $wrap; 02938 02943 public $formatting; 02944 02948 public static function create($content_model, $content_model_type, $attr) { 02949 $def = new HTMLPurifier_ElementDef(); 02950 $def->content_model = $content_model; 02951 $def->content_model_type = $content_model_type; 02952 $def->attr = $attr; 02953 return $def; 02954 } 02955 02961 public function mergeIn($def) { 02962 02963 // later keys takes precedence 02964 foreach($def->attr as $k => $v) { 02965 if ($k === 0) { 02966 // merge in the includes 02967 // sorry, no way to override an include 02968 foreach ($v as $v2) { 02969 $this->attr[0][] = $v2; 02970 } 02971 continue; 02972 } 02973 if ($v === false) { 02974 if (isset($this->attr[$k])) unset($this->attr[$k]); 02975 continue; 02976 } 02977 $this->attr[$k] = $v; 02978 } 02979 $this->_mergeAssocArray($this->attr_transform_pre, $def->attr_transform_pre); 02980 $this->_mergeAssocArray($this->attr_transform_post, $def->attr_transform_post); 02981 $this->_mergeAssocArray($this->excludes, $def->excludes); 02982 02983 if(!empty($def->content_model)) { 02984 $this->content_model = 02985 str_replace("#SUPER", $this->content_model, $def->content_model); 02986 $this->child = false; 02987 } 02988 if(!empty($def->content_model_type)) { 02989 $this->content_model_type = $def->content_model_type; 02990 $this->child = false; 02991 } 02992 if(!is_null($def->child)) $this->child = $def->child; 02993 if(!is_null($def->formatting)) $this->formatting = $def->formatting; 02994 if($def->descendants_are_inline) $this->descendants_are_inline = $def->descendants_are_inline; 02995 02996 } 02997 03003 private function _mergeAssocArray(&$a1, $a2) { 03004 foreach ($a2 as $k => $v) { 03005 if ($v === false) { 03006 if (isset($a1[$k])) unset($a1[$k]); 03007 continue; 03008 } 03009 $a1[$k] = $v; 03010 } 03011 } 03012 03013 } 03014 03015 03016 03017 03018 03023 class HTMLPurifier_Encoder 03024 { 03025 03029 private function __construct() { 03030 trigger_error('Cannot instantiate encoder, call methods statically', E_USER_ERROR); 03031 } 03032 03036 public static function muteErrorHandler() {} 03037 03041 public static function unsafeIconv($in, $out, $text) { 03042 set_error_handler(array('HTMLPurifier_Encoder', 'muteErrorHandler')); 03043 $r = iconv($in, $out, $text); 03044 restore_error_handler(); 03045 return $r; 03046 } 03047 03051 public static function iconv($in, $out, $text, $max_chunk_size = 8000) { 03052 $code = self::testIconvTruncateBug(); 03053 if ($code == self::ICONV_OK) { 03054 return self::unsafeIconv($in, $out, $text); 03055 } elseif ($code == self::ICONV_TRUNCATES) { 03056 // we can only work around this if the input character set 03057 // is utf-8 03058 if ($in == 'utf-8') { 03059 if ($max_chunk_size < 4) { 03060 trigger_error('max_chunk_size is too small', E_USER_WARNING); 03061 return false; 03062 } 03063 // split into 8000 byte chunks, but be careful to handle 03064 // multibyte boundaries properly 03065 if (($c = strlen($text)) <= $max_chunk_size) { 03066 return self::unsafeIconv($in, $out, $text); 03067 } 03068 $r = ''; 03069 $i = 0; 03070 while (true) { 03071 if ($i + $max_chunk_size >= $c) { 03072 $r .= self::unsafeIconv($in, $out, substr($text, $i)); 03073 break; 03074 } 03075 // wibble the boundary 03076 if (0x80 != (0xC0 & ord($text[$i + $max_chunk_size]))) { 03077 $chunk_size = $max_chunk_size; 03078 } elseif (0x80 != (0xC0 & ord($text[$i + $max_chunk_size - 1]))) { 03079 $chunk_size = $max_chunk_size - 1; 03080 } elseif (0x80 != (0xC0 & ord($text[$i + $max_chunk_size - 2]))) { 03081 $chunk_size = $max_chunk_size - 2; 03082 } elseif (0x80 != (0xC0 & ord($text[$i + $max_chunk_size - 3]))) { 03083 $chunk_size = $max_chunk_size - 3; 03084 } else { 03085 return false; // rather confusing UTF-8... 03086 } 03087 $chunk = substr($text, $i, $chunk_size); // substr doesn't mind overlong lengths 03088 $r .= self::unsafeIconv($in, $out, $chunk); 03089 $i += $chunk_size; 03090 } 03091 return $r; 03092 } else { 03093 return false; 03094 } 03095 } else { 03096 return false; 03097 } 03098 } 03099 03125 public static function cleanUTF8($str, $force_php = false) { 03126 03127 // UTF-8 validity is checked since PHP 4.3.5 03128 // This is an optimization: if the string is already valid UTF-8, no 03129 // need to do PHP stuff. 99% of the time, this will be the case. 03130 // The regexp matches the XML char production, as well as well as excluding 03131 // non-SGML codepoints U+007F to U+009F 03132 if (preg_match('/^[\x{9}\x{A}\x{D}\x{20}-\x{7E}\x{A0}-\x{D7FF}\x{E000}-\x{FFFD}\x{10000}-\x{10FFFF}]*$/Du', $str)) { 03133 return $str; 03134 } 03135 03136 $mState = 0; // cached expected number of octets after the current octet 03137 // until the beginning of the next UTF8 character sequence 03138 $mUcs4 = 0; // cached Unicode character 03139 $mBytes = 1; // cached expected number of octets in the current sequence 03140 03141 // original code involved an $out that was an array of Unicode 03142 // codepoints. Instead of having to convert back into UTF-8, we've 03143 // decided to directly append valid UTF-8 characters onto a string 03144 // $out once they're done. $char accumulates raw bytes, while $mUcs4 03145 // turns into the Unicode code point, so there's some redundancy. 03146 03147 $out = ''; 03148 $char = ''; 03149 03150 $len = strlen($str); 03151 for($i = 0; $i < $len; $i++) { 03152 $in = ord($str{$i}); 03153 $char .= $str[$i]; // append byte to char 03154 if (0 == $mState) { 03155 // When mState is zero we expect either a US-ASCII character 03156 // or a multi-octet sequence. 03157 if (0 == (0x80 & ($in))) { 03158 // US-ASCII, pass straight through. 03159 if (($in <= 31 || $in == 127) && 03160 !($in == 9 || $in == 13 || $in == 10) // save \r\t\n 03161 ) { 03162 // control characters, remove 03163 } else { 03164 $out .= $char; 03165 } 03166 // reset 03167 $char = ''; 03168 $mBytes = 1; 03169 } elseif (0xC0 == (0xE0 & ($in))) { 03170 // First octet of 2 octet sequence 03171 $mUcs4 = ($in); 03172 $mUcs4 = ($mUcs4 & 0x1F) << 6; 03173 $mState = 1; 03174 $mBytes = 2; 03175 } elseif (0xE0 == (0xF0 & ($in))) { 03176 // First octet of 3 octet sequence 03177 $mUcs4 = ($in); 03178 $mUcs4 = ($mUcs4 & 0x0F) << 12; 03179 $mState = 2; 03180 $mBytes = 3; 03181 } elseif (0xF0 == (0xF8 & ($in))) { 03182 // First octet of 4 octet sequence 03183 $mUcs4 = ($in); 03184 $mUcs4 = ($mUcs4 & 0x07) << 18; 03185 $mState = 3; 03186 $mBytes = 4; 03187 } elseif (0xF8 == (0xFC & ($in))) { 03188 // First octet of 5 octet sequence. 03189 // 03190 // This is illegal because the encoded codepoint must be 03191 // either: 03192 // (a) not the shortest form or 03193 // (b) outside the Unicode range of 0-0x10FFFF. 03194 // Rather than trying to resynchronize, we will carry on 03195 // until the end of the sequence and let the later error 03196 // handling code catch it. 03197 $mUcs4 = ($in); 03198 $mUcs4 = ($mUcs4 & 0x03) << 24; 03199 $mState = 4; 03200 $mBytes = 5; 03201 } elseif (0xFC == (0xFE & ($in))) { 03202 // First octet of 6 octet sequence, see comments for 5 03203 // octet sequence. 03204 $mUcs4 = ($in); 03205 $mUcs4 = ($mUcs4 & 1) << 30; 03206 $mState = 5; 03207 $mBytes = 6; 03208 } else { 03209 // Current octet is neither in the US-ASCII range nor a 03210 // legal first octet of a multi-octet sequence. 03211 $mState = 0; 03212 $mUcs4 = 0; 03213 $mBytes = 1; 03214 $char = ''; 03215 } 03216 } else { 03217 // When mState is non-zero, we expect a continuation of the 03218 // multi-octet sequence 03219 if (0x80 == (0xC0 & ($in))) { 03220 // Legal continuation. 03221 $shift = ($mState - 1) * 6; 03222 $tmp = $in; 03223 $tmp = ($tmp & 0x0000003F) << $shift; 03224 $mUcs4 |= $tmp; 03225 03226 if (0 == --$mState) { 03227 // End of the multi-octet sequence. mUcs4 now contains 03228 // the final Unicode codepoint to be output 03229 03230 // Check for illegal sequences and codepoints. 03231 03232 // From Unicode 3.1, non-shortest form is illegal 03233 if (((2 == $mBytes) && ($mUcs4 < 0x0080)) || 03234 ((3 == $mBytes) && ($mUcs4 < 0x0800)) || 03235 ((4 == $mBytes) && ($mUcs4 < 0x10000)) || 03236 (4 < $mBytes) || 03237 // From Unicode 3.2, surrogate characters = illegal 03238 (($mUcs4 & 0xFFFFF800) == 0xD800) || 03239 // Codepoints outside the Unicode range are illegal 03240 ($mUcs4 > 0x10FFFF) 03241 ) { 03242 03243 } elseif (0xFEFF != $mUcs4 && // omit BOM 03244 // check for valid Char unicode codepoints 03245 ( 03246 0x9 == $mUcs4 || 03247 0xA == $mUcs4 || 03248 0xD == $mUcs4 || 03249 (0x20 <= $mUcs4 && 0x7E >= $mUcs4) || 03250 // 7F-9F is not strictly prohibited by XML, 03251 // but it is non-SGML, and thus we don't allow it 03252 (0xA0 <= $mUcs4 && 0xD7FF >= $mUcs4) || 03253 (0x10000 <= $mUcs4 && 0x10FFFF >= $mUcs4) 03254 ) 03255 ) { 03256 $out .= $char; 03257 } 03258 // initialize UTF8 cache (reset) 03259 $mState = 0; 03260 $mUcs4 = 0; 03261 $mBytes = 1; 03262 $char = ''; 03263 } 03264 } else { 03265 // ((0xC0 & (*in) != 0x80) && (mState != 0)) 03266 // Incomplete multi-octet sequence. 03267 // used to result in complete fail, but we'll reset 03268 $mState = 0; 03269 $mUcs4 = 0; 03270 $mBytes = 1; 03271 $char =''; 03272 } 03273 } 03274 } 03275 return $out; 03276 } 03277 03291 // +----------+----------+----------+----------+ 03292 // | 33222222 | 22221111 | 111111 | | 03293 // | 10987654 | 32109876 | 54321098 | 76543210 | bit 03294 // +----------+----------+----------+----------+ 03295 // | | | | 0xxxxxxx | 1 byte 0x00000000..0x0000007F 03296 // | | | 110yyyyy | 10xxxxxx | 2 byte 0x00000080..0x000007FF 03297 // | | 1110zzzz | 10yyyyyy | 10xxxxxx | 3 byte 0x00000800..0x0000FFFF 03298 // | 11110www | 10wwzzzz | 10yyyyyy | 10xxxxxx | 4 byte 0x00010000..0x0010FFFF 03299 // +----------+----------+----------+----------+ 03300 // | 00000000 | 00011111 | 11111111 | 11111111 | Theoretical upper limit of legal scalars: 2097151 (0x001FFFFF) 03301 // | 00000000 | 00010000 | 11111111 | 11111111 | Defined upper limit of legal scalar codes 03302 // +----------+----------+----------+----------+ 03303 03304 public static function unichr($code) { 03305 if($code > 1114111 or $code < 0 or 03306 ($code >= 55296 and $code <= 57343) ) { 03307 // bits are set outside the "valid" range as defined 03308 // by UNICODE 4.1.0 03309 return ''; 03310 } 03311 03312 $x = $y = $z = $w = 0; 03313 if ($code < 128) { 03314 // regular ASCII character 03315 $x = $code; 03316 } else { 03317 // set up bits for UTF-8 03318 $x = ($code & 63) | 128; 03319 if ($code < 2048) { 03320 $y = (($code & 2047) >> 6) | 192; 03321 } else { 03322 $y = (($code & 4032) >> 6) | 128; 03323 if($code < 65536) { 03324 $z = (($code >> 12) & 15) | 224; 03325 } else { 03326 $z = (($code >> 12) & 63) | 128; 03327 $w = (($code >> 18) & 7) | 240; 03328 } 03329 } 03330 } 03331 // set up the actual character 03332 $ret = ''; 03333 if($w) $ret .= chr($w); 03334 if($z) $ret .= chr($z); 03335 if($y) $ret .= chr($y); 03336 $ret .= chr($x); 03337 03338 return $ret; 03339 } 03340 03341 public static function iconvAvailable() { 03342 static $iconv = null; 03343 if ($iconv === null) { 03344 $iconv = function_exists('iconv') && self::testIconvTruncateBug() != self::ICONV_UNUSABLE; 03345 } 03346 return $iconv; 03347 } 03348 03352 public static function convertToUTF8($str, $config, $context) { 03353 $encoding = $config->get('Core.Encoding'); 03354 if ($encoding === 'utf-8') return $str; 03355 static $iconv = null; 03356 if ($iconv === null) $iconv = self::iconvAvailable(); 03357 if ($iconv && !$config->get('Test.ForceNoIconv')) { 03358 // unaffected by bugs, since UTF-8 support all characters 03359 $str = self::unsafeIconv($encoding, 'utf-8//IGNORE', $str); 03360 if ($str === false) { 03361 // $encoding is not a valid encoding 03362 trigger_error('Invalid encoding ' . $encoding, E_USER_ERROR); 03363 return ''; 03364 } 03365 // If the string is bjorked by Shift_JIS or a similar encoding 03366 // that doesn't support all of ASCII, convert the naughty 03367 // characters to their true byte-wise ASCII/UTF-8 equivalents. 03368 $str = strtr($str, self::testEncodingSupportsASCII($encoding)); 03369 return $str; 03370 } elseif ($encoding === 'iso-8859-1') { 03371 $str = utf8_encode($str); 03372 return $str; 03373 } 03374 trigger_error('Encoding not supported, please install iconv', E_USER_ERROR); 03375 } 03376 03382 public static function convertFromUTF8($str, $config, $context) { 03383 $encoding = $config->get('Core.Encoding'); 03384 if ($escape = $config->get('Core.EscapeNonASCIICharacters')) { 03385 $str = self::convertToASCIIDumbLossless($str); 03386 } 03387 if ($encoding === 'utf-8') return $str; 03388 static $iconv = null; 03389 if ($iconv === null) $iconv = self::iconvAvailable(); 03390 if ($iconv && !$config->get('Test.ForceNoIconv')) { 03391 // Undo our previous fix in convertToUTF8, otherwise iconv will barf 03392 $ascii_fix = self::testEncodingSupportsASCII($encoding); 03393 if (!$escape && !empty($ascii_fix)) { 03394 $clear_fix = array(); 03395 foreach ($ascii_fix as $utf8 => $native) $clear_fix[$utf8] = ''; 03396 $str = strtr($str, $clear_fix); 03397 } 03398 $str = strtr($str, array_flip($ascii_fix)); 03399 // Normal stuff 03400 $str = self::iconv('utf-8', $encoding . '//IGNORE', $str); 03401 return $str; 03402 } elseif ($encoding === 'iso-8859-1') { 03403 $str = utf8_decode($str); 03404 return $str; 03405 } 03406 trigger_error('Encoding not supported', E_USER_ERROR); 03407 // You might be tempted to assume that the ASCII representation 03408 // might be OK, however, this is *not* universally true over all 03409 // encodings. So we take the conservative route here, rather 03410 // than forcibly turn on %Core.EscapeNonASCIICharacters 03411 } 03412 03429 public static function convertToASCIIDumbLossless($str) { 03430 $bytesleft = 0; 03431 $result = ''; 03432 $working = 0; 03433 $len = strlen($str); 03434 for( $i = 0; $i < $len; $i++ ) { 03435 $bytevalue = ord( $str[$i] ); 03436 if( $bytevalue <= 0x7F ) { //0xxx xxxx 03437 $result .= chr( $bytevalue ); 03438 $bytesleft = 0; 03439 } elseif( $bytevalue <= 0xBF ) { //10xx xxxx 03440 $working = $working << 6; 03441 $working += ($bytevalue & 0x3F); 03442 $bytesleft--; 03443 if( $bytesleft <= 0 ) { 03444 $result .= "&#" . $working . ";"; 03445 } 03446 } elseif( $bytevalue <= 0xDF ) { //110x xxxx 03447 $working = $bytevalue & 0x1F; 03448 $bytesleft = 1; 03449 } elseif( $bytevalue <= 0xEF ) { //1110 xxxx 03450 $working = $bytevalue & 0x0F; 03451 $bytesleft = 2; 03452 } else { //1111 0xxx 03453 $working = $bytevalue & 0x07; 03454 $bytesleft = 3; 03455 } 03456 } 03457 return $result; 03458 } 03459 03461 const ICONV_OK = 0; 03462 03465 const ICONV_TRUNCATES = 1; 03466 03469 const ICONV_UNUSABLE = 2; 03470 03485 public static function testIconvTruncateBug() { 03486 static $code = null; 03487 if ($code === null) { 03488 // better not use iconv, otherwise infinite loop! 03489 $r = self::unsafeIconv('utf-8', 'ascii//IGNORE', "\xCE\xB1" . str_repeat('a', 9000)); 03490 if ($r === false) { 03491 $code = self::ICONV_UNUSABLE; 03492 } elseif (($c = strlen($r)) < 9000) { 03493 $code = self::ICONV_TRUNCATES; 03494 } elseif ($c > 9000) { 03495 trigger_error('Your copy of iconv is extremely buggy. Please notify HTML Purifier maintainers: include your iconv version as per phpversion()', E_USER_ERROR); 03496 } else { 03497 $code = self::ICONV_OK; 03498 } 03499 } 03500 return $code; 03501 } 03502 03514 public static function testEncodingSupportsASCII($encoding, $bypass = false) { 03515 // All calls to iconv here are unsafe, proof by case analysis: 03516 // If ICONV_OK, no difference. 03517 // If ICONV_TRUNCATE, all calls involve one character inputs, 03518 // so bug is not triggered. 03519 // If ICONV_UNUSABLE, this call is irrelevant 03520 static $encodings = array(); 03521 if (!$bypass) { 03522 if (isset($encodings[$encoding])) return $encodings[$encoding]; 03523 $lenc = strtolower($encoding); 03524 switch ($lenc) { 03525 case 'shift_jis': 03526 return array("\xC2\xA5" => '\\', "\xE2\x80\xBE" => '~'); 03527 case 'johab': 03528 return array("\xE2\x82\xA9" => '\\'); 03529 } 03530 if (strpos($lenc, 'iso-8859-') === 0) return array(); 03531 } 03532 $ret = array(); 03533 if (self::unsafeIconv('UTF-8', $encoding, 'a') === false) return false; 03534 for ($i = 0x20; $i <= 0x7E; $i++) { // all printable ASCII chars 03535 $c = chr($i); // UTF-8 char 03536 $r = self::unsafeIconv('UTF-8', "$encoding//IGNORE", $c); // initial conversion 03537 if ( 03538 $r === '' || 03539 // This line is needed for iconv implementations that do not 03540 // omit characters that do not exist in the target character set 03541 ($r === $c && self::unsafeIconv($encoding, 'UTF-8//IGNORE', $r) !== $c) 03542 ) { 03543 // Reverse engineer: what's the UTF-8 equiv of this byte 03544 // sequence? This assumes that there's no variable width 03545 // encoding that doesn't support ASCII. 03546 $ret[self::unsafeIconv($encoding, 'UTF-8//IGNORE', $c)] = $c; 03547 } 03548 } 03549 $encodings[$encoding] = $ret; 03550 return $ret; 03551 } 03552 03553 03554 } 03555 03556 03557 03558 03559 03563 class HTMLPurifier_EntityLookup { 03564 03568 public $table; 03569 03576 public function setup($file = false) { 03577 if (!$file) { 03578 $file = HTMLPURIFIER_PREFIX . '/HTMLPurifier/EntityLookup/entities.ser'; 03579 } 03580 $this->table = unserialize(file_get_contents($file)); 03581 } 03582 03587 public static function instance($prototype = false) { 03588 // no references, since PHP doesn't copy unless modified 03589 static $instance = null; 03590 if ($prototype) { 03591 $instance = $prototype; 03592 } elseif (!$instance) { 03593 $instance = new HTMLPurifier_EntityLookup(); 03594 $instance->setup(); 03595 } 03596 return $instance; 03597 } 03598 03599 } 03600 03601 03602 03603 03604 03605 // if want to implement error collecting here, we'll need to use some sort 03606 // of global data (probably trigger_error) because it's impossible to pass 03607 // $config or $context to the callback functions. 03608 03612 class HTMLPurifier_EntityParser 03613 { 03614 03618 protected $_entity_lookup; 03619 03623 protected $_substituteEntitiesRegex = 03624 '/&(?:[#]x([a-fA-F0-9]+)|[#]0*(\d+)|([A-Za-z_:][A-Za-z0-9.\-_:]*));?/'; 03625 // 1. hex 2. dec 3. string (XML style) 03626 03627 03631 protected $_special_dec2str = 03632 array( 03633 34 => '"', 03634 38 => '&', 03635 39 => "'", 03636 60 => '<', 03637 62 => '>' 03638 ); 03639 03643 protected $_special_ent2dec = 03644 array( 03645 'quot' => 34, 03646 'amp' => 38, 03647 'lt' => 60, 03648 'gt' => 62 03649 ); 03650 03659 public function substituteNonSpecialEntities($string) { 03660 // it will try to detect missing semicolons, but don't rely on it 03661 return preg_replace_callback( 03662 $this->_substituteEntitiesRegex, 03663 array($this, 'nonSpecialEntityCallback'), 03664 $string 03665 ); 03666 } 03667 03677 protected function nonSpecialEntityCallback($matches) { 03678 // replaces all but big five 03679 $entity = $matches[0]; 03680 $is_num = (@$matches[0][1] === '#'); 03681 if ($is_num) { 03682 $is_hex = (@$entity[2] === 'x'); 03683 $code = $is_hex ? hexdec($matches[1]) : (int) $matches[2]; 03684 03685 // abort for special characters 03686 if (isset($this->_special_dec2str[$code])) return $entity; 03687 03688 return HTMLPurifier_Encoder::unichr($code); 03689 } else { 03690 if (isset($this->_special_ent2dec[$matches[3]])) return $entity; 03691 if (!$this->_entity_lookup) { 03692 $this->_entity_lookup = HTMLPurifier_EntityLookup::instance(); 03693 } 03694 if (isset($this->_entity_lookup->table[$matches[3]])) { 03695 return $this->_entity_lookup->table[$matches[3]]; 03696 } else { 03697 return $entity; 03698 } 03699 } 03700 } 03701 03711 public function substituteSpecialEntities($string) { 03712 return preg_replace_callback( 03713 $this->_substituteEntitiesRegex, 03714 array($this, 'specialEntityCallback'), 03715 $string); 03716 } 03717 03728 protected function specialEntityCallback($matches) { 03729 $entity = $matches[0]; 03730 $is_num = (@$matches[0][1] === '#'); 03731 if ($is_num) { 03732 $is_hex = (@$entity[2] === 'x'); 03733 $int = $is_hex ? hexdec($matches[1]) : (int) $matches[2]; 03734 return isset($this->_special_dec2str[$int]) ? 03735 $this->_special_dec2str[$int] : 03736 $entity; 03737 } else { 03738 return isset($this->_special_ent2dec[$matches[3]]) ? 03739 $this->_special_ent2dec[$matches[3]] : 03740 $entity; 03741 } 03742 } 03743 03744 } 03745 03746 03747 03748 03749 03754 class HTMLPurifier_ErrorCollector 03755 { 03756 03761 const LINENO = 0; 03762 const SEVERITY = 1; 03763 const MESSAGE = 2; 03764 const CHILDREN = 3; 03765 03766 protected $errors; 03767 protected $_current; 03768 protected $_stacks = array(array()); 03769 protected $locale; 03770 protected $generator; 03771 protected $context; 03772 03773 protected $lines = array(); 03774 03775 public function __construct($context) { 03776 $this->locale =& $context->get('Locale'); 03777 $this->context = $context; 03778 $this->_current =& $this->_stacks[0]; 03779 $this->errors =& $this->_stacks[0]; 03780 } 03781 03789 public function send($severity, $msg) { 03790 03791 $args = array(); 03792 if (func_num_args() > 2) { 03793 $args = func_get_args(); 03794 array_shift($args); 03795 unset($args[0]); 03796 } 03797 03798 $token = $this->context->get('CurrentToken', true); 03799 $line = $token ? $token->line : $this->context->get('CurrentLine', true); 03800 $col = $token ? $token->col : $this->context->get('CurrentCol', true); 03801 $attr = $this->context->get('CurrentAttr', true); 03802 03803 // perform special substitutions, also add custom parameters 03804 $subst = array(); 03805 if (!is_null($token)) { 03806 $args['CurrentToken'] = $token; 03807 } 03808 if (!is_null($attr)) { 03809 $subst['$CurrentAttr.Name'] = $attr; 03810 if (isset($token->attr[$attr])) $subst['$CurrentAttr.Value'] = $token->attr[$attr]; 03811 } 03812 03813 if (empty($args)) { 03814 $msg = $this->locale->getMessage($msg); 03815 } else { 03816 $msg = $this->locale->formatMessage($msg, $args); 03817 } 03818 03819 if (!empty($subst)) $msg = strtr($msg, $subst); 03820 03821 // (numerically indexed) 03822 $error = array( 03823 self::LINENO => $line, 03824 self::SEVERITY => $severity, 03825 self::MESSAGE => $msg, 03826 self::CHILDREN => array() 03827 ); 03828 $this->_current[] = $error; 03829 03830 03831 // NEW CODE BELOW ... 03832 03833 $struct = null; 03834 // Top-level errors are either: 03835 // TOKEN type, if $value is set appropriately, or 03836 // "syntax" type, if $value is null 03837 $new_struct = new HTMLPurifier_ErrorStruct(); 03838 $new_struct->type = HTMLPurifier_ErrorStruct::TOKEN; 03839 if ($token) $new_struct->value = clone $token; 03840 if (is_int($line) && is_int($col)) { 03841 if (isset($this->lines[$line][$col])) { 03842 $struct = $this->lines[$line][$col]; 03843 } else { 03844 $struct = $this->lines[$line][$col] = $new_struct; 03845 } 03846 // These ksorts may present a performance problem 03847 ksort($this->lines[$line], SORT_NUMERIC); 03848 } else { 03849 if (isset($this->lines[-1])) { 03850 $struct = $this->lines[-1]; 03851 } else { 03852 $struct = $this->lines[-1] = $new_struct; 03853 } 03854 } 03855 ksort($this->lines, SORT_NUMERIC); 03856 03857 // Now, check if we need to operate on a lower structure 03858 if (!empty($attr)) { 03859 $struct = $struct->getChild(HTMLPurifier_ErrorStruct::ATTR, $attr); 03860 if (!$struct->value) { 03861 $struct->value = array($attr, 'PUT VALUE HERE'); 03862 } 03863 } 03864 if (!empty($cssprop)) { 03865 $struct = $struct->getChild(HTMLPurifier_ErrorStruct::CSSPROP, $cssprop); 03866 if (!$struct->value) { 03867 // if we tokenize CSS this might be a little more difficult to do 03868 $struct->value = array($cssprop, 'PUT VALUE HERE'); 03869 } 03870 } 03871 03872 // Ok, structs are all setup, now time to register the error 03873 $struct->addError($severity, $msg); 03874 } 03875 03882 public function getRaw() { 03883 return $this->errors; 03884 } 03885 03891 public function getHTMLFormatted($config, $errors = null) { 03892 $ret = array(); 03893 03894 $this->generator = new HTMLPurifier_Generator($config, $this->context); 03895 if ($errors === null) $errors = $this->errors; 03896 03897 // 'At line' message needs to be removed 03898 03899 // generation code for new structure goes here. It needs to be recursive. 03900 foreach ($this->lines as $line => $col_array) { 03901 if ($line == -1) continue; 03902 foreach ($col_array as $col => $struct) { 03903 $this->_renderStruct($ret, $struct, $line, $col); 03904 } 03905 } 03906 if (isset($this->lines[-1])) { 03907 $this->_renderStruct($ret, $this->lines[-1]); 03908 } 03909 03910 if (empty($errors)) { 03911 return '<p>' . $this->locale->getMessage('ErrorCollector: No errors') . '</p>'; 03912 } else { 03913 return '<ul><li>' . implode('</li><li>', $ret) . '</li></ul>'; 03914 } 03915 03916 } 03917 03918 private function _renderStruct(&$ret, $struct, $line = null, $col = null) { 03919 $stack = array($struct); 03920 $context_stack = array(array()); 03921 while ($current = array_pop($stack)) { 03922 $context = array_pop($context_stack); 03923 foreach ($current->errors as $error) { 03924 list($severity, $msg) = $error; 03925 $string = ''; 03926 $string .= '<div>'; 03927 // W3C uses an icon to indicate the severity of the error. 03928 $error = $this->locale->getErrorName($severity); 03929 $string .= "<span class=\"error e$severity\"><strong>$error</strong></span> "; 03930 if (!is_null($line) && !is_null($col)) { 03931 $string .= "<em class=\"location\">Line $line, Column $col: </em> "; 03932 } else { 03933 $string .= '<em class="location">End of Document: </em> '; 03934 } 03935 $string .= '<strong class="description">' . $this->generator->escape($msg) . '</strong> '; 03936 $string .= '</div>'; 03937 // Here, have a marker for the character on the column appropriate. 03938 // Be sure to clip extremely long lines. 03939 //$string .= '<pre>'; 03940 //$string .= ''; 03941 //$string .= '</pre>'; 03942 $ret[] = $string; 03943 } 03944 foreach ($current->children as $type => $array) { 03945 $context[] = $current; 03946 $stack = array_merge($stack, array_reverse($array, true)); 03947 for ($i = count($array); $i > 0; $i--) { 03948 $context_stack[] = $context; 03949 } 03950 } 03951 } 03952 } 03953 03954 } 03955 03956 03957 03958 03959 03966 class HTMLPurifier_ErrorStruct 03967 { 03968 03973 const TOKEN = 0; 03974 const ATTR = 1; 03975 const CSSPROP = 2; 03976 03980 public $type; 03981 03989 public $value; 03990 03994 public $errors = array(); 03995 04001 public $children = array(); 04002 04003 public function getChild($type, $id) { 04004 if (!isset($this->children[$type][$id])) { 04005 $this->children[$type][$id] = new HTMLPurifier_ErrorStruct(); 04006 $this->children[$type][$id]->type = $type; 04007 } 04008 return $this->children[$type][$id]; 04009 } 04010 04011 public function addError($severity, $message) { 04012 $this->errors[] = array($severity, $message); 04013 } 04014 04015 } 04016 04017 04018 04019 04020 04025 class HTMLPurifier_Exception extends Exception 04026 { 04027 04028 } 04029 04030 04031 04032 04033 04053 class HTMLPurifier_Filter 04054 { 04055 04059 public $name; 04060 04064 public function preFilter($html, $config, $context) { 04065 return $html; 04066 } 04067 04071 public function postFilter($html, $config, $context) { 04072 return $html; 04073 } 04074 04075 } 04076 04077 04078 04079 04080 04088 class HTMLPurifier_Generator 04089 { 04090 04094 private $_xhtml = true; 04095 04099 private $_scriptFix = false; 04100 04105 private $_def; 04106 04110 private $_sortAttr; 04111 04115 private $_flashCompat; 04116 04120 private $_innerHTMLFix; 04121 04126 private $_flashStack = array(); 04127 04131 protected $config; 04132 04137 public function __construct($config, $context) { 04138 $this->config = $config; 04139 $this->_scriptFix = $config->get('Output.CommentScriptContents'); 04140 $this->_innerHTMLFix = $config->get('Output.FixInnerHTML'); 04141 $this->_sortAttr = $config->get('Output.SortAttr'); 04142 $this->_flashCompat = $config->get('Output.FlashCompat'); 04143 $this->_def = $config->getHTMLDefinition(); 04144 $this->_xhtml = $this->_def->doctype->xml; 04145 } 04146 04153 public function generateFromTokens($tokens) { 04154 if (!$tokens) return ''; 04155 04156 // Basic algorithm 04157 $html = ''; 04158 for ($i = 0, $size = count($tokens); $i < $size; $i++) { 04159 if ($this->_scriptFix && $tokens[$i]->name === 'script' 04160 && $i + 2 < $size && $tokens[$i+2] instanceof HTMLPurifier_Token_End) { 04161 // script special case 04162 // the contents of the script block must be ONE token 04163 // for this to work. 04164 $html .= $this->generateFromToken($tokens[$i++]); 04165 $html .= $this->generateScriptFromToken($tokens[$i++]); 04166 } 04167 $html .= $this->generateFromToken($tokens[$i]); 04168 } 04169 04170 // Tidy cleanup 04171 if (extension_loaded('tidy') && $this->config->get('Output.TidyFormat')) { 04172 $tidy = new Tidy; 04173 $tidy->parseString($html, array( 04174 'indent'=> true, 04175 'output-xhtml' => $this->_xhtml, 04176 'show-body-only' => true, 04177 'indent-spaces' => 2, 04178 'wrap' => 68, 04179 ), 'utf8'); 04180 $tidy->cleanRepair(); 04181 $html = (string) $tidy; // explicit cast necessary 04182 } 04183 04184 // Normalize newlines to system defined value 04185 if ($this->config->get('Core.NormalizeNewlines')) { 04186 $nl = $this->config->get('Output.Newline'); 04187 if ($nl === null) $nl = PHP_EOL; 04188 if ($nl !== "\n") $html = str_replace("\n", $nl, $html); 04189 } 04190 return $html; 04191 } 04192 04198 public function generateFromToken($token) { 04199 if (!$token instanceof HTMLPurifier_Token) { 04200 trigger_error('Cannot generate HTML from non-HTMLPurifier_Token object', E_USER_WARNING); 04201 return ''; 04202 04203 } elseif ($token instanceof HTMLPurifier_Token_Start) { 04204 $attr = $this->generateAttributes($token->attr, $token->name); 04205 if ($this->_flashCompat) { 04206 if ($token->name == "object") { 04207 $flash = new stdclass(); 04208 $flash->attr = $token->attr; 04209 $flash->param = array(); 04210 $this->_flashStack[] = $flash; 04211 } 04212 } 04213 return '<' . $token->name . ($attr ? ' ' : '') . $attr . '>'; 04214 04215 } elseif ($token instanceof HTMLPurifier_Token_End) { 04216 $_extra = ''; 04217 if ($this->_flashCompat) { 04218 if ($token->name == "object" && !empty($this->_flashStack)) { 04219 // doesn't do anything for now 04220 } 04221 } 04222 return $_extra . '</' . $token->name . '>'; 04223 04224 } elseif ($token instanceof HTMLPurifier_Token_Empty) { 04225 if ($this->_flashCompat && $token->name == "param" && !empty($this->_flashStack)) { 04226 $this->_flashStack[count($this->_flashStack)-1]->param[$token->attr['name']] = $token->attr['value']; 04227 } 04228 $attr = $this->generateAttributes($token->attr, $token->name); 04229 return '<' . $token->name . ($attr ? ' ' : '') . $attr . 04230 ( $this->_xhtml ? ' /': '' ) // <br /> v. <br> 04231 . '>'; 04232 04233 } elseif ($token instanceof HTMLPurifier_Token_Text) { 04234 return $this->escape($token->data, ENT_NOQUOTES); 04235 04236 } elseif ($token instanceof HTMLPurifier_Token_Comment) { 04237 return '<!--' . $token->data . '-->'; 04238 } else { 04239 return ''; 04240 04241 } 04242 } 04243 04249 public function generateScriptFromToken($token) { 04250 if (!$token instanceof HTMLPurifier_Token_Text) return $this->generateFromToken($token); 04251 // Thanks <http://lachy.id.au/log/2005/05/script-comments> 04252 $data = preg_replace('#//\s*$#', '', $token->data); 04253 return '<!--//--><![CDATA[//><!--' . "\n" . trim($data) . "\n" . '//--><!]]>'; 04254 } 04255 04264 public function generateAttributes($assoc_array_of_attributes, $element = false) { 04265 $html = ''; 04266 if ($this->_sortAttr) ksort($assoc_array_of_attributes); 04267 foreach ($assoc_array_of_attributes as $key => $value) { 04268 if (!$this->_xhtml) { 04269 // Remove namespaced attributes 04270 if (strpos($key, ':') !== false) continue; 04271 // Check if we should minimize the attribute: val="val" -> val 04272 if ($element && !empty($this->_def->info[$element]->attr[$key]->minimized)) { 04273 $html .= $key . ' '; 04274 continue; 04275 } 04276 } 04277 // Workaround for Internet Explorer innerHTML bug. 04278 // Essentially, Internet Explorer, when calculating 04279 // innerHTML, omits quotes if there are no instances of 04280 // angled brackets, quotes or spaces. However, when parsing 04281 // HTML (for example, when you assign to innerHTML), it 04282 // treats backticks as quotes. Thus, 04283 // <img alt="``" /> 04284 // becomes 04285 // <img alt=`` /> 04286 // becomes 04287 // <img alt='' /> 04288 // Fortunately, all we need to do is trigger an appropriate 04289 // quoting style, which we do by adding an extra space. 04290 // This also is consistent with the W3C spec, which states 04291 // that user agents may ignore leading or trailing 04292 // whitespace (in fact, most don't, at least for attributes 04293 // like alt, but an extra space at the end is barely 04294 // noticeable). Still, we have a configuration knob for 04295 // this, since this transformation is not necesary if you 04296 // don't process user input with innerHTML or you don't plan 04297 // on supporting Internet Explorer. 04298 if ($this->_innerHTMLFix) { 04299 if (strpos($value, '`') !== false) { 04300 // check if correct quoting style would not already be 04301 // triggered 04302 if (strcspn($value, '"\' <>') === strlen($value)) { 04303 // protect! 04304 $value .= ' '; 04305 } 04306 } 04307 } 04308 $html .= $key.'="'.$this->escape($value).'" '; 04309 } 04310 return rtrim($html); 04311 } 04312 04323 public function escape($string, $quote = null) { 04324 // Workaround for APC bug on Mac Leopard reported by sidepodcast 04325 // http://htmlpurifier.org/phorum/read.php?3,4823,4846 04326 if ($quote === null) $quote = ENT_COMPAT; 04327 return htmlspecialchars($string, $quote, 'UTF-8'); 04328 } 04329 04330 } 04331 04332 04333 04334 04335 04359 class HTMLPurifier_HTMLDefinition extends HTMLPurifier_Definition 04360 { 04361 04362 // FULLY-PUBLIC VARIABLES --------------------------------------------- 04363 04367 public $info = array(); 04368 04372 public $info_global_attr = array(); 04373 04377 public $info_parent = 'div'; 04378 04383 public $info_parent_def; 04384 04389 public $info_block_wrapper = 'p'; 04390 04394 public $info_tag_transform = array(); 04395 04399 public $info_attr_transform_pre = array(); 04400 04404 public $info_attr_transform_post = array(); 04405 04410 public $info_content_sets = array(); 04411 04415 public $info_injector = array(); 04416 04420 public $doctype; 04421 04422 04423 04424 // RAW CUSTOMIZATION STUFF -------------------------------------------- 04425 04435 public function addAttribute($element_name, $attr_name, $def) { 04436 $module = $this->getAnonymousModule(); 04437 if (!isset($module->info[$element_name])) { 04438 $element = $module->addBlankElement($element_name); 04439 } else { 04440 $element = $module->info[$element_name]; 04441 } 04442 $element->attr[$attr_name] = $def; 04443 } 04444 04450 public function addElement($element_name, $type, $contents, $attr_collections, $attributes = array()) { 04451 $module = $this->getAnonymousModule(); 04452 // assume that if the user is calling this, the element 04453 // is safe. This may not be a good idea 04454 $element = $module->addElement($element_name, $type, $contents, $attr_collections, $attributes); 04455 return $element; 04456 } 04457 04464 public function addBlankElement($element_name) { 04465 $module = $this->getAnonymousModule(); 04466 $element = $module->addBlankElement($element_name); 04467 return $element; 04468 } 04469 04475 public function getAnonymousModule() { 04476 if (!$this->_anonModule) { 04477 $this->_anonModule = new HTMLPurifier_HTMLModule(); 04478 $this->_anonModule->name = 'Anonymous'; 04479 } 04480 return $this->_anonModule; 04481 } 04482 04483 private $_anonModule = null; 04484 04485 04486 // PUBLIC BUT INTERNAL VARIABLES -------------------------------------- 04487 04488 public $type = 'HTML'; 04489 public $manager; 04494 public function __construct() { 04495 $this->manager = new HTMLPurifier_HTMLModuleManager(); 04496 } 04497 04498 protected function doSetup($config) { 04499 $this->processModules($config); 04500 $this->setupConfigStuff($config); 04501 unset($this->manager); 04502 04503 // cleanup some of the element definitions 04504 foreach ($this->info as $k => $v) { 04505 unset($this->info[$k]->content_model); 04506 unset($this->info[$k]->content_model_type); 04507 } 04508 } 04509 04513 protected function processModules($config) { 04514 04515 if ($this->_anonModule) { 04516 // for user specific changes 04517 // this is late-loaded so we don't have to deal with PHP4 04518 // reference wonky-ness 04519 $this->manager->addModule($this->_anonModule); 04520 unset($this->_anonModule); 04521 } 04522 04523 $this->manager->setup($config); 04524 $this->doctype = $this->manager->doctype; 04525 04526 foreach ($this->manager->modules as $module) { 04527 foreach($module->info_tag_transform as $k => $v) { 04528 if ($v === false) unset($this->info_tag_transform[$k]); 04529 else $this->info_tag_transform[$k] = $v; 04530 } 04531 foreach($module->info_attr_transform_pre as $k => $v) { 04532 if ($v === false) unset($this->info_attr_transform_pre[$k]); 04533 else $this->info_attr_transform_pre[$k] = $v; 04534 } 04535 foreach($module->info_attr_transform_post as $k => $v) { 04536 if ($v === false) unset($this->info_attr_transform_post[$k]); 04537 else $this->info_attr_transform_post[$k] = $v; 04538 } 04539 foreach ($module->info_injector as $k => $v) { 04540 if ($v === false) unset($this->info_injector[$k]); 04541 else $this->info_injector[$k] = $v; 04542 } 04543 } 04544 04545 $this->info = $this->manager->getElements(); 04546 $this->info_content_sets = $this->manager->contentSets->lookup; 04547 04548 } 04549 04553 protected function setupConfigStuff($config) { 04554 04555 $block_wrapper = $config->get('HTML.BlockWrapper'); 04556 if (isset($this->info_content_sets['Block'][$block_wrapper])) { 04557 $this->info_block_wrapper = $block_wrapper; 04558 } else { 04559 trigger_error('Cannot use non-block element as block wrapper', 04560 E_USER_ERROR); 04561 } 04562 04563 $parent = $config->get('HTML.Parent'); 04564 $def = $this->manager->getElement($parent, true); 04565 if ($def) { 04566 $this->info_parent = $parent; 04567 $this->info_parent_def = $def; 04568 } else { 04569 trigger_error('Cannot use unrecognized element as parent', 04570 E_USER_ERROR); 04571 $this->info_parent_def = $this->manager->getElement($this->info_parent, true); 04572 } 04573 04574 // support template text 04575 $support = "(for information on implementing this, see the ". 04576 "support forums) "; 04577 04578 // setup allowed elements ----------------------------------------- 04579 04580 $allowed_elements = $config->get('HTML.AllowedElements'); 04581 $allowed_attributes = $config->get('HTML.AllowedAttributes'); // retrieve early 04582 04583 if (!is_array($allowed_elements) && !is_array($allowed_attributes)) { 04584 $allowed = $config->get('HTML.Allowed'); 04585 if (is_string($allowed)) { 04586 list($allowed_elements, $allowed_attributes) = $this->parseTinyMCEAllowedList($allowed); 04587 } 04588 } 04589 04590 if (is_array($allowed_elements)) { 04591 foreach ($this->info as $name => $d) { 04592 if(!isset($allowed_elements[$name])) unset($this->info[$name]); 04593 unset($allowed_elements[$name]); 04594 } 04595 // emit errors 04596 foreach ($allowed_elements as $element => $d) { 04597 $element = htmlspecialchars($element); // PHP doesn't escape errors, be careful! 04598 trigger_error("Element '$element' is not supported $support", E_USER_WARNING); 04599 } 04600 } 04601 04602 // setup allowed attributes --------------------------------------- 04603 04604 $allowed_attributes_mutable = $allowed_attributes; // by copy! 04605 if (is_array($allowed_attributes)) { 04606 04607 // This actually doesn't do anything, since we went away from 04608 // global attributes. It's possible that userland code uses 04609 // it, but HTMLModuleManager doesn't! 04610 foreach ($this->info_global_attr as $attr => $x) { 04611 $keys = array($attr, "*@$attr", "*.$attr"); 04612 $delete = true; 04613 foreach ($keys as $key) { 04614 if ($delete && isset($allowed_attributes[$key])) { 04615 $delete = false; 04616 } 04617 if (isset($allowed_attributes_mutable[$key])) { 04618 unset($allowed_attributes_mutable[$key]); 04619 } 04620 } 04621 if ($delete) unset($this->info_global_attr[$attr]); 04622 } 04623 04624 foreach ($this->info as $tag => $info) { 04625 foreach ($info->attr as $attr => $x) { 04626 $keys = array("$tag@$attr", $attr, "*@$attr", "$tag.$attr", "*.$attr"); 04627 $delete = true; 04628 foreach ($keys as $key) { 04629 if ($delete && isset($allowed_attributes[$key])) { 04630 $delete = false; 04631 } 04632 if (isset($allowed_attributes_mutable[$key])) { 04633 unset($allowed_attributes_mutable[$key]); 04634 } 04635 } 04636 if ($delete) { 04637 if ($this->info[$tag]->attr[$attr]->required) { 04638 trigger_error("Required attribute '$attr' in element '$tag' was not allowed, which means '$tag' will not be allowed either", E_USER_WARNING); 04639 } 04640 unset($this->info[$tag]->attr[$attr]); 04641 } 04642 } 04643 } 04644 // emit errors 04645 foreach ($allowed_attributes_mutable as $elattr => $d) { 04646 $bits = preg_split('/[.@]/', $elattr, 2); 04647 $c = count($bits); 04648 switch ($c) { 04649 case 2: 04650 if ($bits[0] !== '*') { 04651 $element = htmlspecialchars($bits[0]); 04652 $attribute = htmlspecialchars($bits[1]); 04653 if (!isset($this->info[$element])) { 04654 trigger_error("Cannot allow attribute '$attribute' if element '$element' is not allowed/supported $support"); 04655 } else { 04656 trigger_error("Attribute '$attribute' in element '$element' not supported $support", 04657 E_USER_WARNING); 04658 } 04659 break; 04660 } 04661 // otherwise fall through 04662 case 1: 04663 $attribute = htmlspecialchars($bits[0]); 04664 trigger_error("Global attribute '$attribute' is not ". 04665 "supported in any elements $support", 04666 E_USER_WARNING); 04667 break; 04668 } 04669 } 04670 04671 } 04672 04673 // setup forbidden elements --------------------------------------- 04674 04675 $forbidden_elements = $config->get('HTML.ForbiddenElements'); 04676 $forbidden_attributes = $config->get('HTML.ForbiddenAttributes'); 04677 04678 foreach ($this->info as $tag => $info) { 04679 if (isset($forbidden_elements[$tag])) { 04680 unset($this->info[$tag]); 04681 continue; 04682 } 04683 foreach ($info->attr as $attr => $x) { 04684 if ( 04685 isset($forbidden_attributes["$tag@$attr"]) || 04686 isset($forbidden_attributes["*@$attr"]) || 04687 isset($forbidden_attributes[$attr]) 04688 ) { 04689 unset($this->info[$tag]->attr[$attr]); 04690 continue; 04691 } // this segment might get removed eventually 04692 elseif (isset($forbidden_attributes["$tag.$attr"])) { 04693 // $tag.$attr are not user supplied, so no worries! 04694 trigger_error("Error with $tag.$attr: tag.attr syntax not supported for HTML.ForbiddenAttributes; use tag@attr instead", E_USER_WARNING); 04695 } 04696 } 04697 } 04698 foreach ($forbidden_attributes as $key => $v) { 04699 if (strlen($key) < 2) continue; 04700 if ($key[0] != '*') continue; 04701 if ($key[1] == '.') { 04702 trigger_error("Error with $key: *.attr syntax not supported for HTML.ForbiddenAttributes; use attr instead", E_USER_WARNING); 04703 } 04704 } 04705 04706 // setup injectors ----------------------------------------------------- 04707 foreach ($this->info_injector as $i => $injector) { 04708 if ($injector->checkNeeded($config) !== false) { 04709 // remove injector that does not have it's required 04710 // elements/attributes present, and is thus not needed. 04711 unset($this->info_injector[$i]); 04712 } 04713 } 04714 } 04715 04725 public function parseTinyMCEAllowedList($list) { 04726 04727 $list = str_replace(array(' ', "\t"), '', $list); 04728 04729 $elements = array(); 04730 $attributes = array(); 04731 04732 $chunks = preg_split('/(,|[\n\r]+)/', $list); 04733 foreach ($chunks as $chunk) { 04734 if (empty($chunk)) continue; 04735 // remove TinyMCE element control characters 04736 if (!strpos($chunk, '[')) { 04737 $element = $chunk; 04738 $attr = false; 04739 } else { 04740 list($element, $attr) = explode('[', $chunk); 04741 } 04742 if ($element !== '*') $elements[$element] = true; 04743 if (!$attr) continue; 04744 $attr = substr($attr, 0, strlen($attr) - 1); // remove trailing ] 04745 $attr = explode('|', $attr); 04746 foreach ($attr as $key) { 04747 $attributes["$element.$key"] = true; 04748 } 04749 } 04750 04751 return array($elements, $attributes); 04752 04753 } 04754 04755 04756 } 04757 04758 04759 04760 04761 04777 class HTMLPurifier_HTMLModule 04778 { 04779 04780 // -- Overloadable ---------------------------------------------------- 04781 04785 public $name; 04786 04791 public $elements = array(); 04792 04798 public $info = array(); 04799 04806 public $content_sets = array(); 04807 04816 public $attr_collections = array(); 04817 04821 public $info_tag_transform = array(); 04822 04826 public $info_attr_transform_pre = array(); 04827 04831 public $info_attr_transform_post = array(); 04832 04839 public $info_injector = array(); 04840 04847 public $defines_child_def = false; 04848 04861 public $safe = true; 04862 04871 public function getChildDef($def) {return false;} 04872 04873 // -- Convenience ----------------------------------------------------- 04874 04889 public function addElement($element, $type, $contents, $attr_includes = array(), $attr = array()) { 04890 $this->elements[] = $element; 04891 // parse content_model 04892 list($content_model_type, $content_model) = $this->parseContents($contents); 04893 // merge in attribute inclusions 04894 $this->mergeInAttrIncludes($attr, $attr_includes); 04895 // add element to content sets 04896 if ($type) $this->addElementToContentSet($element, $type); 04897 // create element 04898 $this->info[$element] = HTMLPurifier_ElementDef::create( 04899 $content_model, $content_model_type, $attr 04900 ); 04901 // literal object $contents means direct child manipulation 04902 if (!is_string($contents)) $this->info[$element]->child = $contents; 04903 return $this->info[$element]; 04904 } 04905 04912 public function addBlankElement($element) { 04913 if (!isset($this->info[$element])) { 04914 $this->elements[] = $element; 04915 $this->info[$element] = new HTMLPurifier_ElementDef(); 04916 $this->info[$element]->standalone = false; 04917 } else { 04918 trigger_error("Definition for $element already exists in module, cannot redefine"); 04919 } 04920 return $this->info[$element]; 04921 } 04922 04929 public function addElementToContentSet($element, $type) { 04930 if (!isset($this->content_sets[$type])) $this->content_sets[$type] = ''; 04931 else $this->content_sets[$type] .= ' | '; 04932 $this->content_sets[$type] .= $element; 04933 } 04934 04944 public function parseContents($contents) { 04945 if (!is_string($contents)) return array(null, null); // defer 04946 switch ($contents) { 04947 // check for shorthand content model forms 04948 case 'Empty': 04949 return array('empty', ''); 04950 case 'Inline': 04951 return array('optional', 'Inline | #PCDATA'); 04952 case 'Flow': 04953 return array('optional', 'Flow | #PCDATA'); 04954 } 04955 list($content_model_type, $content_model) = explode(':', $contents); 04956 $content_model_type = strtolower(trim($content_model_type)); 04957 $content_model = trim($content_model); 04958 return array($content_model_type, $content_model); 04959 } 04960 04967 public function mergeInAttrIncludes(&$attr, $attr_includes) { 04968 if (!is_array($attr_includes)) { 04969 if (empty($attr_includes)) $attr_includes = array(); 04970 else $attr_includes = array($attr_includes); 04971 } 04972 $attr[0] = $attr_includes; 04973 } 04974 04983 public function makeLookup($list) { 04984 if (is_string($list)) $list = func_get_args(); 04985 $ret = array(); 04986 foreach ($list as $value) { 04987 if (is_null($value)) continue; 04988 $ret[$value] = true; 04989 } 04990 return $ret; 04991 } 04992 04999 public function setup($config) {} 05000 05001 } 05002 05003 05004 05005 05006 05007 class HTMLPurifier_HTMLModuleManager 05008 { 05009 05013 public $doctypes; 05014 05018 public $doctype; 05019 05023 public $attrTypes; 05024 05029 public $modules = array(); 05030 05036 public $registeredModules = array(); 05037 05043 public $userModules = array(); 05044 05049 public $elementLookup = array(); 05050 05052 public $prefixes = array('HTMLPurifier_HTMLModule_'); 05053 05054 public $contentSets; 05055 public $attrCollections; 05058 public $trusted = false; 05059 05060 public function __construct() { 05061 05062 // editable internal objects 05063 $this->attrTypes = new HTMLPurifier_AttrTypes(); 05064 $this->doctypes = new HTMLPurifier_DoctypeRegistry(); 05065 05066 // setup basic modules 05067 $common = array( 05068 'CommonAttributes', 'Text', 'Hypertext', 'List', 05069 'Presentation', 'Edit', 'Bdo', 'Tables', 'Image', 05070 'StyleAttribute', 05071 // Unsafe: 05072 'Scripting', 'Object', 'Forms', 05073 // Sorta legacy, but present in strict: 05074 'Name', 05075 ); 05076 $transitional = array('Legacy', 'Target', 'Iframe'); 05077 $xml = array('XMLCommonAttributes'); 05078 $non_xml = array('NonXMLCommonAttributes'); 05079 05080 // setup basic doctypes 05081 $this->doctypes->register( 05082 'HTML 4.01 Transitional', false, 05083 array_merge($common, $transitional, $non_xml), 05084 array('Tidy_Transitional', 'Tidy_Proprietary'), 05085 array(), 05086 '-//W3C//DTD HTML 4.01 Transitional//EN', 05087 'http://www.w3.org/TR/html4/loose.dtd' 05088 ); 05089 05090 $this->doctypes->register( 05091 'HTML 4.01 Strict', false, 05092 array_merge($common, $non_xml), 05093 array('Tidy_Strict', 'Tidy_Proprietary', 'Tidy_Name'), 05094 array(), 05095 '-//W3C//DTD HTML 4.01//EN', 05096 'http://www.w3.org/TR/html4/strict.dtd' 05097 ); 05098 05099 $this->doctypes->register( 05100 'XHTML 1.0 Transitional', true, 05101 array_merge($common, $transitional, $xml, $non_xml), 05102 array('Tidy_Transitional', 'Tidy_XHTML', 'Tidy_Proprietary', 'Tidy_Name'), 05103 array(), 05104 '-//W3C//DTD XHTML 1.0 Transitional//EN', 05105 'http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd' 05106 ); 05107 05108 $this->doctypes->register( 05109 'XHTML 1.0 Strict', true, 05110 array_merge($common, $xml, $non_xml), 05111 array('Tidy_Strict', 'Tidy_XHTML', 'Tidy_Strict', 'Tidy_Proprietary', 'Tidy_Name'), 05112 array(), 05113 '-//W3C//DTD XHTML 1.0 Strict//EN', 05114 'http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd' 05115 ); 05116 05117 $this->doctypes->register( 05118 'XHTML 1.1', true, 05119 // Iframe is a real XHTML 1.1 module, despite being 05120 // "transitional"! 05121 array_merge($common, $xml, array('Ruby', 'Iframe')), 05122 array('Tidy_Strict', 'Tidy_XHTML', 'Tidy_Proprietary', 'Tidy_Strict', 'Tidy_Name'), // Tidy_XHTML1_1 05123 array(), 05124 '-//W3C//DTD XHTML 1.1//EN', 05125 'http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd' 05126 ); 05127 05128 } 05129 05151 public function registerModule($module, $overload = false) { 05152 if (is_string($module)) { 05153 // attempt to load the module 05154 $original_module = $module; 05155 $ok = false; 05156 foreach ($this->prefixes as $prefix) { 05157 $module = $prefix . $original_module; 05158 if (class_exists($module)) { 05159 $ok = true; 05160 break; 05161 } 05162 } 05163 if (!$ok) { 05164 $module = $original_module; 05165 if (!class_exists($module)) { 05166 trigger_error($original_module . ' module does not exist', 05167 E_USER_ERROR); 05168 return; 05169 } 05170 } 05171 $module = new $module(); 05172 } 05173 if (empty($module->name)) { 05174 trigger_error('Module instance of ' . get_class($module) . ' must have name'); 05175 return; 05176 } 05177 if (!$overload && isset($this->registeredModules[$module->name])) { 05178 trigger_error('Overloading ' . $module->name . ' without explicit overload parameter', E_USER_WARNING); 05179 } 05180 $this->registeredModules[$module->name] = $module; 05181 } 05182 05187 public function addModule($module) { 05188 $this->registerModule($module); 05189 if (is_object($module)) $module = $module->name; 05190 $this->userModules[] = $module; 05191 } 05192 05197 public function addPrefix($prefix) { 05198 $this->prefixes[] = $prefix; 05199 } 05200 05206 public function setup($config) { 05207 05208 $this->trusted = $config->get('HTML.Trusted'); 05209 05210 // generate 05211 $this->doctype = $this->doctypes->make($config); 05212 $modules = $this->doctype->modules; 05213 05214 // take out the default modules that aren't allowed 05215 $lookup = $config->get('HTML.AllowedModules'); 05216 $special_cases = $config->get('HTML.CoreModules'); 05217 05218 if (is_array($lookup)) { 05219 foreach ($modules as $k => $m) { 05220 if (isset($special_cases[$m])) continue; 05221 if (!isset($lookup[$m])) unset($modules[$k]); 05222 } 05223 } 05224 05225 // custom modules 05226 if ($config->get('HTML.Proprietary')) { 05227 $modules[] = 'Proprietary'; 05228 } 05229 if ($config->get('HTML.SafeObject')) { 05230 $modules[] = 'SafeObject'; 05231 } 05232 if ($config->get('HTML.SafeEmbed')) { 05233 $modules[] = 'SafeEmbed'; 05234 } 05235 if ($config->get('HTML.Nofollow')) { 05236 $modules[] = 'Nofollow'; 05237 } 05238 if ($config->get('HTML.TargetBlank')) { 05239 $modules[] = 'TargetBlank'; 05240 } 05241 05242 // merge in custom modules 05243 $modules = array_merge($modules, $this->userModules); 05244 05245 foreach ($modules as $module) { 05246 $this->processModule($module); 05247 $this->modules[$module]->setup($config); 05248 } 05249 05250 foreach ($this->doctype->tidyModules as $module) { 05251 $this->processModule($module); 05252 $this->modules[$module]->setup($config); 05253 } 05254 05255 // prepare any injectors 05256 foreach ($this->modules as $module) { 05257 $n = array(); 05258 foreach ($module->info_injector as $i => $injector) { 05259 if (!is_object($injector)) { 05260 $class = "HTMLPurifier_Injector_$injector"; 05261 $injector = new $class; 05262 } 05263 $n[$injector->name] = $injector; 05264 } 05265 $module->info_injector = $n; 05266 } 05267 05268 // setup lookup table based on all valid modules 05269 foreach ($this->modules as $module) { 05270 foreach ($module->info as $name => $def) { 05271 if (!isset($this->elementLookup[$name])) { 05272 $this->elementLookup[$name] = array(); 05273 } 05274 $this->elementLookup[$name][] = $module->name; 05275 } 05276 } 05277 05278 // note the different choice 05279 $this->contentSets = new HTMLPurifier_ContentSets( 05280 // content set assembly deals with all possible modules, 05281 // not just ones deemed to be "safe" 05282 $this->modules 05283 ); 05284 $this->attrCollections = new HTMLPurifier_AttrCollections( 05285 $this->attrTypes, 05286 // there is no way to directly disable a global attribute, 05287 // but using AllowedAttributes or simply not including 05288 // the module in your custom doctype should be sufficient 05289 $this->modules 05290 ); 05291 } 05292 05297 public function processModule($module) { 05298 if (!isset($this->registeredModules[$module]) || is_object($module)) { 05299 $this->registerModule($module); 05300 } 05301 $this->modules[$module] = $this->registeredModules[$module]; 05302 } 05303 05308 public function getElements() { 05309 05310 $elements = array(); 05311 foreach ($this->modules as $module) { 05312 if (!$this->trusted && !$module->safe) continue; 05313 foreach ($module->info as $name => $v) { 05314 if (isset($elements[$name])) continue; 05315 $elements[$name] = $this->getElement($name); 05316 } 05317 } 05318 05319 // remove dud elements, this happens when an element that 05320 // appeared to be safe actually wasn't 05321 foreach ($elements as $n => $v) { 05322 if ($v === false) unset($elements[$n]); 05323 } 05324 05325 return $elements; 05326 05327 } 05328 05339 public function getElement($name, $trusted = null) { 05340 05341 if (!isset($this->elementLookup[$name])) { 05342 return false; 05343 } 05344 05345 // setup global state variables 05346 $def = false; 05347 if ($trusted === null) $trusted = $this->trusted; 05348 05349 // iterate through each module that has registered itself to this 05350 // element 05351 foreach($this->elementLookup[$name] as $module_name) { 05352 05353 $module = $this->modules[$module_name]; 05354 05355 // refuse to create/merge from a module that is deemed unsafe-- 05356 // pretend the module doesn't exist--when trusted mode is not on. 05357 if (!$trusted && !$module->safe) { 05358 continue; 05359 } 05360 05361 // clone is used because, ideally speaking, the original 05362 // definition should not be modified. Usually, this will 05363 // make no difference, but for consistency's sake 05364 $new_def = clone $module->info[$name]; 05365 05366 if (!$def && $new_def->standalone) { 05367 $def = $new_def; 05368 } elseif ($def) { 05369 // This will occur even if $new_def is standalone. In practice, 05370 // this will usually result in a full replacement. 05371 $def->mergeIn($new_def); 05372 } else { 05373 // :TODO: 05374 // non-standalone definitions that don't have a standalone 05375 // to merge into could be deferred to the end 05376 // HOWEVER, it is perfectly valid for a non-standalone 05377 // definition to lack a standalone definition, even 05378 // after all processing: this allows us to safely 05379 // specify extra attributes for elements that may not be 05380 // enabled all in one place. In particular, this might 05381 // be the case for trusted elements. WARNING: care must 05382 // be taken that the /extra/ definitions are all safe. 05383 continue; 05384 } 05385 05386 // attribute value expansions 05387 $this->attrCollections->performInclusions($def->attr); 05388 $this->attrCollections->expandIdentifiers($def->attr, $this->attrTypes); 05389 05390 // descendants_are_inline, for ChildDef_Chameleon 05391 if (is_string($def->content_model) && 05392 strpos($def->content_model, 'Inline') !== false) { 05393 if ($name != 'del' && $name != 'ins') { 05394 // this is for you, ins/del 05395 $def->descendants_are_inline = true; 05396 } 05397 } 05398 05399 $this->contentSets->generateChildDef($def, $module); 05400 } 05401 05402 // This can occur if there is a blank definition, but no base to 05403 // mix it in with 05404 if (!$def) return false; 05405 05406 // add information on required attributes 05407 foreach ($def->attr as $attr_name => $attr_def) { 05408 if ($attr_def->required) { 05409 $def->required_attr[] = $attr_name; 05410 } 05411 } 05412 05413 return $def; 05414 05415 } 05416 05417 } 05418 05419 05420 05421 05422 05429 class HTMLPurifier_IDAccumulator 05430 { 05431 05436 public $ids = array(); 05437 05444 public static function build($config, $context) { 05445 $id_accumulator = new HTMLPurifier_IDAccumulator(); 05446 $id_accumulator->load($config->get('Attr.IDBlacklist')); 05447 return $id_accumulator; 05448 } 05449 05455 public function add($id) { 05456 if (isset($this->ids[$id])) return false; 05457 return $this->ids[$id] = true; 05458 } 05459 05465 public function load($array_of_ids) { 05466 foreach ($array_of_ids as $id) { 05467 $this->ids[$id] = true; 05468 } 05469 } 05470 05471 } 05472 05473 05474 05475 05476 05490 abstract class HTMLPurifier_Injector 05491 { 05492 05496 public $name; 05497 05501 protected $htmlDefinition; 05502 05507 protected $currentNesting; 05508 05513 protected $inputTokens; 05514 05520 protected $inputIndex; 05521 05527 public $needed = array(); 05528 05532 protected $rewind = false; 05533 05542 public function rewind($index) { 05543 $this->rewind = $index; 05544 } 05545 05549 public function getRewind() { 05550 $r = $this->rewind; 05551 $this->rewind = false; 05552 return $r; 05553 } 05554 05564 public function prepare($config, $context) { 05565 $this->htmlDefinition = $config->getHTMLDefinition(); 05566 // Even though this might fail, some unit tests ignore this and 05567 // still test checkNeeded, so be careful. Maybe get rid of that 05568 // dependency. 05569 $result = $this->checkNeeded($config); 05570 if ($result !== false) return $result; 05571 $this->currentNesting =& $context->get('CurrentNesting'); 05572 $this->inputTokens =& $context->get('InputTokens'); 05573 $this->inputIndex =& $context->get('InputIndex'); 05574 return false; 05575 } 05576 05585 public function checkNeeded($config) { 05586 $def = $config->getHTMLDefinition(); 05587 foreach ($this->needed as $element => $attributes) { 05588 if (is_int($element)) $element = $attributes; 05589 if (!isset($def->info[$element])) return $element; 05590 if (!is_array($attributes)) continue; 05591 foreach ($attributes as $name) { 05592 if (!isset($def->info[$element]->attr[$name])) return "$element.$name"; 05593 } 05594 } 05595 return false; 05596 } 05597 05603 public function allowsElement($name) { 05604 if (!empty($this->currentNesting)) { 05605 $parent_token = array_pop($this->currentNesting); 05606 $this->currentNesting[] = $parent_token; 05607 $parent = $this->htmlDefinition->info[$parent_token->name]; 05608 } else { 05609 $parent = $this->htmlDefinition->info_parent_def; 05610 } 05611 if (!isset($parent->child->elements[$name]) || isset($parent->excludes[$name])) { 05612 return false; 05613 } 05614 // check for exclusion 05615 for ($i = count($this->currentNesting) - 2; $i >= 0; $i--) { 05616 $node = $this->currentNesting[$i]; 05617 $def = $this->htmlDefinition->info[$node->name]; 05618 if (isset($def->excludes[$name])) return false; 05619 } 05620 return true; 05621 } 05622 05631 protected function forward(&$i, &$current) { 05632 if ($i === null) $i = $this->inputIndex + 1; 05633 else $i++; 05634 if (!isset($this->inputTokens[$i])) return false; 05635 $current = $this->inputTokens[$i]; 05636 return true; 05637 } 05638 05644 protected function forwardUntilEndToken(&$i, &$current, &$nesting) { 05645 $result = $this->forward($i, $current); 05646 if (!$result) return false; 05647 if ($nesting === null) $nesting = 0; 05648 if ($current instanceof HTMLPurifier_Token_Start) $nesting++; 05649 elseif ($current instanceof HTMLPurifier_Token_End) { 05650 if ($nesting <= 0) return false; 05651 $nesting--; 05652 } 05653 return true; 05654 } 05655 05664 protected function backward(&$i, &$current) { 05665 if ($i === null) $i = $this->inputIndex - 1; 05666 else $i--; 05667 if ($i < 0) return false; 05668 $current = $this->inputTokens[$i]; 05669 return true; 05670 } 05671 05681 protected function current(&$i, &$current) { 05682 if ($i === null) $i = $this->inputIndex; 05683 $current = $this->inputTokens[$i]; 05684 } 05685 05689 public function handleText(&$token) {} 05690 05694 public function handleElement(&$token) {} 05695 05699 public function handleEnd(&$token) { 05700 $this->notifyEnd($token); 05701 } 05702 05708 public function notifyEnd($token) {} 05709 05710 05711 } 05712 05713 05714 05715 05716 05721 class HTMLPurifier_Language 05722 { 05723 05727 public $code = 'en'; 05728 05732 public $fallback = false; 05733 05737 public $messages = array(); 05738 05742 public $errorNames = array(); 05743 05749 public $error = false; 05750 05755 public $_loaded = false; 05756 05760 protected $config, $context; 05761 05762 public function __construct($config, $context) { 05763 $this->config = $config; 05764 $this->context = $context; 05765 } 05766 05771 public function load() { 05772 if ($this->_loaded) return; 05773 $factory = HTMLPurifier_LanguageFactory::instance(); 05774 $factory->loadLanguage($this->code); 05775 foreach ($factory->keys as $key) { 05776 $this->$key = $factory->cache[$this->code][$key]; 05777 } 05778 $this->_loaded = true; 05779 } 05780 05786 public function getMessage($key) { 05787 if (!$this->_loaded) $this->load(); 05788 if (!isset($this->messages[$key])) return "[$key]"; 05789 return $this->messages[$key]; 05790 } 05791 05798 public function getErrorName($int) { 05799 if (!$this->_loaded) $this->load(); 05800 if (!isset($this->errorNames[$int])) return "[Error: $int]"; 05801 return $this->errorNames[$int]; 05802 } 05803 05807 public function listify($array) { 05808 $sep = $this->getMessage('Item separator'); 05809 $sep_last = $this->getMessage('Item separator last'); 05810 $ret = ''; 05811 for ($i = 0, $c = count($array); $i < $c; $i++) { 05812 if ($i == 0) { 05813 } elseif ($i + 1 < $c) { 05814 $ret .= $sep; 05815 } else { 05816 $ret .= $sep_last; 05817 } 05818 $ret .= $array[$i]; 05819 } 05820 return $ret; 05821 } 05822 05831 public function formatMessage($key, $args = array()) { 05832 if (!$this->_loaded) $this->load(); 05833 if (!isset($this->messages[$key])) return "[$key]"; 05834 $raw = $this->messages[$key]; 05835 $subst = array(); 05836 $generator = false; 05837 foreach ($args as $i => $value) { 05838 if (is_object($value)) { 05839 if ($value instanceof HTMLPurifier_Token) { 05840 // factor this out some time 05841 if (!$generator) $generator = $this->context->get('Generator'); 05842 if (isset($value->name)) $subst['$'.$i.'.Name'] = $value->name; 05843 if (isset($value->data)) $subst['$'.$i.'.Data'] = $value->data; 05844 $subst['$'.$i.'.Compact'] = 05845 $subst['$'.$i.'.Serialized'] = $generator->generateFromToken($value); 05846 // a more complex algorithm for compact representation 05847 // could be introduced for all types of tokens. This 05848 // may need to be factored out into a dedicated class 05849 if (!empty($value->attr)) { 05850 $stripped_token = clone $value; 05851 $stripped_token->attr = array(); 05852 $subst['$'.$i.'.Compact'] = $generator->generateFromToken($stripped_token); 05853 } 05854 $subst['$'.$i.'.Line'] = $value->line ? $value->line : 'unknown'; 05855 } 05856 continue; 05857 } elseif (is_array($value)) { 05858 $keys = array_keys($value); 05859 if (array_keys($keys) === $keys) { 05860 // list 05861 $subst['$'.$i] = $this->listify($value); 05862 } else { 05863 // associative array 05864 // no $i implementation yet, sorry 05865 $subst['$'.$i.'.Keys'] = $this->listify($keys); 05866 $subst['$'.$i.'.Values'] = $this->listify(array_values($value)); 05867 } 05868 continue; 05869 } 05870 $subst['$' . $i] = $value; 05871 } 05872 return strtr($raw, $subst); 05873 } 05874 05875 } 05876 05877 05878 05879 05880 05888 class HTMLPurifier_LanguageFactory 05889 { 05890 05896 public $cache; 05897 05903 public $keys = array('fallback', 'messages', 'errorNames'); 05904 05909 protected $validator; 05910 05916 protected $dir; 05917 05922 protected $mergeable_keys_map = array('messages' => true, 'errorNames' => true); 05923 05928 protected $mergeable_keys_list = array(); 05929 05935 public static function instance($prototype = null) { 05936 static $instance = null; 05937 if ($prototype !== null) { 05938 $instance = $prototype; 05939 } elseif ($instance === null || $prototype == true) { 05940 $instance = new HTMLPurifier_LanguageFactory(); 05941 $instance->setup(); 05942 } 05943 return $instance; 05944 } 05945 05950 public function setup() { 05951 $this->validator = new HTMLPurifier_AttrDef_Lang(); 05952 $this->dir = HTMLPURIFIER_PREFIX . '/HTMLPurifier'; 05953 } 05954 05961 public function create($config, $context, $code = false) { 05962 05963 // validate language code 05964 if ($code === false) { 05965 $code = $this->validator->validate( 05966 $config->get('Core.Language'), $config, $context 05967 ); 05968 } else { 05969 $code = $this->validator->validate($code, $config, $context); 05970 } 05971 if ($code === false) $code = 'en'; // malformed code becomes English 05972 05973 $pcode = str_replace('-', '_', $code); // make valid PHP classname 05974 static $depth = 0; // recursion protection 05975 05976 if ($code == 'en') { 05977 $lang = new HTMLPurifier_Language($config, $context); 05978 } else { 05979 $class = 'HTMLPurifier_Language_' . $pcode; 05980 $file = $this->dir . '/Language/classes/' . $code . '.php'; 05981 if (file_exists($file) || class_exists($class, false)) { 05982 $lang = new $class($config, $context); 05983 } else { 05984 // Go fallback 05985 $raw_fallback = $this->getFallbackFor($code); 05986 $fallback = $raw_fallback ? $raw_fallback : 'en'; 05987 $depth++; 05988 $lang = $this->create($config, $context, $fallback); 05989 if (!$raw_fallback) { 05990 $lang->error = true; 05991 } 05992 $depth--; 05993 } 05994 } 05995 05996 $lang->code = $code; 05997 05998 return $lang; 05999 06000 } 06001 06007 public function getFallbackFor($code) { 06008 $this->loadLanguage($code); 06009 return $this->cache[$code]['fallback']; 06010 } 06011 06016 public function loadLanguage($code) { 06017 static $languages_seen = array(); // recursion guard 06018 06019 // abort if we've already loaded it 06020 if (isset($this->cache[$code])) return; 06021 06022 // generate filename 06023 $filename = $this->dir . '/Language/messages/' . $code . '.php'; 06024 06025 // default fallback : may be overwritten by the ensuing include 06026 $fallback = ($code != 'en') ? 'en' : false; 06027 06028 // load primary localisation 06029 if (!file_exists($filename)) { 06030 // skip the include: will rely solely on fallback 06031 $filename = $this->dir . '/Language/messages/en.php'; 06032 $cache = array(); 06033 } else { 06034 include $filename; 06035 $cache = compact($this->keys); 06036 } 06037 06038 // load fallback localisation 06039 if (!empty($fallback)) { 06040 06041 // infinite recursion guard 06042 if (isset($languages_seen[$code])) { 06043 trigger_error('Circular fallback reference in language ' . 06044 $code, E_USER_ERROR); 06045 $fallback = 'en'; 06046 } 06047 $language_seen[$code] = true; 06048 06049 // load the fallback recursively 06050 $this->loadLanguage($fallback); 06051 $fallback_cache = $this->cache[$fallback]; 06052 06053 // merge fallback with current language 06054 foreach ( $this->keys as $key ) { 06055 if (isset($cache[$key]) && isset($fallback_cache[$key])) { 06056 if (isset($this->mergeable_keys_map[$key])) { 06057 $cache[$key] = $cache[$key] + $fallback_cache[$key]; 06058 } elseif (isset($this->mergeable_keys_list[$key])) { 06059 $cache[$key] = array_merge( $fallback_cache[$key], $cache[$key] ); 06060 } 06061 } else { 06062 $cache[$key] = $fallback_cache[$key]; 06063 } 06064 } 06065 06066 } 06067 06068 // save to cache for later retrieval 06069 $this->cache[$code] = $cache; 06070 06071 return; 06072 } 06073 06074 } 06075 06076 06077 06078 06079 06084 class HTMLPurifier_Length 06085 { 06086 06090 protected $n; 06091 06095 protected $unit; 06096 06100 protected $isValid; 06101 06105 protected static $allowedUnits = array( 06106 'em' => true, 'ex' => true, 'px' => true, 'in' => true, 06107 'cm' => true, 'mm' => true, 'pt' => true, 'pc' => true 06108 ); 06109 06114 public function __construct($n = '0', $u = false) { 06115 $this->n = (string) $n; 06116 $this->unit = $u !== false ? (string) $u : false; 06117 } 06118 06123 static public function make($s) { 06124 if ($s instanceof HTMLPurifier_Length) return $s; 06125 $n_length = strspn($s, '1234567890.+-'); 06126 $n = substr($s, 0, $n_length); 06127 $unit = substr($s, $n_length); 06128 if ($unit === '') $unit = false; 06129 return new HTMLPurifier_Length($n, $unit); 06130 } 06131 06135 protected function validate() { 06136 // Special case: 06137 if ($this->n === '+0' || $this->n === '-0') $this->n = '0'; 06138 if ($this->n === '0' && $this->unit === false) return true; 06139 if (!ctype_lower($this->unit)) $this->unit = strtolower($this->unit); 06140 if (!isset(HTMLPurifier_Length::$allowedUnits[$this->unit])) return false; 06141 // Hack: 06142 $def = new HTMLPurifier_AttrDef_CSS_Number(); 06143 $result = $def->validate($this->n, false, false); 06144 if ($result === false) return false; 06145 $this->n = $result; 06146 return true; 06147 } 06148 06152 public function toString() { 06153 if (!$this->isValid()) return false; 06154 return $this->n . $this->unit; 06155 } 06156 06160 public function getN() {return $this->n;} 06161 06165 public function getUnit() {return $this->unit;} 06166 06170 public function isValid() { 06171 if ($this->isValid === null) $this->isValid = $this->validate(); 06172 return $this->isValid; 06173 } 06174 06180 public function compareTo($l) { 06181 if ($l === false) return false; 06182 if ($l->unit !== $this->unit) { 06183 $converter = new HTMLPurifier_UnitConverter(); 06184 $l = $converter->convert($l, $this->unit); 06185 if ($l === false) return false; 06186 } 06187 return $this->n - $l->n; 06188 } 06189 06190 } 06191 06192 06193 06194 06195 06235 class HTMLPurifier_Lexer 06236 { 06237 06242 public $tracksLineNumbers = false; 06243 06244 // -- STATIC ---------------------------------------------------------- 06245 06261 public static function create($config) { 06262 06263 if (!($config instanceof HTMLPurifier_Config)) { 06264 $lexer = $config; 06265 trigger_error("Passing a prototype to 06266 HTMLPurifier_Lexer::create() is deprecated, please instead 06267 use %Core.LexerImpl", E_USER_WARNING); 06268 } else { 06269 $lexer = $config->get('Core.LexerImpl'); 06270 } 06271 06272 $needs_tracking = 06273 $config->get('Core.MaintainLineNumbers') || 06274 $config->get('Core.CollectErrors'); 06275 06276 $inst = null; 06277 if (is_object($lexer)) { 06278 $inst = $lexer; 06279 } else { 06280 06281 if (is_null($lexer)) { do { 06282 // auto-detection algorithm 06283 06284 if ($needs_tracking) { 06285 $lexer = 'DirectLex'; 06286 break; 06287 } 06288 06289 if ( 06290 class_exists('DOMDocument') && 06291 method_exists('DOMDocument', 'loadHTML') && 06292 !extension_loaded('domxml') 06293 ) { 06294 // check for DOM support, because while it's part of the 06295 // core, it can be disabled compile time. Also, the PECL 06296 // domxml extension overrides the default DOM, and is evil 06297 // and nasty and we shan't bother to support it 06298 $lexer = 'DOMLex'; 06299 } else { 06300 $lexer = 'DirectLex'; 06301 } 06302 06303 } while(0); } // do..while so we can break 06304 06305 // instantiate recognized string names 06306 switch ($lexer) { 06307 case 'DOMLex': 06308 $inst = new HTMLPurifier_Lexer_DOMLex(); 06309 break; 06310 case 'DirectLex': 06311 $inst = new HTMLPurifier_Lexer_DirectLex(); 06312 break; 06313 case 'PH5P': 06314 $inst = new HTMLPurifier_Lexer_PH5P(); 06315 break; 06316 default: 06317 throw new HTMLPurifier_Exception("Cannot instantiate unrecognized Lexer type " . htmlspecialchars($lexer)); 06318 } 06319 } 06320 06321 if (!$inst) throw new HTMLPurifier_Exception('No lexer was instantiated'); 06322 06323 // once PHP DOM implements native line numbers, or we 06324 // hack out something using XSLT, remove this stipulation 06325 if ($needs_tracking && !$inst->tracksLineNumbers) { 06326 throw new HTMLPurifier_Exception('Cannot use lexer that does not support line numbers with Core.MaintainLineNumbers or Core.CollectErrors (use DirectLex instead)'); 06327 } 06328 06329 return $inst; 06330 06331 } 06332 06333 // -- CONVENIENCE MEMBERS --------------------------------------------- 06334 06335 public function __construct() { 06336 $this->_entity_parser = new HTMLPurifier_EntityParser(); 06337 } 06338 06342 protected $_special_entity2str = 06343 array( 06344 '"' => '"', 06345 '&' => '&', 06346 '<' => '<', 06347 '>' => '>', 06348 ''' => "'", 06349 ''' => "'", 06350 ''' => "'" 06351 ); 06352 06367 public function parseData($string) { 06368 06369 // following functions require at least one character 06370 if ($string === '') return ''; 06371 06372 // subtracts amps that cannot possibly be escaped 06373 $num_amp = substr_count($string, '&') - substr_count($string, '& ') - 06374 ($string[strlen($string)-1] === '&' ? 1 : 0); 06375 06376 if (!$num_amp) return $string; // abort if no entities 06377 $num_esc_amp = substr_count($string, '&'); 06378 $string = strtr($string, $this->_special_entity2str); 06379 06380 // code duplication for sake of optimization, see above 06381 $num_amp_2 = substr_count($string, '&') - substr_count($string, '& ') - 06382 ($string[strlen($string)-1] === '&' ? 1 : 0); 06383 06384 if ($num_amp_2 <= $num_esc_amp) return $string; 06385 06386 // hmm... now we have some uncommon entities. Use the callback. 06387 $string = $this->_entity_parser->substituteSpecialEntities($string); 06388 return $string; 06389 } 06390 06397 public function tokenizeHTML($string, $config, $context) { 06398 trigger_error('Call to abstract class', E_USER_ERROR); 06399 } 06400 06407 protected static function escapeCDATA($string) { 06408 return preg_replace_callback( 06409 '/<!\[CDATA\[(.+?)\]\]>/s', 06410 array('HTMLPurifier_Lexer', 'CDATACallback'), 06411 $string 06412 ); 06413 } 06414 06418 protected static function escapeCommentedCDATA($string) { 06419 return preg_replace_callback( 06420 '#<!--//--><!\[CDATA\[//><!--(.+?)//--><!\]\]>#s', 06421 array('HTMLPurifier_Lexer', 'CDATACallback'), 06422 $string 06423 ); 06424 } 06425 06429 protected static function removeIEConditional($string) { 06430 return preg_replace( 06431 '#<!--\[if [^>]+\]>.*?<!\[endif\]-->#si', // probably should generalize for all strings 06432 '', 06433 $string 06434 ); 06435 } 06436 06446 protected static function CDATACallback($matches) { 06447 // not exactly sure why the character set is needed, but whatever 06448 return htmlspecialchars($matches[1], ENT_COMPAT, 'UTF-8'); 06449 } 06450 06456 public function normalize($html, $config, $context) { 06457 06458 // normalize newlines to \n 06459 if ($config->get('Core.NormalizeNewlines')) { 06460 $html = str_replace("\r\n", "\n", $html); 06461 $html = str_replace("\r", "\n", $html); 06462 } 06463 06464 if ($config->get('HTML.Trusted')) { 06465 // escape convoluted CDATA 06466 $html = $this->escapeCommentedCDATA($html); 06467 } 06468 06469 // escape CDATA 06470 $html = $this->escapeCDATA($html); 06471 06472 $html = $this->removeIEConditional($html); 06473 06474 // extract body from document if applicable 06475 if ($config->get('Core.ConvertDocumentToFragment')) { 06476 $e = false; 06477 if ($config->get('Core.CollectErrors')) { 06478 $e =& $context->get('ErrorCollector'); 06479 } 06480 $new_html = $this->extractBody($html); 06481 if ($e && $new_html != $html) { 06482 $e->send(E_WARNING, 'Lexer: Extracted body'); 06483 } 06484 $html = $new_html; 06485 } 06486 06487 // expand entities that aren't the big five 06488 $html = $this->_entity_parser->substituteNonSpecialEntities($html); 06489 06490 // clean into wellformed UTF-8 string for an SGML context: this has 06491 // to be done after entity expansion because the entities sometimes 06492 // represent non-SGML characters (horror, horror!) 06493 $html = HTMLPurifier_Encoder::cleanUTF8($html); 06494 06495 // if processing instructions are to removed, remove them now 06496 if ($config->get('Core.RemoveProcessingInstructions')) { 06497 $html = preg_replace('#<\?.+?\?>#s', '', $html); 06498 } 06499 06500 return $html; 06501 } 06502 06507 public function extractBody($html) { 06508 $matches = array(); 06509 $result = preg_match('!<body[^>]*>(.*)</body>!is', $html, $matches); 06510 if ($result) { 06511 return $matches[1]; 06512 } else { 06513 return $html; 06514 } 06515 } 06516 06517 } 06518 06519 06520 06521 06522 06531 class HTMLPurifier_PercentEncoder 06532 { 06533 06537 protected $preserve = array(); 06538 06542 public function __construct($preserve = false) { 06543 // unreserved letters, ought to const-ify 06544 for ($i = 48; $i <= 57; $i++) $this->preserve[$i] = true; // digits 06545 for ($i = 65; $i <= 90; $i++) $this->preserve[$i] = true; // upper-case 06546 for ($i = 97; $i <= 122; $i++) $this->preserve[$i] = true; // lower-case 06547 $this->preserve[45] = true; // Dash - 06548 $this->preserve[46] = true; // Period . 06549 $this->preserve[95] = true; // Underscore _ 06550 $this->preserve[126]= true; // Tilde ~ 06551 06552 // extra letters not to escape 06553 if ($preserve !== false) { 06554 for ($i = 0, $c = strlen($preserve); $i < $c; $i++) { 06555 $this->preserve[ord($preserve[$i])] = true; 06556 } 06557 } 06558 } 06559 06570 public function encode($string) { 06571 $ret = ''; 06572 for ($i = 0, $c = strlen($string); $i < $c; $i++) { 06573 if ($string[$i] !== '%' && !isset($this->preserve[$int = ord($string[$i])]) ) { 06574 $ret .= '%' . sprintf('%02X', $int); 06575 } else { 06576 $ret .= $string[$i]; 06577 } 06578 } 06579 return $ret; 06580 } 06581 06589 public function normalize($string) { 06590 if ($string == '') return ''; 06591 $parts = explode('%', $string); 06592 $ret = array_shift($parts); 06593 foreach ($parts as $part) { 06594 $length = strlen($part); 06595 if ($length < 2) { 06596 $ret .= '%25' . $part; 06597 continue; 06598 } 06599 $encoding = substr($part, 0, 2); 06600 $text = substr($part, 2); 06601 if (!ctype_xdigit($encoding)) { 06602 $ret .= '%25' . $part; 06603 continue; 06604 } 06605 $int = hexdec($encoding); 06606 if (isset($this->preserve[$int])) { 06607 $ret .= chr($int) . $text; 06608 continue; 06609 } 06610 $encoding = strtoupper($encoding); 06611 $ret .= '%' . $encoding . $text; 06612 } 06613 return $ret; 06614 } 06615 06616 } 06617 06618 06619 06620 06621 06625 class HTMLPurifier_PropertyList 06626 { 06630 protected $data = array(); 06631 06635 protected $parent; 06636 06637 protected $cache; 06638 06639 public function __construct($parent = null) { 06640 $this->parent = $parent; 06641 } 06642 06646 public function get($name) { 06647 if ($this->has($name)) return $this->data[$name]; 06648 // possible performance bottleneck, convert to iterative if necessary 06649 if ($this->parent) return $this->parent->get($name); 06650 throw new HTMLPurifier_Exception("Key '$name' not found"); 06651 } 06652 06656 public function set($name, $value) { 06657 $this->data[$name] = $value; 06658 } 06659 06663 public function has($name) { 06664 return array_key_exists($name, $this->data); 06665 } 06666 06671 public function reset($name = null) { 06672 if ($name == null) $this->data = array(); 06673 else unset($this->data[$name]); 06674 } 06675 06681 public function squash($force = false) { 06682 if ($this->cache !== null && !$force) return $this->cache; 06683 if ($this->parent) { 06684 return $this->cache = array_merge($this->parent->squash($force), $this->data); 06685 } else { 06686 return $this->cache = $this->data; 06687 } 06688 } 06689 06693 public function getParent() { 06694 return $this->parent; 06695 } 06696 06700 public function setParent($plist) { 06701 $this->parent = $plist; 06702 } 06703 } 06704 06705 06706 06707 06708 06712 class HTMLPurifier_PropertyListIterator extends FilterIterator 06713 { 06714 06715 protected $l; 06716 protected $filter; 06717 06722 public function __construct(Iterator $iterator, $filter = null) { 06723 parent::__construct($iterator); 06724 $this->l = strlen($filter); 06725 $this->filter = $filter; 06726 } 06727 06728 public function accept() { 06729 $key = $this->getInnerIterator()->key(); 06730 if( strncmp($key, $this->filter, $this->l) !== 0 ) { 06731 return false; 06732 } 06733 return true; 06734 } 06735 06736 } 06737 06738 06739 06740 06741 06751 abstract class HTMLPurifier_Strategy 06752 { 06753 06761 abstract public function execute($tokens, $config, $context); 06762 06763 } 06764 06765 06766 06767 06768 06777 class HTMLPurifier_StringHash extends ArrayObject 06778 { 06779 protected $accessed = array(); 06780 06784 public function offsetGet($index) { 06785 $this->accessed[$index] = true; 06786 return parent::offsetGet($index); 06787 } 06788 06793 public function getAccessed() { 06794 return $this->accessed; 06795 } 06796 06800 public function resetAccessed() { 06801 $this->accessed = array(); 06802 } 06803 } 06804 06805 06806 06807 06808 06834 class HTMLPurifier_StringHashParser 06835 { 06836 06837 public $default = 'ID'; 06838 06842 public function parseFile($file) { 06843 if (!file_exists($file)) return false; 06844 $fh = fopen($file, 'r'); 06845 if (!$fh) return false; 06846 $ret = $this->parseHandle($fh); 06847 fclose($fh); 06848 return $ret; 06849 } 06850 06854 public function parseMultiFile($file) { 06855 if (!file_exists($file)) return false; 06856 $ret = array(); 06857 $fh = fopen($file, 'r'); 06858 if (!$fh) return false; 06859 while (!feof($fh)) { 06860 $ret[] = $this->parseHandle($fh); 06861 } 06862 fclose($fh); 06863 return $ret; 06864 } 06865 06874 protected function parseHandle($fh) { 06875 $state = false; 06876 $single = false; 06877 $ret = array(); 06878 do { 06879 $line = fgets($fh); 06880 if ($line === false) break; 06881 $line = rtrim($line, "\n\r"); 06882 if (!$state && $line === '') continue; 06883 if ($line === '----') break; 06884 if (strncmp('--#', $line, 3) === 0) { 06885 // Comment 06886 continue; 06887 } elseif (strncmp('--', $line, 2) === 0) { 06888 // Multiline declaration 06889 $state = trim($line, '- '); 06890 if (!isset($ret[$state])) $ret[$state] = ''; 06891 continue; 06892 } elseif (!$state) { 06893 $single = true; 06894 if (strpos($line, ':') !== false) { 06895 // Single-line declaration 06896 list($state, $line) = explode(':', $line, 2); 06897 $line = trim($line); 06898 } else { 06899 // Use default declaration 06900 $state = $this->default; 06901 } 06902 } 06903 if ($single) { 06904 $ret[$state] = $line; 06905 $single = false; 06906 $state = false; 06907 } else { 06908 $ret[$state] .= "$line\n"; 06909 } 06910 } while (!feof($fh)); 06911 return $ret; 06912 } 06913 06914 } 06915 06916 06917 06918 06919 06923 abstract class HTMLPurifier_TagTransform 06924 { 06925 06929 public $transform_to; 06930 06937 abstract public function transform($tag, $config, $context); 06938 06946 protected function prependCSS(&$attr, $css) { 06947 $attr['style'] = isset($attr['style']) ? $attr['style'] : ''; 06948 $attr['style'] = $css . $attr['style']; 06949 } 06950 06951 } 06952 06953 06954 06955 06956 06960 class HTMLPurifier_Token { 06961 public $line; 06962 public $col; 06969 public $armor = array(); 06970 06974 public $skip; 06975 public $rewind; 06976 public $carryover; 06977 06978 public function __get($n) { 06979 if ($n === 'type') { 06980 trigger_error('Deprecated type property called; use instanceof', E_USER_NOTICE); 06981 switch (get_class($this)) { 06982 case 'HTMLPurifier_Token_Start': return 'start'; 06983 case 'HTMLPurifier_Token_Empty': return 'empty'; 06984 case 'HTMLPurifier_Token_End': return 'end'; 06985 case 'HTMLPurifier_Token_Text': return 'text'; 06986 case 'HTMLPurifier_Token_Comment': return 'comment'; 06987 default: return null; 06988 } 06989 } 06990 } 06991 06995 public function position($l = null, $c = null) { 06996 $this->line = $l; 06997 $this->col = $c; 06998 } 06999 07003 public function rawPosition($l, $c) { 07004 if ($c === -1) $l++; 07005 $this->line = $l; 07006 $this->col = $c; 07007 } 07008 07009 } 07010 07011 07012 07013 07014 07026 class HTMLPurifier_TokenFactory 07027 { 07028 07033 // p stands for prototype 07034 private $p_start, $p_end, $p_empty, $p_text, $p_comment; 07035 07039 public function __construct() { 07040 $this->p_start = new HTMLPurifier_Token_Start('', array()); 07041 $this->p_end = new HTMLPurifier_Token_End(''); 07042 $this->p_empty = new HTMLPurifier_Token_Empty('', array()); 07043 $this->p_text = new HTMLPurifier_Token_Text(''); 07044 $this->p_comment= new HTMLPurifier_Token_Comment(''); 07045 } 07046 07053 public function createStart($name, $attr = array()) { 07054 $p = clone $this->p_start; 07055 $p->__construct($name, $attr); 07056 return $p; 07057 } 07058 07064 public function createEnd($name) { 07065 $p = clone $this->p_end; 07066 $p->__construct($name); 07067 return $p; 07068 } 07069 07076 public function createEmpty($name, $attr = array()) { 07077 $p = clone $this->p_empty; 07078 $p->__construct($name, $attr); 07079 return $p; 07080 } 07081 07087 public function createText($data) { 07088 $p = clone $this->p_text; 07089 $p->__construct($data); 07090 return $p; 07091 } 07092 07098 public function createComment($data) { 07099 $p = clone $this->p_comment; 07100 $p->__construct($data); 07101 return $p; 07102 } 07103 07104 } 07105 07106 07107 07108 07109 07118 class HTMLPurifier_URI 07119 { 07120 07121 public $scheme, $userinfo, $host, $port, $path, $query, $fragment; 07122 07126 public function __construct($scheme, $userinfo, $host, $port, $path, $query, $fragment) { 07127 $this->scheme = is_null($scheme) || ctype_lower($scheme) ? $scheme : strtolower($scheme); 07128 $this->userinfo = $userinfo; 07129 $this->host = $host; 07130 $this->port = is_null($port) ? $port : (int) $port; 07131 $this->path = $path; 07132 $this->query = $query; 07133 $this->fragment = $fragment; 07134 } 07135 07142 public function getSchemeObj($config, $context) { 07143 $registry = HTMLPurifier_URISchemeRegistry::instance(); 07144 if ($this->scheme !== null) { 07145 $scheme_obj = $registry->getScheme($this->scheme, $config, $context); 07146 if (!$scheme_obj) return false; // invalid scheme, clean it out 07147 } else { 07148 // no scheme: retrieve the default one 07149 $def = $config->getDefinition('URI'); 07150 $scheme_obj = $def->getDefaultScheme($config, $context); 07151 if (!$scheme_obj) { 07152 // something funky happened to the default scheme object 07153 trigger_error( 07154 'Default scheme object "' . $def->defaultScheme . '" was not readable', 07155 E_USER_WARNING 07156 ); 07157 return false; 07158 } 07159 } 07160 return $scheme_obj; 07161 } 07162 07170 public function validate($config, $context) { 07171 07172 // ABNF definitions from RFC 3986 07173 $chars_sub_delims = '!$&\'()*+,;='; 07174 $chars_gen_delims = ':/?#[]@'; 07175 $chars_pchar = $chars_sub_delims . ':@'; 07176 07177 // validate host 07178 if (!is_null($this->host)) { 07179 $host_def = new HTMLPurifier_AttrDef_URI_Host(); 07180 $this->host = $host_def->validate($this->host, $config, $context); 07181 if ($this->host === false) $this->host = null; 07182 } 07183 07184 // validate scheme 07185 // NOTE: It's not appropriate to check whether or not this 07186 // scheme is in our registry, since a URIFilter may convert a 07187 // URI that we don't allow into one we do. So instead, we just 07188 // check if the scheme can be dropped because there is no host 07189 // and it is our default scheme. 07190 if (!is_null($this->scheme) && is_null($this->host) || $this->host === '') { 07191 // support for relative paths is pretty abysmal when the 07192 // scheme is present, so axe it when possible 07193 $def = $config->getDefinition('URI'); 07194 if ($def->defaultScheme === $this->scheme) { 07195 $this->scheme = null; 07196 } 07197 } 07198 07199 // validate username 07200 if (!is_null($this->userinfo)) { 07201 $encoder = new HTMLPurifier_PercentEncoder($chars_sub_delims . ':'); 07202 $this->userinfo = $encoder->encode($this->userinfo); 07203 } 07204 07205 // validate port 07206 if (!is_null($this->port)) { 07207 if ($this->port < 1 || $this->port > 65535) $this->port = null; 07208 } 07209 07210 // validate path 07211 $path_parts = array(); 07212 $segments_encoder = new HTMLPurifier_PercentEncoder($chars_pchar . '/'); 07213 if (!is_null($this->host)) { // this catches $this->host === '' 07214 // path-abempty (hier and relative) 07215 // http://www.example.com/my/path 07216 // //www.example.com/my/path (looks odd, but works, and 07217 // recognized by most browsers) 07218 // (this set is valid or invalid on a scheme by scheme 07219 // basis, so we'll deal with it later) 07220 // file:///my/path 07221 // ///my/path 07222 $this->path = $segments_encoder->encode($this->path); 07223 } elseif ($this->path !== '') { 07224 if ($this->path[0] === '/') { 07225 // path-absolute (hier and relative) 07226 // http:/my/path 07227 // /my/path 07228 if (strlen($this->path) >= 2 && $this->path[1] === '/') { 07229 // This could happen if both the host gets stripped 07230 // out 07231 // http://my/path 07232 // //my/path 07233 $this->path = ''; 07234 } else { 07235 $this->path = $segments_encoder->encode($this->path); 07236 } 07237 } elseif (!is_null($this->scheme)) { 07238 // path-rootless (hier) 07239 // http:my/path 07240 // Short circuit evaluation means we don't need to check nz 07241 $this->path = $segments_encoder->encode($this->path); 07242 } else { 07243 // path-noscheme (relative) 07244 // my/path 07245 // (once again, not checking nz) 07246 $segment_nc_encoder = new HTMLPurifier_PercentEncoder($chars_sub_delims . '@'); 07247 $c = strpos($this->path, '/'); 07248 if ($c !== false) { 07249 $this->path = 07250 $segment_nc_encoder->encode(substr($this->path, 0, $c)) . 07251 $segments_encoder->encode(substr($this->path, $c)); 07252 } else { 07253 $this->path = $segment_nc_encoder->encode($this->path); 07254 } 07255 } 07256 } else { 07257 // path-empty (hier and relative) 07258 $this->path = ''; // just to be safe 07259 } 07260 07261 // qf = query and fragment 07262 $qf_encoder = new HTMLPurifier_PercentEncoder($chars_pchar . '/?'); 07263 07264 if (!is_null($this->query)) { 07265 $this->query = $qf_encoder->encode($this->query); 07266 } 07267 07268 if (!is_null($this->fragment)) { 07269 $this->fragment = $qf_encoder->encode($this->fragment); 07270 } 07271 07272 return true; 07273 07274 } 07275 07280 public function toString() { 07281 // reconstruct authority 07282 $authority = null; 07283 // there is a rendering difference between a null authority 07284 // (http:foo-bar) and an empty string authority 07285 // (http:///foo-bar). 07286 if (!is_null($this->host)) { 07287 $authority = ''; 07288 if(!is_null($this->userinfo)) $authority .= $this->userinfo . '@'; 07289 $authority .= $this->host; 07290 if(!is_null($this->port)) $authority .= ':' . $this->port; 07291 } 07292 07293 // Reconstruct the result 07294 // One might wonder about parsing quirks from browsers after 07295 // this reconstruction. Unfortunately, parsing behavior depends 07296 // on what *scheme* was employed (file:///foo is handled *very* 07297 // differently than http:///foo), so unfortunately we have to 07298 // defer to the schemes to do the right thing. 07299 $result = ''; 07300 if (!is_null($this->scheme)) $result .= $this->scheme . ':'; 07301 if (!is_null($authority)) $result .= '//' . $authority; 07302 $result .= $this->path; 07303 if (!is_null($this->query)) $result .= '?' . $this->query; 07304 if (!is_null($this->fragment)) $result .= '#' . $this->fragment; 07305 07306 return $result; 07307 } 07308 07318 public function isLocal($config, $context) { 07319 if ($this->host === null) return true; 07320 $uri_def = $config->getDefinition('URI'); 07321 if ($uri_def->host === $this->host) return true; 07322 return false; 07323 } 07324 07332 public function isBenign($config, $context) { 07333 if (!$this->isLocal($config, $context)) return false; 07334 07335 $scheme_obj = $this->getSchemeObj($config, $context); 07336 if (!$scheme_obj) return false; // conservative approach 07337 07338 $current_scheme_obj = $config->getDefinition('URI')->getDefaultScheme($config, $context); 07339 if ($current_scheme_obj->secure) { 07340 if (!$scheme_obj->secure) { 07341 return false; 07342 } 07343 } 07344 return true; 07345 } 07346 07347 } 07348 07349 07350 07351 07352 07353 class HTMLPurifier_URIDefinition extends HTMLPurifier_Definition 07354 { 07355 07356 public $type = 'URI'; 07357 protected $filters = array(); 07358 protected $postFilters = array(); 07359 protected $registeredFilters = array(); 07360 07364 public $base; 07365 07369 public $host; 07370 07374 public $defaultScheme; 07375 07376 public function __construct() { 07377 $this->registerFilter(new HTMLPurifier_URIFilter_DisableExternal()); 07378 $this->registerFilter(new HTMLPurifier_URIFilter_DisableExternalResources()); 07379 $this->registerFilter(new HTMLPurifier_URIFilter_HostBlacklist()); 07380 $this->registerFilter(new HTMLPurifier_URIFilter_SafeIframe()); 07381 $this->registerFilter(new HTMLPurifier_URIFilter_MakeAbsolute()); 07382 $this->registerFilter(new HTMLPurifier_URIFilter_Munge()); 07383 } 07384 07385 public function registerFilter($filter) { 07386 $this->registeredFilters[$filter->name] = $filter; 07387 } 07388 07389 public function addFilter($filter, $config) { 07390 $r = $filter->prepare($config); 07391 if ($r === false) return; // null is ok, for backwards compat 07392 if ($filter->post) { 07393 $this->postFilters[$filter->name] = $filter; 07394 } else { 07395 $this->filters[$filter->name] = $filter; 07396 } 07397 } 07398 07399 protected function doSetup($config) { 07400 $this->setupMemberVariables($config); 07401 $this->setupFilters($config); 07402 } 07403 07404 protected function setupFilters($config) { 07405 foreach ($this->registeredFilters as $name => $filter) { 07406 if ($filter->always_load) { 07407 $this->addFilter($filter, $config); 07408 } else { 07409 $conf = $config->get('URI.' . $name); 07410 if ($conf !== false && $conf !== null) { 07411 $this->addFilter($filter, $config); 07412 } 07413 } 07414 } 07415 unset($this->registeredFilters); 07416 } 07417 07418 protected function setupMemberVariables($config) { 07419 $this->host = $config->get('URI.Host'); 07420 $base_uri = $config->get('URI.Base'); 07421 if (!is_null($base_uri)) { 07422 $parser = new HTMLPurifier_URIParser(); 07423 $this->base = $parser->parse($base_uri); 07424 $this->defaultScheme = $this->base->scheme; 07425 if (is_null($this->host)) $this->host = $this->base->host; 07426 } 07427 if (is_null($this->defaultScheme)) $this->defaultScheme = $config->get('URI.DefaultScheme'); 07428 } 07429 07430 public function getDefaultScheme($config, $context) { 07431 return HTMLPurifier_URISchemeRegistry::instance()->getScheme($this->defaultScheme, $config, $context); 07432 } 07433 07434 public function filter(&$uri, $config, $context) { 07435 foreach ($this->filters as $name => $f) { 07436 $result = $f->filter($uri, $config, $context); 07437 if (!$result) return false; 07438 } 07439 return true; 07440 } 07441 07442 public function postFilter(&$uri, $config, $context) { 07443 foreach ($this->postFilters as $name => $f) { 07444 $result = $f->filter($uri, $config, $context); 07445 if (!$result) return false; 07446 } 07447 return true; 07448 } 07449 07450 } 07451 07452 07453 07454 07455 07481 abstract class HTMLPurifier_URIFilter 07482 { 07483 07487 public $name; 07488 07492 public $post = false; 07493 07499 public $always_load = false; 07500 07505 public function prepare($config) {return true;} 07506 07516 abstract public function filter(&$uri, $config, $context); 07517 07518 } 07519 07520 07521 07522 07523 07528 class HTMLPurifier_URIParser 07529 { 07530 07534 protected $percentEncoder; 07535 07536 public function __construct() { 07537 $this->percentEncoder = new HTMLPurifier_PercentEncoder(); 07538 } 07539 07546 public function parse($uri) { 07547 07548 $uri = $this->percentEncoder->normalize($uri); 07549 07550 // Regexp is as per Appendix B. 07551 // Note that ["<>] are an addition to the RFC's recommended 07552 // characters, because they represent external delimeters. 07553 $r_URI = '!'. 07554 '(([^:/?#"<>]+):)?'. // 2. Scheme 07555 '(//([^/?#"<>]*))?'. // 4. Authority 07556 '([^?#"<>]*)'. // 5. Path 07557 '(\?([^#"<>]*))?'. // 7. Query 07558 '(#([^"<>]*))?'. // 8. Fragment 07559 '!'; 07560 07561 $matches = array(); 07562 $result = preg_match($r_URI, $uri, $matches); 07563 07564 if (!$result) return false; // *really* invalid URI 07565 07566 // seperate out parts 07567 $scheme = !empty($matches[1]) ? $matches[2] : null; 07568 $authority = !empty($matches[3]) ? $matches[4] : null; 07569 $path = $matches[5]; // always present, can be empty 07570 $query = !empty($matches[6]) ? $matches[7] : null; 07571 $fragment = !empty($matches[8]) ? $matches[9] : null; 07572 07573 // further parse authority 07574 if ($authority !== null) { 07575 $r_authority = "/^((.+?)@)?(\[[^\]]+\]|[^:]*)(:(\d*))?/"; 07576 $matches = array(); 07577 preg_match($r_authority, $authority, $matches); 07578 $userinfo = !empty($matches[1]) ? $matches[2] : null; 07579 $host = !empty($matches[3]) ? $matches[3] : ''; 07580 $port = !empty($matches[4]) ? (int) $matches[5] : null; 07581 } else { 07582 $port = $host = $userinfo = null; 07583 } 07584 07585 return new HTMLPurifier_URI( 07586 $scheme, $userinfo, $host, $port, $path, $query, $fragment); 07587 } 07588 07589 } 07590 07591 07592 07593 07594 07598 abstract class HTMLPurifier_URIScheme 07599 { 07600 07606 public $default_port = null; 07607 07612 public $browsable = false; 07613 07618 public $secure = false; 07619 07624 public $hierarchical = false; 07625 07631 public $may_omit_host = false; 07632 07640 public abstract function doValidate(&$uri, $config, $context); 07641 07650 public function validate(&$uri, $config, $context) { 07651 if ($this->default_port == $uri->port) $uri->port = null; 07652 // kludge: browsers do funny things when the scheme but not the 07653 // authority is set 07654 if (!$this->may_omit_host && 07655 // if the scheme is present, a missing host is always in error 07656 (!is_null($uri->scheme) && ($uri->host === '' || is_null($uri->host))) || 07657 // if the scheme is not present, a *blank* host is in error, 07658 // since this translates into '///path' which most browsers 07659 // interpret as being 'http://path'. 07660 (is_null($uri->scheme) && $uri->host === '') 07661 ) { 07662 do { 07663 if (is_null($uri->scheme)) { 07664 if (substr($uri->path, 0, 2) != '//') { 07665 $uri->host = null; 07666 break; 07667 } 07668 // URI is '////path', so we cannot nullify the 07669 // host to preserve semantics. Try expanding the 07670 // hostname instead (fall through) 07671 } 07672 // first see if we can manually insert a hostname 07673 $host = $config->get('URI.Host'); 07674 if (!is_null($host)) { 07675 $uri->host = $host; 07676 } else { 07677 // we can't do anything sensible, reject the URL. 07678 return false; 07679 } 07680 } while (false); 07681 } 07682 return $this->doValidate($uri, $config, $context); 07683 } 07684 07685 } 07686 07687 07688 07689 07690 07694 class HTMLPurifier_URISchemeRegistry 07695 { 07696 07704 public static function instance($prototype = null) { 07705 static $instance = null; 07706 if ($prototype !== null) { 07707 $instance = $prototype; 07708 } elseif ($instance === null || $prototype == true) { 07709 $instance = new HTMLPurifier_URISchemeRegistry(); 07710 } 07711 return $instance; 07712 } 07713 07717 protected $schemes = array(); 07718 07725 public function getScheme($scheme, $config, $context) { 07726 if (!$config) $config = HTMLPurifier_Config::createDefault(); 07727 07728 // important, otherwise attacker could include arbitrary file 07729 $allowed_schemes = $config->get('URI.AllowedSchemes'); 07730 if (!$config->get('URI.OverrideAllowedSchemes') && 07731 !isset($allowed_schemes[$scheme]) 07732 ) { 07733 return; 07734 } 07735 07736 if (isset($this->schemes[$scheme])) return $this->schemes[$scheme]; 07737 if (!isset($allowed_schemes[$scheme])) return; 07738 07739 $class = 'HTMLPurifier_URIScheme_' . $scheme; 07740 if (!class_exists($class)) return; 07741 $this->schemes[$scheme] = new $class(); 07742 return $this->schemes[$scheme]; 07743 } 07744 07750 public function register($scheme, $scheme_obj) { 07751 $this->schemes[$scheme] = $scheme_obj; 07752 } 07753 07754 } 07755 07756 07757 07758 07759 07764 class HTMLPurifier_UnitConverter 07765 { 07766 07767 const ENGLISH = 1; 07768 const METRIC = 2; 07769 const DIGITAL = 3; 07770 07780 protected static $units = array( 07781 self::ENGLISH => array( 07782 'px' => 3, // This is as per CSS 2.1 and Firefox. Your mileage may vary 07783 'pt' => 4, 07784 'pc' => 48, 07785 'in' => 288, 07786 self::METRIC => array('pt', '0.352777778', 'mm'), 07787 ), 07788 self::METRIC => array( 07789 'mm' => 1, 07790 'cm' => 10, 07791 self::ENGLISH => array('mm', '2.83464567', 'pt'), 07792 ), 07793 ); 07794 07798 protected $outputPrecision; 07799 07803 protected $internalPrecision; 07804 07808 private $bcmath; 07809 07810 public function __construct($output_precision = 4, $internal_precision = 10, $force_no_bcmath = false) { 07811 $this->outputPrecision = $output_precision; 07812 $this->internalPrecision = $internal_precision; 07813 $this->bcmath = !$force_no_bcmath && function_exists('bcmul'); 07814 } 07815 07834 public function convert($length, $to_unit) { 07835 07836 if (!$length->isValid()) return false; 07837 07838 $n = $length->getN(); 07839 $unit = $length->getUnit(); 07840 07841 if ($n === '0' || $unit === false) { 07842 return new HTMLPurifier_Length('0', false); 07843 } 07844 07845 $state = $dest_state = false; 07846 foreach (self::$units as $k => $x) { 07847 if (isset($x[$unit])) $state = $k; 07848 if (isset($x[$to_unit])) $dest_state = $k; 07849 } 07850 if (!$state || !$dest_state) return false; 07851 07852 // Some calculations about the initial precision of the number; 07853 // this will be useful when we need to do final rounding. 07854 $sigfigs = $this->getSigFigs($n); 07855 if ($sigfigs < $this->outputPrecision) $sigfigs = $this->outputPrecision; 07856 07857 // BCMath's internal precision deals only with decimals. Use 07858 // our default if the initial number has no decimals, or increase 07859 // it by how ever many decimals, thus, the number of guard digits 07860 // will always be greater than or equal to internalPrecision. 07861 $log = (int) floor(log(abs($n), 10)); 07862 $cp = ($log < 0) ? $this->internalPrecision - $log : $this->internalPrecision; // internal precision 07863 07864 for ($i = 0; $i < 2; $i++) { 07865 07866 // Determine what unit IN THIS SYSTEM we need to convert to 07867 if ($dest_state === $state) { 07868 // Simple conversion 07869 $dest_unit = $to_unit; 07870 } else { 07871 // Convert to the smallest unit, pending a system shift 07872 $dest_unit = self::$units[$state][$dest_state][0]; 07873 } 07874 07875 // Do the conversion if necessary 07876 if ($dest_unit !== $unit) { 07877 $factor = $this->div(self::$units[$state][$unit], self::$units[$state][$dest_unit], $cp); 07878 $n = $this->mul($n, $factor, $cp); 07879 $unit = $dest_unit; 07880 } 07881 07882 // Output was zero, so bail out early. Shouldn't ever happen. 07883 if ($n === '') { 07884 $n = '0'; 07885 $unit = $to_unit; 07886 break; 07887 } 07888 07889 // It was a simple conversion, so bail out 07890 if ($dest_state === $state) { 07891 break; 07892 } 07893 07894 if ($i !== 0) { 07895 // Conversion failed! Apparently, the system we forwarded 07896 // to didn't have this unit. This should never happen! 07897 return false; 07898 } 07899 07900 // Pre-condition: $i == 0 07901 07902 // Perform conversion to next system of units 07903 $n = $this->mul($n, self::$units[$state][$dest_state][1], $cp); 07904 $unit = self::$units[$state][$dest_state][2]; 07905 $state = $dest_state; 07906 07907 // One more loop around to convert the unit in the new system. 07908 07909 } 07910 07911 // Post-condition: $unit == $to_unit 07912 if ($unit !== $to_unit) return false; 07913 07914 // Useful for debugging: 07915 //echo "<pre>n"; 07916 //echo "$n\nsigfigs = $sigfigs\nnew_log = $new_log\nlog = $log\nrp = $rp\n</pre>\n"; 07917 07918 $n = $this->round($n, $sigfigs); 07919 if (strpos($n, '.') !== false) $n = rtrim($n, '0'); 07920 $n = rtrim($n, '.'); 07921 07922 return new HTMLPurifier_Length($n, $unit); 07923 } 07924 07930 public function getSigFigs($n) { 07931 $n = ltrim($n, '0+-'); 07932 $dp = strpos($n, '.'); // decimal position 07933 if ($dp === false) { 07934 $sigfigs = strlen(rtrim($n, '0')); 07935 } else { 07936 $sigfigs = strlen(ltrim($n, '0.')); // eliminate extra decimal character 07937 if ($dp !== 0) $sigfigs--; 07938 } 07939 return $sigfigs; 07940 } 07941 07945 private function add($s1, $s2, $scale) { 07946 if ($this->bcmath) return bcadd($s1, $s2, $scale); 07947 else return $this->scale($s1 + $s2, $scale); 07948 } 07949 07953 private function mul($s1, $s2, $scale) { 07954 if ($this->bcmath) return bcmul($s1, $s2, $scale); 07955 else return $this->scale($s1 * $s2, $scale); 07956 } 07957 07961 private function div($s1, $s2, $scale) { 07962 if ($this->bcmath) return bcdiv($s1, $s2, $scale); 07963 else return $this->scale($s1 / $s2, $scale); 07964 } 07965 07970 private function round($n, $sigfigs) { 07971 $new_log = (int) floor(log(abs($n), 10)); // Number of digits left of decimal - 1 07972 $rp = $sigfigs - $new_log - 1; // Number of decimal places needed 07973 $neg = $n < 0 ? '-' : ''; // Negative sign 07974 if ($this->bcmath) { 07975 if ($rp >= 0) { 07976 $n = bcadd($n, $neg . '0.' . str_repeat('0', $rp) . '5', $rp + 1); 07977 $n = bcdiv($n, '1', $rp); 07978 } else { 07979 // This algorithm partially depends on the standardized 07980 // form of numbers that comes out of bcmath. 07981 $n = bcadd($n, $neg . '5' . str_repeat('0', $new_log - $sigfigs), 0); 07982 $n = substr($n, 0, $sigfigs + strlen($neg)) . str_repeat('0', $new_log - $sigfigs + 1); 07983 } 07984 return $n; 07985 } else { 07986 return $this->scale(round($n, $sigfigs - $new_log - 1), $rp + 1); 07987 } 07988 } 07989 07993 private function scale($r, $scale) { 07994 if ($scale < 0) { 07995 // The f sprintf type doesn't support negative numbers, so we 07996 // need to cludge things manually. First get the string. 07997 $r = sprintf('%.0f', (float) $r); 07998 // Due to floating point precision loss, $r will more than likely 07999 // look something like 4652999999999.9234. We grab one more digit 08000 // than we need to precise from $r and then use that to round 08001 // appropriately. 08002 $precise = (string) round(substr($r, 0, strlen($r) + $scale), -1); 08003 // Now we return it, truncating the zero that was rounded off. 08004 return substr($precise, 0, -1) . str_repeat('0', -$scale + 1); 08005 } 08006 return sprintf('%.' . $scale . 'f', (float) $r); 08007 } 08008 08009 } 08010 08011 08012 08013 08014 08019 class HTMLPurifier_VarParser 08020 { 08021 08022 const STRING = 1; 08023 const ISTRING = 2; 08024 const TEXT = 3; 08025 const ITEXT = 4; 08026 const INT = 5; 08027 const FLOAT = 6; 08028 const BOOL = 7; 08029 const LOOKUP = 8; 08030 const ALIST = 9; 08031 const HASH = 10; 08032 const MIXED = 11; 08033 08038 static public $types = array( 08039 'string' => self::STRING, 08040 'istring' => self::ISTRING, 08041 'text' => self::TEXT, 08042 'itext' => self::ITEXT, 08043 'int' => self::INT, 08044 'float' => self::FLOAT, 08045 'bool' => self::BOOL, 08046 'lookup' => self::LOOKUP, 08047 'list' => self::ALIST, 08048 'hash' => self::HASH, 08049 'mixed' => self::MIXED 08050 ); 08051 08056 static public $stringTypes = array( 08057 self::STRING => true, 08058 self::ISTRING => true, 08059 self::TEXT => true, 08060 self::ITEXT => true, 08061 ); 08062 08073 final public function parse($var, $type, $allow_null = false) { 08074 if (is_string($type)) { 08075 if (!isset(HTMLPurifier_VarParser::$types[$type])) { 08076 throw new HTMLPurifier_VarParserException("Invalid type '$type'"); 08077 } else { 08078 $type = HTMLPurifier_VarParser::$types[$type]; 08079 } 08080 } 08081 $var = $this->parseImplementation($var, $type, $allow_null); 08082 if ($allow_null && $var === null) return null; 08083 // These are basic checks, to make sure nothing horribly wrong 08084 // happened in our implementations. 08085 switch ($type) { 08086 case (self::STRING): 08087 case (self::ISTRING): 08088 case (self::TEXT): 08089 case (self::ITEXT): 08090 if (!is_string($var)) break; 08091 if ($type == self::ISTRING || $type == self::ITEXT) $var = strtolower($var); 08092 return $var; 08093 case (self::INT): 08094 if (!is_int($var)) break; 08095 return $var; 08096 case (self::FLOAT): 08097 if (!is_float($var)) break; 08098 return $var; 08099 case (self::BOOL): 08100 if (!is_bool($var)) break; 08101 return $var; 08102 case (self::LOOKUP): 08103 case (self::ALIST): 08104 case (self::HASH): 08105 if (!is_array($var)) break; 08106 if ($type === self::LOOKUP) { 08107 foreach ($var as $k) if ($k !== true) $this->error('Lookup table contains value other than true'); 08108 } elseif ($type === self::ALIST) { 08109 $keys = array_keys($var); 08110 if (array_keys($keys) !== $keys) $this->error('Indices for list are not uniform'); 08111 } 08112 return $var; 08113 case (self::MIXED): 08114 return $var; 08115 default: 08116 $this->errorInconsistent(get_class($this), $type); 08117 } 08118 $this->errorGeneric($var, $type); 08119 } 08120 08125 protected function parseImplementation($var, $type, $allow_null) { 08126 return $var; 08127 } 08128 08132 protected function error($msg) { 08133 throw new HTMLPurifier_VarParserException($msg); 08134 } 08135 08142 protected function errorInconsistent($class, $type) { 08143 throw new HTMLPurifier_Exception("Inconsistency in $class: ".HTMLPurifier_VarParser::getTypeName($type)." not implemented"); 08144 } 08145 08149 protected function errorGeneric($var, $type) { 08150 $vtype = gettype($var); 08151 $this->error("Expected type ".HTMLPurifier_VarParser::getTypeName($type).", got $vtype"); 08152 } 08153 08154 static public function getTypeName($type) { 08155 static $lookup; 08156 if (!$lookup) { 08157 // Lazy load the alternative lookup table 08158 $lookup = array_flip(HTMLPurifier_VarParser::$types); 08159 } 08160 if (!isset($lookup[$type])) return 'unknown'; 08161 return $lookup[$type]; 08162 } 08163 08164 } 08165 08166 08167 08168 08169 08173 class HTMLPurifier_VarParserException extends HTMLPurifier_Exception 08174 { 08175 08176 } 08177 08178 08179 08180 08181 08193 class HTMLPurifier_AttrDef_CSS extends HTMLPurifier_AttrDef 08194 { 08195 08196 public function validate($css, $config, $context) { 08197 08198 $css = $this->parseCDATA($css); 08199 08200 $definition = $config->getCSSDefinition(); 08201 08202 // we're going to break the spec and explode by semicolons. 08203 // This is because semicolon rarely appears in escaped form 08204 // Doing this is generally flaky but fast 08205 // IT MIGHT APPEAR IN URIs, see HTMLPurifier_AttrDef_CSSURI 08206 // for details 08207 08208 $declarations = explode(';', $css); 08209 $propvalues = array(); 08210 08214 $property = false; 08215 $context->register('CurrentCSSProperty', $property); 08216 08217 foreach ($declarations as $declaration) { 08218 if (!$declaration) continue; 08219 if (!strpos($declaration, ':')) continue; 08220 list($property, $value) = explode(':', $declaration, 2); 08221 $property = trim($property); 08222 $value = trim($value); 08223 $ok = false; 08224 do { 08225 if (isset($definition->info[$property])) { 08226 $ok = true; 08227 break; 08228 } 08229 if (ctype_lower($property)) break; 08230 $property = strtolower($property); 08231 if (isset($definition->info[$property])) { 08232 $ok = true; 08233 break; 08234 } 08235 } while(0); 08236 if (!$ok) continue; 08237 // inefficient call, since the validator will do this again 08238 if (strtolower(trim($value)) !== 'inherit') { 08239 // inherit works for everything (but only on the base property) 08240 $result = $definition->info[$property]->validate( 08241 $value, $config, $context ); 08242 } else { 08243 $result = 'inherit'; 08244 } 08245 if ($result === false) continue; 08246 $propvalues[$property] = $result; 08247 } 08248 08249 $context->destroy('CurrentCSSProperty'); 08250 08251 // procedure does not write the new CSS simultaneously, so it's 08252 // slightly inefficient, but it's the only way of getting rid of 08253 // duplicates. Perhaps config to optimize it, but not now. 08254 08255 $new_declarations = ''; 08256 foreach ($propvalues as $prop => $value) { 08257 $new_declarations .= "$prop:$value;"; 08258 } 08259 08260 return $new_declarations ? $new_declarations : false; 08261 08262 } 08263 08264 } 08265 08266 08267 08268 08269 08274 class HTMLPurifier_AttrDef_Clone extends HTMLPurifier_AttrDef 08275 { 08279 protected $clone; 08280 08281 public function __construct($clone) { 08282 $this->clone = $clone; 08283 } 08284 08285 public function validate($v, $config, $context) { 08286 return $this->clone->validate($v, $config, $context); 08287 } 08288 08289 public function make($string) { 08290 return clone $this->clone; 08291 } 08292 08293 } 08294 08295 08296 08297 08298 08299 // Enum = Enumerated 08306 class HTMLPurifier_AttrDef_Enum extends HTMLPurifier_AttrDef 08307 { 08308 08313 public $valid_values = array(); 08314 08319 protected $case_sensitive = false; // values according to W3C spec 08320 08325 public function __construct( 08326 $valid_values = array(), $case_sensitive = false 08327 ) { 08328 $this->valid_values = array_flip($valid_values); 08329 $this->case_sensitive = $case_sensitive; 08330 } 08331 08332 public function validate($string, $config, $context) { 08333 $string = trim($string); 08334 if (!$this->case_sensitive) { 08335 // we may want to do full case-insensitive libraries 08336 $string = ctype_lower($string) ? $string : strtolower($string); 08337 } 08338 $result = isset($this->valid_values[$string]); 08339 08340 return $result ? $string : false; 08341 } 08342 08348 public function make($string) { 08349 if (strlen($string) > 2 && $string[0] == 's' && $string[1] == ':') { 08350 $string = substr($string, 2); 08351 $sensitive = true; 08352 } else { 08353 $sensitive = false; 08354 } 08355 $values = explode(',', $string); 08356 return new HTMLPurifier_AttrDef_Enum($values, $sensitive); 08357 } 08358 08359 } 08360 08361 08362 08363 08364 08372 class HTMLPurifier_AttrDef_Integer extends HTMLPurifier_AttrDef 08373 { 08374 08378 protected $negative = true; 08379 08383 protected $zero = true; 08384 08388 protected $positive = true; 08389 08395 public function __construct( 08396 $negative = true, $zero = true, $positive = true 08397 ) { 08398 $this->negative = $negative; 08399 $this->zero = $zero; 08400 $this->positive = $positive; 08401 } 08402 08403 public function validate($integer, $config, $context) { 08404 08405 $integer = $this->parseCDATA($integer); 08406 if ($integer === '') return false; 08407 08408 // we could possibly simply typecast it to integer, but there are 08409 // certain fringe cases that must not return an integer. 08410 08411 // clip leading sign 08412 if ( $this->negative && $integer[0] === '-' ) { 08413 $digits = substr($integer, 1); 08414 if ($digits === '0') $integer = '0'; // rm minus sign for zero 08415 } elseif( $this->positive && $integer[0] === '+' ) { 08416 $digits = $integer = substr($integer, 1); // rm unnecessary plus 08417 } else { 08418 $digits = $integer; 08419 } 08420 08421 // test if it's numeric 08422 if (!ctype_digit($digits)) return false; 08423 08424 // perform scope tests 08425 if (!$this->zero && $integer == 0) return false; 08426 if (!$this->positive && $integer > 0) return false; 08427 if (!$this->negative && $integer < 0) return false; 08428 08429 return $integer; 08430 08431 } 08432 08433 } 08434 08435 08436 08437 08438 08443 class HTMLPurifier_AttrDef_Lang extends HTMLPurifier_AttrDef 08444 { 08445 08446 public function validate($string, $config, $context) { 08447 08448 $string = trim($string); 08449 if (!$string) return false; 08450 08451 $subtags = explode('-', $string); 08452 $num_subtags = count($subtags); 08453 08454 if ($num_subtags == 0) return false; // sanity check 08455 08456 // process primary subtag : $subtags[0] 08457 $length = strlen($subtags[0]); 08458 switch ($length) { 08459 case 0: 08460 return false; 08461 case 1: 08462 if (! ($subtags[0] == 'x' || $subtags[0] == 'i') ) { 08463 return false; 08464 } 08465 break; 08466 case 2: 08467 case 3: 08468 if (! ctype_alpha($subtags[0]) ) { 08469 return false; 08470 } elseif (! ctype_lower($subtags[0]) ) { 08471 $subtags[0] = strtolower($subtags[0]); 08472 } 08473 break; 08474 default: 08475 return false; 08476 } 08477 08478 $new_string = $subtags[0]; 08479 if ($num_subtags == 1) return $new_string; 08480 08481 // process second subtag : $subtags[1] 08482 $length = strlen($subtags[1]); 08483 if ($length == 0 || ($length == 1 && $subtags[1] != 'x') || $length > 8 || !ctype_alnum($subtags[1])) { 08484 return $new_string; 08485 } 08486 if (!ctype_lower($subtags[1])) $subtags[1] = strtolower($subtags[1]); 08487 08488 $new_string .= '-' . $subtags[1]; 08489 if ($num_subtags == 2) return $new_string; 08490 08491 // process all other subtags, index 2 and up 08492 for ($i = 2; $i < $num_subtags; $i++) { 08493 $length = strlen($subtags[$i]); 08494 if ($length == 0 || $length > 8 || !ctype_alnum($subtags[$i])) { 08495 return $new_string; 08496 } 08497 if (!ctype_lower($subtags[$i])) { 08498 $subtags[$i] = strtolower($subtags[$i]); 08499 } 08500 $new_string .= '-' . $subtags[$i]; 08501 } 08502 08503 return $new_string; 08504 08505 } 08506 08507 } 08508 08509 08510 08511 08512 08516 class HTMLPurifier_AttrDef_Switch 08517 { 08518 08519 protected $tag; 08520 protected $withTag, $withoutTag; 08521 08527 public function __construct($tag, $with_tag, $without_tag) { 08528 $this->tag = $tag; 08529 $this->withTag = $with_tag; 08530 $this->withoutTag = $without_tag; 08531 } 08532 08533 public function validate($string, $config, $context) { 08534 $token = $context->get('CurrentToken', true); 08535 if (!$token || $token->name !== $this->tag) { 08536 return $this->withoutTag->validate($string, $config, $context); 08537 } else { 08538 return $this->withTag->validate($string, $config, $context); 08539 } 08540 } 08541 08542 } 08543 08544 08545 08546 08547 08551 class HTMLPurifier_AttrDef_Text extends HTMLPurifier_AttrDef 08552 { 08553 08554 public function validate($string, $config, $context) { 08555 return $this->parseCDATA($string); 08556 } 08557 08558 } 08559 08560 08561 08562 08563 08568 class HTMLPurifier_AttrDef_URI extends HTMLPurifier_AttrDef 08569 { 08570 08571 protected $parser; 08572 protected $embedsResource; 08573 08577 public function __construct($embeds_resource = false) { 08578 $this->parser = new HTMLPurifier_URIParser(); 08579 $this->embedsResource = (bool) $embeds_resource; 08580 } 08581 08582 public function make($string) { 08583 $embeds = ($string === 'embedded'); 08584 return new HTMLPurifier_AttrDef_URI($embeds); 08585 } 08586 08587 public function validate($uri, $config, $context) { 08588 08589 if ($config->get('URI.Disable')) return false; 08590 08591 $uri = $this->parseCDATA($uri); 08592 08593 // parse the URI 08594 $uri = $this->parser->parse($uri); 08595 if ($uri === false) return false; 08596 08597 // add embedded flag to context for validators 08598 $context->register('EmbeddedURI', $this->embedsResource); 08599 08600 $ok = false; 08601 do { 08602 08603 // generic validation 08604 $result = $uri->validate($config, $context); 08605 if (!$result) break; 08606 08607 // chained filtering 08608 $uri_def = $config->getDefinition('URI'); 08609 $result = $uri_def->filter($uri, $config, $context); 08610 if (!$result) break; 08611 08612 // scheme-specific validation 08613 $scheme_obj = $uri->getSchemeObj($config, $context); 08614 if (!$scheme_obj) break; 08615 if ($this->embedsResource && !$scheme_obj->browsable) break; 08616 $result = $scheme_obj->validate($uri, $config, $context); 08617 if (!$result) break; 08618 08619 // Post chained filtering 08620 $result = $uri_def->postFilter($uri, $config, $context); 08621 if (!$result) break; 08622 08623 // survived gauntlet 08624 $ok = true; 08625 08626 } while (false); 08627 08628 $context->destroy('EmbeddedURI'); 08629 if (!$ok) return false; 08630 08631 // back to string 08632 return $uri->toString(); 08633 08634 } 08635 08636 } 08637 08638 08639 08640 08641 08645 class HTMLPurifier_AttrDef_CSS_Number extends HTMLPurifier_AttrDef 08646 { 08647 08651 protected $non_negative = false; 08652 08656 public function __construct($non_negative = false) { 08657 $this->non_negative = $non_negative; 08658 } 08659 08664 public function validate($number, $config, $context) { 08665 08666 $number = $this->parseCDATA($number); 08667 08668 if ($number === '') return false; 08669 if ($number === '0') return '0'; 08670 08671 $sign = ''; 08672 switch ($number[0]) { 08673 case '-': 08674 if ($this->non_negative) return false; 08675 $sign = '-'; 08676 case '+': 08677 $number = substr($number, 1); 08678 } 08679 08680 if (ctype_digit($number)) { 08681 $number = ltrim($number, '0'); 08682 return $number ? $sign . $number : '0'; 08683 } 08684 08685 // Period is the only non-numeric character allowed 08686 if (strpos($number, '.') === false) return false; 08687 08688 list($left, $right) = explode('.', $number, 2); 08689 08690 if ($left === '' && $right === '') return false; 08691 if ($left !== '' && !ctype_digit($left)) return false; 08692 08693 $left = ltrim($left, '0'); 08694 $right = rtrim($right, '0'); 08695 08696 if ($right === '') { 08697 return $left ? $sign . $left : '0'; 08698 } elseif (!ctype_digit($right)) { 08699 return false; 08700 } 08701 08702 return $sign . $left . '.' . $right; 08703 08704 } 08705 08706 } 08707 08708 08709 08710 08711 08712 class HTMLPurifier_AttrDef_CSS_AlphaValue extends HTMLPurifier_AttrDef_CSS_Number 08713 { 08714 08715 public function __construct() { 08716 parent::__construct(false); // opacity is non-negative, but we will clamp it 08717 } 08718 08719 public function validate($number, $config, $context) { 08720 $result = parent::validate($number, $config, $context); 08721 if ($result === false) return $result; 08722 $float = (float) $result; 08723 if ($float < 0.0) $result = '0'; 08724 if ($float > 1.0) $result = '1'; 08725 return $result; 08726 } 08727 08728 } 08729 08730 08731 08732 08733 08738 class HTMLPurifier_AttrDef_CSS_Background extends HTMLPurifier_AttrDef 08739 { 08740 08745 protected $info; 08746 08747 public function __construct($config) { 08748 $def = $config->getCSSDefinition(); 08749 $this->info['background-color'] = $def->info['background-color']; 08750 $this->info['background-image'] = $def->info['background-image']; 08751 $this->info['background-repeat'] = $def->info['background-repeat']; 08752 $this->info['background-attachment'] = $def->info['background-attachment']; 08753 $this->info['background-position'] = $def->info['background-position']; 08754 } 08755 08756 public function validate($string, $config, $context) { 08757 08758 // regular pre-processing 08759 $string = $this->parseCDATA($string); 08760 if ($string === '') return false; 08761 08762 // munge rgb() decl if necessary 08763 $string = $this->mungeRgb($string); 08764 08765 // assumes URI doesn't have spaces in it 08766 $bits = explode(' ', strtolower($string)); // bits to process 08767 08768 $caught = array(); 08769 $caught['color'] = false; 08770 $caught['image'] = false; 08771 $caught['repeat'] = false; 08772 $caught['attachment'] = false; 08773 $caught['position'] = false; 08774 08775 $i = 0; // number of catches 08776 $none = false; 08777 08778 foreach ($bits as $bit) { 08779 if ($bit === '') continue; 08780 foreach ($caught as $key => $status) { 08781 if ($key != 'position') { 08782 if ($status !== false) continue; 08783 $r = $this->info['background-' . $key]->validate($bit, $config, $context); 08784 } else { 08785 $r = $bit; 08786 } 08787 if ($r === false) continue; 08788 if ($key == 'position') { 08789 if ($caught[$key] === false) $caught[$key] = ''; 08790 $caught[$key] .= $r . ' '; 08791 } else { 08792 $caught[$key] = $r; 08793 } 08794 $i++; 08795 break; 08796 } 08797 } 08798 08799 if (!$i) return false; 08800 if ($caught['position'] !== false) { 08801 $caught['position'] = $this->info['background-position']-> 08802 validate($caught['position'], $config, $context); 08803 } 08804 08805 $ret = array(); 08806 foreach ($caught as $value) { 08807 if ($value === false) continue; 08808 $ret[] = $value; 08809 } 08810 08811 if (empty($ret)) return false; 08812 return implode(' ', $ret); 08813 08814 } 08815 08816 } 08817 08818 08819 08820 08821 08822 /* W3C says: 08823 [ // adjective and number must be in correct order, even if 08824 // you could switch them without introducing ambiguity. 08825 // some browsers support that syntax 08826 [ 08827 <percentage> | <length> | left | center | right 08828 ] 08829 [ 08830 <percentage> | <length> | top | center | bottom 08831 ]? 08832 ] | 08833 [ // this signifies that the vertical and horizontal adjectives 08834 // can be arbitrarily ordered, however, there can only be two, 08835 // one of each, or none at all 08836 [ 08837 left | center | right 08838 ] || 08839 [ 08840 top | center | bottom 08841 ] 08842 ] 08843 top, left = 0% 08844 center, (none) = 50% 08845 bottom, right = 100% 08846 */ 08847 08848 /* QuirksMode says: 08849 keyword + length/percentage must be ordered correctly, as per W3C 08850 08851 Internet Explorer and Opera, however, support arbitrary ordering. We 08852 should fix it up. 08853 08854 Minor issue though, not strictly necessary. 08855 */ 08856 08857 // control freaks may appreciate the ability to convert these to 08858 // percentages or something, but it's not necessary 08859 08863 class HTMLPurifier_AttrDef_CSS_BackgroundPosition extends HTMLPurifier_AttrDef 08864 { 08865 08866 protected $length; 08867 protected $percentage; 08868 08869 public function __construct() { 08870 $this->length = new HTMLPurifier_AttrDef_CSS_Length(); 08871 $this->percentage = new HTMLPurifier_AttrDef_CSS_Percentage(); 08872 } 08873 08874 public function validate($string, $config, $context) { 08875 $string = $this->parseCDATA($string); 08876 $bits = explode(' ', $string); 08877 08878 $keywords = array(); 08879 $keywords['h'] = false; // left, right 08880 $keywords['v'] = false; // top, bottom 08881 $keywords['ch'] = false; // center (first word) 08882 $keywords['cv'] = false; // center (second word) 08883 $measures = array(); 08884 08885 $i = 0; 08886 08887 $lookup = array( 08888 'top' => 'v', 08889 'bottom' => 'v', 08890 'left' => 'h', 08891 'right' => 'h', 08892 'center' => 'c' 08893 ); 08894 08895 foreach ($bits as $bit) { 08896 if ($bit === '') continue; 08897 08898 // test for keyword 08899 $lbit = ctype_lower($bit) ? $bit : strtolower($bit); 08900 if (isset($lookup[$lbit])) { 08901 $status = $lookup[$lbit]; 08902 if ($status == 'c') { 08903 if ($i == 0) { 08904 $status = 'ch'; 08905 } else { 08906 $status = 'cv'; 08907 } 08908 } 08909 $keywords[$status] = $lbit; 08910 $i++; 08911 } 08912 08913 // test for length 08914 $r = $this->length->validate($bit, $config, $context); 08915 if ($r !== false) { 08916 $measures[] = $r; 08917 $i++; 08918 } 08919 08920 // test for percentage 08921 $r = $this->percentage->validate($bit, $config, $context); 08922 if ($r !== false) { 08923 $measures[] = $r; 08924 $i++; 08925 } 08926 08927 } 08928 08929 if (!$i) return false; // no valid values were caught 08930 08931 $ret = array(); 08932 08933 // first keyword 08934 if ($keywords['h']) $ret[] = $keywords['h']; 08935 elseif ($keywords['ch']) { 08936 $ret[] = $keywords['ch']; 08937 $keywords['cv'] = false; // prevent re-use: center = center center 08938 } 08939 elseif (count($measures)) $ret[] = array_shift($measures); 08940 08941 if ($keywords['v']) $ret[] = $keywords['v']; 08942 elseif ($keywords['cv']) $ret[] = $keywords['cv']; 08943 elseif (count($measures)) $ret[] = array_shift($measures); 08944 08945 if (empty($ret)) return false; 08946 return implode(' ', $ret); 08947 08948 } 08949 08950 } 08951 08952 08953 08954 08955 08959 class HTMLPurifier_AttrDef_CSS_Border extends HTMLPurifier_AttrDef 08960 { 08961 08965 protected $info = array(); 08966 08967 public function __construct($config) { 08968 $def = $config->getCSSDefinition(); 08969 $this->info['border-width'] = $def->info['border-width']; 08970 $this->info['border-style'] = $def->info['border-style']; 08971 $this->info['border-top-color'] = $def->info['border-top-color']; 08972 } 08973 08974 public function validate($string, $config, $context) { 08975 $string = $this->parseCDATA($string); 08976 $string = $this->mungeRgb($string); 08977 $bits = explode(' ', $string); 08978 $done = array(); // segments we've finished 08979 $ret = ''; // return value 08980 foreach ($bits as $bit) { 08981 foreach ($this->info as $propname => $validator) { 08982 if (isset($done[$propname])) continue; 08983 $r = $validator->validate($bit, $config, $context); 08984 if ($r !== false) { 08985 $ret .= $r . ' '; 08986 $done[$propname] = true; 08987 break; 08988 } 08989 } 08990 } 08991 return rtrim($ret); 08992 } 08993 08994 } 08995 08996 08997 08998 08999 09003 class HTMLPurifier_AttrDef_CSS_Color extends HTMLPurifier_AttrDef 09004 { 09005 09006 public function validate($color, $config, $context) { 09007 09008 static $colors = null; 09009 if ($colors === null) $colors = $config->get('Core.ColorKeywords'); 09010 09011 $color = trim($color); 09012 if ($color === '') return false; 09013 09014 $lower = strtolower($color); 09015 if (isset($colors[$lower])) return $colors[$lower]; 09016 09017 if (strpos($color, 'rgb(') !== false) { 09018 // rgb literal handling 09019 $length = strlen($color); 09020 if (strpos($color, ')') !== $length - 1) return false; 09021 $triad = substr($color, 4, $length - 4 - 1); 09022 $parts = explode(',', $triad); 09023 if (count($parts) !== 3) return false; 09024 $type = false; // to ensure that they're all the same type 09025 $new_parts = array(); 09026 foreach ($parts as $part) { 09027 $part = trim($part); 09028 if ($part === '') return false; 09029 $length = strlen($part); 09030 if ($part[$length - 1] === '%') { 09031 // handle percents 09032 if (!$type) { 09033 $type = 'percentage'; 09034 } elseif ($type !== 'percentage') { 09035 return false; 09036 } 09037 $num = (float) substr($part, 0, $length - 1); 09038 if ($num < 0) $num = 0; 09039 if ($num > 100) $num = 100; 09040 $new_parts[] = "$num%"; 09041 } else { 09042 // handle integers 09043 if (!$type) { 09044 $type = 'integer'; 09045 } elseif ($type !== 'integer') { 09046 return false; 09047 } 09048 $num = (int) $part; 09049 if ($num < 0) $num = 0; 09050 if ($num > 255) $num = 255; 09051 $new_parts[] = (string) $num; 09052 } 09053 } 09054 $new_triad = implode(',', $new_parts); 09055 $color = "rgb($new_triad)"; 09056 } else { 09057 // hexadecimal handling 09058 if ($color[0] === '#') { 09059 $hex = substr($color, 1); 09060 } else { 09061 $hex = $color; 09062 $color = '#' . $color; 09063 } 09064 $length = strlen($hex); 09065 if ($length !== 3 && $length !== 6) return false; 09066 if (!ctype_xdigit($hex)) return false; 09067 } 09068 09069 return $color; 09070 09071 } 09072 09073 } 09074 09075 09076 09077 09078 09088 class HTMLPurifier_AttrDef_CSS_Composite extends HTMLPurifier_AttrDef 09089 { 09090 09095 public $defs; 09096 09100 public function __construct($defs) { 09101 $this->defs = $defs; 09102 } 09103 09104 public function validate($string, $config, $context) { 09105 foreach ($this->defs as $i => $def) { 09106 $result = $this->defs[$i]->validate($string, $config, $context); 09107 if ($result !== false) return $result; 09108 } 09109 return false; 09110 } 09111 09112 } 09113 09114 09115 09116 09117 09121 class HTMLPurifier_AttrDef_CSS_DenyElementDecorator extends HTMLPurifier_AttrDef 09122 { 09123 public $def, $element; 09124 09129 public function __construct($def, $element) { 09130 $this->def = $def; 09131 $this->element = $element; 09132 } 09136 public function validate($string, $config, $context) { 09137 $token = $context->get('CurrentToken', true); 09138 if ($token && $token->name == $this->element) return false; 09139 return $this->def->validate($string, $config, $context); 09140 } 09141 } 09142 09143 09144 09145 09146 09152 class HTMLPurifier_AttrDef_CSS_Filter extends HTMLPurifier_AttrDef 09153 { 09154 09155 protected $intValidator; 09156 09157 public function __construct() { 09158 $this->intValidator = new HTMLPurifier_AttrDef_Integer(); 09159 } 09160 09161 public function validate($value, $config, $context) { 09162 $value = $this->parseCDATA($value); 09163 if ($value === 'none') return $value; 09164 // if we looped this we could support multiple filters 09165 $function_length = strcspn($value, '('); 09166 $function = trim(substr($value, 0, $function_length)); 09167 if ($function !== 'alpha' && 09168 $function !== 'Alpha' && 09169 $function !== 'progid:DXImageTransform.Microsoft.Alpha' 09170 ) return false; 09171 $cursor = $function_length + 1; 09172 $parameters_length = strcspn($value, ')', $cursor); 09173 $parameters = substr($value, $cursor, $parameters_length); 09174 $params = explode(',', $parameters); 09175 $ret_params = array(); 09176 $lookup = array(); 09177 foreach ($params as $param) { 09178 list($key, $value) = explode('=', $param); 09179 $key = trim($key); 09180 $value = trim($value); 09181 if (isset($lookup[$key])) continue; 09182 if ($key !== 'opacity') continue; 09183 $value = $this->intValidator->validate($value, $config, $context); 09184 if ($value === false) continue; 09185 $int = (int) $value; 09186 if ($int > 100) $value = '100'; 09187 if ($int < 0) $value = '0'; 09188 $ret_params[] = "$key=$value"; 09189 $lookup[$key] = true; 09190 } 09191 $ret_parameters = implode(',', $ret_params); 09192 $ret_function = "$function($ret_parameters)"; 09193 return $ret_function; 09194 } 09195 09196 } 09197 09198 09199 09200 09201 09205 class HTMLPurifier_AttrDef_CSS_Font extends HTMLPurifier_AttrDef 09206 { 09207 09216 protected $info = array(); 09217 09218 public function __construct($config) { 09219 $def = $config->getCSSDefinition(); 09220 $this->info['font-style'] = $def->info['font-style']; 09221 $this->info['font-variant'] = $def->info['font-variant']; 09222 $this->info['font-weight'] = $def->info['font-weight']; 09223 $this->info['font-size'] = $def->info['font-size']; 09224 $this->info['line-height'] = $def->info['line-height']; 09225 $this->info['font-family'] = $def->info['font-family']; 09226 } 09227 09228 public function validate($string, $config, $context) { 09229 09230 static $system_fonts = array( 09231 'caption' => true, 09232 'icon' => true, 09233 'menu' => true, 09234 'message-box' => true, 09235 'small-caption' => true, 09236 'status-bar' => true 09237 ); 09238 09239 // regular pre-processing 09240 $string = $this->parseCDATA($string); 09241 if ($string === '') return false; 09242 09243 // check if it's one of the keywords 09244 $lowercase_string = strtolower($string); 09245 if (isset($system_fonts[$lowercase_string])) { 09246 return $lowercase_string; 09247 } 09248 09249 $bits = explode(' ', $string); // bits to process 09250 $stage = 0; // this indicates what we're looking for 09251 $caught = array(); // which stage 0 properties have we caught? 09252 $stage_1 = array('font-style', 'font-variant', 'font-weight'); 09253 $final = ''; // output 09254 09255 for ($i = 0, $size = count($bits); $i < $size; $i++) { 09256 if ($bits[$i] === '') continue; 09257 switch ($stage) { 09258 09259 // attempting to catch font-style, font-variant or font-weight 09260 case 0: 09261 foreach ($stage_1 as $validator_name) { 09262 if (isset($caught[$validator_name])) continue; 09263 $r = $this->info[$validator_name]->validate( 09264 $bits[$i], $config, $context); 09265 if ($r !== false) { 09266 $final .= $r . ' '; 09267 $caught[$validator_name] = true; 09268 break; 09269 } 09270 } 09271 // all three caught, continue on 09272 if (count($caught) >= 3) $stage = 1; 09273 if ($r !== false) break; 09274 09275 // attempting to catch font-size and perhaps line-height 09276 case 1: 09277 $found_slash = false; 09278 if (strpos($bits[$i], '/') !== false) { 09279 list($font_size, $line_height) = 09280 explode('/', $bits[$i]); 09281 if ($line_height === '') { 09282 // ooh, there's a space after the slash! 09283 $line_height = false; 09284 $found_slash = true; 09285 } 09286 } else { 09287 $font_size = $bits[$i]; 09288 $line_height = false; 09289 } 09290 $r = $this->info['font-size']->validate( 09291 $font_size, $config, $context); 09292 if ($r !== false) { 09293 $final .= $r; 09294 // attempt to catch line-height 09295 if ($line_height === false) { 09296 // we need to scroll forward 09297 for ($j = $i + 1; $j < $size; $j++) { 09298 if ($bits[$j] === '') continue; 09299 if ($bits[$j] === '/') { 09300 if ($found_slash) { 09301 return false; 09302 } else { 09303 $found_slash = true; 09304 continue; 09305 } 09306 } 09307 $line_height = $bits[$j]; 09308 break; 09309 } 09310 } else { 09311 // slash already found 09312 $found_slash = true; 09313 $j = $i; 09314 } 09315 if ($found_slash) { 09316 $i = $j; 09317 $r = $this->info['line-height']->validate( 09318 $line_height, $config, $context); 09319 if ($r !== false) { 09320 $final .= '/' . $r; 09321 } 09322 } 09323 $final .= ' '; 09324 $stage = 2; 09325 break; 09326 } 09327 return false; 09328 09329 // attempting to catch font-family 09330 case 2: 09331 $font_family = 09332 implode(' ', array_slice($bits, $i, $size - $i)); 09333 $r = $this->info['font-family']->validate( 09334 $font_family, $config, $context); 09335 if ($r !== false) { 09336 $final .= $r . ' '; 09337 // processing completed successfully 09338 return rtrim($final); 09339 } 09340 return false; 09341 } 09342 } 09343 return false; 09344 } 09345 09346 } 09347 09348 09349 09350 09351 09355 class HTMLPurifier_AttrDef_CSS_FontFamily extends HTMLPurifier_AttrDef 09356 { 09357 09358 protected $mask = null; 09359 09360 public function __construct() { 09361 $this->mask = '- '; 09362 for ($c = 'a'; $c <= 'z'; $c++) $this->mask .= $c; 09363 for ($c = 'A'; $c <= 'Z'; $c++) $this->mask .= $c; 09364 for ($c = '0'; $c <= '9'; $c++) $this->mask .= $c; // cast-y, but should be fine 09365 // special bytes used by UTF-8 09366 for ($i = 0x80; $i <= 0xFF; $i++) { 09367 // We don't bother excluding invalid bytes in this range, 09368 // because the our restriction of well-formed UTF-8 will 09369 // prevent these from ever occurring. 09370 $this->mask .= chr($i); 09371 } 09372 09373 /* 09374 PHP's internal strcspn implementation is 09375 O(length of string * length of mask), making it inefficient 09376 for large masks. However, it's still faster than 09377 preg_match 8) 09378 for (p = s1;;) { 09379 spanp = s2; 09380 do { 09381 if (*spanp == c || p == s1_end) { 09382 return p - s1; 09383 } 09384 } while (spanp++ < (s2_end - 1)); 09385 c = *++p; 09386 } 09387 */ 09388 // possible optimization: invert the mask. 09389 } 09390 09391 public function validate($string, $config, $context) { 09392 static $generic_names = array( 09393 'serif' => true, 09394 'sans-serif' => true, 09395 'monospace' => true, 09396 'fantasy' => true, 09397 'cursive' => true 09398 ); 09399 $allowed_fonts = $config->get('CSS.AllowedFonts'); 09400 09401 // assume that no font names contain commas in them 09402 $fonts = explode(',', $string); 09403 $final = ''; 09404 foreach($fonts as $font) { 09405 $font = trim($font); 09406 if ($font === '') continue; 09407 // match a generic name 09408 if (isset($generic_names[$font])) { 09409 if ($allowed_fonts === null || isset($allowed_fonts[$font])) { 09410 $final .= $font . ', '; 09411 } 09412 continue; 09413 } 09414 // match a quoted name 09415 if ($font[0] === '"' || $font[0] === "'") { 09416 $length = strlen($font); 09417 if ($length <= 2) continue; 09418 $quote = $font[0]; 09419 if ($font[$length - 1] !== $quote) continue; 09420 $font = substr($font, 1, $length - 2); 09421 } 09422 09423 $font = $this->expandCSSEscape($font); 09424 09425 // $font is a pure representation of the font name 09426 09427 if ($allowed_fonts !== null && !isset($allowed_fonts[$font])) { 09428 continue; 09429 } 09430 09431 if (ctype_alnum($font) && $font !== '') { 09432 // very simple font, allow it in unharmed 09433 $final .= $font . ', '; 09434 continue; 09435 } 09436 09437 // bugger out on whitespace. form feed (0C) really 09438 // shouldn't show up regardless 09439 $font = str_replace(array("\n", "\t", "\r", "\x0C"), ' ', $font); 09440 09441 // Here, there are various classes of characters which need 09442 // to be treated differently: 09443 // - Alphanumeric characters are essentially safe. We 09444 // handled these above. 09445 // - Spaces require quoting, though most parsers will do 09446 // the right thing if there aren't any characters that 09447 // can be misinterpreted 09448 // - Dashes rarely occur, but they fairly unproblematic 09449 // for parsing/rendering purposes. 09450 // The above characters cover the majority of Western font 09451 // names. 09452 // - Arbitrary Unicode characters not in ASCII. Because 09453 // most parsers give little thought to Unicode, treatment 09454 // of these codepoints is basically uniform, even for 09455 // punctuation-like codepoints. These characters can 09456 // show up in non-Western pages and are supported by most 09457 // major browsers, for example: "MS 明朝" is a 09458 // legitimate font-name 09459 // <http://ja.wikipedia.org/wiki/MS_明朝>. See 09460 // the CSS3 spec for more examples: 09461 // <http://www.w3.org/TR/2011/WD-css3-fonts-20110324/localizedfamilynames.png> 09462 // You can see live samples of these on the Internet: 09463 // <http://www.google.co.jp/search?q=font-family+MS+明朝|ゴシック> 09464 // However, most of these fonts have ASCII equivalents: 09465 // for example, 'MS Mincho', and it's considered 09466 // professional to use ASCII font names instead of 09467 // Unicode font names. Thanks Takeshi Terada for 09468 // providing this information. 09469 // The following characters, to my knowledge, have not been 09470 // used to name font names. 09471 // - Single quote. While theoretically you might find a 09472 // font name that has a single quote in its name (serving 09473 // as an apostrophe, e.g. Dave's Scribble), I haven't 09474 // been able to find any actual examples of this. 09475 // Internet Explorer's cssText translation (which I 09476 // believe is invoked by innerHTML) normalizes any 09477 // quoting to single quotes, and fails to escape single 09478 // quotes. (Note that this is not IE's behavior for all 09479 // CSS properties, just some sort of special casing for 09480 // font-family). So a single quote *cannot* be used 09481 // safely in the font-family context if there will be an 09482 // innerHTML/cssText translation. Note that Firefox 3.x 09483 // does this too. 09484 // - Double quote. In IE, these get normalized to 09485 // single-quotes, no matter what the encoding. (Fun 09486 // fact, in IE8, the 'content' CSS property gained 09487 // support, where they special cased to preserve encoded 09488 // double quotes, but still translate unadorned double 09489 // quotes into single quotes.) So, because their 09490 // fixpoint behavior is identical to single quotes, they 09491 // cannot be allowed either. Firefox 3.x displays 09492 // single-quote style behavior. 09493 // - Backslashes are reduced by one (so \\ -> \) every 09494 // iteration, so they cannot be used safely. This shows 09495 // up in IE7, IE8 and FF3 09496 // - Semicolons, commas and backticks are handled properly. 09497 // - The rest of the ASCII punctuation is handled properly. 09498 // We haven't checked what browsers do to unadorned 09499 // versions, but this is not important as long as the 09500 // browser doesn't /remove/ surrounding quotes (as IE does 09501 // for HTML). 09502 // 09503 // With these results in hand, we conclude that there are 09504 // various levels of safety: 09505 // - Paranoid: alphanumeric, spaces and dashes(?) 09506 // - International: Paranoid + non-ASCII Unicode 09507 // - Edgy: Everything except quotes, backslashes 09508 // - NoJS: Standards compliance, e.g. sod IE. Note that 09509 // with some judicious character escaping (since certain 09510 // types of escaping doesn't work) this is theoretically 09511 // OK as long as innerHTML/cssText is not called. 09512 // We believe that international is a reasonable default 09513 // (that we will implement now), and once we do more 09514 // extensive research, we may feel comfortable with dropping 09515 // it down to edgy. 09516 09517 // Edgy: alphanumeric, spaces, dashes and Unicode. Use of 09518 // str(c)spn assumes that the string was already well formed 09519 // Unicode (which of course it is). 09520 if (strspn($font, $this->mask) !== strlen($font)) { 09521 continue; 09522 } 09523 09524 // Historical: 09525 // In the absence of innerHTML/cssText, these ugly 09526 // transforms don't pose a security risk (as \\ and \" 09527 // might--these escapes are not supported by most browsers). 09528 // We could try to be clever and use single-quote wrapping 09529 // when there is a double quote present, but I have choosen 09530 // not to implement that. (NOTE: you can reduce the amount 09531 // of escapes by one depending on what quoting style you use) 09532 // $font = str_replace('\\', '\\5C ', $font); 09533 // $font = str_replace('"', '\\22 ', $font); 09534 // $font = str_replace("'", '\\27 ', $font); 09535 09536 // font possibly with spaces, requires quoting 09537 $final .= "'$font', "; 09538 } 09539 $final = rtrim($final, ', '); 09540 if ($final === '') return false; 09541 return $final; 09542 } 09543 09544 } 09545 09546 09547 09548 09549 09553 class HTMLPurifier_AttrDef_CSS_Ident extends HTMLPurifier_AttrDef 09554 { 09555 09556 public function validate($string, $config, $context) { 09557 09558 $string = trim($string); 09559 09560 // early abort: '' and '0' (strings that convert to false) are invalid 09561 if (!$string) return false; 09562 09563 $pattern = '/^(-?[A-Za-z_][A-Za-z_\-0-9]*)$/'; 09564 if (!preg_match($pattern, $string)) return false; 09565 return $string; 09566 09567 } 09568 09569 } 09570 09571 09572 09573 09574 09578 class HTMLPurifier_AttrDef_CSS_ImportantDecorator extends HTMLPurifier_AttrDef 09579 { 09580 public $def, $allow; 09581 09586 public function __construct($def, $allow = false) { 09587 $this->def = $def; 09588 $this->allow = $allow; 09589 } 09593 public function validate($string, $config, $context) { 09594 // test for ! and important tokens 09595 $string = trim($string); 09596 $is_important = false; 09597 // :TODO: optimization: test directly for !important and ! important 09598 if (strlen($string) >= 9 && substr($string, -9) === 'important') { 09599 $temp = rtrim(substr($string, 0, -9)); 09600 // use a temp, because we might want to restore important 09601 if (strlen($temp) >= 1 && substr($temp, -1) === '!') { 09602 $string = rtrim(substr($temp, 0, -1)); 09603 $is_important = true; 09604 } 09605 } 09606 $string = $this->def->validate($string, $config, $context); 09607 if ($this->allow && $is_important) $string .= ' !important'; 09608 return $string; 09609 } 09610 } 09611 09612 09613 09614 09615 09619 class HTMLPurifier_AttrDef_CSS_Length extends HTMLPurifier_AttrDef 09620 { 09621 09622 protected $min, $max; 09623 09628 public function __construct($min = null, $max = null) { 09629 $this->min = $min !== null ? HTMLPurifier_Length::make($min) : null; 09630 $this->max = $max !== null ? HTMLPurifier_Length::make($max) : null; 09631 } 09632 09633 public function validate($string, $config, $context) { 09634 $string = $this->parseCDATA($string); 09635 09636 // Optimizations 09637 if ($string === '') return false; 09638 if ($string === '0') return '0'; 09639 if (strlen($string) === 1) return false; 09640 09641 $length = HTMLPurifier_Length::make($string); 09642 if (!$length->isValid()) return false; 09643 09644 if ($this->min) { 09645 $c = $length->compareTo($this->min); 09646 if ($c === false) return false; 09647 if ($c < 0) return false; 09648 } 09649 if ($this->max) { 09650 $c = $length->compareTo($this->max); 09651 if ($c === false) return false; 09652 if ($c > 0) return false; 09653 } 09654 09655 return $length->toString(); 09656 } 09657 09658 } 09659 09660 09661 09662 09663 09668 class HTMLPurifier_AttrDef_CSS_ListStyle extends HTMLPurifier_AttrDef 09669 { 09670 09675 protected $info; 09676 09677 public function __construct($config) { 09678 $def = $config->getCSSDefinition(); 09679 $this->info['list-style-type'] = $def->info['list-style-type']; 09680 $this->info['list-style-position'] = $def->info['list-style-position']; 09681 $this->info['list-style-image'] = $def->info['list-style-image']; 09682 } 09683 09684 public function validate($string, $config, $context) { 09685 09686 // regular pre-processing 09687 $string = $this->parseCDATA($string); 09688 if ($string === '') return false; 09689 09690 // assumes URI doesn't have spaces in it 09691 $bits = explode(' ', strtolower($string)); // bits to process 09692 09693 $caught = array(); 09694 $caught['type'] = false; 09695 $caught['position'] = false; 09696 $caught['image'] = false; 09697 09698 $i = 0; // number of catches 09699 $none = false; 09700 09701 foreach ($bits as $bit) { 09702 if ($i >= 3) return; // optimization bit 09703 if ($bit === '') continue; 09704 foreach ($caught as $key => $status) { 09705 if ($status !== false) continue; 09706 $r = $this->info['list-style-' . $key]->validate($bit, $config, $context); 09707 if ($r === false) continue; 09708 if ($r === 'none') { 09709 if ($none) continue; 09710 else $none = true; 09711 if ($key == 'image') continue; 09712 } 09713 $caught[$key] = $r; 09714 $i++; 09715 break; 09716 } 09717 } 09718 09719 if (!$i) return false; 09720 09721 $ret = array(); 09722 09723 // construct type 09724 if ($caught['type']) $ret[] = $caught['type']; 09725 09726 // construct image 09727 if ($caught['image']) $ret[] = $caught['image']; 09728 09729 // construct position 09730 if ($caught['position']) $ret[] = $caught['position']; 09731 09732 if (empty($ret)) return false; 09733 return implode(' ', $ret); 09734 09735 } 09736 09737 } 09738 09739 09740 09741 09742 09754 class HTMLPurifier_AttrDef_CSS_Multiple extends HTMLPurifier_AttrDef 09755 { 09756 09761 public $single; 09762 09767 public $max; 09768 09773 public function __construct($single, $max = 4) { 09774 $this->single = $single; 09775 $this->max = $max; 09776 } 09777 09778 public function validate($string, $config, $context) { 09779 $string = $this->parseCDATA($string); 09780 if ($string === '') return false; 09781 $parts = explode(' ', $string); // parseCDATA replaced \r, \t and \n 09782 $length = count($parts); 09783 $final = ''; 09784 for ($i = 0, $num = 0; $i < $length && $num < $this->max; $i++) { 09785 if (ctype_space($parts[$i])) continue; 09786 $result = $this->single->validate($parts[$i], $config, $context); 09787 if ($result !== false) { 09788 $final .= $result . ' '; 09789 $num++; 09790 } 09791 } 09792 if ($final === '') return false; 09793 return rtrim($final); 09794 } 09795 09796 } 09797 09798 09799 09800 09801 09805 class HTMLPurifier_AttrDef_CSS_Percentage extends HTMLPurifier_AttrDef 09806 { 09807 09811 protected $number_def; 09812 09816 public function __construct($non_negative = false) { 09817 $this->number_def = new HTMLPurifier_AttrDef_CSS_Number($non_negative); 09818 } 09819 09820 public function validate($string, $config, $context) { 09821 09822 $string = $this->parseCDATA($string); 09823 09824 if ($string === '') return false; 09825 $length = strlen($string); 09826 if ($length === 1) return false; 09827 if ($string[$length - 1] !== '%') return false; 09828 09829 $number = substr($string, 0, $length - 1); 09830 $number = $this->number_def->validate($number, $config, $context); 09831 09832 if ($number === false) return false; 09833 return "$number%"; 09834 09835 } 09836 09837 } 09838 09839 09840 09841 09842 09848 class HTMLPurifier_AttrDef_CSS_TextDecoration extends HTMLPurifier_AttrDef 09849 { 09850 09851 public function validate($string, $config, $context) { 09852 09853 static $allowed_values = array( 09854 'line-through' => true, 09855 'overline' => true, 09856 'underline' => true, 09857 ); 09858 09859 $string = strtolower($this->parseCDATA($string)); 09860 09861 if ($string === 'none') return $string; 09862 09863 $parts = explode(' ', $string); 09864 $final = ''; 09865 foreach ($parts as $part) { 09866 if (isset($allowed_values[$part])) { 09867 $final .= $part . ' '; 09868 } 09869 } 09870 $final = rtrim($final); 09871 if ($final === '') return false; 09872 return $final; 09873 09874 } 09875 09876 } 09877 09878 09879 09880 09881 09891 class HTMLPurifier_AttrDef_CSS_URI extends HTMLPurifier_AttrDef_URI 09892 { 09893 09894 public function __construct() { 09895 parent::__construct(true); // always embedded 09896 } 09897 09898 public function validate($uri_string, $config, $context) { 09899 // parse the URI out of the string and then pass it onto 09900 // the parent object 09901 09902 $uri_string = $this->parseCDATA($uri_string); 09903 if (strpos($uri_string, 'url(') !== 0) return false; 09904 $uri_string = substr($uri_string, 4); 09905 $new_length = strlen($uri_string) - 1; 09906 if ($uri_string[$new_length] != ')') return false; 09907 $uri = trim(substr($uri_string, 0, $new_length)); 09908 09909 if (!empty($uri) && ($uri[0] == "'" || $uri[0] == '"')) { 09910 $quote = $uri[0]; 09911 $new_length = strlen($uri) - 1; 09912 if ($uri[$new_length] !== $quote) return false; 09913 $uri = substr($uri, 1, $new_length - 1); 09914 } 09915 09916 $uri = $this->expandCSSEscape($uri); 09917 09918 $result = parent::validate($uri, $config, $context); 09919 09920 if ($result === false) return false; 09921 09922 // extra sanity check; should have been done by URI 09923 $result = str_replace(array('"', "\\", "\n", "\x0c", "\r"), "", $result); 09924 09925 // suspicious characters are ()'; we're going to percent encode 09926 // them for safety. 09927 $result = str_replace(array('(', ')', "'"), array('%28', '%29', '%27'), $result); 09928 09929 // there's an extra bug where ampersands lose their escaping on 09930 // an innerHTML cycle, so a very unlucky query parameter could 09931 // then change the meaning of the URL. Unfortunately, there's 09932 // not much we can do about that... 09933 09934 return "url(\"$result\")"; 09935 09936 } 09937 09938 } 09939 09940 09941 09942 09943 09947 class HTMLPurifier_AttrDef_HTML_Bool extends HTMLPurifier_AttrDef 09948 { 09949 09950 protected $name; 09951 public $minimized = true; 09952 09953 public function __construct($name = false) {$this->name = $name;} 09954 09955 public function validate($string, $config, $context) { 09956 if (empty($string)) return false; 09957 return $this->name; 09958 } 09959 09963 public function make($string) { 09964 return new HTMLPurifier_AttrDef_HTML_Bool($string); 09965 } 09966 09967 } 09968 09969 09970 09971 09972 09976 class HTMLPurifier_AttrDef_HTML_Nmtokens extends HTMLPurifier_AttrDef 09977 { 09978 09979 public function validate($string, $config, $context) { 09980 09981 $string = trim($string); 09982 09983 // early abort: '' and '0' (strings that convert to false) are invalid 09984 if (!$string) return false; 09985 09986 $tokens = $this->split($string, $config, $context); 09987 $tokens = $this->filter($tokens, $config, $context); 09988 if (empty($tokens)) return false; 09989 return implode(' ', $tokens); 09990 09991 } 09992 09996 protected function split($string, $config, $context) { 09997 // OPTIMIZABLE! 09998 // do the preg_match, capture all subpatterns for reformulation 09999 10000 // we don't support U+00A1 and up codepoints or 10001 // escaping because I don't know how to do that with regexps 10002 // and plus it would complicate optimization efforts (you never 10003 // see that anyway). 10004 $pattern = '/(?:(?<=\s)|\A)'. // look behind for space or string start 10005 '((?:--|-?[A-Za-z_])[A-Za-z_\-0-9]*)'. 10006 '(?:(?=\s)|\z)/'; // look ahead for space or string end 10007 preg_match_all($pattern, $string, $matches); 10008 return $matches[1]; 10009 } 10010 10016 protected function filter($tokens, $config, $context) { 10017 return $tokens; 10018 } 10019 10020 } 10021 10022 10023 10024 10025 10029 class HTMLPurifier_AttrDef_HTML_Class extends HTMLPurifier_AttrDef_HTML_Nmtokens 10030 { 10031 protected function split($string, $config, $context) { 10032 // really, this twiddle should be lazy loaded 10033 $name = $config->getDefinition('HTML')->doctype->name; 10034 if ($name == "XHTML 1.1" || $name == "XHTML 2.0") { 10035 return parent::split($string, $config, $context); 10036 } else { 10037 return preg_split('/\s+/', $string); 10038 } 10039 } 10040 protected function filter($tokens, $config, $context) { 10041 $allowed = $config->get('Attr.AllowedClasses'); 10042 $forbidden = $config->get('Attr.ForbiddenClasses'); 10043 $ret = array(); 10044 foreach ($tokens as $token) { 10045 if ( 10046 ($allowed === null || isset($allowed[$token])) && 10047 !isset($forbidden[$token]) && 10048 // We need this O(n) check because of PHP's array 10049 // implementation that casts -0 to 0. 10050 !in_array($token, $ret, true) 10051 ) { 10052 $ret[] = $token; 10053 } 10054 } 10055 return $ret; 10056 } 10057 } 10058 10059 10060 10064 class HTMLPurifier_AttrDef_HTML_Color extends HTMLPurifier_AttrDef 10065 { 10066 10067 public function validate($string, $config, $context) { 10068 10069 static $colors = null; 10070 if ($colors === null) $colors = $config->get('Core.ColorKeywords'); 10071 10072 $string = trim($string); 10073 10074 if (empty($string)) return false; 10075 if (isset($colors[strtolower($string)])) return $colors[$string]; 10076 if ($string[0] === '#') $hex = substr($string, 1); 10077 else $hex = $string; 10078 10079 $length = strlen($hex); 10080 if ($length !== 3 && $length !== 6) return false; 10081 if (!ctype_xdigit($hex)) return false; 10082 if ($length === 3) $hex = $hex[0].$hex[0].$hex[1].$hex[1].$hex[2].$hex[2]; 10083 10084 return "#$hex"; 10085 10086 } 10087 10088 } 10089 10090 10091 10092 10093 10097 class HTMLPurifier_AttrDef_HTML_FrameTarget extends HTMLPurifier_AttrDef_Enum 10098 { 10099 10100 public $valid_values = false; // uninitialized value 10101 protected $case_sensitive = false; 10102 10103 public function __construct() {} 10104 10105 public function validate($string, $config, $context) { 10106 if ($this->valid_values === false) $this->valid_values = $config->get('Attr.AllowedFrameTargets'); 10107 return parent::validate($string, $config, $context); 10108 } 10109 10110 } 10111 10112 10113 10114 10115 10125 class HTMLPurifier_AttrDef_HTML_ID extends HTMLPurifier_AttrDef 10126 { 10127 10128 // selector is NOT a valid thing to use for IDREFs, because IDREFs 10129 // *must* target IDs that exist, whereas selector #ids do not. 10130 10135 protected $selector; 10136 10137 public function __construct($selector = false) { 10138 $this->selector = $selector; 10139 } 10140 10141 public function validate($id, $config, $context) { 10142 10143 if (!$this->selector && !$config->get('Attr.EnableID')) return false; 10144 10145 $id = trim($id); // trim it first 10146 10147 if ($id === '') return false; 10148 10149 $prefix = $config->get('Attr.IDPrefix'); 10150 if ($prefix !== '') { 10151 $prefix .= $config->get('Attr.IDPrefixLocal'); 10152 // prevent re-appending the prefix 10153 if (strpos($id, $prefix) !== 0) $id = $prefix . $id; 10154 } elseif ($config->get('Attr.IDPrefixLocal') !== '') { 10155 trigger_error('%Attr.IDPrefixLocal cannot be used unless '. 10156 '%Attr.IDPrefix is set', E_USER_WARNING); 10157 } 10158 10159 if (!$this->selector) { 10160 $id_accumulator =& $context->get('IDAccumulator'); 10161 if (isset($id_accumulator->ids[$id])) return false; 10162 } 10163 10164 // we purposely avoid using regex, hopefully this is faster 10165 10166 if (ctype_alpha($id)) { 10167 $result = true; 10168 } else { 10169 if (!ctype_alpha(@$id[0])) return false; 10170 $trim = trim( // primitive style of regexps, I suppose 10171 $id, 10172 'A..Za..z0..9:-._' 10173 ); 10174 $result = ($trim === ''); 10175 } 10176 10177 $regexp = $config->get('Attr.IDBlacklistRegexp'); 10178 if ($regexp && preg_match($regexp, $id)) { 10179 return false; 10180 } 10181 10182 if (!$this->selector && $result) $id_accumulator->add($id); 10183 10184 // if no change was made to the ID, return the result 10185 // else, return the new id if stripping whitespace made it 10186 // valid, or return false. 10187 return $result ? $id : false; 10188 10189 } 10190 10191 } 10192 10193 10194 10195 10196 10200 class HTMLPurifier_AttrDef_HTML_Pixels extends HTMLPurifier_AttrDef 10201 { 10202 10203 protected $max; 10204 10205 public function __construct($max = null) { 10206 $this->max = $max; 10207 } 10208 10209 public function validate($string, $config, $context) { 10210 10211 $string = trim($string); 10212 if ($string === '0') return $string; 10213 if ($string === '') return false; 10214 $length = strlen($string); 10215 if (substr($string, $length - 2) == 'px') { 10216 $string = substr($string, 0, $length - 2); 10217 } 10218 if (!is_numeric($string)) return false; 10219 $int = (int) $string; 10220 10221 if ($int < 0) return '0'; 10222 10223 // upper-bound value, extremely high values can 10224 // crash operating systems, see <http://ha.ckers.org/imagecrash.html> 10225 // WARNING, above link WILL crash you if you're using Windows 10226 10227 if ($this->max !== null && $int > $this->max) return (string) $this->max; 10228 10229 return (string) $int; 10230 10231 } 10232 10233 public function make($string) { 10234 if ($string === '') $max = null; 10235 else $max = (int) $string; 10236 $class = get_class($this); 10237 return new $class($max); 10238 } 10239 10240 } 10241 10242 10243 10244 10245 10253 class HTMLPurifier_AttrDef_HTML_Length extends HTMLPurifier_AttrDef_HTML_Pixels 10254 { 10255 10256 public function validate($string, $config, $context) { 10257 10258 $string = trim($string); 10259 if ($string === '') return false; 10260 10261 $parent_result = parent::validate($string, $config, $context); 10262 if ($parent_result !== false) return $parent_result; 10263 10264 $length = strlen($string); 10265 $last_char = $string[$length - 1]; 10266 10267 if ($last_char !== '%') return false; 10268 10269 $points = substr($string, 0, $length - 1); 10270 10271 if (!is_numeric($points)) return false; 10272 10273 $points = (int) $points; 10274 10275 if ($points < 0) return '0%'; 10276 if ($points > 100) return '100%'; 10277 10278 return ((string) $points) . '%'; 10279 10280 } 10281 10282 } 10283 10284 10285 10286 10287 10294 class HTMLPurifier_AttrDef_HTML_LinkTypes extends HTMLPurifier_AttrDef 10295 { 10296 10298 protected $name; 10299 10300 public function __construct($name) { 10301 $configLookup = array( 10302 'rel' => 'AllowedRel', 10303 'rev' => 'AllowedRev' 10304 ); 10305 if (!isset($configLookup[$name])) { 10306 trigger_error('Unrecognized attribute name for link '. 10307 'relationship.', E_USER_ERROR); 10308 return; 10309 } 10310 $this->name = $configLookup[$name]; 10311 } 10312 10313 public function validate($string, $config, $context) { 10314 10315 $allowed = $config->get('Attr.' . $this->name); 10316 if (empty($allowed)) return false; 10317 10318 $string = $this->parseCDATA($string); 10319 $parts = explode(' ', $string); 10320 10321 // lookup to prevent duplicates 10322 $ret_lookup = array(); 10323 foreach ($parts as $part) { 10324 $part = strtolower(trim($part)); 10325 if (!isset($allowed[$part])) continue; 10326 $ret_lookup[$part] = true; 10327 } 10328 10329 if (empty($ret_lookup)) return false; 10330 $string = implode(' ', array_keys($ret_lookup)); 10331 10332 return $string; 10333 10334 } 10335 10336 } 10337 10338 10339 10340 10341 10348 class HTMLPurifier_AttrDef_HTML_MultiLength extends HTMLPurifier_AttrDef_HTML_Length 10349 { 10350 10351 public function validate($string, $config, $context) { 10352 10353 $string = trim($string); 10354 if ($string === '') return false; 10355 10356 $parent_result = parent::validate($string, $config, $context); 10357 if ($parent_result !== false) return $parent_result; 10358 10359 $length = strlen($string); 10360 $last_char = $string[$length - 1]; 10361 10362 if ($last_char !== '*') return false; 10363 10364 $int = substr($string, 0, $length - 1); 10365 10366 if ($int == '') return '*'; 10367 if (!is_numeric($int)) return false; 10368 10369 $int = (int) $int; 10370 10371 if ($int < 0) return false; 10372 if ($int == 0) return '0'; 10373 if ($int == 1) return '*'; 10374 return ((string) $int) . '*'; 10375 10376 } 10377 10378 } 10379 10380 10381 10382 10383 10384 abstract class HTMLPurifier_AttrDef_URI_Email extends HTMLPurifier_AttrDef 10385 { 10386 10390 function unpack($string) { 10391 // needs to be implemented 10392 } 10393 10394 } 10395 10396 // sub-implementations 10397 10398 10399 10400 10401 10405 class HTMLPurifier_AttrDef_URI_Host extends HTMLPurifier_AttrDef 10406 { 10407 10411 protected $ipv4; 10412 10416 protected $ipv6; 10417 10418 public function __construct() { 10419 $this->ipv4 = new HTMLPurifier_AttrDef_URI_IPv4(); 10420 $this->ipv6 = new HTMLPurifier_AttrDef_URI_IPv6(); 10421 } 10422 10423 public function validate($string, $config, $context) { 10424 $length = strlen($string); 10425 // empty hostname is OK; it's usually semantically equivalent: 10426 // the default host as defined by a URI scheme is used: 10427 // 10428 // If the URI scheme defines a default for host, then that 10429 // default applies when the host subcomponent is undefined 10430 // or when the registered name is empty (zero length). 10431 if ($string === '') return ''; 10432 if ($length > 1 && $string[0] === '[' && $string[$length-1] === ']') { 10433 //IPv6 10434 $ip = substr($string, 1, $length - 2); 10435 $valid = $this->ipv6->validate($ip, $config, $context); 10436 if ($valid === false) return false; 10437 return '['. $valid . ']'; 10438 } 10439 10440 // need to do checks on unusual encodings too 10441 $ipv4 = $this->ipv4->validate($string, $config, $context); 10442 if ($ipv4 !== false) return $ipv4; 10443 10444 // A regular domain name. 10445 10446 // This doesn't match I18N domain names, but we don't have proper IRI support, 10447 // so force users to insert Punycode. 10448 10449 // The productions describing this are: 10450 $a = '[a-z]'; // alpha 10451 $an = '[a-z0-9]'; // alphanum 10452 $and = '[a-z0-9-]'; // alphanum | "-" 10453 // domainlabel = alphanum | alphanum *( alphanum | "-" ) alphanum 10454 $domainlabel = "$an($and*$an)?"; 10455 // toplabel = alpha | alpha *( alphanum | "-" ) alphanum 10456 $toplabel = "$a($and*$an)?"; 10457 // hostname = *( domainlabel "." ) toplabel [ "." ] 10458 if (preg_match("/^($domainlabel\.)*$toplabel\.?$/i", $string)) { 10459 return $string; 10460 } 10461 10462 // If we have Net_IDNA2 support, we can support IRIs by 10463 // punycoding them. (This is the most portable thing to do, 10464 // since otherwise we have to assume browsers support 10465 10466 if ($config->get('Core.EnableIDNA')) { 10467 $idna = new Net_IDNA2(array('encoding' => 'utf8', 'overlong' => false, 'strict' => true)); 10468 // we need to encode each period separately 10469 $parts = explode('.', $string); 10470 try { 10471 $new_parts = array(); 10472 foreach ($parts as $part) { 10473 $encodable = false; 10474 for ($i = 0, $c = strlen($part); $i < $c; $i++) { 10475 if (ord($part[$i]) > 0x7a) { 10476 $encodable = true; 10477 break; 10478 } 10479 } 10480 if (!$encodable) { 10481 $new_parts[] = $part; 10482 } else { 10483 $new_parts[] = $idna->encode($part); 10484 } 10485 } 10486 $string = implode('.', $new_parts); 10487 if (preg_match("/^($domainlabel\.)*$toplabel\.?$/i", $string)) { 10488 return $string; 10489 } 10490 } catch (Exception $e) { 10491 // XXX error reporting 10492 } 10493 } 10494 10495 return false; 10496 } 10497 10498 } 10499 10500 10501 10502 10503 10508 class HTMLPurifier_AttrDef_URI_IPv4 extends HTMLPurifier_AttrDef 10509 { 10510 10514 protected $ip4; 10515 10516 public function validate($aIP, $config, $context) { 10517 10518 if (!$this->ip4) $this->_loadRegex(); 10519 10520 if (preg_match('#^' . $this->ip4 . '$#s', $aIP)) 10521 { 10522 return $aIP; 10523 } 10524 10525 return false; 10526 10527 } 10528 10533 protected function _loadRegex() { 10534 $oct = '(?:25[0-5]|2[0-4][0-9]|1[0-9]{2}|[1-9][0-9]|[0-9])'; // 0-255 10535 $this->ip4 = "(?:{$oct}\\.{$oct}\\.{$oct}\\.{$oct})"; 10536 } 10537 10538 } 10539 10540 10541 10542 10543 10550 class HTMLPurifier_AttrDef_URI_IPv6 extends HTMLPurifier_AttrDef_URI_IPv4 10551 { 10552 10553 public function validate($aIP, $config, $context) { 10554 10555 if (!$this->ip4) $this->_loadRegex(); 10556 10557 $original = $aIP; 10558 10559 $hex = '[0-9a-fA-F]'; 10560 $blk = '(?:' . $hex . '{1,4})'; 10561 $pre = '(?:/(?:12[0-8]|1[0-1][0-9]|[1-9][0-9]|[0-9]))'; // /0 - /128 10562 10563 // prefix check 10564 if (strpos($aIP, '/') !== false) 10565 { 10566 if (preg_match('#' . $pre . '$#s', $aIP, $find)) 10567 { 10568 $aIP = substr($aIP, 0, 0-strlen($find[0])); 10569 unset($find); 10570 } 10571 else 10572 { 10573 return false; 10574 } 10575 } 10576 10577 // IPv4-compatiblity check 10578 if (preg_match('#(?<=:'.')' . $this->ip4 . '$#s', $aIP, $find)) 10579 { 10580 $aIP = substr($aIP, 0, 0-strlen($find[0])); 10581 $ip = explode('.', $find[0]); 10582 $ip = array_map('dechex', $ip); 10583 $aIP .= $ip[0] . $ip[1] . ':' . $ip[2] . $ip[3]; 10584 unset($find, $ip); 10585 } 10586 10587 // compression check 10588 $aIP = explode('::', $aIP); 10589 $c = count($aIP); 10590 if ($c > 2) 10591 { 10592 return false; 10593 } 10594 elseif ($c == 2) 10595 { 10596 list($first, $second) = $aIP; 10597 $first = explode(':', $first); 10598 $second = explode(':', $second); 10599 10600 if (count($first) + count($second) > 8) 10601 { 10602 return false; 10603 } 10604 10605 while(count($first) < 8) 10606 { 10607 array_push($first, '0'); 10608 } 10609 10610 array_splice($first, 8 - count($second), 8, $second); 10611 $aIP = $first; 10612 unset($first,$second); 10613 } 10614 else 10615 { 10616 $aIP = explode(':', $aIP[0]); 10617 } 10618 $c = count($aIP); 10619 10620 if ($c != 8) 10621 { 10622 return false; 10623 } 10624 10625 // All the pieces should be 16-bit hex strings. Are they? 10626 foreach ($aIP as $piece) 10627 { 10628 if (!preg_match('#^[0-9a-fA-F]{4}$#s', sprintf('%04s', $piece))) 10629 { 10630 return false; 10631 } 10632 } 10633 10634 return $original; 10635 10636 } 10637 10638 } 10639 10640 10641 10642 10643 10648 class HTMLPurifier_AttrDef_URI_Email_SimpleCheck extends HTMLPurifier_AttrDef_URI_Email 10649 { 10650 10651 public function validate($string, $config, $context) { 10652 // no support for named mailboxes i.e. "Bob <bob@example.com>" 10653 // that needs more percent encoding to be done 10654 if ($string == '') return false; 10655 $string = trim($string); 10656 $result = preg_match('/^[A-Z0-9._%-]+@[A-Z0-9.-]+\.[A-Z]{2,4}$/i', $string); 10657 return $result ? $string : false; 10658 } 10659 10660 } 10661 10662 10663 10664 10665 10669 class HTMLPurifier_AttrTransform_Background extends HTMLPurifier_AttrTransform { 10670 10671 public function transform($attr, $config, $context) { 10672 10673 if (!isset($attr['background'])) return $attr; 10674 10675 $background = $this->confiscateAttr($attr, 'background'); 10676 // some validation should happen here 10677 10678 $this->prependCSS($attr, "background-image:url($background);"); 10679 10680 return $attr; 10681 10682 } 10683 10684 } 10685 10686 10687 10688 10689 10690 // this MUST be placed in post, as it assumes that any value in dir is valid 10691 10695 class HTMLPurifier_AttrTransform_BdoDir extends HTMLPurifier_AttrTransform 10696 { 10697 10698 public function transform($attr, $config, $context) { 10699 if (isset($attr['dir'])) return $attr; 10700 $attr['dir'] = $config->get('Attr.DefaultTextDir'); 10701 return $attr; 10702 } 10703 10704 } 10705 10706 10707 10708 10709 10713 class HTMLPurifier_AttrTransform_BgColor extends HTMLPurifier_AttrTransform { 10714 10715 public function transform($attr, $config, $context) { 10716 10717 if (!isset($attr['bgcolor'])) return $attr; 10718 10719 $bgcolor = $this->confiscateAttr($attr, 'bgcolor'); 10720 // some validation should happen here 10721 10722 $this->prependCSS($attr, "background-color:$bgcolor;"); 10723 10724 return $attr; 10725 10726 } 10727 10728 } 10729 10730 10731 10732 10733 10737 class HTMLPurifier_AttrTransform_BoolToCSS extends HTMLPurifier_AttrTransform { 10738 10742 protected $attr; 10743 10747 protected $css; 10748 10753 public function __construct($attr, $css) { 10754 $this->attr = $attr; 10755 $this->css = $css; 10756 } 10757 10758 public function transform($attr, $config, $context) { 10759 if (!isset($attr[$this->attr])) return $attr; 10760 unset($attr[$this->attr]); 10761 $this->prependCSS($attr, $this->css); 10762 return $attr; 10763 } 10764 10765 } 10766 10767 10768 10769 10770 10774 class HTMLPurifier_AttrTransform_Border extends HTMLPurifier_AttrTransform { 10775 10776 public function transform($attr, $config, $context) { 10777 if (!isset($attr['border'])) return $attr; 10778 $border_width = $this->confiscateAttr($attr, 'border'); 10779 // some validation should happen here 10780 $this->prependCSS($attr, "border:{$border_width}px solid;"); 10781 return $attr; 10782 } 10783 10784 } 10785 10786 10787 10788 10789 10794 class HTMLPurifier_AttrTransform_EnumToCSS extends HTMLPurifier_AttrTransform { 10795 10799 protected $attr; 10800 10804 protected $enumToCSS = array(); 10805 10811 protected $caseSensitive = false; 10812 10818 public function __construct($attr, $enum_to_css, $case_sensitive = false) { 10819 $this->attr = $attr; 10820 $this->enumToCSS = $enum_to_css; 10821 $this->caseSensitive = (bool) $case_sensitive; 10822 } 10823 10824 public function transform($attr, $config, $context) { 10825 10826 if (!isset($attr[$this->attr])) return $attr; 10827 10828 $value = trim($attr[$this->attr]); 10829 unset($attr[$this->attr]); 10830 10831 if (!$this->caseSensitive) $value = strtolower($value); 10832 10833 if (!isset($this->enumToCSS[$value])) { 10834 return $attr; 10835 } 10836 10837 $this->prependCSS($attr, $this->enumToCSS[$value]); 10838 10839 return $attr; 10840 10841 } 10842 10843 } 10844 10845 10846 10847 10848 10849 // must be called POST validation 10850 10857 class HTMLPurifier_AttrTransform_ImgRequired extends HTMLPurifier_AttrTransform 10858 { 10859 10860 public function transform($attr, $config, $context) { 10861 10862 $src = true; 10863 if (!isset($attr['src'])) { 10864 if ($config->get('Core.RemoveInvalidImg')) return $attr; 10865 $attr['src'] = $config->get('Attr.DefaultInvalidImage'); 10866 $src = false; 10867 } 10868 10869 if (!isset($attr['alt'])) { 10870 if ($src) { 10871 $alt = $config->get('Attr.DefaultImageAlt'); 10872 if ($alt === null) { 10873 // truncate if the alt is too long 10874 $attr['alt'] = substr(basename($attr['src']),0,40); 10875 } else { 10876 $attr['alt'] = $alt; 10877 } 10878 } else { 10879 $attr['alt'] = $config->get('Attr.DefaultInvalidImageAlt'); 10880 } 10881 } 10882 10883 return $attr; 10884 10885 } 10886 10887 } 10888 10889 10890 10891 10892 10896 class HTMLPurifier_AttrTransform_ImgSpace extends HTMLPurifier_AttrTransform { 10897 10898 protected $attr; 10899 protected $css = array( 10900 'hspace' => array('left', 'right'), 10901 'vspace' => array('top', 'bottom') 10902 ); 10903 10904 public function __construct($attr) { 10905 $this->attr = $attr; 10906 if (!isset($this->css[$attr])) { 10907 trigger_error(htmlspecialchars($attr) . ' is not valid space attribute'); 10908 } 10909 } 10910 10911 public function transform($attr, $config, $context) { 10912 10913 if (!isset($attr[$this->attr])) return $attr; 10914 10915 $width = $this->confiscateAttr($attr, $this->attr); 10916 // some validation could happen here 10917 10918 if (!isset($this->css[$this->attr])) return $attr; 10919 10920 $style = ''; 10921 foreach ($this->css[$this->attr] as $suffix) { 10922 $property = "margin-$suffix"; 10923 $style .= "$property:{$width}px;"; 10924 } 10925 10926 $this->prependCSS($attr, $style); 10927 10928 return $attr; 10929 10930 } 10931 10932 } 10933 10934 10935 10936 10937 10942 class HTMLPurifier_AttrTransform_Input extends HTMLPurifier_AttrTransform { 10943 10944 protected $pixels; 10945 10946 public function __construct() { 10947 $this->pixels = new HTMLPurifier_AttrDef_HTML_Pixels(); 10948 } 10949 10950 public function transform($attr, $config, $context) { 10951 if (!isset($attr['type'])) $t = 'text'; 10952 else $t = strtolower($attr['type']); 10953 if (isset($attr['checked']) && $t !== 'radio' && $t !== 'checkbox') { 10954 unset($attr['checked']); 10955 } 10956 if (isset($attr['maxlength']) && $t !== 'text' && $t !== 'password') { 10957 unset($attr['maxlength']); 10958 } 10959 if (isset($attr['size']) && $t !== 'text' && $t !== 'password') { 10960 $result = $this->pixels->validate($attr['size'], $config, $context); 10961 if ($result === false) unset($attr['size']); 10962 else $attr['size'] = $result; 10963 } 10964 if (isset($attr['src']) && $t !== 'image') { 10965 unset($attr['src']); 10966 } 10967 if (!isset($attr['value']) && ($t === 'radio' || $t === 'checkbox')) { 10968 $attr['value'] = ''; 10969 } 10970 return $attr; 10971 } 10972 10973 } 10974 10975 10976 10977 10978 10984 class HTMLPurifier_AttrTransform_Lang extends HTMLPurifier_AttrTransform 10985 { 10986 10987 public function transform($attr, $config, $context) { 10988 10989 $lang = isset($attr['lang']) ? $attr['lang'] : false; 10990 $xml_lang = isset($attr['xml:lang']) ? $attr['xml:lang'] : false; 10991 10992 if ($lang !== false && $xml_lang === false) { 10993 $attr['xml:lang'] = $lang; 10994 } elseif ($xml_lang !== false) { 10995 $attr['lang'] = $xml_lang; 10996 } 10997 10998 return $attr; 10999 11000 } 11001 11002 } 11003 11004 11005 11006 11007 11011 class HTMLPurifier_AttrTransform_Length extends HTMLPurifier_AttrTransform 11012 { 11013 11014 protected $name; 11015 protected $cssName; 11016 11017 public function __construct($name, $css_name = null) { 11018 $this->name = $name; 11019 $this->cssName = $css_name ? $css_name : $name; 11020 } 11021 11022 public function transform($attr, $config, $context) { 11023 if (!isset($attr[$this->name])) return $attr; 11024 $length = $this->confiscateAttr($attr, $this->name); 11025 if(ctype_digit($length)) $length .= 'px'; 11026 $this->prependCSS($attr, $this->cssName . ":$length;"); 11027 return $attr; 11028 } 11029 11030 } 11031 11032 11033 11034 11035 11039 class HTMLPurifier_AttrTransform_Name extends HTMLPurifier_AttrTransform 11040 { 11041 11042 public function transform($attr, $config, $context) { 11043 // Abort early if we're using relaxed definition of name 11044 if ($config->get('HTML.Attr.Name.UseCDATA')) return $attr; 11045 if (!isset($attr['name'])) return $attr; 11046 $id = $this->confiscateAttr($attr, 'name'); 11047 if ( isset($attr['id'])) return $attr; 11048 $attr['id'] = $id; 11049 return $attr; 11050 } 11051 11052 } 11053 11054 11055 11056 11057 11063 class HTMLPurifier_AttrTransform_NameSync extends HTMLPurifier_AttrTransform 11064 { 11065 11066 public function __construct() { 11067 $this->idDef = new HTMLPurifier_AttrDef_HTML_ID(); 11068 } 11069 11070 public function transform($attr, $config, $context) { 11071 if (!isset($attr['name'])) return $attr; 11072 $name = $attr['name']; 11073 if (isset($attr['id']) && $attr['id'] === $name) return $attr; 11074 $result = $this->idDef->validate($name, $config, $context); 11075 if ($result === false) unset($attr['name']); 11076 else $attr['name'] = $result; 11077 return $attr; 11078 } 11079 11080 } 11081 11082 11083 11084 11085 11086 // must be called POST validation 11087 11092 class HTMLPurifier_AttrTransform_Nofollow extends HTMLPurifier_AttrTransform 11093 { 11094 private $parser; 11095 11096 public function __construct() { 11097 $this->parser = new HTMLPurifier_URIParser(); 11098 } 11099 11100 public function transform($attr, $config, $context) { 11101 11102 if (!isset($attr['href'])) { 11103 return $attr; 11104 } 11105 11106 // XXX Kind of inefficient 11107 $url = $this->parser->parse($attr['href']); 11108 $scheme = $url->getSchemeObj($config, $context); 11109 11110 if ($scheme->browsable && !$url->isLocal($config, $context)) { 11111 if (isset($attr['rel'])) { 11112 $rels = explode(' ', $attr); 11113 if (!in_array('nofollow', $rels)) { 11114 $rels[] = 'nofollow'; 11115 } 11116 $attr['rel'] = implode(' ', $rels); 11117 } else { 11118 $attr['rel'] = 'nofollow'; 11119 } 11120 } 11121 11122 return $attr; 11123 11124 } 11125 11126 } 11127 11128 11129 11130 11131 11132 class HTMLPurifier_AttrTransform_SafeEmbed extends HTMLPurifier_AttrTransform 11133 { 11134 public $name = "SafeEmbed"; 11135 11136 public function transform($attr, $config, $context) { 11137 $attr['allowscriptaccess'] = 'never'; 11138 $attr['allownetworking'] = 'internal'; 11139 $attr['type'] = 'application/x-shockwave-flash'; 11140 return $attr; 11141 } 11142 } 11143 11144 11145 11146 11147 11151 class HTMLPurifier_AttrTransform_SafeObject extends HTMLPurifier_AttrTransform 11152 { 11153 public $name = "SafeObject"; 11154 11155 function transform($attr, $config, $context) { 11156 if (!isset($attr['type'])) $attr['type'] = 'application/x-shockwave-flash'; 11157 return $attr; 11158 } 11159 } 11160 11161 11162 11163 11164 11177 class HTMLPurifier_AttrTransform_SafeParam extends HTMLPurifier_AttrTransform 11178 { 11179 public $name = "SafeParam"; 11180 private $uri; 11181 11182 public function __construct() { 11183 $this->uri = new HTMLPurifier_AttrDef_URI(true); // embedded 11184 $this->wmode = new HTMLPurifier_AttrDef_Enum(array('window', 'opaque', 'transparent')); 11185 } 11186 11187 public function transform($attr, $config, $context) { 11188 // If we add support for other objects, we'll need to alter the 11189 // transforms. 11190 switch ($attr['name']) { 11191 // application/x-shockwave-flash 11192 // Keep this synchronized with Injector/SafeObject.php 11193 case 'allowScriptAccess': 11194 $attr['value'] = 'never'; 11195 break; 11196 case 'allowNetworking': 11197 $attr['value'] = 'internal'; 11198 break; 11199 case 'allowFullScreen': 11200 if ($config->get('HTML.FlashAllowFullScreen')) { 11201 $attr['value'] = ($attr['value'] == 'true') ? 'true' : 'false'; 11202 } else { 11203 $attr['value'] = 'false'; 11204 } 11205 break; 11206 case 'wmode': 11207 $attr['value'] = $this->wmode->validate($attr['value'], $config, $context); 11208 break; 11209 case 'movie': 11210 case 'src': 11211 $attr['name'] = "movie"; 11212 $attr['value'] = $this->uri->validate($attr['value'], $config, $context); 11213 break; 11214 case 'flashvars': 11215 // we're going to allow arbitrary inputs to the SWF, on 11216 // the reasoning that it could only hack the SWF, not us. 11217 break; 11218 // add other cases to support other param name/value pairs 11219 default: 11220 $attr['name'] = $attr['value'] = null; 11221 } 11222 return $attr; 11223 } 11224 } 11225 11226 11227 11228 11229 11233 class HTMLPurifier_AttrTransform_ScriptRequired extends HTMLPurifier_AttrTransform 11234 { 11235 public function transform($attr, $config, $context) { 11236 if (!isset($attr['type'])) { 11237 $attr['type'] = 'text/javascript'; 11238 } 11239 return $attr; 11240 } 11241 } 11242 11243 11244 11245 11246 11247 // must be called POST validation 11248 11254 class HTMLPurifier_AttrTransform_TargetBlank extends HTMLPurifier_AttrTransform 11255 { 11256 private $parser; 11257 11258 public function __construct() { 11259 $this->parser = new HTMLPurifier_URIParser(); 11260 } 11261 11262 public function transform($attr, $config, $context) { 11263 11264 if (!isset($attr['href'])) { 11265 return $attr; 11266 } 11267 11268 // XXX Kind of inefficient 11269 $url = $this->parser->parse($attr['href']); 11270 $scheme = $url->getSchemeObj($config, $context); 11271 11272 if ($scheme->browsable && !$url->isBenign($config, $context)) { 11273 $attr['target'] = 'blank'; 11274 } 11275 11276 return $attr; 11277 11278 } 11279 11280 } 11281 11282 11283 11284 11285 11289 class HTMLPurifier_AttrTransform_Textarea extends HTMLPurifier_AttrTransform 11290 { 11291 11292 public function transform($attr, $config, $context) { 11293 // Calculated from Firefox 11294 if (!isset($attr['cols'])) $attr['cols'] = '22'; 11295 if (!isset($attr['rows'])) $attr['rows'] = '3'; 11296 return $attr; 11297 } 11298 11299 } 11300 11301 11302 11303 11304 11314 class HTMLPurifier_ChildDef_Chameleon extends HTMLPurifier_ChildDef 11315 { 11316 11320 public $inline; 11321 11325 public $block; 11326 11327 public $type = 'chameleon'; 11328 11333 public function __construct($inline, $block) { 11334 $this->inline = new HTMLPurifier_ChildDef_Optional($inline); 11335 $this->block = new HTMLPurifier_ChildDef_Optional($block); 11336 $this->elements = $this->block->elements; 11337 } 11338 11339 public function validateChildren($tokens_of_children, $config, $context) { 11340 if ($context->get('IsInline') === false) { 11341 return $this->block->validateChildren( 11342 $tokens_of_children, $config, $context); 11343 } else { 11344 return $this->inline->validateChildren( 11345 $tokens_of_children, $config, $context); 11346 } 11347 } 11348 } 11349 11350 11351 11352 11353 11360 class HTMLPurifier_ChildDef_Custom extends HTMLPurifier_ChildDef 11361 { 11362 public $type = 'custom'; 11363 public $allow_empty = false; 11367 public $dtd_regex; 11372 private $_pcre_regex; 11376 public function __construct($dtd_regex) { 11377 $this->dtd_regex = $dtd_regex; 11378 $this->_compileRegex(); 11379 } 11383 protected function _compileRegex() { 11384 $raw = str_replace(' ', '', $this->dtd_regex); 11385 if ($raw{0} != '(') { 11386 $raw = "($raw)"; 11387 } 11388 $el = '[#a-zA-Z0-9_.-]+'; 11389 $reg = $raw; 11390 11391 // COMPLICATED! AND MIGHT BE BUGGY! I HAVE NO CLUE WHAT I'M 11392 // DOING! Seriously: if there's problems, please report them. 11393 11394 // collect all elements into the $elements array 11395 preg_match_all("/$el/", $reg, $matches); 11396 foreach ($matches[0] as $match) { 11397 $this->elements[$match] = true; 11398 } 11399 11400 // setup all elements as parentheticals with leading commas 11401 $reg = preg_replace("/$el/", '(,\\0)', $reg); 11402 11403 // remove commas when they were not solicited 11404 $reg = preg_replace("/([^,(|]\(+),/", '\\1', $reg); 11405 11406 // remove all non-paranthetical commas: they are handled by first regex 11407 $reg = preg_replace("/,\(/", '(', $reg); 11408 11409 $this->_pcre_regex = $reg; 11410 } 11411 public function validateChildren($tokens_of_children, $config, $context) { 11412 $list_of_children = ''; 11413 $nesting = 0; // depth into the nest 11414 foreach ($tokens_of_children as $token) { 11415 if (!empty($token->is_whitespace)) continue; 11416 11417 $is_child = ($nesting == 0); // direct 11418 11419 if ($token instanceof HTMLPurifier_Token_Start) { 11420 $nesting++; 11421 } elseif ($token instanceof HTMLPurifier_Token_End) { 11422 $nesting--; 11423 } 11424 11425 if ($is_child) { 11426 $list_of_children .= $token->name . ','; 11427 } 11428 } 11429 // add leading comma to deal with stray comma declarations 11430 $list_of_children = ',' . rtrim($list_of_children, ','); 11431 $okay = 11432 preg_match( 11433 '/^,?'.$this->_pcre_regex.'$/', 11434 $list_of_children 11435 ); 11436 11437 return (bool) $okay; 11438 } 11439 } 11440 11441 11442 11443 11444 11452 class HTMLPurifier_ChildDef_Empty extends HTMLPurifier_ChildDef 11453 { 11454 public $allow_empty = true; 11455 public $type = 'empty'; 11456 public function __construct() {} 11457 public function validateChildren($tokens_of_children, $config, $context) { 11458 return array(); 11459 } 11460 } 11461 11462 11463 11464 11465 11469 class HTMLPurifier_ChildDef_List extends HTMLPurifier_ChildDef 11470 { 11471 public $type = 'list'; 11472 // lying a little bit, so that we can handle ul and ol ourselves 11473 // XXX: This whole business with 'wrap' is all a bit unsatisfactory 11474 public $elements = array('li' => true, 'ul' => true, 'ol' => true); 11475 public function validateChildren($tokens_of_children, $config, $context) { 11476 // Flag for subclasses 11477 $this->whitespace = false; 11478 11479 // if there are no tokens, delete parent node 11480 if (empty($tokens_of_children)) return false; 11481 11482 // the new set of children 11483 $result = array(); 11484 11485 // current depth into the nest 11486 $nesting = 0; 11487 11488 // a little sanity check to make sure it's not ALL whitespace 11489 $all_whitespace = true; 11490 11491 $seen_li = false; 11492 $need_close_li = false; 11493 11494 foreach ($tokens_of_children as $token) { 11495 if (!empty($token->is_whitespace)) { 11496 $result[] = $token; 11497 continue; 11498 } 11499 $all_whitespace = false; // phew, we're not talking about whitespace 11500 11501 if ($nesting == 1 && $need_close_li) { 11502 $result[] = new HTMLPurifier_Token_End('li'); 11503 $nesting--; 11504 $need_close_li = false; 11505 } 11506 11507 $is_child = ($nesting == 0); 11508 11509 if ($token instanceof HTMLPurifier_Token_Start) { 11510 $nesting++; 11511 } elseif ($token instanceof HTMLPurifier_Token_End) { 11512 $nesting--; 11513 } 11514 11515 if ($is_child) { 11516 if ($token->name === 'li') { 11517 // good 11518 $seen_li = true; 11519 } elseif ($token->name === 'ul' || $token->name === 'ol') { 11520 // we want to tuck this into the previous li 11521 $need_close_li = true; 11522 $nesting++; 11523 if (!$seen_li) { 11524 // create a new li element 11525 $result[] = new HTMLPurifier_Token_Start('li'); 11526 } else { 11527 // backtrack until </li> found 11528 while(true) { 11529 $t = array_pop($result); 11530 if ($t instanceof HTMLPurifier_Token_End) { 11531 // XXX actually, these invariants could very plausibly be violated 11532 // if we are doing silly things with modifying the set of allowed elements. 11533 // FORTUNATELY, it doesn't make a difference, since the allowed 11534 // elements are hard-coded here! 11535 if ($t->name !== 'li') { 11536 trigger_error("Only li present invariant violated in List ChildDef", E_USER_ERROR); 11537 return false; 11538 } 11539 break; 11540 } elseif ($t instanceof HTMLPurifier_Token_Empty) { // bleagh 11541 if ($t->name !== 'li') { 11542 trigger_error("Only li present invariant violated in List ChildDef", E_USER_ERROR); 11543 return false; 11544 } 11545 // XXX this should have a helper for it... 11546 $result[] = new HTMLPurifier_Token_Start('li', $t->attr, $t->line, $t->col, $t->armor); 11547 break; 11548 } else { 11549 if (!$t->is_whitespace) { 11550 trigger_error("Only whitespace present invariant violated in List ChildDef", E_USER_ERROR); 11551 return false; 11552 } 11553 } 11554 } 11555 } 11556 } else { 11557 // start wrapping (this doesn't precisely mimic 11558 // browser behavior, but what browsers do is kind of 11559 // hard to mimic in a standards compliant way 11560 // XXX Actually, this has no impact in practice, 11561 // because this gets handled earlier. Arguably, 11562 // we should rip out all of that processing 11563 $result[] = new HTMLPurifier_Token_Start('li'); 11564 $nesting++; 11565 $seen_li = true; 11566 $need_close_li = true; 11567 } 11568 } 11569 $result[] = $token; 11570 } 11571 if ($need_close_li) { 11572 $result[] = new HTMLPurifier_Token_End('li'); 11573 } 11574 if (empty($result)) return false; 11575 if ($all_whitespace) { 11576 return false; 11577 } 11578 if ($tokens_of_children == $result) return true; 11579 return $result; 11580 } 11581 } 11582 11583 11584 11585 11586 11590 class HTMLPurifier_ChildDef_Required extends HTMLPurifier_ChildDef 11591 { 11596 public $elements = array(); 11600 protected $whitespace = false; 11604 public function __construct($elements) { 11605 if (is_string($elements)) { 11606 $elements = str_replace(' ', '', $elements); 11607 $elements = explode('|', $elements); 11608 } 11609 $keys = array_keys($elements); 11610 if ($keys == array_keys($keys)) { 11611 $elements = array_flip($elements); 11612 foreach ($elements as $i => $x) { 11613 $elements[$i] = true; 11614 if (empty($i)) unset($elements[$i]); // remove blank 11615 } 11616 } 11617 $this->elements = $elements; 11618 } 11619 public $allow_empty = false; 11620 public $type = 'required'; 11621 public function validateChildren($tokens_of_children, $config, $context) { 11622 // Flag for subclasses 11623 $this->whitespace = false; 11624 11625 // if there are no tokens, delete parent node 11626 if (empty($tokens_of_children)) return false; 11627 11628 // the new set of children 11629 $result = array(); 11630 11631 // current depth into the nest 11632 $nesting = 0; 11633 11634 // whether or not we're deleting a node 11635 $is_deleting = false; 11636 11637 // whether or not parsed character data is allowed 11638 // this controls whether or not we silently drop a tag 11639 // or generate escaped HTML from it 11640 $pcdata_allowed = isset($this->elements['#PCDATA']); 11641 11642 // a little sanity check to make sure it's not ALL whitespace 11643 $all_whitespace = true; 11644 11645 // some configuration 11646 $escape_invalid_children = $config->get('Core.EscapeInvalidChildren'); 11647 11648 // generator 11649 $gen = new HTMLPurifier_Generator($config, $context); 11650 11651 foreach ($tokens_of_children as $token) { 11652 if (!empty($token->is_whitespace)) { 11653 $result[] = $token; 11654 continue; 11655 } 11656 $all_whitespace = false; // phew, we're not talking about whitespace 11657 11658 $is_child = ($nesting == 0); 11659 11660 if ($token instanceof HTMLPurifier_Token_Start) { 11661 $nesting++; 11662 } elseif ($token instanceof HTMLPurifier_Token_End) { 11663 $nesting--; 11664 } 11665 11666 if ($is_child) { 11667 $is_deleting = false; 11668 if (!isset($this->elements[$token->name])) { 11669 $is_deleting = true; 11670 if ($pcdata_allowed && $token instanceof HTMLPurifier_Token_Text) { 11671 $result[] = $token; 11672 } elseif ($pcdata_allowed && $escape_invalid_children) { 11673 $result[] = new HTMLPurifier_Token_Text( 11674 $gen->generateFromToken($token) 11675 ); 11676 } 11677 continue; 11678 } 11679 } 11680 if (!$is_deleting || ($pcdata_allowed && $token instanceof HTMLPurifier_Token_Text)) { 11681 $result[] = $token; 11682 } elseif ($pcdata_allowed && $escape_invalid_children) { 11683 $result[] = 11684 new HTMLPurifier_Token_Text( 11685 $gen->generateFromToken($token) 11686 ); 11687 } else { 11688 // drop silently 11689 } 11690 } 11691 if (empty($result)) return false; 11692 if ($all_whitespace) { 11693 $this->whitespace = true; 11694 return false; 11695 } 11696 if ($tokens_of_children == $result) return true; 11697 return $result; 11698 } 11699 } 11700 11701 11702 11703 11704 11712 class HTMLPurifier_ChildDef_Optional extends HTMLPurifier_ChildDef_Required 11713 { 11714 public $allow_empty = true; 11715 public $type = 'optional'; 11716 public function validateChildren($tokens_of_children, $config, $context) { 11717 $result = parent::validateChildren($tokens_of_children, $config, $context); 11718 // we assume that $tokens_of_children is not modified 11719 if ($result === false) { 11720 if (empty($tokens_of_children)) return true; 11721 elseif ($this->whitespace) return $tokens_of_children; 11722 else return array(); 11723 } 11724 return $result; 11725 } 11726 } 11727 11728 11729 11730 11731 11735 class HTMLPurifier_ChildDef_StrictBlockquote extends HTMLPurifier_ChildDef_Required 11736 { 11737 protected $real_elements; 11738 protected $fake_elements; 11739 public $allow_empty = true; 11740 public $type = 'strictblockquote'; 11741 protected $init = false; 11742 11747 public function getAllowedElements($config) { 11748 $this->init($config); 11749 return $this->fake_elements; 11750 } 11751 11752 public function validateChildren($tokens_of_children, $config, $context) { 11753 11754 $this->init($config); 11755 11756 // trick the parent class into thinking it allows more 11757 $this->elements = $this->fake_elements; 11758 $result = parent::validateChildren($tokens_of_children, $config, $context); 11759 $this->elements = $this->real_elements; 11760 11761 if ($result === false) return array(); 11762 if ($result === true) $result = $tokens_of_children; 11763 11764 $def = $config->getHTMLDefinition(); 11765 $block_wrap_start = new HTMLPurifier_Token_Start($def->info_block_wrapper); 11766 $block_wrap_end = new HTMLPurifier_Token_End( $def->info_block_wrapper); 11767 $is_inline = false; 11768 $depth = 0; 11769 $ret = array(); 11770 11771 // assuming that there are no comment tokens 11772 foreach ($result as $i => $token) { 11773 $token = $result[$i]; 11774 // ifs are nested for readability 11775 if (!$is_inline) { 11776 if (!$depth) { 11777 if ( 11778 ($token instanceof HTMLPurifier_Token_Text && !$token->is_whitespace) || 11779 (!$token instanceof HTMLPurifier_Token_Text && !isset($this->elements[$token->name])) 11780 ) { 11781 $is_inline = true; 11782 $ret[] = $block_wrap_start; 11783 } 11784 } 11785 } else { 11786 if (!$depth) { 11787 // starting tokens have been inline text / empty 11788 if ($token instanceof HTMLPurifier_Token_Start || $token instanceof HTMLPurifier_Token_Empty) { 11789 if (isset($this->elements[$token->name])) { 11790 // ended 11791 $ret[] = $block_wrap_end; 11792 $is_inline = false; 11793 } 11794 } 11795 } 11796 } 11797 $ret[] = $token; 11798 if ($token instanceof HTMLPurifier_Token_Start) $depth++; 11799 if ($token instanceof HTMLPurifier_Token_End) $depth--; 11800 } 11801 if ($is_inline) $ret[] = $block_wrap_end; 11802 return $ret; 11803 } 11804 11805 private function init($config) { 11806 if (!$this->init) { 11807 $def = $config->getHTMLDefinition(); 11808 // allow all inline elements 11809 $this->real_elements = $this->elements; 11810 $this->fake_elements = $def->info_content_sets['Flow']; 11811 $this->fake_elements['#PCDATA'] = true; 11812 $this->init = true; 11813 } 11814 } 11815 } 11816 11817 11818 11819 11820 11850 class HTMLPurifier_ChildDef_Table extends HTMLPurifier_ChildDef 11851 { 11852 public $allow_empty = false; 11853 public $type = 'table'; 11854 public $elements = array('tr' => true, 'tbody' => true, 'thead' => true, 11855 'tfoot' => true, 'caption' => true, 'colgroup' => true, 'col' => true); 11856 public function __construct() {} 11857 public function validateChildren($tokens_of_children, $config, $context) { 11858 if (empty($tokens_of_children)) return false; 11859 11860 // this ensures that the loop gets run one last time before closing 11861 // up. It's a little bit of a hack, but it works! Just make sure you 11862 // get rid of the token later. 11863 $tokens_of_children[] = false; 11864 11865 // only one of these elements is allowed in a table 11866 $caption = false; 11867 $thead = false; 11868 $tfoot = false; 11869 11870 // as many of these as you want 11871 $cols = array(); 11872 $content = array(); 11873 11874 $nesting = 0; // current depth so we can determine nodes 11875 $is_collecting = false; // are we globbing together tokens to package 11876 // into one of the collectors? 11877 $collection = array(); // collected nodes 11878 $tag_index = 0; // the first node might be whitespace, 11879 // so this tells us where the start tag is 11880 $tbody_mode = false; // if true, then we need to wrap any stray 11881 // <tr>s with a <tbody>. 11882 11883 foreach ($tokens_of_children as $token) { 11884 $is_child = ($nesting == 0); 11885 11886 if ($token === false) { 11887 // terminating sequence started 11888 } elseif ($token instanceof HTMLPurifier_Token_Start) { 11889 $nesting++; 11890 } elseif ($token instanceof HTMLPurifier_Token_End) { 11891 $nesting--; 11892 } 11893 11894 // handle node collection 11895 if ($is_collecting) { 11896 if ($is_child) { 11897 // okay, let's stash the tokens away 11898 // first token tells us the type of the collection 11899 switch ($collection[$tag_index]->name) { 11900 case 'tbody': 11901 $tbody_mode = true; 11902 case 'tr': 11903 $content[] = $collection; 11904 break; 11905 case 'caption': 11906 if ($caption !== false) break; 11907 $caption = $collection; 11908 break; 11909 case 'thead': 11910 case 'tfoot': 11911 $tbody_mode = true; 11912 // XXX This breaks rendering properties with 11913 // Firefox, which never floats a <thead> to 11914 // the top. Ever. (Our scheme will float the 11915 // first <thead> to the top.) So maybe 11916 // <thead>s that are not first should be 11917 // turned into <tbody>? Very tricky, indeed. 11918 11919 // access the appropriate variable, $thead or $tfoot 11920 $var = $collection[$tag_index]->name; 11921 if ($$var === false) { 11922 $$var = $collection; 11923 } else { 11924 // Oops, there's a second one! What 11925 // should we do? Current behavior is to 11926 // transmutate the first and last entries into 11927 // tbody tags, and then put into content. 11928 // Maybe a better idea is to *attach 11929 // it* to the existing thead or tfoot? 11930 // We don't do this, because Firefox 11931 // doesn't float an extra tfoot to the 11932 // bottom like it does for the first one. 11933 $collection[$tag_index]->name = 'tbody'; 11934 $collection[count($collection)-1]->name = 'tbody'; 11935 $content[] = $collection; 11936 } 11937 break; 11938 case 'colgroup': 11939 $cols[] = $collection; 11940 break; 11941 } 11942 $collection = array(); 11943 $is_collecting = false; 11944 $tag_index = 0; 11945 } else { 11946 // add the node to the collection 11947 $collection[] = $token; 11948 } 11949 } 11950 11951 // terminate 11952 if ($token === false) break; 11953 11954 if ($is_child) { 11955 // determine what we're dealing with 11956 if ($token->name == 'col') { 11957 // the only empty tag in the possie, we can handle it 11958 // immediately 11959 $cols[] = array_merge($collection, array($token)); 11960 $collection = array(); 11961 $tag_index = 0; 11962 continue; 11963 } 11964 switch($token->name) { 11965 case 'caption': 11966 case 'colgroup': 11967 case 'thead': 11968 case 'tfoot': 11969 case 'tbody': 11970 case 'tr': 11971 $is_collecting = true; 11972 $collection[] = $token; 11973 continue; 11974 default: 11975 if (!empty($token->is_whitespace)) { 11976 $collection[] = $token; 11977 $tag_index++; 11978 } 11979 continue; 11980 } 11981 } 11982 } 11983 11984 if (empty($content)) return false; 11985 11986 $ret = array(); 11987 if ($caption !== false) $ret = array_merge($ret, $caption); 11988 if ($cols !== false) foreach ($cols as $token_array) $ret = array_merge($ret, $token_array); 11989 if ($thead !== false) $ret = array_merge($ret, $thead); 11990 if ($tfoot !== false) $ret = array_merge($ret, $tfoot); 11991 11992 if ($tbody_mode) { 11993 // a little tricky, since the start of the collection may be 11994 // whitespace 11995 $inside_tbody = false; 11996 foreach ($content as $token_array) { 11997 // find the starting token 11998 foreach ($token_array as $t) { 11999 if ($t->name === 'tr' || $t->name === 'tbody') { 12000 break; 12001 } 12002 } // iterator variable carries over 12003 if ($t->name === 'tr') { 12004 if ($inside_tbody) { 12005 $ret = array_merge($ret, $token_array); 12006 } else { 12007 $ret[] = new HTMLPurifier_Token_Start('tbody'); 12008 $ret = array_merge($ret, $token_array); 12009 $inside_tbody = true; 12010 } 12011 } elseif ($t->name === 'tbody') { 12012 if ($inside_tbody) { 12013 $ret[] = new HTMLPurifier_Token_End('tbody'); 12014 $inside_tbody = false; 12015 $ret = array_merge($ret, $token_array); 12016 } else { 12017 $ret = array_merge($ret, $token_array); 12018 } 12019 } else { 12020 trigger_error("tr/tbody in content invariant failed in Table ChildDef", E_USER_ERROR); 12021 } 12022 } 12023 if ($inside_tbody) { 12024 $ret[] = new HTMLPurifier_Token_End('tbody'); 12025 } 12026 } else { 12027 foreach ($content as $token_array) { 12028 // invariant: everything in here is <tr>s 12029 $ret = array_merge($ret, $token_array); 12030 } 12031 } 12032 12033 if (!empty($collection) && $is_collecting == false){ 12034 // grab the trailing space 12035 $ret = array_merge($ret, $collection); 12036 } 12037 12038 array_pop($tokens_of_children); // remove phantom token 12039 12040 return ($ret === $tokens_of_children) ? true : $ret; 12041 12042 } 12043 } 12044 12045 12046 12047 12048 12049 class HTMLPurifier_DefinitionCache_Decorator extends HTMLPurifier_DefinitionCache 12050 { 12051 12055 public $cache; 12056 12057 public function __construct() {} 12058 12063 public function decorate(&$cache) { 12064 $decorator = $this->copy(); 12065 // reference is necessary for mocks in PHP 4 12066 $decorator->cache =& $cache; 12067 $decorator->type = $cache->type; 12068 return $decorator; 12069 } 12070 12074 public function copy() { 12075 return new HTMLPurifier_DefinitionCache_Decorator(); 12076 } 12077 12078 public function add($def, $config) { 12079 return $this->cache->add($def, $config); 12080 } 12081 12082 public function set($def, $config) { 12083 return $this->cache->set($def, $config); 12084 } 12085 12086 public function replace($def, $config) { 12087 return $this->cache->replace($def, $config); 12088 } 12089 12090 public function get($config) { 12091 return $this->cache->get($config); 12092 } 12093 12094 public function remove($config) { 12095 return $this->cache->remove($config); 12096 } 12097 12098 public function flush($config) { 12099 return $this->cache->flush($config); 12100 } 12101 12102 public function cleanup($config) { 12103 return $this->cache->cleanup($config); 12104 } 12105 12106 } 12107 12108 12109 12110 12111 12115 class HTMLPurifier_DefinitionCache_Null extends HTMLPurifier_DefinitionCache 12116 { 12117 12118 public function add($def, $config) { 12119 return false; 12120 } 12121 12122 public function set($def, $config) { 12123 return false; 12124 } 12125 12126 public function replace($def, $config) { 12127 return false; 12128 } 12129 12130 public function remove($config) { 12131 return false; 12132 } 12133 12134 public function get($config) { 12135 return false; 12136 } 12137 12138 public function flush($config) { 12139 return false; 12140 } 12141 12142 public function cleanup($config) { 12143 return false; 12144 } 12145 12146 } 12147 12148 12149 12150 12151 12152 class HTMLPurifier_DefinitionCache_Serializer extends 12153 HTMLPurifier_DefinitionCache 12154 { 12155 12156 public function add($def, $config) { 12157 if (!$this->checkDefType($def)) return; 12158 $file = $this->generateFilePath($config); 12159 if (file_exists($file)) return false; 12160 if (!$this->_prepareDir($config)) return false; 12161 return $this->_write($file, serialize($def), $config); 12162 } 12163 12164 public function set($def, $config) { 12165 if (!$this->checkDefType($def)) return; 12166 $file = $this->generateFilePath($config); 12167 if (!$this->_prepareDir($config)) return false; 12168 return $this->_write($file, serialize($def), $config); 12169 } 12170 12171 public function replace($def, $config) { 12172 if (!$this->checkDefType($def)) return; 12173 $file = $this->generateFilePath($config); 12174 if (!file_exists($file)) return false; 12175 if (!$this->_prepareDir($config)) return false; 12176 return $this->_write($file, serialize($def), $config); 12177 } 12178 12179 public function get($config) { 12180 $file = $this->generateFilePath($config); 12181 if (!file_exists($file)) return false; 12182 return unserialize(file_get_contents($file)); 12183 } 12184 12185 public function remove($config) { 12186 $file = $this->generateFilePath($config); 12187 if (!file_exists($file)) return false; 12188 return unlink($file); 12189 } 12190 12191 public function flush($config) { 12192 if (!$this->_prepareDir($config)) return false; 12193 $dir = $this->generateDirectoryPath($config); 12194 $dh = opendir($dir); 12195 while (false !== ($filename = readdir($dh))) { 12196 if (empty($filename)) continue; 12197 if ($filename[0] === '.') continue; 12198 unlink($dir . '/' . $filename); 12199 } 12200 } 12201 12202 public function cleanup($config) { 12203 if (!$this->_prepareDir($config)) return false; 12204 $dir = $this->generateDirectoryPath($config); 12205 $dh = opendir($dir); 12206 while (false !== ($filename = readdir($dh))) { 12207 if (empty($filename)) continue; 12208 if ($filename[0] === '.') continue; 12209 $key = substr($filename, 0, strlen($filename) - 4); 12210 if ($this->isOld($key, $config)) unlink($dir . '/' . $filename); 12211 } 12212 } 12213 12219 public function generateFilePath($config) { 12220 $key = $this->generateKey($config); 12221 return $this->generateDirectoryPath($config) . '/' . $key . '.ser'; 12222 } 12223 12229 public function generateDirectoryPath($config) { 12230 $base = $this->generateBaseDirectoryPath($config); 12231 return $base . '/' . $this->type; 12232 } 12233 12239 public function generateBaseDirectoryPath($config) { 12240 $base = $config->get('Cache.SerializerPath'); 12241 $base = is_null($base) ? HTMLPURIFIER_PREFIX . '/HTMLPurifier/DefinitionCache/Serializer' : $base; 12242 return $base; 12243 } 12244 12252 private function _write($file, $data, $config) { 12253 $result = file_put_contents($file, $data); 12254 if ($result !== false) { 12255 // set permissions of the new file (no execute) 12256 $chmod = $config->get('Cache.SerializerPermissions'); 12257 if (!$chmod) { 12258 $chmod = 0644; // invalid config or simpletest 12259 } 12260 $chmod = $chmod & 0666; 12261 chmod($file, $chmod); 12262 } 12263 return $result; 12264 } 12265 12271 private function _prepareDir($config) { 12272 $directory = $this->generateDirectoryPath($config); 12273 $chmod = $config->get('Cache.SerializerPermissions'); 12274 if (!$chmod) { 12275 $chmod = 0755; // invalid config or simpletest 12276 } 12277 if (!is_dir($directory)) { 12278 $base = $this->generateBaseDirectoryPath($config); 12279 if (!is_dir($base)) { 12280 trigger_error('Base directory '.$base.' does not exist, 12281 please create or change using %Cache.SerializerPath', 12282 E_USER_WARNING); 12283 return false; 12284 } elseif (!$this->_testPermissions($base, $chmod)) { 12285 return false; 12286 } 12287 $old = umask(0000); 12288 mkdir($directory, $chmod); 12289 umask($old); 12290 } elseif (!$this->_testPermissions($directory, $chmod)) { 12291 return false; 12292 } 12293 return true; 12294 } 12295 12303 private function _testPermissions($dir, $chmod) { 12304 // early abort, if it is writable, everything is hunky-dory 12305 if (is_writable($dir)) return true; 12306 if (!is_dir($dir)) { 12307 // generally, you'll want to handle this beforehand 12308 // so a more specific error message can be given 12309 trigger_error('Directory '.$dir.' does not exist', 12310 E_USER_WARNING); 12311 return false; 12312 } 12313 if (function_exists('posix_getuid')) { 12314 // POSIX system, we can give more specific advice 12315 if (fileowner($dir) === posix_getuid()) { 12316 // we can chmod it ourselves 12317 $chmod = $chmod | 0700; 12318 if (chmod($dir, $chmod)) return true; 12319 } elseif (filegroup($dir) === posix_getgid()) { 12320 $chmod = $chmod | 0070; 12321 } else { 12322 // PHP's probably running as nobody, so we'll 12323 // need to give global permissions 12324 $chmod = $chmod | 0777; 12325 } 12326 trigger_error('Directory '.$dir.' not writable, '. 12327 'please chmod to ' . decoct($chmod), 12328 E_USER_WARNING); 12329 } else { 12330 // generic error message 12331 trigger_error('Directory '.$dir.' not writable, '. 12332 'please alter file permissions', 12333 E_USER_WARNING); 12334 } 12335 return false; 12336 } 12337 12338 } 12339 12340 12341 12342 12343 12348 class HTMLPurifier_DefinitionCache_Decorator_Cleanup extends 12349 HTMLPurifier_DefinitionCache_Decorator 12350 { 12351 12352 public $name = 'Cleanup'; 12353 12354 public function copy() { 12355 return new HTMLPurifier_DefinitionCache_Decorator_Cleanup(); 12356 } 12357 12358 public function add($def, $config) { 12359 $status = parent::add($def, $config); 12360 if (!$status) parent::cleanup($config); 12361 return $status; 12362 } 12363 12364 public function set($def, $config) { 12365 $status = parent::set($def, $config); 12366 if (!$status) parent::cleanup($config); 12367 return $status; 12368 } 12369 12370 public function replace($def, $config) { 12371 $status = parent::replace($def, $config); 12372 if (!$status) parent::cleanup($config); 12373 return $status; 12374 } 12375 12376 public function get($config) { 12377 $ret = parent::get($config); 12378 if (!$ret) parent::cleanup($config); 12379 return $ret; 12380 } 12381 12382 } 12383 12384 12385 12386 12387 12393 class HTMLPurifier_DefinitionCache_Decorator_Memory extends 12394 HTMLPurifier_DefinitionCache_Decorator 12395 { 12396 12397 protected $definitions; 12398 public $name = 'Memory'; 12399 12400 public function copy() { 12401 return new HTMLPurifier_DefinitionCache_Decorator_Memory(); 12402 } 12403 12404 public function add($def, $config) { 12405 $status = parent::add($def, $config); 12406 if ($status) $this->definitions[$this->generateKey($config)] = $def; 12407 return $status; 12408 } 12409 12410 public function set($def, $config) { 12411 $status = parent::set($def, $config); 12412 if ($status) $this->definitions[$this->generateKey($config)] = $def; 12413 return $status; 12414 } 12415 12416 public function replace($def, $config) { 12417 $status = parent::replace($def, $config); 12418 if ($status) $this->definitions[$this->generateKey($config)] = $def; 12419 return $status; 12420 } 12421 12422 public function get($config) { 12423 $key = $this->generateKey($config); 12424 if (isset($this->definitions[$key])) return $this->definitions[$key]; 12425 $this->definitions[$key] = parent::get($config); 12426 return $this->definitions[$key]; 12427 } 12428 12429 } 12430 12431 12432 12433 12434 12439 class HTMLPurifier_HTMLModule_Bdo extends HTMLPurifier_HTMLModule 12440 { 12441 12442 public $name = 'Bdo'; 12443 public $attr_collections = array( 12444 'I18N' => array('dir' => false) 12445 ); 12446 12447 public function setup($config) { 12448 $bdo = $this->addElement( 12449 'bdo', 'Inline', 'Inline', array('Core', 'Lang'), 12450 array( 12451 'dir' => 'Enum#ltr,rtl', // required 12452 // The Abstract Module specification has the attribute 12453 // inclusions wrong for bdo: bdo allows Lang 12454 ) 12455 ); 12456 $bdo->attr_transform_post['required-dir'] = new HTMLPurifier_AttrTransform_BdoDir(); 12457 12458 $this->attr_collections['I18N']['dir'] = 'Enum#ltr,rtl'; 12459 } 12460 12461 } 12462 12463 12464 12465 12466 12467 class HTMLPurifier_HTMLModule_CommonAttributes extends HTMLPurifier_HTMLModule 12468 { 12469 public $name = 'CommonAttributes'; 12470 12471 public $attr_collections = array( 12472 'Core' => array( 12473 0 => array('Style'), 12474 // 'xml:space' => false, 12475 'class' => 'Class', 12476 'id' => 'ID', 12477 'title' => 'CDATA', 12478 ), 12479 'Lang' => array(), 12480 'I18N' => array( 12481 0 => array('Lang'), // proprietary, for xml:lang/lang 12482 ), 12483 'Common' => array( 12484 0 => array('Core', 'I18N') 12485 ) 12486 ); 12487 12488 } 12489 12490 12491 12492 12493 12498 class HTMLPurifier_HTMLModule_Edit extends HTMLPurifier_HTMLModule 12499 { 12500 12501 public $name = 'Edit'; 12502 12503 public function setup($config) { 12504 $contents = 'Chameleon: #PCDATA | Inline ! #PCDATA | Flow'; 12505 $attr = array( 12506 'cite' => 'URI', 12507 // 'datetime' => 'Datetime', // not implemented 12508 ); 12509 $this->addElement('del', 'Inline', $contents, 'Common', $attr); 12510 $this->addElement('ins', 'Inline', $contents, 'Common', $attr); 12511 } 12512 12513 // HTML 4.01 specifies that ins/del must not contain block 12514 // elements when used in an inline context, chameleon is 12515 // a complicated workaround to acheive this effect 12516 12517 // Inline context ! Block context (exclamation mark is 12518 // separator, see getChildDef for parsing) 12519 12520 public $defines_child_def = true; 12521 public function getChildDef($def) { 12522 if ($def->content_model_type != 'chameleon') return false; 12523 $value = explode('!', $def->content_model); 12524 return new HTMLPurifier_ChildDef_Chameleon($value[0], $value[1]); 12525 } 12526 12527 } 12528 12529 12530 12531 12532 12536 class HTMLPurifier_HTMLModule_Forms extends HTMLPurifier_HTMLModule 12537 { 12538 public $name = 'Forms'; 12539 public $safe = false; 12540 12541 public $content_sets = array( 12542 'Block' => 'Form', 12543 'Inline' => 'Formctrl', 12544 ); 12545 12546 public function setup($config) { 12547 $form = $this->addElement('form', 'Form', 12548 'Required: Heading | List | Block | fieldset', 'Common', array( 12549 'accept' => 'ContentTypes', 12550 'accept-charset' => 'Charsets', 12551 'action*' => 'URI', 12552 'method' => 'Enum#get,post', 12553 // really ContentType, but these two are the only ones used today 12554 'enctype' => 'Enum#application/x-www-form-urlencoded,multipart/form-data', 12555 )); 12556 $form->excludes = array('form' => true); 12557 12558 $input = $this->addElement('input', 'Formctrl', 'Empty', 'Common', array( 12559 'accept' => 'ContentTypes', 12560 'accesskey' => 'Character', 12561 'alt' => 'Text', 12562 'checked' => 'Bool#checked', 12563 'disabled' => 'Bool#disabled', 12564 'maxlength' => 'Number', 12565 'name' => 'CDATA', 12566 'readonly' => 'Bool#readonly', 12567 'size' => 'Number', 12568 'src' => 'URI#embedded', 12569 'tabindex' => 'Number', 12570 'type' => 'Enum#text,password,checkbox,button,radio,submit,reset,file,hidden,image', 12571 'value' => 'CDATA', 12572 )); 12573 $input->attr_transform_post[] = new HTMLPurifier_AttrTransform_Input(); 12574 12575 $this->addElement('select', 'Formctrl', 'Required: optgroup | option', 'Common', array( 12576 'disabled' => 'Bool#disabled', 12577 'multiple' => 'Bool#multiple', 12578 'name' => 'CDATA', 12579 'size' => 'Number', 12580 'tabindex' => 'Number', 12581 )); 12582 12583 $this->addElement('option', false, 'Optional: #PCDATA', 'Common', array( 12584 'disabled' => 'Bool#disabled', 12585 'label' => 'Text', 12586 'selected' => 'Bool#selected', 12587 'value' => 'CDATA', 12588 )); 12589 // It's illegal for there to be more than one selected, but not 12590 // be multiple. Also, no selected means undefined behavior. This might 12591 // be difficult to implement; perhaps an injector, or a context variable. 12592 12593 $textarea = $this->addElement('textarea', 'Formctrl', 'Optional: #PCDATA', 'Common', array( 12594 'accesskey' => 'Character', 12595 'cols*' => 'Number', 12596 'disabled' => 'Bool#disabled', 12597 'name' => 'CDATA', 12598 'readonly' => 'Bool#readonly', 12599 'rows*' => 'Number', 12600 'tabindex' => 'Number', 12601 )); 12602 $textarea->attr_transform_pre[] = new HTMLPurifier_AttrTransform_Textarea(); 12603 12604 $button = $this->addElement('button', 'Formctrl', 'Optional: #PCDATA | Heading | List | Block | Inline', 'Common', array( 12605 'accesskey' => 'Character', 12606 'disabled' => 'Bool#disabled', 12607 'name' => 'CDATA', 12608 'tabindex' => 'Number', 12609 'type' => 'Enum#button,submit,reset', 12610 'value' => 'CDATA', 12611 )); 12612 12613 // For exclusions, ideally we'd specify content sets, not literal elements 12614 $button->excludes = $this->makeLookup( 12615 'form', 'fieldset', // Form 12616 'input', 'select', 'textarea', 'label', 'button', // Formctrl 12617 'a', // as per HTML 4.01 spec, this is omitted by modularization 12618 'isindex', 'iframe' // legacy items 12619 ); 12620 12621 // Extra exclusion: img usemap="" is not permitted within this element. 12622 // We'll omit this for now, since we don't have any good way of 12623 // indicating it yet. 12624 12625 // This is HIGHLY user-unfriendly; we need a custom child-def for this 12626 $this->addElement('fieldset', 'Form', 'Custom: (#WS?,legend,(Flow|#PCDATA)*)', 'Common'); 12627 12628 $label = $this->addElement('label', 'Formctrl', 'Optional: #PCDATA | Inline', 'Common', array( 12629 'accesskey' => 'Character', 12630 // 'for' => 'IDREF', // IDREF not implemented, cannot allow 12631 )); 12632 $label->excludes = array('label' => true); 12633 12634 $this->addElement('legend', false, 'Optional: #PCDATA | Inline', 'Common', array( 12635 'accesskey' => 'Character', 12636 )); 12637 12638 $this->addElement('optgroup', false, 'Required: option', 'Common', array( 12639 'disabled' => 'Bool#disabled', 12640 'label*' => 'Text', 12641 )); 12642 12643 // Don't forget an injector for <isindex>. This one's a little complex 12644 // because it maps to multiple elements. 12645 12646 } 12647 } 12648 12649 12650 12651 12652 12656 class HTMLPurifier_HTMLModule_Hypertext extends HTMLPurifier_HTMLModule 12657 { 12658 12659 public $name = 'Hypertext'; 12660 12661 public function setup($config) { 12662 $a = $this->addElement( 12663 'a', 'Inline', 'Inline', 'Common', 12664 array( 12665 // 'accesskey' => 'Character', 12666 // 'charset' => 'Charset', 12667 'href' => 'URI', 12668 // 'hreflang' => 'LanguageCode', 12669 'rel' => new HTMLPurifier_AttrDef_HTML_LinkTypes('rel'), 12670 'rev' => new HTMLPurifier_AttrDef_HTML_LinkTypes('rev'), 12671 // 'tabindex' => 'Number', 12672 // 'type' => 'ContentType', 12673 ) 12674 ); 12675 $a->formatting = true; 12676 $a->excludes = array('a' => true); 12677 } 12678 12679 } 12680 12681 12682 12683 12684 12692 class HTMLPurifier_HTMLModule_Iframe extends HTMLPurifier_HTMLModule 12693 { 12694 12695 public $name = 'Iframe'; 12696 public $safe = false; 12697 12698 public function setup($config) { 12699 if ($config->get('HTML.SafeIframe')) { 12700 $this->safe = true; 12701 } 12702 $this->addElement( 12703 'iframe', 'Inline', 'Flow', 'Common', 12704 array( 12705 'src' => 'URI#embedded', 12706 'width' => 'Length', 12707 'height' => 'Length', 12708 'name' => 'ID', 12709 'scrolling' => 'Enum#yes,no,auto', 12710 'frameborder' => 'Enum#0,1', 12711 'longdesc' => 'URI', 12712 'marginheight' => 'Pixels', 12713 'marginwidth' => 'Pixels', 12714 ) 12715 ); 12716 } 12717 12718 } 12719 12720 12721 12722 12723 12729 class HTMLPurifier_HTMLModule_Image extends HTMLPurifier_HTMLModule 12730 { 12731 12732 public $name = 'Image'; 12733 12734 public function setup($config) { 12735 $max = $config->get('HTML.MaxImgLength'); 12736 $img = $this->addElement( 12737 'img', 'Inline', 'Empty', 'Common', 12738 array( 12739 'alt*' => 'Text', 12740 // According to the spec, it's Length, but percents can 12741 // be abused, so we allow only Pixels. 12742 'height' => 'Pixels#' . $max, 12743 'width' => 'Pixels#' . $max, 12744 'longdesc' => 'URI', 12745 'src*' => new HTMLPurifier_AttrDef_URI(true), // embedded 12746 ) 12747 ); 12748 if ($max === null || $config->get('HTML.Trusted')) { 12749 $img->attr['height'] = 12750 $img->attr['width'] = 'Length'; 12751 } 12752 12753 // kind of strange, but splitting things up would be inefficient 12754 $img->attr_transform_pre[] = 12755 $img->attr_transform_post[] = 12756 new HTMLPurifier_AttrTransform_ImgRequired(); 12757 } 12758 12759 } 12760 12761 12762 12763 12764 12781 class HTMLPurifier_HTMLModule_Legacy extends HTMLPurifier_HTMLModule 12782 { 12783 12784 public $name = 'Legacy'; 12785 12786 public function setup($config) { 12787 12788 $this->addElement('basefont', 'Inline', 'Empty', false, array( 12789 'color' => 'Color', 12790 'face' => 'Text', // extremely broad, we should 12791 'size' => 'Text', // tighten it 12792 'id' => 'ID' 12793 )); 12794 $this->addElement('center', 'Block', 'Flow', 'Common'); 12795 $this->addElement('dir', 'Block', 'Required: li', 'Common', array( 12796 'compact' => 'Bool#compact' 12797 )); 12798 $this->addElement('font', 'Inline', 'Inline', array('Core', 'I18N'), array( 12799 'color' => 'Color', 12800 'face' => 'Text', // extremely broad, we should 12801 'size' => 'Text', // tighten it 12802 )); 12803 $this->addElement('menu', 'Block', 'Required: li', 'Common', array( 12804 'compact' => 'Bool#compact' 12805 )); 12806 12807 $s = $this->addElement('s', 'Inline', 'Inline', 'Common'); 12808 $s->formatting = true; 12809 12810 $strike = $this->addElement('strike', 'Inline', 'Inline', 'Common'); 12811 $strike->formatting = true; 12812 12813 $u = $this->addElement('u', 'Inline', 'Inline', 'Common'); 12814 $u->formatting = true; 12815 12816 // setup modifications to old elements 12817 12818 $align = 'Enum#left,right,center,justify'; 12819 12820 $address = $this->addBlankElement('address'); 12821 $address->content_model = 'Inline | #PCDATA | p'; 12822 $address->content_model_type = 'optional'; 12823 $address->child = false; 12824 12825 $blockquote = $this->addBlankElement('blockquote'); 12826 $blockquote->content_model = 'Flow | #PCDATA'; 12827 $blockquote->content_model_type = 'optional'; 12828 $blockquote->child = false; 12829 12830 $br = $this->addBlankElement('br'); 12831 $br->attr['clear'] = 'Enum#left,all,right,none'; 12832 12833 $caption = $this->addBlankElement('caption'); 12834 $caption->attr['align'] = 'Enum#top,bottom,left,right'; 12835 12836 $div = $this->addBlankElement('div'); 12837 $div->attr['align'] = $align; 12838 12839 $dl = $this->addBlankElement('dl'); 12840 $dl->attr['compact'] = 'Bool#compact'; 12841 12842 for ($i = 1; $i <= 6; $i++) { 12843 $h = $this->addBlankElement("h$i"); 12844 $h->attr['align'] = $align; 12845 } 12846 12847 $hr = $this->addBlankElement('hr'); 12848 $hr->attr['align'] = $align; 12849 $hr->attr['noshade'] = 'Bool#noshade'; 12850 $hr->attr['size'] = 'Pixels'; 12851 $hr->attr['width'] = 'Length'; 12852 12853 $img = $this->addBlankElement('img'); 12854 $img->attr['align'] = 'IAlign'; 12855 $img->attr['border'] = 'Pixels'; 12856 $img->attr['hspace'] = 'Pixels'; 12857 $img->attr['vspace'] = 'Pixels'; 12858 12859 // figure out this integer business 12860 12861 $li = $this->addBlankElement('li'); 12862 $li->attr['value'] = new HTMLPurifier_AttrDef_Integer(); 12863 $li->attr['type'] = 'Enum#s:1,i,I,a,A,disc,square,circle'; 12864 12865 $ol = $this->addBlankElement('ol'); 12866 $ol->attr['compact'] = 'Bool#compact'; 12867 $ol->attr['start'] = new HTMLPurifier_AttrDef_Integer(); 12868 $ol->attr['type'] = 'Enum#s:1,i,I,a,A'; 12869 12870 $p = $this->addBlankElement('p'); 12871 $p->attr['align'] = $align; 12872 12873 $pre = $this->addBlankElement('pre'); 12874 $pre->attr['width'] = 'Number'; 12875 12876 // script omitted 12877 12878 $table = $this->addBlankElement('table'); 12879 $table->attr['align'] = 'Enum#left,center,right'; 12880 $table->attr['bgcolor'] = 'Color'; 12881 12882 $tr = $this->addBlankElement('tr'); 12883 $tr->attr['bgcolor'] = 'Color'; 12884 12885 $th = $this->addBlankElement('th'); 12886 $th->attr['bgcolor'] = 'Color'; 12887 $th->attr['height'] = 'Length'; 12888 $th->attr['nowrap'] = 'Bool#nowrap'; 12889 $th->attr['width'] = 'Length'; 12890 12891 $td = $this->addBlankElement('td'); 12892 $td->attr['bgcolor'] = 'Color'; 12893 $td->attr['height'] = 'Length'; 12894 $td->attr['nowrap'] = 'Bool#nowrap'; 12895 $td->attr['width'] = 'Length'; 12896 12897 $ul = $this->addBlankElement('ul'); 12898 $ul->attr['compact'] = 'Bool#compact'; 12899 $ul->attr['type'] = 'Enum#square,disc,circle'; 12900 12901 // "safe" modifications to "unsafe" elements 12902 // WARNING: If you want to add support for an unsafe, legacy 12903 // attribute, make a new TrustedLegacy module with the trusted 12904 // bit set appropriately 12905 12906 $form = $this->addBlankElement('form'); 12907 $form->content_model = 'Flow | #PCDATA'; 12908 $form->content_model_type = 'optional'; 12909 $form->attr['target'] = 'FrameTarget'; 12910 12911 $input = $this->addBlankElement('input'); 12912 $input->attr['align'] = 'IAlign'; 12913 12914 $legend = $this->addBlankElement('legend'); 12915 $legend->attr['align'] = 'LAlign'; 12916 12917 } 12918 12919 } 12920 12921 12922 12923 12924 12928 class HTMLPurifier_HTMLModule_List extends HTMLPurifier_HTMLModule 12929 { 12930 12931 public $name = 'List'; 12932 12933 // According to the abstract schema, the List content set is a fully formed 12934 // one or more expr, but it invariably occurs in an optional declaration 12935 // so we're not going to do that subtlety. It might cause trouble 12936 // if a user defines "List" and expects that multiple lists are 12937 // allowed to be specified, but then again, that's not very intuitive. 12938 // Furthermore, the actual XML Schema may disagree. Regardless, 12939 // we don't have support for such nested expressions without using 12940 // the incredibly inefficient and draconic Custom ChildDef. 12941 12942 public $content_sets = array('Flow' => 'List'); 12943 12944 public function setup($config) { 12945 $ol = $this->addElement('ol', 'List', new HTMLPurifier_ChildDef_List(), 'Common'); 12946 $ul = $this->addElement('ul', 'List', new HTMLPurifier_ChildDef_List(), 'Common'); 12947 // XXX The wrap attribute is handled by MakeWellFormed. This is all 12948 // quite unsatisfactory, because we generated this 12949 // *specifically* for lists, and now a big chunk of the handling 12950 // is done properly by the List ChildDef. So actually, we just 12951 // want enough information to make autoclosing work properly, 12952 // and then hand off the tricky stuff to the ChildDef. 12953 $ol->wrap = 'li'; 12954 $ul->wrap = 'li'; 12955 $this->addElement('dl', 'List', 'Required: dt | dd', 'Common'); 12956 12957 $this->addElement('li', false, 'Flow', 'Common'); 12958 12959 $this->addElement('dd', false, 'Flow', 'Common'); 12960 $this->addElement('dt', false, 'Inline', 'Common'); 12961 } 12962 12963 } 12964 12965 12966 12967 12968 12969 class HTMLPurifier_HTMLModule_Name extends HTMLPurifier_HTMLModule 12970 { 12971 12972 public $name = 'Name'; 12973 12974 public function setup($config) { 12975 $elements = array('a', 'applet', 'form', 'frame', 'iframe', 'img', 'map'); 12976 foreach ($elements as $name) { 12977 $element = $this->addBlankElement($name); 12978 $element->attr['name'] = 'CDATA'; 12979 if (!$config->get('HTML.Attr.Name.UseCDATA')) { 12980 $element->attr_transform_post['NameSync'] = new HTMLPurifier_AttrTransform_NameSync(); 12981 } 12982 } 12983 } 12984 12985 } 12986 12987 12988 12989 12990 12995 class HTMLPurifier_HTMLModule_Nofollow extends HTMLPurifier_HTMLModule 12996 { 12997 12998 public $name = 'Nofollow'; 12999 13000 public function setup($config) { 13001 $a = $this->addBlankElement('a'); 13002 $a->attr_transform_post[] = new HTMLPurifier_AttrTransform_Nofollow(); 13003 } 13004 13005 } 13006 13007 13008 13009 13010 13011 class HTMLPurifier_HTMLModule_NonXMLCommonAttributes extends HTMLPurifier_HTMLModule 13012 { 13013 public $name = 'NonXMLCommonAttributes'; 13014 13015 public $attr_collections = array( 13016 'Lang' => array( 13017 'lang' => 'LanguageCode', 13018 ) 13019 ); 13020 } 13021 13022 13023 13024 13025 13031 class HTMLPurifier_HTMLModule_Object extends HTMLPurifier_HTMLModule 13032 { 13033 13034 public $name = 'Object'; 13035 public $safe = false; 13036 13037 public function setup($config) { 13038 13039 $this->addElement('object', 'Inline', 'Optional: #PCDATA | Flow | param', 'Common', 13040 array( 13041 'archive' => 'URI', 13042 'classid' => 'URI', 13043 'codebase' => 'URI', 13044 'codetype' => 'Text', 13045 'data' => 'URI', 13046 'declare' => 'Bool#declare', 13047 'height' => 'Length', 13048 'name' => 'CDATA', 13049 'standby' => 'Text', 13050 'tabindex' => 'Number', 13051 'type' => 'ContentType', 13052 'width' => 'Length' 13053 ) 13054 ); 13055 13056 $this->addElement('param', false, 'Empty', false, 13057 array( 13058 'id' => 'ID', 13059 'name*' => 'Text', 13060 'type' => 'Text', 13061 'value' => 'Text', 13062 'valuetype' => 'Enum#data,ref,object' 13063 ) 13064 ); 13065 13066 } 13067 13068 } 13069 13070 13071 13072 13073 13084 class HTMLPurifier_HTMLModule_Presentation extends HTMLPurifier_HTMLModule 13085 { 13086 13087 public $name = 'Presentation'; 13088 13089 public function setup($config) { 13090 $this->addElement('hr', 'Block', 'Empty', 'Common'); 13091 $this->addElement('sub', 'Inline', 'Inline', 'Common'); 13092 $this->addElement('sup', 'Inline', 'Inline', 'Common'); 13093 $b = $this->addElement('b', 'Inline', 'Inline', 'Common'); 13094 $b->formatting = true; 13095 $big = $this->addElement('big', 'Inline', 'Inline', 'Common'); 13096 $big->formatting = true; 13097 $i = $this->addElement('i', 'Inline', 'Inline', 'Common'); 13098 $i->formatting = true; 13099 $small = $this->addElement('small', 'Inline', 'Inline', 'Common'); 13100 $small->formatting = true; 13101 $tt = $this->addElement('tt', 'Inline', 'Inline', 'Common'); 13102 $tt->formatting = true; 13103 } 13104 13105 } 13106 13107 13108 13109 13110 13115 class HTMLPurifier_HTMLModule_Proprietary extends HTMLPurifier_HTMLModule 13116 { 13117 13118 public $name = 'Proprietary'; 13119 13120 public function setup($config) { 13121 13122 $this->addElement('marquee', 'Inline', 'Flow', 'Common', 13123 array( 13124 'direction' => 'Enum#left,right,up,down', 13125 'behavior' => 'Enum#alternate', 13126 'width' => 'Length', 13127 'height' => 'Length', 13128 'scrolldelay' => 'Number', 13129 'scrollamount' => 'Number', 13130 'loop' => 'Number', 13131 'bgcolor' => 'Color', 13132 'hspace' => 'Pixels', 13133 'vspace' => 'Pixels', 13134 ) 13135 ); 13136 13137 } 13138 13139 } 13140 13141 13142 13143 13144 13149 class HTMLPurifier_HTMLModule_Ruby extends HTMLPurifier_HTMLModule 13150 { 13151 13152 public $name = 'Ruby'; 13153 13154 public function setup($config) { 13155 $this->addElement('ruby', 'Inline', 13156 'Custom: ((rb, (rt | (rp, rt, rp))) | (rbc, rtc, rtc?))', 13157 'Common'); 13158 $this->addElement('rbc', false, 'Required: rb', 'Common'); 13159 $this->addElement('rtc', false, 'Required: rt', 'Common'); 13160 $rb = $this->addElement('rb', false, 'Inline', 'Common'); 13161 $rb->excludes = array('ruby' => true); 13162 $rt = $this->addElement('rt', false, 'Inline', 'Common', array('rbspan' => 'Number')); 13163 $rt->excludes = array('ruby' => true); 13164 $this->addElement('rp', false, 'Optional: #PCDATA', 'Common'); 13165 } 13166 13167 } 13168 13169 13170 13171 13172 13176 class HTMLPurifier_HTMLModule_SafeEmbed extends HTMLPurifier_HTMLModule 13177 { 13178 13179 public $name = 'SafeEmbed'; 13180 13181 public function setup($config) { 13182 13183 $max = $config->get('HTML.MaxImgLength'); 13184 $embed = $this->addElement( 13185 'embed', 'Inline', 'Empty', 'Common', 13186 array( 13187 'src*' => 'URI#embedded', 13188 'type' => 'Enum#application/x-shockwave-flash', 13189 'width' => 'Pixels#' . $max, 13190 'height' => 'Pixels#' . $max, 13191 'allowscriptaccess' => 'Enum#never', 13192 'allownetworking' => 'Enum#internal', 13193 'flashvars' => 'Text', 13194 'wmode' => 'Enum#window,transparent,opaque', 13195 'name' => 'ID', 13196 ) 13197 ); 13198 $embed->attr_transform_post[] = new HTMLPurifier_AttrTransform_SafeEmbed(); 13199 13200 } 13201 13202 } 13203 13204 13205 13206 13207 13214 class HTMLPurifier_HTMLModule_SafeObject extends HTMLPurifier_HTMLModule 13215 { 13216 13217 public $name = 'SafeObject'; 13218 13219 public function setup($config) { 13220 13221 // These definitions are not intrinsically safe: the attribute transforms 13222 // are a vital part of ensuring safety. 13223 13224 $max = $config->get('HTML.MaxImgLength'); 13225 $object = $this->addElement( 13226 'object', 13227 'Inline', 13228 'Optional: param | Flow | #PCDATA', 13229 'Common', 13230 array( 13231 // While technically not required by the spec, we're forcing 13232 // it to this value. 13233 'type' => 'Enum#application/x-shockwave-flash', 13234 'width' => 'Pixels#' . $max, 13235 'height' => 'Pixels#' . $max, 13236 'data' => 'URI#embedded', 13237 'codebase' => new HTMLPurifier_AttrDef_Enum(array( 13238 'http://download.macromedia.com/pub/shockwave/cabs/flash/swflash.cab#version=6,0,40,0')), 13239 ) 13240 ); 13241 $object->attr_transform_post[] = new HTMLPurifier_AttrTransform_SafeObject(); 13242 13243 $param = $this->addElement('param', false, 'Empty', false, 13244 array( 13245 'id' => 'ID', 13246 'name*' => 'Text', 13247 'value' => 'Text' 13248 ) 13249 ); 13250 $param->attr_transform_post[] = new HTMLPurifier_AttrTransform_SafeParam(); 13251 $this->info_injector[] = 'SafeObject'; 13252 13253 } 13254 13255 } 13256 13257 13258 13259 13260 13261 /* 13262 13263 WARNING: THIS MODULE IS EXTREMELY DANGEROUS AS IT ENABLES INLINE SCRIPTING 13264 INSIDE HTML PURIFIER DOCUMENTS. USE ONLY WITH TRUSTED USER INPUT!!! 13265 13266 */ 13267 13274 class HTMLPurifier_HTMLModule_Scripting extends HTMLPurifier_HTMLModule 13275 { 13276 public $name = 'Scripting'; 13277 public $elements = array('script', 'noscript'); 13278 public $content_sets = array('Block' => 'script | noscript', 'Inline' => 'script | noscript'); 13279 public $safe = false; 13280 13281 public function setup($config) { 13282 // TODO: create custom child-definition for noscript that 13283 // auto-wraps stray #PCDATA in a similar manner to 13284 // blockquote's custom definition (we would use it but 13285 // blockquote's contents are optional while noscript's contents 13286 // are required) 13287 13288 // TODO: convert this to new syntax, main problem is getting 13289 // both content sets working 13290 13291 // In theory, this could be safe, but I don't see any reason to 13292 // allow it. 13293 $this->info['noscript'] = new HTMLPurifier_ElementDef(); 13294 $this->info['noscript']->attr = array( 0 => array('Common') ); 13295 $this->info['noscript']->content_model = 'Heading | List | Block'; 13296 $this->info['noscript']->content_model_type = 'required'; 13297 13298 $this->info['script'] = new HTMLPurifier_ElementDef(); 13299 $this->info['script']->attr = array( 13300 'defer' => new HTMLPurifier_AttrDef_Enum(array('defer')), 13301 'src' => new HTMLPurifier_AttrDef_URI(true), 13302 'type' => new HTMLPurifier_AttrDef_Enum(array('text/javascript')) 13303 ); 13304 $this->info['script']->content_model = '#PCDATA'; 13305 $this->info['script']->content_model_type = 'optional'; 13306 $this->info['script']->attr_transform_pre['type'] = 13307 $this->info['script']->attr_transform_post['type'] = 13308 new HTMLPurifier_AttrTransform_ScriptRequired(); 13309 } 13310 } 13311 13312 13313 13314 13315 13320 class HTMLPurifier_HTMLModule_StyleAttribute extends HTMLPurifier_HTMLModule 13321 { 13322 13323 public $name = 'StyleAttribute'; 13324 public $attr_collections = array( 13325 // The inclusion routine differs from the Abstract Modules but 13326 // is in line with the DTD and XML Schemas. 13327 'Style' => array('style' => false), // see constructor 13328 'Core' => array(0 => array('Style')) 13329 ); 13330 13331 public function setup($config) { 13332 $this->attr_collections['Style']['style'] = new HTMLPurifier_AttrDef_CSS(); 13333 } 13334 13335 } 13336 13337 13338 13339 13340 13344 class HTMLPurifier_HTMLModule_Tables extends HTMLPurifier_HTMLModule 13345 { 13346 13347 public $name = 'Tables'; 13348 13349 public function setup($config) { 13350 13351 $this->addElement('caption', false, 'Inline', 'Common'); 13352 13353 $this->addElement('table', 'Block', 13354 new HTMLPurifier_ChildDef_Table(), 'Common', 13355 array( 13356 'border' => 'Pixels', 13357 'cellpadding' => 'Length', 13358 'cellspacing' => 'Length', 13359 'frame' => 'Enum#void,above,below,hsides,lhs,rhs,vsides,box,border', 13360 'rules' => 'Enum#none,groups,rows,cols,all', 13361 'summary' => 'Text', 13362 'width' => 'Length' 13363 ) 13364 ); 13365 13366 // common attributes 13367 $cell_align = array( 13368 'align' => 'Enum#left,center,right,justify,char', 13369 'charoff' => 'Length', 13370 'valign' => 'Enum#top,middle,bottom,baseline', 13371 ); 13372 13373 $cell_t = array_merge( 13374 array( 13375 'abbr' => 'Text', 13376 'colspan' => 'Number', 13377 'rowspan' => 'Number', 13378 // Apparently, as of HTML5 this attribute only applies 13379 // to 'th' elements. 13380 'scope' => 'Enum#row,col,rowgroup,colgroup', 13381 ), 13382 $cell_align 13383 ); 13384 $this->addElement('td', false, 'Flow', 'Common', $cell_t); 13385 $this->addElement('th', false, 'Flow', 'Common', $cell_t); 13386 13387 $this->addElement('tr', false, 'Required: td | th', 'Common', $cell_align); 13388 13389 $cell_col = array_merge( 13390 array( 13391 'span' => 'Number', 13392 'width' => 'MultiLength', 13393 ), 13394 $cell_align 13395 ); 13396 $this->addElement('col', false, 'Empty', 'Common', $cell_col); 13397 $this->addElement('colgroup', false, 'Optional: col', 'Common', $cell_col); 13398 13399 $this->addElement('tbody', false, 'Required: tr', 'Common', $cell_align); 13400 $this->addElement('thead', false, 'Required: tr', 'Common', $cell_align); 13401 $this->addElement('tfoot', false, 'Required: tr', 'Common', $cell_align); 13402 13403 } 13404 13405 } 13406 13407 13408 13409 13410 13414 class HTMLPurifier_HTMLModule_Target extends HTMLPurifier_HTMLModule 13415 { 13416 13417 public $name = 'Target'; 13418 13419 public function setup($config) { 13420 $elements = array('a'); 13421 foreach ($elements as $name) { 13422 $e = $this->addBlankElement($name); 13423 $e->attr = array( 13424 'target' => new HTMLPurifier_AttrDef_HTML_FrameTarget() 13425 ); 13426 } 13427 } 13428 13429 } 13430 13431 13432 13433 13434 13439 class HTMLPurifier_HTMLModule_TargetBlank extends HTMLPurifier_HTMLModule 13440 { 13441 13442 public $name = 'TargetBlank'; 13443 13444 public function setup($config) { 13445 $a = $this->addBlankElement('a'); 13446 $a->attr_transform_post[] = new HTMLPurifier_AttrTransform_TargetBlank(); 13447 } 13448 13449 } 13450 13451 13452 13453 13454 13467 class HTMLPurifier_HTMLModule_Text extends HTMLPurifier_HTMLModule 13468 { 13469 13470 public $name = 'Text'; 13471 public $content_sets = array( 13472 'Flow' => 'Heading | Block | Inline' 13473 ); 13474 13475 public function setup($config) { 13476 13477 // Inline Phrasal ------------------------------------------------- 13478 $this->addElement('abbr', 'Inline', 'Inline', 'Common'); 13479 $this->addElement('acronym', 'Inline', 'Inline', 'Common'); 13480 $this->addElement('cite', 'Inline', 'Inline', 'Common'); 13481 $this->addElement('dfn', 'Inline', 'Inline', 'Common'); 13482 $this->addElement('kbd', 'Inline', 'Inline', 'Common'); 13483 $this->addElement('q', 'Inline', 'Inline', 'Common', array('cite' => 'URI')); 13484 $this->addElement('samp', 'Inline', 'Inline', 'Common'); 13485 $this->addElement('var', 'Inline', 'Inline', 'Common'); 13486 13487 $em = $this->addElement('em', 'Inline', 'Inline', 'Common'); 13488 $em->formatting = true; 13489 13490 $strong = $this->addElement('strong', 'Inline', 'Inline', 'Common'); 13491 $strong->formatting = true; 13492 13493 $code = $this->addElement('code', 'Inline', 'Inline', 'Common'); 13494 $code->formatting = true; 13495 13496 // Inline Structural ---------------------------------------------- 13497 $this->addElement('span', 'Inline', 'Inline', 'Common'); 13498 $this->addElement('br', 'Inline', 'Empty', 'Core'); 13499 13500 // Block Phrasal -------------------------------------------------- 13501 $this->addElement('address', 'Block', 'Inline', 'Common'); 13502 $this->addElement('blockquote', 'Block', 'Optional: Heading | Block | List', 'Common', array('cite' => 'URI') ); 13503 $pre = $this->addElement('pre', 'Block', 'Inline', 'Common'); 13504 $pre->excludes = $this->makeLookup( 13505 'img', 'big', 'small', 'object', 'applet', 'font', 'basefont' ); 13506 $this->addElement('h1', 'Heading', 'Inline', 'Common'); 13507 $this->addElement('h2', 'Heading', 'Inline', 'Common'); 13508 $this->addElement('h3', 'Heading', 'Inline', 'Common'); 13509 $this->addElement('h4', 'Heading', 'Inline', 'Common'); 13510 $this->addElement('h5', 'Heading', 'Inline', 'Common'); 13511 $this->addElement('h6', 'Heading', 'Inline', 'Common'); 13512 13513 // Block Structural ----------------------------------------------- 13514 $p = $this->addElement('p', 'Block', 'Inline', 'Common'); 13515 $p->autoclose = array_flip(array("address", "blockquote", "center", "dir", "div", "dl", "fieldset", "ol", "p", "ul")); 13516 13517 $this->addElement('div', 'Block', 'Flow', 'Common'); 13518 13519 } 13520 13521 } 13522 13523 13524 13525 13526 13532 class HTMLPurifier_HTMLModule_Tidy extends HTMLPurifier_HTMLModule 13533 { 13534 13539 public $levels = array(0 => 'none', 'light', 'medium', 'heavy'); 13540 13544 public $defaultLevel = null; 13545 13550 public $fixesForLevel = array( 13551 'light' => array(), 13552 'medium' => array(), 13553 'heavy' => array() 13554 ); 13555 13562 public function setup($config) { 13563 13564 // create fixes, initialize fixesForLevel 13565 $fixes = $this->makeFixes(); 13566 $this->makeFixesForLevel($fixes); 13567 13568 // figure out which fixes to use 13569 $level = $config->get('HTML.TidyLevel'); 13570 $fixes_lookup = $this->getFixesForLevel($level); 13571 13572 // get custom fix declarations: these need namespace processing 13573 $add_fixes = $config->get('HTML.TidyAdd'); 13574 $remove_fixes = $config->get('HTML.TidyRemove'); 13575 13576 foreach ($fixes as $name => $fix) { 13577 // needs to be refactored a little to implement globbing 13578 if ( 13579 isset($remove_fixes[$name]) || 13580 (!isset($add_fixes[$name]) && !isset($fixes_lookup[$name])) 13581 ) { 13582 unset($fixes[$name]); 13583 } 13584 } 13585 13586 // populate this module with necessary fixes 13587 $this->populate($fixes); 13588 13589 } 13590 13597 public function getFixesForLevel($level) { 13598 if ($level == $this->levels[0]) { 13599 return array(); 13600 } 13601 $activated_levels = array(); 13602 for ($i = 1, $c = count($this->levels); $i < $c; $i++) { 13603 $activated_levels[] = $this->levels[$i]; 13604 if ($this->levels[$i] == $level) break; 13605 } 13606 if ($i == $c) { 13607 trigger_error( 13608 'Tidy level ' . htmlspecialchars($level) . ' not recognized', 13609 E_USER_WARNING 13610 ); 13611 return array(); 13612 } 13613 $ret = array(); 13614 foreach ($activated_levels as $level) { 13615 foreach ($this->fixesForLevel[$level] as $fix) { 13616 $ret[$fix] = true; 13617 } 13618 } 13619 return $ret; 13620 } 13621 13627 public function makeFixesForLevel($fixes) { 13628 if (!isset($this->defaultLevel)) return; 13629 if (!isset($this->fixesForLevel[$this->defaultLevel])) { 13630 trigger_error( 13631 'Default level ' . $this->defaultLevel . ' does not exist', 13632 E_USER_ERROR 13633 ); 13634 return; 13635 } 13636 $this->fixesForLevel[$this->defaultLevel] = array_keys($fixes); 13637 } 13638 13644 public function populate($fixes) { 13645 foreach ($fixes as $name => $fix) { 13646 // determine what the fix is for 13647 list($type, $params) = $this->getFixType($name); 13648 switch ($type) { 13649 case 'attr_transform_pre': 13650 case 'attr_transform_post': 13651 $attr = $params['attr']; 13652 if (isset($params['element'])) { 13653 $element = $params['element']; 13654 if (empty($this->info[$element])) { 13655 $e = $this->addBlankElement($element); 13656 } else { 13657 $e = $this->info[$element]; 13658 } 13659 } else { 13660 $type = "info_$type"; 13661 $e = $this; 13662 } 13663 // PHP does some weird parsing when I do 13664 // $e->$type[$attr], so I have to assign a ref. 13665 $f =& $e->$type; 13666 $f[$attr] = $fix; 13667 break; 13668 case 'tag_transform': 13669 $this->info_tag_transform[$params['element']] = $fix; 13670 break; 13671 case 'child': 13672 case 'content_model_type': 13673 $element = $params['element']; 13674 if (empty($this->info[$element])) { 13675 $e = $this->addBlankElement($element); 13676 } else { 13677 $e = $this->info[$element]; 13678 } 13679 $e->$type = $fix; 13680 break; 13681 default: 13682 trigger_error("Fix type $type not supported", E_USER_ERROR); 13683 break; 13684 } 13685 } 13686 } 13687 13696 public function getFixType($name) { 13697 // parse it 13698 $property = $attr = null; 13699 if (strpos($name, '#') !== false) list($name, $property) = explode('#', $name); 13700 if (strpos($name, '@') !== false) list($name, $attr) = explode('@', $name); 13701 13702 // figure out the parameters 13703 $params = array(); 13704 if ($name !== '') $params['element'] = $name; 13705 if (!is_null($attr)) $params['attr'] = $attr; 13706 13707 // special case: attribute transform 13708 if (!is_null($attr)) { 13709 if (is_null($property)) $property = 'pre'; 13710 $type = 'attr_transform_' . $property; 13711 return array($type, $params); 13712 } 13713 13714 // special case: tag transform 13715 if (is_null($property)) { 13716 return array('tag_transform', $params); 13717 } 13718 13719 return array($property, $params); 13720 13721 } 13722 13727 public function makeFixes() {} 13728 13729 } 13730 13731 13732 13733 13734 13735 class HTMLPurifier_HTMLModule_XMLCommonAttributes extends HTMLPurifier_HTMLModule 13736 { 13737 public $name = 'XMLCommonAttributes'; 13738 13739 public $attr_collections = array( 13740 'Lang' => array( 13741 'xml:lang' => 'LanguageCode', 13742 ) 13743 ); 13744 } 13745 13746 13747 13748 13749 13753 class HTMLPurifier_HTMLModule_Tidy_Name extends HTMLPurifier_HTMLModule_Tidy 13754 { 13755 public $name = 'Tidy_Name'; 13756 public $defaultLevel = 'heavy'; 13757 public function makeFixes() { 13758 13759 $r = array(); 13760 13761 // @name for img, a ----------------------------------------------- 13762 // Technically, it's allowed even on strict, so we allow authors to use 13763 // it. However, it's deprecated in future versions of XHTML. 13764 $r['img@name'] = 13765 $r['a@name'] = new HTMLPurifier_AttrTransform_Name(); 13766 13767 return $r; 13768 } 13769 } 13770 13771 13772 13773 13774 13775 class HTMLPurifier_HTMLModule_Tidy_Proprietary extends HTMLPurifier_HTMLModule_Tidy 13776 { 13777 13778 public $name = 'Tidy_Proprietary'; 13779 public $defaultLevel = 'light'; 13780 13781 public function makeFixes() { 13782 $r = array(); 13783 $r['table@background'] = new HTMLPurifier_AttrTransform_Background(); 13784 $r['td@background'] = new HTMLPurifier_AttrTransform_Background(); 13785 $r['th@background'] = new HTMLPurifier_AttrTransform_Background(); 13786 $r['tr@background'] = new HTMLPurifier_AttrTransform_Background(); 13787 $r['thead@background'] = new HTMLPurifier_AttrTransform_Background(); 13788 $r['tfoot@background'] = new HTMLPurifier_AttrTransform_Background(); 13789 $r['tbody@background'] = new HTMLPurifier_AttrTransform_Background(); 13790 $r['table@height'] = new HTMLPurifier_AttrTransform_Length('height'); 13791 return $r; 13792 } 13793 13794 } 13795 13796 13797 13798 13799 13800 class HTMLPurifier_HTMLModule_Tidy_XHTMLAndHTML4 extends HTMLPurifier_HTMLModule_Tidy 13801 { 13802 13803 public function makeFixes() { 13804 13805 $r = array(); 13806 13807 // == deprecated tag transforms =================================== 13808 13809 $r['font'] = new HTMLPurifier_TagTransform_Font(); 13810 $r['menu'] = new HTMLPurifier_TagTransform_Simple('ul'); 13811 $r['dir'] = new HTMLPurifier_TagTransform_Simple('ul'); 13812 $r['center'] = new HTMLPurifier_TagTransform_Simple('div', 'text-align:center;'); 13813 $r['u'] = new HTMLPurifier_TagTransform_Simple('span', 'text-decoration:underline;'); 13814 $r['s'] = new HTMLPurifier_TagTransform_Simple('span', 'text-decoration:line-through;'); 13815 $r['strike'] = new HTMLPurifier_TagTransform_Simple('span', 'text-decoration:line-through;'); 13816 13817 // == deprecated attribute transforms ============================= 13818 13819 $r['caption@align'] = 13820 new HTMLPurifier_AttrTransform_EnumToCSS('align', array( 13821 // we're following IE's behavior, not Firefox's, due 13822 // to the fact that no one supports caption-side:right, 13823 // W3C included (with CSS 2.1). This is a slightly 13824 // unreasonable attribute! 13825 'left' => 'text-align:left;', 13826 'right' => 'text-align:right;', 13827 'top' => 'caption-side:top;', 13828 'bottom' => 'caption-side:bottom;' // not supported by IE 13829 )); 13830 13831 // @align for img ------------------------------------------------- 13832 $r['img@align'] = 13833 new HTMLPurifier_AttrTransform_EnumToCSS('align', array( 13834 'left' => 'float:left;', 13835 'right' => 'float:right;', 13836 'top' => 'vertical-align:top;', 13837 'middle' => 'vertical-align:middle;', 13838 'bottom' => 'vertical-align:baseline;', 13839 )); 13840 13841 // @align for table ----------------------------------------------- 13842 $r['table@align'] = 13843 new HTMLPurifier_AttrTransform_EnumToCSS('align', array( 13844 'left' => 'float:left;', 13845 'center' => 'margin-left:auto;margin-right:auto;', 13846 'right' => 'float:right;' 13847 )); 13848 13849 // @align for hr ----------------------------------------------- 13850 $r['hr@align'] = 13851 new HTMLPurifier_AttrTransform_EnumToCSS('align', array( 13852 // we use both text-align and margin because these work 13853 // for different browsers (IE and Firefox, respectively) 13854 // and the melange makes for a pretty cross-compatible 13855 // solution 13856 'left' => 'margin-left:0;margin-right:auto;text-align:left;', 13857 'center' => 'margin-left:auto;margin-right:auto;text-align:center;', 13858 'right' => 'margin-left:auto;margin-right:0;text-align:right;' 13859 )); 13860 13861 // @align for h1, h2, h3, h4, h5, h6, p, div ---------------------- 13862 // {{{ 13863 $align_lookup = array(); 13864 $align_values = array('left', 'right', 'center', 'justify'); 13865 foreach ($align_values as $v) $align_lookup[$v] = "text-align:$v;"; 13866 // }}} 13867 $r['h1@align'] = 13868 $r['h2@align'] = 13869 $r['h3@align'] = 13870 $r['h4@align'] = 13871 $r['h5@align'] = 13872 $r['h6@align'] = 13873 $r['p@align'] = 13874 $r['div@align'] = 13875 new HTMLPurifier_AttrTransform_EnumToCSS('align', $align_lookup); 13876 13877 // @bgcolor for table, tr, td, th --------------------------------- 13878 $r['table@bgcolor'] = 13879 $r['td@bgcolor'] = 13880 $r['th@bgcolor'] = 13881 new HTMLPurifier_AttrTransform_BgColor(); 13882 13883 // @border for img ------------------------------------------------ 13884 $r['img@border'] = new HTMLPurifier_AttrTransform_Border(); 13885 13886 // @clear for br -------------------------------------------------- 13887 $r['br@clear'] = 13888 new HTMLPurifier_AttrTransform_EnumToCSS('clear', array( 13889 'left' => 'clear:left;', 13890 'right' => 'clear:right;', 13891 'all' => 'clear:both;', 13892 'none' => 'clear:none;', 13893 )); 13894 13895 // @height for td, th --------------------------------------------- 13896 $r['td@height'] = 13897 $r['th@height'] = 13898 new HTMLPurifier_AttrTransform_Length('height'); 13899 13900 // @hspace for img ------------------------------------------------ 13901 $r['img@hspace'] = new HTMLPurifier_AttrTransform_ImgSpace('hspace'); 13902 13903 // @noshade for hr ------------------------------------------------ 13904 // this transformation is not precise but often good enough. 13905 // different browsers use different styles to designate noshade 13906 $r['hr@noshade'] = 13907 new HTMLPurifier_AttrTransform_BoolToCSS( 13908 'noshade', 13909 'color:#808080;background-color:#808080;border:0;' 13910 ); 13911 13912 // @nowrap for td, th --------------------------------------------- 13913 $r['td@nowrap'] = 13914 $r['th@nowrap'] = 13915 new HTMLPurifier_AttrTransform_BoolToCSS( 13916 'nowrap', 13917 'white-space:nowrap;' 13918 ); 13919 13920 // @size for hr -------------------------------------------------- 13921 $r['hr@size'] = new HTMLPurifier_AttrTransform_Length('size', 'height'); 13922 13923 // @type for li, ol, ul ------------------------------------------- 13924 // {{{ 13925 $ul_types = array( 13926 'disc' => 'list-style-type:disc;', 13927 'square' => 'list-style-type:square;', 13928 'circle' => 'list-style-type:circle;' 13929 ); 13930 $ol_types = array( 13931 '1' => 'list-style-type:decimal;', 13932 'i' => 'list-style-type:lower-roman;', 13933 'I' => 'list-style-type:upper-roman;', 13934 'a' => 'list-style-type:lower-alpha;', 13935 'A' => 'list-style-type:upper-alpha;' 13936 ); 13937 $li_types = $ul_types + $ol_types; 13938 // }}} 13939 13940 $r['ul@type'] = new HTMLPurifier_AttrTransform_EnumToCSS('type', $ul_types); 13941 $r['ol@type'] = new HTMLPurifier_AttrTransform_EnumToCSS('type', $ol_types, true); 13942 $r['li@type'] = new HTMLPurifier_AttrTransform_EnumToCSS('type', $li_types, true); 13943 13944 // @vspace for img ------------------------------------------------ 13945 $r['img@vspace'] = new HTMLPurifier_AttrTransform_ImgSpace('vspace'); 13946 13947 // @width for hr, td, th ------------------------------------------ 13948 $r['td@width'] = 13949 $r['th@width'] = 13950 $r['hr@width'] = new HTMLPurifier_AttrTransform_Length('width'); 13951 13952 return $r; 13953 13954 } 13955 13956 } 13957 13958 13959 13960 13961 13962 class HTMLPurifier_HTMLModule_Tidy_Strict extends HTMLPurifier_HTMLModule_Tidy_XHTMLAndHTML4 13963 { 13964 public $name = 'Tidy_Strict'; 13965 public $defaultLevel = 'light'; 13966 13967 public function makeFixes() { 13968 $r = parent::makeFixes(); 13969 $r['blockquote#content_model_type'] = 'strictblockquote'; 13970 return $r; 13971 } 13972 13973 public $defines_child_def = true; 13974 public function getChildDef($def) { 13975 if ($def->content_model_type != 'strictblockquote') return parent::getChildDef($def); 13976 return new HTMLPurifier_ChildDef_StrictBlockquote($def->content_model); 13977 } 13978 } 13979 13980 13981 13982 13983 13984 class HTMLPurifier_HTMLModule_Tidy_Transitional extends HTMLPurifier_HTMLModule_Tidy_XHTMLAndHTML4 13985 { 13986 public $name = 'Tidy_Transitional'; 13987 public $defaultLevel = 'heavy'; 13988 } 13989 13990 13991 13992 13993 13994 class HTMLPurifier_HTMLModule_Tidy_XHTML extends HTMLPurifier_HTMLModule_Tidy 13995 { 13996 13997 public $name = 'Tidy_XHTML'; 13998 public $defaultLevel = 'medium'; 13999 14000 public function makeFixes() { 14001 $r = array(); 14002 $r['@lang'] = new HTMLPurifier_AttrTransform_Lang(); 14003 return $r; 14004 } 14005 14006 } 14007 14008 14009 14010 14011 14018 class HTMLPurifier_Injector_AutoParagraph extends HTMLPurifier_Injector 14019 { 14020 14021 public $name = 'AutoParagraph'; 14022 public $needed = array('p'); 14023 14024 private function _pStart() { 14025 $par = new HTMLPurifier_Token_Start('p'); 14026 $par->armor['MakeWellFormed_TagClosedError'] = true; 14027 return $par; 14028 } 14029 14030 public function handleText(&$token) { 14031 $text = $token->data; 14032 // Does the current parent allow <p> tags? 14033 if ($this->allowsElement('p')) { 14034 if (empty($this->currentNesting) || strpos($text, "\n\n") !== false) { 14035 // Note that we have differing behavior when dealing with text 14036 // in the anonymous root node, or a node inside the document. 14037 // If the text as a double-newline, the treatment is the same; 14038 // if it doesn't, see the next if-block if you're in the document. 14039 14040 $i = $nesting = null; 14041 if (!$this->forwardUntilEndToken($i, $current, $nesting) && $token->is_whitespace) { 14042 // State 1.1: ... ^ (whitespace, then document end) 14043 // ---- 14044 // This is a degenerate case 14045 } else { 14046 if (!$token->is_whitespace || $this->_isInline($current)) { 14047 // State 1.2: PAR1 14048 // ---- 14049 14050 // State 1.3: PAR1\n\nPAR2 14051 // ------------ 14052 14053 // State 1.4: <div>PAR1\n\nPAR2 (see State 2) 14054 // ------------ 14055 $token = array($this->_pStart()); 14056 $this->_splitText($text, $token); 14057 } else { 14058 // State 1.5: \n<hr /> 14059 // -- 14060 } 14061 } 14062 } else { 14063 // State 2: <div>PAR1... (similar to 1.4) 14064 // ---- 14065 14066 // We're in an element that allows paragraph tags, but we're not 14067 // sure if we're going to need them. 14068 if ($this->_pLookAhead()) { 14069 // State 2.1: <div>PAR1<b>PAR1\n\nPAR2 14070 // ---- 14071 // Note: This will always be the first child, since any 14072 // previous inline element would have triggered this very 14073 // same routine, and found the double newline. One possible 14074 // exception would be a comment. 14075 $token = array($this->_pStart(), $token); 14076 } else { 14077 // State 2.2.1: <div>PAR1<div> 14078 // ---- 14079 14080 // State 2.2.2: <div>PAR1<b>PAR1</b></div> 14081 // ---- 14082 } 14083 } 14084 // Is the current parent a <p> tag? 14085 } elseif ( 14086 !empty($this->currentNesting) && 14087 $this->currentNesting[count($this->currentNesting)-1]->name == 'p' 14088 ) { 14089 // State 3.1: ...<p>PAR1 14090 // ---- 14091 14092 // State 3.2: ...<p>PAR1\n\nPAR2 14093 // ------------ 14094 $token = array(); 14095 $this->_splitText($text, $token); 14096 // Abort! 14097 } else { 14098 // State 4.1: ...<b>PAR1 14099 // ---- 14100 14101 // State 4.2: ...<b>PAR1\n\nPAR2 14102 // ------------ 14103 } 14104 } 14105 14106 public function handleElement(&$token) { 14107 // We don't have to check if we're already in a <p> tag for block 14108 // tokens, because the tag would have been autoclosed by MakeWellFormed. 14109 if ($this->allowsElement('p')) { 14110 if (!empty($this->currentNesting)) { 14111 if ($this->_isInline($token)) { 14112 // State 1: <div>...<b> 14113 // --- 14114 14115 // Check if this token is adjacent to the parent token 14116 // (seek backwards until token isn't whitespace) 14117 $i = null; 14118 $this->backward($i, $prev); 14119 14120 if (!$prev instanceof HTMLPurifier_Token_Start) { 14121 // Token wasn't adjacent 14122 14123 if ( 14124 $prev instanceof HTMLPurifier_Token_Text && 14125 substr($prev->data, -2) === "\n\n" 14126 ) { 14127 // State 1.1.4: <div><p>PAR1</p>\n\n<b> 14128 // --- 14129 14130 // Quite frankly, this should be handled by splitText 14131 $token = array($this->_pStart(), $token); 14132 } else { 14133 // State 1.1.1: <div><p>PAR1</p><b> 14134 // --- 14135 14136 // State 1.1.2: <div><br /><b> 14137 // --- 14138 14139 // State 1.1.3: <div>PAR<b> 14140 // --- 14141 } 14142 14143 } else { 14144 // State 1.2.1: <div><b> 14145 // --- 14146 14147 // Lookahead to see if <p> is needed. 14148 if ($this->_pLookAhead()) { 14149 // State 1.3.1: <div><b>PAR1\n\nPAR2 14150 // --- 14151 $token = array($this->_pStart(), $token); 14152 } else { 14153 // State 1.3.2: <div><b>PAR1</b></div> 14154 // --- 14155 14156 // State 1.3.3: <div><b>PAR1</b><div></div>\n\n</div> 14157 // --- 14158 } 14159 } 14160 } else { 14161 // State 2.3: ...<div> 14162 // ----- 14163 } 14164 } else { 14165 if ($this->_isInline($token)) { 14166 // State 3.1: <b> 14167 // --- 14168 // This is where the {p} tag is inserted, not reflected in 14169 // inputTokens yet, however. 14170 $token = array($this->_pStart(), $token); 14171 } else { 14172 // State 3.2: <div> 14173 // ----- 14174 } 14175 14176 $i = null; 14177 if ($this->backward($i, $prev)) { 14178 if ( 14179 !$prev instanceof HTMLPurifier_Token_Text 14180 ) { 14181 // State 3.1.1: ...</p>{p}<b> 14182 // --- 14183 14184 // State 3.2.1: ...</p><div> 14185 // ----- 14186 14187 if (!is_array($token)) $token = array($token); 14188 array_unshift($token, new HTMLPurifier_Token_Text("\n\n")); 14189 } else { 14190 // State 3.1.2: ...</p>\n\n{p}<b> 14191 // --- 14192 14193 // State 3.2.2: ...</p>\n\n<div> 14194 // ----- 14195 14196 // Note: PAR<ELEM> cannot occur because PAR would have been 14197 // wrapped in <p> tags. 14198 } 14199 } 14200 } 14201 } else { 14202 // State 2.2: <ul><li> 14203 // ---- 14204 14205 // State 2.4: <p><b> 14206 // --- 14207 } 14208 } 14209 14220 private function _splitText($data, &$result) { 14221 $raw_paragraphs = explode("\n\n", $data); 14222 $paragraphs = array(); // without empty paragraphs 14223 $needs_start = false; 14224 $needs_end = false; 14225 14226 $c = count($raw_paragraphs); 14227 if ($c == 1) { 14228 // There were no double-newlines, abort quickly. In theory this 14229 // should never happen. 14230 $result[] = new HTMLPurifier_Token_Text($data); 14231 return; 14232 } 14233 for ($i = 0; $i < $c; $i++) { 14234 $par = $raw_paragraphs[$i]; 14235 if (trim($par) !== '') { 14236 $paragraphs[] = $par; 14237 } else { 14238 if ($i == 0) { 14239 // Double newline at the front 14240 if (empty($result)) { 14241 // The empty result indicates that the AutoParagraph 14242 // injector did not add any start paragraph tokens. 14243 // This means that we have been in a paragraph for 14244 // a while, and the newline means we should start a new one. 14245 $result[] = new HTMLPurifier_Token_End('p'); 14246 $result[] = new HTMLPurifier_Token_Text("\n\n"); 14247 // However, the start token should only be added if 14248 // there is more processing to be done (i.e. there are 14249 // real paragraphs in here). If there are none, the 14250 // next start paragraph tag will be handled by the 14251 // next call to the injector 14252 $needs_start = true; 14253 } else { 14254 // We just started a new paragraph! 14255 // Reinstate a double-newline for presentation's sake, since 14256 // it was in the source code. 14257 array_unshift($result, new HTMLPurifier_Token_Text("\n\n")); 14258 } 14259 } elseif ($i + 1 == $c) { 14260 // Double newline at the end 14261 // There should be a trailing </p> when we're finally done. 14262 $needs_end = true; 14263 } 14264 } 14265 } 14266 14267 // Check if this was just a giant blob of whitespace. Move this earlier, 14268 // perhaps? 14269 if (empty($paragraphs)) { 14270 return; 14271 } 14272 14273 // Add the start tag indicated by \n\n at the beginning of $data 14274 if ($needs_start) { 14275 $result[] = $this->_pStart(); 14276 } 14277 14278 // Append the paragraphs onto the result 14279 foreach ($paragraphs as $par) { 14280 $result[] = new HTMLPurifier_Token_Text($par); 14281 $result[] = new HTMLPurifier_Token_End('p'); 14282 $result[] = new HTMLPurifier_Token_Text("\n\n"); 14283 $result[] = $this->_pStart(); 14284 } 14285 14286 // Remove trailing start token; Injector will handle this later if 14287 // it was indeed needed. This prevents from needing to do a lookahead, 14288 // at the cost of a lookbehind later. 14289 array_pop($result); 14290 14291 // If there is no need for an end tag, remove all of it and let 14292 // MakeWellFormed close it later. 14293 if (!$needs_end) { 14294 array_pop($result); // removes \n\n 14295 array_pop($result); // removes </p> 14296 } 14297 14298 } 14299 14304 private function _isInline($token) { 14305 return isset($this->htmlDefinition->info['p']->child->elements[$token->name]); 14306 } 14307 14312 private function _pLookAhead() { 14313 $this->current($i, $current); 14314 if ($current instanceof HTMLPurifier_Token_Start) $nesting = 1; 14315 else $nesting = 0; 14316 $ok = false; 14317 while ($this->forwardUntilEndToken($i, $current, $nesting)) { 14318 $result = $this->_checkNeedsP($current); 14319 if ($result !== null) { 14320 $ok = $result; 14321 break; 14322 } 14323 } 14324 return $ok; 14325 } 14326 14331 private function _checkNeedsP($current) { 14332 if ($current instanceof HTMLPurifier_Token_Start){ 14333 if (!$this->_isInline($current)) { 14334 // <div>PAR1<div> 14335 // ---- 14336 // Terminate early, since we hit a block element 14337 return false; 14338 } 14339 } elseif ($current instanceof HTMLPurifier_Token_Text) { 14340 if (strpos($current->data, "\n\n") !== false) { 14341 // <div>PAR1<b>PAR1\n\nPAR2 14342 // ---- 14343 return true; 14344 } else { 14345 // <div>PAR1<b>PAR1... 14346 // ---- 14347 } 14348 } 14349 return null; 14350 } 14351 14352 } 14353 14354 14355 14356 14357 14361 class HTMLPurifier_Injector_DisplayLinkURI extends HTMLPurifier_Injector 14362 { 14363 14364 public $name = 'DisplayLinkURI'; 14365 public $needed = array('a'); 14366 14367 public function handleElement(&$token) { 14368 } 14369 14370 public function handleEnd(&$token) { 14371 if (isset($token->start->attr['href'])){ 14372 $url = $token->start->attr['href']; 14373 unset($token->start->attr['href']); 14374 $token = array($token, new HTMLPurifier_Token_Text(" ($url)")); 14375 } else { 14376 // nothing to display 14377 } 14378 } 14379 } 14380 14381 14382 14383 14384 14388 class HTMLPurifier_Injector_Linkify extends HTMLPurifier_Injector 14389 { 14390 14391 public $name = 'Linkify'; 14392 public $needed = array('a' => array('href')); 14393 14394 public function handleText(&$token) { 14395 if (!$this->allowsElement('a')) return; 14396 14397 if (strpos($token->data, '://') === false) { 14398 // our really quick heuristic failed, abort 14399 // this may not work so well if we want to match things like 14400 // "google.com", but then again, most people don't 14401 return; 14402 } 14403 14404 // there is/are URL(s). Let's split the string: 14405 // Note: this regex is extremely permissive 14406 $bits = preg_split('#((?:https?|ftp)://[^\s\'"<>()]+)#S', $token->data, -1, PREG_SPLIT_DELIM_CAPTURE); 14407 14408 $token = array(); 14409 14410 // $i = index 14411 // $c = count 14412 // $l = is link 14413 for ($i = 0, $c = count($bits), $l = false; $i < $c; $i++, $l = !$l) { 14414 if (!$l) { 14415 if ($bits[$i] === '') continue; 14416 $token[] = new HTMLPurifier_Token_Text($bits[$i]); 14417 } else { 14418 $token[] = new HTMLPurifier_Token_Start('a', array('href' => $bits[$i])); 14419 $token[] = new HTMLPurifier_Token_Text($bits[$i]); 14420 $token[] = new HTMLPurifier_Token_End('a'); 14421 } 14422 } 14423 14424 } 14425 14426 } 14427 14428 14429 14430 14431 14436 class HTMLPurifier_Injector_PurifierLinkify extends HTMLPurifier_Injector 14437 { 14438 14439 public $name = 'PurifierLinkify'; 14440 public $docURL; 14441 public $needed = array('a' => array('href')); 14442 14443 public function prepare($config, $context) { 14444 $this->docURL = $config->get('AutoFormat.PurifierLinkify.DocURL'); 14445 return parent::prepare($config, $context); 14446 } 14447 14448 public function handleText(&$token) { 14449 if (!$this->allowsElement('a')) return; 14450 if (strpos($token->data, '%') === false) return; 14451 14452 $bits = preg_split('#%([a-z0-9]+\.[a-z0-9]+)#Si', $token->data, -1, PREG_SPLIT_DELIM_CAPTURE); 14453 $token = array(); 14454 14455 // $i = index 14456 // $c = count 14457 // $l = is link 14458 for ($i = 0, $c = count($bits), $l = false; $i < $c; $i++, $l = !$l) { 14459 if (!$l) { 14460 if ($bits[$i] === '') continue; 14461 $token[] = new HTMLPurifier_Token_Text($bits[$i]); 14462 } else { 14463 $token[] = new HTMLPurifier_Token_Start('a', 14464 array('href' => str_replace('%s', $bits[$i], $this->docURL))); 14465 $token[] = new HTMLPurifier_Token_Text('%' . $bits[$i]); 14466 $token[] = new HTMLPurifier_Token_End('a'); 14467 } 14468 } 14469 14470 } 14471 14472 } 14473 14474 14475 14476 14477 14478 class HTMLPurifier_Injector_RemoveEmpty extends HTMLPurifier_Injector 14479 { 14480 14481 private $context, $config, $attrValidator, $removeNbsp, $removeNbspExceptions; 14482 14483 public function prepare($config, $context) { 14484 parent::prepare($config, $context); 14485 $this->config = $config; 14486 $this->context = $context; 14487 $this->removeNbsp = $config->get('AutoFormat.RemoveEmpty.RemoveNbsp'); 14488 $this->removeNbspExceptions = $config->get('AutoFormat.RemoveEmpty.RemoveNbsp.Exceptions'); 14489 $this->attrValidator = new HTMLPurifier_AttrValidator(); 14490 } 14491 14492 public function handleElement(&$token) { 14493 if (!$token instanceof HTMLPurifier_Token_Start) return; 14494 $next = false; 14495 for ($i = $this->inputIndex + 1, $c = count($this->inputTokens); $i < $c; $i++) { 14496 $next = $this->inputTokens[$i]; 14497 if ($next instanceof HTMLPurifier_Token_Text) { 14498 if ($next->is_whitespace) continue; 14499 if ($this->removeNbsp && !isset($this->removeNbspExceptions[$token->name])) { 14500 $plain = str_replace("\xC2\xA0", "", $next->data); 14501 $isWsOrNbsp = $plain === '' || ctype_space($plain); 14502 if ($isWsOrNbsp) continue; 14503 } 14504 } 14505 break; 14506 } 14507 if (!$next || ($next instanceof HTMLPurifier_Token_End && $next->name == $token->name)) { 14508 if ($token->name == 'colgroup') return; 14509 $this->attrValidator->validateToken($token, $this->config, $this->context); 14510 $token->armor['ValidateAttributes'] = true; 14511 if (isset($token->attr['id']) || isset($token->attr['name'])) return; 14512 $token = $i - $this->inputIndex + 1; 14513 for ($b = $this->inputIndex - 1; $b > 0; $b--) { 14514 $prev = $this->inputTokens[$b]; 14515 if ($prev instanceof HTMLPurifier_Token_Text && $prev->is_whitespace) continue; 14516 break; 14517 } 14518 // This is safe because we removed the token that triggered this. 14519 $this->rewind($b - 1); 14520 return; 14521 } 14522 } 14523 14524 } 14525 14526 14527 14528 14529 14533 class HTMLPurifier_Injector_RemoveSpansWithoutAttributes extends HTMLPurifier_Injector 14534 { 14535 public $name = 'RemoveSpansWithoutAttributes'; 14536 public $needed = array('span'); 14537 14538 private $attrValidator; 14539 14543 private $config; 14544 private $context; 14545 14546 public function prepare($config, $context) { 14547 $this->attrValidator = new HTMLPurifier_AttrValidator(); 14548 $this->config = $config; 14549 $this->context = $context; 14550 return parent::prepare($config, $context); 14551 } 14552 14553 public function handleElement(&$token) { 14554 if ($token->name !== 'span' || !$token instanceof HTMLPurifier_Token_Start) { 14555 return; 14556 } 14557 14558 // We need to validate the attributes now since this doesn't normally 14559 // happen until after MakeWellFormed. If all the attributes are removed 14560 // the span needs to be removed too. 14561 $this->attrValidator->validateToken($token, $this->config, $this->context); 14562 $token->armor['ValidateAttributes'] = true; 14563 14564 if (!empty($token->attr)) { 14565 return; 14566 } 14567 14568 $nesting = 0; 14569 $spanContentTokens = array(); 14570 while ($this->forwardUntilEndToken($i, $current, $nesting)) {} 14571 14572 if ($current instanceof HTMLPurifier_Token_End && $current->name === 'span') { 14573 // Mark closing span tag for deletion 14574 $current->markForDeletion = true; 14575 // Delete open span tag 14576 $token = false; 14577 } 14578 } 14579 14580 public function handleEnd(&$token) { 14581 if ($token->markForDeletion) { 14582 $token = false; 14583 } 14584 } 14585 } 14586 14587 14588 14589 14590 14595 class HTMLPurifier_Injector_SafeObject extends HTMLPurifier_Injector 14596 { 14597 public $name = 'SafeObject'; 14598 public $needed = array('object', 'param'); 14599 14600 protected $objectStack = array(); 14601 protected $paramStack = array(); 14602 14603 // Keep this synchronized with AttrTransform/SafeParam.php 14604 protected $addParam = array( 14605 'allowScriptAccess' => 'never', 14606 'allowNetworking' => 'internal', 14607 ); 14608 protected $allowedParam = array( 14609 'wmode' => true, 14610 'movie' => true, 14611 'flashvars' => true, 14612 'src' => true, 14613 'allowFullScreen' => true, // if omitted, assume to be 'false' 14614 ); 14615 14616 public function prepare($config, $context) { 14617 parent::prepare($config, $context); 14618 } 14619 14620 public function handleElement(&$token) { 14621 if ($token->name == 'object') { 14622 $this->objectStack[] = $token; 14623 $this->paramStack[] = array(); 14624 $new = array($token); 14625 foreach ($this->addParam as $name => $value) { 14626 $new[] = new HTMLPurifier_Token_Empty('param', array('name' => $name, 'value' => $value)); 14627 } 14628 $token = $new; 14629 } elseif ($token->name == 'param') { 14630 $nest = count($this->currentNesting) - 1; 14631 if ($nest >= 0 && $this->currentNesting[$nest]->name === 'object') { 14632 $i = count($this->objectStack) - 1; 14633 if (!isset($token->attr['name'])) { 14634 $token = false; 14635 return; 14636 } 14637 $n = $token->attr['name']; 14638 // We need this fix because YouTube doesn't supply a data 14639 // attribute, which we need if a type is specified. This is 14640 // *very* Flash specific. 14641 if (!isset($this->objectStack[$i]->attr['data']) && 14642 ($token->attr['name'] == 'movie' || $token->attr['name'] == 'src')) { 14643 $this->objectStack[$i]->attr['data'] = $token->attr['value']; 14644 } 14645 // Check if the parameter is the correct value but has not 14646 // already been added 14647 if ( 14648 !isset($this->paramStack[$i][$n]) && 14649 isset($this->addParam[$n]) && 14650 $token->attr['name'] === $this->addParam[$n] 14651 ) { 14652 // keep token, and add to param stack 14653 $this->paramStack[$i][$n] = true; 14654 } elseif (isset($this->allowedParam[$n])) { 14655 // keep token, don't do anything to it 14656 // (could possibly check for duplicates here) 14657 } else { 14658 $token = false; 14659 } 14660 } else { 14661 // not directly inside an object, DENY! 14662 $token = false; 14663 } 14664 } 14665 } 14666 14667 public function handleEnd(&$token) { 14668 // This is the WRONG way of handling the object and param stacks; 14669 // we should be inserting them directly on the relevant object tokens 14670 // so that the global stack handling handles it. 14671 if ($token->name == 'object') { 14672 array_pop($this->objectStack); 14673 array_pop($this->paramStack); 14674 } 14675 } 14676 14677 } 14678 14679 14680 14681 14682 14707 class HTMLPurifier_Lexer_DOMLex extends HTMLPurifier_Lexer 14708 { 14709 14710 private $factory; 14711 14712 public function __construct() { 14713 // setup the factory 14714 parent::__construct(); 14715 $this->factory = new HTMLPurifier_TokenFactory(); 14716 } 14717 14718 public function tokenizeHTML($html, $config, $context) { 14719 14720 $html = $this->normalize($html, $config, $context); 14721 14722 // attempt to armor stray angled brackets that cannot possibly 14723 // form tags and thus are probably being used as emoticons 14724 if ($config->get('Core.AggressivelyFixLt')) { 14725 $char = '[^a-z!\/]'; 14726 $comment = "/<!--(.*?)(-->|\z)/is"; 14727 $html = preg_replace_callback($comment, array($this, 'callbackArmorCommentEntities'), $html); 14728 do { 14729 $old = $html; 14730 $html = preg_replace("/<($char)/i", '<\\1', $html); 14731 } while ($html !== $old); 14732 $html = preg_replace_callback($comment, array($this, 'callbackUndoCommentSubst'), $html); // fix comments 14733 } 14734 14735 // preprocess html, essential for UTF-8 14736 $html = $this->wrapHTML($html, $config, $context); 14737 14738 $doc = new DOMDocument(); 14739 $doc->encoding = 'UTF-8'; // theoretically, the above has this covered 14740 14741 set_error_handler(array($this, 'muteErrorHandler')); 14742 $doc->loadHTML($html); 14743 restore_error_handler(); 14744 14745 $tokens = array(); 14746 $this->tokenizeDOM( 14747 $doc->getElementsByTagName('html')->item(0)-> // <html> 14748 getElementsByTagName('body')->item(0)-> // <body> 14749 getElementsByTagName('div')->item(0) // <div> 14750 , $tokens); 14751 return $tokens; 14752 } 14753 14761 protected function tokenizeDOM($node, &$tokens) { 14762 14763 $level = 0; 14764 $nodes = array($level => array($node)); 14765 $closingNodes = array(); 14766 do { 14767 while (!empty($nodes[$level])) { 14768 $node = array_shift($nodes[$level]); // FIFO 14769 $collect = $level > 0 ? true : false; 14770 $needEndingTag = $this->createStartNode($node, $tokens, $collect); 14771 if ($needEndingTag) { 14772 $closingNodes[$level][] = $node; 14773 } 14774 if ($node->childNodes && $node->childNodes->length) { 14775 $level++; 14776 $nodes[$level] = array(); 14777 foreach ($node->childNodes as $childNode) { 14778 array_push($nodes[$level], $childNode); 14779 } 14780 } 14781 } 14782 $level--; 14783 if ($level && isset($closingNodes[$level])) { 14784 while($node = array_pop($closingNodes[$level])) { 14785 $this->createEndNode($node, $tokens); 14786 } 14787 } 14788 } while ($level > 0); 14789 } 14790 14799 protected function createStartNode($node, &$tokens, $collect) { 14800 // intercept non element nodes. WE MUST catch all of them, 14801 // but we're not getting the character reference nodes because 14802 // those should have been preprocessed 14803 if ($node->nodeType === XML_TEXT_NODE) { 14804 $tokens[] = $this->factory->createText($node->data); 14805 return false; 14806 } elseif ($node->nodeType === XML_CDATA_SECTION_NODE) { 14807 // undo libxml's special treatment of <script> and <style> tags 14808 $last = end($tokens); 14809 $data = $node->data; 14810 // (note $node->tagname is already normalized) 14811 if ($last instanceof HTMLPurifier_Token_Start && ($last->name == 'script' || $last->name == 'style')) { 14812 $new_data = trim($data); 14813 if (substr($new_data, 0, 4) === '<!--') { 14814 $data = substr($new_data, 4); 14815 if (substr($data, -3) === '-->') { 14816 $data = substr($data, 0, -3); 14817 } else { 14818 // Highly suspicious! Not sure what to do... 14819 } 14820 } 14821 } 14822 $tokens[] = $this->factory->createText($this->parseData($data)); 14823 return false; 14824 } elseif ($node->nodeType === XML_COMMENT_NODE) { 14825 // this is code is only invoked for comments in script/style in versions 14826 // of libxml pre-2.6.28 (regular comments, of course, are still 14827 // handled regularly) 14828 $tokens[] = $this->factory->createComment($node->data); 14829 return false; 14830 } elseif ( 14831 // not-well tested: there may be other nodes we have to grab 14832 $node->nodeType !== XML_ELEMENT_NODE 14833 ) { 14834 return false; 14835 } 14836 14837 $attr = $node->hasAttributes() ? $this->transformAttrToAssoc($node->attributes) : array(); 14838 14839 // We still have to make sure that the element actually IS empty 14840 if (!$node->childNodes->length) { 14841 if ($collect) { 14842 $tokens[] = $this->factory->createEmpty($node->tagName, $attr); 14843 } 14844 return false; 14845 } else { 14846 if ($collect) { 14847 $tokens[] = $this->factory->createStart( 14848 $tag_name = $node->tagName, // somehow, it get's dropped 14849 $attr 14850 ); 14851 } 14852 return true; 14853 } 14854 } 14855 14856 protected function createEndNode($node, &$tokens) { 14857 $tokens[] = $this->factory->createEnd($node->tagName); 14858 } 14859 14860 14867 protected function transformAttrToAssoc($node_map) { 14868 // NamedNodeMap is documented very well, so we're using undocumented 14869 // features, namely, the fact that it implements Iterator and 14870 // has a ->length attribute 14871 if ($node_map->length === 0) return array(); 14872 $array = array(); 14873 foreach ($node_map as $attr) { 14874 $array[$attr->name] = $attr->value; 14875 } 14876 return $array; 14877 } 14878 14882 public function muteErrorHandler($errno, $errstr) {} 14883 14888 public function callbackUndoCommentSubst($matches) { 14889 return '<!--' . strtr($matches[1], array('&'=>'&','<'=>'<')) . $matches[2]; 14890 } 14891 14896 public function callbackArmorCommentEntities($matches) { 14897 return '<!--' . str_replace('&', '&', $matches[1]) . $matches[2]; 14898 } 14899 14903 protected function wrapHTML($html, $config, $context) { 14904 $def = $config->getDefinition('HTML'); 14905 $ret = ''; 14906 14907 if (!empty($def->doctype->dtdPublic) || !empty($def->doctype->dtdSystem)) { 14908 $ret .= '<!DOCTYPE html '; 14909 if (!empty($def->doctype->dtdPublic)) $ret .= 'PUBLIC "' . $def->doctype->dtdPublic . '" '; 14910 if (!empty($def->doctype->dtdSystem)) $ret .= '"' . $def->doctype->dtdSystem . '" '; 14911 $ret .= '>'; 14912 } 14913 14914 $ret .= '<html><head>'; 14915 $ret .= '<meta http-equiv="Content-Type" content="text/html; charset=utf-8" />'; 14916 // No protection if $html contains a stray </div>! 14917 $ret .= '</head><body><div>'.$html.'</div></body></html>'; 14918 return $ret; 14919 } 14920 14921 } 14922 14923 14924 14925 14926 14937 class HTMLPurifier_Lexer_DirectLex extends HTMLPurifier_Lexer 14938 { 14939 14940 public $tracksLineNumbers = true; 14941 14945 protected $_whitespace = "\x20\x09\x0D\x0A"; 14946 14951 protected function scriptCallback($matches) { 14952 return $matches[1] . htmlspecialchars($matches[2], ENT_COMPAT, 'UTF-8') . $matches[3]; 14953 } 14954 14955 public function tokenizeHTML($html, $config, $context) { 14956 14957 // special normalization for script tags without any armor 14958 // our "armor" heurstic is a < sign any number of whitespaces after 14959 // the first script tag 14960 if ($config->get('HTML.Trusted')) { 14961 $html = preg_replace_callback('#(<script[^>]*>)(\s*[^<].+?)(</script>)#si', 14962 array($this, 'scriptCallback'), $html); 14963 } 14964 14965 $html = $this->normalize($html, $config, $context); 14966 14967 $cursor = 0; // our location in the text 14968 $inside_tag = false; // whether or not we're parsing the inside of a tag 14969 $array = array(); // result array 14970 14971 // This is also treated to mean maintain *column* numbers too 14972 $maintain_line_numbers = $config->get('Core.MaintainLineNumbers'); 14973 14974 if ($maintain_line_numbers === null) { 14975 // automatically determine line numbering by checking 14976 // if error collection is on 14977 $maintain_line_numbers = $config->get('Core.CollectErrors'); 14978 } 14979 14980 if ($maintain_line_numbers) { 14981 $current_line = 1; 14982 $current_col = 0; 14983 $length = strlen($html); 14984 } else { 14985 $current_line = false; 14986 $current_col = false; 14987 $length = false; 14988 } 14989 $context->register('CurrentLine', $current_line); 14990 $context->register('CurrentCol', $current_col); 14991 $nl = "\n"; 14992 // how often to manually recalculate. This will ALWAYS be right, 14993 // but it's pretty wasteful. Set to 0 to turn off 14994 $synchronize_interval = $config->get('Core.DirectLexLineNumberSyncInterval'); 14995 14996 $e = false; 14997 if ($config->get('Core.CollectErrors')) { 14998 $e =& $context->get('ErrorCollector'); 14999 } 15000 15001 // for testing synchronization 15002 $loops = 0; 15003 15004 while(++$loops) { 15005 15006 // $cursor is either at the start of a token, or inside of 15007 // a tag (i.e. there was a < immediately before it), as indicated 15008 // by $inside_tag 15009 15010 if ($maintain_line_numbers) { 15011 15012 // $rcursor, however, is always at the start of a token. 15013 $rcursor = $cursor - (int) $inside_tag; 15014 15015 // Column number is cheap, so we calculate it every round. 15016 // We're interested at the *end* of the newline string, so 15017 // we need to add strlen($nl) == 1 to $nl_pos before subtracting it 15018 // from our "rcursor" position. 15019 $nl_pos = strrpos($html, $nl, $rcursor - $length); 15020 $current_col = $rcursor - (is_bool($nl_pos) ? 0 : $nl_pos + 1); 15021 15022 // recalculate lines 15023 if ( 15024 $synchronize_interval && // synchronization is on 15025 $cursor > 0 && // cursor is further than zero 15026 $loops % $synchronize_interval === 0 // time to synchronize! 15027 ) { 15028 $current_line = 1 + $this->substrCount($html, $nl, 0, $cursor); 15029 } 15030 15031 } 15032 15033 $position_next_lt = strpos($html, '<', $cursor); 15034 $position_next_gt = strpos($html, '>', $cursor); 15035 15036 // triggers on "<b>asdf</b>" but not "asdf <b></b>" 15037 // special case to set up context 15038 if ($position_next_lt === $cursor) { 15039 $inside_tag = true; 15040 $cursor++; 15041 } 15042 15043 if (!$inside_tag && $position_next_lt !== false) { 15044 // We are not inside tag and there still is another tag to parse 15045 $token = new 15046 HTMLPurifier_Token_Text( 15047 $this->parseData( 15048 substr( 15049 $html, $cursor, $position_next_lt - $cursor 15050 ) 15051 ) 15052 ); 15053 if ($maintain_line_numbers) { 15054 $token->rawPosition($current_line, $current_col); 15055 $current_line += $this->substrCount($html, $nl, $cursor, $position_next_lt - $cursor); 15056 } 15057 $array[] = $token; 15058 $cursor = $position_next_lt + 1; 15059 $inside_tag = true; 15060 continue; 15061 } elseif (!$inside_tag) { 15062 // We are not inside tag but there are no more tags 15063 // If we're already at the end, break 15064 if ($cursor === strlen($html)) break; 15065 // Create Text of rest of string 15066 $token = new 15067 HTMLPurifier_Token_Text( 15068 $this->parseData( 15069 substr( 15070 $html, $cursor 15071 ) 15072 ) 15073 ); 15074 if ($maintain_line_numbers) $token->rawPosition($current_line, $current_col); 15075 $array[] = $token; 15076 break; 15077 } elseif ($inside_tag && $position_next_gt !== false) { 15078 // We are in tag and it is well formed 15079 // Grab the internals of the tag 15080 $strlen_segment = $position_next_gt - $cursor; 15081 15082 if ($strlen_segment < 1) { 15083 // there's nothing to process! 15084 $token = new HTMLPurifier_Token_Text('<'); 15085 $cursor++; 15086 continue; 15087 } 15088 15089 $segment = substr($html, $cursor, $strlen_segment); 15090 15091 if ($segment === false) { 15092 // somehow, we attempted to access beyond the end of 15093 // the string, defense-in-depth, reported by Nate Abele 15094 break; 15095 } 15096 15097 // Check if it's a comment 15098 if ( 15099 substr($segment, 0, 3) === '!--' 15100 ) { 15101 // re-determine segment length, looking for --> 15102 $position_comment_end = strpos($html, '-->', $cursor); 15103 if ($position_comment_end === false) { 15104 // uh oh, we have a comment that extends to 15105 // infinity. Can't be helped: set comment 15106 // end position to end of string 15107 if ($e) $e->send(E_WARNING, 'Lexer: Unclosed comment'); 15108 $position_comment_end = strlen($html); 15109 $end = true; 15110 } else { 15111 $end = false; 15112 } 15113 $strlen_segment = $position_comment_end - $cursor; 15114 $segment = substr($html, $cursor, $strlen_segment); 15115 $token = new 15116 HTMLPurifier_Token_Comment( 15117 substr( 15118 $segment, 3, $strlen_segment - 3 15119 ) 15120 ); 15121 if ($maintain_line_numbers) { 15122 $token->rawPosition($current_line, $current_col); 15123 $current_line += $this->substrCount($html, $nl, $cursor, $strlen_segment); 15124 } 15125 $array[] = $token; 15126 $cursor = $end ? $position_comment_end : $position_comment_end + 3; 15127 $inside_tag = false; 15128 continue; 15129 } 15130 15131 // Check if it's an end tag 15132 $is_end_tag = (strpos($segment,'/') === 0); 15133 if ($is_end_tag) { 15134 $type = substr($segment, 1); 15135 $token = new HTMLPurifier_Token_End($type); 15136 if ($maintain_line_numbers) { 15137 $token->rawPosition($current_line, $current_col); 15138 $current_line += $this->substrCount($html, $nl, $cursor, $position_next_gt - $cursor); 15139 } 15140 $array[] = $token; 15141 $inside_tag = false; 15142 $cursor = $position_next_gt + 1; 15143 continue; 15144 } 15145 15146 // Check leading character is alnum, if not, we may 15147 // have accidently grabbed an emoticon. Translate into 15148 // text and go our merry way 15149 if (!ctype_alpha($segment[0])) { 15150 // XML: $segment[0] !== '_' && $segment[0] !== ':' 15151 if ($e) $e->send(E_NOTICE, 'Lexer: Unescaped lt'); 15152 $token = new HTMLPurifier_Token_Text('<'); 15153 if ($maintain_line_numbers) { 15154 $token->rawPosition($current_line, $current_col); 15155 $current_line += $this->substrCount($html, $nl, $cursor, $position_next_gt - $cursor); 15156 } 15157 $array[] = $token; 15158 $inside_tag = false; 15159 continue; 15160 } 15161 15162 // Check if it is explicitly self closing, if so, remove 15163 // trailing slash. Remember, we could have a tag like <br>, so 15164 // any later token processing scripts must convert improperly 15165 // classified EmptyTags from StartTags. 15166 $is_self_closing = (strrpos($segment,'/') === $strlen_segment-1); 15167 if ($is_self_closing) { 15168 $strlen_segment--; 15169 $segment = substr($segment, 0, $strlen_segment); 15170 } 15171 15172 // Check if there are any attributes 15173 $position_first_space = strcspn($segment, $this->_whitespace); 15174 15175 if ($position_first_space >= $strlen_segment) { 15176 if ($is_self_closing) { 15177 $token = new HTMLPurifier_Token_Empty($segment); 15178 } else { 15179 $token = new HTMLPurifier_Token_Start($segment); 15180 } 15181 if ($maintain_line_numbers) { 15182 $token->rawPosition($current_line, $current_col); 15183 $current_line += $this->substrCount($html, $nl, $cursor, $position_next_gt - $cursor); 15184 } 15185 $array[] = $token; 15186 $inside_tag = false; 15187 $cursor = $position_next_gt + 1; 15188 continue; 15189 } 15190 15191 // Grab out all the data 15192 $type = substr($segment, 0, $position_first_space); 15193 $attribute_string = 15194 trim( 15195 substr( 15196 $segment, $position_first_space 15197 ) 15198 ); 15199 if ($attribute_string) { 15200 $attr = $this->parseAttributeString( 15201 $attribute_string 15202 , $config, $context 15203 ); 15204 } else { 15205 $attr = array(); 15206 } 15207 15208 if ($is_self_closing) { 15209 $token = new HTMLPurifier_Token_Empty($type, $attr); 15210 } else { 15211 $token = new HTMLPurifier_Token_Start($type, $attr); 15212 } 15213 if ($maintain_line_numbers) { 15214 $token->rawPosition($current_line, $current_col); 15215 $current_line += $this->substrCount($html, $nl, $cursor, $position_next_gt - $cursor); 15216 } 15217 $array[] = $token; 15218 $cursor = $position_next_gt + 1; 15219 $inside_tag = false; 15220 continue; 15221 } else { 15222 // inside tag, but there's no ending > sign 15223 if ($e) $e->send(E_WARNING, 'Lexer: Missing gt'); 15224 $token = new 15225 HTMLPurifier_Token_Text( 15226 '<' . 15227 $this->parseData( 15228 substr($html, $cursor) 15229 ) 15230 ); 15231 if ($maintain_line_numbers) $token->rawPosition($current_line, $current_col); 15232 // no cursor scroll? Hmm... 15233 $array[] = $token; 15234 break; 15235 } 15236 break; 15237 } 15238 15239 $context->destroy('CurrentLine'); 15240 $context->destroy('CurrentCol'); 15241 return $array; 15242 } 15243 15247 protected function substrCount($haystack, $needle, $offset, $length) { 15248 static $oldVersion; 15249 if ($oldVersion === null) { 15250 $oldVersion = version_compare(PHP_VERSION, '5.1', '<'); 15251 } 15252 if ($oldVersion) { 15253 $haystack = substr($haystack, $offset, $length); 15254 return substr_count($haystack, $needle); 15255 } else { 15256 return substr_count($haystack, $needle, $offset, $length); 15257 } 15258 } 15259 15266 public function parseAttributeString($string, $config, $context) { 15267 $string = (string) $string; // quick typecast 15268 15269 if ($string == '') return array(); // no attributes 15270 15271 $e = false; 15272 if ($config->get('Core.CollectErrors')) { 15273 $e =& $context->get('ErrorCollector'); 15274 } 15275 15276 // let's see if we can abort as quickly as possible 15277 // one equal sign, no spaces => one attribute 15278 $num_equal = substr_count($string, '='); 15279 $has_space = strpos($string, ' '); 15280 if ($num_equal === 0 && !$has_space) { 15281 // bool attribute 15282 return array($string => $string); 15283 } elseif ($num_equal === 1 && !$has_space) { 15284 // only one attribute 15285 list($key, $quoted_value) = explode('=', $string); 15286 $quoted_value = trim($quoted_value); 15287 if (!$key) { 15288 if ($e) $e->send(E_ERROR, 'Lexer: Missing attribute key'); 15289 return array(); 15290 } 15291 if (!$quoted_value) return array($key => ''); 15292 $first_char = @$quoted_value[0]; 15293 $last_char = @$quoted_value[strlen($quoted_value)-1]; 15294 15295 $same_quote = ($first_char == $last_char); 15296 $open_quote = ($first_char == '"' || $first_char == "'"); 15297 15298 if ( $same_quote && $open_quote) { 15299 // well behaved 15300 $value = substr($quoted_value, 1, strlen($quoted_value) - 2); 15301 } else { 15302 // not well behaved 15303 if ($open_quote) { 15304 if ($e) $e->send(E_ERROR, 'Lexer: Missing end quote'); 15305 $value = substr($quoted_value, 1); 15306 } else { 15307 $value = $quoted_value; 15308 } 15309 } 15310 if ($value === false) $value = ''; 15311 return array($key => $this->parseData($value)); 15312 } 15313 15314 // setup loop environment 15315 $array = array(); // return assoc array of attributes 15316 $cursor = 0; // current position in string (moves forward) 15317 $size = strlen($string); // size of the string (stays the same) 15318 15319 // if we have unquoted attributes, the parser expects a terminating 15320 // space, so let's guarantee that there's always a terminating space. 15321 $string .= ' '; 15322 15323 while(true) { 15324 15325 if ($cursor >= $size) { 15326 break; 15327 } 15328 15329 $cursor += ($value = strspn($string, $this->_whitespace, $cursor)); 15330 // grab the key 15331 15332 $key_begin = $cursor; //we're currently at the start of the key 15333 15334 // scroll past all characters that are the key (not whitespace or =) 15335 $cursor += strcspn($string, $this->_whitespace . '=', $cursor); 15336 15337 $key_end = $cursor; // now at the end of the key 15338 15339 $key = substr($string, $key_begin, $key_end - $key_begin); 15340 15341 if (!$key) { 15342 if ($e) $e->send(E_ERROR, 'Lexer: Missing attribute key'); 15343 $cursor += strcspn($string, $this->_whitespace, $cursor + 1); // prevent infinite loop 15344 continue; // empty key 15345 } 15346 15347 // scroll past all whitespace 15348 $cursor += strspn($string, $this->_whitespace, $cursor); 15349 15350 if ($cursor >= $size) { 15351 $array[$key] = $key; 15352 break; 15353 } 15354 15355 // if the next character is an equal sign, we've got a regular 15356 // pair, otherwise, it's a bool attribute 15357 $first_char = @$string[$cursor]; 15358 15359 if ($first_char == '=') { 15360 // key="value" 15361 15362 $cursor++; 15363 $cursor += strspn($string, $this->_whitespace, $cursor); 15364 15365 if ($cursor === false) { 15366 $array[$key] = ''; 15367 break; 15368 } 15369 15370 // we might be in front of a quote right now 15371 15372 $char = @$string[$cursor]; 15373 15374 if ($char == '"' || $char == "'") { 15375 // it's quoted, end bound is $char 15376 $cursor++; 15377 $value_begin = $cursor; 15378 $cursor = strpos($string, $char, $cursor); 15379 $value_end = $cursor; 15380 } else { 15381 // it's not quoted, end bound is whitespace 15382 $value_begin = $cursor; 15383 $cursor += strcspn($string, $this->_whitespace, $cursor); 15384 $value_end = $cursor; 15385 } 15386 15387 // we reached a premature end 15388 if ($cursor === false) { 15389 $cursor = $size; 15390 $value_end = $cursor; 15391 } 15392 15393 $value = substr($string, $value_begin, $value_end - $value_begin); 15394 if ($value === false) $value = ''; 15395 $array[$key] = $this->parseData($value); 15396 $cursor++; 15397 15398 } else { 15399 // boolattr 15400 if ($key !== '') { 15401 $array[$key] = $key; 15402 } else { 15403 // purely theoretical 15404 if ($e) $e->send(E_ERROR, 'Lexer: Missing attribute key'); 15405 } 15406 15407 } 15408 } 15409 return $array; 15410 } 15411 15412 } 15413 15414 15415 15416 15417 15421 abstract class HTMLPurifier_Strategy_Composite extends HTMLPurifier_Strategy 15422 { 15423 15427 protected $strategies = array(); 15428 15429 public function execute($tokens, $config, $context) { 15430 foreach ($this->strategies as $strategy) { 15431 $tokens = $strategy->execute($tokens, $config, $context); 15432 } 15433 return $tokens; 15434 } 15435 15436 } 15437 15438 15439 15440 15441 15445 class HTMLPurifier_Strategy_Core extends HTMLPurifier_Strategy_Composite 15446 { 15447 15448 public function __construct() { 15449 $this->strategies[] = new HTMLPurifier_Strategy_RemoveForeignElements(); 15450 $this->strategies[] = new HTMLPurifier_Strategy_MakeWellFormed(); 15451 $this->strategies[] = new HTMLPurifier_Strategy_FixNesting(); 15452 $this->strategies[] = new HTMLPurifier_Strategy_ValidateAttributes(); 15453 } 15454 15455 } 15456 15457 15458 15459 15460 15489 class HTMLPurifier_Strategy_FixNesting extends HTMLPurifier_Strategy 15490 { 15491 15492 public function execute($tokens, $config, $context) { 15493 //####################################################################// 15494 // Pre-processing 15495 15496 // get a copy of the HTML definition 15497 $definition = $config->getHTMLDefinition(); 15498 15499 // insert implicit "parent" node, will be removed at end. 15500 // DEFINITION CALL 15501 $parent_name = $definition->info_parent; 15502 array_unshift($tokens, new HTMLPurifier_Token_Start($parent_name)); 15503 $tokens[] = new HTMLPurifier_Token_End($parent_name); 15504 15505 // setup the context variable 'IsInline', for chameleon processing 15506 // is 'false' when we are not inline, 'true' when it must always 15507 // be inline, and an integer when it is inline for a certain 15508 // branch of the document tree 15509 $is_inline = $definition->info_parent_def->descendants_are_inline; 15510 $context->register('IsInline', $is_inline); 15511 15512 // setup error collector 15513 $e =& $context->get('ErrorCollector', true); 15514 15515 //####################################################################// 15516 // Loop initialization 15517 15518 // stack that contains the indexes of all parents, 15519 // $stack[count($stack)-1] being the current parent 15520 $stack = array(); 15521 15522 // stack that contains all elements that are excluded 15523 // it is organized by parent elements, similar to $stack, 15524 // but it is only populated when an element with exclusions is 15525 // processed, i.e. there won't be empty exclusions. 15526 $exclude_stack = array(); 15527 15528 // variable that contains the start token while we are processing 15529 // nodes. This enables error reporting to do its job 15530 $start_token = false; 15531 $context->register('CurrentToken', $start_token); 15532 15533 //####################################################################// 15534 // Loop 15535 15536 // iterate through all start nodes. Determining the start node 15537 // is complicated so it has been omitted from the loop construct 15538 for ($i = 0, $size = count($tokens) ; $i < $size; ) { 15539 15540 //################################################################// 15541 // Gather information on children 15542 15543 // child token accumulator 15544 $child_tokens = array(); 15545 15546 // scroll to the end of this node, report number, and collect 15547 // all children 15548 for ($j = $i, $depth = 0; ; $j++) { 15549 if ($tokens[$j] instanceof HTMLPurifier_Token_Start) { 15550 $depth++; 15551 // skip token assignment on first iteration, this is the 15552 // token we currently are on 15553 if ($depth == 1) continue; 15554 } elseif ($tokens[$j] instanceof HTMLPurifier_Token_End) { 15555 $depth--; 15556 // skip token assignment on last iteration, this is the 15557 // end token of the token we're currently on 15558 if ($depth == 0) break; 15559 } 15560 $child_tokens[] = $tokens[$j]; 15561 } 15562 15563 // $i is index of start token 15564 // $j is index of end token 15565 15566 $start_token = $tokens[$i]; // to make token available via CurrentToken 15567 15568 //################################################################// 15569 // Gather information on parent 15570 15571 // calculate parent information 15572 if ($count = count($stack)) { 15573 $parent_index = $stack[$count-1]; 15574 $parent_name = $tokens[$parent_index]->name; 15575 if ($parent_index == 0) { 15576 $parent_def = $definition->info_parent_def; 15577 } else { 15578 $parent_def = $definition->info[$parent_name]; 15579 } 15580 } else { 15581 // processing as if the parent were the "root" node 15582 // unknown info, it won't be used anyway, in the future, 15583 // we may want to enforce one element only (this is 15584 // necessary for HTML Purifier to clean entire documents 15585 $parent_index = $parent_name = $parent_def = null; 15586 } 15587 15588 // calculate context 15589 if ($is_inline === false) { 15590 // check if conditions make it inline 15591 if (!empty($parent_def) && $parent_def->descendants_are_inline) { 15592 $is_inline = $count - 1; 15593 } 15594 } else { 15595 // check if we're out of inline 15596 if ($count === $is_inline) { 15597 $is_inline = false; 15598 } 15599 } 15600 15601 //################################################################// 15602 // Determine whether element is explicitly excluded SGML-style 15603 15604 // determine whether or not element is excluded by checking all 15605 // parent exclusions. The array should not be very large, two 15606 // elements at most. 15607 $excluded = false; 15608 if (!empty($exclude_stack)) { 15609 foreach ($exclude_stack as $lookup) { 15610 if (isset($lookup[$tokens[$i]->name])) { 15611 $excluded = true; 15612 // no need to continue processing 15613 break; 15614 } 15615 } 15616 } 15617 15618 //################################################################// 15619 // Perform child validation 15620 15621 if ($excluded) { 15622 // there is an exclusion, remove the entire node 15623 $result = false; 15624 $excludes = array(); // not used, but good to initialize anyway 15625 } else { 15626 // DEFINITION CALL 15627 if ($i === 0) { 15628 // special processing for the first node 15629 $def = $definition->info_parent_def; 15630 } else { 15631 $def = $definition->info[$tokens[$i]->name]; 15632 15633 } 15634 15635 if (!empty($def->child)) { 15636 // have DTD child def validate children 15637 $result = $def->child->validateChildren( 15638 $child_tokens, $config, $context); 15639 } else { 15640 // weird, no child definition, get rid of everything 15641 $result = false; 15642 } 15643 15644 // determine whether or not this element has any exclusions 15645 $excludes = $def->excludes; 15646 } 15647 15648 // $result is now a bool or array 15649 15650 //################################################################// 15651 // Process result by interpreting $result 15652 15653 if ($result === true || $child_tokens === $result) { 15654 // leave the node as is 15655 15656 // register start token as a parental node start 15657 $stack[] = $i; 15658 15659 // register exclusions if there are any 15660 if (!empty($excludes)) $exclude_stack[] = $excludes; 15661 15662 // move cursor to next possible start node 15663 $i++; 15664 15665 } elseif($result === false) { 15666 // remove entire node 15667 15668 if ($e) { 15669 if ($excluded) { 15670 $e->send(E_ERROR, 'Strategy_FixNesting: Node excluded'); 15671 } else { 15672 $e->send(E_ERROR, 'Strategy_FixNesting: Node removed'); 15673 } 15674 } 15675 15676 // calculate length of inner tokens and current tokens 15677 $length = $j - $i + 1; 15678 15679 // perform removal 15680 array_splice($tokens, $i, $length); 15681 15682 // update size 15683 $size -= $length; 15684 15685 // there is no start token to register, 15686 // current node is now the next possible start node 15687 // unless it turns out that we need to do a double-check 15688 15689 // this is a rought heuristic that covers 100% of HTML's 15690 // cases and 99% of all other cases. A child definition 15691 // that would be tricked by this would be something like: 15692 // ( | a b c) where it's all or nothing. Fortunately, 15693 // our current implementation claims that that case would 15694 // not allow empty, even if it did 15695 if (!$parent_def->child->allow_empty) { 15696 // we need to do a double-check 15697 $i = $parent_index; 15698 array_pop($stack); 15699 } 15700 15701 // PROJECTED OPTIMIZATION: Process all children elements before 15702 // reprocessing parent node. 15703 15704 } else { 15705 // replace node with $result 15706 15707 // calculate length of inner tokens 15708 $length = $j - $i - 1; 15709 15710 if ($e) { 15711 if (empty($result) && $length) { 15712 $e->send(E_ERROR, 'Strategy_FixNesting: Node contents removed'); 15713 } else { 15714 $e->send(E_WARNING, 'Strategy_FixNesting: Node reorganized'); 15715 } 15716 } 15717 15718 // perform replacement 15719 array_splice($tokens, $i + 1, $length, $result); 15720 15721 // update size 15722 $size -= $length; 15723 $size += count($result); 15724 15725 // register start token as a parental node start 15726 $stack[] = $i; 15727 15728 // register exclusions if there are any 15729 if (!empty($excludes)) $exclude_stack[] = $excludes; 15730 15731 // move cursor to next possible start node 15732 $i++; 15733 15734 } 15735 15736 //################################################################// 15737 // Scroll to next start node 15738 15739 // We assume, at this point, that $i is the index of the token 15740 // that is the first possible new start point for a node. 15741 15742 // Test if the token indeed is a start tag, if not, move forward 15743 // and test again. 15744 $size = count($tokens); 15745 while ($i < $size and !$tokens[$i] instanceof HTMLPurifier_Token_Start) { 15746 if ($tokens[$i] instanceof HTMLPurifier_Token_End) { 15747 // pop a token index off the stack if we ended a node 15748 array_pop($stack); 15749 // pop an exclusion lookup off exclusion stack if 15750 // we ended node and that node had exclusions 15751 if ($i == 0 || $i == $size - 1) { 15752 // use specialized var if it's the super-parent 15753 $s_excludes = $definition->info_parent_def->excludes; 15754 } else { 15755 $s_excludes = $definition->info[$tokens[$i]->name]->excludes; 15756 } 15757 if ($s_excludes) { 15758 array_pop($exclude_stack); 15759 } 15760 } 15761 $i++; 15762 } 15763 15764 } 15765 15766 //####################################################################// 15767 // Post-processing 15768 15769 // remove implicit parent tokens at the beginning and end 15770 array_shift($tokens); 15771 array_pop($tokens); 15772 15773 // remove context variables 15774 $context->destroy('IsInline'); 15775 $context->destroy('CurrentToken'); 15776 15777 //####################################################################// 15778 // Return 15779 15780 return $tokens; 15781 15782 } 15783 15784 } 15785 15786 15787 15788 15789 15801 class HTMLPurifier_Strategy_MakeWellFormed extends HTMLPurifier_Strategy 15802 { 15803 15807 protected $tokens; 15808 15812 protected $t; 15813 15817 protected $stack; 15818 15822 protected $injectors; 15823 15827 protected $config; 15828 15832 protected $context; 15833 15834 public function execute($tokens, $config, $context) { 15835 15836 $definition = $config->getHTMLDefinition(); 15837 15838 // local variables 15839 $generator = new HTMLPurifier_Generator($config, $context); 15840 $escape_invalid_tags = $config->get('Core.EscapeInvalidTags'); 15841 // used for autoclose early abortion 15842 $global_parent_allowed_elements = array(); 15843 if (isset($definition->info[$definition->info_parent])) { 15844 // may be unset under testing circumstances 15845 $global_parent_allowed_elements = $definition->info[$definition->info_parent]->child->getAllowedElements($config); 15846 } 15847 $e = $context->get('ErrorCollector', true); 15848 $t = false; // token index 15849 $i = false; // injector index 15850 $token = false; // the current token 15851 $reprocess = false; // whether or not to reprocess the same token 15852 $stack = array(); 15853 15854 // member variables 15855 $this->stack =& $stack; 15856 $this->t =& $t; 15857 $this->tokens =& $tokens; 15858 $this->config = $config; 15859 $this->context = $context; 15860 15861 // context variables 15862 $context->register('CurrentNesting', $stack); 15863 $context->register('InputIndex', $t); 15864 $context->register('InputTokens', $tokens); 15865 $context->register('CurrentToken', $token); 15866 15867 // -- begin INJECTOR -- 15868 15869 $this->injectors = array(); 15870 15871 $injectors = $config->getBatch('AutoFormat'); 15872 $def_injectors = $definition->info_injector; 15873 $custom_injectors = $injectors['Custom']; 15874 unset($injectors['Custom']); // special case 15875 foreach ($injectors as $injector => $b) { 15876 // XXX: Fix with a legitimate lookup table of enabled filters 15877 if (strpos($injector, '.') !== false) continue; 15878 $injector = "HTMLPurifier_Injector_$injector"; 15879 if (!$b) continue; 15880 $this->injectors[] = new $injector; 15881 } 15882 foreach ($def_injectors as $injector) { 15883 // assumed to be objects 15884 $this->injectors[] = $injector; 15885 } 15886 foreach ($custom_injectors as $injector) { 15887 if (!$injector) continue; 15888 if (is_string($injector)) { 15889 $injector = "HTMLPurifier_Injector_$injector"; 15890 $injector = new $injector; 15891 } 15892 $this->injectors[] = $injector; 15893 } 15894 15895 // give the injectors references to the definition and context 15896 // variables for performance reasons 15897 foreach ($this->injectors as $ix => $injector) { 15898 $error = $injector->prepare($config, $context); 15899 if (!$error) continue; 15900 array_splice($this->injectors, $ix, 1); // rm the injector 15901 trigger_error("Cannot enable {$injector->name} injector because $error is not allowed", E_USER_WARNING); 15902 } 15903 15904 // -- end INJECTOR -- 15905 15906 // a note on reprocessing: 15907 // In order to reduce code duplication, whenever some code needs 15908 // to make HTML changes in order to make things "correct", the 15909 // new HTML gets sent through the purifier, regardless of its 15910 // status. This means that if we add a start token, because it 15911 // was totally necessary, we don't have to update nesting; we just 15912 // punt ($reprocess = true; continue;) and it does that for us. 15913 15914 // isset is in loop because $tokens size changes during loop exec 15915 for ( 15916 $t = 0; 15917 $t == 0 || isset($tokens[$t - 1]); 15918 // only increment if we don't need to reprocess 15919 $reprocess ? $reprocess = false : $t++ 15920 ) { 15921 15922 // check for a rewind 15923 if (is_int($i) && $i >= 0) { 15924 // possibility: disable rewinding if the current token has a 15925 // rewind set on it already. This would offer protection from 15926 // infinite loop, but might hinder some advanced rewinding. 15927 $rewind_to = $this->injectors[$i]->getRewind(); 15928 if (is_int($rewind_to) && $rewind_to < $t) { 15929 if ($rewind_to < 0) $rewind_to = 0; 15930 while ($t > $rewind_to) { 15931 $t--; 15932 $prev = $tokens[$t]; 15933 // indicate that other injectors should not process this token, 15934 // but we need to reprocess it 15935 unset($prev->skip[$i]); 15936 $prev->rewind = $i; 15937 if ($prev instanceof HTMLPurifier_Token_Start) array_pop($this->stack); 15938 elseif ($prev instanceof HTMLPurifier_Token_End) $this->stack[] = $prev->start; 15939 } 15940 } 15941 $i = false; 15942 } 15943 15944 // handle case of document end 15945 if (!isset($tokens[$t])) { 15946 // kill processing if stack is empty 15947 if (empty($this->stack)) break; 15948 15949 // peek 15950 $top_nesting = array_pop($this->stack); 15951 $this->stack[] = $top_nesting; 15952 15953 // send error [TagClosedSuppress] 15954 if ($e && !isset($top_nesting->armor['MakeWellFormed_TagClosedError'])) { 15955 $e->send(E_NOTICE, 'Strategy_MakeWellFormed: Tag closed by document end', $top_nesting); 15956 } 15957 15958 // append, don't splice, since this is the end 15959 $tokens[] = new HTMLPurifier_Token_End($top_nesting->name); 15960 15961 // punt! 15962 $reprocess = true; 15963 continue; 15964 } 15965 15966 $token = $tokens[$t]; 15967 15968 //echo '<br>'; printTokens($tokens, $t); printTokens($this->stack); 15969 //flush(); 15970 15971 // quick-check: if it's not a tag, no need to process 15972 if (empty($token->is_tag)) { 15973 if ($token instanceof HTMLPurifier_Token_Text) { 15974 foreach ($this->injectors as $i => $injector) { 15975 if (isset($token->skip[$i])) continue; 15976 if ($token->rewind !== null && $token->rewind !== $i) continue; 15977 $injector->handleText($token); 15978 $this->processToken($token, $i); 15979 $reprocess = true; 15980 break; 15981 } 15982 } 15983 // another possibility is a comment 15984 continue; 15985 } 15986 15987 if (isset($definition->info[$token->name])) { 15988 $type = $definition->info[$token->name]->child->type; 15989 } else { 15990 $type = false; // Type is unknown, treat accordingly 15991 } 15992 15993 // quick tag checks: anything that's *not* an end tag 15994 $ok = false; 15995 if ($type === 'empty' && $token instanceof HTMLPurifier_Token_Start) { 15996 // claims to be a start tag but is empty 15997 $token = new HTMLPurifier_Token_Empty($token->name, $token->attr, $token->line, $token->col, $token->armor); 15998 $ok = true; 15999 } elseif ($type && $type !== 'empty' && $token instanceof HTMLPurifier_Token_Empty) { 16000 // claims to be empty but really is a start tag 16001 $this->swap(new HTMLPurifier_Token_End($token->name)); 16002 $this->insertBefore(new HTMLPurifier_Token_Start($token->name, $token->attr, $token->line, $token->col, $token->armor)); 16003 // punt (since we had to modify the input stream in a non-trivial way) 16004 $reprocess = true; 16005 continue; 16006 } elseif ($token instanceof HTMLPurifier_Token_Empty) { 16007 // real empty token 16008 $ok = true; 16009 } elseif ($token instanceof HTMLPurifier_Token_Start) { 16010 // start tag 16011 16012 // ...unless they also have to close their parent 16013 if (!empty($this->stack)) { 16014 16015 // Performance note: you might think that it's rather 16016 // inefficient, recalculating the autoclose information 16017 // for every tag that a token closes (since when we 16018 // do an autoclose, we push a new token into the 16019 // stream and then /process/ that, before 16020 // re-processing this token.) But this is 16021 // necessary, because an injector can make an 16022 // arbitrary transformations to the autoclosing 16023 // tokens we introduce, so things may have changed 16024 // in the meantime. Also, doing the inefficient thing is 16025 // "easy" to reason about (for certain perverse definitions 16026 // of "easy") 16027 16028 $parent = array_pop($this->stack); 16029 $this->stack[] = $parent; 16030 16031 if (isset($definition->info[$parent->name])) { 16032 $elements = $definition->info[$parent->name]->child->getAllowedElements($config); 16033 $autoclose = !isset($elements[$token->name]); 16034 } else { 16035 $autoclose = false; 16036 } 16037 16038 if ($autoclose && $definition->info[$token->name]->wrap) { 16039 // Check if an element can be wrapped by another 16040 // element to make it valid in a context (for 16041 // example, <ul><ul> needs a <li> in between) 16042 $wrapname = $definition->info[$token->name]->wrap; 16043 $wrapdef = $definition->info[$wrapname]; 16044 $elements = $wrapdef->child->getAllowedElements($config); 16045 $parent_elements = $definition->info[$parent->name]->child->getAllowedElements($config); 16046 if (isset($elements[$token->name]) && isset($parent_elements[$wrapname])) { 16047 $newtoken = new HTMLPurifier_Token_Start($wrapname); 16048 $this->insertBefore($newtoken); 16049 $reprocess = true; 16050 continue; 16051 } 16052 } 16053 16054 $carryover = false; 16055 if ($autoclose && $definition->info[$parent->name]->formatting) { 16056 $carryover = true; 16057 } 16058 16059 if ($autoclose) { 16060 // check if this autoclose is doomed to fail 16061 // (this rechecks $parent, which his harmless) 16062 $autoclose_ok = isset($global_parent_allowed_elements[$token->name]); 16063 if (!$autoclose_ok) { 16064 foreach ($this->stack as $ancestor) { 16065 $elements = $definition->info[$ancestor->name]->child->getAllowedElements($config); 16066 if (isset($elements[$token->name])) { 16067 $autoclose_ok = true; 16068 break; 16069 } 16070 if ($definition->info[$token->name]->wrap) { 16071 $wrapname = $definition->info[$token->name]->wrap; 16072 $wrapdef = $definition->info[$wrapname]; 16073 $wrap_elements = $wrapdef->child->getAllowedElements($config); 16074 if (isset($wrap_elements[$token->name]) && isset($elements[$wrapname])) { 16075 $autoclose_ok = true; 16076 break; 16077 } 16078 } 16079 } 16080 } 16081 if ($autoclose_ok) { 16082 // errors need to be updated 16083 $new_token = new HTMLPurifier_Token_End($parent->name); 16084 $new_token->start = $parent; 16085 if ($carryover) { 16086 $element = clone $parent; 16087 // [TagClosedAuto] 16088 $element->armor['MakeWellFormed_TagClosedError'] = true; 16089 $element->carryover = true; 16090 $this->processToken(array($new_token, $token, $element)); 16091 } else { 16092 $this->insertBefore($new_token); 16093 } 16094 // [TagClosedSuppress] 16095 if ($e && !isset($parent->armor['MakeWellFormed_TagClosedError'])) { 16096 if (!$carryover) { 16097 $e->send(E_NOTICE, 'Strategy_MakeWellFormed: Tag auto closed', $parent); 16098 } else { 16099 $e->send(E_NOTICE, 'Strategy_MakeWellFormed: Tag carryover', $parent); 16100 } 16101 } 16102 } else { 16103 $this->remove(); 16104 } 16105 $reprocess = true; 16106 continue; 16107 } 16108 16109 } 16110 $ok = true; 16111 } 16112 16113 if ($ok) { 16114 foreach ($this->injectors as $i => $injector) { 16115 if (isset($token->skip[$i])) continue; 16116 if ($token->rewind !== null && $token->rewind !== $i) continue; 16117 $injector->handleElement($token); 16118 $this->processToken($token, $i); 16119 $reprocess = true; 16120 break; 16121 } 16122 if (!$reprocess) { 16123 // ah, nothing interesting happened; do normal processing 16124 $this->swap($token); 16125 if ($token instanceof HTMLPurifier_Token_Start) { 16126 $this->stack[] = $token; 16127 } elseif ($token instanceof HTMLPurifier_Token_End) { 16128 throw new HTMLPurifier_Exception('Improper handling of end tag in start code; possible error in MakeWellFormed'); 16129 } 16130 } 16131 continue; 16132 } 16133 16134 // sanity check: we should be dealing with a closing tag 16135 if (!$token instanceof HTMLPurifier_Token_End) { 16136 throw new HTMLPurifier_Exception('Unaccounted for tag token in input stream, bug in HTML Purifier'); 16137 } 16138 16139 // make sure that we have something open 16140 if (empty($this->stack)) { 16141 if ($escape_invalid_tags) { 16142 if ($e) $e->send(E_WARNING, 'Strategy_MakeWellFormed: Unnecessary end tag to text'); 16143 $this->swap(new HTMLPurifier_Token_Text( 16144 $generator->generateFromToken($token) 16145 )); 16146 } else { 16147 $this->remove(); 16148 if ($e) $e->send(E_WARNING, 'Strategy_MakeWellFormed: Unnecessary end tag removed'); 16149 } 16150 $reprocess = true; 16151 continue; 16152 } 16153 16154 // first, check for the simplest case: everything closes neatly. 16155 // Eventually, everything passes through here; if there are problems 16156 // we modify the input stream accordingly and then punt, so that 16157 // the tokens get processed again. 16158 $current_parent = array_pop($this->stack); 16159 if ($current_parent->name == $token->name) { 16160 $token->start = $current_parent; 16161 foreach ($this->injectors as $i => $injector) { 16162 if (isset($token->skip[$i])) continue; 16163 if ($token->rewind !== null && $token->rewind !== $i) continue; 16164 $injector->handleEnd($token); 16165 $this->processToken($token, $i); 16166 $this->stack[] = $current_parent; 16167 $reprocess = true; 16168 break; 16169 } 16170 continue; 16171 } 16172 16173 // okay, so we're trying to close the wrong tag 16174 16175 // undo the pop previous pop 16176 $this->stack[] = $current_parent; 16177 16178 // scroll back the entire nest, trying to find our tag. 16179 // (feature could be to specify how far you'd like to go) 16180 $size = count($this->stack); 16181 // -2 because -1 is the last element, but we already checked that 16182 $skipped_tags = false; 16183 for ($j = $size - 2; $j >= 0; $j--) { 16184 if ($this->stack[$j]->name == $token->name) { 16185 $skipped_tags = array_slice($this->stack, $j); 16186 break; 16187 } 16188 } 16189 16190 // we didn't find the tag, so remove 16191 if ($skipped_tags === false) { 16192 if ($escape_invalid_tags) { 16193 $this->swap(new HTMLPurifier_Token_Text( 16194 $generator->generateFromToken($token) 16195 )); 16196 if ($e) $e->send(E_WARNING, 'Strategy_MakeWellFormed: Stray end tag to text'); 16197 } else { 16198 $this->remove(); 16199 if ($e) $e->send(E_WARNING, 'Strategy_MakeWellFormed: Stray end tag removed'); 16200 } 16201 $reprocess = true; 16202 continue; 16203 } 16204 16205 // do errors, in REVERSE $j order: a,b,c with </a></b></c> 16206 $c = count($skipped_tags); 16207 if ($e) { 16208 for ($j = $c - 1; $j > 0; $j--) { 16209 // notice we exclude $j == 0, i.e. the current ending tag, from 16210 // the errors... [TagClosedSuppress] 16211 if (!isset($skipped_tags[$j]->armor['MakeWellFormed_TagClosedError'])) { 16212 $e->send(E_NOTICE, 'Strategy_MakeWellFormed: Tag closed by element end', $skipped_tags[$j]); 16213 } 16214 } 16215 } 16216 16217 // insert tags, in FORWARD $j order: c,b,a with </a></b></c> 16218 $replace = array($token); 16219 for ($j = 1; $j < $c; $j++) { 16220 // ...as well as from the insertions 16221 $new_token = new HTMLPurifier_Token_End($skipped_tags[$j]->name); 16222 $new_token->start = $skipped_tags[$j]; 16223 array_unshift($replace, $new_token); 16224 if (isset($definition->info[$new_token->name]) && $definition->info[$new_token->name]->formatting) { 16225 // [TagClosedAuto] 16226 $element = clone $skipped_tags[$j]; 16227 $element->carryover = true; 16228 $element->armor['MakeWellFormed_TagClosedError'] = true; 16229 $replace[] = $element; 16230 } 16231 } 16232 $this->processToken($replace); 16233 $reprocess = true; 16234 continue; 16235 } 16236 16237 $context->destroy('CurrentNesting'); 16238 $context->destroy('InputTokens'); 16239 $context->destroy('InputIndex'); 16240 $context->destroy('CurrentToken'); 16241 16242 unset($this->injectors, $this->stack, $this->tokens, $this->t); 16243 return $tokens; 16244 } 16245 16266 protected function processToken($token, $injector = -1) { 16267 16268 // normalize forms of token 16269 if (is_object($token)) $token = array(1, $token); 16270 if (is_int($token)) $token = array($token); 16271 if ($token === false) $token = array(1); 16272 if (!is_array($token)) throw new HTMLPurifier_Exception('Invalid token type from injector'); 16273 if (!is_int($token[0])) array_unshift($token, 1); 16274 if ($token[0] === 0) throw new HTMLPurifier_Exception('Deleting zero tokens is not valid'); 16275 16276 // $token is now an array with the following form: 16277 // array(number nodes to delete, new node 1, new node 2, ...) 16278 16279 $delete = array_shift($token); 16280 $old = array_splice($this->tokens, $this->t, $delete, $token); 16281 16282 if ($injector > -1) { 16283 // determine appropriate skips 16284 $oldskip = isset($old[0]) ? $old[0]->skip : array(); 16285 foreach ($token as $object) { 16286 $object->skip = $oldskip; 16287 $object->skip[$injector] = true; 16288 } 16289 } 16290 16291 } 16292 16297 private function insertBefore($token) { 16298 array_splice($this->tokens, $this->t, 0, array($token)); 16299 } 16300 16305 private function remove() { 16306 array_splice($this->tokens, $this->t, 1); 16307 } 16308 16313 private function swap($token) { 16314 $this->tokens[$this->t] = $token; 16315 } 16316 16317 } 16318 16319 16320 16321 16322 16331 class HTMLPurifier_Strategy_RemoveForeignElements extends HTMLPurifier_Strategy 16332 { 16333 16334 public function execute($tokens, $config, $context) { 16335 $definition = $config->getHTMLDefinition(); 16336 $generator = new HTMLPurifier_Generator($config, $context); 16337 $result = array(); 16338 16339 $escape_invalid_tags = $config->get('Core.EscapeInvalidTags'); 16340 $remove_invalid_img = $config->get('Core.RemoveInvalidImg'); 16341 16342 // currently only used to determine if comments should be kept 16343 $trusted = $config->get('HTML.Trusted'); 16344 $comment_lookup = $config->get('HTML.AllowedComments'); 16345 $comment_regexp = $config->get('HTML.AllowedCommentsRegexp'); 16346 $check_comments = $comment_lookup !== array() || $comment_regexp !== null; 16347 16348 $remove_script_contents = $config->get('Core.RemoveScriptContents'); 16349 $hidden_elements = $config->get('Core.HiddenElements'); 16350 16351 // remove script contents compatibility 16352 if ($remove_script_contents === true) { 16353 $hidden_elements['script'] = true; 16354 } elseif ($remove_script_contents === false && isset($hidden_elements['script'])) { 16355 unset($hidden_elements['script']); 16356 } 16357 16358 $attr_validator = new HTMLPurifier_AttrValidator(); 16359 16360 // removes tokens until it reaches a closing tag with its value 16361 $remove_until = false; 16362 16363 // converts comments into text tokens when this is equal to a tag name 16364 $textify_comments = false; 16365 16366 $token = false; 16367 $context->register('CurrentToken', $token); 16368 16369 $e = false; 16370 if ($config->get('Core.CollectErrors')) { 16371 $e =& $context->get('ErrorCollector'); 16372 } 16373 16374 foreach($tokens as $token) { 16375 if ($remove_until) { 16376 if (empty($token->is_tag) || $token->name !== $remove_until) { 16377 continue; 16378 } 16379 } 16380 if (!empty( $token->is_tag )) { 16381 // DEFINITION CALL 16382 16383 // before any processing, try to transform the element 16384 if ( 16385 isset($definition->info_tag_transform[$token->name]) 16386 ) { 16387 $original_name = $token->name; 16388 // there is a transformation for this tag 16389 // DEFINITION CALL 16390 $token = $definition-> 16391 info_tag_transform[$token->name]-> 16392 transform($token, $config, $context); 16393 if ($e) $e->send(E_NOTICE, 'Strategy_RemoveForeignElements: Tag transform', $original_name); 16394 } 16395 16396 if (isset($definition->info[$token->name])) { 16397 16398 // mostly everything's good, but 16399 // we need to make sure required attributes are in order 16400 if ( 16401 ($token instanceof HTMLPurifier_Token_Start || $token instanceof HTMLPurifier_Token_Empty) && 16402 $definition->info[$token->name]->required_attr && 16403 ($token->name != 'img' || $remove_invalid_img) // ensure config option still works 16404 ) { 16405 $attr_validator->validateToken($token, $config, $context); 16406 $ok = true; 16407 foreach ($definition->info[$token->name]->required_attr as $name) { 16408 if (!isset($token->attr[$name])) { 16409 $ok = false; 16410 break; 16411 } 16412 } 16413 if (!$ok) { 16414 if ($e) $e->send(E_ERROR, 'Strategy_RemoveForeignElements: Missing required attribute', $name); 16415 continue; 16416 } 16417 $token->armor['ValidateAttributes'] = true; 16418 } 16419 16420 if (isset($hidden_elements[$token->name]) && $token instanceof HTMLPurifier_Token_Start) { 16421 $textify_comments = $token->name; 16422 } elseif ($token->name === $textify_comments && $token instanceof HTMLPurifier_Token_End) { 16423 $textify_comments = false; 16424 } 16425 16426 } elseif ($escape_invalid_tags) { 16427 // invalid tag, generate HTML representation and insert in 16428 if ($e) $e->send(E_WARNING, 'Strategy_RemoveForeignElements: Foreign element to text'); 16429 $token = new HTMLPurifier_Token_Text( 16430 $generator->generateFromToken($token) 16431 ); 16432 } else { 16433 // check if we need to destroy all of the tag's children 16434 // CAN BE GENERICIZED 16435 if (isset($hidden_elements[$token->name])) { 16436 if ($token instanceof HTMLPurifier_Token_Start) { 16437 $remove_until = $token->name; 16438 } elseif ($token instanceof HTMLPurifier_Token_Empty) { 16439 // do nothing: we're still looking 16440 } else { 16441 $remove_until = false; 16442 } 16443 if ($e) $e->send(E_ERROR, 'Strategy_RemoveForeignElements: Foreign meta element removed'); 16444 } else { 16445 if ($e) $e->send(E_ERROR, 'Strategy_RemoveForeignElements: Foreign element removed'); 16446 } 16447 continue; 16448 } 16449 } elseif ($token instanceof HTMLPurifier_Token_Comment) { 16450 // textify comments in script tags when they are allowed 16451 if ($textify_comments !== false) { 16452 $data = $token->data; 16453 $token = new HTMLPurifier_Token_Text($data); 16454 } elseif ($trusted || $check_comments) { 16455 // always cleanup comments 16456 $trailing_hyphen = false; 16457 if ($e) { 16458 // perform check whether or not there's a trailing hyphen 16459 if (substr($token->data, -1) == '-') { 16460 $trailing_hyphen = true; 16461 } 16462 } 16463 $token->data = rtrim($token->data, '-'); 16464 $found_double_hyphen = false; 16465 while (strpos($token->data, '--') !== false) { 16466 $found_double_hyphen = true; 16467 $token->data = str_replace('--', '-', $token->data); 16468 } 16469 if ($trusted || !empty($comment_lookup[trim($token->data)]) || ($comment_regexp !== NULL && preg_match($comment_regexp, trim($token->data)))) { 16470 // OK good 16471 if ($e) { 16472 if ($trailing_hyphen) { 16473 $e->send(E_NOTICE, 'Strategy_RemoveForeignElements: Trailing hyphen in comment removed'); 16474 } 16475 if ($found_double_hyphen) { 16476 $e->send(E_NOTICE, 'Strategy_RemoveForeignElements: Hyphens in comment collapsed'); 16477 } 16478 } 16479 } else { 16480 if ($e) { 16481 $e->send(E_NOTICE, 'Strategy_RemoveForeignElements: Comment removed'); 16482 } 16483 continue; 16484 } 16485 } else { 16486 // strip comments 16487 if ($e) $e->send(E_NOTICE, 'Strategy_RemoveForeignElements: Comment removed'); 16488 continue; 16489 } 16490 } elseif ($token instanceof HTMLPurifier_Token_Text) { 16491 } else { 16492 continue; 16493 } 16494 $result[] = $token; 16495 } 16496 if ($remove_until && $e) { 16497 // we removed tokens until the end, throw error 16498 $e->send(E_ERROR, 'Strategy_RemoveForeignElements: Token removed to end', $remove_until); 16499 } 16500 16501 $context->destroy('CurrentToken'); 16502 16503 return $result; 16504 } 16505 16506 } 16507 16508 16509 16510 16511 16516 class HTMLPurifier_Strategy_ValidateAttributes extends HTMLPurifier_Strategy 16517 { 16518 16519 public function execute($tokens, $config, $context) { 16520 16521 // setup validator 16522 $validator = new HTMLPurifier_AttrValidator(); 16523 16524 $token = false; 16525 $context->register('CurrentToken', $token); 16526 16527 foreach ($tokens as $key => $token) { 16528 16529 // only process tokens that have attributes, 16530 // namely start and empty tags 16531 if (!$token instanceof HTMLPurifier_Token_Start && !$token instanceof HTMLPurifier_Token_Empty) continue; 16532 16533 // skip tokens that are armored 16534 if (!empty($token->armor['ValidateAttributes'])) continue; 16535 16536 // note that we have no facilities here for removing tokens 16537 $validator->validateToken($token, $config, $context); 16538 16539 $tokens[$key] = $token; // for PHP 4 16540 } 16541 $context->destroy('CurrentToken'); 16542 16543 return $tokens; 16544 } 16545 16546 } 16547 16548 16549 16550 16551 16567 class HTMLPurifier_TagTransform_Font extends HTMLPurifier_TagTransform 16568 { 16569 16570 public $transform_to = 'span'; 16571 16572 protected $_size_lookup = array( 16573 '0' => 'xx-small', 16574 '1' => 'xx-small', 16575 '2' => 'small', 16576 '3' => 'medium', 16577 '4' => 'large', 16578 '5' => 'x-large', 16579 '6' => 'xx-large', 16580 '7' => '300%', 16581 '-1' => 'smaller', 16582 '-2' => '60%', 16583 '+1' => 'larger', 16584 '+2' => '150%', 16585 '+3' => '200%', 16586 '+4' => '300%' 16587 ); 16588 16589 public function transform($tag, $config, $context) { 16590 16591 if ($tag instanceof HTMLPurifier_Token_End) { 16592 $new_tag = clone $tag; 16593 $new_tag->name = $this->transform_to; 16594 return $new_tag; 16595 } 16596 16597 $attr = $tag->attr; 16598 $prepend_style = ''; 16599 16600 // handle color transform 16601 if (isset($attr['color'])) { 16602 $prepend_style .= 'color:' . $attr['color'] . ';'; 16603 unset($attr['color']); 16604 } 16605 16606 // handle face transform 16607 if (isset($attr['face'])) { 16608 $prepend_style .= 'font-family:' . $attr['face'] . ';'; 16609 unset($attr['face']); 16610 } 16611 16612 // handle size transform 16613 if (isset($attr['size'])) { 16614 // normalize large numbers 16615 if ($attr['size'] !== '') { 16616 if ($attr['size']{0} == '+' || $attr['size']{0} == '-') { 16617 $size = (int) $attr['size']; 16618 if ($size < -2) $attr['size'] = '-2'; 16619 if ($size > 4) $attr['size'] = '+4'; 16620 } else { 16621 $size = (int) $attr['size']; 16622 if ($size > 7) $attr['size'] = '7'; 16623 } 16624 } 16625 if (isset($this->_size_lookup[$attr['size']])) { 16626 $prepend_style .= 'font-size:' . 16627 $this->_size_lookup[$attr['size']] . ';'; 16628 } 16629 unset($attr['size']); 16630 } 16631 16632 if ($prepend_style) { 16633 $attr['style'] = isset($attr['style']) ? 16634 $prepend_style . $attr['style'] : 16635 $prepend_style; 16636 } 16637 16638 $new_tag = clone $tag; 16639 $new_tag->name = $this->transform_to; 16640 $new_tag->attr = $attr; 16641 16642 return $new_tag; 16643 16644 } 16645 } 16646 16647 16648 16649 16650 16656 class HTMLPurifier_TagTransform_Simple extends HTMLPurifier_TagTransform 16657 { 16658 16659 protected $style; 16660 16665 public function __construct($transform_to, $style = null) { 16666 $this->transform_to = $transform_to; 16667 $this->style = $style; 16668 } 16669 16670 public function transform($tag, $config, $context) { 16671 $new_tag = clone $tag; 16672 $new_tag->name = $this->transform_to; 16673 if (!is_null($this->style) && 16674 ($new_tag instanceof HTMLPurifier_Token_Start || $new_tag instanceof HTMLPurifier_Token_Empty) 16675 ) { 16676 $this->prependCSS($new_tag->attr, $this->style); 16677 } 16678 return $new_tag; 16679 } 16680 16681 } 16682 16683 16684 16685 16686 16690 class HTMLPurifier_Token_Comment extends HTMLPurifier_Token 16691 { 16692 public $data; 16693 public $is_whitespace = true; 16699 public function __construct($data, $line = null, $col = null) { 16700 $this->data = $data; 16701 $this->line = $line; 16702 $this->col = $col; 16703 } 16704 } 16705 16706 16707 16708 16709 16713 class HTMLPurifier_Token_Tag extends HTMLPurifier_Token 16714 { 16721 public $is_tag = true; 16722 16730 public $name; 16731 16735 public $attr = array(); 16736 16743 public function __construct($name, $attr = array(), $line = null, $col = null, $armor = array()) { 16744 $this->name = ctype_lower($name) ? $name : strtolower($name); 16745 foreach ($attr as $key => $value) { 16746 // normalization only necessary when key is not lowercase 16747 if (!ctype_lower($key)) { 16748 $new_key = strtolower($key); 16749 if (!isset($attr[$new_key])) { 16750 $attr[$new_key] = $attr[$key]; 16751 } 16752 if ($new_key !== $key) { 16753 unset($attr[$key]); 16754 } 16755 } 16756 } 16757 $this->attr = $attr; 16758 $this->line = $line; 16759 $this->col = $col; 16760 $this->armor = $armor; 16761 } 16762 } 16763 16764 16765 16766 16767 16771 class HTMLPurifier_Token_Empty extends HTMLPurifier_Token_Tag 16772 { 16773 16774 } 16775 16776 16777 16778 16779 16787 class HTMLPurifier_Token_End extends HTMLPurifier_Token_Tag 16788 { 16793 public $start; 16794 } 16795 16796 16797 16798 16799 16803 class HTMLPurifier_Token_Start extends HTMLPurifier_Token_Tag 16804 { 16805 16806 } 16807 16808 16809 16810 16811 16821 class HTMLPurifier_Token_Text extends HTMLPurifier_Token 16822 { 16823 16824 public $name = '#PCDATA'; 16825 public $data; 16826 public $is_whitespace; 16833 public function __construct($data, $line = null, $col = null) { 16834 $this->data = $data; 16835 $this->is_whitespace = ctype_space($data); 16836 $this->line = $line; 16837 $this->col = $col; 16838 } 16839 16840 } 16841 16842 16843 16844 16845 16846 class HTMLPurifier_URIFilter_DisableExternal extends HTMLPurifier_URIFilter 16847 { 16848 public $name = 'DisableExternal'; 16849 protected $ourHostParts = false; 16850 public function prepare($config) { 16851 $our_host = $config->getDefinition('URI')->host; 16852 if ($our_host !== null) $this->ourHostParts = array_reverse(explode('.', $our_host)); 16853 } 16854 public function filter(&$uri, $config, $context) { 16855 if (is_null($uri->host)) return true; 16856 if ($this->ourHostParts === false) return false; 16857 $host_parts = array_reverse(explode('.', $uri->host)); 16858 foreach ($this->ourHostParts as $i => $x) { 16859 if (!isset($host_parts[$i])) return false; 16860 if ($host_parts[$i] != $this->ourHostParts[$i]) return false; 16861 } 16862 return true; 16863 } 16864 } 16865 16866 16867 16868 16869 16870 class HTMLPurifier_URIFilter_DisableExternalResources extends HTMLPurifier_URIFilter_DisableExternal 16871 { 16872 public $name = 'DisableExternalResources'; 16873 public function filter(&$uri, $config, $context) { 16874 if (!$context->get('EmbeddedURI', true)) return true; 16875 return parent::filter($uri, $config, $context); 16876 } 16877 } 16878 16879 16880 16881 16882 16883 class HTMLPurifier_URIFilter_DisableResources extends HTMLPurifier_URIFilter 16884 { 16885 public $name = 'DisableResources'; 16886 public function filter(&$uri, $config, $context) { 16887 return !$context->get('EmbeddedURI', true); 16888 } 16889 } 16890 16891 16892 16893 16894 16895 // It's not clear to me whether or not Punycode means that hostnames 16896 // do not have canonical forms anymore. As far as I can tell, it's 16897 // not a problem (punycoding should be identity when no Unicode 16898 // points are involved), but I'm not 100% sure 16899 class HTMLPurifier_URIFilter_HostBlacklist extends HTMLPurifier_URIFilter 16900 { 16901 public $name = 'HostBlacklist'; 16902 protected $blacklist = array(); 16903 public function prepare($config) { 16904 $this->blacklist = $config->get('URI.HostBlacklist'); 16905 return true; 16906 } 16907 public function filter(&$uri, $config, $context) { 16908 foreach($this->blacklist as $blacklisted_host_fragment) { 16909 if (strpos($uri->host, $blacklisted_host_fragment) !== false) { 16910 return false; 16911 } 16912 } 16913 return true; 16914 } 16915 } 16916 16917 16918 16919 16920 16921 // does not support network paths 16922 16923 class HTMLPurifier_URIFilter_MakeAbsolute extends HTMLPurifier_URIFilter 16924 { 16925 public $name = 'MakeAbsolute'; 16926 protected $base; 16927 protected $basePathStack = array(); 16928 public function prepare($config) { 16929 $def = $config->getDefinition('URI'); 16930 $this->base = $def->base; 16931 if (is_null($this->base)) { 16932 trigger_error('URI.MakeAbsolute is being ignored due to lack of value for URI.Base configuration', E_USER_WARNING); 16933 return false; 16934 } 16935 $this->base->fragment = null; // fragment is invalid for base URI 16936 $stack = explode('/', $this->base->path); 16937 array_pop($stack); // discard last segment 16938 $stack = $this->_collapseStack($stack); // do pre-parsing 16939 $this->basePathStack = $stack; 16940 return true; 16941 } 16942 public function filter(&$uri, $config, $context) { 16943 if (is_null($this->base)) return true; // abort early 16944 if ( 16945 $uri->path === '' && is_null($uri->scheme) && 16946 is_null($uri->host) && is_null($uri->query) && is_null($uri->fragment) 16947 ) { 16948 // reference to current document 16949 $uri = clone $this->base; 16950 return true; 16951 } 16952 if (!is_null($uri->scheme)) { 16953 // absolute URI already: don't change 16954 if (!is_null($uri->host)) return true; 16955 $scheme_obj = $uri->getSchemeObj($config, $context); 16956 if (!$scheme_obj) { 16957 // scheme not recognized 16958 return false; 16959 } 16960 if (!$scheme_obj->hierarchical) { 16961 // non-hierarchal URI with explicit scheme, don't change 16962 return true; 16963 } 16964 // special case: had a scheme but always is hierarchical and had no authority 16965 } 16966 if (!is_null($uri->host)) { 16967 // network path, don't bother 16968 return true; 16969 } 16970 if ($uri->path === '') { 16971 $uri->path = $this->base->path; 16972 } elseif ($uri->path[0] !== '/') { 16973 // relative path, needs more complicated processing 16974 $stack = explode('/', $uri->path); 16975 $new_stack = array_merge($this->basePathStack, $stack); 16976 if ($new_stack[0] !== '' && !is_null($this->base->host)) { 16977 array_unshift($new_stack, ''); 16978 } 16979 $new_stack = $this->_collapseStack($new_stack); 16980 $uri->path = implode('/', $new_stack); 16981 } else { 16982 // absolute path, but still we should collapse 16983 $uri->path = implode('/', $this->_collapseStack(explode('/', $uri->path))); 16984 } 16985 // re-combine 16986 $uri->scheme = $this->base->scheme; 16987 if (is_null($uri->userinfo)) $uri->userinfo = $this->base->userinfo; 16988 if (is_null($uri->host)) $uri->host = $this->base->host; 16989 if (is_null($uri->port)) $uri->port = $this->base->port; 16990 return true; 16991 } 16992 16996 private function _collapseStack($stack) { 16997 $result = array(); 16998 $is_folder = false; 16999 for ($i = 0; isset($stack[$i]); $i++) { 17000 $is_folder = false; 17001 // absorb an internally duplicated slash 17002 if ($stack[$i] == '' && $i && isset($stack[$i+1])) continue; 17003 if ($stack[$i] == '..') { 17004 if (!empty($result)) { 17005 $segment = array_pop($result); 17006 if ($segment === '' && empty($result)) { 17007 // error case: attempted to back out too far: 17008 // restore the leading slash 17009 $result[] = ''; 17010 } elseif ($segment === '..') { 17011 $result[] = '..'; // cannot remove .. with .. 17012 } 17013 } else { 17014 // relative path, preserve the double-dots 17015 $result[] = '..'; 17016 } 17017 $is_folder = true; 17018 continue; 17019 } 17020 if ($stack[$i] == '.') { 17021 // silently absorb 17022 $is_folder = true; 17023 continue; 17024 } 17025 $result[] = $stack[$i]; 17026 } 17027 if ($is_folder) $result[] = ''; 17028 return $result; 17029 } 17030 } 17031 17032 17033 17034 17035 17036 class HTMLPurifier_URIFilter_Munge extends HTMLPurifier_URIFilter 17037 { 17038 public $name = 'Munge'; 17039 public $post = true; 17040 private $target, $parser, $doEmbed, $secretKey; 17041 17042 protected $replace = array(); 17043 17044 public function prepare($config) { 17045 $this->target = $config->get('URI.' . $this->name); 17046 $this->parser = new HTMLPurifier_URIParser(); 17047 $this->doEmbed = $config->get('URI.MungeResources'); 17048 $this->secretKey = $config->get('URI.MungeSecretKey'); 17049 return true; 17050 } 17051 public function filter(&$uri, $config, $context) { 17052 if ($context->get('EmbeddedURI', true) && !$this->doEmbed) return true; 17053 17054 $scheme_obj = $uri->getSchemeObj($config, $context); 17055 if (!$scheme_obj) return true; // ignore unknown schemes, maybe another postfilter did it 17056 if (!$scheme_obj->browsable) return true; // ignore non-browseable schemes, since we can't munge those in a reasonable way 17057 if ($uri->isBenign($config, $context)) return true; // don't redirect if a benign URL 17058 17059 $this->makeReplace($uri, $config, $context); 17060 $this->replace = array_map('rawurlencode', $this->replace); 17061 17062 $new_uri = strtr($this->target, $this->replace); 17063 $new_uri = $this->parser->parse($new_uri); 17064 // don't redirect if the target host is the same as the 17065 // starting host 17066 if ($uri->host === $new_uri->host) return true; 17067 $uri = $new_uri; // overwrite 17068 return true; 17069 } 17070 17071 protected function makeReplace($uri, $config, $context) { 17072 $string = $uri->toString(); 17073 // always available 17074 $this->replace['%s'] = $string; 17075 $this->replace['%r'] = $context->get('EmbeddedURI', true); 17076 $token = $context->get('CurrentToken', true); 17077 $this->replace['%n'] = $token ? $token->name : null; 17078 $this->replace['%m'] = $context->get('CurrentAttr', true); 17079 $this->replace['%p'] = $context->get('CurrentCSSProperty', true); 17080 // not always available 17081 if ($this->secretKey) $this->replace['%t'] = sha1($this->secretKey . ':' . $string); 17082 } 17083 17084 } 17085 17086 17087 17088 17089 17096 class HTMLPurifier_URIFilter_SafeIframe extends HTMLPurifier_URIFilter 17097 { 17098 public $name = 'SafeIframe'; 17099 public $always_load = true; 17100 protected $regexp = NULL; 17101 // XXX: The not so good bit about how this is all setup now is we 17102 // can't check HTML.SafeIframe in the 'prepare' step: we have to 17103 // defer till the actual filtering. 17104 public function prepare($config) { 17105 $this->regexp = $config->get('URI.SafeIframeRegexp'); 17106 return true; 17107 } 17108 public function filter(&$uri, $config, $context) { 17109 // check if filter not applicable 17110 if (!$config->get('HTML.SafeIframe')) return true; 17111 // check if the filter should actually trigger 17112 if (!$context->get('EmbeddedURI', true)) return true; 17113 $token = $context->get('CurrentToken', true); 17114 if (!($token && $token->name == 'iframe')) return true; 17115 // check if we actually have some whitelists enabled 17116 if ($this->regexp === null) return false; 17117 // actually check the whitelists 17118 return preg_match($this->regexp, $uri->toString()); 17119 } 17120 } 17121 17122 17123 17124 17125 17129 class HTMLPurifier_URIScheme_data extends HTMLPurifier_URIScheme { 17130 17131 public $browsable = true; 17132 public $allowed_types = array( 17133 // you better write validation code for other types if you 17134 // decide to allow them 17135 'image/jpeg' => true, 17136 'image/gif' => true, 17137 'image/png' => true, 17138 ); 17139 // this is actually irrelevant since we only write out the path 17140 // component 17141 public $may_omit_host = true; 17142 17143 public function doValidate(&$uri, $config, $context) { 17144 $result = explode(',', $uri->path, 2); 17145 $is_base64 = false; 17146 $charset = null; 17147 $content_type = null; 17148 if (count($result) == 2) { 17149 list($metadata, $data) = $result; 17150 // do some legwork on the metadata 17151 $metas = explode(';', $metadata); 17152 while(!empty($metas)) { 17153 $cur = array_shift($metas); 17154 if ($cur == 'base64') { 17155 $is_base64 = true; 17156 break; 17157 } 17158 if (substr($cur, 0, 8) == 'charset=') { 17159 // doesn't match if there are arbitrary spaces, but 17160 // whatever dude 17161 if ($charset !== null) continue; // garbage 17162 $charset = substr($cur, 8); // not used 17163 } else { 17164 if ($content_type !== null) continue; // garbage 17165 $content_type = $cur; 17166 } 17167 } 17168 } else { 17169 $data = $result[0]; 17170 } 17171 if ($content_type !== null && empty($this->allowed_types[$content_type])) { 17172 return false; 17173 } 17174 if ($charset !== null) { 17175 // error; we don't allow plaintext stuff 17176 $charset = null; 17177 } 17178 $data = rawurldecode($data); 17179 if ($is_base64) { 17180 $raw_data = base64_decode($data); 17181 } else { 17182 $raw_data = $data; 17183 } 17184 // XXX probably want to refactor this into a general mechanism 17185 // for filtering arbitrary content types 17186 $file = tempnam("/tmp", ""); 17187 file_put_contents($file, $raw_data); 17188 if (function_exists('exif_imagetype')) { 17189 $image_code = exif_imagetype($file); 17190 } elseif (function_exists('getimagesize')) { 17191 set_error_handler(array($this, 'muteErrorHandler')); 17192 $info = getimagesize($file); 17193 restore_error_handler(); 17194 if ($info == false) return false; 17195 $image_code = $info[2]; 17196 } else { 17197 trigger_error("could not find exif_imagetype or getimagesize functions", E_USER_ERROR); 17198 } 17199 $real_content_type = image_type_to_mime_type($image_code); 17200 if ($real_content_type != $content_type) { 17201 // we're nice guys; if the content type is something else we 17202 // support, change it over 17203 if (empty($this->allowed_types[$real_content_type])) return false; 17204 $content_type = $real_content_type; 17205 } 17206 // ok, it's kosher, rewrite what we need 17207 $uri->userinfo = null; 17208 $uri->host = null; 17209 $uri->port = null; 17210 $uri->fragment = null; 17211 $uri->query = null; 17212 $uri->path = "$content_type;base64," . base64_encode($raw_data); 17213 return true; 17214 } 17215 17216 public function muteErrorHandler($errno, $errstr) {} 17217 17218 } 17219 17220 17221 17222 17226 class HTMLPurifier_URIScheme_file extends HTMLPurifier_URIScheme { 17227 17228 // Generally file:// URLs are not accessible from most 17229 // machines, so placing them as an img src is incorrect. 17230 public $browsable = false; 17231 17232 // Basically the *only* URI scheme for which this is true, since 17233 // accessing files on the local machine is very common. In fact, 17234 // browsers on some operating systems don't understand the 17235 // authority, though I hear it is used on Windows to refer to 17236 // network shares. 17237 public $may_omit_host = true; 17238 17239 public function doValidate(&$uri, $config, $context) { 17240 // Authentication method is not supported 17241 $uri->userinfo = null; 17242 // file:// makes no provisions for accessing the resource 17243 $uri->port = null; 17244 // While it seems to work on Firefox, the querystring has 17245 // no possible effect and is thus stripped. 17246 $uri->query = null; 17247 return true; 17248 } 17249 17250 } 17251 17252 17253 17254 17255 17259 class HTMLPurifier_URIScheme_ftp extends HTMLPurifier_URIScheme { 17260 17261 public $default_port = 21; 17262 public $browsable = true; // usually 17263 public $hierarchical = true; 17264 17265 public function doValidate(&$uri, $config, $context) { 17266 $uri->query = null; 17267 17268 // typecode check 17269 $semicolon_pos = strrpos($uri->path, ';'); // reverse 17270 if ($semicolon_pos !== false) { 17271 $type = substr($uri->path, $semicolon_pos + 1); // no semicolon 17272 $uri->path = substr($uri->path, 0, $semicolon_pos); 17273 $type_ret = ''; 17274 if (strpos($type, '=') !== false) { 17275 // figure out whether or not the declaration is correct 17276 list($key, $typecode) = explode('=', $type, 2); 17277 if ($key !== 'type') { 17278 // invalid key, tack it back on encoded 17279 $uri->path .= '%3B' . $type; 17280 } elseif ($typecode === 'a' || $typecode === 'i' || $typecode === 'd') { 17281 $type_ret = ";type=$typecode"; 17282 } 17283 } else { 17284 $uri->path .= '%3B' . $type; 17285 } 17286 $uri->path = str_replace(';', '%3B', $uri->path); 17287 $uri->path .= $type_ret; 17288 } 17289 17290 return true; 17291 } 17292 17293 } 17294 17295 17296 17297 17298 17302 class HTMLPurifier_URIScheme_http extends HTMLPurifier_URIScheme { 17303 17304 public $default_port = 80; 17305 public $browsable = true; 17306 public $hierarchical = true; 17307 17308 public function doValidate(&$uri, $config, $context) { 17309 $uri->userinfo = null; 17310 return true; 17311 } 17312 17313 } 17314 17315 17316 17317 17318 17322 class HTMLPurifier_URIScheme_https extends HTMLPurifier_URIScheme_http { 17323 17324 public $default_port = 443; 17325 public $secure = true; 17326 17327 } 17328 17329 17330 17331 17332 17333 // VERY RELAXED! Shouldn't cause problems, not even Firefox checks if the 17334 // email is valid, but be careful! 17335 17342 class HTMLPurifier_URIScheme_mailto extends HTMLPurifier_URIScheme { 17343 17344 public $browsable = false; 17345 public $may_omit_host = true; 17346 17347 public function doValidate(&$uri, $config, $context) { 17348 $uri->userinfo = null; 17349 $uri->host = null; 17350 $uri->port = null; 17351 // we need to validate path against RFC 2368's addr-spec 17352 return true; 17353 } 17354 17355 } 17356 17357 17358 17359 17360 17364 class HTMLPurifier_URIScheme_news extends HTMLPurifier_URIScheme { 17365 17366 public $browsable = false; 17367 public $may_omit_host = true; 17368 17369 public function doValidate(&$uri, $config, $context) { 17370 $uri->userinfo = null; 17371 $uri->host = null; 17372 $uri->port = null; 17373 $uri->query = null; 17374 // typecode check needed on path 17375 return true; 17376 } 17377 17378 } 17379 17380 17381 17382 17383 17387 class HTMLPurifier_URIScheme_nntp extends HTMLPurifier_URIScheme { 17388 17389 public $default_port = 119; 17390 public $browsable = false; 17391 17392 public function doValidate(&$uri, $config, $context) { 17393 $uri->userinfo = null; 17394 $uri->query = null; 17395 return true; 17396 } 17397 17398 } 17399 17400 17401 17402 17403 17409 class HTMLPurifier_VarParser_Flexible extends HTMLPurifier_VarParser 17410 { 17411 17412 protected function parseImplementation($var, $type, $allow_null) { 17413 if ($allow_null && $var === null) return null; 17414 switch ($type) { 17415 // Note: if code "breaks" from the switch, it triggers a generic 17416 // exception to be thrown. Specific errors can be specifically 17417 // done here. 17418 case self::MIXED : 17419 case self::ISTRING : 17420 case self::STRING : 17421 case self::TEXT : 17422 case self::ITEXT : 17423 return $var; 17424 case self::INT : 17425 if (is_string($var) && ctype_digit($var)) $var = (int) $var; 17426 return $var; 17427 case self::FLOAT : 17428 if ((is_string($var) && is_numeric($var)) || is_int($var)) $var = (float) $var; 17429 return $var; 17430 case self::BOOL : 17431 if (is_int($var) && ($var === 0 || $var === 1)) { 17432 $var = (bool) $var; 17433 } elseif (is_string($var)) { 17434 if ($var == 'on' || $var == 'true' || $var == '1') { 17435 $var = true; 17436 } elseif ($var == 'off' || $var == 'false' || $var == '0') { 17437 $var = false; 17438 } else { 17439 throw new HTMLPurifier_VarParserException("Unrecognized value '$var' for $type"); 17440 } 17441 } 17442 return $var; 17443 case self::ALIST : 17444 case self::HASH : 17445 case self::LOOKUP : 17446 if (is_string($var)) { 17447 // special case: technically, this is an array with 17448 // a single empty string item, but having an empty 17449 // array is more intuitive 17450 if ($var == '') return array(); 17451 if (strpos($var, "\n") === false && strpos($var, "\r") === false) { 17452 // simplistic string to array method that only works 17453 // for simple lists of tag names or alphanumeric characters 17454 $var = explode(',',$var); 17455 } else { 17456 $var = preg_split('/(,|[\n\r]+)/', $var); 17457 } 17458 // remove spaces 17459 foreach ($var as $i => $j) $var[$i] = trim($j); 17460 if ($type === self::HASH) { 17461 // key:value,key2:value2 17462 $nvar = array(); 17463 foreach ($var as $keypair) { 17464 $c = explode(':', $keypair, 2); 17465 if (!isset($c[1])) continue; 17466 $nvar[trim($c[0])] = trim($c[1]); 17467 } 17468 $var = $nvar; 17469 } 17470 } 17471 if (!is_array($var)) break; 17472 $keys = array_keys($var); 17473 if ($keys === array_keys($keys)) { 17474 if ($type == self::ALIST) return $var; 17475 elseif ($type == self::LOOKUP) { 17476 $new = array(); 17477 foreach ($var as $key) { 17478 $new[$key] = true; 17479 } 17480 return $new; 17481 } else break; 17482 } 17483 if ($type === self::ALIST) { 17484 trigger_error("Array list did not have consecutive integer indexes", E_USER_WARNING); 17485 return array_values($var); 17486 } 17487 if ($type === self::LOOKUP) { 17488 foreach ($var as $key => $value) { 17489 if ($value !== true) { 17490 trigger_error("Lookup array has non-true value at key '$key'; maybe your input array was not indexed numerically", E_USER_WARNING); 17491 } 17492 $var[$key] = true; 17493 } 17494 } 17495 return $var; 17496 default: 17497 $this->errorInconsistent(__CLASS__, $type); 17498 } 17499 $this->errorGeneric($var, $type); 17500 } 17501 17502 } 17503 17504 17505 17506 17507 17513 class HTMLPurifier_VarParser_Native extends HTMLPurifier_VarParser 17514 { 17515 17516 protected function parseImplementation($var, $type, $allow_null) { 17517 return $this->evalExpression($var); 17518 } 17519 17520 protected function evalExpression($expr) { 17521 $var = null; 17522 $result = eval("\$var = $expr;"); 17523 if ($result === false) { 17524 throw new HTMLPurifier_VarParserException("Fatal error in evaluated code"); 17525 } 17526 return $var; 17527 } 17528 17529 } 17530 17531 17532