HTMLPurifier 4.4.0
/home/ezyang/Dev/htmlpurifier/library/HTMLPurifier/EntityParser.php
Go to the documentation of this file.
00001 <?php
00002 
00003 // If we want to implement error collecting here, we'll need to use some
00004 // sort of global data (probably trigger_error), because it's impossible to
00005 // pass $config or $context to the callback functions.
00006 
/**
 * Replaces character entity references found in a string with the
 * characters they stand for.
 *
 * Two complementary passes are offered: one that leaves the "special"
 * characters (double quote, ampersand, single quote, less-than,
 * greater-than) alone and one that decodes only those.
 */
class HTMLPurifier_EntityParser
{

    /**
     * Entity lookup object. Populated lazily from
     * HTMLPurifier_EntityLookup::instance() the first time a named,
     * non-special entity has to be resolved; its ->table property is
     * indexed by entity name.
     */
    protected $_entity_lookup;

    /**
     * Regular expression matching an entity reference. The trailing
     * semicolon is optional. Exactly one of the capture groups is filled:
     *   1 - hexadecimal digits of a "&#x...;" reference
     *   2 - decimal digits (leading zeros dropped) of a "&#...;" reference
     *   3 - name of a named reference (XML-style name)
     */
    protected $_substituteEntitiesRegex =
'/&(?:[#]x([a-fA-F0-9]+)|[#]0*(\d+)|([A-Za-z_:][A-Za-z0-9.\-_:]*));?/';
//     1. hex             2. dec      3. string (XML style)


    /**
     * Code point => character map of the special characters.
     */
    protected $_special_dec2str =
            array(
                    34 => '"',
                    38 => '&',
                    39 => "'",
                    60 => '<',
                    62 => '>'
            );

    /**
     * Entity name => code point map of the special named entities.
     * Every value here is also a key of $_special_dec2str.
     */
    protected $_special_ent2dec =
            array(
                    'quot' => 34,
                    'amp'  => 38,
                    'lt'   => 60,
                    'gt'   => 62
            );

    /**
     * Substitutes every entity except the special ones with the character
     * it represents.
     *
     * @param string $string String in which entities are replaced.
     * @return string String with non-special entities decoded.
     */
    public function substituteNonSpecialEntities($string) {
        // it will try to detect missing semicolons, but don't rely on it
        return preg_replace_callback(
            $this->_substituteEntitiesRegex,
            array($this, 'nonSpecialEntityCallback'),
            $string
            );
    }

    /**
     * Callback for substituteNonSpecialEntities().
     *
     * @param array $matches preg_replace_callback() match array: 0 is the
     *              whole entity, 1 the hex digits, 2 the decimal digits,
     *              3 the entity name.
     * @return string Replacement text; the untouched entity when it is a
     *                special character or an unknown name.
     */
    protected function nonSpecialEntityCallback($matches) {
        // replaces all but big five
        $entity = $matches[0];
        // Every match is at least "&" plus one byte, so offset 1 exists.
        $is_num = ($entity[1] === '#');
        if ($is_num) {
            // A numeric match is at least "&#" plus one digit or "x",
            // so offset 2 exists as well.
            $is_hex = ($entity[2] === 'x');
            $code = $is_hex ? hexdec($matches[1]) : (int) $matches[2];

            // abort for special characters
            if (isset($this->_special_dec2str[$code])) {
                return $entity;
            }

            return HTMLPurifier_Encoder::unichr($code);
        }

        $name = $matches[3];
        if (isset($this->_special_ent2dec[$name])) {
            return $entity;
        }
        if (!$this->_entity_lookup) {
            $this->_entity_lookup = HTMLPurifier_EntityLookup::instance();
        }
        if (isset($this->_entity_lookup->table[$name])) {
            return $this->_entity_lookup->table[$name];
        }
        // Unknown name: leave the text as it was.
        return $entity;
    }

    /**
     * Substitutes only the special entities with the character they
     * represent; every other entity is left untouched.
     *
     * @param string $string String in which entities are replaced.
     * @return string String with special entities decoded.
     */
    public function substituteSpecialEntities($string) {
        return preg_replace_callback(
            $this->_substituteEntitiesRegex,
            array($this, 'specialEntityCallback'),
            $string);
    }

    /**
     * Callback for substituteSpecialEntities().
     *
     * @param array $matches preg_replace_callback() match array: 0 is the
     *              whole entity, 1 the hex digits, 2 the decimal digits,
     *              3 the entity name.
     * @return string The special character, or the untouched entity when
     *                it does not denote one.
     */
    protected function specialEntityCallback($matches) {
        $entity = $matches[0];
        // Every match is at least "&" plus one byte, so offset 1 exists.
        $is_num = ($entity[1] === '#');
        if ($is_num) {
            $is_hex = ($entity[2] === 'x');
            $int = $is_hex ? hexdec($matches[1]) : (int) $matches[2];
            return isset($this->_special_dec2str[$int]) ?
                $this->_special_dec2str[$int] :
                $entity;
        }

        $name = $matches[3];
        if (isset($this->_special_ent2dec[$name])) {
            // Map name -> code point -> character. Returning the code
            // point itself would insert its decimal digits into the text
            // (e.g. "38" in place of "&amp;").
            return $this->_special_dec2str[$this->_special_ent2dec[$name]];
        }
        return $entity;
    }

}
00143 
00144 // vim: et sw=4 sts=4