HTMLPurifier 4.4.0
|
<?php

// If we want to implement error collecting here, we'll need to use some sort
// of global data (probably trigger_error) because it's impossible to pass
// $config or $context to the callback functions.

/**
 * Replaces character entity references in a string.
 *
 * Two passes are offered: one that substitutes every entity except the
 * "special" ones listed in $_special_dec2str / $_special_ent2dec, and one
 * that substitutes only those special entities.
 */
class HTMLPurifier_EntityParser
{

    /**
     * Entity lookup object whose ->table maps entity names to replacement
     * strings. Assigned lazily from HTMLPurifier_EntityLookup::instance()
     * the first time a non-special named entity is encountered.
     */
    protected $_entity_lookup;

    /**
     * Pattern matching a single entity reference; the trailing semicolon is
     * optional. Capture groups:
     *   1. hex digits of a "&#x...;" reference
     *   2. decimal digits of a "&#...;" reference (leading zeros excluded)
     *   3. name of a named (XML style) reference
     */
    protected $_substituteEntitiesRegex =
'/&(?:[#]x([a-fA-F0-9]+)|[#]0*(\d+)|([A-Za-z_:][A-Za-z0-9.\-_:]*));?/';
//     1. hex             2. dec      3. string (XML style)


    /**
     * Code points of the special characters, mapped to the literal
     * character each one stands for.
     */
    protected $_special_dec2str =
            array(
                    34 => '"',
                    38 => '&',
                    39 => "'",
                    60 => '<',
                    62 => '>'
            );

    /**
     * Names of the special entities, mapped to their code points. Every
     * value here is also a key of $_special_dec2str.
     */
    protected $_special_ent2dec =
            array(
                    'quot' => 34,
                    'amp'  => 38,
                    'lt'   => 60,
                    'gt'   => 62
            );

    /**
     * Substitutes every entity in $string except the special ones, which
     * are left exactly as written.
     *
     * @param string $string String in which to substitute entities.
     * @return string Result of preg_replace_callback() over $string.
     */
    public function substituteNonSpecialEntities($string) {
        // it will try to detect missing semicolons, but don't rely on it
        return preg_replace_callback(
            $this->_substituteEntitiesRegex,
            array($this, 'nonSpecialEntityCallback'),
            $string
            );
    }

    /**
     * Callback for substituteNonSpecialEntities(): computes the replacement
     * for one matched entity.
     *
     * @param array $matches PCRE match array: 0 is the whole entity, 1 the
     *                       hex digits, 2 the decimal digits, 3 the name.
     * @return string Replacement text, or the entity unchanged when it is
     *                special or its name is not in the lookup table.
     */
    protected function nonSpecialEntityCallback($matches) {
        // replaces all but big five
        $entity = $matches[0];
        // The pattern always consumes at least one character after '&', and
        // at least two after "&#", so offsets 1 and 2 exist when read here.
        $is_num = ($entity[1] === '#');
        if ($is_num) {
            $is_hex = ($entity[2] === 'x');
            $code = $is_hex ? hexdec($matches[1]) : (int) $matches[2];

            // abort for special characters
            if (isset($this->_special_dec2str[$code])) return $entity;

            return HTMLPurifier_Encoder::unichr($code);
        }

        $name = $matches[3];
        if (isset($this->_special_ent2dec[$name])) return $entity;
        if (!$this->_entity_lookup) {
            $this->_entity_lookup = HTMLPurifier_EntityLookup::instance();
        }
        if (isset($this->_entity_lookup->table[$name])) {
            return $this->_entity_lookup->table[$name];
        }
        return $entity;
    }

    /**
     * Substitutes only the special entities in $string with the literal
     * characters they stand for; every other entity is left as written.
     *
     * @param string $string String in which to substitute special entities.
     * @return string Result of preg_replace_callback() over $string.
     */
    public function substituteSpecialEntities($string) {
        return preg_replace_callback(
            $this->_substituteEntitiesRegex,
            array($this, 'specialEntityCallback'),
            $string);
    }

    /**
     * Callback for substituteSpecialEntities(): computes the replacement
     * for one matched entity.
     *
     * @param array $matches PCRE match array: 0 is the whole entity, 1 the
     *                       hex digits, 2 the decimal digits, 3 the name.
     * @return string The literal special character, or the entity unchanged
     *                when it is not one of the special entities.
     */
    protected function specialEntityCallback($matches) {
        $entity = $matches[0];
        // See nonSpecialEntityCallback(): the pattern guarantees that these
        // string offsets exist whenever they are read.
        $is_num = ($entity[1] === '#');
        if ($is_num) {
            $is_hex = ($entity[2] === 'x');
            $int = $is_hex ? hexdec($matches[1]) : (int) $matches[2];
            return isset($this->_special_dec2str[$int]) ?
                $this->_special_dec2str[$int] :
                $entity;
        }

        $name = $matches[3];
        if (!isset($this->_special_ent2dec[$name])) {
            return $entity;
        }
        // Translate name -> code point -> character. Returning the code
        // point itself would splice its digits (e.g. "38" for &amp;) into
        // the output instead of the character.
        return $this->_special_dec2str[$this->_special_ent2dec[$name]];
    }

}

// vim: et sw=4 sts=4