HTMLPurifier 4.4.0
|
<?php

/**
 * Value object holding the individual components of a URI.
 *
 * The components are stored as public properties. validate() normalizes
 * them in place (percent-encoding, dropping invalid hosts and ports) and
 * toString() recomposes them into a URI string.
 */
class HTMLPurifier_URI
{

    /**
     * URI components. A null value means the component is absent; the
     * scheme is stored lowercase and the port, when present, as an int.
     */
    public $scheme, $userinfo, $host, $port, $path, $query, $fragment;

    /**
     * Stores the given components, lowercasing the scheme and casting the
     * port to an integer. Null values are kept as null.
     *
     * @param string|null     $scheme
     * @param string|null     $userinfo
     * @param string|null     $host
     * @param int|string|null $port
     * @param string          $path
     * @param string|null     $query
     * @param string|null     $fragment
     */
    public function __construct($scheme, $userinfo, $host, $port, $path, $query, $fragment)
    {
        // ctype_lower() lets an already-lowercase scheme skip strtolower()
        $this->scheme = is_null($scheme) || ctype_lower($scheme) ? $scheme : strtolower($scheme);
        $this->userinfo = $userinfo;
        $this->host = $host;
        $this->port = is_null($port) ? $port : (int) $port;
        $this->path = $path;
        $this->query = $query;
        $this->fragment = $fragment;
    }

    /**
     * Retrieves the scheme object corresponding to this URI's scheme, or
     * the default scheme object when this URI has no scheme.
     *
     * @param object $config  Configuration object; must provide getDefinition('URI')
     * @param object $context Context object, passed through to the scheme lookup
     * @return mixed Scheme object on success, false when the scheme is not
     *               available from the registry or the default scheme
     *               object could not be obtained (the latter also raises
     *               an E_USER_WARNING)
     */
    public function getSchemeObj($config, $context)
    {
        $registry = HTMLPurifier_URISchemeRegistry::instance();
        if ($this->scheme !== null) {
            $scheme_obj = $registry->getScheme($this->scheme, $config, $context);
            if (!$scheme_obj) {
                // invalid scheme, clean it out
                return false;
            }
        } else {
            // no scheme: retrieve the default one
            $def = $config->getDefinition('URI');
            $scheme_obj = $def->getDefaultScheme($config, $context);
            if (!$scheme_obj) {
                // something funky happened to the default scheme object
                trigger_error(
                    'Default scheme object "' . $def->defaultScheme . '" was not readable',
                    E_USER_WARNING
                );
                return false;
            }
        }
        return $scheme_obj;
    }

    /**
     * Generic validation pass: normalizes every component in place.
     *
     * An invalid host or out-of-range port is reset to null, the scheme is
     * dropped when it equals the default scheme and there is no usable
     * host, and userinfo, path, query and fragment are percent-encoded.
     *
     * @param object $config  Configuration object; must provide getDefinition('URI')
     * @param object $context Context object, passed through to host validation
     * @return bool Always true in this implementation
     */
    public function validate($config, $context)
    {
        // ABNF definitions from RFC 3986
        // (gen-delims, for reference only: ":/?#[]@")
        $chars_sub_delims = '!$&\'()*+,;=';
        $chars_pchar = $chars_sub_delims . ':@';

        // validate host
        if (!is_null($this->host)) {
            $host_def = new HTMLPurifier_AttrDef_URI_Host();
            $this->host = $host_def->validate($this->host, $config, $context);
            if ($this->host === false) {
                $this->host = null;
            }
        }

        // validate scheme
        // NOTE: It's not appropriate to check whether or not this
        // scheme is in our registry, since a URIFilter may convert a
        // URI that we don't allow into one we do.  So instead, we just
        // check if the scheme can be dropped because there is no host
        // and it is our default scheme.
        // The grouping is explicit: "a && b || c" parses as "(a && b) || c",
        // which is not what the comment above describes.
        if (!is_null($this->scheme) && (is_null($this->host) || $this->host === '')) {
            // support for relative paths is pretty abysmal when the
            // scheme is present, so axe it when possible
            $def = $config->getDefinition('URI');
            if ($def->defaultScheme === $this->scheme) {
                $this->scheme = null;
            }
        }

        // validate username
        if (!is_null($this->userinfo)) {
            $encoder = new HTMLPurifier_PercentEncoder($chars_sub_delims . ':');
            $this->userinfo = $encoder->encode($this->userinfo);
        }

        // validate port
        if (!is_null($this->port)) {
            if ($this->port < 1 || $this->port > 65535) {
                $this->port = null;
            }
        }

        // validate path
        $segments_encoder = new HTMLPurifier_PercentEncoder($chars_pchar . '/');
        if (!is_null($this->host)) { // this catches $this->host === ''
            // path-abempty (hier and relative)
            //      http://www.example.com/my/path
            //      //www.example.com/my/path (looks odd, but works, and
            //                                 recognized by most browsers)
            //      (this set is valid or invalid on a scheme by scheme
            //       basis, so we'll deal with it later)
            //      file:///my/path
            //      ///my/path
            $this->path = $segments_encoder->encode($this->path);
        } elseif ($this->path !== '') {
            if ($this->path[0] === '/') {
                // path-absolute (hier and relative)
                //      http:/my/path
                //      /my/path
                if (strlen($this->path) >= 2 && $this->path[1] === '/') {
                    // A leading "//" without a host can happen when the
                    // host got stripped out above:
                    //      http://my/path
                    //      //my/path
                    $this->path = '';
                } else {
                    $this->path = $segments_encoder->encode($this->path);
                }
            } elseif (!is_null($this->scheme)) {
                // path-rootless (hier)
                //      http:my/path
                // Short circuit evaluation means we don't need to check nz
                $this->path = $segments_encoder->encode($this->path);
            } else {
                // path-noscheme (relative)
                //      my/path
                // (once again, not checking nz)
                // The first segment is encoded without ':' in the
                // preserved set; everything from the first '/' onwards
                // uses the regular segment encoder.
                $segment_nc_encoder = new HTMLPurifier_PercentEncoder($chars_sub_delims . '@');
                $c = strpos($this->path, '/');
                if ($c !== false) {
                    $this->path =
                        $segment_nc_encoder->encode(substr($this->path, 0, $c)) .
                        $segments_encoder->encode(substr($this->path, $c));
                } else {
                    $this->path = $segment_nc_encoder->encode($this->path);
                }
            }
        } else {
            // path-empty (hier and relative)
            $this->path = ''; // just to be safe
        }

        // qf = query and fragment
        $qf_encoder = new HTMLPurifier_PercentEncoder($chars_pchar . '/?');

        if (!is_null($this->query)) {
            $this->query = $qf_encoder->encode($this->query);
        }

        if (!is_null($this->fragment)) {
            $this->fragment = $qf_encoder->encode($this->fragment);
        }

        return true;
    }

    /**
     * Recomposes the components into a URI string.
     *
     * @return string The scheme, authority, path, query and fragment joined
     *                with their delimiters; null components are omitted
     */
    public function toString()
    {
        // reconstruct authority
        $authority = null;
        // there is a rendering difference between a null authority
        // (http:foo-bar) and an empty string authority
        // (http:///foo-bar).
        if (!is_null($this->host)) {
            $authority = '';
            if (!is_null($this->userinfo)) {
                $authority .= $this->userinfo . '@';
            }
            $authority .= $this->host;
            if (!is_null($this->port)) {
                $authority .= ':' . $this->port;
            }
        }

        // Reconstruct the result
        // One might wonder about parsing quirks from browsers after
        // this reconstruction.  Unfortunately, parsing behavior depends
        // on what *scheme* was employed (file:///foo is handled *very*
        // differently than http:///foo), so unfortunately we have to
        // defer to the schemes to do the right thing.
        $result = '';
        if (!is_null($this->scheme)) {
            $result .= $this->scheme . ':';
        }
        if (!is_null($authority)) {
            $result .= '//' . $authority;
        }
        $result .= $this->path;
        if (!is_null($this->query)) {
            $result .= '?' . $this->query;
        }
        if (!is_null($this->fragment)) {
            $result .= '#' . $this->fragment;
        }

        return $result;
    }

    /**
     * Tells whether this URI counts as "local": it has no host, or its
     * host is identical to the host of the URI definition.
     *
     * @param object $config  Configuration object; must provide getDefinition('URI')
     * @param object $context Context object (unused here)
     * @return bool
     */
    public function isLocal($config, $context)
    {
        if ($this->host === null) {
            return true;
        }
        $uri_def = $config->getDefinition('URI');
        if ($uri_def->host === $this->host) {
            return true;
        }
        return false;
    }

    /**
     * Tells whether this URI is local and does not downgrade security:
     * returns false when the URI is not local, when its scheme object
     * cannot be obtained, or when the default scheme is flagged secure
     * while this URI's scheme is not.
     *
     * @param object $config  Configuration object; must provide getDefinition('URI')
     * @param object $context Context object, passed through to the scheme lookup
     * @return bool
     */
    public function isBenign($config, $context)
    {
        if (!$this->isLocal($config, $context)) {
            return false;
        }

        $scheme_obj = $this->getSchemeObj($config, $context);
        if (!$scheme_obj) {
            // conservative approach
            return false;
        }

        $current_scheme_obj = $config->getDefinition('URI')->getDefaultScheme($config, $context);
        // getDefaultScheme() may yield no object (getSchemeObj() handles
        // that case as well); without a default scheme there is nothing
        // secure to downgrade from, so only compare when one is present.
        if ($current_scheme_obj && $current_scheme_obj->secure && !$scheme_obj->secure) {
            return false;
        }
        return true;
    }

}

// vim: et sw=4 sts=4