HTMLPurifier 4.4.0
/home/ezyang/Dev/htmlpurifier/library/HTMLPurifier/URI.php
Go to the documentation of this file.
00001 <?php
00002 
/**
 * Value object holding the seven components of a URI (scheme, userinfo,
 * host, port, path, query, fragment) as public properties.
 *
 * The components are stored exactly as passed to the constructor (apart
 * from scheme lower-casing and the integer cast of the port); call
 * validate() to normalize them and toString() to serialize them.
 */
class HTMLPurifier_URI
{

    public $scheme, $userinfo, $host, $port, $path, $query, $fragment;

    /**
     * Stores the URI components.
     *
     * @param string|null $scheme   Lower-cased on storage; null means "no scheme"
     * @param string|null $userinfo
     * @param string|null $host
     * @param mixed       $port     Cast to int unless null
     * @param string      $path
     * @param string|null $query
     * @param string|null $fragment
     */
    public function __construct($scheme, $userinfo, $host, $port, $path, $query, $fragment) {
        // ctype_lower() is just a fast path: anything it rejects (mixed
        // case, or non-letter characters) goes through strtolower().
        $this->scheme = is_null($scheme) || ctype_lower($scheme) ? $scheme : strtolower($scheme);
        $this->userinfo = $userinfo;
        $this->host = $host;
        $this->port = is_null($port) ? $port : (int) $port;
        $this->path = $path;
        $this->query = $query;
        $this->fragment = $fragment;
    }

    /**
     * Retrieves the scheme object that corresponds to this URI's scheme,
     * falling back to the URI definition's default scheme when this URI
     * has none.
     *
     * @param object $config  Configuration object; must provide getDefinition('URI')
     * @param object $context Context object, passed through to the lookups
     * @return object|bool Scheme object, or false if none could be obtained
     */
    public function getSchemeObj($config, $context) {
        $registry = HTMLPurifier_URISchemeRegistry::instance();
        if ($this->scheme !== null) {
            $scheme_obj = $registry->getScheme($this->scheme, $config, $context);
            if (!$scheme_obj) return false; // invalid scheme, clean it out
        } else {
            // no scheme: retrieve the default one
            $def = $config->getDefinition('URI');
            $scheme_obj = $def->getDefaultScheme($config, $context);
            if (!$scheme_obj) {
                // something funky happened to the default scheme object
                trigger_error(
                    'Default scheme object "' . $def->defaultScheme . '" was not readable',
                    E_USER_WARNING
                );
                return false;
            }
        }
        return $scheme_obj;
    }

    /**
     * Generic, scheme-independent validation of the URI components.
     * Modifies the components in place: an invalid host or out-of-range
     * port is set to null, a redundant default scheme is dropped, and the
     * remaining components are percent-encoded.
     *
     * @param object $config  Configuration object; must provide getDefinition('URI')
     * @param object $context Context object, passed through to the host validator
     * @return bool Always true in this implementation
     */
    public function validate($config, $context) {

        // ABNF definitions from RFC 3986
        $chars_sub_delims = '!$&\'()*+,;=';
        $chars_pchar = $chars_sub_delims . ':@';

        // validate host
        if (!is_null($this->host)) {
            $host_def = new HTMLPurifier_AttrDef_URI_Host();
            $this->host = $host_def->validate($this->host, $config, $context);
            if ($this->host === false) $this->host = null;
        }

        // validate scheme
        // NOTE: It's not appropriate to check whether or not this
        // scheme is in our registry, since a URIFilter may convert a
        // URI that we don't allow into one we do.  So instead, we just
        // check if the scheme can be dropped because there is no host
        // and it is our default scheme.
        //
        // The host test is grouped explicitly: without the parentheses
        // `&&` binds tighter than `||`, so a URI with an empty host and
        // no scheme at all would still enter this block and perform a
        // pointless definition lookup.
        if (!is_null($this->scheme) && (is_null($this->host) || $this->host === '')) {
            // support for relative paths is pretty abysmal when the
            // scheme is present, so axe it when possible
            $def = $config->getDefinition('URI');
            if ($def->defaultScheme === $this->scheme) {
                $this->scheme = null;
            }
        }

        // validate username
        if (!is_null($this->userinfo)) {
            $encoder = new HTMLPurifier_PercentEncoder($chars_sub_delims . ':');
            $this->userinfo = $encoder->encode($this->userinfo);
        }

        // validate port
        if (!is_null($this->port)) {
            if ($this->port < 1 || $this->port > 65535) $this->port = null;
        }

        // validate path
        $segments_encoder = new HTMLPurifier_PercentEncoder($chars_pchar . '/');
        if (!is_null($this->host)) { // this catches $this->host === ''
            // path-abempty (hier and relative)
            // http://www.example.com/my/path
            // //www.example.com/my/path (looks odd, but works, and
            //                            recognized by most browsers)
            // (this set is valid or invalid on a scheme by scheme
            // basis, so we'll deal with it later)
            // file:///my/path
            // ///my/path
            $this->path = $segments_encoder->encode($this->path);
        } elseif ($this->path !== '') {
            if ($this->path[0] === '/') {
                // path-absolute (hier and relative)
                // http:/my/path
                // /my/path
                if (strlen($this->path) >= 2 && $this->path[1] === '/') {
                    // This could happen if the host gets stripped out:
                    // the leading "//" would otherwise be re-read as
                    // the start of an authority.
                    // http://my/path
                    // //my/path
                    $this->path = '';
                } else {
                    $this->path = $segments_encoder->encode($this->path);
                }
            } elseif (!is_null($this->scheme)) {
                // path-rootless (hier)
                // http:my/path
                // Short circuit evaluation means we don't need to check nz
                $this->path = $segments_encoder->encode($this->path);
            } else {
                // path-noscheme (relative)
                // my/path
                // (once again, not checking nz)
                // The first segment is encoded without ':' in the
                // preserved set, so it cannot be mistaken for a scheme.
                $segment_nc_encoder = new HTMLPurifier_PercentEncoder($chars_sub_delims . '@');
                $c = strpos($this->path, '/');
                if ($c !== false) {
                    $this->path =
                        $segment_nc_encoder->encode(substr($this->path, 0, $c)) .
                        $segments_encoder->encode(substr($this->path, $c));
                } else {
                    $this->path = $segment_nc_encoder->encode($this->path);
                }
            }
        } else {
            // path-empty (hier and relative)
            $this->path = ''; // just to be safe
        }

        // qf = query and fragment
        $qf_encoder = new HTMLPurifier_PercentEncoder($chars_pchar . '/?');

        if (!is_null($this->query)) {
            $this->query = $qf_encoder->encode($this->query);
        }

        if (!is_null($this->fragment)) {
            $this->fragment = $qf_encoder->encode($this->fragment);
        }

        return true;

    }

    /**
     * Serializes the components back into a URI string.
     *
     * @return string scheme ":" "//" authority path "?" query "#" fragment,
     *                with each null component (and its delimiter) omitted
     */
    public function toString() {
        // reconstruct authority
        $authority = null;
        // there is a rendering difference between a null authority
        // (http:foo-bar) and an empty string authority
        // (http:///foo-bar).
        if (!is_null($this->host)) {
            $authority = '';
            if (!is_null($this->userinfo)) $authority .= $this->userinfo . '@';
            $authority .= $this->host;
            if (!is_null($this->port))     $authority .= ':' . $this->port;
        }

        // Reconstruct the result
        // One might wonder about parsing quirks from browsers after
        // this reconstruction.  Unfortunately, parsing behavior depends
        // on what *scheme* was employed (file:///foo is handled *very*
        // differently than http:///foo), so unfortunately we have to
        // defer to the schemes to do the right thing.
        $result = '';
        if (!is_null($this->scheme))    $result .= $this->scheme . ':';
        if (!is_null($authority))       $result .= '//' . $authority;
        $result .= $this->path;
        if (!is_null($this->query))     $result .= '?' . $this->query;
        if (!is_null($this->fragment))  $result .= '#' . $this->fragment;

        return $result;
    }

    /**
     * Tells whether this URI points at the configured host: true when it
     * has no host at all, or when its host is identical to the `host`
     * property of the URI definition.
     *
     * @param object $config  Configuration object; must provide getDefinition('URI')
     * @param object $context Context object (unused here)
     * @return bool
     */
    public function isLocal($config, $context) {
        if ($this->host === null) return true;
        $uri_def = $config->getDefinition('URI');
        if ($uri_def->host === $this->host) return true;
        return false;
    }

    /**
     * Tells whether this URI is local (see isLocal()), has a resolvable
     * scheme object and, when the default scheme object is flagged
     * `secure`, uses a scheme that is flagged `secure` as well.
     *
     * @param object $config  Configuration object; must provide getDefinition('URI')
     * @param object $context Context object, passed through to the lookups
     * @return bool
     */
    public function isBenign($config, $context) {
        if (!$this->isLocal($config, $context)) return false;

        $scheme_obj = $this->getSchemeObj($config, $context);
        if (!$scheme_obj) return false; // conservative approach

        $current_scheme_obj = $config->getDefinition('URI')->getDefaultScheme($config, $context);
        // getDefaultScheme() may fail to produce an object (getSchemeObj()
        // handles the same case).  Guard the property read so that
        // situation no longer raises a property-on-non-object warning;
        // as before, the secure-scheme requirement is then not enforced.
        if ($current_scheme_obj && $current_scheme_obj->secure) {
            if (!$scheme_obj->secure) {
                return false;
            }
        }
        return true;
    }

}
00241 
00242 // vim: et sw=4 sts=4