HTMLPurifier 4.4.0
/home/ezyang/Dev/htmlpurifier/library/HTMLPurifier/URIFilter/MakeAbsolute.php
Go to the documentation of this file.
00001 <?php
00002 
00003 // does not support network paths
00004 
/**
 * URI filter that rewrites relative URIs into absolute ones by resolving
 * them against the base URI found on the 'URI' definition.
 *
 * URIs that already carry a host (including network-path references) are
 * left untouched.
 */
class HTMLPurifier_URIFilter_MakeAbsolute extends HTMLPurifier_URIFilter
{
    /**
     * Name of this filter.
     */
    public $name = 'MakeAbsolute';

    /**
     * Base URI object read from the URI definition in prepare(); stays null
     * when no base is available, in which case filter() is a no-op.
     */
    protected $base;

    /**
     * Segments of the base path with the last segment removed and dot
     * segments collapsed; computed once in prepare().
     */
    protected $basePathStack = array();

    /**
     * Reads the base URI from the configuration and pre-computes its
     * directory segments.
     *
     * @param object $config Configuration object; only
     *                       getDefinition('URI') is called on it.
     * @return bool False (after raising an E_USER_WARNING) when the
     *              definition has no base URI, true otherwise.
     */
    public function prepare($config)
    {
        $def = $config->getDefinition('URI');
        $this->base = $def->base;
        if ($this->base === null) {
            trigger_error(
                'URI.MakeAbsolute is being ignored due to lack of value for URI.Base configuration',
                E_USER_WARNING
            );
            return false;
        }
        // A fragment is invalid for a base URI. No clone is made above, so
        // this clears the fragment on the very object the definition holds.
        $this->base->fragment = null;
        $stack = explode('/', $this->base->path);
        // Discard the last segment: relative references resolve against the
        // "directory" of the base path, not against its final component.
        array_pop($stack);
        $this->basePathStack = $this->_collapseStack($stack);
        return true;
    }

    /**
     * Makes $uri absolute, in place, relative to the prepared base URI.
     *
     * @param object $uri     URI object, passed by reference; it is replaced
     *                        by a clone of the base when it is an empty
     *                        reference, otherwise modified in place.
     * @param object $config  Passed through to $uri->getSchemeObj().
     * @param object $context Passed through to $uri->getSchemeObj().
     * @return bool False only when the URI names a scheme for which no
     *              scheme object can be obtained; true in every other case.
     */
    public function filter(&$uri, $config, $context)
    {
        if ($this->base === null) {
            // prepare() found no base URI: nothing to resolve against
            return true;
        }

        if (
            $uri->path === '' && $uri->scheme === null &&
            $uri->host === null && $uri->query === null && $uri->fragment === null
        ) {
            // reference to the current document
            $uri = clone $this->base;
            return true;
        }

        if ($uri->scheme !== null) {
            if ($uri->host !== null) {
                // absolute URI already: don't change
                return true;
            }
            $scheme_obj = $uri->getSchemeObj($config, $context);
            if (!$scheme_obj) {
                // scheme not recognized
                return false;
            }
            if (!$scheme_obj->hierarchical) {
                // non-hierarchical URI with an explicit scheme: don't change
                return true;
            }
            // Special case: a hierarchical scheme was given without an
            // authority. Fall through and resolve the path against the base.
        }

        if ($uri->host !== null) {
            // network path, don't bother
            return true;
        }

        if ($uri->path === '') {
            $uri->path = $this->base->path;
        } elseif ($uri->path[0] !== '/') {
            // relative path: append to the base directory, then collapse
            $stack = explode('/', $uri->path);
            $new_stack = array_merge($this->basePathStack, $stack);
            if ($new_stack[0] !== '' && $this->base->host !== null) {
                // a base with a host needs a path starting with a slash
                array_unshift($new_stack, '');
            }
            $new_stack = $this->_collapseStack($new_stack);
            $uri->path = implode('/', $new_stack);
        } else {
            // absolute path: keep it, but still collapse dot segments
            $uri->path = implode('/', $this->_collapseStack(explode('/', $uri->path)));
        }

        // re-combine: the scheme always comes from the base; the authority
        // parts are only filled in where the URI did not supply its own
        $uri->scheme = $this->base->scheme;
        if ($uri->userinfo === null) {
            $uri->userinfo = $this->base->userinfo;
        }
        if ($uri->host === null) {
            $uri->host = $this->base->host;
        }
        if ($uri->port === null) {
            $uri->port = $this->base->port;
        }
        return true;
    }

    /**
     * Resolves '.' and '..' segments and interior empty segments in a path
     * that has been split on '/'.
     *
     * A leading empty segment (the root of an absolute path) is never popped
     * by '..'. In a relative path, '..' segments that cannot be resolved are
     * kept, one per level. A trailing '' is appended when the last segment
     * processed was '.' or '..', so the joined path ends in a slash.
     *
     * @param array $stack Zero-indexed list of path segments (strings).
     * @return array Collapsed list of segments.
     */
    private function _collapseStack($stack)
    {
        $result = array();
        $is_folder = false;
        for ($i = 0; isset($stack[$i]); $i++) {
            $segment = $stack[$i];
            $is_folder = false;
            // absorb an internally duplicated slash: an empty segment that
            // is neither the first nor the last one
            if ($segment === '' && $i && isset($stack[$i + 1])) {
                continue;
            }
            if ($segment === '..') {
                if (empty($result)) {
                    // relative path, preserve the double-dots
                    $result[] = '..';
                } else {
                    $popped = array_pop($result);
                    if ($popped === '' && empty($result)) {
                        // error case: attempted to back out too far;
                        // restore the leading slash
                        $result[] = '';
                    } elseif ($popped === '..') {
                        // '..' cannot cancel another '..': put the popped
                        // one back AND keep the current one, otherwise a
                        // level is lost ('../..' would become '..')
                        $result[] = '..';
                        $result[] = '..';
                    }
                }
                $is_folder = true;
                continue;
            }
            if ($segment === '.') {
                // silently absorb
                $is_folder = true;
                continue;
            }
            $result[] = $segment;
        }
        if ($is_folder) {
            $result[] = '';
        }
        return $result;
    }
}
00113 
00114 // vim: et sw=4 sts=4