HTMLPurifier 4.4.0
|
<?php

/**
 * Splits a URI string into scheme, userinfo, host, port, path, query and
 * fragment, and packages the pieces as an HTMLPurifier_URI object.
 */
class HTMLPurifier_URIParser
{

    /**
     * Instance of HTMLPurifier_PercentEncoder whose normalize() method is
     * applied to every URI before it is split into components.
     */
    protected $percentEncoder;

    public function __construct() {
        $this->percentEncoder = new HTMLPurifier_PercentEncoder();
    }

    /**
     * Parses a URI into its components.
     *
     * @param string $uri URI to parse.
     * @return HTMLPurifier_URI|bool URI object built from the parsed
     *         components, or false if the URI could not be matched at all.
     *         Components that are absent from the input are passed as null;
     *         the path is always a string (possibly empty), and the host is
     *         an empty string when an authority is present but names no host.
     */
    public function parse($uri) {

        $uri = $this->percentEncoder->normalize($uri);

        // Regexp is as per Appendix B of RFC 3986.
        // Note that ["<>] are an addition to the RFC's recommended
        // characters, because they represent external delimiters.
        $r_URI = '!'.
            '(([^:/?#"<>]+):)?'. // 2. Scheme
            '(//([^/?#"<>]*))?'. // 4. Authority
            '([^?#"<>]*)'.       // 5. Path
            '(\?([^#"<>]*))?'.   // 7. Query
            '(#([^"<>]*))?'.     // 8. Fragment
            '!';

        $matches = array();
        $result = preg_match($r_URI, $uri, $matches);

        if (!$result) return false; // *really* invalid URI

        // Separate out parts. Each optional component is wrapped in an outer
        // group that includes its delimiter ("scheme:", "//authority",
        // "?query", "#fragment"); a non-empty outer group therefore means the
        // component is present, even if its value is the empty string.
        $scheme     = $this->groupMatched($matches, 1) ? $matches[2] : null;
        $authority  = $this->groupMatched($matches, 3) ? $matches[4] : null;
        $path       = $matches[5]; // always present, can be empty
        $query      = $this->groupMatched($matches, 6) ? $matches[7] : null;
        $fragment   = $this->groupMatched($matches, 8) ? $matches[9] : null;

        // further parse authority
        if ($authority !== null) {
            $r_authority = "/^((.+?)@)?(\[[^\]]+\]|[^:]*)(:(\d*))?/";
            $matches = array();
            preg_match($r_authority, $authority, $matches);
            $userinfo = $this->groupMatched($matches, 1) ? $matches[2] : null;
            // Compare against '' explicitly: empty() treats the string "0"
            // as empty, which would silently discard the host of "//0/".
            $host = $this->groupMatched($matches, 3) ? $matches[3] : '';
            // Only produce an integer when digits were actually supplied;
            // a bare trailing colon ("host:") means no port, not port 0.
            $port = $this->groupMatched($matches, 5) ? (int) $matches[5] : null;
        } else {
            $port = $host = $userinfo = null;
        }

        return new HTMLPurifier_URI(
            $scheme, $userinfo, $host, $port, $path, $query, $fragment);
    }

    /**
     * Tells whether a capture group took part in a match with a non-empty
     * value, without the "0"-is-empty pitfall of empty().
     *
     * @param array $matches Match array as filled in by preg_match().
     * @param int $index Capture group number.
     * @return bool True if the group is set and is not the empty string.
     */
    private function groupMatched($matches, $index) {
        return isset($matches[$index]) && $matches[$index] !== '';
    }

}

// vim: et sw=4 sts=4