HTMLPurifier 4.4.0
/home/ezyang/Dev/htmlpurifier/library/HTMLPurifier/URIParser.php
Go to the documentation of this file.
00001 <?php
00002 
/**
 * Splits a URI string into its scheme, userinfo, host, port, path, query
 * and fragment components and wraps them in an HTMLPurifier_URI object.
 */
class HTMLPurifier_URIParser
{

    /**
     * HTMLPurifier_PercentEncoder instance whose normalize() method is
     * applied to every URI before it is split into components.
     */
    protected $percentEncoder;

    /**
     * Creates the percent encoder used by parse().
     */
    public function __construct() {
        $this->percentEncoder = new HTMLPurifier_PercentEncoder();
    }

    /**
     * Parses a URI into its components.
     *
     * @param string $uri URI to parse; it is passed through the percent
     *                    encoder's normalize() before matching.
     * @return HTMLPurifier_URI|false URI object holding the parsed
     *         components, or false if the splitting regexp fails to match.
     *         Components absent from the input are null, except the path
     *         (always a string, possibly empty) and the host, which is ''
     *         when an authority is present but carries no host.
     */
    public function parse($uri) {

        $uri = $this->percentEncoder->normalize($uri);

        // Regexp is as per Appendix B.
        // Note that ["<>] are an addition to the RFC's recommended
        // characters, because they represent external delimiters.
        $r_URI = '!'.
            '(([^:/?#"<>]+):)?'. // 2. Scheme
            '(//([^/?#"<>]*))?'. // 4. Authority
            '([^?#"<>]*)'.       // 5. Path
            '(\?([^#"<>]*))?'.   // 7. Query
            '(#([^"<>]*))?'.     // 8. Fragment
            '!';

        $matches = array();
        $result = preg_match($r_URI, $uri, $matches);

        if (!$result) return false; // *really* invalid URI

        // Separate out parts. The odd-numbered groups include their
        // delimiter (":", "//", "?", "#"), so empty() on them only tests
        // presence and cannot misfire on a component whose value is "0".
        $scheme     = !empty($matches[1]) ? $matches[2] : null;
        $authority  = !empty($matches[3]) ? $matches[4] : null;
        $path       = $matches[5]; // always present, can be empty
        $query      = !empty($matches[6]) ? $matches[7] : null;
        $fragment   = !empty($matches[8]) ? $matches[9] : null;

        // further parse authority
        if ($authority !== null) {
            $r_authority = "/^((.+?)@)?(\[[^\]]+\]|[^:]*)(:(\d*))?/";
            $matches = array();
            preg_match($r_authority, $authority, $matches);
            // Group 1 includes the trailing "@", so empty() is safe here.
            $userinfo   = !empty($matches[1]) ? $matches[2] : null;
            // Group 3 is the bare host: use isset() rather than empty(),
            // because empty("0") is true and would discard the host "0".
            $host       = isset($matches[3]) ? $matches[3] : '';
            // Group 4 includes the leading ":"; an empty port ("host:")
            // is cast to 0 by (int) ''.
            $port       = !empty($matches[4]) ? (int) $matches[5] : null;
        } else {
            $port = $host = $userinfo = null;
        }

        return new HTMLPurifier_URI(
            $scheme, $userinfo, $host, $port, $path, $query, $fragment);
    }

}
00069 
00070 // vim: et sw=4 sts=4