HTMLPurifier 4.4.0
/home/ezyang/Dev/htmlpurifier/library/HTMLPurifier/AttrDef/URI/Host.php
Go to the documentation of this file.
00001 <?php
00002 
/**
 * Validates the host component of a URI.
 *
 * A host is accepted when it is empty, a bracketed IPv6 literal, an IPv4
 * address, or an ASCII host name matching the "hostname" grammar spelled
 * out in validate(). When Core.EnableIDNA is set and Net_IDNA2 is
 * available, host names with non-ASCII labels are converted to Punycode
 * and re-checked against the same grammar.
 */
class HTMLPurifier_AttrDef_URI_Host extends HTMLPurifier_AttrDef
{

    /**
     * Validator that every non-bracketed host is tried against first.
     * @var HTMLPurifier_AttrDef_URI_IPv4
     */
    protected $ipv4;

    /**
     * Validator applied to the text between "[" and "]" of a bracketed host.
     * @var HTMLPurifier_AttrDef_URI_IPv6
     */
    protected $ipv6;

    /**
     * Creates the IPv4 and IPv6 sub-validators used by validate().
     */
    public function __construct() {
        $this->ipv4 = new HTMLPurifier_AttrDef_URI_IPv4();
        $this->ipv6 = new HTMLPurifier_AttrDef_URI_IPv6();
    }

    /**
     * Validates (and possibly normalises) a URI host.
     *
     * @param string $string Host to validate.
     * @param object $config Configuration object; only its
     *        get('Core.EnableIDNA') value is read here. It is also passed
     *        through to the IPv4/IPv6 validators.
     * @param object $context Context object, passed through to the
     *        IPv4/IPv6 validators.
     * @return string|bool The accepted host: '' for an empty host, the
     *         "[...]"-wrapped result of the IPv6 validator, the result of
     *         the IPv4 validator, the unchanged host name, or its
     *         Punycode form. False when the host is rejected.
     */
    public function validate($string, $config, $context) {
        // empty hostname is OK; it's usually semantically equivalent:
        // the default host as defined by a URI scheme is used:
        //
        //      If the URI scheme defines a default for host, then that
        //      default applies when the host subcomponent is undefined
        //      or when the registered name is empty (zero length).
        if ($string === '') return '';

        $length = strlen($string);
        if ($length > 1 && $string[0] === '[' && $string[$length-1] === ']') {
            // IPv6 literal: validate what is inside the brackets, then
            // put the brackets back around the validator's result.
            $ip = substr($string, 1, $length - 2);
            $valid = $this->ipv6->validate($ip, $config, $context);
            if ($valid === false) return false;
            return '['. $valid . ']';
        }

        // need to do checks on unusual encodings too
        $ipv4 = $this->ipv4->validate($string, $config, $context);
        if ($ipv4 !== false) return $ipv4;

        // A regular domain name.

        // This doesn't match I18N domain names, but we don't have proper
        // IRI support, so users must supply Punycode (or enable the IDNA
        // conversion further down).

        // The productions describing this are:
        $a   = '[a-z]';     // alpha
        $an  = '[a-z0-9]';  // alphanum
        $and = '[a-z0-9-]'; // alphanum | "-"
        // domainlabel = alphanum | alphanum *( alphanum | "-" ) alphanum
        $domainlabel   = "$an($and*$an)?";
        // toplabel    = alpha | alpha *( alphanum | "-" ) alphanum
        $toplabel      = "$a($and*$an)?";
        // hostname    = *( domainlabel "." ) toplabel [ "." ]
        // Built once; it is applied to the raw host and, below, to the
        // Punycode-converted host. The /i flag makes it case-insensitive.
        $hostname      = "/^($domainlabel\.)*$toplabel\.?$/i";

        if (preg_match($hostname, $string)) {
            return $string;
        }

        // If Net_IDNA2 is available, host names with non-ASCII labels can
        // be supported by converting those labels to Punycode. The result
        // is plain ASCII, so it is checked against the same hostname
        // grammar as above.
        //
        // Net_IDNA2 is an optional dependency: a missing class would be a
        // fatal error that the catch block below cannot intercept, so make
        // sure it can be loaded before instantiating it. The config flag
        // is tested first so autoloading is only attempted when enabled.
        if ($config->get('Core.EnableIDNA') && class_exists('Net_IDNA2')) {
            $idna = new Net_IDNA2(array('encoding' => 'utf8', 'overlong' => false, 'strict' => true));
            // each dot-separated label has to be encoded on its own
            $parts = explode('.', $string);
            try {
                $new_parts = array();
                foreach ($parts as $part) {
                    // A label is handed to the encoder as soon as one of
                    // its bytes is above 0x7a ('z'); labels without such
                    // a byte are kept verbatim.
                    $encodable = false;
                    for ($i = 0, $c = strlen($part); $i < $c; $i++) {
                        if (ord($part[$i]) > 0x7a) {
                            $encodable = true;
                            break;
                        }
                    }
                    if (!$encodable) {
                        $new_parts[] = $part;
                    } else {
                        $new_parts[] = $idna->encode($part);
                    }
                }
                $string = implode('.', $new_parts);
                if (preg_match($hostname, $string)) {
                    return $string;
                }
            } catch (Exception $e) {
                // XXX error reporting
                // Deliberate best effort: a label the encoder refuses
                // simply leaves the host invalid (false is returned below).
            }
        }

        return false;
    }

}
00100 
00101 // vim: et sw=4 sts=4