<?php
namespace esperecyan\url\lib;
class URLencoding
use Utility;
/**
 * Parses an application/x-www-form-urlencoded byte sequence into name-value tuples.
 *
 * The input is split on "&" and empty sequences are skipped. Every remaining
 * sequence is split on its first "="; a sequence without "=" is treated as a
 * name with an empty value. Name and value are each run through urldecode()
 * (which also turns "+" into a space) and then self::utf8DecodeWithoutBOM().
 *
 * @param string $input URL-encoded byte sequence, e.g. "a=1&b=%E3%81%82".
 * @return string[][] List of [name, value] pairs, in input order.
 */
public static function parseURLencoded($input)
{
    $output = [];
    foreach (explode('&', $input) as $bytes) {
        if ($bytes === '') {
            // "a=1&&b=2", a leading/trailing "&" or an empty input produce
            // empty sequences, which carry no tuple.
            continue;
        }

        // Limit 2 keeps any further "=" inside the value; pad so a bare
        // name still yields a two-element tuple.
        $tuple = array_pad(explode('=', $bytes, 2), 2, '');

        // Built without foreach-by-reference so no reference outlives the loop.
        $output[] = [
            self::utf8DecodeWithoutBOM(urldecode($tuple[0])),
            self::utf8DecodeWithoutBOM(urldecode($tuple[1])),
        ];
    }
    return $output;
}
/**
 * Serializes a byte sequence for application/x-www-form-urlencoded output.
 *
 * Delegates to urlencode() and then restores "*", which urlencode() escapes
 * as "%2A".
 *
 * @param string $input Byte sequence to serialize.
 * @return string The percent-encoded string, with spaces written as "+".
 */
public static function serializeURLencodedByte($input)
{
    return str_replace('%2A', '*', urlencode($input));
}
/**
 * Serializes a list of tuples as an application/x-www-form-urlencoded string.
 *
 * Each tuple is [name, value] or [name, value, type]:
 * - when type is "hidden" and the serialized name is "_charset_", the value
 *   is replaced by the name of the encoding actually used;
 * - when type is "file", the value is read from $tuple[1]['name'];
 * - otherwise $tuple[1] is used as the value.
 *
 * Names and values are converted with self::encode() and then
 * percent-encoded with self::serializeURLencodedByte().
 *
 * @param array[]|\Traversable $tuples List of tuples as described above.
 * @param string $encodingOverride Encoding name; an empty (falsy) string means UTF-8.
 *      The name is passed through self::getOutputEncoding() first.
 * @return string Pairs written as "name=value" and joined with "&".
 */
public static function serializeURLencoded($tuples, $encodingOverride = 'UTF-8')
{
    $encoding = (string)$encodingOverride ? self::getOutputEncoding((string)$encodingOverride) : 'UTF-8';

    // Collect into a fresh list instead of rewriting $tuples by reference.
    $pairs = [];
    foreach ($tuples as $tuple) {
        $name = self::serializeURLencodedByte(self::encode($tuple[0], $encoding));
        $type = isset($tuple[2]) ? $tuple[2] : null;

        if ($type === 'hidden' && $name === '_charset_') {
            $value = $encoding;
        } elseif ($type === 'file') {
            $value = $tuple[1]['name'];
        } else {
            $value = $tuple[1];
        }

        $pairs[] = $name . '=' . self::serializeURLencodedByte(self::encode($value, $encoding));
    }
    return implode('&', $pairs);
}
/**
 * Parses an application/x-www-form-urlencoded string.
 *
 * Thin wrapper: forwards the string unchanged to self::parseURLencoded().
 *
 * @param string $input URL-encoded string.
 * @return string[][] List of [name, value] pairs, as returned by self::parseURLencoded().
 */
public static function parseURLencodedString($input)
{
    return self::parseURLencoded($input);
}
/**
 * The characters tab, line feed, form feed, carriage return and space,
 * in that order.
 */
const ASCII_WHITESPACE = "\t\n\f\r ";
/**
 * Decodes a byte stream as UTF-8.
 *
 * Delegates to self::convertEncoding() in decoding mode with "UTF-8" as the
 * encoding. This method itself does not inspect or strip a byte order mark.
 *
 * @param string $stream Byte sequence to decode.
 * @return string The result of self::convertEncoding().
 */
public static function utf8DecodeWithoutBOM($stream)
{
    return self::convertEncoding($stream, 'UTF-8', true);
}
/**
 * Encodes a UTF-8 string into the given encoding.
 *
 * - "utf-8": the input is returned unchanged.
 * - "x-user-defined": ASCII is kept; U+F780..U+F7FF become the single bytes
 *   0x80..0xFF; every other non-ASCII character is written as a decimal
 *   character reference ("&#N;").
 * - anything else: delegated to self::convertEncoding().
 *
 * The encoding name is matched case-insensitively.
 *
 * @param string $input UTF-8 string.
 * @param string $encoding Name of the target encoding.
 * @return string|null The encoded byte sequence. The x-user-defined branch
 *      returns preg_replace_callback()'s result, which is null when $input
 *      is not valid UTF-8.
 */
public static function encode($input, $encoding)
{
    switch (strtolower($encoding)) {
        case 'utf-8':
            $output = $input;
            break;

        case 'x-user-defined':
            $output = preg_replace_callback('/[^\\x00-\\x7F]/u', function ($matches) {
                $codePoint = self::getCodePoint($matches[0]);
                // Only U+F780..U+F7FF have a byte in x-user-defined. Without the
                // lower bound, characters such as U+00E9 would be turned into an
                // arbitrary wrapped-around byte by chr().
                return 0xF780 <= $codePoint && $codePoint <= 0xF7FF
                    ? chr($codePoint - 0xF780 + 0x80)
                    : '&#' . $codePoint . ';';
            }, $input);
            break;

        default:
            $output = self::convertEncoding($input, $encoding);
    }
    return $output;
}
/**
 * Returns the encoding to use for output.
 *
 * "replacement", "utf-16be" and "utf-16le" (compared case-insensitively)
 * are mapped to "UTF-8"; any other name is returned exactly as given.
 *
 * @param string $encoding Encoding name.
 * @return string "UTF-8" or the unchanged $encoding.
 */
public static function getOutputEncoding($encoding)
{
    return in_array(strtolower($encoding), ['replacement', 'utf-16be', 'utf-16le'], true) ? 'UTF-8' : $encoding;
}
/**
 * Converts a string between UTF-8 and another encoding.
 *
 * The encoding name is matched case-insensitively against two lists: the
 * first group is converted with mbstring, the second with iconv.
 *
 * mbstring group: when decoding, invalid input is replaced by U+FFFD; when
 * encoding, unencodable characters are written using mbstring's "entity"
 * substitution mode. The process-wide mbstring substitute character is
 * restored to its previous value before returning.
 *
 * iconv group: the target encoding is suffixed with "//TRANSLIT//IGNORE" and
 * iconv()'s result is returned as is (false on failure).
 *
 * @param string $input String to convert.
 * @param string $encoding Name of the non-UTF-8 side of the conversion.
 * @param bool $decoding True to convert from $encoding to UTF-8,
 *      false to convert from UTF-8 to $encoding.
 * @return string|false The converted string.
 * @throws \DomainException If $encoding is in neither list.
 */
private static function convertEncoding($input, $encoding, $decoding = false)
{
    $label = strtolower($encoding);

    switch ($label) {
        case 'utf-8':
        case 'ibm866':
        case 'iso-8859-2':
        case 'iso-8859-3':
        case 'iso-8859-4':
        case 'iso-8859-5':
        case 'iso-8859-6':
        case 'iso-8859-7':
        case 'iso-8859-8':
        case 'iso-8859-8-i':
        case 'iso-8859-10':
        case 'iso-8859-13':
        case 'iso-8859-14':
        case 'iso-8859-15':
        case 'iso-8859-16':
        case 'koi8-r':
        case 'koi8-u':
        case 'windows-1251':
        case 'windows-1252':
        case 'windows-1254':
        case 'gbk':
        case 'gb18030':
        case 'big5':
        case 'euc-jp':
        case 'iso-2022-jp':
        case 'shift_jis':
        case 'euc-kr':
        case 'utf-16be':
        case 'utf-16le':
            // ISO-8859-8-I is handed to mbstring under the name ISO-8859-8.
            $characterEncoding = $label === 'iso-8859-8-i' ? 'ISO-8859-8' : $encoding;

            // The substitute character is global mbstring state: always save
            // it and put it back so other code is not affected.
            $previousSubstituteCharacter = mb_substitute_character();
            mb_substitute_character($decoding ? 0xFFFD : 'entity');
            $output = mb_convert_encoding(
                $input,
                $decoding ? 'UTF-8' : $characterEncoding,
                // Use the mapped name here too, otherwise decoding
                // "ISO-8859-8-I" is attempted with a name mbstring rejects.
                $decoding ? $characterEncoding : 'UTF-8'
            );
            mb_substitute_character($previousSubstituteCharacter);
            break;

        case 'macintosh':
        case 'windows-874':
        case 'windows-1250':
        case 'windows-1253':
        case 'windows-1255':
        case 'windows-1256':
        case 'windows-1257':
        case 'windows-1258':
        case 'x-mac-cyrillic':
            // Compare the lowercased label: the switch is case-insensitive,
            // so the name mapping has to be as well.
            $characterEncoding = $label === 'x-mac-cyrillic' ? 'MacCyrillic' : $encoding;
            $output = iconv(
                $decoding ? $characterEncoding : 'UTF-8',
                ($decoding ? 'UTF-8' : $characterEncoding) . '//TRANSLIT//IGNORE',
                $input
            );
            break;

        default:
            throw new \DomainException(
                sprintf('"%s" is a name of encoding which is not defined by Eoncding Standard', $encoding)
            );
    }

    return $output;
}
/**
 * Returns the Unicode code point of the first character of a UTF-8 string.
 *
 * @param string $char A UTF-8 encoded character.
 * @return int The code point, or 0xFFFD (REPLACEMENT CHARACTER) when $char
 *      is empty or fails the UTF-8 validity check.
 */
private static function getCodePoint($char)
{
    // htmlspecialchars() without ENT_SUBSTITUTE/ENT_IGNORE returns '' for
    // input that is not valid UTF-8, so a failed round trip means "invalid".
    // The empty string is rejected up front because it has no first byte.
    if ($char === ''
        || $char !== htmlspecialchars_decode(htmlspecialchars($char, ENT_COMPAT, 'UTF-8'), ENT_COMPAT)) {
        return 0xFFFD;
    }

    $x = ord($char[0]);

    // One byte: 0xxxxxxx
    if ($x < 0x80) {
        return $x;
    }

    $y = ord($char[1]);

    // Two bytes: 110xxxxx 10yyyyyy
    if ($x < 0xE0) {
        return (($x & 0x1F) << 6) | ($y & 0x3F);
    }

    $z = ord($char[2]);

    // Three bytes: 1110xxxx 10yyyyyy 10zzzzzz
    if ($x < 0xF0) {
        return (($x & 0xF) << 12) | (($y & 0x3F) << 6) | ($z & 0x3F);
    }

    // Four bytes: 11110xxx 10yyyyyy 10zzzzzz 10wwwwww
    $w = ord($char[3]);
    return (($x & 0x7) << 18) | (($y & 0x3F) << 12) | (($z & 0x3F) << 6) | ($w & 0x3F);
}
/**
 * Returns the UTF-8 encoding of a Unicode code point.
 *
 * The bytes are assembled directly, so the result does not depend on
 * html_entity_decode()'s default charset or document-type rules.
 *
 * @param int $cp Code point in 0..0x10FFFF, excluding the surrogate range
 *      0xD800..0xDFFF.
 * @return string The one- to four-byte UTF-8 sequence.
 * @throws \InvalidArgumentException If $cp is not an integer.
 * @throws \DomainException If $cp is negative, a surrogate, or above 0x10FFFF.
 */
private static function getUTF8Character($cp)
{
    // Library code must not terminate the process; report bad input with
    // exceptions instead of exit().
    if (!is_int($cp)) {
        throw new \InvalidArgumentException(
            sprintf('%s is not integer', is_scalar($cp) ? var_export($cp, true) : gettype($cp))
        );
    }
    if ($cp < 0 || (0xD7FF < $cp && $cp < 0xE000) || 0x10FFFF < $cp) {
        throw new \DomainException(sprintf('%d is out of range', $cp));
    }

    if ($cp < 0x80) {
        return chr($cp);
    }
    if ($cp < 0x800) {
        return chr(0xC0 | ($cp >> 6))
            . chr(0x80 | ($cp & 0x3F));
    }
    if ($cp < 0x10000) {
        return chr(0xE0 | ($cp >> 12))
            . chr(0x80 | (($cp >> 6) & 0x3F))
            . chr(0x80 | ($cp & 0x3F));
    }
    return chr(0xF0 | ($cp >> 18))
        . chr(0x80 | (($cp >> 12) & 0x3F))
        . chr(0x80 | (($cp >> 6) & 0x3F))
        . chr(0x80 | ($cp & 0x3F));
}