This repository was archived by the owner on Jan 10, 2020. It is now read-only.
-
Notifications
You must be signed in to change notification settings - Fork 3
/
Copy pathParseUrl.php
235 lines (201 loc) · 5.39 KB
/
ParseUrl.php
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
<?php
/**
* URL Parsing class. Feed it a web URL and it'll do some basic
* structural validation and extract/separate the URL's components
* for easy reference.
*
* Regarding TLD extensions: the class only recognizes
* two- or three-letter TLDs (.uk, .com), 2-2 letter TLDs (like .co.uk),
* common longer extensions available at the time, and
* any custom extensions you add. It does not check
* that the extension actually exists, so an extension like "qq.qq"
* will parse just fine.
*
* @author Eric "Aken" Roberts <[email protected]>
* @copyright Copyright (c) 2012, Eric Roberts
* @link http://www.cryode.com Cryode Web Studio
*/
class ParseUrl {

    /**
     * URLs without http(s):// on the front will not go through
     * parse_url() properly. Set this option to TRUE if you want
     * the class to add http:// for you in the event that it's missing.
     *
     * If set to FALSE and http(s):// is omitted, the URL is marked
     * invalid and an error message is stored.
     *
     * @var boolean
     */
    protected $_forceHttp = false;

    /**
     * Domain extensions that don't follow the typical
     * 2-3 letter TLD or 2-2 (".co.uk") style domains.
     *
     * If you want to parse any custom domains,
     * this is a good place to put them. Do not
     * include any periods.
     *
     * @var array
     */
    protected $_extensions = array(
        'aero', 'arpa', 'asia', 'coop', 'info', 'jobs', 'mobi',
        'museum', 'name', 'travel',
    );

    // The value exactly as it was passed to the constructor.
    public $original;

    // Trimmed, fully lowercased copy of the URL (http:// prepended when forced).
    public $url;

    // Default keys generated by parse_url().
    // scheme and host are lowercased; all other components keep their case.
    public $scheme;
    public $host;
    public $port;
    public $user;
    public $pass;
    public $path;
    public $query;      // Query string, IE: item=value&another=value
    public $fragment;   // Hash anchor, anything after #

    // Pieces derived from the host.
    public $tld;                        // Includes the leading period, IE: ".co.uk"
    public $domain;                     // Label directly in front of the TLD
    public $subdomain;                  // Remaining labels joined by periods
    public $subdomainArray = array();   // Remaining labels as a list

    // Associative array built from the query string.
    public $queryArray;

    // Validation state.
    public $isValid = true;
    public $error;

    /**
     * Constructor. Parses the URL and fills the public properties.
     *
     * On any validation failure, isValid is set to false, an error
     * message is stored, and parsing stops; properties that would have
     * been filled after that point keep their defaults.
     *
     * @param   mixed   $url    The URL to parse (string or string-castable)
     * @return  void
     */
    public function __construct($url)
    {
        if (empty($url))
        {
            $this->_setInvalid('No URL supplied.');
            return;
        }

        $this->original = $url;

        // Arrays, resources and non-stringable objects cannot be parsed.
        $stringable = is_scalar($url) || (is_object($url) && method_exists($url, '__toString'));

        if ( ! $stringable)
        {
            $this->_setInvalid('URL must be a string.');
            return;
        }

        // $raw keeps the caller's casing so that case-sensitive parts
        // (user, pass, path, query, fragment) survive parsing intact.
        $raw = trim((string) $url);
        $this->url = strtolower($raw);

        // Check for http(s), ignoring case.
        $hasPrefix = (strncasecmp($raw, 'http://', 7) === 0 || strncasecmp($raw, 'https://', 8) === 0);

        if ( ! $hasPrefix)
        {
            // No http(s). Should we add it?
            if ( ! $this->_forceHttp)
            {
                $this->_setInvalid('No http(s):// prefix on the URL.');
                return;
            }

            $raw = 'http://' . $raw;
            $this->url = 'http://' . $this->url;
        }

        // parse_url() returns false on seriously malformed input; treat that
        // as "no components" so the host check below reports the failure.
        $parts = parse_url($raw);

        if ( ! is_array($parts))
        {
            $parts = array();
        }

        foreach ($parts as $key => $value)
        {
            // Only the scheme and host are case-insensitive.
            if ($key === 'scheme' || $key === 'host')
            {
                $value = strtolower($value);
            }

            $this->$key = $value;
        }

        // Check to make sure the host exists.
        if (empty($this->host))
        {
            $this->_setInvalid('Could not determine the host of the URL.');
            return;
        }

        // Check our host value for any goofy characters
        // as part of a basic validation.
        if ( ! preg_match('/^[a-z0-9\-_\.]+$/', $this->host))
        {
            $this->_setInvalid('Odd characters found in the (sub)domain.');
            return;
        }

        // Create a query string associative array. parse_str() is skipped
        // when there is no query so it is never handed a null value.
        $this->queryArray = array();

        if ($this->query !== null && $this->query !== '')
        {
            parse_str($this->query, $this->queryArray);
        }

        // Get the TLD.
        // First, see if we match a 2-3 letter (.uk, .com) or 2.2-letter (.co.uk) TLD.
        if (preg_match('/(\.(?:[a-z]{3}|[a-z]{2}(?:\.[a-z]{2})?))$/', $this->host, $match))
        {
            $this->tld = $match[1];
        }
        else
        {
            // Standard format didn't match - check our array of TLD extensions
            // against whatever follows the last period in the host.
            $ext = ltrim((string) strrchr($this->host, '.'), '.');

            if ( ! in_array($ext, $this->_extensions, true))
            {
                $this->_setInvalid('Invalid top level domain extension.');
                return;
            }

            $this->tld = '.' . $ext;
        }

        // Strip the TLD from the host and split what is left into labels.
        $labels = explode('.', (string) substr($this->host, 0, -strlen($this->tld)));

        // An empty label means the host was something like ".com" or "a..com".
        if (in_array('', $labels, true))
        {
            $this->_setInvalid('Empty (sub)domain label found in the host.');
            return;
        }

        // The last label is the domain; everything before it is subdomain(s).
        $this->domain = array_pop($labels);
        $this->subdomain = implode('.', $labels);
        $this->subdomainArray = $labels;
    }

    /**
     * Whether the provided URL is valid.
     *
     * @return boolean
     */
    public function isValid()
    {
        return (boolean) $this->isValid;
    }

    /**
     * Gets the last error message, if available.
     *
     * @return string   Empty string when no error has been recorded.
     */
    public function getError()
    {
        return (string) $this->error;
    }

    /**
     * Magic method set up to provide getProperty() and hasProperty() calls.
     *
     * Calling getScheme() will return $this->scheme
     * Calling hasScheme() will return boolean if scheme property has a value.
     *
     * Calling a different method, or a property that doesn't exist,
     * will throw an exception.
     *
     * @param   string  $method Name of the method that was called
     * @param   array   $args   Call arguments (unused)
     * @return  mixed   Property value for get*, boolean for has*
     * @throws  Exception   On an unknown method or unknown property
     */
    public function __call($method, $args)
    {
        $mode = strtolower(substr($method, 0, 3));

        // lcfirst() copes with an empty remainder (a bare "get" or "has" call),
        // which indexing the string directly does not.
        $property = lcfirst((string) substr($method, 3));

        if ($property !== '' && ($mode === 'get' || $mode === 'has'))
        {
            // Throws when the property does not exist.
            $this->_hasProp($property);

            if ($mode === 'get')
            {
                return $this->$property;
            }

            return ! empty($this->$property);
        }

        throw new Exception("Unknown method '{$method}' called.");
    }

    /**
     * Internal method for setting the isValid to false
     * and adding any provided error message.
     *
     * @param   string|null $error  Message to store; ignored when empty
     * @return  null
     */
    protected function _setInvalid($error = null)
    {
        $this->isValid = false;

        if ( ! empty($error))
        {
            $this->error = $error;
        }

        return;
    }

    /**
     * Internal method for checking if a property exists,
     * and throwing an exception if it doesn't.
     *
     * @param   string  $prop   Property name to look up on this object
     * @return  boolean Always true when the property exists
     * @throws  Exception   When the property does not exist
     */
    protected function _hasProp($prop)
    {
        if ( ! property_exists($this, $prop))
        {
            throw new Exception("Unknown property '{$prop}' requested.");
        }

        return true;
    }
}
/* END ParseUrl.php */