修訂 | 1eeb9c71186f0f18ba035010606a10a957b6b622 (tree) |
---|---|
時間 | 2007-07-03 23:47:20 |
作者 | henoheno <henoheno> |
Committer | henoheno |
$Id: spam.php,v 1.196 2007/07/02 14:51:40 henoheno Exp $
$Id: spam_pickup.php,v 1.51 2007/07/02 14:51:40 henoheno Exp $
$Id: spam.ini.php,v 1.130 2007/07/03 14:40:05 henoheno Exp $
$Id: domain.ini.php,v 1.2 2007/06/28 14:51:10 henoheno Exp $
* Separate spam.php => spam.php, spam_pickup.php, and domain.ini.php
* Reorder some functions
* Remove unused function: array_leaf()
* spam_uri_pickup_preprocess(): abstraction
* spam.ini.php: C-2: Affiliates, Hypes, Catalog retailers, Multi-level marketings, Resellers (in Japan)
@@ -0,0 +1,655 @@ | ||
1 | +<?php | |
2 | +// $Id: domain.ini.php,v 1.1 2007/07/03 14:47:04 henoheno Exp $ | |
3 | +// Domain related setting | |
4 | + | |
5 | +// Domains that have 2nd and/or 3rd level domains | |
6 | +$domain = array( | |
7 | + | |
8 | + // ccTLD: Australia | |
9 | + // http://www.auda.org.au/ | |
10 | + // NIC : http://www.aunic.net/ | |
11 | + // Whois: http://www.ausregistry.com.au/ | |
12 | + 'au' => array( | |
13 | + // .au Second Level Domains | |
14 | + // http://www.auda.org.au/domains/ | |
15 | + 'asn' => TRUE, | |
16 | + 'com' => TRUE, | |
17 | + 'conf' => TRUE, | |
18 | + 'csiro' => TRUE, | |
19 | + 'edu' => array( // http://www.domainname.edu.au/ | |
20 | + // Geographic | |
21 | + 'act' => TRUE, | |
22 | + 'nt' => TRUE, | |
23 | + 'nsw' => TRUE, | |
24 | + 'qld' => TRUE, | |
25 | + 'sa' => TRUE, | |
26 | + 'tas' => TRUE, | |
27 | + 'vic' => TRUE, | |
28 | + 'wa' => TRUE, | |
29 | + ), | |
30 | + 'gov' => array( | |
31 | + // Geographic | |
32 | + 'act' => TRUE, // Australian Capital Territory | |
33 | + 'nt' => TRUE, // Northern Territory | |
34 | + 'nsw' => TRUE, // New South Wales | |
35 | + 'qld' => TRUE, // Queensland | |
36 | + 'sa' => TRUE, // South Australia | |
37 | + 'tas' => TRUE, // Tasmania | |
38 | + 'vic' => TRUE, // Victoria | |
39 | + 'wa' => TRUE, // Western Australia | |
40 | + ), | |
41 | + 'id' => TRUE, | |
42 | + 'net' => TRUE, | |
43 | + 'org' => TRUE, | |
44 | + 'info' => TRUE, | |
45 | + ), | |
46 | + | |
47 | + // ccTLD: Bahrain | |
48 | + // NIC : http://www.inet.com.bh/ (.bh policies not found) | |
49 | + // Whois: (Not available) http://www.inet.com.bh/ | |
50 | + 'bh' => array( | |
51 | + // Observed | |
52 | + 'com' => TRUE, | |
53 | + 'edu' => TRUE, | |
54 | + 'gov' => TRUE, | |
55 | + 'org' => TRUE, | |
56 | + ), | |
57 | + | |
58 | + // ccTLD: China | |
59 | + // NIC : http://www.cnnic.net.cn/en/index/ | |
60 | + // Whois: http://ewhois.cnnic.cn/ | |
61 | + 'cn' => array( | |
62 | + // Provisional Administrative Rules for Registration of Domain Names in China | |
63 | + // http://www.cnnic.net.cn/html/Dir/2003/11/27/1520.htm | |
64 | + | |
65 | + // Organizational | |
66 | + 'ac' => TRUE, | |
67 | + 'com' => TRUE, | |
68 | + 'edu' => TRUE, | |
69 | + 'gov' => TRUE, | |
70 | + 'net' => TRUE, | |
71 | + 'org' => TRUE, | |
72 | + | |
73 | + // Geographic | |
74 | + 'ah' => TRUE, | |
75 | + 'bj' => TRUE, | |
76 | + 'cq' => TRUE, | |
77 | + 'fj' => TRUE, | |
78 | + 'gd' => TRUE, | |
79 | + 'gs' => TRUE, | |
80 | + 'gx' => TRUE, | |
81 | + 'gz' => TRUE, | |
82 | + 'ha' => TRUE, | |
83 | + 'hb' => TRUE, | |
84 | + 'he' => TRUE, | |
85 | + 'hi' => TRUE, | |
86 | + 'hk' => TRUE, | |
87 | + 'hl' => TRUE, | |
88 | + 'hn' => TRUE, | |
89 | + 'jl' => TRUE, | |
90 | + 'js' => TRUE, | |
91 | + 'jx' => TRUE, | |
92 | + 'ln' => TRUE, | |
93 | + 'mo' => TRUE, | |
94 | + 'nm' => TRUE, | |
95 | + 'nx' => TRUE, | |
96 | + 'qh' => TRUE, | |
97 | + 'sc' => TRUE, | |
98 | + 'sd' => TRUE, | |
99 | + 'sh' => TRUE, | |
100 | + 'sn' => TRUE, | |
101 | + 'sx' => TRUE, | |
102 | + 'tj' => TRUE, | |
103 | + 'tw' => TRUE, | |
104 | + 'xj' => TRUE, | |
105 | + 'xz' => TRUE, | |
106 | + 'yn' => TRUE, | |
107 | + 'zj' => TRUE, | |
108 | + ), | |
109 | + | |
110 | + // ccTLD: India | |
111 | + // NIC : http://www.inregistry.in/ | |
112 | + // Whois: http://www.inregistry.in/whois_search/ | |
113 | + 'in' => array( | |
114 | + // Policies http://www.inregistry.in/policies/ | |
115 | + 'ac' => TRUE, | |
116 | + 'co' => TRUE, | |
117 | + 'firm' => TRUE, | |
118 | + 'gen' => TRUE, | |
119 | + 'gov' => TRUE, | |
120 | + 'ind' => TRUE, | |
121 | + 'mil' => TRUE, | |
122 | + 'net' => TRUE, | |
123 | + 'org' => TRUE, | |
124 | + 'res' => TRUE, | |
125 | + // Reserved Names by the government (for the 2nd level) | |
126 | + // http://www.inregistry.in/policies/reserved_names | |
127 | + ), | |
128 | + | |
129 | + // ccTLD: South Korea | |
130 | + // NIC : http://www.nic.or.kr/english/ | |
131 | + // Whois: http://whois.nida.or.kr/english/ | |
132 | + 'kr' => array( | |
133 | + // .kr domain policy [appendix 1] : Qualifications for Second Level Domains | |
134 | + // http://domain.nida.or.kr/eng/policy.jsp | |
135 | + | |
136 | + // Organizational (keys are bare DNS labels: no padding inside the quotes) | |
137 | + 'co' => TRUE, | |
138 | + 'ne' => TRUE, | |
139 | + 'or' => TRUE, | |
140 | + 're' => TRUE, | |
141 | + 'pe' => TRUE, | |
142 | + 'go' => TRUE, | |
143 | + 'mil' => TRUE, | |
144 | + 'ac' => TRUE, | |
145 | + 'hs' => TRUE, | |
146 | + 'ms' => TRUE, | |
147 | + 'es' => TRUE, | |
148 | + 'sc' => TRUE, | |
149 | + 'kg' => TRUE, | |
150 | + | |
151 | + // Geographic | |
152 | + 'seoul' => TRUE, | |
153 | + 'busan' => TRUE, | |
154 | + 'daegu' => TRUE, | |
155 | + 'incheon' => TRUE, | |
156 | + 'gwangju' => TRUE, | |
157 | + 'daejeon' => TRUE, | |
158 | + 'ulsan' => TRUE, | |
159 | + 'gyeonggi' => TRUE, | |
160 | + 'gangwon' => TRUE, | |
161 | + 'chungbuk' => TRUE, | |
162 | + 'chungnam' => TRUE, | |
163 | + 'jeonbuk' => TRUE, | |
164 | + 'jeonnam' => TRUE, | |
165 | + 'gyeongbuk' => TRUE, | |
166 | + 'gyeongnam' => TRUE, | |
167 | + 'jeju' => TRUE, | |
168 | + ), | |
169 | + | |
170 | + // ccTLD: Japan | |
171 | + // NIC : http://jprs.co.jp/en/ | |
172 | + // Whois: http://whois.jprs.jp/en/ | |
173 | + 'jp' => array( | |
174 | + // Guide to JP Domain Name | |
175 | + // http://jprs.co.jp/en/jpdomain.html | |
176 | + | |
177 | + // Organizational | |
178 | + 'ac' => TRUE, | |
179 | + 'ad' => TRUE, | |
180 | + 'co' => TRUE, | |
181 | + 'ed' => TRUE, | |
182 | + 'go' => TRUE, | |
183 | + 'gr' => TRUE, | |
184 | + 'lg' => TRUE, // pref.<geographic2nd>.lg.jp etc. | |
185 | + 'ne' => TRUE, | |
186 | + 'or' => TRUE, | |
187 | + | |
188 | + // Geographic | |
189 | + // | |
190 | + // Examples for 3rd level domains | |
191 | + //'kumamoto' => array( | |
192 | + // // http://www.pref.kumamoto.jp/link/list.asp#4 | |
193 | + // 'amakusa' => TRUE, | |
194 | + // 'hitoyoshi' => TRUE, | |
195 | + // 'jonan' => TRUE, | |
196 | + // 'kumamoto' => TRUE, | |
197 | + // ... | |
198 | + //), | |
199 | + 'aichi' => TRUE, | |
200 | + 'akita' => TRUE, | |
201 | + 'aomori' => TRUE, | |
202 | + 'chiba' => TRUE, | |
203 | + 'ehime' => TRUE, | |
204 | + 'fukui' => TRUE, | |
205 | + 'fukuoka' => TRUE, | |
206 | + 'fukushima' => TRUE, | |
207 | + 'gifu' => TRUE, | |
208 | + 'gunma' => TRUE, | |
209 | + 'hiroshima' => TRUE, | |
210 | + 'hokkaido' => TRUE, | |
211 | + 'hyogo' => TRUE, | |
212 | + 'ibaraki' => TRUE, | |
213 | + 'ishikawa' => TRUE, | |
214 | + 'iwate' => TRUE, | |
215 | + 'kagawa' => TRUE, | |
216 | + 'kagoshima' => TRUE, | |
217 | + 'kanagawa' => TRUE, | |
218 | + 'kawasaki' => TRUE, | |
219 | + 'kitakyushu'=> TRUE, | |
220 | + 'kobe' => TRUE, | |
221 | + 'kochi' => TRUE, | |
222 | + 'kumamoto' => TRUE, | |
223 | + 'kyoto' => TRUE, | |
224 | + 'mie' => TRUE, | |
225 | + 'miyagi' => TRUE, | |
226 | + 'miyazaki' => TRUE, | |
227 | + 'nagano' => TRUE, | |
228 | + 'nagasaki' => TRUE, | |
229 | + 'nagoya' => TRUE, | |
230 | + 'nara' => TRUE, | |
231 | + 'niigata' => TRUE, | |
232 | + 'oita' => TRUE, | |
233 | + 'okayama' => TRUE, | |
234 | + 'okinawa' => TRUE, | |
235 | + 'osaka' => TRUE, | |
236 | + 'saga' => TRUE, | |
237 | + 'saitama' => TRUE, | |
238 | + 'sapporo' => TRUE, | |
239 | + 'sendai' => TRUE, | |
240 | + 'shiga' => TRUE, | |
241 | + 'shimane' => TRUE, | |
242 | + 'shizuoka' => TRUE, | |
243 | + 'tochigi' => TRUE, | |
244 | + 'tokushima' => TRUE, | |
245 | + 'tokyo' => TRUE, | |
246 | + 'tottori' => TRUE, | |
247 | + 'toyama' => TRUE, | |
248 | + 'wakayama' => TRUE, | |
249 | + 'yamagata' => TRUE, | |
250 | + 'yamaguchi' => TRUE, | |
251 | + 'yamanashi' => TRUE, | |
252 | + 'yokohama' => TRUE, | |
253 | + ), | |
254 | + | |
255 | + // ccTLD: Mexico | |
256 | + // NIC : http://www.nic.mx/ | |
257 | + // Whois: http://www.nic.mx/es/Busqueda.Who_Is | |
258 | + 'mx' => array( | |
259 | + // Politicas Generales de Nombres de Dominio | |
260 | + // http://www.nic.mx/es/Politicas?CATEGORY=INDICE | |
261 | + 'com' => TRUE, | |
262 | + 'edu' => TRUE, | |
263 | + 'gob' => TRUE, | |
264 | + 'net' => TRUE, | |
265 | + 'org' => TRUE, | |
266 | + ), | |
267 | + | |
268 | + // ccTLD: Russia | |
269 | + // NIC : http://www.cctld.ru/en/ | |
270 | + // Whois: http://www.ripn.net:8080/nic/whois/en/ | |
271 | + 'ru' => array( | |
272 | + // List of Reserved second-level Domain Names | |
273 | + // http://www.cctld.ru/en/doc/detail.php?id21=20&i21=2 | |
274 | + | |
275 | + // Organizational | |
276 | + 'ac' => TRUE, | |
277 | + 'com' => TRUE, | |
278 | + 'edu' => TRUE, | |
279 | + 'gov' => TRUE, | |
280 | + 'int' => TRUE, | |
281 | + 'mil' => TRUE, | |
282 | + 'net' => TRUE, | |
283 | + 'org' => TRUE, | |
284 | + 'pp' => TRUE, | |
285 | + //'test' => TRUE, | |
286 | + | |
287 | + // Geographic | |
288 | + 'adygeya' => TRUE, | |
289 | + 'altai' => TRUE, | |
290 | + 'amur' => TRUE, | |
291 | + 'amursk' => TRUE, | |
292 | + 'arkhangelsk' => TRUE, | |
293 | + 'astrakhan' => TRUE, | |
294 | + 'baikal' => TRUE, | |
295 | + 'bashkiria' => TRUE, | |
296 | + 'belgorod' => TRUE, | |
297 | + 'bir' => TRUE, | |
298 | + 'bryansk' => TRUE, | |
299 | + 'buryatia' => TRUE, | |
300 | + 'cbg' => TRUE, | |
301 | + 'chel' => TRUE, | |
302 | + 'chelyabinsk' => TRUE, | |
303 | + 'chita' => TRUE, | |
304 | + 'chukotka' => TRUE, | |
305 | + 'chuvashia' => TRUE, | |
306 | + 'cmw' => TRUE, | |
307 | + 'dagestan' => TRUE, | |
308 | + 'dudinka' => TRUE, | |
309 | + 'e-burg' => TRUE, | |
310 | + 'fareast' => TRUE, | |
311 | + 'grozny' => TRUE, | |
312 | + 'irkutsk' => TRUE, | |
313 | + 'ivanovo' => TRUE, | |
314 | + 'izhevsk' => TRUE, | |
315 | + 'jamal' => TRUE, | |
316 | + 'jar' => TRUE, | |
317 | + 'joshkar-ola' => TRUE, | |
318 | + 'k-uralsk' => TRUE, | |
319 | + 'kalmykia' => TRUE, | |
320 | + 'kaluga' => TRUE, | |
321 | + 'kamchatka' => TRUE, | |
322 | + 'karelia' => TRUE, | |
323 | + 'kazan' => TRUE, | |
324 | + 'kchr' => TRUE, | |
325 | + 'kemerovo' => TRUE, | |
326 | + 'khabarovsk' => TRUE, | |
327 | + 'khakassia' => TRUE, | |
328 | + 'khv' => TRUE, | |
329 | + 'kirov' => TRUE, | |
330 | + 'kms' => TRUE, | |
331 | + 'koenig' => TRUE, | |
332 | + 'komi' => TRUE, | |
333 | + 'kostroma' => TRUE, | |
334 | + 'krasnoyarsk' => TRUE, | |
335 | + 'kuban' => TRUE, | |
336 | + 'kurgan' => TRUE, | |
337 | + 'kursk' => TRUE, | |
338 | + 'kustanai' => TRUE, | |
339 | + 'kuzbass' => TRUE, | |
340 | + 'lipetsk' => TRUE, | |
341 | + 'magadan' => TRUE, | |
342 | + 'magnitka' => TRUE, | |
343 | + 'mari-el' => TRUE, | |
344 | + 'mari' => TRUE, | |
345 | + 'marine' => TRUE, | |
346 | + 'mordovia' => TRUE, | |
347 | + 'mosreg' => TRUE, | |
348 | + 'msk' => TRUE, | |
349 | + 'murmansk' => TRUE, | |
350 | + 'mytis' => TRUE, | |
351 | + 'nakhodka' => TRUE, | |
352 | + 'nalchik' => TRUE, | |
353 | + 'nkz' => TRUE, | |
354 | + 'nnov' => TRUE, | |
355 | + 'norilsk' => TRUE, | |
356 | + 'nov' => TRUE, | |
357 | + 'novosibirsk' => TRUE, | |
358 | + 'nsk' => TRUE, | |
359 | + 'omsk' => TRUE, | |
360 | + 'orenburg' => TRUE, | |
361 | + 'oryol' => TRUE, | |
362 | + 'oskol' => TRUE, | |
363 | + 'palana' => TRUE, | |
364 | + 'penza' => TRUE, | |
365 | + 'perm' => TRUE, | |
366 | + 'pskov' => TRUE, | |
367 | + 'ptz' => TRUE, | |
368 | + 'pyatigorsk' => TRUE, | |
369 | + 'rnd' => TRUE, | |
370 | + 'rubtsovsk' => TRUE, | |
371 | + 'ryazan' => TRUE, | |
372 | + 'sakhalin' => TRUE, | |
373 | + 'samara' => TRUE, | |
374 | + 'saratov' => TRUE, | |
375 | + 'simbirsk' => TRUE, | |
376 | + 'smolensk' => TRUE, | |
377 | + 'snz' => TRUE, | |
378 | + 'spb' => TRUE, | |
379 | + 'stavropol' => TRUE, | |
380 | + 'stv' => TRUE, | |
381 | + 'surgut' => TRUE, | |
382 | + 'syzran' => TRUE, | |
383 | + 'tambov' => TRUE, | |
384 | + 'tatarstan' => TRUE, | |
385 | + 'tom' => TRUE, | |
386 | + 'tomsk' => TRUE, | |
387 | + 'tsaritsyn' => TRUE, | |
388 | + 'tsk' => TRUE, | |
389 | + 'tula' => TRUE, | |
390 | + 'tuva' => TRUE, | |
391 | + 'tver' => TRUE, | |
392 | + 'tyumen' => TRUE, | |
393 | + 'udm' => TRUE, | |
394 | + 'udmurtia' => TRUE, | |
395 | + 'ulan-ude' => TRUE, | |
396 | + 'vdonsk' => TRUE, | |
397 | + 'vladikavkaz' => TRUE, | |
398 | + 'vladimir' => TRUE, | |
399 | + 'vladivostok' => TRUE, | |
400 | + 'volgograd' => TRUE, | |
401 | + 'vologda' => TRUE, | |
402 | + 'voronezh' => TRUE, | |
403 | + 'vrn' => TRUE, | |
404 | + 'vyatka' => TRUE, | |
405 | + 'yakutia' => TRUE, | |
406 | + 'yamal' => TRUE, | |
407 | + 'yaroslavl' => TRUE, | |
408 | + 'yekaterinburg' => TRUE, | |
409 | + 'yuzhno-sakhalinsk' => TRUE, | |
410 | + 'zgrad' => TRUE, | |
411 | + ), | |
412 | + | |
413 | + // ccTLD: Seychelles | |
414 | + // NIC : http://www.nic.sc/ | |
415 | + // Whois: (Not available) | |
416 | + 'sc' => array( | |
417 | + // http://www.nic.sc/policies.html | |
418 | + 'com' => TRUE, | |
419 | + 'edu' => TRUE, | |
420 | + 'gov' => TRUE, | |
421 | + 'net' => TRUE, | |
422 | + 'org' => TRUE, | |
423 | + ), | |
424 | + | |
425 | + // ccTLD: Taiwan | |
426 | + // NIC : http://www.twnic.net.tw/ | |
427 | + // Whois: http://www.twnic.net.tw/ | |
428 | + 'tw' => array( | |
429 | + // Guidelines for Administration of Domain Name Registration | |
430 | + // http://www.twnic.net.tw/english/dn/dn_02.htm | |
431 | + // II. Types of TWNIC Domain Names and Application Requirements | |
432 | + // http://www.twnic.net.tw/english/dn/dn_02_b.htm | |
433 | + 'club' => TRUE, | |
434 | + 'com' => TRUE, | |
435 | + 'ebiz' => TRUE, | |
436 | + 'edu' => TRUE, | |
437 | + 'game' => TRUE, | |
438 | + 'gov' => TRUE, | |
439 | + 'idv' => TRUE, | |
440 | + 'mil' => TRUE, | |
441 | + 'net' => TRUE, | |
442 | + 'org' => TRUE, | |
443 | + // Reserved words for the 2nd level | |
444 | + // http://mydn.twnic.net.tw/en/dn02/INDEX.htm | |
445 | + ), | |
446 | + | |
447 | + // ccTLD: Tanzania | |
448 | + // NIC : http://www.psg.com/dns/tz/ | |
449 | + // Whois: (Not available) | |
450 | + 'tz' => array( | |
451 | + // TZ DOMAIN NAMING STRUCTURE | |
452 | + // http://www.psg.com/dns/tz/tz.txt | |
453 | + 'ac' => TRUE, | |
454 | + 'co' => TRUE, | |
455 | + 'go' => TRUE, | |
456 | + 'ne' => TRUE, | |
457 | + 'or' => TRUE, | |
458 | + ), | |
459 | + | |
460 | + // ccTLD: Ukraine | |
461 | + // NIC : http://www.nic.net.ua/ | |
462 | + // Whois: http://whois.com.ua/ | |
463 | + 'ua' => array( | |
464 | + // policy for alternative 2nd level domain names (a2ld) | |
465 | + // http://www.nic.net.ua/doc/a2ld | |
466 | + // http://whois.com.ua/ | |
467 | + 'cherkassy' => TRUE, | |
468 | + 'chernigov' => TRUE, | |
469 | + 'chernovtsy' => TRUE, | |
470 | + 'ck' => TRUE, | |
471 | + 'cn' => TRUE, | |
472 | + 'com' => TRUE, | |
473 | + 'crimea' => TRUE, | |
474 | + 'cv' => TRUE, | |
475 | + 'dn' => TRUE, | |
476 | + 'dnepropetrovsk' => TRUE, | |
477 | + 'donetsk' => TRUE, | |
478 | + 'dp' => TRUE, | |
479 | + 'edu' => TRUE, | |
480 | + 'gov' => TRUE, | |
481 | + 'if' => TRUE, | |
482 | + 'ivano-frankivsk' => TRUE, | |
483 | + 'kh' => TRUE, | |
484 | + 'kharkov' => TRUE, | |
485 | + 'kherson' => TRUE, | |
486 | + 'kiev' => TRUE, | |
487 | + 'kirovograd' => TRUE, | |
488 | + 'km' => TRUE, | |
489 | + 'kr' => TRUE, | |
490 | + 'ks' => TRUE, | |
491 | + 'lg' => TRUE, | |
492 | + 'lugansk' => TRUE, | |
493 | + 'lutsk' => TRUE, | |
494 | + 'lviv' => TRUE, | |
495 | + 'mk' => TRUE, | |
496 | + 'net' => TRUE, | |
497 | + 'nikolaev' => TRUE, | |
498 | + 'od' => TRUE, | |
499 | + 'odessa' => TRUE, | |
500 | + 'org' => TRUE, | |
501 | + 'pl' => TRUE, | |
502 | + 'poltava' => TRUE, | |
503 | + 'rovno' => TRUE, | |
504 | + 'rv' => TRUE, | |
505 | + 'sebastopol' => TRUE, | |
506 | + 'sumy' => TRUE, | |
507 | + 'te' => TRUE, | |
508 | + 'ternopil' => TRUE, | |
509 | + 'uz' => TRUE, | |
510 | + 'uzhgorod' => TRUE, | |
511 | + 'vinnica' => TRUE, | |
512 | + 'vn' => TRUE, | |
513 | + 'zaporizhzhe' => TRUE, | |
514 | + 'zhitomir' => TRUE, | |
515 | + 'zp' => TRUE, | |
516 | + 'zt' => TRUE, | |
517 | + ), | |
518 | + | |
519 | + // ccTLD: United Kingdom | |
520 | + // NIC : http://www.nic.uk/ | |
521 | + 'uk' => array( | |
522 | + // Second Level Domains | |
523 | + // http://www.nic.uk/registrants/aboutdomainnames/sld/ | |
524 | + 'co' => TRUE, | |
525 | + 'ltd' => TRUE, | |
526 | + 'me' => TRUE, | |
527 | + 'net' => TRUE, | |
528 | + 'nic' => TRUE, | |
529 | + 'org' => TRUE, | |
530 | + 'plc' => TRUE, | |
531 | + 'sch' => TRUE, | |
532 | + | |
533 | + // Delegated Second Level Domains | |
534 | + // http://www.nic.uk/registrants/aboutdomainnames/sld/delegated/ | |
535 | + 'ac' => TRUE, | |
536 | + 'gov' => TRUE, | |
537 | + 'mil' => TRUE, | |
538 | + 'mod' => TRUE, | |
539 | + 'nhs' => TRUE, | |
540 | + 'police' => TRUE, | |
541 | + ), | |
542 | + | |
543 | + // ccTLD: United States of America | |
544 | + // NIC : http://nic.us/ | |
545 | + // Whois: http://whois.us/ | |
546 | + 'us' => array( | |
547 | + // See RFC1480 | |
548 | + | |
549 | + // Organizational (label => TRUE, like every other entry, so lookups by key work) | |
550 | + 'dni' => TRUE, | |
551 | + 'fed' => TRUE, | |
552 | + 'isa' => TRUE, | |
553 | + 'kids' => TRUE, | |
554 | + 'nsn' => TRUE, | |
555 | + | |
556 | + // Geographical | |
557 | + // United States Postal Service: State abbreviations (for postal codes) | |
558 | + // http://www.usps.com/ncsc/lookups/abbreviations.html | |
559 | + 'ak' => TRUE, // Alaska | |
560 | + 'al' => TRUE, // Alabama | |
561 | + 'ar' => TRUE, // Arkansas | |
562 | + 'as' => TRUE, // American Samoa | |
563 | + 'az' => TRUE, // Arizona | |
564 | + 'ca' => TRUE, // California | |
565 | + 'co' => TRUE, // Colorado | |
566 | + 'ct' => TRUE, // Connecticut | |
567 | + 'dc' => TRUE, // District of Columbia | |
568 | + 'de' => TRUE, // Delaware | |
569 | + 'fl' => TRUE, // Florida | |
570 | + 'fm' => TRUE, // Federated States of Micronesia | |
571 | + 'ga' => TRUE, // Georgia | |
572 | + 'gu' => TRUE, // Guam | |
573 | + 'hi' => TRUE, // Hawaii | |
574 | + 'ia' => TRUE, // Iowa | |
575 | + 'id' => TRUE, // Idaho | |
576 | + 'il' => TRUE, // Illinois | |
577 | + 'in' => TRUE, // Indiana | |
578 | + 'ks' => TRUE, // Kansas | |
579 | + 'ky' => TRUE, // Kentucky | |
580 | + 'la' => TRUE, // Louisiana | |
581 | + 'ma' => TRUE, // Massachusetts | |
582 | + 'md' => TRUE, // Maryland | |
583 | + 'me' => TRUE, // Maine | |
584 | + 'mh' => TRUE, // Marshall Islands | |
585 | + 'mi' => TRUE, // Michigan | |
586 | + 'mn' => TRUE, // Minnesota | |
587 | + 'mo' => TRUE, // Missouri | |
588 | + 'mp' => TRUE, // Northern Mariana Islands | |
589 | + 'ms' => TRUE, // Mississippi | |
590 | + 'mt' => TRUE, // Montana | |
591 | + 'nc' => TRUE, // North Carolina | |
592 | + 'nd' => TRUE, // North Dakota | |
593 | + 'ne' => TRUE, // Nebraska | |
594 | + 'nh' => TRUE, // New Hampshire | |
595 | + 'nj' => TRUE, // New Jersey | |
596 | + 'nm' => TRUE, // New Mexico | |
597 | + 'nv' => TRUE, // Nevada | |
598 | + 'ny' => TRUE, // New York | |
599 | + 'oh' => TRUE, // Ohio | |
600 | + 'ok' => TRUE, // Oklahoma | |
601 | + 'or' => TRUE, // Oregon | |
602 | + 'pa' => TRUE, // Pennsylvania | |
603 | + 'pr' => TRUE, // Puerto Rico | |
604 | + 'pw' => TRUE, // Palau | |
605 | + 'ri' => TRUE, // Rhode Island | |
606 | + 'sc' => TRUE, // South Carolina | |
607 | + 'sd' => TRUE, // South Dakota | |
608 | + 'tn' => TRUE, // Tennessee | |
609 | + 'tx' => TRUE, // Texas | |
610 | + 'ut' => TRUE, // Utah | |
611 | + 'va' => TRUE, // Virginia | |
612 | + 'vi' => TRUE, // Virgin Islands | |
613 | + 'vt' => TRUE, // Vermont | |
614 | + 'wa' => TRUE, // Washington | |
615 | + 'wi' => TRUE, // Wisconsin | |
616 | + 'wv' => TRUE, // West Virginia | |
617 | + 'wy' => TRUE, // Wyoming | |
618 | + ), | |
619 | + | |
620 | + // ccTLD: South Africa | |
621 | + // NIC : http://www.zadna.org.za/ | |
622 | + // Whois: | |
623 | + // ac.za http://www.tenet.ac.za/cgi/cgi_domainquery.exe | |
624 | + // co.za http://co.za/whois.shtml | |
625 | + // gov.za http://dnsadmin.gov.za/ | |
626 | + // org.za http://www.org.za/ | |
627 | + 'za' => array( | |
628 | + // Second-level subdomains of .ZA | |
629 | + // http://www.zadna.org.za/slds.html | |
630 | + 'ac' => TRUE, | |
631 | + 'city' => TRUE, | |
632 | + 'co' => TRUE, | |
633 | + 'edu' => TRUE, | |
634 | + 'gov' => TRUE, | |
635 | + 'law' => TRUE, | |
636 | + 'mil' => TRUE, | |
637 | + 'nom' => TRUE, | |
638 | + 'org' => TRUE, | |
639 | + 'school' => array( | |
640 | + // Provincial Domains (each key is a single DNS label, so no dots) | |
641 | + // http://www.esn.org.za/dns/ | |
642 | + 'ecape' => TRUE, | |
643 | + 'fs' => TRUE, | |
644 | + 'gp' => TRUE, | |
645 | + 'kzn' => TRUE, | |
646 | + 'lp' => TRUE, | |
647 | + 'mpm' => TRUE, | |
648 | + 'ncape' => TRUE, | |
649 | + 'nw' => TRUE, | |
650 | + 'wcape' => TRUE, | |
651 | + ), | |
652 | + ), | |
653 | + | |
654 | +); | |
655 | +?> | |
\ No newline at end of file |
@@ -1,5 +1,5 @@ | ||
1 | 1 | <?php |
2 | -// $Id: spam.php,v 1.28 2007/06/24 15:25:06 henoheno Exp $ | |
2 | +// $Id: spam.php,v 1.29 2007/07/03 14:47:20 henoheno Exp $ | |
3 | 3 | // Copyright (C) 2006-2007 PukiWiki Developers Team |
4 | 4 | // License: GPL v2 or (at your option) any later version |
5 | 5 | // |
@@ -7,7 +7,10 @@ | ||
7 | 7 | // |
8 | 8 | // (PHP 4 >= 4.3.0): preg_match_all(PREG_OFFSET_CAPTURE): $method['uri_XXX'] related feature |
9 | 9 | |
10 | -if (! defined('SPAM_INI_FILE')) define('SPAM_INI_FILE', 'spam.ini.php'); | |
10 | +require_once('spam_pickup.php'); | |
11 | + | |
12 | +if (! defined('SPAM_INI_FILE')) define('SPAM_INI_FILE', 'spam.ini.php'); | |
13 | +if (! defined('DOMAIN_INI_FILE')) define('DOMAIN_INI_FILE', 'domain.ini.php'); | |
11 | 14 | |
12 | 15 | // --------------------- |
13 | 16 | // Compat etc |
@@ -37,7 +40,9 @@ function preg_grep_invert($pattern = '//', $input = array()) | ||
37 | 40 | } |
38 | 41 | } |
39 | 42 | |
40 | -// ---- | |
43 | + | |
44 | +// --------------------- | |
45 | +// Utilities | |
41 | 46 | |
42 | 47 | // Very roughly, shrink the lines of var_export() |
43 | 48 | // NOTE: If the same data exists, it must be corrupted. |
@@ -67,41 +72,29 @@ function var_export_shrink($expression, $return = FALSE, $ignore_numeric_keys = | ||
67 | 72 | } |
68 | 73 | } |
69 | 74 | |
70 | -// Remove redundant values from array() | |
71 | -function array_unique_recursive($array = array()) | |
75 | +// Reverse $string with specified delimiter | |
76 | +function delimiter_reverse($string = 'foo.bar.example.com', $from_delim = '.', $to_delim = '.') | |
72 | 77 | { |
73 | - if (! is_array($array)) return $array; | |
74 | - | |
75 | - $tmp = array(); | |
76 | - foreach($array as $key => $value){ | |
77 | - if (is_array($value)) { | |
78 | - $array[$key] = array_unique_recursive($value); | |
79 | - } else { | |
80 | - if (isset($tmp[$value])) { | |
81 | - unset($array[$key]); | |
82 | - } else { | |
83 | - $tmp[$value] = TRUE; | |
84 | - } | |
85 | - } | |
86 | - } | |
78 | + if (! is_string($string) || ! is_string($from_delim) || ! is_string($to_delim)) | |
79 | + return $string; | |
87 | 80 | |
88 | - return $array; | |
81 | + // com.example.bar.foo | |
82 | + return implode($to_delim, array_reverse(explode($from_delim, $string))); | |
89 | 83 | } |
90 | 84 | |
91 | -// Renumber all numeric keys from 0 | |
92 | -function array_renumber_numeric_keys(& $array) | |
85 | +// ksort() by domain | |
86 | +function ksort_by_domain(& $array) | |
93 | 87 | { |
94 | - if (! is_array($array)) return $array; | |
95 | - | |
96 | - $count = -1; | |
97 | - $tmp = array(); | |
98 | - foreach($array as $key => $value){ | |
99 | - if (is_array($value)) array_renumber_numeric_keys($array[$key]); // Recurse | |
100 | - if (is_numeric($key)) $tmp[$key] = ++$count; | |
88 | + $sort = array(); | |
89 | + foreach(array_keys($array) as $key) { | |
90 | + $sort[delimiter_reverse($key)] = $key; | |
101 | 91 | } |
102 | - array_rename_keys($array, $tmp); | |
103 | - | |
104 | - return $array; | |
92 | + ksort($sort, SORT_STRING); | |
93 | + $result = array(); | |
94 | + foreach($sort as $key) { | |
95 | + $result[$key] = & $array[$key]; | |
96 | + } | |
97 | + $array = $result; | |
105 | 98 | } |
106 | 99 | |
107 | 100 | // Roughly strings(1) using PCRE |
@@ -153,154 +146,41 @@ function strings($binary = '', $min_len = 4, $ignore_space = FALSE, $multibyte = | ||
153 | 146 | return $binary; |
154 | 147 | } |
155 | 148 | |
156 | -// Reverse $string with specified delimiter | |
157 | -function delimiter_reverse($string = 'foo.bar.example.com', $from_delim = '.', $to_delim = '.') | |
158 | -{ | |
159 | - if (! is_string($string) || ! is_string($from_delim) || ! is_string($to_delim)) | |
160 | - return $string; | |
161 | - | |
162 | - // com.example.bar.foo | |
163 | - return implode($to_delim, array_reverse(explode($from_delim, $string))); | |
164 | -} | |
165 | - | |
166 | 149 | |
167 | 150 | // --------------------- |
168 | -// URI pickup | |
169 | - | |
170 | -// Return an array of URIs in the $string | |
171 | -// [OK] http://nasty.example.org#nasty_string | |
172 | -// [OK] http://nasty.example.org:80/foo/xxx#nasty_string/bar | |
173 | -// [OK] ftp://nasty.example.org:80/dfsdfs | |
174 | -// [OK] ftp://cnn.example.com&story=breaking_news@10.0.0.1/top_story.htm (from RFC3986) | |
175 | -function uri_pickup($string = '') | |
176 | -{ | |
177 | - if (! is_string($string)) return array(); | |
178 | - | |
179 | - // Not available for: IDN(ignored) | |
180 | - $array = array(); | |
181 | - preg_match_all( | |
182 | - // scheme://userinfo@host:port/path/or/pathinfo/maybefile.and?query=string#fragment | |
183 | - // Refer RFC3986 (Regex below is not strict) | |
184 | - '#(\b[a-z][a-z0-9.+-]{1,8}):[/\\\]+' . // 1: Scheme | |
185 | - '(?:' . | |
186 | - '([^\s<>"\'\[\]/\#?@]*)' . // 2: Userinfo (Username) | |
187 | - '@)?' . | |
188 | - '(' . | |
189 | - // 3: Host | |
190 | - '\[[0-9a-f:.]+\]' . '|' . // IPv6([colon-hex and dot]): RFC2732 | |
191 | - '(?:[0-9]{1,3}\.){3}[0-9]{1,3}' . '|' . // IPv4(dot-decimal): 001.22.3.44 | |
192 | - '[a-z0-9_-][a-z0-9_.-]+[a-z0-9_-]' . // hostname(FQDN) : foo.example.org | |
193 | - ')' . | |
194 | - '(?::([0-9]*))?' . // 4: Port | |
195 | - '((?:/+[^\s<>"\'\[\]/\#]+)*/+)?' . // 5: Directory path or path-info | |
196 | - '([^\s<>"\'\[\]\#?]+)?' . // 6: File? | |
197 | - '(?:\?([^\s<>"\'\[\]\#]+))?' . // 7: Query string | |
198 | - '(?:\#([a-z0-9._~%!$&\'()*+,;=:@-]*))?' . // 8: Fragment | |
199 | - '#i', | |
200 | - $string, $array, PREG_SET_ORDER | PREG_OFFSET_CAPTURE | |
201 | - ); | |
202 | - | |
203 | - // Format the $array | |
204 | - static $parts = array( | |
205 | - 1 => 'scheme', 2 => 'userinfo', 3 => 'host', 4 => 'port', | |
206 | - 5 => 'path', 6 => 'file', 7 => 'query', 8 => 'fragment' | |
207 | - ); | |
208 | - $default = array(''); | |
209 | - foreach(array_keys($array) as $uri) { | |
210 | - $_uri = & $array[$uri]; | |
211 | - array_rename_keys($_uri, $parts, TRUE, $default); | |
212 | - $offset = $_uri['scheme'][1]; // Scheme's offset = URI's offset | |
213 | - foreach(array_keys($_uri) as $part) { | |
214 | - $_uri[$part] = & $_uri[$part][0]; // Remove offsets | |
215 | - } | |
216 | - } | |
217 | - | |
218 | - foreach(array_keys($array) as $uri) { | |
219 | - $_uri = & $array[$uri]; | |
220 | - if ($_uri['scheme'] === '') { | |
221 | - unset($array[$uri]); // Considererd harmless | |
222 | - continue; | |
223 | - } | |
224 | - unset($_uri[0]); // Matched string itself | |
225 | - $_uri['area']['offset'] = $offset; // Area offset for area_measure() | |
226 | - } | |
227 | - | |
228 | - return $array; | |
229 | -} | |
151 | +// Utilities: Arrays | |
230 | 152 | |
231 | -// Normalize an array of URI arrays | |
232 | -// NOTE: Give me the uri_pickup() results | |
233 | -function uri_pickup_normalize(& $pickups, $destructive = TRUE) | |
153 | +// Count leaves (A leaf = value that is not an array, or an empty array) | |
154 | +function array_count_leaves($array = array(), $count_empty = FALSE) | |
234 | 155 | { |
235 | - if (! is_array($pickups)) return $pickups; | |
156 | + if (! is_array($array) || (empty($array) && $count_empty)) return 1; | |
236 | 157 | |
237 | - if ($destructive) { | |
238 | - foreach (array_keys($pickups) as $key) { | |
239 | - $_key = & $pickups[$key]; | |
240 | - $_key['scheme'] = isset($_key['scheme']) ? scheme_normalize($_key['scheme']) : ''; | |
241 | - $_key['host'] = isset($_key['host']) ? host_normalize($_key['host']) : ''; | |
242 | - $_key['port'] = isset($_key['port']) ? port_normalize($_key['port'], $_key['scheme'], FALSE) : ''; | |
243 | - $_key['path'] = isset($_key['path']) ? strtolower(path_normalize($_key['path'])) : ''; | |
244 | - $_key['file'] = isset($_key['file']) ? file_normalize($_key['file']) : ''; | |
245 | - $_key['query'] = isset($_key['query']) ? query_normalize($_key['query']) : ''; | |
246 | - $_key['fragment'] = isset($_key['fragment']) ? strtolower($_key['fragment']) : ''; | |
247 | - } | |
248 | - } else { | |
249 | - foreach (array_keys($pickups) as $key) { | |
250 | - $_key = & $pickups[$key]; | |
251 | - $_key['scheme'] = isset($_key['scheme']) ? scheme_normalize($_key['scheme']) : ''; | |
252 | - $_key['host'] = isset($_key['host']) ? strtolower($_key['host']) : ''; | |
253 | - $_key['port'] = isset($_key['port']) ? port_normalize($_key['port'], $_key['scheme'], FALSE) : ''; | |
254 | - $_key['path'] = isset($_key['path']) ? path_normalize($_key['path']) : ''; | |
255 | - } | |
158 | + // Recurse | |
159 | + $count = 0; | |
160 | + foreach ($array as $part) { | |
161 | + $count += array_count_leaves($part, $count_empty); | |
256 | 162 | } |
257 | - | |
258 | - return $pickups; | |
163 | + return $count; | |
259 | 164 | } |
260 | 165 | |
261 | -// An URI array => An URI (See uri_pickup()) | |
262 | -// USAGE: | |
263 | -// $pickups = uri_pickup('a string include some URIs'); | |
264 | -// $uris = array(); | |
265 | -// foreach (array_keys($pickups) as $key) { | |
266 | -// $uris[$key] = uri_pickup_implode($pickups[$key]); | |
267 | -// } | |
268 | -function uri_pickup_implode($uri = array()) | |
166 | +// Flatten the leaves of a nested array into a flat list; de-duplicate only if $unique | |
167 | +function array_flat_leaves($array, $unique = TRUE) | |
269 | 168 | {
270 | - if (empty($uri) || ! is_array($uri)) return NULL; | |
169 | + if (! is_array($array)) return $array; | |
271 | 170 | 
272 | 171 | $tmp = array();
273 | - if (isset($uri['scheme']) && $uri['scheme'] !== '') { | |
274 | - $tmp[] = & $uri['scheme']; | |
275 | - $tmp[] = '://'; | |
276 | - } | |
277 | - if (isset($uri['userinfo']) && $uri['userinfo'] !== '') { | |
278 | - $tmp[] = & $uri['userinfo']; | |
279 | - $tmp[] = '@'; | |
280 | - } | |
281 | - if (isset($uri['host']) && $uri['host'] !== '') { | |
282 | - $tmp[] = & $uri['host']; | |
283 | - } | |
284 | - if (isset($uri['port']) && $uri['port'] !== '') { | |
285 | - $tmp[] = ':'; | |
286 | - $tmp[] = & $uri['port']; | |
287 | - } | |
288 | - if (isset($uri['path']) && $uri['path'] !== '') { | |
289 | - $tmp[] = & $uri['path']; | |
290 | - } | |
291 | - if (isset($uri['file']) && $uri['file'] !== '') { | |
292 | - $tmp[] = & $uri['file']; | |
293 | - } | |
294 | - if (isset($uri['query']) && $uri['query'] !== '') { | |
295 | - $tmp[] = '?'; | |
296 | - $tmp[] = & $uri['query']; | |
297 | - } | |
298 | - if (isset($uri['fragment']) && $uri['fragment'] !== '') { | |
299 | - $tmp[] = '#'; | |
300 | - $tmp[] = & $uri['fragment']; | |
172 | + foreach(array_keys($array) as $key) { | |
173 | + if (is_array($array[$key])) { | |
174 | + // Recurse, forwarding $unique so nested duplicates survive when it is FALSE | |
175 | + foreach(array_flat_leaves($array[$key], $unique) as $_value) { | |
176 | + $tmp[] = $_value; | |
177 | + } | |
178 | + } else { | |
179 | + $tmp[] = & $array[$key]; | |
180 | + } | |
301 | 181 | }
302 | 182 | 
303 | - return implode('', $tmp); | |
183 | + return $unique ? array_values(array_unique($tmp)) : $tmp; | |
304 | 184 | }
305 | 185 | |
306 | 186 | // $array['something'] => $array['wanted'] |
@@ -327,641 +207,28 @@ function array_rename_keys(& $array, $keys = array('from' => 'to'), $force = FAL | ||
327 | 207 | return TRUE; |
328 | 208 | } |
329 | 209 | |
330 | -// --------------------- | |
331 | -// Area pickup | |
332 | - | |
333 | -// Pickup all of markup areas | |
334 | -function area_pickup($string = '', $method = array()) | |
335 | -{ | |
336 | - $area = array(); | |
337 | - if (empty($method)) return $area; | |
338 | - | |
339 | - // Anchor tag pair by preg_match and preg_match_all() | |
340 | - // [OK] <a href></a> | |
341 | - // [OK] <a href= >Good site!</a> | |
342 | - // [OK] <a href= "#" >test</a> | |
343 | - // [OK] <a href="http://nasty.example.com">visit http://nasty.example.com/</a> | |
344 | - // [OK] <a href=\'http://nasty.example.com/\' >discount foobar</a> | |
345 | - // [NG] <a href="http://ng.example.com">visit http://ng.example.com _not_ended_ | |
346 | - $regex = '#<a\b[^>]*\bhref\b[^>]*>.*?</a\b[^>]*(>)#is'; | |
347 | - if (isset($method['area_anchor'])) { | |
348 | - $areas = array(); | |
349 | - $count = isset($method['asap']) ? | |
350 | - preg_match($regex, $string) : | |
351 | - preg_match_all($regex, $string, $areas); | |
352 | - if (! empty($count)) $area['area_anchor'] = $count; | |
353 | - } | |
354 | - if (isset($method['uri_anchor'])) { | |
355 | - $areas = array(); | |
356 | - preg_match_all($regex, $string, $areas, PREG_SET_ORDER | PREG_OFFSET_CAPTURE); | |
357 | - foreach(array_keys($areas) as $_area) { | |
358 | - $areas[$_area] = array( | |
359 | - $areas[$_area][0][1], // Area start (<a href>) | |
360 | - $areas[$_area][1][1], // Area end (</a>) | |
361 | - ); | |
362 | - } | |
363 | - if (! empty($areas)) $area['uri_anchor'] = $areas; | |
364 | - } | |
365 | - | |
366 | - // phpBB's "BBCode" pair by preg_match and preg_match_all() | |
367 | - // [OK] [url][/url] | |
368 | - // [OK] [url]http://nasty.example.com/[/url] | |
369 | - // [OK] [link]http://nasty.example.com/[/link] | |
370 | - // [OK] [url=http://nasty.example.com]visit http://nasty.example.com/[/url] | |
371 | - // [OK] [link http://nasty.example.com/]buy something[/link] | |
372 | - $regex = '#\[(url|link)\b[^\]]*\].*?\[/\1\b[^\]]*(\])#is'; | |
373 | - if (isset($method['area_bbcode'])) { | |
374 | - $areas = array(); | |
375 | - $count = isset($method['asap']) ? | |
376 | - preg_match($regex, $string) : | |
377 | - preg_match_all($regex, $string, $areas, PREG_SET_ORDER); | |
378 | - if (! empty($count)) $area['area_bbcode'] = $count; | |
379 | - } | |
380 | - if (isset($method['uri_bbcode'])) { | |
381 | - $areas = array(); | |
382 | - preg_match_all($regex, $string, $areas, PREG_SET_ORDER | PREG_OFFSET_CAPTURE); | |
383 | - foreach(array_keys($areas) as $_area) { | |
384 | - $areas[$_area] = array( | |
385 | - $areas[$_area][0][1], // Area start ([url]) | |
386 | - $areas[$_area][2][1], // Area end ([/url]) | |
387 | - ); | |
388 | - } | |
389 | - if (! empty($areas)) $area['uri_bbcode'] = $areas; | |
390 | - } | |
391 | - | |
392 | - // Various Wiki syntax | |
393 | - // [text_or_uri>text_or_uri] | |
394 | - // [text_or_uri:text_or_uri] | |
395 | - // [text_or_uri|text_or_uri] | |
396 | - // [text_or_uri->text_or_uri] | |
397 | - // [text_or_uri text_or_uri] // MediaWiki | |
398 | - // MediaWiki: [http://nasty.example.com/ visit http://nasty.example.com/] | |
399 | - | |
400 | - return $area; | |
401 | -} | |
402 | - | |
403 | -// If in doubt, it's a little doubtful | |
404 | -// if (Area => inside <= Area) $belief += -1 | |
405 | -function area_measure($areas, & $array, $belief = -1, $a_key = 'area', $o_key = 'offset') | |
406 | -{ | |
407 | - if (! is_array($areas) || ! is_array($array)) return; | |
408 | - | |
409 | - $areas_keys = array_keys($areas); | |
410 | - foreach(array_keys($array) as $u_index) { | |
411 | - $offset = isset($array[$u_index][$o_key]) ? | |
412 | - intval($array[$u_index][$o_key]) : 0; | |
413 | - foreach($areas_keys as $a_index) { | |
414 | - if (isset($array[$u_index][$a_key])) { | |
415 | - $offset_s = intval($areas[$a_index][0]); | |
416 | - $offset_e = intval($areas[$a_index][1]); | |
417 | - // [Area => inside <= Area] | |
418 | - if ($offset_s < $offset && $offset < $offset_e) { | |
419 | - $array[$u_index][$a_key] += $belief; | |
420 | - } | |
421 | - } | |
422 | - } | |
423 | - } | |
424 | -} | |
425 | - | |
426 | -// --------------------- | |
427 | -// Spam-uri pickup | |
428 | - | |
429 | -// Domain exposure callback (See spam_uri_pickup_preprocess()) | |
430 | -// http://victim.example.org/?foo+site:nasty.example.com+bar | |
431 | -// => http://nasty.example.com/?refer=victim.example.org | |
432 | -// NOTE: 'refer=' is not so good for (at this time). | |
433 | -// Consider about using IP address of the victim, try to avoid that. | |
434 | -function _preg_replace_callback_domain_exposure($matches = array()) | |
435 | -{ | |
436 | - $result = ''; | |
437 | - | |
438 | - // Preserve the victim URI as a complicity or ... | |
439 | - if (isset($matches[5])) { | |
440 | - $result = | |
441 | - $matches[1] . '://' . // scheme | |
442 | - $matches[2] . '/' . // victim.example.org | |
443 | - $matches[3]; // The rest of all (before victim) | |
444 | - } | |
445 | - | |
446 | - // Flipped URI | |
447 | - if (isset($matches[4])) { | |
448 | - $result = | |
449 | - $matches[1] . '://' . // scheme | |
450 | - $matches[4] . // nasty.example.com | |
451 | - '/?refer=' . strtolower($matches[2]) . // victim.example.org | |
452 | - ' ' . $result; | |
453 | - } | |
454 | - | |
455 | - return $result; | |
456 | -} | |
457 | - | |
458 | -// Preprocess: Removing uninteresting parts for URI detection | |
459 | -function spam_uri_removing_hocus_pocus($binary = '', $method = array()) | |
460 | -{ | |
461 | - $length = 4 ; // 'http'(1) and '://'(2) and 'fqdn'(1) | |
462 | - if (is_array($method)) { | |
463 | - // '<a'(2) or 'href='(5) or '>'(1) or '</a>'(4) | |
464 | - // '[uri'(4) or ']'(1) or '[/uri]'(6) | |
465 | - if (isset($method['area_anchor']) || isset($method['uri_anchor']) || | |
466 | - isset($method['area_bbcode']) || isset($method['uri_bbcode'])) | |
467 | - $length = 1; // Seems not effective | |
468 | - } | |
469 | - | |
470 | - // Removing sequential spaces and too short lines | |
471 | - $binary = strings($binary, $length, TRUE, FALSE); // Multibyte NOT needed | |
472 | - | |
473 | - // Remove words (has no '<>[]:') between spaces | |
474 | - $binary = preg_replace('/[ \t][\w.,()\ \t]+[ \t]/', ' ', $binary); | |
475 | - | |
476 | - return $binary; | |
477 | -} | |
478 | - | |
479 | -// Preprocess: rawurldecode() and adding space(s) and something | |
480 | -// to detect/count some URIs _if possible_ | |
481 | -// NOTE: It may be dangerous to var_dump() the result. [e.g. 'javascript:'] | |
482 | -// [OK] http://victim.example.org/?site:nasty.example.org | |
483 | -// [OK] http://victim.example.org/nasty.example.org | |
484 | -// [OK] http://victim.example.org/go?http%3A%2F%2Fnasty.example.org | |
485 | -// [OK] http://victim.example.org/http://nasty.example.org | |
486 | -function spam_uri_pickup_preprocess($string = '', $method = array()) | |
487 | -{ | |
488 | - if (! is_string($string)) return ''; | |
489 | - | |
490 | - $string = spam_uri_removing_hocus_pocus(rawurldecode($string), $method); | |
491 | - //var_dump(htmlspecialchars($string)); | |
492 | - | |
493 | - // Domain exposure (simple) | |
494 | - // http://victim.example.org/nasty.example.org/path#frag | |
495 | - // => http://nasty.example.org/?refer=victim.example.org and original | |
496 | - $string = preg_replace( | |
497 | - '#h?ttp://' . | |
498 | - '(' . | |
499 | - 'ime\.nu' . '|' . // 2ch.net | |
500 | - 'ime\.st' . '|' . // 2ch.net | |
501 | - 'link\.toolbot\.com' . '|' . | |
502 | - 'urlx\.org' . | |
503 | - ')' . | |
504 | - '/([a-z0-9.%_-]+\.[a-z0-9.%_-]+)#i', // nasty.example.org | |
505 | - 'http://$2/?refer=$1 $0', // Preserve $0 or remove? | |
506 | - $string | |
507 | - ); | |
508 | - | |
509 | - // Domain exposure (gate-big5) | |
510 | - // http://victim.example.org/gate/big5/nasty.example.org/path | |
511 | - // => http://nasty.example.org/?refer=victim.example.org and original | |
512 | - $string = preg_replace( | |
513 | - '#h?ttp://' . | |
514 | - '(' . | |
515 | - 'big5.51job.com' . '|' . | |
516 | - 'big5.china.com' . '|' . | |
517 | - 'big5.xinhuanet.com' . '|' . | |
518 | - ')' . | |
519 | - '/gate/big5' . | |
520 | - '/([a-z0-9.%_-]+\.[a-z0-9.%_-]+)' . | |
521 | - '#i', // nasty.example.org | |
522 | - 'http://$2/?refer=$1 $0', // Preserve $0 or remove? | |
523 | - $string | |
524 | - ); | |
525 | - | |
526 | - // Domain exposure (See _preg_replace_callback_domain_exposure()) | |
527 | - $string = preg_replace_callback( | |
528 | - array( | |
529 | - '#(http)://' . | |
530 | - '(' . | |
531 | - // Something Google: http://www.google.com/supported_domains | |
532 | - '(?:[a-z0-9.]+\.)?google\.[a-z]{2,3}(?:\.[a-z]{2})?' . | |
533 | - '|' . | |
534 | - // AltaVista | |
535 | - '(?:[a-z0-9.]+\.)?altavista.com' . | |
536 | - | |
537 | - ')' . | |
538 | - '/' . | |
539 | - '([a-z0-9?=&.%_/\'\\\+-]+)' . // path/?query=foo+bar+ | |
540 | - '\bsite:([a-z0-9.%_-]+\.[a-z0-9.%_-]+)' . // site:nasty.example.com | |
541 | - //'()' . // Preserve or remove? | |
542 | - '#i', | |
543 | - ), | |
544 | - '_preg_replace_callback_domain_exposure', | |
545 | - $string | |
546 | - ); | |
547 | - | |
548 | - // URI exposure (uriuri => uri uri) | |
549 | - $string = preg_replace( | |
550 | - array( | |
551 | - '#(?<! )(?:https?|ftp):/#i', | |
552 | - // '#[a-z][a-z0-9.+-]{1,8}://#i', | |
553 | - // '#[a-z][a-z0-9.+-]{1,8}://#i' | |
554 | - ), | |
555 | - ' $0', | |
556 | - $string | |
557 | - ); | |
558 | - | |
559 | - return $string; | |
560 | -} | |
561 | - | |
562 | -// Main function of spam-uri pickup, | |
563 | -// A wrapper function of uri_pickup() | |
564 | -function spam_uri_pickup($string = '', $method = array()) | |
565 | -{ | |
566 | - if (! is_array($method) || empty($method)) { | |
567 | - $method = check_uri_spam_method(); | |
568 | - } | |
569 | - | |
570 | - $string = spam_uri_pickup_preprocess($string, $method); | |
571 | - | |
572 | - $array = uri_pickup($string); | |
573 | - | |
574 | - // Area elevation of URIs, for '(especially external)link' intention | |
575 | - if (! empty($array)) { | |
576 | - $_method = array(); | |
577 | - if (isset($method['uri_anchor'])) $_method['uri_anchor'] = & $method['uri_anchor']; | |
578 | - if (isset($method['uri_bbcode'])) $_method['uri_bbcode'] = & $method['uri_bbcode']; | |
579 | - $areas = area_pickup($string, $_method, TRUE); | |
580 | - if (! empty($areas)) { | |
581 | - $area_shadow = array(); | |
582 | - foreach (array_keys($array) as $key) { | |
583 | - $area_shadow[$key] = & $array[$key]['area']; | |
584 | - foreach (array_keys($_method) as $_key) { | |
585 | - $area_shadow[$key][$_key] = 0; | |
586 | - } | |
587 | - } | |
588 | - foreach (array_keys($_method) as $_key) { | |
589 | - if (isset($areas[$_key])) { | |
590 | - area_measure($areas[$_key], $area_shadow, 1, $_key); | |
591 | - } | |
592 | - } | |
593 | - } | |
594 | - } | |
595 | - | |
596 | - // Remove 'offset's for area_measure() | |
597 | - foreach(array_keys($array) as $key) | |
598 | - unset($array[$key]['area']['offset']); | |
599 | - | |
600 | - return $array; | |
601 | -} | |
602 | - | |
603 | - | |
604 | -// --------------------- | |
605 | -// Normalization | |
606 | - | |
607 | -// Scheme normalization: Renaming the schemes | |
608 | -// snntp://example.org => nntps://example.org | |
609 | -// NOTE: Keep the static lists simple. See also port_normalize(). | |
610 | -function scheme_normalize($scheme = '', $abbrevs_harmfull = TRUE) | |
611 | -{ | |
612 | - // Abbreviations they have no intention of link | |
613 | - static $abbrevs = array( | |
614 | - 'ttp' => 'http', | |
615 | - 'ttps' => 'https', | |
616 | - ); | |
617 | - | |
618 | - // Aliases => normalized ones | |
619 | - static $aliases = array( | |
620 | - 'pop' => 'pop3', | |
621 | - 'news' => 'nntp', | |
622 | - 'imap4' => 'imap', | |
623 | - 'snntp' => 'nntps', | |
624 | - 'snews' => 'nntps', | |
625 | - 'spop3' => 'pop3s', | |
626 | - 'pops' => 'pop3s', | |
627 | - ); | |
628 | - | |
629 | - if (! is_string($scheme)) return ''; | |
630 | - | |
631 | - $scheme = strtolower($scheme); | |
632 | - if (isset($abbrevs[$scheme])) { | |
633 | - $scheme = $abbrevs_harmfull ? $abbrevs[$scheme] : ''; | |
634 | - } | |
635 | - if (isset($aliases[$scheme])) { | |
636 | - $scheme = $aliases[$scheme]; | |
637 | - } | |
638 | - | |
639 | - return $scheme; | |
640 | -} | |
641 | - | |
642 | -// Hostname normalization (Destructive) | |
643 | -// www.foo => www.foo ('foo' seems TLD) | |
644 | -// www.foo.bar => foo.bar | |
645 | -// www.10.20 => www.10.20 (Invalid hostname) | |
646 | -// NOTE: | |
647 | -// 'www' is mostly used as traditional hostname of WWW server. | |
648 | -// 'www.foo.bar' may be identical with 'foo.bar'. | |
649 | -function host_normalize($host = '') | |
650 | -{ | |
651 | - if (! is_string($host)) return ''; | |
652 | - | |
653 | - $host = strtolower($host); | |
654 | - $matches = array(); | |
655 | - if (preg_match('/^www\.(.+\.[a-z]+)$/', $host, $matches)) { | |
656 | - return $matches[1]; | |
657 | - } else { | |
658 | - return $host; | |
659 | - } | |
660 | -} | |
661 | - | |
662 | -// Port normalization: Suppress the (redundant) default port | |
663 | -// HTTP://example.org:80/ => http://example.org/ | |
664 | -// HTTP://example.org:8080/ => http://example.org:8080/ | |
665 | -// HTTPS://example.org:443/ => https://example.org/ | |
666 | -function port_normalize($port, $scheme, $scheme_normalize = FALSE) | |
667 | -{ | |
668 | - // Schemes that users _maybe_ want to add protocol-handlers | |
669 | - // to their web browsers. (and attackers _maybe_ want to use ...) | |
670 | - // Reference: http://www.iana.org/assignments/port-numbers | |
671 | - static $array = array( | |
672 | - // scheme => default port | |
673 | - 'ftp' => 21, | |
674 | - 'ssh' => 22, | |
675 | - 'telnet' => 23, | |
676 | - 'smtp' => 25, | |
677 | - 'tftp' => 69, | |
678 | - 'gopher' => 70, | |
679 | - 'finger' => 79, | |
680 | - 'http' => 80, | |
681 | - 'pop3' => 110, | |
682 | - 'sftp' => 115, | |
683 | - 'nntp' => 119, | |
684 | - 'imap' => 143, | |
685 | - 'irc' => 194, | |
686 | - 'wais' => 210, | |
687 | - 'https' => 443, | |
688 | - 'nntps' => 563, | |
689 | - 'rsync' => 873, | |
690 | - 'ftps' => 990, | |
691 | - 'telnets' => 992, | |
692 | - 'imaps' => 993, | |
693 | - 'ircs' => 994, | |
694 | - 'pop3s' => 995, | |
695 | - 'mysql' => 3306, | |
696 | - ); | |
697 | - | |
698 | - // intval() converts '0-1' to '0', so preg_match() rejects these invalid ones | |
699 | - if (! is_numeric($port) || $port < 0 || preg_match('/[^0-9]/i', $port)) | |
700 | - return ''; | |
701 | - | |
702 | - $port = intval($port); | |
703 | - if ($scheme_normalize) $scheme = scheme_normalize($scheme); | |
704 | - if (isset($array[$scheme]) && $port == $array[$scheme]) | |
705 | - $port = ''; // Ignore the defaults | |
706 | - | |
707 | - return $port; | |
708 | -} | |
709 | - | |
710 | -// Path normalization | |
711 | -// http://example.org => http://example.org/ | |
712 | -// http://example.org#hoge => http://example.org/#hoge | |
713 | -// http://example.org/path/a/b/./c////./d => http://example.org/path/a/b/c/d | |
714 | -// http://example.org/path/../../a/../back => http://example.org/back | |
715 | -function path_normalize($path = '', $divider = '/', $add_root = TRUE) | |
210 | +// Remove redundant values from array() | |
211 | +function array_unique_recursive($array = array()) | |
716 | 212 | { |
717 | - if (! is_string($divider)) return is_string($path) ? $path : ''; | |
718 | - | |
719 | - if ($add_root) { | |
720 | - $first_div = & $divider; | |
721 | - } else { | |
722 | - $first_div = ''; | |
723 | - } | |
724 | - if (! is_string($path) || $path == '') return $first_div; | |
725 | - | |
726 | - if (strpos($path, $divider, strlen($path) - strlen($divider)) === FALSE) { | |
727 | - $last_div = ''; | |
728 | - } else { | |
729 | - $last_div = & $divider; | |
730 | - } | |
731 | - | |
732 | - $array = explode($divider, $path); | |
733 | - | |
734 | - // Remove paddings ('//' and '/./') | |
735 | - foreach(array_keys($array) as $key) { | |
736 | - if ($array[$key] == '' || $array[$key] == '.') { | |
737 | - unset($array[$key]); | |
738 | - } | |
739 | - } | |
213 | + if (! is_array($array)) return $array; | |
740 | 214 | |
741 | - // Remove back-tracks ('/../') | |
742 | 215 | $tmp = array(); |
743 | - foreach($array as $value) { | |
744 | - if ($value == '..') { | |
745 | - array_pop($tmp); | |
216 | + foreach($array as $key => $value){ | |
217 | + if (is_array($value)) { | |
218 | + $array[$key] = array_unique_recursive($value); | |
746 | 219 | } else { |
747 | - array_push($tmp, $value); | |
748 | - } | |
749 | - } | |
750 | - $array = & $tmp; | |
751 | - | |
752 | - if (empty($array)) { | |
753 | - return $first_div; | |
754 | - } else { | |
755 | - return $first_div . implode($divider, $array) . $last_div; | |
756 | - } | |
757 | -} | |
758 | - | |
759 | -// DirectoryIndex normalize (Destructive and rough) | |
760 | -// TODO: sample.en.ja.html.gz => sample.html | |
761 | -function file_normalize($file = 'index.html.en') | |
762 | -{ | |
763 | - static $simple_defaults = array( | |
764 | - 'default.htm' => TRUE, | |
765 | - 'default.html' => TRUE, | |
766 | - 'default.asp' => TRUE, | |
767 | - 'default.aspx' => TRUE, | |
768 | - 'index' => TRUE, // Some system can omit the suffix | |
769 | - ); | |
770 | - | |
771 | - static $content_suffix = array( | |
772 | - // index.xxx, sample.xxx | |
773 | - 'htm' => TRUE, | |
774 | - 'html' => TRUE, | |
775 | - 'shtml' => TRUE, | |
776 | - 'jsp' => TRUE, | |
777 | - 'php' => TRUE, | |
778 | - 'php3' => TRUE, | |
779 | - 'php4' => TRUE, | |
780 | - 'pl' => TRUE, | |
781 | - 'py' => TRUE, | |
782 | - 'rb' => TRUE, | |
783 | - 'cgi' => TRUE, | |
784 | - 'xml' => TRUE, | |
785 | - ); | |
786 | - | |
787 | - static $language_suffix = array( | |
788 | - // Reference: Apache 2.0.59 'AddLanguage' default | |
789 | - 'ca' => TRUE, | |
790 | - 'cs' => TRUE, // cs | |
791 | - 'cz' => TRUE, // cs | |
792 | - 'de' => TRUE, | |
793 | - 'dk' => TRUE, // da | |
794 | - 'el' => TRUE, | |
795 | - 'en' => TRUE, | |
796 | - 'eo' => TRUE, | |
797 | - 'es' => TRUE, | |
798 | - 'et' => TRUE, | |
799 | - 'fr' => TRUE, | |
800 | - 'he' => TRUE, | |
801 | - 'hr' => TRUE, | |
802 | - 'it' => TRUE, | |
803 | - 'ja' => TRUE, | |
804 | - 'ko' => TRUE, | |
805 | - 'ltz' => TRUE, | |
806 | - 'nl' => TRUE, | |
807 | - 'nn' => TRUE, | |
808 | - 'no' => TRUE, | |
809 | - 'po' => TRUE, | |
810 | - 'pt' => TRUE, | |
811 | - 'pt-br' => TRUE, | |
812 | - 'ru' => TRUE, | |
813 | - 'sv' => TRUE, | |
814 | - 'zh-cn' => TRUE, | |
815 | - 'zh-tw' => TRUE, | |
816 | - | |
817 | - // Reference: Apache 2.0.59 default 'index.html' variants | |
818 | - 'ee' => TRUE, | |
819 | - 'lb' => TRUE, | |
820 | - 'var' => TRUE, | |
821 | - ); | |
822 | - | |
823 | - static $charset_suffix = array( | |
824 | - // Reference: Apache 2.0.59 'AddCharset' default | |
825 | - 'iso8859-1' => TRUE, // ISO-8859-1 | |
826 | - 'latin1' => TRUE, // ISO-8859-1 | |
827 | - 'iso8859-2' => TRUE, // ISO-8859-2 | |
828 | - 'latin2' => TRUE, // ISO-8859-2 | |
829 | - 'cen' => TRUE, // ISO-8859-2 | |
830 | - 'iso8859-3' => TRUE, // ISO-8859-3 | |
831 | - 'latin3' => TRUE, // ISO-8859-3 | |
832 | - 'iso8859-4' => TRUE, // ISO-8859-4 | |
833 | - 'latin4' => TRUE, // ISO-8859-4 | |
834 | - 'iso8859-5' => TRUE, // ISO-8859-5 | |
835 | - 'latin5' => TRUE, // ISO-8859-5 | |
836 | - 'cyr' => TRUE, // ISO-8859-5 | |
837 | - 'iso-ru' => TRUE, // ISO-8859-5 | |
838 | - 'iso8859-6' => TRUE, // ISO-8859-6 | |
839 | - 'latin6' => TRUE, // ISO-8859-6 | |
840 | - 'arb' => TRUE, // ISO-8859-6 | |
841 | - 'iso8859-7' => TRUE, // ISO-8859-7 | |
842 | - 'latin7' => TRUE, // ISO-8859-7 | |
843 | - 'grk' => TRUE, // ISO-8859-7 | |
844 | - 'iso8859-8' => TRUE, // ISO-8859-8 | |
845 | - 'latin8' => TRUE, // ISO-8859-8 | |
846 | - 'heb' => TRUE, // ISO-8859-8 | |
847 | - 'iso8859-9' => TRUE, // ISO-8859-9 | |
848 | - 'latin9' => TRUE, // ISO-8859-9 | |
849 | - 'trk' => TRUE, // ISO-8859-9 | |
850 | - 'iso2022-jp'=> TRUE, // ISO-2022-JP | |
851 | - 'jis' => TRUE, // ISO-2022-JP | |
852 | - 'iso2022-kr'=> TRUE, // ISO-2022-KR | |
853 | - 'kis' => TRUE, // ISO-2022-KR | |
854 | - 'iso2022-cn'=> TRUE, // ISO-2022-CN | |
855 | - 'cis' => TRUE, // ISO-2022-CN | |
856 | - 'big5' => TRUE, | |
857 | - 'cp-1251' => TRUE, // ru, WINDOWS-1251 | |
858 | - 'win-1251' => TRUE, // ru, WINDOWS-1251 | |
859 | - 'cp866' => TRUE, // ru | |
860 | - 'koi8-r' => TRUE, // ru, KOI8-r | |
861 | - 'koi8-ru' => TRUE, // ru, KOI8-r | |
862 | - 'koi8-uk' => TRUE, // ru, KOI8-ru | |
863 | - 'ua' => TRUE, // ru, KOI8-ru | |
864 | - 'ucs2' => TRUE, // ru, ISO-10646-UCS-2 | |
865 | - 'ucs4' => TRUE, // ru, ISO-10646-UCS-4 | |
866 | - 'utf8' => TRUE, | |
867 | - | |
868 | - // Reference: Apache 2.0.59 default 'index.html' variants | |
869 | - 'euc-kr' => TRUE, | |
870 | - 'gb2312' => TRUE, | |
871 | - ); | |
872 | - | |
873 | - // May uncompress by web browsers on the fly | |
874 | - // Must be at the last of the filename | |
875 | - // Reference: Apache 2.0.59 'AddEncoding' | |
876 | - static $encoding_suffix = array( | |
877 | - 'z' => TRUE, | |
878 | - 'gz' => TRUE, | |
879 | - ); | |
880 | - | |
881 | - if (! is_string($file)) return ''; | |
882 | - $_file = strtolower($file); | |
883 | - if (isset($simple_defaults[$_file])) return ''; | |
884 | - | |
885 | - | |
886 | - // Roughly removing language/character-set/encoding suffixes | |
887 | - // References: | |
888 | - // * Apache 2 document about 'Content-negotiation', 'mod_mime' and 'mod_negotiation' | |
889 | - // http://httpd.apache.org/docs/2.0/content-negotiation.html | |
890 | - // http://httpd.apache.org/docs/2.0/mod/mod_mime.html | |
891 | - // http://httpd.apache.org/docs/2.0/mod/mod_negotiation.html | |
892 | - // * http://www.iana.org/assignments/character-sets | |
893 | - // * RFC3066: Tags for the Identification of Languages | |
894 | - // http://www.ietf.org/rfc/rfc3066.txt | |
895 | - // * ISO 639: codes of 'language names' | |
896 | - $suffixes = explode('.', $_file); | |
897 | - $body = array_shift($suffixes); | |
898 | - if ($suffixes) { | |
899 | - // Remove the last .gz/.z | |
900 | - $last_key = end(array_keys($suffixes)); | |
901 | - if (isset($encoding_suffix[$suffixes[$last_key]])) { | |
902 | - unset($suffixes[$last_key]); | |
903 | - } | |
904 | - } | |
905 | - // Cut language and charset suffixes | |
906 | - foreach($suffixes as $key => $value){ | |
907 | - if (isset($language_suffix[$value]) || isset($charset_suffix[$value])) { | |
908 | - unset($suffixes[$key]); | |
909 | - } | |
910 | - } | |
911 | - if (empty($suffixes)) return $body; | |
912 | - | |
913 | - // Index.xxx | |
914 | - $count = count($suffixes); | |
915 | - reset($suffixes); | |
916 | - $current = current($suffixes); | |
917 | - if ($body == 'index' && $count == 1 && isset($content_suffix[$current])) return ''; | |
918 | - | |
919 | - return $file; | |
920 | -} | |
921 | - | |
922 | -// Sort query-strings if possible (Destructive and rough) | |
923 | -// [OK] &&&&f=d&b&d&c&a=0dd => a=0dd&b&c&d&f=d | |
924 | -// [OK] nothing==&eg=dummy&eg=padding&eg=foobar => eg=foobar | |
925 | -function query_normalize($string = '', $equal = TRUE, $equal_cutempty = TRUE, $stortolower = TRUE) | |
926 | -{ | |
927 | - if (! is_string($string)) return ''; | |
928 | - if ($stortolower) $string = strtolower($string); | |
929 | - | |
930 | - $array = explode('&', $string); | |
931 | - | |
932 | - // Remove '&' paddings | |
933 | - foreach(array_keys($array) as $key) { | |
934 | - if ($array[$key] == '') { | |
935 | - unset($array[$key]); | |
936 | - } | |
937 | - } | |
938 | - | |
939 | - // Consider '='-separated input and paddings | |
940 | - if ($equal) { | |
941 | - $equals = $not_equals = array(); | |
942 | - foreach ($array as $part) { | |
943 | - if (strpos($part, '=') === FALSE) { | |
944 | - $not_equals[] = $part; | |
220 | + if (isset($tmp[$value])) { | |
221 | + unset($array[$key]); | |
945 | 222 | } else { |
946 | - list($key, $value) = explode('=', $part, 2); | |
947 | - $value = ltrim($value, '='); | |
948 | - if (! $equal_cutempty || $value != '') { | |
949 | - $equals[$key] = $value; | |
950 | - } | |
223 | + $tmp[$value] = TRUE; | |
951 | 224 | } |
952 | 225 | } |
953 | - | |
954 | - $array = & $not_equals; | |
955 | - foreach ($equals as $key => $value) { | |
956 | - $array[] = $key . '=' . $value; | |
957 | - } | |
958 | - unset($equals); | |
959 | 226 | } |
960 | 227 | |
961 | - natsort($array); | |
962 | - return implode('&', $array); | |
228 | + return $array; | |
963 | 229 | } |
964 | 230 | |
231 | + | |
965 | 232 | // --------------------- |
966 | 233 | // Part One : Checker |
967 | 234 |
@@ -999,21 +266,6 @@ function generate_glob_regex($string = '', $divider = '/') | ||
999 | 266 | return $string; |
1000 | 267 | } |
1001 | 268 | |
1002 | -// Rough hostname checker | |
1003 | -// [OK] 192.168. | |
1004 | -// TODO: Strict digit, 0x, CIDR, IPv6 | |
1005 | -function is_ip($string = '') | |
1006 | -{ | |
1007 | - if (preg_match('/^' . | |
1008 | - '(?:[0-9]{1,3}\.){3}[0-9]{1,3}' . '|' . | |
1009 | - '(?:[0-9]{1,3}\.){1,3}' . '$/', | |
1010 | - $string)) { | |
1011 | - return 4; // Seems IPv4(dot-decimal) | |
1012 | - } else { | |
1013 | - return 0; // Seems not IP | |
1014 | - } | |
1015 | -} | |
1016 | - | |
1017 | 269 | // Generate host (FQDN, IPv4, ...) regex |
1018 | 270 | // 'localhost' : Matches with 'localhost' only |
1019 | 271 | // 'example.org' : Matches with 'example.org' only (See host_normalize() about 'www') |
@@ -1048,6 +300,21 @@ function generate_host_regex($string = '', $divider = '/') | ||
1048 | 300 | } |
1049 | 301 | } |
1050 | 302 | |
303 | +// Rough hostname checker | |
304 | +// [OK] 192.168. | |
305 | +// TODO: Strict digit, 0x, CIDR, IPv6 | |
306 | +function is_ip($string = '') | |
307 | +{ | |
308 | + if (preg_match('/^' . | |
309 | + '(?:[0-9]{1,3}\.){3}[0-9]{1,3}' . '|' . | |
310 | + '(?:[0-9]{1,3}\.){1,3}' . '$/', | |
311 | + $string)) { | |
312 | + return 4; // Seems IPv4(dot-decimal) | |
313 | + } else { | |
314 | + return 0; // Seems not IP | |
315 | + } | |
316 | +} | |
317 | + | |
1051 | 318 | function get_blocklist($list = '') |
1052 | 319 | { |
1053 | 320 | static $regexes; |
@@ -1145,6 +412,10 @@ function blocklist_distiller(& $hosts, $keys = array('goodhost', 'badhost'), $as | ||
1145 | 412 | return $blocked; |
1146 | 413 | } |
1147 | 414 | |
415 | + | |
416 | +// --------------------- | |
417 | + | |
418 | + | |
1148 | 419 | // Default (enabled) methods and thresholds (for content insertion) |
1149 | 420 | function check_uri_spam_method($times = 1, $t_area = 0, $rule = TRUE) |
1150 | 421 | { |
@@ -1405,62 +676,6 @@ function check_uri_spam($target = '', $method = array()) | ||
1405 | 676 | return $progress; |
1406 | 677 | } |
1407 | 678 | |
1408 | -// Count leaves (A leaf = value that is not an array, or an empty array) | |
1409 | -function array_count_leaves($array = array(), $count_empty = FALSE) | |
1410 | -{ | |
1411 | - if (! is_array($array) || (empty($array) && $count_empty)) return 1; | |
1412 | - | |
1413 | - // Recurse | |
1414 | - $count = 0; | |
1415 | - foreach ($array as $part) { | |
1416 | - $count += array_count_leaves($part, $count_empty); | |
1417 | - } | |
1418 | - return $count; | |
1419 | -} | |
1420 | - | |
1421 | -// An array-leaves to a flat array | |
1422 | -function array_flat_leaves($array, $unique = TRUE) | |
1423 | -{ | |
1424 | - if (! is_array($array)) return $array; | |
1425 | - | |
1426 | - $tmp = array(); | |
1427 | - foreach(array_keys($array) as $key) { | |
1428 | - if (is_array($array[$key])) { | |
1429 | - // Recurse | |
1430 | - foreach(array_flat_leaves($array[$key]) as $_value) { | |
1431 | - $tmp[] = $_value; | |
1432 | - } | |
1433 | - } else { | |
1434 | - $tmp[] = & $array[$key]; | |
1435 | - } | |
1436 | - } | |
1437 | - | |
1438 | - return $unique ? array_values(array_unique($tmp)) : $tmp; | |
1439 | -} | |
1440 | - | |
1441 | -// An array() to an array leaf | |
1442 | -function array_leaf($array = array('A', 'B', 'C.D'), $stem = FALSE, $edge = TRUE) | |
1443 | -{ | |
1444 | - if (! is_array($array)) return $array; | |
1445 | - | |
1446 | - $leaf = array(); | |
1447 | - $tmp = & $leaf; | |
1448 | - foreach($array as $arg) { | |
1449 | - if (! is_string($arg) && ! is_int($arg)) continue; | |
1450 | - $tmp[$arg] = array(); | |
1451 | - $parent = & $tmp; | |
1452 | - $tmp = & $tmp[$arg]; | |
1453 | - } | |
1454 | - if ($stem) { | |
1455 | - $parent[key($parent)] = & $edge; | |
1456 | - } else { | |
1457 | - $parent = key($parent); | |
1458 | - } | |
1459 | - | |
1460 | - return $leaf; // array('A' => array('B' => 'C.D')) | |
1461 | -} | |
1462 | - | |
1463 | - | |
1464 | 679 | // --------------------- |
1465 | 680 | // Reporting |
1466 | 681 |
@@ -1564,20 +779,6 @@ function summarize_detail_newtral($progress = array()) | ||
1564 | 779 | ')'; |
1565 | 780 | } |
1566 | 781 | |
1567 | -// ksort() by domain | |
1568 | -function ksort_by_domain(& $array) | |
1569 | -{ | |
1570 | - $sort = array(); | |
1571 | - foreach(array_keys($array) as $key) { | |
1572 | - $sort[delimiter_reverse($key)] = $key; | |
1573 | - } | |
1574 | - ksort($sort, SORT_STRING); | |
1575 | - $result = array(); | |
1576 | - foreach($sort as $key) { | |
1577 | - $result[$key] = & $array[$key]; | |
1578 | - } | |
1579 | - $array = $result; | |
1580 | -} | |
1581 | 782 | |
1582 | 783 | // Check responsibility-root of the FQDN |
1583 | 784 | // 'foo.bar.example.com' => 'example.com' (.com has the last whois for it) |
@@ -1586,658 +787,22 @@ function ksort_by_domain(& $array) | ||
1586 | 787 | // 'foo.bar.example.act.edu.au' => 'example.act.edu.au' (.act.edu.au has the last whois for it) |
1587 | 788 | function whois_responsibility($fqdn = 'foo.bar.example.com', $parent = FALSE, $implicit = TRUE) |
1588 | 789 | { |
1589 | - // Domains who have 2nd and/or 3rd level domains | |
1590 | - static $domain = array( | |
1591 | - | |
1592 | - // ccTLD: Australia | |
1593 | - // http://www.auda.org.au/ | |
1594 | - // NIC : http://www.aunic.net/ | |
1595 | - // Whois: http://www.ausregistry.com.au/ | |
1596 | - 'au' => array( | |
1597 | - // .au Second Level Domains | |
1598 | - // http://www.auda.org.au/domains/ | |
1599 | - 'asn' => TRUE, | |
1600 | - 'com' => TRUE, | |
1601 | - 'conf' => TRUE, | |
1602 | - 'csiro' => TRUE, | |
1603 | - 'edu' => array( // http://www.domainname.edu.au/ | |
1604 | - // Geographic | |
1605 | - 'act' => TRUE, | |
1606 | - 'nt' => TRUE, | |
1607 | - 'nsw' => TRUE, | |
1608 | - 'qld' => TRUE, | |
1609 | - 'sa' => TRUE, | |
1610 | - 'tas' => TRUE, | |
1611 | - 'vic' => TRUE, | |
1612 | - 'wa' => TRUE, | |
1613 | - ), | |
1614 | - 'gov' => array( | |
1615 | - // Geographic | |
1616 | - 'act' => TRUE, // Australian Capital Territory | |
1617 | - 'nt' => TRUE, // Northern Territory | |
1618 | - 'nsw' => TRUE, // New South Wales | |
1619 | - 'qld' => TRUE, // Queensland | |
1620 | - 'sa' => TRUE, // South Australia | |
1621 | - 'tas' => TRUE, // Tasmania | |
1622 | - 'vic' => TRUE, // Victoria | |
1623 | - 'wa' => TRUE, // Western Australia | |
1624 | - ), | |
1625 | - 'id' => TRUE, | |
1626 | - 'net' => TRUE, | |
1627 | - 'org' => TRUE, | |
1628 | - 'info' => TRUE, | |
1629 | - ), | |
1630 | - | |
1631 | - // ccTLD: Bahrain | |
1632 | - // NIC : http://www.inet.com.bh/ (.bh policies not found) | |
1633 | - // Whois: (Not available) http://www.inet.com.bh/ | |
1634 | - 'bh' => array( | |
1635 | - // Observed | |
1636 | - 'com' => TRUE, | |
1637 | - 'edu' => TRUE, | |
1638 | - 'gov' => TRUE, | |
1639 | - 'org' => TRUE, | |
1640 | - ), | |
1641 | - | |
1642 | - // ccTLD: China | |
1643 | - // NIC : http://www.cnnic.net.cn/en/index/ | |
1644 | - // Whois: http://ewhois.cnnic.cn/ | |
1645 | - 'cn' => array( | |
1646 | - // Provisional Administrative Rules for Registration of Domain Names in China | |
1647 | - // http://www.cnnic.net.cn/html/Dir/2003/11/27/1520.htm | |
1648 | - | |
1649 | - // Organizational | |
1650 | - 'ac' => TRUE, | |
1651 | - 'com' => TRUE, | |
1652 | - 'edu' => TRUE, | |
1653 | - 'gov' => TRUE, | |
1654 | - 'net' => TRUE, | |
1655 | - 'org' => TRUE, | |
1656 | - | |
1657 | - // Geographic | |
1658 | - 'ah' => TRUE, | |
1659 | - 'bj' => TRUE, | |
1660 | - 'cq' => TRUE, | |
1661 | - 'fj' => TRUE, | |
1662 | - 'gd' => TRUE, | |
1663 | - 'gs' => TRUE, | |
1664 | - 'gx' => TRUE, | |
1665 | - 'gz' => TRUE, | |
1666 | - 'ha' => TRUE, | |
1667 | - 'hb' => TRUE, | |
1668 | - 'he' => TRUE, | |
1669 | - 'hi' => TRUE, | |
1670 | - 'hk' => TRUE, | |
1671 | - 'hl' => TRUE, | |
1672 | - 'hn' => TRUE, | |
1673 | - 'jl' => TRUE, | |
1674 | - 'js' => TRUE, | |
1675 | - 'jx' => TRUE, | |
1676 | - 'ln' => TRUE, | |
1677 | - 'mo' => TRUE, | |
1678 | - 'nm' => TRUE, | |
1679 | - 'nx' => TRUE, | |
1680 | - 'qh' => TRUE, | |
1681 | - 'sc' => TRUE, | |
1682 | - 'sd' => TRUE, | |
1683 | - 'sh' => TRUE, | |
1684 | - 'sn' => TRUE, | |
1685 | - 'sx' => TRUE, | |
1686 | - 'tj' => TRUE, | |
1687 | - 'tw' => TRUE, | |
1688 | - 'xj' => TRUE, | |
1689 | - 'xz' => TRUE, | |
1690 | - 'yn' => TRUE, | |
1691 | - 'zj' => TRUE, | |
1692 | - ), | |
1693 | - | |
1694 | - // ccTLD: India | |
1695 | - // NIC : http://www.inregistry.in/ | |
1696 | - // Whois: http://www.inregistry.in/whois_search/ | |
1697 | - 'in' => array( | |
1698 | - // Policies http://www.inregistry.in/policies/ | |
1699 | - 'ac' => TRUE, | |
1700 | - 'co' => TRUE, | |
1701 | - 'firm' => TRUE, | |
1702 | - 'gen' => TRUE, | |
1703 | - 'gov' => TRUE, | |
1704 | - 'ind' => TRUE, | |
1705 | - 'mil' => TRUE, | |
1706 | - 'net' => TRUE, | |
1707 | - 'org' => TRUE, | |
1708 | - 'res' => TRUE, | |
1709 | - // Reserved Names by the government (for the 2nd level) | |
1710 | - // http://www.inregistry.in/policies/reserved_names | |
1711 | - ), | |
1712 | - | |
1713 | - // ccTLD: South Korea | |
1714 | - // NIC : http://www.nic.or.kr/english/ | |
1715 | - // Whois: http://whois.nida.or.kr/english/ | |
1716 | - 'kr' => array( | |
1717 | - // .kr domain policy [appendix 1] : Qualifications for Second Level Domains | |
1718 | - // http://domain.nida.or.kr/eng/policy.jsp | |
1719 | - | |
1720 | - // Organizational | |
1721 | - 'co' => TRUE, | |
1722 | - 'ne ' => TRUE, | |
1723 | - 'or ' => TRUE, | |
1724 | - 're ' => TRUE, | |
1725 | - 'pe' => TRUE, | |
1726 | - 'go ' => TRUE, | |
1727 | - 'mil' => TRUE, | |
1728 | - 'ac' => TRUE, | |
1729 | - 'hs' => TRUE, | |
1730 | - 'ms' => TRUE, | |
1731 | - 'es' => TRUE, | |
1732 | - 'sc' => TRUE, | |
1733 | - 'kg' => TRUE, | |
1734 | - | |
1735 | - // Geographic | |
1736 | - 'seoul' => TRUE, | |
1737 | - 'busan' => TRUE, | |
1738 | - 'daegu' => TRUE, | |
1739 | - 'incheon' => TRUE, | |
1740 | - 'gwangju' => TRUE, | |
1741 | - 'daejeon' => TRUE, | |
1742 | - 'ulsan' => TRUE, | |
1743 | - 'gyeonggi' => TRUE, | |
1744 | - 'gangwon' => TRUE, | |
1745 | - 'chungbuk' => TRUE, | |
1746 | - 'chungnam' => TRUE, | |
1747 | - 'jeonbuk' => TRUE, | |
1748 | - 'jeonnam' => TRUE, | |
1749 | - 'gyeongbuk' => TRUE, | |
1750 | - 'gyeongnam' => TRUE, | |
1751 | - 'jeju' => TRUE, | |
1752 | - ), | |
1753 | - | |
1754 | - // ccTLD: Japan | |
1755 | - // NIC : http://jprs.co.jp/en/ | |
1756 | - // Whois: http://whois.jprs.jp/en/ | |
1757 | - 'jp' => array( | |
1758 | - // Guide to JP Domain Name | |
1759 | - // http://jprs.co.jp/en/jpdomain.html | |
1760 | - | |
1761 | - // Organizational | |
1762 | - 'ac' => TRUE, | |
1763 | - 'ad' => TRUE, | |
1764 | - 'co' => TRUE, | |
1765 | - 'ed' => TRUE, | |
1766 | - 'go' => TRUE, | |
1767 | - 'gr' => TRUE, | |
1768 | - 'lg' => TRUE, | |
1769 | - 'ne' => TRUE, | |
1770 | - 'or' => TRUE, | |
1771 | - | |
1772 | - // Geographic | |
1773 | - // | |
1774 | - // Examples for 3rd level domains | |
1775 | - //'kumamoto' => array( | |
1776 | - // // http://www.pref.kumamoto.jp/link/list.asp#4 | |
1777 | - // 'amakusa' => TRUE, | |
1778 | - // 'hitoyoshi' => TRUE, | |
1779 | - // 'jonan' => TRUE, | |
1780 | - // 'kumamoto' => TRUE, | |
1781 | - // ... | |
1782 | - //), | |
1783 | - 'aichi' => TRUE, | |
1784 | - 'akita' => TRUE, | |
1785 | - 'aomori' => TRUE, | |
1786 | - 'chiba' => TRUE, | |
1787 | - 'ehime' => TRUE, | |
1788 | - 'fukui' => TRUE, | |
1789 | - 'fukuoka' => TRUE, | |
1790 | - 'fukushima' => TRUE, | |
1791 | - 'gifu' => TRUE, | |
1792 | - 'gunma' => TRUE, | |
1793 | - 'hiroshima' => TRUE, | |
1794 | - 'hokkaido' => TRUE, | |
1795 | - 'hyogo' => TRUE, | |
1796 | - 'ibaraki' => TRUE, | |
1797 | - 'ishikawa' => TRUE, | |
1798 | - 'iwate' => TRUE, | |
1799 | - 'kagawa' => TRUE, | |
1800 | - 'kagoshima' => TRUE, | |
1801 | - 'kanagawa' => TRUE, | |
1802 | - 'kawasaki' => TRUE, | |
1803 | - 'kitakyushu'=> TRUE, | |
1804 | - 'kobe' => TRUE, | |
1805 | - 'kochi' => TRUE, | |
1806 | - 'kumamoto' => TRUE, | |
1807 | - 'kyoto' => TRUE, | |
1808 | - 'mie' => TRUE, | |
1809 | - 'miyagi' => TRUE, | |
1810 | - 'miyazaki' => TRUE, | |
1811 | - 'nagano' => TRUE, | |
1812 | - 'nagasaki' => TRUE, | |
1813 | - 'nagoya' => TRUE, | |
1814 | - 'nara' => TRUE, | |
1815 | - 'niigata' => TRUE, | |
1816 | - 'oita' => TRUE, | |
1817 | - 'okayama' => TRUE, | |
1818 | - 'okinawa' => TRUE, | |
1819 | - 'osaka' => TRUE, | |
1820 | - 'saga' => TRUE, | |
1821 | - 'saitama' => TRUE, | |
1822 | - 'sapporo' => TRUE, | |
1823 | - 'sendai' => TRUE, | |
1824 | - 'shiga' => TRUE, | |
1825 | - 'shimane' => TRUE, | |
1826 | - 'shizuoka' => TRUE, | |
1827 | - 'tochigi' => TRUE, | |
1828 | - 'tokushima' => TRUE, | |
1829 | - 'tokyo' => TRUE, | |
1830 | - 'tottori' => TRUE, | |
1831 | - 'toyama' => TRUE, | |
1832 | - 'wakayama' => TRUE, | |
1833 | - 'yamagata' => TRUE, | |
1834 | - 'yamaguchi' => TRUE, | |
1835 | - 'yamanashi' => TRUE, | |
1836 | - 'yokohama' => TRUE, | |
1837 | - ), | |
790 | + static $domain; | |
1838 | 791 | |
1839 | - // ccTLD: Mexico | |
1840 | - // NIC : http://www.nic.mx/ | |
1841 | - // Whois: http://www.nic.mx/es/Busqueda.Who_Is | |
1842 | - 'mx' => array( | |
1843 | - // Politicas Generales de Nombres de Dominio | |
1844 | - // http://www.nic.mx/es/Politicas?CATEGORY=INDICE | |
1845 | - 'com' => TRUE, | |
1846 | - 'edu' => TRUE, | |
1847 | - 'gob' => TRUE, | |
1848 | - 'net' => TRUE, | |
1849 | - 'org' => TRUE, | |
1850 | - ), | |
1851 | - | |
1852 | - // ccTLD: Russia | |
1853 | - // NIC : http://www.cctld.ru/en/ | |
1854 | - // Whois: http://www.ripn.net:8080/nic/whois/en/ | |
1855 | - 'ru' => array( | |
1856 | - // List of Reserved second-level Domain Names | |
1857 | - // http://www.cctld.ru/en/doc/detail.php?id21=20&i21=2 | |
1858 | - | |
1859 | - // Organizational | |
1860 | - 'ac' => TRUE, | |
1861 | - 'com' => TRUE, | |
1862 | - 'edu' => TRUE, | |
1863 | - 'gov' => TRUE, | |
1864 | - 'int' => TRUE, | |
1865 | - 'mil' => TRUE, | |
1866 | - 'net' => TRUE, | |
1867 | - 'org' => TRUE, | |
1868 | - 'pp' => TRUE, | |
1869 | - //'test' => TRUE, | |
1870 | - | |
1871 | - // Geographic | |
1872 | - 'adygeya' => TRUE, | |
1873 | - 'altai' => TRUE, | |
1874 | - 'amur' => TRUE, | |
1875 | - 'amursk' => TRUE, | |
1876 | - 'arkhangelsk' => TRUE, | |
1877 | - 'astrakhan' => TRUE, | |
1878 | - 'baikal' => TRUE, | |
1879 | - 'bashkiria' => TRUE, | |
1880 | - 'belgorod' => TRUE, | |
1881 | - 'bir' => TRUE, | |
1882 | - 'bryansk' => TRUE, | |
1883 | - 'buryatia' => TRUE, | |
1884 | - 'cbg' => TRUE, | |
1885 | - 'chel' => TRUE, | |
1886 | - 'chelyabinsk' => TRUE, | |
1887 | - 'chita' => TRUE, | |
1888 | - 'chukotka' => TRUE, | |
1889 | - 'chuvashia' => TRUE, | |
1890 | - 'cmw' => TRUE, | |
1891 | - 'dagestan' => TRUE, | |
1892 | - 'dudinka' => TRUE, | |
1893 | - 'e-burg' => TRUE, | |
1894 | - 'fareast' => TRUE, | |
1895 | - 'grozny' => TRUE, | |
1896 | - 'irkutsk' => TRUE, | |
1897 | - 'ivanovo' => TRUE, | |
1898 | - 'izhevsk' => TRUE, | |
1899 | - 'jamal' => TRUE, | |
1900 | - 'jar' => TRUE, | |
1901 | - 'joshkar-ola' => TRUE, | |
1902 | - 'k-uralsk' => TRUE, | |
1903 | - 'kalmykia' => TRUE, | |
1904 | - 'kaluga' => TRUE, | |
1905 | - 'kamchatka' => TRUE, | |
1906 | - 'karelia' => TRUE, | |
1907 | - 'kazan' => TRUE, | |
1908 | - 'kchr' => TRUE, | |
1909 | - 'kemerovo' => TRUE, | |
1910 | - 'khabarovsk' => TRUE, | |
1911 | - 'khakassia' => TRUE, | |
1912 | - 'khv' => TRUE, | |
1913 | - 'kirov' => TRUE, | |
1914 | - 'kms' => TRUE, | |
1915 | - 'koenig' => TRUE, | |
1916 | - 'komi' => TRUE, | |
1917 | - 'kostroma' => TRUE, | |
1918 | - 'krasnoyarsk' => TRUE, | |
1919 | - 'kuban' => TRUE, | |
1920 | - 'kurgan' => TRUE, | |
1921 | - 'kursk' => TRUE, | |
1922 | - 'kustanai' => TRUE, | |
1923 | - 'kuzbass' => TRUE, | |
1924 | - 'lipetsk' => TRUE, | |
1925 | - 'magadan' => TRUE, | |
1926 | - 'magnitka' => TRUE, | |
1927 | - 'mari-el' => TRUE, | |
1928 | - 'mari' => TRUE, | |
1929 | - 'marine' => TRUE, | |
1930 | - 'mordovia' => TRUE, | |
1931 | - 'mosreg' => TRUE, | |
1932 | - 'msk' => TRUE, | |
1933 | - 'murmansk' => TRUE, | |
1934 | - 'mytis' => TRUE, | |
1935 | - 'nakhodka' => TRUE, | |
1936 | - 'nalchik' => TRUE, | |
1937 | - 'nkz' => TRUE, | |
1938 | - 'nnov' => TRUE, | |
1939 | - 'norilsk' => TRUE, | |
1940 | - 'nov' => TRUE, | |
1941 | - 'novosibirsk' => TRUE, | |
1942 | - 'nsk' => TRUE, | |
1943 | - 'omsk' => TRUE, | |
1944 | - 'orenburg' => TRUE, | |
1945 | - 'oryol' => TRUE, | |
1946 | - 'oskol' => TRUE, | |
1947 | - 'palana' => TRUE, | |
1948 | - 'penza' => TRUE, | |
1949 | - 'perm' => TRUE, | |
1950 | - 'pskov' => TRUE, | |
1951 | - 'ptz' => TRUE, | |
1952 | - 'pyatigorsk' => TRUE, | |
1953 | - 'rnd' => TRUE, | |
1954 | - 'rubtsovsk' => TRUE, | |
1955 | - 'ryazan' => TRUE, | |
1956 | - 'sakhalin' => TRUE, | |
1957 | - 'samara' => TRUE, | |
1958 | - 'saratov' => TRUE, | |
1959 | - 'simbirsk' => TRUE, | |
1960 | - 'smolensk' => TRUE, | |
1961 | - 'snz' => TRUE, | |
1962 | - 'spb' => TRUE, | |
1963 | - 'stavropol' => TRUE, | |
1964 | - 'stv' => TRUE, | |
1965 | - 'surgut' => TRUE, | |
1966 | - 'syzran' => TRUE, | |
1967 | - 'tambov' => TRUE, | |
1968 | - 'tatarstan' => TRUE, | |
1969 | - 'tom' => TRUE, | |
1970 | - 'tomsk' => TRUE, | |
1971 | - 'tsaritsyn' => TRUE, | |
1972 | - 'tsk' => TRUE, | |
1973 | - 'tula' => TRUE, | |
1974 | - 'tuva' => TRUE, | |
1975 | - 'tver' => TRUE, | |
1976 | - 'tyumen' => TRUE, | |
1977 | - 'udm' => TRUE, | |
1978 | - 'udmurtia' => TRUE, | |
1979 | - 'ulan-ude' => TRUE, | |
1980 | - 'vdonsk' => TRUE, | |
1981 | - 'vladikavkaz' => TRUE, | |
1982 | - 'vladimir' => TRUE, | |
1983 | - 'vladivostok' => TRUE, | |
1984 | - 'volgograd' => TRUE, | |
1985 | - 'vologda' => TRUE, | |
1986 | - 'voronezh' => TRUE, | |
1987 | - 'vrn' => TRUE, | |
1988 | - 'vyatka' => TRUE, | |
1989 | - 'yakutia' => TRUE, | |
1990 | - 'yamal' => TRUE, | |
1991 | - 'yaroslavl' => TRUE, | |
1992 | - 'yekaterinburg' => TRUE, | |
1993 | - 'yuzhno-sakhalinsk' => TRUE, | |
1994 | - 'zgrad' => TRUE, | |
1995 | - ), | |
1996 | - | |
1997 | - // ccTLD: Seychelles | |
1998 | - // NIC : http://www.nic.sc/ | |
1999 | - // Whois: (Not available) | |
2000 | - 'sc' => array( | |
2001 | - // http://www.nic.sc/policies.html | |
2002 | - 'com' => TRUE, | |
2003 | - 'edu' => TRUE, | |
2004 | - 'gov' => TRUE, | |
2005 | - 'net' => TRUE, | |
2006 | - 'org' => TRUE, | |
2007 | - ), | |
2008 | - | |
2009 | - // ccTLD: Taiwan | |
2010 | - // NIC : http://www.twnic.net.tw/ | |
2011 | - // Whois: http://www.twnic.net.tw/ | |
2012 | - 'tw' => array( | |
2013 | - // Guidelines for Administration of Domain Name Registration | |
2014 | - // http://www.twnic.net.tw/english/dn/dn_02.htm | |
2015 | - // II. Types of TWNIC Domain Names and Application Requirements | |
2016 | - // http://www.twnic.net.tw/english/dn/dn_02_b.htm | |
2017 | - 'club' => TRUE, | |
2018 | - 'com' => TRUE, | |
2019 | - 'ebiz' => TRUE, | |
2020 | - 'edu' => TRUE, | |
2021 | - 'game' => TRUE, | |
2022 | - 'gov' => TRUE, | |
2023 | - 'idv' => TRUE, | |
2024 | - 'mil' => TRUE, | |
2025 | - 'net' => TRUE, | |
2026 | - 'org' => TRUE, | |
2027 | - // Reserved words for the 2nd level | |
2028 | - // http://mydn.twnic.net.tw/en/dn02/INDEX.htm | |
2029 | - ), | |
2030 | - | |
2031 | - // ccTLD: Tanzania | |
2032 | - // NIC : http://www.psg.com/dns/tz/ | |
2033 | - // Whois: (Not available) | |
2034 | - 'tz' => array( | |
2035 | - // TZ DOMAIN NAMING STRUCTURE | |
2036 | - // http://www.psg.com/dns/tz/tz.txt | |
2037 | - 'ac' => TRUE, | |
2038 | - 'co' => TRUE, | |
2039 | - 'go' => TRUE, | |
2040 | - 'ne' => TRUE, | |
2041 | - 'or' => TRUE, | |
2042 | - ), | |
2043 | - | |
2044 | - // ccTLD: Ukraine | |
2045 | - // NIC : http://www.nic.net.ua/ | |
2046 | - // Whois: http://whois.com.ua/ | |
2047 | - 'ua' => array( | |
2048 | - // policy for alternative 2nd level domain names (a2ld) | |
2049 | - // http://www.nic.net.ua/doc/a2ld | |
2050 | - // http://whois.com.ua/ | |
2051 | - 'cherkassy' => TRUE, | |
2052 | - 'chernigov' => TRUE, | |
2053 | - 'chernovtsy' => TRUE, | |
2054 | - 'ck' => TRUE, | |
2055 | - 'cn' => TRUE, | |
2056 | - 'com' => TRUE, | |
2057 | - 'crimea' => TRUE, | |
2058 | - 'cv' => TRUE, | |
2059 | - 'dn' => TRUE, | |
2060 | - 'dnepropetrovsk' => TRUE, | |
2061 | - 'donetsk' => TRUE, | |
2062 | - 'dp' => TRUE, | |
2063 | - 'edu' => TRUE, | |
2064 | - 'gov' => TRUE, | |
2065 | - 'if' => TRUE, | |
2066 | - 'ivano-frankivsk' => TRUE, | |
2067 | - 'kh' => TRUE, | |
2068 | - 'kharkov' => TRUE, | |
2069 | - 'kherson' => TRUE, | |
2070 | - 'kiev' => TRUE, | |
2071 | - 'kirovograd' => TRUE, | |
2072 | - 'km' => TRUE, | |
2073 | - 'kr' => TRUE, | |
2074 | - 'ks' => TRUE, | |
2075 | - 'lg' => TRUE, | |
2076 | - 'lugansk' => TRUE, | |
2077 | - 'lutsk' => TRUE, | |
2078 | - 'lviv' => TRUE, | |
2079 | - 'mk' => TRUE, | |
2080 | - 'net' => TRUE, | |
2081 | - 'nikolaev' => TRUE, | |
2082 | - 'od' => TRUE, | |
2083 | - 'odessa' => TRUE, | |
2084 | - 'org' => TRUE, | |
2085 | - 'pl' => TRUE, | |
2086 | - 'poltava' => TRUE, | |
2087 | - 'rovno' => TRUE, | |
2088 | - 'rv' => TRUE, | |
2089 | - 'sebastopol' => TRUE, | |
2090 | - 'sumy' => TRUE, | |
2091 | - 'te' => TRUE, | |
2092 | - 'ternopil' => TRUE, | |
2093 | - 'uz' => TRUE, | |
2094 | - 'uzhgorod' => TRUE, | |
2095 | - 'vinnica' => TRUE, | |
2096 | - 'vn' => TRUE, | |
2097 | - 'zaporizhzhe' => TRUE, | |
2098 | - 'zhitomir' => TRUE, | |
2099 | - 'zp' => TRUE, | |
2100 | - 'zt' => TRUE, | |
2101 | - ), | |
2102 | - | |
2103 | - // ccTLD: United Kingdom | |
2104 | - // NIC : http://www.nic.uk/ | |
2105 | - 'uk' => array( | |
2106 | - // Second Level Domains | |
2107 | - // http://www.nic.uk/registrants/aboutdomainnames/sld/ | |
2108 | - 'co' => TRUE, | |
2109 | - 'ltd' => TRUE, | |
2110 | - 'me' => TRUE, | |
2111 | - 'net' => TRUE, | |
2112 | - 'nic' => TRUE, | |
2113 | - 'org' => TRUE, | |
2114 | - 'plc' => TRUE, | |
2115 | - 'sch' => TRUE, | |
2116 | - | |
2117 | - // Delegated Second Level Domains | |
2118 | - // http://www.nic.uk/registrants/aboutdomainnames/sld/delegated/ | |
2119 | - 'ac' => TRUE, | |
2120 | - 'gov' => TRUE, | |
2121 | - 'mil' => TRUE, | |
2122 | - 'mod' => TRUE, | |
2123 | - 'nhs' => TRUE, | |
2124 | - 'police' => TRUE, | |
2125 | - ), | |
2126 | - | |
2127 | - // ccTLD: United States of America | |
2128 | - // NIC : http://nic.us/ | |
2129 | - // Whois: http://whois.us/ | |
2130 | - 'us' => array( | |
2131 | - // See RFC1480 | |
2132 | - | |
2133 | - // Organizational | |
2134 | - 'dni', | |
2135 | - 'fed', | |
2136 | - 'isa', | |
2137 | - 'kids', | |
2138 | - 'nsn', | |
2139 | - | |
2140 | - // Geographical | |
2141 | - // United States Postal Service: State abbreviations (for postal codes) | |
2142 | - // http://www.usps.com/ncsc/lookups/abbreviations.html | |
2143 | - 'ak' => TRUE, // Alaska | |
2144 | - 'al' => TRUE, // Alabama | |
2145 | - 'ar' => TRUE, // Arkansas | |
2146 | - 'as' => TRUE, // American samoa | |
2147 | - 'az' => TRUE, // Arizona | |
2148 | - 'ca' => TRUE, // California | |
2149 | - 'co' => TRUE, // Colorado | |
2150 | - 'ct' => TRUE, // Connecticut | |
2151 | - 'dc' => TRUE, // District of Columbia | |
2152 | - 'de' => TRUE, // Delaware | |
2153 | - 'fl' => TRUE, // Florida | |
2154 | - 'fm' => TRUE, // Federated states of Micronesia | |
2155 | - 'ga' => TRUE, // Georgia | |
2156 | - 'gu' => TRUE, // Guam | |
2157 | - 'hi' => TRUE, // Hawaii | |
2158 | - 'ia' => TRUE, // Iowa | |
2159 | - 'id' => TRUE, // Idaho | |
2160 | - 'il' => TRUE, // Illinois | |
2161 | - 'in' => TRUE, // Indiana | |
2162 | - 'ks' => TRUE, // Kansas | |
2163 | - 'ky' => TRUE, // Kentucky | |
2164 | - 'la' => TRUE, // Louisiana | |
2165 | - 'ma' => TRUE, // Massachusetts | |
2166 | - 'md' => TRUE, // Maryland | |
2167 | - 'me' => TRUE, // Maine | |
2168 | - 'mh' => TRUE, // Marshall Islands | |
2169 | - 'mi' => TRUE, // Michigan | |
2170 | - 'mn' => TRUE, // Minnesota | |
2171 | - 'mo' => TRUE, // Missouri | |
2172 | - 'mp' => TRUE, // Northern mariana islands | |
2173 | - 'ms' => TRUE, // Mississippi | |
2174 | - 'mt' => TRUE, // Montana | |
2175 | - 'nc' => TRUE, // North Carolina | |
2176 | - 'nd' => TRUE, // North Dakota | |
2177 | - 'ne' => TRUE, // Nebraska | |
2178 | - 'nh' => TRUE, // New Hampshire | |
2179 | - 'nj' => TRUE, // New Jersey | |
2180 | - 'nm' => TRUE, // New Mexico | |
2181 | - 'nv' => TRUE, // Nevada | |
2182 | - 'ny' => TRUE, // New York | |
2183 | - 'oh' => TRUE, // Ohio | |
2184 | - 'ok' => TRUE, // Oklahoma | |
2185 | - 'or' => TRUE, // Oregon | |
2186 | - 'pa' => TRUE, // Pennsylvania | |
2187 | - 'pr' => TRUE, // Puerto Rico | |
2188 | - 'pw' => TRUE, // Palau | |
2189 | - 'ri' => TRUE, // Rhode Island | |
2190 | - 'sc' => TRUE, // South Carolina | |
2191 | - 'sd' => TRUE, // South Dakota | |
2192 | - 'tn' => TRUE, // Tennessee | |
2193 | - 'tx' => TRUE, // Texas | |
2194 | - 'ut' => TRUE, // Utah | |
2195 | - 'va' => TRUE, // Virginia | |
2196 | - 'vi' => TRUE, // Virgin Islands | |
2197 | - 'vt' => TRUE, // Vermont | |
2198 | - 'wa' => TRUE, // Washington | |
2199 | - 'wi' => TRUE, // Wisconsin | |
2200 | - 'wv' => TRUE, // West Virginia | |
2201 | - 'wy' => TRUE, // Wyoming | |
2202 | - ), | |
792 | + if ($fqdn === NULL) { | |
793 | + $domain = NULL; // Unset | |
794 | + return ''; | |
795 | + } | |
796 | + if (! is_string($fqdn)) return ''; | |
2203 | 797 | |
2204 | - // ccTLD: South Africa | |
2205 | - // NIC : http://www.zadna.org.za/ | |
2206 | - // Whois: | |
2207 | - // ac.za http://www.tenet.ac.za/cgi/cgi_domainquery.exe | |
2208 | - // co.za http://co.za/whois.shtml | |
2209 | - // gov.za http://dnsadmin.gov.za/ | |
2210 | - // org.za http://www.org.za/ | |
2211 | - 'za' => array( | |
2212 | - // Second-level subdomains of .ZA | |
2213 | - // http://www.zadna.org.za/slds.html | |
2214 | - 'ac' => TRUE, | |
2215 | - 'city' => TRUE, | |
2216 | - 'co' => TRUE, | |
2217 | - 'edu' => TRUE, | |
2218 | - 'gov' => TRUE, | |
2219 | - 'law' => TRUE, | |
2220 | - 'mil' => TRUE, | |
2221 | - 'nom' => TRUE, | |
2222 | - 'org' => TRUE, | |
2223 | - 'school' => array( | |
2224 | - // Provincial Domains | |
2225 | - // http://www.esn.org.za/dns/ | |
2226 | - 'ecape' => TRUE, | |
2227 | - 'fs.' => TRUE, | |
2228 | - 'gp' => TRUE, | |
2229 | - 'kzn' => TRUE, | |
2230 | - 'lp' => TRUE, | |
2231 | - 'mpm' => TRUE, | |
2232 | - 'ncape' => TRUE, | |
2233 | - 'nw' => TRUE, | |
2234 | - 'wcape' => TRUE, | |
2235 | - ), | |
2236 | - ), | |
2237 | - ); | |
798 | + if (is_ip($fqdn)) return $fqdn; | |
2238 | 799 | |
2239 | - if (! is_string($fqdn)) return ''; | |
2240 | - if (is_ip($fqdn)) return $fqdn; | |
800 | + if (! isset($domain)) { | |
801 | + $domain = array(); | |
802 | + if (file_exists(DOMAIN_INI_FILE)) { | |
803 | + include(DOMAIN_INI_FILE); // Set | |
804 | + } | |
805 | + } | |
2241 | 806 | |
2242 | 807 | $result = array(); |
2243 | 808 | $dcursor = & $domain; |
@@ -2275,6 +840,7 @@ function whois_responsibility($fqdn = 'foo.bar.example.com', $parent = FALSE, $i | ||
2275 | 840 | function spam_dispose() |
2276 | 841 | { |
2277 | 842 | get_blocklist(NULL); |
843 | + whois_responsibility(NULL); | |
2278 | 844 | } |
2279 | 845 | |
2280 | 846 | // Common behavior for blocking
@@ -0,0 +1,788 @@ | ||
1 | +<?php | |
2 | +// $Id: spam_pickup.php,v 1.1 2007/07/03 14:47:20 henoheno Exp $ | |
3 | +// Copyright (C) 2006-2007 PukiWiki Developers Team | |
4 | +// License: GPL v2 or (at your option) any later version | |
5 | +// | |
6 | +// Functions for Concept-work of spam-uri metrics | |
7 | +// | |
8 | + | |
9 | +// --------------------- | |
10 | +// URI pickup | |
11 | + | |
12 | +// Return an array of URIs in the $string | |
13 | +// [OK] http://nasty.example.org#nasty_string | |
14 | +// [OK] http://nasty.example.org:80/foo/xxx#nasty_string/bar | |
15 | +// [OK] ftp://nasty.example.org:80/dfsdfs | |
16 | +// [OK] ftp://cnn.example.com&story=breaking_news@10.0.0.1/top_story.htm (from RFC3986) | |
17 | +function uri_pickup($string = '') | |
18 | +{ | |
19 | + if (! is_string($string)) return array(); | |
20 | + | |
21 | + // Not available for: IDN(ignored) | |
22 | + $array = array(); | |
23 | + preg_match_all( | |
24 | + // scheme://userinfo@host:port/path/or/pathinfo/maybefile.and?query=string#fragment | |
25 | + // Refer to RFC3986 (Regex below is not strict) | |
26 | + '#(\b[a-z][a-z0-9.+-]{1,8}):[/\\\]+' . // 1: Scheme | |
27 | + '(?:' . | |
28 | + '([^\s<>"\'\[\]/\#?@]*)' . // 2: Userinfo (Username) | |
29 | + '@)?' . | |
30 | + '(' . | |
31 | + // 3: Host | |
32 | + '\[[0-9a-f:.]+\]' . '|' . // IPv6([colon-hex and dot]): RFC2732 | |
33 | + '(?:[0-9]{1,3}\.){3}[0-9]{1,3}' . '|' . // IPv4(dot-decimal): 001.22.3.44 | |
34 | + '[a-z0-9_-][a-z0-9_.-]+[a-z0-9_-]' . // hostname(FQDN) : foo.example.org | |
35 | + ')' . | |
36 | + '(?::([0-9]*))?' . // 4: Port | |
37 | + '((?:/+[^\s<>"\'\[\]/\#]+)*/+)?' . // 5: Directory path or path-info | |
38 | + '([^\s<>"\'\[\]\#?]+)?' . // 6: File? | |
39 | + '(?:\?([^\s<>"\'\[\]\#]+))?' . // 7: Query string | |
40 | + '(?:\#([a-z0-9._~%!$&\'()*+,;=:@-]*))?' . // 8: Fragment | |
41 | + '#i', | |
42 | + $string, $array, PREG_SET_ORDER | PREG_OFFSET_CAPTURE | |
43 | + ); | |
44 | + | |
45 | + // Format the $array | |
46 | + static $parts = array( | |
47 | + 1 => 'scheme', 2 => 'userinfo', 3 => 'host', 4 => 'port', | |
48 | + 5 => 'path', 6 => 'file', 7 => 'query', 8 => 'fragment' | |
49 | + ); | |
50 | + $default = array(''); | |
51 | + foreach(array_keys($array) as $uri) { | |
52 | + $_uri = & $array[$uri]; | |
53 | + array_rename_keys($_uri, $parts, TRUE, $default); | |
54 | + $offset = $_uri['scheme'][1]; // Scheme's offset = URI's offset | |
55 | + foreach(array_keys($_uri) as $part) { | |
56 | + $_uri[$part] = & $_uri[$part][0]; // Remove offsets | |
57 | + } | |
58 | + } | |
59 | + | |
60 | + foreach(array_keys($array) as $uri) { | |
61 | + $_uri = & $array[$uri]; | |
62 | + if ($_uri['scheme'] === '') { | |
63 | + unset($array[$uri]); // Considered harmless | |
64 | + continue; | |
65 | + } | |
66 | + unset($_uri[0]); // Matched string itself | |
67 | + $_uri['area']['offset'] = $offset; // Area offset for area_measure() | |
68 | + } | |
69 | + | |
70 | + return $array; | |
71 | +} | |
72 | + | |
73 | +// Picked-up URI array => a URI (See uri_pickup()) | |
74 | +// USAGE: | |
75 | +// $pickups = uri_pickup('a string include some URIs'); | |
76 | +// $uris = array(); | |
77 | +// foreach (array_keys($pickups) as $key) { | |
78 | +// $uris[$key] = uri_pickup_implode($pickups[$key]); | |
79 | +// } | |
80 | +function uri_pickup_implode($uri = array()) | |
81 | +{ | |
82 | + if (empty($uri) || ! is_array($uri)) return NULL; | |
83 | + | |
84 | + $tmp = array(); | |
85 | + if (isset($uri['scheme']) && $uri['scheme'] !== '') { | |
86 | + $tmp[] = & $uri['scheme']; | |
87 | + $tmp[] = '://'; | |
88 | + } | |
89 | + if (isset($uri['userinfo']) && $uri['userinfo'] !== '') { | |
90 | + $tmp[] = & $uri['userinfo']; | |
91 | + $tmp[] = '@'; | |
92 | + } | |
93 | + if (isset($uri['host']) && $uri['host'] !== '') { | |
94 | + $tmp[] = & $uri['host']; | |
95 | + } | |
96 | + if (isset($uri['port']) && $uri['port'] !== '') { | |
97 | + $tmp[] = ':'; | |
98 | + $tmp[] = & $uri['port']; | |
99 | + } | |
100 | + if (isset($uri['path']) && $uri['path'] !== '') { | |
101 | + $tmp[] = & $uri['path']; | |
102 | + } | |
103 | + if (isset($uri['file']) && $uri['file'] !== '') { | |
104 | + $tmp[] = & $uri['file']; | |
105 | + } | |
106 | + if (isset($uri['query']) && $uri['query'] !== '') { | |
107 | + $tmp[] = '?'; | |
108 | + $tmp[] = & $uri['query']; | |
109 | + } | |
110 | + if (isset($uri['fragment']) && $uri['fragment'] !== '') { | |
111 | + $tmp[] = '#'; | |
112 | + $tmp[] = & $uri['fragment']; | |
113 | + } | |
114 | + | |
115 | + return implode('', $tmp); | |
116 | +} | |
117 | + | |
118 | + | |
119 | +// --------------------- | |
120 | +// URI normalization | |
121 | + | |
122 | +// Normalize an array of URI arrays | |
123 | +// NOTE: Give me the uri_pickup() results | |
124 | +function uri_pickup_normalize(& $pickups, $destructive = TRUE) | |
125 | +{ | |
126 | + if (! is_array($pickups)) return $pickups; | |
127 | + | |
128 | + if ($destructive) { | |
129 | + foreach (array_keys($pickups) as $key) { | |
130 | + $_key = & $pickups[$key]; | |
131 | + $_key['scheme'] = isset($_key['scheme']) ? scheme_normalize($_key['scheme']) : ''; | |
132 | + $_key['host'] = isset($_key['host']) ? host_normalize($_key['host']) : ''; | |
133 | + $_key['port'] = isset($_key['port']) ? port_normalize($_key['port'], $_key['scheme'], FALSE) : ''; | |
134 | + $_key['path'] = isset($_key['path']) ? strtolower(path_normalize($_key['path'])) : ''; | |
135 | + $_key['file'] = isset($_key['file']) ? file_normalize($_key['file']) : ''; | |
136 | + $_key['query'] = isset($_key['query']) ? query_normalize($_key['query']) : ''; | |
137 | + $_key['fragment'] = isset($_key['fragment']) ? strtolower($_key['fragment']) : ''; | |
138 | + } | |
139 | + } else { | |
140 | + foreach (array_keys($pickups) as $key) { | |
141 | + $_key = & $pickups[$key]; | |
142 | + $_key['scheme'] = isset($_key['scheme']) ? scheme_normalize($_key['scheme']) : ''; | |
143 | + $_key['host'] = isset($_key['host']) ? strtolower($_key['host']) : ''; | |
144 | + $_key['port'] = isset($_key['port']) ? port_normalize($_key['port'], $_key['scheme'], FALSE) : ''; | |
145 | + $_key['path'] = isset($_key['path']) ? path_normalize($_key['path']) : ''; | |
146 | + } | |
147 | + } | |
148 | + | |
149 | + return $pickups; | |
150 | +} | |
151 | + | |
152 | +// Scheme normalization: Renaming the schemes | |
153 | +// snntp://example.org => nntps://example.org | |
154 | +// NOTE: Keep the static lists simple. See also port_normalize(). | |
155 | +function scheme_normalize($scheme = '', $abbrevs_harmfull = TRUE) | |
156 | +{ | |
157 | + // Abbreviations written with no intention of linking | |
158 | + static $abbrevs = array( | |
159 | + 'ttp' => 'http', | |
160 | + 'ttps' => 'https', | |
161 | + ); | |
162 | + | |
163 | + // Aliases => normalized ones | |
164 | + static $aliases = array( | |
165 | + 'pop' => 'pop3', | |
166 | + 'news' => 'nntp', | |
167 | + 'imap4' => 'imap', | |
168 | + 'snntp' => 'nntps', | |
169 | + 'snews' => 'nntps', | |
170 | + 'spop3' => 'pop3s', | |
171 | + 'pops' => 'pop3s', | |
172 | + ); | |
173 | + | |
174 | + if (! is_string($scheme)) return ''; | |
175 | + | |
176 | + $scheme = strtolower($scheme); | |
177 | + if (isset($abbrevs[$scheme])) { | |
178 | + $scheme = $abbrevs_harmfull ? $abbrevs[$scheme] : ''; | |
179 | + } | |
180 | + if (isset($aliases[$scheme])) { | |
181 | + $scheme = $aliases[$scheme]; | |
182 | + } | |
183 | + | |
184 | + return $scheme; | |
185 | +} | |
186 | + | |
187 | +// Hostname normalization (Destructive) | |
188 | +// www.foo => www.foo ('foo' seems TLD) | |
189 | +// www.foo.bar => foo.bar | |
190 | +// www.10.20 => www.10.20 (Invalid hostname) | |
191 | +// NOTE: | |
192 | +// 'www' is mostly used as traditional hostname of WWW server. | |
193 | +// 'www.foo.bar' may be identical with 'foo.bar'. | |
194 | +function host_normalize($host = '') | |
195 | +{ | |
196 | + if (! is_string($host)) return ''; | |
197 | + | |
198 | + $host = strtolower($host); | |
199 | + $matches = array(); | |
200 | + if (preg_match('/^www\.(.+\.[a-z]+)$/', $host, $matches)) { | |
201 | + return $matches[1]; | |
202 | + } else { | |
203 | + return $host; | |
204 | + } | |
205 | +} | |
206 | + | |
// Port normalization: Hide the port number when it is just the default of the scheme
//   HTTP://example.org:80/   => http://example.org/
//   HTTP://example.org:8080/ => http://example.org:8080/
//   HTTPS://example.org:443/ => https://example.org/
// $port             : Port number (integer or string of decimal digits)
// $scheme           : Scheme name, used as the key of the default-port table
// $scheme_normalize : Pass $scheme through scheme_normalize() before the lookup
// Returns '' for an invalid or default port, or the port as an integer
function port_normalize($port, $scheme, $scheme_normalize = FALSE)
{
	// Schemes that users _maybe_ want to add protocol-handlers
	// to their web browsers. (and attackers _maybe_ want to use ...)
	// Reference: http://www.iana.org/assignments/port-numbers
	static $defaults = array(
		// scheme => default port
		'ftp'     => 21,
		'ssh'     => 22,
		'telnet'  => 23,
		'smtp'    => 25,
		'tftp'    => 69,
		'gopher'  => 70,
		'finger'  => 79,
		'http'    => 80,
		'pop3'    => 110,
		'sftp'    => 115,
		'nntp'    => 119,
		'imap'    => 143,
		'irc'     => 194,
		'wais'    => 210,
		'https'   => 443,
		'nntps'   => 563,
		'rsync'   => 873,
		'ftps'    => 990,
		'telnets' => 992,
		'imaps'   => 993,
		'ircs'    => 994,
		'pop3s'   => 995,
		'mysql'   => 3306,
	);

	// Only non-negative, digits-only values are ports.
	// intval() alone would turn '0-1' into 0, so the regex is what
	// rejects such inputs.
	$acceptable = is_numeric($port) && ! ($port < 0) && ! preg_match('/[^0-9]/i', $port);
	if (! $acceptable) return '';

	$number = intval($port);
	if ($scheme_normalize) $scheme = scheme_normalize($scheme);

	// Ignore the defaults
	$is_default = isset($defaults[$scheme]) && $number == $defaults[$scheme];

	return $is_default ? '' : $number;
}
254 | + | |
// Path normalization
//   http://example.org                     => http://example.org/
//   http://example.org#hoge                => http://example.org/#hoge
//   http://example.org/path/a/b/./c////./d => http://example.org/path/a/b/c/d
//   http://example.org/path/../../a/../back => http://example.org/back
// $path     : Path part to normalize
// $divider  : Separator of path segments
// $add_root : Prepend $divider to the result
// Returns the normalized path. With a non-string or empty $divider nothing
// can be split, so $path is returned as it is ('' if it is not a string).
function path_normalize($path = '', $divider = '/', $add_root = TRUE)
{
	// An empty divider can not be given to explode()
	if (! is_string($divider) || $divider === '') {
		return is_string($path) ? $path : '';
	}

	$first_div = $add_root ? $divider : '';
	if (! is_string($path) || $path == '') return $first_div;

	// Keep the trailing divider if the input has one.
	// substr() with a negative start is safe even when the divider is
	// longer than the path, where strpos() with a computed offset is not.
	$last_div = (substr($path, - strlen($divider)) === $divider) ? $divider : '';

	$stack = array();
	foreach (explode($divider, $path) as $segment) {
		// Remove paddings ('//' and '/./')
		if ($segment === '' || $segment === '.') continue;

		if ($segment === '..') {
			// Remove back-tracks ('/../'). Never climbs above the root
			array_pop($stack);
		} else {
			$stack[] = $segment;
		}
	}

	if (empty($stack)) return $first_div;

	return $first_div . implode($divider, $stack) . $last_div;
}
303 | + | |
// DirectoryIndex normalize (Destructive and rough)
// Returns '' when $file looks like a directory-index file
// ('index.html', 'index.html.en', 'default.asp', ...), the bare body when
// every suffix was a language/charset/encoding one, or $file unchanged.
// A non-string $file gives ''.
// TODO: sample.en.ja.html.gz => sample.html
function file_normalize($file = 'index.html.en')
{
	static $simple_defaults = array(
		'default.htm'	=> TRUE,
		'default.html'	=> TRUE,
		'default.asp'	=> TRUE,
		'default.aspx'	=> TRUE,
		'index'			=> TRUE,	// Some system can omit the suffix
	);

	static $content_suffix = array(
		// index.xxx, sample.xxx
		'htm'	=> TRUE,
		'html'	=> TRUE,
		'shtml'	=> TRUE,
		'jsp'	=> TRUE,
		'php'	=> TRUE,
		'php3'	=> TRUE,
		'php4'	=> TRUE,
		'pl'	=> TRUE,
		'py'	=> TRUE,
		'rb'	=> TRUE,
		'cgi'	=> TRUE,
		'xml'	=> TRUE,
	);

	static $language_suffix = array(
		// Reference: Apache 2.0.59 'AddLanguage' default
		'ca'	=> TRUE,
		'cs'	=> TRUE,	// cs
		'cz'	=> TRUE,	// cs
		'de'	=> TRUE,
		'dk'	=> TRUE,	// da
		'el'	=> TRUE,
		'en'	=> TRUE,
		'eo'	=> TRUE,
		'es'	=> TRUE,
		'et'	=> TRUE,
		'fr'	=> TRUE,
		'he'	=> TRUE,
		'hr'	=> TRUE,
		'it'	=> TRUE,
		'ja'	=> TRUE,
		'ko'	=> TRUE,
		'ltz'	=> TRUE,
		'nl'	=> TRUE,
		'nn'	=> TRUE,
		'no'	=> TRUE,
		'po'	=> TRUE,
		'pt'	=> TRUE,
		'pt-br'	=> TRUE,
		'ru'	=> TRUE,
		'sv'	=> TRUE,
		'zh-cn'	=> TRUE,
		'zh-tw'	=> TRUE,

		// Reference: Apache 2.0.59 default 'index.html' variants
		'ee'	=> TRUE,
		'lb'	=> TRUE,
		'var'	=> TRUE,
	);

	static $charset_suffix = array(
		// Reference: Apache 2.0.59 'AddCharset' default
		'iso8859-1'	=> TRUE,	// ISO-8859-1
		'latin1'	=> TRUE,	// ISO-8859-1
		'iso8859-2'	=> TRUE,	// ISO-8859-2
		'latin2'	=> TRUE,	// ISO-8859-2
		'cen'		=> TRUE,	// ISO-8859-2
		'iso8859-3'	=> TRUE,	// ISO-8859-3
		'latin3'	=> TRUE,	// ISO-8859-3
		'iso8859-4'	=> TRUE,	// ISO-8859-4
		'latin4'	=> TRUE,	// ISO-8859-4
		'iso8859-5'	=> TRUE,	// ISO-8859-5
		'latin5'	=> TRUE,	// ISO-8859-5
		'cyr'		=> TRUE,	// ISO-8859-5
		'iso-ru'	=> TRUE,	// ISO-8859-5
		'iso8859-6'	=> TRUE,	// ISO-8859-6
		'latin6'	=> TRUE,	// ISO-8859-6
		'arb'		=> TRUE,	// ISO-8859-6
		'iso8859-7'	=> TRUE,	// ISO-8859-7
		'latin7'	=> TRUE,	// ISO-8859-7
		'grk'		=> TRUE,	// ISO-8859-7
		'iso8859-8'	=> TRUE,	// ISO-8859-8
		'latin8'	=> TRUE,	// ISO-8859-8
		'heb'		=> TRUE,	// ISO-8859-8
		'iso8859-9'	=> TRUE,	// ISO-8859-9
		'latin9'	=> TRUE,	// ISO-8859-9
		'trk'		=> TRUE,	// ISO-8859-9
		'iso2022-jp'=> TRUE,	// ISO-2022-JP
		'jis'		=> TRUE,	// ISO-2022-JP
		'iso2022-kr'=> TRUE,	// ISO-2022-KR
		'kis'		=> TRUE,	// ISO-2022-KR
		'iso2022-cn'=> TRUE,	// ISO-2022-CN
		'cis'		=> TRUE,	// ISO-2022-CN
		'big5'		=> TRUE,
		'cp-1251'	=> TRUE,	// ru, WINDOWS-1251
		'win-1251'	=> TRUE,	// ru, WINDOWS-1251
		'cp866'		=> TRUE,	// ru
		'koi8-r'	=> TRUE,	// ru, KOI8-r
		'koi8-ru'	=> TRUE,	// ru, KOI8-r
		'koi8-uk'	=> TRUE,	// ru, KOI8-ru
		'ua'		=> TRUE,	// ru, KOI8-ru
		'ucs2'		=> TRUE,	// ru, ISO-10646-UCS-2
		'ucs4'		=> TRUE,	// ru, ISO-10646-UCS-4
		'utf8'		=> TRUE,

		// Reference: Apache 2.0.59 default 'index.html' variants
		'euc-kr'	=> TRUE,
		'gb2312'	=> TRUE,
	);

	// May uncompress by web browsers on the fly
	// Must be at the last of the filename
	// Reference: Apache 2.0.59 'AddEncoding'
	static $encoding_suffix = array(
		'z'		=> TRUE,
		'gz'	=> TRUE,
	);

	if (! is_string($file)) return '';
	$_file = strtolower($file);
	if (isset($simple_defaults[$_file])) return '';

	// Roughly removing language/character-set/encoding suffixes
	// References:
	//  * Apache 2 document about 'Content-negotiation', 'mod_mime' and 'mod_negotiation'
	//    http://httpd.apache.org/docs/2.0/content-negotiation.html
	//    http://httpd.apache.org/docs/2.0/mod/mod_mime.html
	//    http://httpd.apache.org/docs/2.0/mod/mod_negotiation.html
	//  * http://www.iana.org/assignments/character-sets
	//  * RFC3066: Tags for the Identification of Languages
	//    http://www.ietf.org/rfc/rfc3066.txt
	//  * ISO 639: codes of 'language names'
	$suffixes = explode('.', $_file);
	$body = array_shift($suffixes);

	// Remove the last .gz/.z
	// NOTE: end() takes its argument by reference, so it must be given
	// the variable itself, not the temporary result of array_keys()
	if ($suffixes && isset($encoding_suffix[end($suffixes)])) {
		array_pop($suffixes);
	}

	// Cut language and charset suffixes
	foreach ($suffixes as $key => $value) {
		if (isset($language_suffix[$value]) || isset($charset_suffix[$value])) {
			unset($suffixes[$key]);
		}
	}
	if (empty($suffixes)) return $body;

	// Index.xxx
	$count   = count($suffixes);
	$current = reset($suffixes);
	if ($body == 'index' && $count == 1 && isset($content_suffix[$current])) return '';

	// Not a directory index: give the input back untouched
	return $file;
}
465 | + | |
// Sort query-strings if possible (Destructive and rough)
// [OK] &&&&f=d&b&d&c&a=0dd  => a=0dd&b&c&d&f=d
// [OK] nothing==&eg=dummy&eg=padding&eg=foobar => eg=foobar
// $string         : Query string (without the leading '?')
// $equal          : Treat 'key=value' parts specially (the last value of a key wins)
// $equal_cutempty : With $equal, drop 'key=' parts that have an empty value
// $stortolower    : Lower-case the whole string first
// Returns the parts in natural order joined with '&' ('' for a non-string)
function query_normalize($string = '', $equal = TRUE, $equal_cutempty = TRUE, $stortolower = TRUE)
{
	if (! is_string($string)) return '';
	if ($stortolower) $string = strtolower($string);

	// Split by '&', throwing away the empty pieces made by '&' paddings
	$parts = array();
	foreach (explode('&', $string) as $piece) {
		if ($piece != '') $parts[] = $piece;
	}

	// Consider '='-separated input and paddings
	if ($equal) {
		$singles = array();	// Parts without '='
		$pairs   = array();	// key => value, a later key overwrites an earlier one
		foreach ($parts as $piece) {
			if (strpos($piece, '=') === FALSE) {
				$singles[] = $piece;
				continue;
			}
			$pair  = explode('=', $piece, 2);
			$name  = $pair[0];
			$data  = ltrim($pair[1], '=');	// 'key===value' => 'value'
			if ($equal_cutempty && $data == '') continue;
			$pairs[$name] = $data;
		}

		$parts = $singles;
		foreach ($pairs as $name => $data) {
			$parts[] = $name . '=' . $data;
		}
	}

	natsort($parts);

	return implode('&', $parts);
}
508 | + | |
509 | +// --------------------- | |
510 | +// Area pickup | |
511 | + | |
// Pickup all of markup areas
// $string : Text to scan
// $method : Array whose keys select what to collect:
//   'area_anchor' / 'area_bbcode' : Number of <a href></a> / [url][/url] pairs
//   'uri_anchor'  / 'uri_bbcode'  : List of array(start offset, end offset) of each pair
//   'asap'                        : For 'area_*', stop counting at the first pair
// Returns an array that has only the keys that found something
function area_pickup($string = '', $method = array())
{
	$area = array();
	if (empty($method)) return $area;

	// Anchor tag pair by preg_match and preg_match_all()
	// [OK] <a href></a>
	// [OK] <a href=  >Good site!</a>
	// [OK] <a href= "#" >test</a>
	// [OK] <a href="http://nasty.example.com">visit http://nasty.example.com/</a>
	// [OK] <a href=\'http://nasty.example.com/\' >discount foobar</a>
	// [NG] <a href="http://ng.example.com">visit http://ng.example.com _not_ended_
	$anchor = '#<a\b[^>]*\bhref\b[^>]*>.*?</a\b[^>]*(>)#is';

	// phpBB's "BBCode" pair by preg_match and preg_match_all()
	// [OK] [url][/url]
	// [OK] [url]http://nasty.example.com/[/url]
	// [OK] [link]http://nasty.example.com/[/link]
	// [OK] [url=http://nasty.example.com]visit http://nasty.example.com/[/url]
	// [OK] [link http://nasty.example.com/]buy something[/link]
	$bbcode = '#\[(url|link)\b[^\]]*\].*?\[/\1\b[^\]]*(\])#is';

	$with_offset = PREG_SET_ORDER | PREG_OFFSET_CAPTURE;

	if (isset($method['area_anchor'])) {
		if (isset($method['asap'])) {
			$count = preg_match($anchor, $string);
		} else {
			$found = array();
			$count = preg_match_all($anchor, $string, $found);
		}
		if (! empty($count)) $area['area_anchor'] = $count;
	}
	if (isset($method['uri_anchor'])) {
		$found = array();
		preg_match_all($anchor, $string, $found, $with_offset);
		$pairs = array();
		foreach ($found as $index => $match) {
			$pairs[$index] = array(
				$match[0][1], // Area start (<a href>)
				$match[1][1], // Area end   (</a>), offset of its '>'
			);
		}
		if (! empty($pairs)) $area['uri_anchor'] = $pairs;
	}

	if (isset($method['area_bbcode'])) {
		if (isset($method['asap'])) {
			$count = preg_match($bbcode, $string);
		} else {
			$found = array();
			$count = preg_match_all($bbcode, $string, $found, PREG_SET_ORDER);
		}
		if (! empty($count)) $area['area_bbcode'] = $count;
	}
	if (isset($method['uri_bbcode'])) {
		$found = array();
		preg_match_all($bbcode, $string, $found, $with_offset);
		$pairs = array();
		foreach ($found as $index => $match) {
			$pairs[$index] = array(
				$match[0][1], // Area start ([url])
				$match[2][1], // Area end   ([/url]), offset of its ']'
			);
		}
		if (! empty($pairs)) $area['uri_bbcode'] = $pairs;
	}

	// Various Wiki syntax
	// [text_or_uri>text_or_uri]
	// [text_or_uri:text_or_uri]
	// [text_or_uri|text_or_uri]
	// [text_or_uri->text_or_uri]
	// [text_or_uri text_or_uri] // MediaWiki
	// MediaWiki: [http://nasty.example.com/ visit http://nasty.example.com/]

	return $area;
}
581 | + | |
// If in doubt, it's a little doubtful
// if (Area => inside <= Area) $belief is added to the entry
// $areas  : List of array(start offset, end offset)
// $array  : Entries to update in place. An entry is touched only if it
//           already has $a_key. Its position is read from $o_key (0 if missing)
// $belief : Value added once per area that strictly contains the position
function area_measure($areas, & $array, $belief = -1, $a_key = 'area', $o_key = 'offset')
{
	if (! is_array($areas) || ! is_array($array)) return;

	$area_indexes = array_keys($areas);
	foreach (array_keys($array) as $entry) {
		// Entries without the counter are never measured
		if (! isset($array[$entry][$a_key])) continue;

		$position = 0;
		if (isset($array[$entry][$o_key])) {
			$position = intval($array[$entry][$o_key]);
		}

		foreach ($area_indexes as $index) {
			$begin  = intval($areas[$index][0]);
			$finish = intval($areas[$index][1]);
			// [Area => inside <= Area], the edges themselves do not count
			if ($position > $begin && $position < $finish) {
				$array[$entry][$a_key] += $belief;
			}
		}
	}
}
604 | + | |
605 | + | |
606 | +// --------------------- | |
607 | +// Spam-uri pickup | |
608 | + | |
// Preprocess: Removing the parts that are of no interest for URI detection
// $binary : Raw text
// $method : Method array. When one of the area-related methods is set,
//           even 1-byte runs are kept because the markup can be that short
function spam_uri_removing_hocus_pocus($binary = '', $method = array())
{
	// 'http'(1) and '://'(2) and 'fqdn'(1)
	$length = 4;

	if (is_array($method)) {
		// '<a'(2) or 'href='(5) or '>'(1) or '</a>'(4)
		// '[uri'(4) or ']'(1) or '[/uri]'(6)
		$area_methods = array('area_anchor', 'uri_anchor', 'area_bbcode', 'uri_bbcode');
		foreach ($area_methods as $name) {
			if (isset($method[$name])) {
				$length = 1;	// Seems not effective
				break;
			}
		}
	}

	// Removing sequential spaces and too short lines
	$shrunk = strings($binary, $length, TRUE, FALSE); // Multibyte NOT needed

	// Remove words (has no '<>[]:') between spaces
	return preg_replace('/[ \t][\w.,()\ \t]+[ \t]/', ' ', $shrunk);
}
629 | + | |
// Preprocess: Domain exposure callback (See spam_uri_pickup_preprocess())
//   http://victim.example.org/?foo+site:nasty.example.com+bar
//   => http://nasty.example.com/?refer=victim.example.org
// $matches : 1:scheme, 2:victim host, 3:the rest before 'site:',
//            4:nasty host, 5:set when the victim URI should be preserved
// NOTE: 'refer=' is not so good for (at this time).
//       Consider about using IP address of the victim, try to avoid that.
function _preg_replace_callback_domain_exposure($matches = array())
{
	// Preserve the victim URI as a complicity or ...
	$victim = '';
	if (isset($matches[5])) {
		$victim = $matches[1] . '://' .	// scheme
			$matches[2] . '/' .			// victim.example.org
			$matches[3];				// The rest of all (before victim)
	}

	// Nothing to flip: only the (possibly empty) victim URI remains
	if (! isset($matches[4])) return $victim;

	// Flipped URI, followed by the victim one
	$flipped = $matches[1] . '://' .			// scheme
		$matches[4] .							// nasty.example.com
		'/?refer=' . strtolower($matches[2]);	// victim.example.org

	return $flipped . ' ' . $victim;
}
658 | + | |
// Preprocess: rawurldecode() and adding space(s) and something
//   to detect/count some URIs _if possible_
// $string : Raw text ('' is returned for a non-string)
// $method : Method array, handed to spam_uri_removing_hocus_pocus()
// NOTE: It may be dangerous to var_dump() the result. [e.g. 'javascript:']
// [OK] http://victim.example.org/?site:nasty.example.org
// [OK] http://victim.example.org/nasty.example.org
// [OK] http://victim.example.org/go?http%3A%2F%2Fnasty.example.org
// [OK] http://victim.example.org/http://nasty.example.org
function spam_uri_pickup_preprocess($string = '', $method = array())
{
	if (! is_string($string)) return '';

	$string = spam_uri_removing_hocus_pocus(rawurldecode($string), $method);
	//var_dump(htmlspecialchars($string));

	// Domain exposure (simple)
	// http://victim.example.org/nasty.example.org/path#frag
	// => http://nasty.example.org/?refer=victim.example.org and original
	$string = preg_replace(
		'#h?ttp://' .
		'(' .
			'ime\.nu' . '|' .	// 2ch.net
			'ime\.st' . '|' .	// 2ch.net
			'link\.toolbot\.com' . '|' .
			'urlx\.org' .
		')' .
		'/([a-z0-9.%_-]+\.[a-z0-9.%_-]+)#i',	// nasty.example.org
		'http://$2/?refer=$1 $0',				// Preserve $0 or remove?
		$string
	);

	// Domain exposure (gate-big5)
	// http://victim.example.org/gate/big5/nasty.example.org/path
	// => http://nasty.example.org/?refer=victim.example.org and original
	// NOTE: Dots are escaped and the host list has no trailing '|', so that
	//       neither look-alike hosts nor an empty host ('http:///gate/...')
	//       are taken as a gateway
	$string = preg_replace(
		'#h?ttp://' .
		'(' .
			'big5\.51job\.com' . '|' .
			'big5\.china\.com' . '|' .
			'big5\.xinhuanet\.com' .
		')' .
		'/gate/big5' .
		'/([a-z0-9.%_-]+\.[a-z0-9.%_-]+)' .
		'#i',	// nasty.example.org
		'http://$2/?refer=$1 $0',				// Preserve $0 or remove?
		$string
	);

	// Domain exposure (site:) See _preg_replace_callback_domain_exposure()
	$string = preg_replace_callback(
		array(
			'#(h?ttp)://' .	// 1:Scheme
			// 2:Host
			'(' .
				'(?:[a-z0-9_.-]+\.)?[a-z0-9_-]+\.[a-z0-9_-]+' .
				// Something Google: http://www.google.com/supported_domains
				// AltaVista: http://es.altavista.com/web/results?q=site%3Anasty.example.org+foobar
				// Live Search: search.live.com
				// MySpace: http://sads.myspace.com/Modules/Search/Pages/Search.aspx?_snip_&searchString=site:nasty.example.org
				// (also searchresults.myspace.com)
				// alltheweb.com
				// search.bbc.co.uk
				// search.orange.co.uk
				// ...
			')' .
			'/' .
			'([a-z0-9?=&.%_/\'\\\+-]+)' .				// 3:path/?query=foo+bar+
			'\bsite:([a-z0-9.%_-]+\.[a-z0-9.%_-]+)' .	// 4:site:nasty.example.com
			'()' .										// 5:Preserve or remove?
			'#i',
		),
		'_preg_replace_callback_domain_exposure',
		$string
	);

	// URI exposure (uriuri => uri uri)
	$string = preg_replace(
		array(
			'#(?<! )(?:https?|ftp):/#i',
		//	'#[a-z][a-z0-9.+-]{1,8}://#i',
		//	'#[a-z][a-z0-9.+-]{1,8}://#i'
		),
		' $0',
		$string
	);

	return $string;
}
746 | + | |
// Main function of spam-uri pickup,
// A wrapper function of uri_pickup()
// $string : Raw text
// $method : Method array. When it is empty or not an array,
//           the result of check_uri_spam_method() is used instead
// Returns what uri_pickup() gave for the preprocessed text, with the
// 'uri_anchor' / 'uri_bbcode' counters added under 'area' when requested
function spam_uri_pickup($string = '', $method = array())
{
	if (! is_array($method) || empty($method)) {
		$method = check_uri_spam_method();
	}

	$string = spam_uri_pickup_preprocess($string, $method);
	$array  = uri_pickup($string);

	// Area elevation of URIs, for '(especially external)link' intention
	if (! empty($array)) {
		// Only the offset-reporting area methods are of interest here
		$wanted = array();
		foreach (array('uri_anchor', 'uri_bbcode') as $name) {
			if (isset($method[$name])) $wanted[$name] = $method[$name];
		}
		$names = array_keys($wanted);

		// NOTE: area_pickup() as defined in this file takes two arguments
		$areas = area_pickup($string, $wanted, TRUE);
		if (! empty($areas)) {
			// $shadow[$key] is an alias of $array[$key]['area'], so that
			// area_measure() updates the counters of $array itself
			$shadow = array();
			foreach (array_keys($array) as $key) {
				$shadow[$key] = & $array[$key]['area'];
				foreach ($names as $name) {
					$shadow[$key][$name] = 0;
				}
			}
			foreach ($names as $name) {
				if (! isset($areas[$name])) continue;
				area_measure($areas[$name], $shadow, 1, $name);
			}
		}
	}

	// Remove 'offset's for area_measure()
	foreach (array_keys($array) as $key) {
		unset($array[$key]['area']['offset']);
	}

	return $array;
}
787 | + | |
788 | +?> |
@@ -1,5 +1,5 @@ | ||
1 | 1 | <?php |
2 | -// $Id: spam.ini.php,v 1.68 2007/06/23 15:22:55 henoheno Exp $ | |
2 | +// $Id: spam.ini.php,v 1.69 2007/07/03 14:47:04 henoheno Exp $ | |
3 | 3 | // Spam-related setting |
4 | 4 | |
5 | 5 | // NOTE FOR ADMINISTRATORS: |
@@ -25,8 +25,10 @@ | ||
25 | 25 | // [3] IP address, if these hosts have the same ones |
26 | 26 | // [4] Something unique idea of you |
27 | 27 | // |
28 | -// Reference: Spamdexing http://en.wikipedia.org/wiki/Spamdexing | |
29 | - | |
28 | +// Reference: | |
29 | +// http://en.wikipedia.org/wiki/Spamdexing | |
30 | +// http://en.wikipedia.org/wiki/Domainers | |
31 | +// http://en.wikipedia.org/wiki/Typosquatting | |
30 | 32 | |
31 | 33 | $blocklist['list'] = array( |
32 | 34 | // List of the lists |
@@ -46,6 +48,8 @@ $blocklist['list'] = array( | ||
46 | 48 | ); |
47 | 49 | |
48 | 50 | |
51 | +// ---- | |
52 | + | |
49 | 53 | $blocklist['goodhost'] = array( |
50 | 54 | // Sample setting of ignorance list |
51 | 55 |
@@ -152,6 +156,7 @@ $blocklist['A-1'] = array( | ||
152 | 156 | '*.true.ws', |
153 | 157 | '*.visit.ws', |
154 | 158 | ), |
159 | + 'affilitool.com', // 125.206.117.91(right-way.org) by noboru hamada (info at isosupport.net) | |
155 | 160 | 'aifam.com', |
156 | 161 | 'All4WebMasters.pl' => array( |
157 | 162 | '*.ovp.pl', |
@@ -258,6 +263,7 @@ $blocklist['A-1'] = array( | ||
258 | 263 | '*.dvdonly.ru', |
259 | 264 | '*.dynu.ca', |
260 | 265 | 'dwarf.name', |
266 | + '*.eadf.com', | |
261 | 267 | '*.easyurl.net', |
262 | 268 | 'elfurl.com', |
263 | 269 | 'eny.pl', |
@@ -448,6 +454,7 @@ $blocklist['A-1'] = array( | ||
448 | 454 | '*.i89.us', |
449 | 455 | 'iat.net', // 74.208.58.130 by Tony Carter |
450 | 456 | '*.iceglow.com', |
457 | + 'go.id-tv.info', // 77.232.68.138(77-232-68-138.static.servage.net) by Max Million (max at id-tv.info) | |
451 | 458 | 'Ideas para Nuevos Mercados SL' => array( |
452 | 459 | // NOTE: 'i4nm.com' by 'Ideas para Nuevos Mercados SL' (i4nm at i4nm.com) |
453 | 460 | // NOTE: 'dominiosfree.com' by 'Ideas para nuevos mercados,sl' (dominiosfree at i4nm.com) |
@@ -855,6 +862,7 @@ $blocklist['A-1'] = array( | ||
855 | 862 | 'ourl.org', |
856 | 863 | 'ov2.net', // frame |
857 | 864 | '*.ozonez.com', |
865 | + 'pagebang.com', | |
858 | 866 | 'palurl.com', |
859 | 867 | '*.paulding.net', |
860 | 868 | 'phpfaber.org', |
@@ -871,6 +879,7 @@ $blocklist['A-1'] = array( | ||
871 | 879 | 'qrl.jp', |
872 | 880 | 'qurl.net', |
873 | 881 | 'qwer.org', |
882 | + 'readthisurl.com', // 67.15.58.36(win2k3.tuserver.com) by Zhe Hong Lim (zhehonglim at gmail.com) | |
874 | 883 | 'radiobase.net', |
875 | 884 | 'RedirectFree.com' => array( |
876 | 885 | '*.red.tc', |
@@ -1338,6 +1347,7 @@ $blocklist['A-1'] = array( | ||
1338 | 1347 | '*.zwap.to', |
1339 | 1348 | ); |
1340 | 1349 | |
1350 | + | |
1341 | 1351 | $blocklist['A-2'] = array( |
1342 | 1352 | |
1343 | 1353 | // A-2: Dynamic DNS, Dynamic IP services, DNS vulnerabilities, or another DNS cases |
@@ -1396,8 +1406,10 @@ $blocklist['A-2'] = array( | ||
1396 | 1406 | '*.zapto.org', |
1397 | 1407 | ), |
1398 | 1408 | '*.zenno.info', |
1399 | - '.cm', // 'Cameroon' ccTLD, sometimes used as typo of '.com' | |
1409 | + '.cm', // 'Cameroon' ccTLD, sometimes used as typo of '.com', | |
1400 | 1410 | // and all non-recorded domains redirect to 'agoga.com' now |
1411 | + // http://money.cnn.com/magazines/business2/business2_archive/2007/06/01/100050989/index.htm | |
1412 | + // http://agoga.com/aboutus.html | |
1401 | 1413 | ); |
1402 | 1414 | |
1403 | 1415 |
@@ -1472,6 +1484,7 @@ $blocklist['B-1'] = array( | ||
1472 | 1484 | '*.aimoo.com', |
1473 | 1485 | '*.alkablog.com', |
1474 | 1486 | '*.alluwant.de', |
1487 | + '.amkbb.com', | |
1475 | 1488 | 'AOL.com' => // http://about.aol.com/international_services |
1476 | 1489 | '/^(?:chezmoi|home|homes|hometown|journals|user)\.' . |
1477 | 1490 | '(?:aol|americaonline)\.' . |
@@ -1493,6 +1506,7 @@ $blocklist['B-1'] = array( | ||
1493 | 1506 | '*.blog-fx.com', |
1494 | 1507 | 'blogas.lt', |
1495 | 1508 | 'blogbud.com', |
1509 | + '*.blogburkinafaso.com', | |
1496 | 1510 | '*.blogcu.com', // by info at nokta.com |
1497 | 1511 | 'blogfreely.com', |
1498 | 1512 | '*.blogdrive.com', |
@@ -1570,7 +1584,8 @@ $blocklist['B-1'] = array( | ||
1570 | 1584 | ), |
1571 | 1585 | 'dotbb.be', |
1572 | 1586 | '*.dox.hu', // dns at 1b.hu |
1573 | - '*.e-host.ws', // by dns at jomax.net, ns by 0catch.com | |
1587 | + '*.e-host.ws', // by dns at jomax.net, ns by 0catch.com | |
1588 | + '*.eadf.com', | |
1574 | 1589 | '*.eblog.com.au', |
1575 | 1590 | '*.ekiwi.de', |
1576 | 1591 | '*.eamped.com', // Admin by Joe Hayes (joe_h_31028 at yahoo.com) |
@@ -1588,6 +1603,7 @@ $blocklist['B-1'] = array( | ||
1588 | 1603 | // NOTE: 'blog.fc2.com' is not included |
1589 | 1604 | '*.h.fc2.com', // Adult |
1590 | 1605 | ), |
1606 | + '*.fizwig.com', | |
1591 | 1607 | 'forum.ezedia.net', |
1592 | 1608 | '*.extra.hu', // angelo at jasmin.hu |
1593 | 1609 | '*.fanforum.cc', |
@@ -1598,7 +1614,9 @@ $blocklist['B-1'] = array( | ||
1598 | 1614 | 'foroswebgratis.com', |
1599 | 1615 | '*.forum-on.de', |
1600 | 1616 | '*.forum5.com', // by Harry S (hsg944 at gmail.com) |
1617 | + '*.forum66.com', | |
1601 | 1618 | 'forumbolt.com', |
1619 | + 'phpbb.forumgratis.com', | |
1602 | 1620 | '*.forumlivre.com', |
1603 | 1621 | 'forumnow.com.br', |
1604 | 1622 | '*.forumppl.com', |
@@ -1623,6 +1641,7 @@ $blocklist['B-1'] = array( | ||
1623 | 1641 | 'freebb.nl', |
1624 | 1642 | '*.freeclans.de', |
1625 | 1643 | '*.freelinuxhost.com', // by 100webspace.com |
1644 | + '*.freehyperspace.com', | |
1626 | 1645 | 'freeforum.at', // by Sandro Wilhelmy |
1627 | 1646 | 'freeforumshosting.com', // by Adam Roberts (admin at skaidon.co.uk) |
1628 | 1647 | '*.freeforums.org', // by 1&1 Internet, Inc. - 1and1.com |
@@ -1718,6 +1737,7 @@ $blocklist['B-1'] = array( | ||
1718 | 1737 | '*.blog.livedoor.com', // redirection |
1719 | 1738 | ), |
1720 | 1739 | '*.livejournal.com', |
1740 | + '.load4.net', // 72.232.201.61(61.201.232.72.static.reverse.layeredtech.com), Says free web hosting but anonymous | |
1721 | 1741 | '*.logme.nl', |
1722 | 1742 | 'ltss.luton.ac.uk', |
1723 | 1743 | 'Lycos.com' => array( |
@@ -1881,6 +1901,7 @@ $blocklist['B-1'] = array( | ||
1881 | 1901 | '.www3.to', |
1882 | 1902 | ), |
1883 | 1903 | '*.spazioforum.it', |
1904 | + 'members.spboards.com', | |
1884 | 1905 | 'forums.speedguide.net', |
1885 | 1906 | '*.spicyblogger.com', |
1886 | 1907 | '*.spotbb.com', |
@@ -2074,6 +2095,7 @@ $blocklist['B-2'] = array( | ||
2074 | 2095 | // (e.g. some sort of blog comments, BBSes, forums, wikis) |
2075 | 2096 | '*.3dm3.com', |
2076 | 2097 | '3gmicro.com', // by Dean Anderson (dean at nobullcomputing.com) |
2098 | + 'a4aid.org', | |
2077 | 2099 | 'aac.com', |
2078 | 2100 | '*.aamad.org', |
2079 | 2101 | 'ad-pecjak.si', |
@@ -2089,6 +2111,7 @@ $blocklist['B-2'] = array( | ||
2089 | 2111 | '*.canberra.net.au', |
2090 | 2112 | 'castus.com', |
2091 | 2113 | 'Case Western Reserve University' => array('case.edu'), |
2114 | + 'ceval.de', | |
2092 | 2115 | 'codespeak.net', |
2093 | 2116 | 'Colorado School of Mines' => array('ticc.mines.edu'), |
2094 | 2117 | '*.colourware.co.uk', |
@@ -2108,6 +2131,7 @@ $blocklist['B-2'] = array( | ||
2108 | 2131 | 'deproduction.org', |
2109 | 2132 | 'dc503.org', |
2110 | 2133 | 'dre-centro.pt', |
2134 | + 'Duke University' => array('devel.linux.duke.edu'), | |
2111 | 2135 | '*.esen.edu.sv', |
2112 | 2136 | 'forums.drumcore.com', |
2113 | 2137 | 'dundeeunited.org', |
@@ -2130,6 +2154,7 @@ $blocklist['B-2'] = array( | ||
2130 | 2154 | 'greendayvideo.net', |
2131 | 2155 | 'espanol.greendayvideo.net', |
2132 | 2156 | ), |
2157 | + 'Hampton University' => array('calipsovalidation.hamptonu.edu'), | |
2133 | 2158 | 'Harvard Law School' => array('blogs.law.harvard.edu'), |
2134 | 2159 | 'helpiammoving.com', |
2135 | 2160 | 'homepage-dienste.com', |
@@ -2197,6 +2222,7 @@ $blocklist['B-2'] = array( | ||
2197 | 2222 | 'preform.dk', |
2198 | 2223 | 'privatforum.de', |
2199 | 2224 | 'publicityhound.net', |
2225 | + 'qea.com', | |
2200 | 2226 | 'rbkdesign.com', |
2201 | 2227 | 'rehoboth.com', |
2202 | 2228 | 'rodee.org', |
@@ -2327,50 +2353,54 @@ $blocklist['C'] = array( | ||
2327 | 2353 | '.notiziacentro.info', |
2328 | 2354 | ), |
2329 | 2355 | 'SomethingGen' => array( |
2330 | - '.adult-chat-world.info', // 'CamsGen' by Lui Xeng Shou (camsgen at model-x.com), | |
2331 | - '.adult-chat-world.org', // 'CamsGen' by Lui Xeng Shou (camsgen at model-x.com) | |
2332 | - '.adult-sex-chat.info', // 'CamsGen' by Lui Xeng Shou (camsgen at model-x.com) | |
2333 | - '.adult-sex-chat.org', // 'CamsGen' by Lui Xeng Shou (camsgen at model-x.com) | |
2334 | - '.adult-cam-chat.info', // 'CamsGen' by Lui Xeng Shou (camsgen at model-x.com) | |
2335 | - '.adult-cam-chat.org', // 'CamsGen' by Lui Xeng Shou (camsgen at model-x.com) | |
2336 | - '.dildo-chat.org', // 'CamsGen' by Lui Xeng Shou (camsgen at model-x.com) | |
2337 | - '.dildo-chat.info', // 'CamsGen' by Lui Xeng Shou (camsgen at model-x.com) | |
2356 | + // 'CamsGen' by Lui Xeng Shou (camsgen at model-x.com) | |
2357 | + // 'CamsGen' by Sergey (buckster at hotpop.com) | |
2358 | + // 'BucksoGen', by Pronin Sergey (buckster at list.ru) | |
2359 | + // by Lee Chang (nebucha at model-x.com) | |
2360 | + '.adult-chat-world.info', // by Lui | |
2361 | + '.adult-chat-world.org', // by Lui | |
2362 | + '.adult-sex-chat.info', // by Lui | |
2363 | + '.adult-sex-chat.org', // by Lui | |
2364 | + '.adult-cam-chat.info', // by Lui | |
2365 | + '.adult-cam-chat.org', // by Lui | |
2366 | + '.dildo-chat.org', // by Lui | |
2367 | + '.dildo-chat.info', // by Lui | |
2338 | 2368 | // flirt-online.info is not CamsGen |
2339 | - '.flirt-online.org', // 'CamsGen' by Lui Xeng Shou (camsgen at model-x.com) | |
2340 | - '.live-adult-chat.info', // 'CamsGen' by Lui Xeng Shou (camsgen at model-x.com) | |
2341 | - '.live-adult-chat.org', // 'CamsGen' by Lui Xeng Shou (camsgen at model-x.com) | |
2342 | - '.sexy-chat-rooms.info', // 'CamsGen' by Lui Xeng Shou (camsgen at model-x.com) | |
2343 | - '.sexy-chat-rooms.org', // 'CamsGen' by Lui Xeng Shou (camsgen at model-x.com) | |
2344 | - '.swinger-sex-chat.info', // 'CamsGen' by Lui Xeng Shou (camsgen at model-x.com) | |
2345 | - '.swinger-sex-chat.org', // 'CamsGen' by Lui Xeng Shou (camsgen at model-x.com) | |
2346 | - '.nasty-sex-chat.info', // 'CamsGen' by Lui Xeng Shou (camsgen at model-x.com) | |
2347 | - '.nasty-sex-chat.org', // 'CamsGen' by Lui Xeng Shou (camsgen at model-x.com) | |
2369 | + '.flirt-online.org', // by Lui | |
2370 | + '.live-adult-chat.info', // by Lui | |
2371 | + '.live-adult-chat.org', // by Lui | |
2372 | + '.sexy-chat-rooms.info', // by Lui | |
2373 | + '.sexy-chat-rooms.org', // by Lui | |
2374 | + '.swinger-sex-chat.info', // by Lui | |
2375 | + '.swinger-sex-chat.org', // by Lui | |
2376 | + '.nasty-sex-chat.info', // by Lui | |
2377 | + '.nasty-sex-chat.org', // by Lui | |
2348 | 2378 | |
2349 | - '.camshost.info', // 'CamsGen' by Sergey (buckster at hotpop.com) | |
2350 | - '.camdoors.info', // 'CamsGen' by Sergey (buckster at hotpop.com) | |
2351 | - '.chatdoors.info', // 'CamsGen' by Sergey (buckster at hotpop.com) | |
2379 | + '.camshost.info', // by Sergey | |
2380 | + '.camdoors.info', // by Sergey | |
2381 | + '.chatdoors.info', // by Sergey | |
2352 | 2382 | |
2353 | - '.lebedi.info', // 'BucksoGen', by Pronin Sergey (buckster at list.ru), | |
2354 | - '.loshad.info', // 'BucksoGen', by Pronin Sergey (buckster at list.ru) | |
2355 | - '.porosenok.info', // 'BucksoGen', by Pronin Sergey (buckster at list.ru) | |
2356 | - '.indyushonok.info', // 'BucksoGen', by Pronin Sergey (buckster at list.ru) | |
2357 | - '.kotyonok.info', // 'BucksoGen', by Pronin Sergey (buckster at list.ru) | |
2358 | - '.kozlyonok.info', // 'BucksoGen', by Pronin Sergey (buckster at list.ru) | |
2359 | - '.magnoliya.info', // 'BucksoGen', by Pronin Sergey (buckster at list.ru) | |
2360 | - '.svinka.info', // 'BucksoGen', by Pronin Sergey (buckster at list.ru) | |
2361 | - '.svinya.info', // 'BucksoGen', by Pronin Sergey (buckster at list.ru) | |
2362 | - '.zherebyonok.info', // 89.149.206.225 'BucksoGen', by Pronin Sergey (buckster at list.ru) | |
2383 | + '.lebedi.info', // by Pronin | |
2384 | + '.loshad.info', // by Pronin | |
2385 | + '.porosenok.info', // by Pronin | |
2386 | + '.indyushonok.info', // by Pronin | |
2387 | + '.kotyonok.info', // by Pronin | |
2388 | + '.kozlyonok.info', // by Pronin | |
2389 | + '.magnoliya.info', // by Pronin | |
2390 | + '.svinka.info', // by Pronin | |
2391 | + '.svinya.info', // by Pronin | |
2392 | + '.zherebyonok.info', // 89.149.206.225 by Pronin | |
2363 | 2393 | |
2364 | 2394 | '.medvezhonok.org', // 89.149.206.225 "BucksoGen 1.2b" |
2365 | 2395 | |
2366 | - '.adult-cam-chat-sex.info', // by Lee Chang (nebucha at model-x.com) | |
2367 | - '.adult-chat-sex-cam.info', // 'CamsGen' by Lee Chang (nebucha at model-x.com) | |
2368 | - '.live-chat-cam-sex.info', // 'CamsGen' by Lee Chang (nebucha at model-x.com) | |
2369 | - '.live-nude-cam-chat.info', // 'CamsGen' by Lee Chang (nebucha at model-x.com) | |
2370 | - '.live-sex-cam-nude-chat.info', // 'CamsGen' by Lee Chang (nebucha at model-x.com) | |
2371 | - '.sex-cam-live-chat-web.info', // 'CamsGen' by Lee Chang (nebucha at model-x.com) | |
2372 | - '.sex-chat-live-cam-nude.info', // 'CamsGen' by Lee Chang (nebucha at model-x.com) | |
2373 | - '.sex-chat-porn-cam.info', // by Lee Chang (nebucha at model-x.com) | |
2396 | + '.adult-cam-chat-sex.info', // by Lee | |
2397 | + '.adult-chat-sex-cam.info', // 'CamsGen' by Lee | |
2398 | + '.live-chat-cam-sex.info', // 'CamsGen' by Lee | |
2399 | + '.live-nude-cam-chat.info', // 'CamsGen' by Lee | |
2400 | + '.live-sex-cam-nude-chat.info', // 'CamsGen' by Lee | |
2401 | + '.sex-cam-live-chat-web.info', // 'CamsGen' by Lee | |
2402 | + '.sex-chat-live-cam-nude.info', // 'CamsGen' by Lee | |
2403 | + '.sex-chat-porn-cam.info', // by Lee | |
2374 | 2404 | ), |
2375 | 2405 | 'mital at topo20.org' => array( // by Marcello Italianore |
2376 | 2406 | '.trevisos.org', |
@@ -2832,11 +2862,15 @@ $blocklist['C'] = array( | ||
2832 | 2862 | '.yxyzauiq.info', // by robemuq8455 at cheerful.com |
2833 | 2863 | ), |
2834 | 2864 | 'Carmodelrank.com etc' => array( |
2835 | - '.carmodelrank.com',// by Brianna Dunlord (briasmi at yahoo.com) | |
2836 | - '.cutestories.net', // by Brianna Dunlord (briasmi at yahoo.com) | |
2865 | + // by Brianna Dunlord (briasmi at yahoo.com) | |
2866 | + // by Tim Rennei (TimRennei at yahoo.com), redirect to amaena.com (fake-antivirus) | |
2867 | + // by Alice T. Horst (Alice.T.Horst at pookmail.com) | |
2868 | + '.carmodelrank.com',// by Brianna | |
2869 | + '.cutestories.net', // by Brianna | |
2837 | 2870 | '.sturducs.com', |
2838 | - '.bestother.info', // by Tim Rennei (TimRennei at yahoo.com), redirect to amaena.com (fake-antivirus) | |
2839 | - '.yaahooo.info', // by Alice T. Horst (Alice.T.Horst at pookmail.com), redirect to activefreehost.com | |
2871 | + '.bestother.info', // by Tim | |
2872 | + '.premiumcasinogames.com', // by Brianna | |
2873 | + '.yaahooo.info', // by Alice | |
2840 | 2874 | ), |
2841 | 2875 | 'aliacsandr at yahoo.com' => array( |
2842 | 2876 | '.cubub.info', // "Free Web Hosting" |
@@ -3881,6 +3915,7 @@ $blocklist['C'] = array( | ||
3881 | 3915 | 'hostorgadmin at googlemail.com' => array( // Byethost Internet Ltd. |
3882 | 3916 | '.1sthost.org', |
3883 | 3917 | '.22web.net', |
3918 | + '.2kool4u.net', | |
3884 | 3919 | '.4sql.net', |
3885 | 3920 | '.php0h.com', |
3886 | 3921 | '.php1h.com', |
@@ -4098,62 +4133,53 @@ $blocklist['C'] = array( | ||
4098 | 4133 | '.sanartuk.ru', // by Vladimir I Noskov (hoskv2003 at gmail.ru) |
4099 | 4134 | ), |
4100 | 4135 | '208.70.75.153' => array( |
4101 | - '.cerc-fi.info', // by Kon Bi (cerca-two at ya.ru), 208.70.75.153 | |
4102 | - '.cerc-fo.info', // by Kon Bi (cerca-two at ya.ru), 208.70.75.153 | |
4103 | - '.cerc-no.info', // by Ru Lee (cerca-tree at ya.ru), 208.70.75.153 | |
4104 | - '.cerc-on.info', // 208.70.75.153 | |
4105 | - '.cerc-sv.info', // by Ru Lee (cerca-tree at ya.ru) | |
4106 | - '.cerc-sx.org', // by Kon Bi (cerca-two at ya.ru), 208.70.75.153 | |
4107 | - '.cerc-te.info', // by Ru Lee (cerca-tree at ya.ru), 208.70.75.153 | |
4108 | - '.cerc-tr.info', // 208.70.75.153 | |
4109 | - '.cerc-tw.info', // 208.70.75.153 | |
4110 | - | |
4111 | - '.cerc-fi.org', // by Kon Bi (cerca-two at ya.ru), 208.70.75.153 | |
4112 | - '.cerc-fo.org', // by Kon Bi (cerca-two at ya.ru), 208.70.75.153 | |
4113 | - '.cerc-no.org', // by Ru Lee (cerca-tree at ya.ru), 208.70.75.153 | |
4114 | - '.cerc-on.org', // by cerca-one at ya.ru, 208.70.75.153 | |
4115 | - '.cerc-sv.org', // by Ru Lee (cerca-tree at ya.ru), 208.70.75.153 | |
4116 | - '.cerc-sx.org', // by Kon Bi (cerca-two at ya.ru), 208.70.75.153 | |
4117 | - '.cerc-te.org', // by Ru Lee (cerca-tree at ya.ru), 208.70.75.153 | |
4118 | - '.cerc-tr.org', // by cerca-one at ya.ru, 208.70.75.153 | |
4119 | - '.cerc-tw.org', // by cerca-one at ya.ru, 208.70.75.153 | |
4120 | - | |
4121 | - '.cerca-fi.org', // by orgitaly1 at ya.ru, 208.70.75.153 | |
4122 | - '.cerca-fo.info', // 208.70.75.153 | |
4123 | - '.cerca-no.info', // 208.70.75.153 | |
4124 | - '.cerca-on.info', // 208.70.75.153 | |
4125 | - '.cerca-sv.info', // 208.70.75.153 | |
4126 | - '.cerca-sx.org', // by orgitaly2 at ya.ru, 208.70.75.153 | |
4127 | - '.cerca-te.info', // 208.70.75.153 | |
4128 | - '.cerca-tr.info', // 208.70.75.153 | |
4129 | - | |
4136 | + '.cerc-fi.info', // by Kon Bi (cerca-two at ya.ru) | |
4137 | + '.cerc-fo.info', // by Kon Bi (cerca-two at ya.ru) | |
4138 | + '.cerc-no.info', // by Ru Lee (cerca-tree at ya.ru) | |
4139 | + '.cerc-on.info', | |
4140 | + '.cerc-sv.info', // by Ru Lee (cerca-tree at ya.ru) | |
4141 | + '.cerc-sx.org', // by Kon Bi (cerca-two at ya.ru) | |
4142 | + '.cerc-te.info', // by Ru Lee (cerca-tree at ya.ru) | |
4143 | + '.cerc-tr.info', | |
4144 | + '.cerc-tw.info', | |
4145 | + '.cerc-fi.org', // by Kon Bi (cerca-two at ya.ru) | |
4146 | + '.cerc-fo.org', // by Kon Bi (cerca-two at ya.ru) | |
4147 | + '.cerc-no.org', // by Ru Lee (cerca-tree at ya.ru) | |
4148 | + '.cerc-on.org', // by cerca-one at ya.ru | |
4149 | + '.cerc-sv.org', // by Ru Lee (cerca-tree at ya.ru) | |
4150 | + '.cerc-sx.org', // by Kon Bi (cerca-two at ya.ru) | |
4151 | + '.cerc-te.org', // by Ru Lee (cerca-tree at ya.ru) | |
4152 | + '.cerc-tr.org', // by cerca-one at ya.ru | |
4153 | + '.cerc-tw.org', // by cerca-one at ya.ru | |
4154 | + '.cerca-fi.org', // by orgitaly1 at ya.ru | |
4155 | + '.cerca-fo.info', | |
4156 | + '.cerca-no.info', | |
4157 | + '.cerca-on.info', | |
4158 | + '.cerca-sv.info', | |
4159 | + '.cerca-sx.org', // by orgitaly2 at ya.ru | |
4160 | + '.cerca-te.info', | |
4161 | + '.cerca-tr.info', | |
4130 | 4162 | '.cerca-sx.org', |
4131 | - '.cerca-tr.org', // orgitaly1 at ya.ru | |
4132 | - | |
4133 | - '.ricerca-fiv.org', // orgitaly1 at ya.ru | |
4134 | - | |
4135 | - '.ricerca-fo.info', // 208.70.75.153 | |
4136 | - | |
4137 | - '.ricerca-one.org', // 208.70.75.153 | |
4138 | - | |
4163 | + '.cerca-tr.org', // orgitaly1 at ya.ru | |
4164 | + '.ricerca-fiv.org', // orgitaly1 at ya.ru | |
4165 | + '.ricerca-fo.info', | |
4166 | + '.ricerca-one.org', | |
4139 | 4167 | '.ricerca-sv.org', |
4140 | 4168 | '.ricerca-sx.org', |
4141 | 4169 | '.ricerca-te.org', |
4142 | - '.ricerca-tw.org', // orgitaly1 at ya.ru | |
4143 | - | |
4144 | - '.subit01.org', // 208.70.75.153 | |
4145 | - '.subit02.org', // 208.70.75.153 | |
4146 | - '.subit03.org', // 208.70.75.153 | |
4147 | - '.subit04.org', // 208.70.75.153 | |
4148 | - '.subit05.org', // 208.70.75.153 | |
4149 | - '.subit06.org', // 208.70.75.153 | |
4150 | - | |
4151 | - '.subit01.info', // 208.70.75.153 | |
4152 | - '.subit02.info', // 208.70.75.153 | |
4153 | - '.subit03.info', // 208.70.75.153 | |
4154 | - '.subit04.info', // 208.70.75.153 | |
4155 | - '.subit05.info', // 208.70.75.153 | |
4156 | - '.subit06.info', // 208.70.75.153 | |
4170 | + '.ricerca-tw.org', // orgitaly1 at ya.ru | |
4171 | + '.subit01.org', | |
4172 | + '.subit02.org', | |
4173 | + '.subit03.org', | |
4174 | + '.subit04.org', | |
4175 | + '.subit05.org', | |
4176 | + '.subit06.org', | |
4177 | + '.subit01.info', | |
4178 | + '.subit02.info', | |
4179 | + '.subit03.info', | |
4180 | + '.subit04.info', | |
4181 | + '.subit05.info', | |
4182 | + '.subit06.info', | |
4157 | 4183 | ), |
4158 | 4184 | 'ernestppc at yahoo.com' => array( // by Anrey Markov (ernestppc at yahoo.com) |
4159 | 4185 | '.5-base.com', |
@@ -4299,21 +4325,147 @@ $blocklist['C'] = array( | ||
4299 | 4325 | '.kliktop.org', |
4300 | 4326 | '.pharmatop.us', |
4301 | 4327 | '.supertop.us', |
4328 | + '.supervaizer.info', | |
4302 | 4329 | ), |
4303 | 4330 | 'infomed2004 at mail.ru' => array( // by Andrey Ushakov (infomed2004 at mail.ru) |
4304 | 4331 | '.freeamateursexx.info', // 81.0.195.228 |
4305 | 4332 | '.freeanalsexx.info', // 217.11.233.97 |
4306 | 4333 | ), |
4307 | 4334 | 'support at dns4me.biz' => array( // 89.149.228.237 by John Black (support at dns4me.biz) |
4335 | + '.abbhi.info', | |
4308 | 4336 | '.gayblogguide.biz', |
4337 | + '.huope.info', | |
4309 | 4338 | '.thebdsmday.info', |
4339 | + '.zioprt.info', // 89.149.228.237 | |
4310 | 4340 | ), |
4311 | 4341 | 'dzheker at yandex.ru' => array( // by dzheker at yandex.ru |
4342 | + '.boblisk.info', | |
4312 | 4343 | '.factyri.info', |
4313 | 4344 | '.jorge1.info', |
4314 | 4345 | ), |
4346 | + 'lichincool at gmail.com' => array( // 72.232.229.115 by lichincool at gmail.com, / is meaningless | |
4347 | + '.bestmindstorm.org', | |
4348 | + '.redstoreonline.org', | |
4349 | + ), | |
4350 | + '59.106.24.2' => array( // 59.106.24.2, sakagutiryouta at yahoo.co.jp | |
4351 | + '.8e8ae.net', | |
4352 | + '.c-cock.com', | |
4353 | + '.fa59eaf.com', | |
4354 | + '.set-place.net', | |
4355 | + '.sex-beauty.net', | |
4356 | + ), | |
4357 | + '84.252.148.140' => array( // 84.252.148.140(kratos.mchost.ru) | |
4358 | + '.unefout.info', | |
4359 | + '.unitfree.info', | |
4360 | + '.votrefout.info', | |
4361 | + ), | |
4362 | + 'info at thecanadianmeds.com' => array( // by Andrey Smirnov (info at thecanadianmeds.com) | |
4363 | + '.myviagrasite.com', // 80.74.153.2 | |
4364 | + '.thecanadianmeds.com', // 80.74.153.17 | |
4365 | + ), | |
4366 | + | |
4367 | + // C-2: Affiliates, Hypes, Catalog retailers, Multi-level marketings, Resellers | |
4368 | + '15-Mail.com related' => array( | |
4369 | + '.15-mail.com', // 202.218.109.45(*.netassist.jp) by yukiyo yamamoto (sunkusu5268 at m4.ktplan.ne.jp) | |
4370 | + '.1bloglog.com', // 210.253.115.159 by Yukiyo Yamamoto (info at 15-mail.com) | |
4371 | + '.investment-school.com', // 210.253.115.159 by Yukiyo Yamamoto (info at 15-mail.com) | |
4372 | + '.breakjuku.com', // 210.253.115.159 (service provider bet.co.jp = xserver.jp) | |
4373 | + '.nambara.biz', // by Yukiyo Yamamoto (info at 15-mail.com) | |
4374 | + ), | |
4375 | + '.all-affiliater.com', // 202.222.30.18(sv125.lolipop.jp), ns *.lolipop.jp | |
4376 | + 'E-brainers.com related' => array( | |
4377 | + // 202.212.14.101 | |
4378 | + '.cyoto-morketing-club.com', // by Fujio Iwasaki (domain at sppd.co.jp) | |
4379 | + '.e-brainers.com', // by Fujio Iwasaki (domain at sppd.co.jp) | |
4380 | + '.my-tune.jp', // by brainers Inc. | |
4381 | + '.technical-support-center.com',// by Fujio Iwasaki (domain at sppd.co.jp) | |
4382 | + '.weekle.jp', // by brainers Inc. | |
4383 | + | |
4384 | + // 210.136.111.56 by Masatoshi Kobayashi (domain at e-brainers.com) | |
4385 | + // 210.136.111.56 by Fujio Iwasaki (domain at sppd.co.jp) | |
4386 | + '.3minutes-marketing-club.com', // by Fujio | |
4387 | + '.affiliate-vampire.com', // by Masatoshi | |
4388 | + '.article-site-power-package.com', // by Masatoshi | |
4389 | + '.audio-marketing-club.com', // by Fujio | |
4390 | + '.brainers-task-manager.com', // by Masatoshi | |
4391 | + '.brainers-troubleshooter-generator.com', // by Masatoshi | |
4392 | + '.brainersbuzz.com', // by Masatoshi | |
4393 | + '.den4renz-marketing-club.com', // by Fujio | |
4394 | + '.english-contents-club.com', // by Masatoshi | |
4395 | + '.fly-in-ads-japan.com', // by Fujio | |
4396 | + '.free-resalerights-giveaway.com', // by Fujio | |
4397 | + '.freegiveawaysecret.com', // by Masatoshi | |
4398 | + '.guaranteedvisitorpro.com', // by Masatoshi | |
4399 | + '.havads-japan.com', // by Masatoshi | |
4400 | + '.info-business123.com', // by Fujio | |
4401 | + '.instant-marketing-club.com', // by Fujio | |
4402 | + '.marketing-force-japan.com', // by Fujio | |
4403 | + '.masatoshikobayashi.com', // by Fujio | |
4404 | + '.profitsinstigator.com', // by Masatoshi Kobayashi (akada at e-brainers.com) | |
4405 | + '.replytomatt.com', // by Fujio | |
4406 | + '.santa-deal.com', // by Fujio | |
4407 | + '.santa-deal-summer.com', // by Fujio | |
4408 | + '.scratch-card-factory.com', // by Masatoshi | |
4409 | + '.script4you-japan.com', // by Fujio | |
4410 | + '.sell1000000dollarinjapan.com',// by Fujio | |
4411 | + '.squeeze-page-secret.com', // by Masatoshi | |
4412 | + '.viral-blog-square.com', // by Fujio | |
4413 | + '.viralarticle.com', // by Fujio | |
4414 | + '.wowhoken.com', // by Fujio | |
4315 | 4415 | |
4316 | - // C-2: Lonely domains (buddies not found yet) | |
4416 | + // 202.212.14.104 by Fujio Iwasaki (domain at sppd.co.jp) | |
4417 | + '.brainerstelevision.com', | |
4418 | + '.demosite4you.com', | |
4419 | + '.keywordcatcherpro.com', | |
4420 | + '.script-marketing-club.com', | |
4421 | + | |
4422 | + // 202.228.204.140(server.ultimate-marketing-weapon.com) by Masatoshi Kobayashi (akada at e-brainers.com) | |
4423 | + // 202.228.204.140 by Masatoshi Kobayashi (domain at e-brainers.com) | |
4424 | + '.brainers.ws', // 202.228.204.140 by info at key-systems.net, ns *.ultimate-marketing-weapon.com | |
4425 | + '.brainerscode.com', // by akada | |
4426 | + '.brainerslive.com', // by domain | |
4427 | + '.brainersreview.com', // by domain | |
4428 | + '.brainerstest.com', // by akada | |
4429 | + '.otosecret.com', // by domain | |
4430 | + '.ultimate-marketing-weapon.com', // by akada | |
4431 | + '.planet-club.net', // 202.228.204.141(server.ultimate-marketing-weapon.com) | |
4432 | + '.terk.jp', // by Tsuyoshi Tsukada, QHM | |
4433 | + | |
4434 | + '.samuraiautoresponder.com', // 211.125.179.75(bq1.mm22.jp) by Masatoshi Kobayashi (kobayashi at wowhoken.com) | |
4435 | + '.sppd.co.jp', // 210.136.106.122 by Studio Map Ltd., ns *.sppd.ne.jp, spam | |
4436 | + ), | |
4437 | + '.e2996.com', // 202.181.105.241(sv261.lolipop.jp) | |
4438 | + '.fx4rich.com', // 219.94.128.161(www921.sakura.ne.jp) by Yuji Nakano (info at will76.com) | |
4439 | + 'info at kobeweb.jp' => array( | |
4440 | + '.soholife.jp', // 211.125.65.203 by Takashige Tabuchi (info at kobeweb.jp) | |
4441 | + '.kobeweb.jp', // 59.106.13.51(www421.sakura.ne.jp) | |
4442 | + '.sloters.tv', // 211.125.65.203 by Takashige Tabuchi (t-2 at white.interq.or.jp) | |
4443 | + ), | |
4444 | + '.info-affiliate.net', // 219.94.148.8(sv41.chicappa.jp) | |
4445 | + '.infostore.jp', // 216.255.235.45, ns *.estore.co.jp | |
4446 | + 'JunSuzuki.com' => array( // e-brainers.com related | |
4447 | + '.junsuzuki.com', // 218.216.67.43(s92.xrea.com) by Jun Suzuki (jun_suzuki at compus.net) | |
4448 | + '.globalswing.biz', // 210.188.217.109(sv27.xserverzero.net) | |
4449 | + ), | |
4450 | + 'Point-park.com' => array( // Tadahiro Ogawa (domain at wide.ne.jp) | |
4451 | + '.11kanji.com', // 211.10.131.88 | |
4452 | + '.mlmsupport.jp', // 211.10.131.108 by info at point-park.com | |
4453 | + '.point-park.com', // 211.10.131.88 | |
4454 | + '.point-park.jp', // 43.244.140.160(160.140.244.43.ap.yournet.ne.jp) | |
4455 | + ), | |
4456 | + '.potitto.info', // 219.94.132.89(sv450.lolipop.jp) | |
4457 | + '.sedori-data.com', // | |
4458 | + '.tool4success.com', // 210.188.201.31(sv70.xserver.jp) by Yukihiro Akada (ml at original-ehon.com) | |
4459 | + 'tera at kirinn.com' => array( // 59.139.29.234(s240.xrea.com) by Naohsi Terada (tera at kirinn.com) | |
4460 | + '.e123.info', | |
4461 | + '.ialchemist.net', | |
4462 | + '.j012.net', | |
4463 | + '.xn--yckc2auxd4b6564dogvcf7g.biz', | |
4464 | + ), | |
4465 | + '.zakkuzaku.com', // 210.188.201.44(sv83.xserver.jp) | |
4466 | + | |
4467 | + | |
4468 | + // C-3: Lonely domains (buddies not found yet) | |
4317 | 4469 | '.0721-4404.com', |
4318 | 4470 | '.0nline-porno.info', // by Timyr (timyr at narod.ru) |
4319 | 4471 | '.1-click-clipart.com', // by Big Resources, Inc. (hostmaster at bigresources.com) |
@@ -4328,7 +4480,7 @@ $blocklist['C'] = array( | ||
4328 | 4480 | '.50webs.com', // by LiquidNet Ltd. (support at propersupport.com), redirect to mpage.jp |
4329 | 4481 | '.6i6.de', |
4330 | 4482 | '.advancediet.com', // by Shonta Mojica (hostadmin at advancediet.com) |
4331 | - '.adult-master-club.com', // by Alehander (mazyrkevich at cosmostv.by) | |
4483 | + '.adult-master-club.com', // by Alehander (mazyrkevich at cosmostv.by) | |
4332 | 4484 | '.adultpersonalsclubs.com', // by Peter (vaspet34 at yahoo.com) |
4333 | 4485 | '.akgame.com', // 72.32.79.100 by Howard Ke (gmtbank at gmail.com), rmt & pl |
4334 | 4486 | '.alfanetwork.info', // by dante (dantequick at gmail.com) |
@@ -4340,12 +4492,12 @@ $blocklist['C'] = array( | ||
4340 | 4492 | '.banep.info', // by Mihailov Dmitriy (marokogadro at yahoo.com), iframe to this site |
4341 | 4493 | '.baurish.info', |
4342 | 4494 | '.bestop.name', |
4343 | - '.bestmindstorm.org', // 72.232.229.115 by lichincool at gmail.com, / meanless | |
4344 | 4495 | '.betmmo.com', // 63.223.98.182 by Huang Qiang (liuxing-wushi at hotmail.com), pl |
4345 | 4496 | '.bestrademark.info', // by victoria (niko16d at yahoo.com), redirect to majordomo.ru |
4346 | 4497 | '.bestshopfinder.info', |
4347 | 4498 | '.bloggerblast.com', // by B. Kadrie (domains at starwhitehosting.com) |
4348 | - '.blogest.org', // 203.116.63.68 by Bobby.R.Kightlinger at pookmail.com, / seems blank | |
4499 | + '.blogest.org', // 203.116.63.68 by Bobby.R.Kightlinger at pookmail.com, / seems blank | |
4500 | + '.bookblogsite.org', // 217.11.233.58 by Eugene.E.Mather at mailinator.com | |
4349 | 4501 | '.businessplace.biz', // by Grenchenko Ivan Petrovich (eurogogi at yandex.ru) |
4350 | 4502 | '.capital2u.info', // by Delbert.A.Henry at dodgeit.com |
4351 | 4503 | '.casa-olympus.com', // "UcoZ WEB-SERVICES" |
@@ -4353,9 +4505,9 @@ $blocklist['C'] = array( | ||
4353 | 4505 | '.constitutionpartyofwa.org', // "UcoZ WEB-SERVICES" |
4354 | 4506 | '.covertarena.co.uk', // by Wayne Huxtable |
4355 | 4507 | '.d999.info', // by Peter Vayner (peter.vayner at inbox.ru) |
4356 | - '.dinmo.cn', // 218.30.96.149 by dinso at 163.com, seo etc. | |
4357 | - //'.wow-gold.dinmo.cn', // 125.65.76.59, pl | |
4358 | - '.dinmoseo.com', // 210.51.168.102(winp2-web-g02.xinnetdns.com) by jianmin911 at 126.com, NS *.xinnetdns.com, seo | |
4508 | + '.dinmo.cn', // 218.30.96.149 by dinso at 163.com, seo etc. | |
4509 | + //'.wow-gold.dinmo.cn', // 125.65.76.59, pl | |
4510 | + '.dinmoseo.com', // 210.51.168.102(winp2-web-g02.xinnetdns.com) by jianmin911 at 126.com, NS *.xinnetdns.com, seo | |
4359 | 4511 | '.dlekei.info', // by Maxima Bucaro (webmaster at tts2f.info) |
4360 | 4512 | '.dollar4u.info', // by Carla (Carla.J.Merritt at mytrashmail.com), / is blank |
4361 | 4513 | '.drug-shop.us', // by Alexandr (matrixpro at mail.ru) |
@@ -4367,7 +4519,7 @@ $blocklist['C'] = array( | ||
4367 | 4519 | '.fantasy-handjob-ra.com', // by Hose Pedro (hosepedro at gmail.com) |
4368 | 4520 | '.fast4me.info', // by Hakan Durov (poddubok at inbox.ru), / is blank |
4369 | 4521 | '.fastmoms.info', // by Pavel Golyshev (pogol at walla.com), / is blank |
4370 | - '.fastppc.info', // by peter conor (fastppc at msn.com) | |
4522 | + '.fastppc.info', // by peter conor (fastppc at msn.com) | |
4371 | 4523 | '.ffxiforums.net', // by Zhang xiaolong (mail at 33986.com), hidden VBScript |
4372 | 4524 | '*.filthserver.com', // sales at onlinemarketingservices.biz |
4373 | 4525 | '.find-stuff.org', // by Alice Freedman (admin at ip-labs.ru), / 404 Not Found |
@@ -4394,8 +4546,8 @@ $blocklist['C'] = array( | ||
4394 | 4546 | '.gm-exchange.jp', // RMT |
4395 | 4547 | '.goamoto.ru', // by Dmitry E Kotchnev (z2archive at gmail.com) |
4396 | 4548 | '.good1688.com', // by Wen Chien Lunz (wzk1219 at yahoo.com.tw), one of them frame to , and whoop.to |
4397 | - '.google-pharmacy.com', // by alex (mdisign1997 at yahoo.com), hiding with urlx.org etc | |
4398 | - '.greatbestwestern.org', // by gao.wungao at gmail.com | |
4549 | + '.google-pharmacy.com', // by alex (mdisign1997 at yahoo.com), hiding with urlx.org etc | |
4550 | + '.greatbestwestern.org',// by gao.wungao at gmail.com | |
4399 | 4551 | '.greatsexdate.com', // by Andreas Crablo (crablo at hotmail.com) |
4400 | 4552 | '.guild-wars-online.com', // by Fuzhou Tianmeng Touzi Zixun Co.,Ltd (welkin at skyunion.com) |
4401 | 4553 | '.happyhost.org', // by Paul Zamnov (paul at zamnov.be) |
@@ -4403,14 +4555,15 @@ $blocklist['C'] = array( | ||
4403 | 4555 | '.honda168.net', // by tan tianfu (xueyihua at gmail.com), seems not used now |
4404 | 4556 | '.hostuju.cz', // ns banan.cz, banan.it |
4405 | 4557 | '.hot4buy.org', // by Hot Maker (jot at hot4buy.org) |
4406 | - '.hotscriptonline.info', // by Psy Search (admin at psysearch.com) | |
4558 | + '.hotscriptonline.info',// by Psy Search (admin at psysearch.com) | |
4559 | + '.iinaa.net', // domain at ml.ninja.co.jp, ns *.shinobi.jp | |
4407 | 4560 | '.incbuy.info', // by Diego T. Murphy (Diego.T.Murphy at incbuy.info) |
4408 | 4561 | '.infocart.jp', // Trying to earn money easily by selling 'earn-money-easily' tips |
4409 | 4562 | '.infradoc.com', |
4410 | 4563 | '.investorvillage.com', // by natalija puchkova (internet at internet.lv) |
4411 | - '.ismarket.com', // Google-hiding. intercage.com related IP | |
4564 | + '.ismarket.com', // Google-hiding. intercage.com related IP | |
4412 | 4565 | '.italialiveonline.info', // by Silvio Cataloni (segooglemsn at yahoo.com), redirect to activefreehost.com |
4413 | - '.italy-search.org', // by Alex Yablin (zaharov-alex at yandex.ru) | |
4566 | + '.italy-search.org', // by Alex Yablin (zaharov-alex at yandex.ru) | |
4414 | 4567 | '.itsexosit.net', |
4415 | 4568 | '.itxxxit.net', |
4416 | 4569 | '.jimmys21.com', // by Klen Kudryavii (telvid at shaw.ca) |
@@ -4424,7 +4577,7 @@ $blocklist['C'] = array( | ||
4424 | 4577 | '.link-keeper.net', // 210.172.108.236 (257.xrea.com) |
4425 | 4578 | '.ls.la', // by Milton McLellan (McLellanMilton at yahoo.com) |
4426 | 4579 | '.mamaha.info', // by Alex Klimovsky (paganec at gmail.com), seems to be under construction now |
4427 | - '.manseekingwomanx.com', // by Bill Peterson (coccooc at fastmail.fm) | |
4580 | + '.manseekingwomanx.com',// by Bill Peterson (coccooc at fastmail.fm) | |
4428 | 4581 | '.medpharmaworldguide.com', // by Nick Ivchenkov (signmark at gmail.com), / not found |
4429 | 4582 | '.megvideochatlive.info', // Bad seo |
4430 | 4583 | '.milfxxxpass.com', // by Morozov Pavlik (rulets at gmail.com) |
@@ -4434,6 +4587,7 @@ $blocklist['C'] = array( | ||
4434 | 4587 | '.next-moneylife.com', // RMT |
4435 | 4588 | '.newalandirect.com', // by Alnoor Hirji, ns *.sablehost.com |
4436 | 4589 | '.ngfu2.info', // by Tara Lagrant (webmaster at ngfu2.info) |
4590 | + '.nucked-sex.com', // 203.223.150.222 by lis (noidlis2 at yahoo.com) | |
4437 | 4591 | '.ok10000.com', // by zipeng hu (ldcs350003 at hotmail.com) |
4438 | 4592 | '.olimpmebel.info', // by pol (pauk_life at mail.ru), frame to bettersexmall.com |
4439 | 4593 | '.onlinetert.info', // by Jarod Hyde (grigorysch at gmail.com) |
@@ -4467,6 +4621,7 @@ $blocklist['C'] = array( | ||
4467 | 4621 | '.searchadv.com', // by Jaan Randolph (searchadv at gmail.com) |
4468 | 4622 | '.seek-www.com', // by Adam Smit (pingpong at mail.md) |
4469 | 4623 | '.sessocities.net', // by info at secureserver3.com |
4624 | + '.seven-pharmacy.com', // 83.138.176.247 by Justin Timberlake (preved at gmail.com) | |
4470 | 4625 | '.sexamoreit.com', |
4471 | 4626 | '.sexforit.com', |
4472 | 4627 | '.sexmaniacs.org', // by Yang Chong (chong at x-india.com) |
@@ -4480,6 +4635,7 @@ $blocklist['C'] = array( | ||
4480 | 4635 | '.thehostcity.com', // Domains by Proxy |
4481 | 4636 | '.thetinyurl.com', // by Beth J. Carter (Beth.J.Carter at thetinyurl.com) |
4482 | 4637 | '.thetrendy.info', // by Harold (Harold.J.Craft at pookmail.com), / is blank |
4638 | + '.theusapills.com', // by Dr. Zarman (contactus at theusapills.com) | |
4483 | 4639 | '.topmeds10.com', |
4484 | 4640 | '*.tv-reklama.info', // by Kozlov Maxim (m_koz at mail.ru) |
4485 | 4641 | '.twabout.com', // by qiu wenbing (qiuwenbing at 126.com), content from l2mpt.net |
@@ -4504,13 +4660,14 @@ $blocklist['C'] = array( | ||
4504 | 4660 | '.xpacificpoker.com', // by Hubert Hoffman (support at xpacificpoker.com) |
4505 | 4661 | '.xphost.org', // by alex alex (alrusnac at hotmail.com) |
4506 | 4662 | '.xamorexxx.net', |
4663 | + '.xn--gmqt9gewhdnlyq9c.net', // 122.249.16.133(x016133.ppp.asahi-net.or.jp) by daizinazikanwo at yahoo.co.jp | |
4507 | 4664 | '.xsessox.com', |
4508 | 4665 | '.yoi4.net', // by Ryouhei Nakamura (888 at sympathys.com), tell me why so many blogs with popular issues and _diverted design from blog.livedoor.jp_ around here. |
4509 | 4666 | '.zlocorp.com', // by tonibcrus at hotpop.com, spammed well with "http ://zlocorp.com/" |
4510 | 4667 | '.zyguo.info', // ns globoxhost.net |
4511 | 4668 | '.zhuyiw.com', // by zhou yuntao (whzyt0122 at sohu.com) |
4512 | 4669 | |
4513 | - // C-3: Not classifiable (information wanted) | |
4670 | + // C-4: Not classifiable (information wanted) | |
4514 | 4671 | // |
4515 | 4672 | // Something incoming to pukiwiki related sites |
4516 | 4673 | 'nana.co.il related' => array( |
@@ -4519,7 +4676,6 @@ $blocklist['C'] = array( | ||
4519 | 4676 | ), |
4520 | 4677 | ); |
4521 | 4678 | |
4522 | - | |
4523 | 4679 | $blocklist['D'] = array( |
4524 | 4680 | // D: Sample setting of |
4525 | 4681 | // "third party in good faith"s |