• R/O
  • HTTP
  • SSH
  • HTTPS

提交

標籤
無標籤

Frequently used words (click to add to your profile)

java c++ android linux c# windows objective-c cocoa 誰得 qt python php ruby game gui bathyscaphe c 計画中(planning stage) 翻訳 omegat framework twitter dom test vb.net directx ゲームエンジン btron arduino previewer

Commit MetaInfo

修訂1eeb9c71186f0f18ba035010606a10a957b6b622 (tree)
時間2007-07-03 23:47:20
作者henoheno <henoheno>
Committer: henoheno

Log Message

$Id: spam.php,v 1.196 2007/07/02 14:51:40 henoheno Exp $
$Id: spam_pickup.php,v 1.51 2007/07/02 14:51:40 henoheno Exp $
$Id: spam.ini.php,v 1.130 2007/07/03 14:40:05 henoheno Exp $
$Id: domain.ini.php,v 1.2 2007/06/28 14:51:10 henoheno Exp $
* Separate spam.php => spam.php, spam_pickup.php, and domain.ini.php
* Reorder some functions
* Remove unused function: array_leaf()
* spam_uri_pickup_preprocess(): abstraction
* spam.ini.php: C-2: Affiliates, Hypes, Catalog retailers, Multi-level marketings, Resellers (in Japan)

Change Summary

差異

--- /dev/null
+++ b/domain.ini.php
@@ -0,0 +1,655 @@
1+<?php
2+// $Id: domain.ini.php,v 1.1 2007/07/03 14:47:04 henoheno Exp $
3+// Domain related setting
4+
5+// Domains that have 2nd and/or 3rd level domains
6+$domain = array(
7+
8+ // ccTLD: Australia
9+ // http://www.auda.org.au/
10+ // NIC : http://www.aunic.net/
11+ // Whois: http://www.ausregistry.com.au/
12+ 'au' => array(
13+ // .au Second Level Domains
14+ // http://www.auda.org.au/domains/
15+ 'asn' => TRUE,
16+ 'com' => TRUE,
17+ 'conf' => TRUE,
18+ 'csiro' => TRUE,
19+ 'edu' => array( // http://www.domainname.edu.au/
20+ // Geographic
21+ 'act' => TRUE,
22+ 'nt' => TRUE,
23+ 'nsw' => TRUE,
24+ 'qld' => TRUE,
25+ 'sa' => TRUE,
26+ 'tas' => TRUE,
27+ 'vic' => TRUE,
28+ 'wa' => TRUE,
29+ ),
30+ 'gov' => array(
31+ // Geographic
32+ 'act' => TRUE, // Australian Capital Territory
33+ 'nt' => TRUE, // Northern Territory
34+ 'nsw' => TRUE, // New South Wales
35+ 'qld' => TRUE, // Queensland
36+ 'sa' => TRUE, // South Australia
37+ 'tas' => TRUE, // Tasmania
38+ 'vic' => TRUE, // Victoria
39+ 'wa' => TRUE, // Western Australia
40+ ),
41+ 'id' => TRUE,
42+ 'net' => TRUE,
43+ 'org' => TRUE,
44+ 'info' => TRUE,
45+ ),
46+
47+ // ccTLD: Bahrain
48+ // NIC : http://www.inet.com.bh/ (.bh policies not found)
49+ // Whois: (Not available) http://www.inet.com.bh/
50+ 'bh' => array(
51+ // Observed
52+ 'com' => TRUE,
53+ 'edu' => TRUE,
54+ 'gov' => TRUE,
55+ 'org' => TRUE,
56+ ),
57+
58+ // ccTLD: China
59+ // NIC : http://www.cnnic.net.cn/en/index/
60+ // Whois: http://ewhois.cnnic.cn/
61+ 'cn' => array(
62+ // Provisional Administrative Rules for Registration of Domain Names in China
63+ // http://www.cnnic.net.cn/html/Dir/2003/11/27/1520.htm
64+
65+ // Organizational
66+ 'ac' => TRUE,
67+ 'com' => TRUE,
68+ 'edu' => TRUE,
69+ 'gov' => TRUE,
70+ 'net' => TRUE,
71+ 'org' => TRUE,
72+
73+ // Geographic
74+ 'ah' => TRUE,
75+ 'bj' => TRUE,
76+ 'cq' => TRUE,
77+ 'fj' => TRUE,
78+ 'gd' => TRUE,
79+ 'gs' => TRUE,
80+ 'gx' => TRUE,
81+ 'gz' => TRUE,
82+ 'ha' => TRUE,
83+ 'hb' => TRUE,
84+ 'he' => TRUE,
85+ 'hi' => TRUE,
86+ 'hk' => TRUE,
87+ 'hl' => TRUE,
88+ 'hn' => TRUE,
89+ 'jl' => TRUE,
90+ 'js' => TRUE,
91+ 'jx' => TRUE,
92+ 'ln' => TRUE,
93+ 'mo' => TRUE,
94+ 'nm' => TRUE,
95+ 'nx' => TRUE,
96+ 'qh' => TRUE,
97+ 'sc' => TRUE,
98+ 'sd' => TRUE,
99+ 'sh' => TRUE,
100+ 'sn' => TRUE,
101+ 'sx' => TRUE,
102+ 'tj' => TRUE,
103+ 'tw' => TRUE,
104+ 'xj' => TRUE,
105+ 'xz' => TRUE,
106+ 'yn' => TRUE,
107+ 'zj' => TRUE,
108+ ),
109+
110+ // ccTLD: India
111+ // NIC : http://www.inregistry.in/
112+ // Whois: http://www.inregistry.in/whois_search/
113+ 'in' => array(
114+ // Policies http://www.inregistry.in/policies/
115+ 'ac' => TRUE,
116+ 'co' => TRUE,
117+ 'firm' => TRUE,
118+ 'gen' => TRUE,
119+ 'gov' => TRUE,
120+ 'ind' => TRUE,
121+ 'mil' => TRUE,
122+ 'net' => TRUE,
123+ 'org' => TRUE,
124+ 'res' => TRUE,
125+ // Reserved Names by the government (for the 2nd level)
126+ // http://www.inregistry.in/policies/reserved_names
127+ ),
128+
129+ // ccTLD: South Korea
130+ // NIC : http://www.nic.or.kr/english/
131+ // Whois: http://whois.nida.or.kr/english/
132+ 'kr' => array(
133+ // .kr domain policy [appendix 1] : Qualifications for Second Level Domains
134+ // http://domain.nida.or.kr/eng/policy.jsp
135+
136+ // Organizational (keys must be the bare label: no surrounding whitespace)
137+ 'co' => TRUE,
138+ 'ne' => TRUE,
139+ 'or' => TRUE,
140+ 're' => TRUE,
141+ 'pe' => TRUE,
142+ 'go' => TRUE,
143+ 'mil' => TRUE,
144+ 'ac' => TRUE,
145+ 'hs' => TRUE,
146+ 'ms' => TRUE,
147+ 'es' => TRUE,
148+ 'sc' => TRUE,
149+ 'kg' => TRUE,
150+
151+ // Geographic
152+ 'seoul' => TRUE,
153+ 'busan' => TRUE,
154+ 'daegu' => TRUE,
155+ 'incheon' => TRUE,
156+ 'gwangju' => TRUE,
157+ 'daejeon' => TRUE,
158+ 'ulsan' => TRUE,
159+ 'gyeonggi' => TRUE,
160+ 'gangwon' => TRUE,
161+ 'chungbuk' => TRUE,
162+ 'chungnam' => TRUE,
163+ 'jeonbuk' => TRUE,
164+ 'jeonnam' => TRUE,
165+ 'gyeongbuk' => TRUE,
166+ 'gyeongnam' => TRUE,
167+ 'jeju' => TRUE,
168+ ),
169+
170+ // ccTLD: Japan
171+ // NIC : http://jprs.co.jp/en/
172+ // Whois: http://whois.jprs.jp/en/
173+ 'jp' => array(
174+ // Guide to JP Domain Name
175+ // http://jprs.co.jp/en/jpdomain.html
176+
177+ // Organizational
178+ 'ac' => TRUE,
179+ 'ad' => TRUE,
180+ 'co' => TRUE,
181+ 'ed' => TRUE,
182+ 'go' => TRUE,
183+ 'gr' => TRUE,
184+ 'lg' => TRUE, // pref.<geographic2nd>.lg.jp etc.
185+ 'ne' => TRUE,
186+ 'or' => TRUE,
187+
188+ // Geographic
189+ //
190+ // Examples for 3rd level domains
191+ //'kumamoto' => array(
192+ // // http://www.pref.kumamoto.jp/link/list.asp#4
193+ // 'amakusa' => TRUE,
194+ // 'hitoyoshi' => TRUE,
195+ // 'jonan' => TRUE,
196+ // 'kumamoto' => TRUE,
197+ // ...
198+ //),
199+ 'aichi' => TRUE,
200+ 'akita' => TRUE,
201+ 'aomori' => TRUE,
202+ 'chiba' => TRUE,
203+ 'ehime' => TRUE,
204+ 'fukui' => TRUE,
205+ 'fukuoka' => TRUE,
206+ 'fukushima' => TRUE,
207+ 'gifu' => TRUE,
208+ 'gunma' => TRUE,
209+ 'hiroshima' => TRUE,
210+ 'hokkaido' => TRUE,
211+ 'hyogo' => TRUE,
212+ 'ibaraki' => TRUE,
213+ 'ishikawa' => TRUE,
214+ 'iwate' => TRUE,
215+ 'kagawa' => TRUE,
216+ 'kagoshima' => TRUE,
217+ 'kanagawa' => TRUE,
218+ 'kawasaki' => TRUE,
219+ 'kitakyushu'=> TRUE,
220+ 'kobe' => TRUE,
221+ 'kochi' => TRUE,
222+ 'kumamoto' => TRUE,
223+ 'kyoto' => TRUE,
224+ 'mie' => TRUE,
225+ 'miyagi' => TRUE,
226+ 'miyazaki' => TRUE,
227+ 'nagano' => TRUE,
228+ 'nagasaki' => TRUE,
229+ 'nagoya' => TRUE,
230+ 'nara' => TRUE,
231+ 'niigata' => TRUE,
232+ 'oita' => TRUE,
233+ 'okayama' => TRUE,
234+ 'okinawa' => TRUE,
235+ 'osaka' => TRUE,
236+ 'saga' => TRUE,
237+ 'saitama' => TRUE,
238+ 'sapporo' => TRUE,
239+ 'sendai' => TRUE,
240+ 'shiga' => TRUE,
241+ 'shimane' => TRUE,
242+ 'shizuoka' => TRUE,
243+ 'tochigi' => TRUE,
244+ 'tokushima' => TRUE,
245+ 'tokyo' => TRUE,
246+ 'tottori' => TRUE,
247+ 'toyama' => TRUE,
248+ 'wakayama' => TRUE,
249+ 'yamagata' => TRUE,
250+ 'yamaguchi' => TRUE,
251+ 'yamanashi' => TRUE,
252+ 'yokohama' => TRUE,
253+ ),
254+
255+ // ccTLD: Mexico
256+ // NIC : http://www.nic.mx/
257+ // Whois: http://www.nic.mx/es/Busqueda.Who_Is
258+ 'mx' => array(
259+ // Politicas Generales de Nombres de Dominio
260+ // http://www.nic.mx/es/Politicas?CATEGORY=INDICE
261+ 'com' => TRUE,
262+ 'edu' => TRUE,
263+ 'gob' => TRUE,
264+ 'net' => TRUE,
265+ 'org' => TRUE,
266+ ),
267+
268+ // ccTLD: Russia
269+ // NIC : http://www.cctld.ru/en/
270+ // Whois: http://www.ripn.net:8080/nic/whois/en/
271+ 'ru' => array(
272+ // List of Reserved second-level Domain Names
273+ // http://www.cctld.ru/en/doc/detail.php?id21=20&i21=2
274+
275+ // Organizational
276+ 'ac' => TRUE,
277+ 'com' => TRUE,
278+ 'edu' => TRUE,
279+ 'gov' => TRUE,
280+ 'int' => TRUE,
281+ 'mil' => TRUE,
282+ 'net' => TRUE,
283+ 'org' => TRUE,
284+ 'pp' => TRUE,
285+ //'test' => TRUE,
286+
287+ // Geographic
288+ 'adygeya' => TRUE,
289+ 'altai' => TRUE,
290+ 'amur' => TRUE,
291+ 'amursk' => TRUE,
292+ 'arkhangelsk' => TRUE,
293+ 'astrakhan' => TRUE,
294+ 'baikal' => TRUE,
295+ 'bashkiria' => TRUE,
296+ 'belgorod' => TRUE,
297+ 'bir' => TRUE,
298+ 'bryansk' => TRUE,
299+ 'buryatia' => TRUE,
300+ 'cbg' => TRUE,
301+ 'chel' => TRUE,
302+ 'chelyabinsk' => TRUE,
303+ 'chita' => TRUE,
304+ 'chukotka' => TRUE,
305+ 'chuvashia' => TRUE,
306+ 'cmw' => TRUE,
307+ 'dagestan' => TRUE,
308+ 'dudinka' => TRUE,
309+ 'e-burg' => TRUE,
310+ 'fareast' => TRUE,
311+ 'grozny' => TRUE,
312+ 'irkutsk' => TRUE,
313+ 'ivanovo' => TRUE,
314+ 'izhevsk' => TRUE,
315+ 'jamal' => TRUE,
316+ 'jar' => TRUE,
317+ 'joshkar-ola' => TRUE,
318+ 'k-uralsk' => TRUE,
319+ 'kalmykia' => TRUE,
320+ 'kaluga' => TRUE,
321+ 'kamchatka' => TRUE,
322+ 'karelia' => TRUE,
323+ 'kazan' => TRUE,
324+ 'kchr' => TRUE,
325+ 'kemerovo' => TRUE,
326+ 'khabarovsk' => TRUE,
327+ 'khakassia' => TRUE,
328+ 'khv' => TRUE,
329+ 'kirov' => TRUE,
330+ 'kms' => TRUE,
331+ 'koenig' => TRUE,
332+ 'komi' => TRUE,
333+ 'kostroma' => TRUE,
334+ 'krasnoyarsk' => TRUE,
335+ 'kuban' => TRUE,
336+ 'kurgan' => TRUE,
337+ 'kursk' => TRUE,
338+ 'kustanai' => TRUE,
339+ 'kuzbass' => TRUE,
340+ 'lipetsk' => TRUE,
341+ 'magadan' => TRUE,
342+ 'magnitka' => TRUE,
343+ 'mari-el' => TRUE,
344+ 'mari' => TRUE,
345+ 'marine' => TRUE,
346+ 'mordovia' => TRUE,
347+ 'mosreg' => TRUE,
348+ 'msk' => TRUE,
349+ 'murmansk' => TRUE,
350+ 'mytis' => TRUE,
351+ 'nakhodka' => TRUE,
352+ 'nalchik' => TRUE,
353+ 'nkz' => TRUE,
354+ 'nnov' => TRUE,
355+ 'norilsk' => TRUE,
356+ 'nov' => TRUE,
357+ 'novosibirsk' => TRUE,
358+ 'nsk' => TRUE,
359+ 'omsk' => TRUE,
360+ 'orenburg' => TRUE,
361+ 'oryol' => TRUE,
362+ 'oskol' => TRUE,
363+ 'palana' => TRUE,
364+ 'penza' => TRUE,
365+ 'perm' => TRUE,
366+ 'pskov' => TRUE,
367+ 'ptz' => TRUE,
368+ 'pyatigorsk' => TRUE,
369+ 'rnd' => TRUE,
370+ 'rubtsovsk' => TRUE,
371+ 'ryazan' => TRUE,
372+ 'sakhalin' => TRUE,
373+ 'samara' => TRUE,
374+ 'saratov' => TRUE,
375+ 'simbirsk' => TRUE,
376+ 'smolensk' => TRUE,
377+ 'snz' => TRUE,
378+ 'spb' => TRUE,
379+ 'stavropol' => TRUE,
380+ 'stv' => TRUE,
381+ 'surgut' => TRUE,
382+ 'syzran' => TRUE,
383+ 'tambov' => TRUE,
384+ 'tatarstan' => TRUE,
385+ 'tom' => TRUE,
386+ 'tomsk' => TRUE,
387+ 'tsaritsyn' => TRUE,
388+ 'tsk' => TRUE,
389+ 'tula' => TRUE,
390+ 'tuva' => TRUE,
391+ 'tver' => TRUE,
392+ 'tyumen' => TRUE,
393+ 'udm' => TRUE,
394+ 'udmurtia' => TRUE,
395+ 'ulan-ude' => TRUE,
396+ 'vdonsk' => TRUE,
397+ 'vladikavkaz' => TRUE,
398+ 'vladimir' => TRUE,
399+ 'vladivostok' => TRUE,
400+ 'volgograd' => TRUE,
401+ 'vologda' => TRUE,
402+ 'voronezh' => TRUE,
403+ 'vrn' => TRUE,
404+ 'vyatka' => TRUE,
405+ 'yakutia' => TRUE,
406+ 'yamal' => TRUE,
407+ 'yaroslavl' => TRUE,
408+ 'yekaterinburg' => TRUE,
409+ 'yuzhno-sakhalinsk' => TRUE,
410+ 'zgrad' => TRUE,
411+ ),
412+
413+ // ccTLD: Seychelles
414+ // NIC : http://www.nic.sc/
415+ // Whois: (Not available)
416+ 'sc' => array(
417+ // http://www.nic.sc/policies.html
418+ 'com' => TRUE,
419+ 'edu' => TRUE,
420+ 'gov' => TRUE,
421+ 'net' => TRUE,
422+ 'org' => TRUE,
423+ ),
424+
425+ // ccTLD: Taiwan
426+ // NIC : http://www.twnic.net.tw/
427+ // Whois: http://www.twnic.net.tw/
428+ 'tw' => array(
429+ // Guidelines for Administration of Domain Name Registration
430+ // http://www.twnic.net.tw/english/dn/dn_02.htm
431+ // II. Types of TWNIC Domain Names and Application Requirements
432+ // http://www.twnic.net.tw/english/dn/dn_02_b.htm
433+ 'club' => TRUE,
434+ 'com' => TRUE,
435+ 'ebiz' => TRUE,
436+ 'edu' => TRUE,
437+ 'game' => TRUE,
438+ 'gov' => TRUE,
439+ 'idv' => TRUE,
440+ 'mil' => TRUE,
441+ 'net' => TRUE,
442+ 'org' => TRUE,
443+ // Reserved words for the 2nd level
444+ // http://mydn.twnic.net.tw/en/dn02/INDEX.htm
445+ ),
446+
447+ // ccTLD: Tanzania
448+ // NIC : http://www.psg.com/dns/tz/
449+ // Whois: (Not available)
450+ 'tz' => array(
451+ // TZ DOMAIN NAMING STRUCTURE
452+ // http://www.psg.com/dns/tz/tz.txt
453+ 'ac' => TRUE,
454+ 'co' => TRUE,
455+ 'go' => TRUE,
456+ 'ne' => TRUE,
457+ 'or' => TRUE,
458+ ),
459+
460+ // ccTLD: Ukraine
461+ // NIC : http://www.nic.net.ua/
462+ // Whois: http://whois.com.ua/
463+ 'ua' => array(
464+ // policy for alternative 2nd level domain names (a2ld)
465+ // http://www.nic.net.ua/doc/a2ld
466+ // http://whois.com.ua/
467+ 'cherkassy' => TRUE,
468+ 'chernigov' => TRUE,
469+ 'chernovtsy' => TRUE,
470+ 'ck' => TRUE,
471+ 'cn' => TRUE,
472+ 'com' => TRUE,
473+ 'crimea' => TRUE,
474+ 'cv' => TRUE,
475+ 'dn' => TRUE,
476+ 'dnepropetrovsk' => TRUE,
477+ 'donetsk' => TRUE,
478+ 'dp' => TRUE,
479+ 'edu' => TRUE,
480+ 'gov' => TRUE,
481+ 'if' => TRUE,
482+ 'ivano-frankivsk' => TRUE,
483+ 'kh' => TRUE,
484+ 'kharkov' => TRUE,
485+ 'kherson' => TRUE,
486+ 'kiev' => TRUE,
487+ 'kirovograd' => TRUE,
488+ 'km' => TRUE,
489+ 'kr' => TRUE,
490+ 'ks' => TRUE,
491+ 'lg' => TRUE,
492+ 'lugansk' => TRUE,
493+ 'lutsk' => TRUE,
494+ 'lviv' => TRUE,
495+ 'mk' => TRUE,
496+ 'net' => TRUE,
497+ 'nikolaev' => TRUE,
498+ 'od' => TRUE,
499+ 'odessa' => TRUE,
500+ 'org' => TRUE,
501+ 'pl' => TRUE,
502+ 'poltava' => TRUE,
503+ 'rovno' => TRUE,
504+ 'rv' => TRUE,
505+ 'sebastopol' => TRUE,
506+ 'sumy' => TRUE,
507+ 'te' => TRUE,
508+ 'ternopil' => TRUE,
509+ 'uz' => TRUE,
510+ 'uzhgorod' => TRUE,
511+ 'vinnica' => TRUE,
512+ 'vn' => TRUE,
513+ 'zaporizhzhe' => TRUE,
514+ 'zhitomir' => TRUE,
515+ 'zp' => TRUE,
516+ 'zt' => TRUE,
517+ ),
518+
519+ // ccTLD: United Kingdom
520+ // NIC : http://www.nic.uk/
521+ 'uk' => array(
522+ // Second Level Domains
523+ // http://www.nic.uk/registrants/aboutdomainnames/sld/
524+ 'co' => TRUE,
525+ 'ltd' => TRUE,
526+ 'me' => TRUE,
527+ 'net' => TRUE,
528+ 'nic' => TRUE,
529+ 'org' => TRUE,
530+ 'plc' => TRUE,
531+ 'sch' => TRUE,
532+
533+ // Delegated Second Level Domains
534+ // http://www.nic.uk/registrants/aboutdomainnames/sld/delegated/
535+ 'ac' => TRUE,
536+ 'gov' => TRUE,
537+ 'mil' => TRUE,
538+ 'mod' => TRUE,
539+ 'nhs' => TRUE,
540+ 'police' => TRUE,
541+ ),
542+
543+ // ccTLD: United States of America
544+ // NIC : http://nic.us/
545+ // Whois: http://whois.us/
546+ 'us' => array(
547+ // See RFC1480
548+
549+ // Organizational (label => TRUE, same shape as every other entry)
550+ 'dni' => TRUE,
551+ 'fed' => TRUE,
552+ 'isa' => TRUE,
553+ 'kids' => TRUE,
554+ 'nsn' => TRUE,
555+
556+ // Geographical
557+ // United States Postal Service: State abbreviations (for postal codes)
558+ // http://www.usps.com/ncsc/lookups/abbreviations.html
559+ 'ak' => TRUE, // Alaska
560+ 'al' => TRUE, // Alabama
561+ 'ar' => TRUE, // Arkansas
562+ 'as' => TRUE, // American samoa
563+ 'az' => TRUE, // Arizona
564+ 'ca' => TRUE, // California
565+ 'co' => TRUE, // Colorado
566+ 'ct' => TRUE, // Connecticut
567+ 'dc' => TRUE, // District of Columbia
568+ 'de' => TRUE, // Delaware
569+ 'fl' => TRUE, // Florida
570+ 'fm' => TRUE, // Federated states of Micronesia
571+ 'ga' => TRUE, // Georgia
572+ 'gu' => TRUE, // Guam
573+ 'hi' => TRUE, // Hawaii
574+ 'ia' => TRUE, // Iowa
575+ 'id' => TRUE, // Idaho
576+ 'il' => TRUE, // Illinois
577+ 'in' => TRUE, // Indiana
578+ 'ks' => TRUE, // Kansas
579+ 'ky' => TRUE, // Kentucky
580+ 'la' => TRUE, // Louisiana
581+ 'ma' => TRUE, // Massachusetts
582+ 'md' => TRUE, // Maryland
583+ 'me' => TRUE, // Maine
584+ 'mh' => TRUE, // Marshall Islands
585+ 'mi' => TRUE, // Michigan
586+ 'mn' => TRUE, // Minnesota
587+ 'mo' => TRUE, // Missouri
588+ 'mp' => TRUE, // Northern mariana islands
589+ 'ms' => TRUE, // Mississippi
590+ 'mt' => TRUE, // Montana
591+ 'nc' => TRUE, // North Carolina
592+ 'nd' => TRUE, // North Dakota
593+ 'ne' => TRUE, // Nebraska
594+ 'nh' => TRUE, // New Hampshire
595+ 'nj' => TRUE, // New Jersey
596+ 'nm' => TRUE, // New Mexico
597+ 'nv' => TRUE, // Nevada
598+ 'ny' => TRUE, // New York
599+ 'oh' => TRUE, // Ohio
600+ 'ok' => TRUE, // Oklahoma
601+ 'or' => TRUE, // Oregon
602+ 'pa' => TRUE, // Pennsylvania
603+ 'pr' => TRUE, // Puerto Rico
604+ 'pw' => TRUE, // Palau
605+ 'ri' => TRUE, // Rhode Island
606+ 'sc' => TRUE, // South Carolina
607+ 'sd' => TRUE, // South Dakota
608+ 'tn' => TRUE, // Tennessee
609+ 'tx' => TRUE, // Texas
610+ 'ut' => TRUE, // Utah
611+ 'va' => TRUE, // Virginia
612+ 'vi' => TRUE, // Virgin Islands
613+ 'vt' => TRUE, // Vermont
614+ 'wa' => TRUE, // Washington
615+ 'wi' => TRUE, // Wisconsin
616+ 'wv' => TRUE, // West Virginia
617+ 'wy' => TRUE, // Wyoming
618+ ),
619+
620+ // ccTLD: South Africa
621+ // NIC : http://www.zadna.org.za/
622+ // Whois:
623+ // ac.za http://www.tenet.ac.za/cgi/cgi_domainquery.exe
624+ // co.za http://co.za/whois.shtml
625+ // gov.za http://dnsadmin.gov.za/
626+ // org.za http://www.org.za/
627+ 'za' => array(
628+ // Second-level subdomains of .ZA
629+ // http://www.zadna.org.za/slds.html
630+ 'ac' => TRUE,
631+ 'city' => TRUE,
632+ 'co' => TRUE,
633+ 'edu' => TRUE,
634+ 'gov' => TRUE,
635+ 'law' => TRUE,
636+ 'mil' => TRUE,
637+ 'nom' => TRUE,
638+ 'org' => TRUE,
639+ 'school' => array(
640+ // Provincial Domains
641+ // http://www.esn.org.za/dns/
642+ 'ecape' => TRUE,
643+ 'fs' => TRUE,
644+ 'gp' => TRUE,
645+ 'kzn' => TRUE,
646+ 'lp' => TRUE,
647+ 'mpm' => TRUE,
648+ 'ncape' => TRUE,
649+ 'nw' => TRUE,
650+ 'wcape' => TRUE,
651+ ),
652+ ),
653+
654+);
655+?>
\ No newline at end of file
--- a/lib/spam.php
+++ b/lib/spam.php
@@ -1,5 +1,5 @@
11 <?php
2-// $Id: spam.php,v 1.28 2007/06/24 15:25:06 henoheno Exp $
2+// $Id: spam.php,v 1.29 2007/07/03 14:47:20 henoheno Exp $
33 // Copyright (C) 2006-2007 PukiWiki Developers Team
44 // License: GPL v2 or (at your option) any later version
55 //
@@ -7,7 +7,10 @@
77 //
88 // (PHP 4 >= 4.3.0): preg_match_all(PREG_OFFSET_CAPTURE): $method['uri_XXX'] related feature
99
10-if (! defined('SPAM_INI_FILE')) define('SPAM_INI_FILE', 'spam.ini.php');
10+require_once('spam_pickup.php');
11+
12+if (! defined('SPAM_INI_FILE')) define('SPAM_INI_FILE', 'spam.ini.php');
13+if (! defined('DOMAIN_INI_FILE')) define('DOMAIN_INI_FILE', 'domain.ini.php');
1114
1215 // ---------------------
1316 // Compat etc
@@ -37,7 +40,9 @@ function preg_grep_invert($pattern = '//', $input = array())
3740 }
3841 }
3942
40-// ----
43+
44+// ---------------------
45+// Utilities
4146
4247 // Very roughly, shrink the lines of var_export()
4348 // NOTE: If the same data exists, it must be corrupted.
@@ -67,41 +72,29 @@ function var_export_shrink($expression, $return = FALSE, $ignore_numeric_keys =
6772 }
6873 }
6974
70-// Remove redundant values from array()
71-function array_unique_recursive($array = array())
75+// Reverse $string with specified delimiter
76+function delimiter_reverse($string = 'foo.bar.example.com', $from_delim = '.', $to_delim = '.')
7277 {
73- if (! is_array($array)) return $array;
74-
75- $tmp = array();
76- foreach($array as $key => $value){
77- if (is_array($value)) {
78- $array[$key] = array_unique_recursive($value);
79- } else {
80- if (isset($tmp[$value])) {
81- unset($array[$key]);
82- } else {
83- $tmp[$value] = TRUE;
84- }
85- }
86- }
78+ if (! is_string($string) || ! is_string($from_delim) || ! is_string($to_delim))
79+ return $string;
8780
88- return $array;
81+ // com.example.bar.foo
82+ return implode($to_delim, array_reverse(explode($from_delim, $string)));
8983 }
9084
91-// Renumber all numeric keys from 0
92-function array_renumber_numeric_keys(& $array)
85+// ksort() by domain
86+function ksort_by_domain(& $array)
9387 {
94- if (! is_array($array)) return $array;
95-
96- $count = -1;
97- $tmp = array();
98- foreach($array as $key => $value){
99- if (is_array($value)) array_renumber_numeric_keys($array[$key]); // Recurse
100- if (is_numeric($key)) $tmp[$key] = ++$count;
88+ $sort = array();
89+ foreach(array_keys($array) as $key) {
90+ $sort[delimiter_reverse($key)] = $key;
10191 }
102- array_rename_keys($array, $tmp);
103-
104- return $array;
92+ ksort($sort, SORT_STRING);
93+ $result = array();
94+ foreach($sort as $key) {
95+ $result[$key] = & $array[$key];
96+ }
97+ $array = $result;
10598 }
10699
107100 // Roughly strings(1) using PCRE
@@ -153,154 +146,41 @@ function strings($binary = '', $min_len = 4, $ignore_space = FALSE, $multibyte =
153146 return $binary;
154147 }
155148
156-// Reverse $string with specified delimiter
157-function delimiter_reverse($string = 'foo.bar.example.com', $from_delim = '.', $to_delim = '.')
158-{
159- if (! is_string($string) || ! is_string($from_delim) || ! is_string($to_delim))
160- return $string;
161-
162- // com.example.bar.foo
163- return implode($to_delim, array_reverse(explode($from_delim, $string)));
164-}
165-
166149
167150 // ---------------------
168-// URI pickup
169-
170-// Return an array of URIs in the $string
171-// [OK] http://nasty.example.org#nasty_string
172-// [OK] http://nasty.example.org:80/foo/xxx#nasty_string/bar
173-// [OK] ftp://nasty.example.org:80/dfsdfs
174-// [OK] ftp://cnn.example.com&story=breaking_news@10.0.0.1/top_story.htm (from RFC3986)
175-function uri_pickup($string = '')
176-{
177- if (! is_string($string)) return array();
178-
179- // Not available for: IDN(ignored)
180- $array = array();
181- preg_match_all(
182- // scheme://userinfo@host:port/path/or/pathinfo/maybefile.and?query=string#fragment
183- // Refer RFC3986 (Regex below is not strict)
184- '#(\b[a-z][a-z0-9.+-]{1,8}):[/\\\]+' . // 1: Scheme
185- '(?:' .
186- '([^\s<>"\'\[\]/\#?@]*)' . // 2: Userinfo (Username)
187- '@)?' .
188- '(' .
189- // 3: Host
190- '\[[0-9a-f:.]+\]' . '|' . // IPv6([colon-hex and dot]): RFC2732
191- '(?:[0-9]{1,3}\.){3}[0-9]{1,3}' . '|' . // IPv4(dot-decimal): 001.22.3.44
192- '[a-z0-9_-][a-z0-9_.-]+[a-z0-9_-]' . // hostname(FQDN) : foo.example.org
193- ')' .
194- '(?::([0-9]*))?' . // 4: Port
195- '((?:/+[^\s<>"\'\[\]/\#]+)*/+)?' . // 5: Directory path or path-info
196- '([^\s<>"\'\[\]\#?]+)?' . // 6: File?
197- '(?:\?([^\s<>"\'\[\]\#]+))?' . // 7: Query string
198- '(?:\#([a-z0-9._~%!$&\'()*+,;=:@-]*))?' . // 8: Fragment
199- '#i',
200- $string, $array, PREG_SET_ORDER | PREG_OFFSET_CAPTURE
201- );
202-
203- // Format the $array
204- static $parts = array(
205- 1 => 'scheme', 2 => 'userinfo', 3 => 'host', 4 => 'port',
206- 5 => 'path', 6 => 'file', 7 => 'query', 8 => 'fragment'
207- );
208- $default = array('');
209- foreach(array_keys($array) as $uri) {
210- $_uri = & $array[$uri];
211- array_rename_keys($_uri, $parts, TRUE, $default);
212- $offset = $_uri['scheme'][1]; // Scheme's offset = URI's offset
213- foreach(array_keys($_uri) as $part) {
214- $_uri[$part] = & $_uri[$part][0]; // Remove offsets
215- }
216- }
217-
218- foreach(array_keys($array) as $uri) {
219- $_uri = & $array[$uri];
220- if ($_uri['scheme'] === '') {
221- unset($array[$uri]); // Considererd harmless
222- continue;
223- }
224- unset($_uri[0]); // Matched string itself
225- $_uri['area']['offset'] = $offset; // Area offset for area_measure()
226- }
227-
228- return $array;
229-}
151+// Utilities: Arrays
230152
231-// Normalize an array of URI arrays
232-// NOTE: Give me the uri_pickup() results
233-function uri_pickup_normalize(& $pickups, $destructive = TRUE)
153+// Count leaves (A leaf = value that is not an array, or an empty array)
154+function array_count_leaves($array = array(), $count_empty = FALSE)
234155 {
235- if (! is_array($pickups)) return $pickups;
156+ if (! is_array($array) || (empty($array) && $count_empty)) return 1;
236157
237- if ($destructive) {
238- foreach (array_keys($pickups) as $key) {
239- $_key = & $pickups[$key];
240- $_key['scheme'] = isset($_key['scheme']) ? scheme_normalize($_key['scheme']) : '';
241- $_key['host'] = isset($_key['host']) ? host_normalize($_key['host']) : '';
242- $_key['port'] = isset($_key['port']) ? port_normalize($_key['port'], $_key['scheme'], FALSE) : '';
243- $_key['path'] = isset($_key['path']) ? strtolower(path_normalize($_key['path'])) : '';
244- $_key['file'] = isset($_key['file']) ? file_normalize($_key['file']) : '';
245- $_key['query'] = isset($_key['query']) ? query_normalize($_key['query']) : '';
246- $_key['fragment'] = isset($_key['fragment']) ? strtolower($_key['fragment']) : '';
247- }
248- } else {
249- foreach (array_keys($pickups) as $key) {
250- $_key = & $pickups[$key];
251- $_key['scheme'] = isset($_key['scheme']) ? scheme_normalize($_key['scheme']) : '';
252- $_key['host'] = isset($_key['host']) ? strtolower($_key['host']) : '';
253- $_key['port'] = isset($_key['port']) ? port_normalize($_key['port'], $_key['scheme'], FALSE) : '';
254- $_key['path'] = isset($_key['path']) ? path_normalize($_key['path']) : '';
255- }
158+ // Recurse
159+ $count = 0;
160+ foreach ($array as $part) {
161+ $count += array_count_leaves($part, $count_empty);
256162 }
257-
258- return $pickups;
163+ return $count;
259164 }
260165
261-// An URI array => An URI (See uri_pickup())
262-// USAGE:
263-// $pickups = uri_pickup('a string include some URIs');
264-// $uris = array();
265-// foreach (array_keys($pickups) as $key) {
266-// $uris[$key] = uri_pickup_implode($pickups[$key]);
267-// }
268-function uri_pickup_implode($uri = array())
166+// An array-leaves to a flat array
167+function array_flat_leaves($array, $unique = TRUE)
269168 {
270- if (empty($uri) || ! is_array($uri)) return NULL;
169+ if (! is_array($array)) return $array;
271170
272171 $tmp = array();
273- if (isset($uri['scheme']) && $uri['scheme'] !== '') {
274- $tmp[] = & $uri['scheme'];
275- $tmp[] = '://';
276- }
277- if (isset($uri['userinfo']) && $uri['userinfo'] !== '') {
278- $tmp[] = & $uri['userinfo'];
279- $tmp[] = '@';
280- }
281- if (isset($uri['host']) && $uri['host'] !== '') {
282- $tmp[] = & $uri['host'];
283- }
284- if (isset($uri['port']) && $uri['port'] !== '') {
285- $tmp[] = ':';
286- $tmp[] = & $uri['port'];
287- }
288- if (isset($uri['path']) && $uri['path'] !== '') {
289- $tmp[] = & $uri['path'];
290- }
291- if (isset($uri['file']) && $uri['file'] !== '') {
292- $tmp[] = & $uri['file'];
293- }
294- if (isset($uri['query']) && $uri['query'] !== '') {
295- $tmp[] = '?';
296- $tmp[] = & $uri['query'];
297- }
298- if (isset($uri['fragment']) && $uri['fragment'] !== '') {
299- $tmp[] = '#';
300- $tmp[] = & $uri['fragment'];
172+ foreach(array_keys($array) as $key) {
173+ if (is_array($array[$key])) {
174+ // Recurse, passing $unique through so nested leaves are only de-duplicated when the caller asked for it
175+ foreach(array_flat_leaves($array[$key], $unique) as $_value) {
176+ $tmp[] = $_value;
177+ }
178+ } else {
179+ $tmp[] = & $array[$key];
180+ }
301181 }
302182
303- return implode('', $tmp);
183+ return $unique ? array_values(array_unique($tmp)) : $tmp;
304184 }
305185
306186 // $array['something'] => $array['wanted']
@@ -327,641 +207,28 @@ function array_rename_keys(& $array, $keys = array('from' => 'to'), $force = FAL
327207 return TRUE;
328208 }
329209
330-// ---------------------
331-// Area pickup
332-
333-// Pickup all of markup areas
334-function area_pickup($string = '', $method = array())
335-{
336- $area = array();
337- if (empty($method)) return $area;
338-
339- // Anchor tag pair by preg_match and preg_match_all()
340- // [OK] <a href></a>
341- // [OK] <a href= >Good site!</a>
342- // [OK] <a href= "#" >test</a>
343- // [OK] <a href="http://nasty.example.com">visit http://nasty.example.com/</a>
344- // [OK] <a href=\'http://nasty.example.com/\' >discount foobar</a>
345- // [NG] <a href="http://ng.example.com">visit http://ng.example.com _not_ended_
346- $regex = '#<a\b[^>]*\bhref\b[^>]*>.*?</a\b[^>]*(>)#is';
347- if (isset($method['area_anchor'])) {
348- $areas = array();
349- $count = isset($method['asap']) ?
350- preg_match($regex, $string) :
351- preg_match_all($regex, $string, $areas);
352- if (! empty($count)) $area['area_anchor'] = $count;
353- }
354- if (isset($method['uri_anchor'])) {
355- $areas = array();
356- preg_match_all($regex, $string, $areas, PREG_SET_ORDER | PREG_OFFSET_CAPTURE);
357- foreach(array_keys($areas) as $_area) {
358- $areas[$_area] = array(
359- $areas[$_area][0][1], // Area start (<a href>)
360- $areas[$_area][1][1], // Area end (</a>)
361- );
362- }
363- if (! empty($areas)) $area['uri_anchor'] = $areas;
364- }
365-
366- // phpBB's "BBCode" pair by preg_match and preg_match_all()
367- // [OK] [url][/url]
368- // [OK] [url]http://nasty.example.com/[/url]
369- // [OK] [link]http://nasty.example.com/[/link]
370- // [OK] [url=http://nasty.example.com]visit http://nasty.example.com/[/url]
371- // [OK] [link http://nasty.example.com/]buy something[/link]
372- $regex = '#\[(url|link)\b[^\]]*\].*?\[/\1\b[^\]]*(\])#is';
373- if (isset($method['area_bbcode'])) {
374- $areas = array();
375- $count = isset($method['asap']) ?
376- preg_match($regex, $string) :
377- preg_match_all($regex, $string, $areas, PREG_SET_ORDER);
378- if (! empty($count)) $area['area_bbcode'] = $count;
379- }
380- if (isset($method['uri_bbcode'])) {
381- $areas = array();
382- preg_match_all($regex, $string, $areas, PREG_SET_ORDER | PREG_OFFSET_CAPTURE);
383- foreach(array_keys($areas) as $_area) {
384- $areas[$_area] = array(
385- $areas[$_area][0][1], // Area start ([url])
386- $areas[$_area][2][1], // Area end ([/url])
387- );
388- }
389- if (! empty($areas)) $area['uri_bbcode'] = $areas;
390- }
391-
392- // Various Wiki syntax
393- // [text_or_uri>text_or_uri]
394- // [text_or_uri:text_or_uri]
395- // [text_or_uri|text_or_uri]
396- // [text_or_uri->text_or_uri]
397- // [text_or_uri text_or_uri] // MediaWiki
398- // MediaWiki: [http://nasty.example.com/ visit http://nasty.example.com/]
399-
400- return $area;
401-}
402-
403-// If in doubt, it's a little doubtful
404-// if (Area => inside <= Area) $belief += -1
405-function area_measure($areas, & $array, $belief = -1, $a_key = 'area', $o_key = 'offset')
406-{
407- if (! is_array($areas) || ! is_array($array)) return;
408-
409- $areas_keys = array_keys($areas);
410- foreach(array_keys($array) as $u_index) {
411- $offset = isset($array[$u_index][$o_key]) ?
412- intval($array[$u_index][$o_key]) : 0;
413- foreach($areas_keys as $a_index) {
414- if (isset($array[$u_index][$a_key])) {
415- $offset_s = intval($areas[$a_index][0]);
416- $offset_e = intval($areas[$a_index][1]);
417- // [Area => inside <= Area]
418- if ($offset_s < $offset && $offset < $offset_e) {
419- $array[$u_index][$a_key] += $belief;
420- }
421- }
422- }
423- }
424-}
425-
426-// ---------------------
427-// Spam-uri pickup
428-
429-// Domain exposure callback (See spam_uri_pickup_preprocess())
430-// http://victim.example.org/?foo+site:nasty.example.com+bar
431-// => http://nasty.example.com/?refer=victim.example.org
432-// NOTE: 'refer=' is not so good for (at this time).
433-// Consider about using IP address of the victim, try to avoid that.
434-function _preg_replace_callback_domain_exposure($matches = array())
435-{
436- $result = '';
437-
438- // Preserve the victim URI as a complicity or ...
439- if (isset($matches[5])) {
440- $result =
441- $matches[1] . '://' . // scheme
442- $matches[2] . '/' . // victim.example.org
443- $matches[3]; // The rest of all (before victim)
444- }
445-
446- // Flipped URI
447- if (isset($matches[4])) {
448- $result =
449- $matches[1] . '://' . // scheme
450- $matches[4] . // nasty.example.com
451- '/?refer=' . strtolower($matches[2]) . // victim.example.org
452- ' ' . $result;
453- }
454-
455- return $result;
456-}
457-
458-// Preprocess: Removing uninteresting parts for URI detection
459-function spam_uri_removing_hocus_pocus($binary = '', $method = array())
460-{
461- $length = 4 ; // 'http'(1) and '://'(2) and 'fqdn'(1)
462- if (is_array($method)) {
463- // '<a'(2) or 'href='(5) or '>'(1) or '</a>'(4)
464- // '[uri'(4) or ']'(1) or '[/uri]'(6)
465- if (isset($method['area_anchor']) || isset($method['uri_anchor']) ||
466- isset($method['area_bbcode']) || isset($method['uri_bbcode']))
467- $length = 1; // Seems not effective
468- }
469-
470- // Removing sequential spaces and too short lines
471- $binary = strings($binary, $length, TRUE, FALSE); // Multibyte NOT needed
472-
473- // Remove words (has no '<>[]:') between spaces
474- $binary = preg_replace('/[ \t][\w.,()\ \t]+[ \t]/', ' ', $binary);
475-
476- return $binary;
477-}
478-
479-// Preprocess: rawurldecode() and adding space(s) and something
480-// to detect/count some URIs _if possible_
481-// NOTE: It may be dangerous to var_dump(result). [e.g. 'javascript:']
482-// [OK] http://victim.example.org/?site:nasty.example.org
483-// [OK] http://victim.example.org/nasty.example.org
484-// [OK] http://victim.example.org/go?http%3A%2F%2Fnasty.example.org
485-// [OK] http://victim.example.org/http://nasty.example.org
486-function spam_uri_pickup_preprocess($string = '', $method = array())
487-{
488- if (! is_string($string)) return '';
489-
490- $string = spam_uri_removing_hocus_pocus(rawurldecode($string), $method);
491- //var_dump(htmlspecialchars($string));
492-
493- // Domain exposure (simple)
494- // http://victim.example.org/nasty.example.org/path#frag
495- // => http://nasty.example.org/?refer=victim.example.org and original
496- $string = preg_replace(
497- '#h?ttp://' .
498- '(' .
499- 'ime\.nu' . '|' . // 2ch.net
500- 'ime\.st' . '|' . // 2ch.net
501- 'link\.toolbot\.com' . '|' .
502- 'urlx\.org' .
503- ')' .
504- '/([a-z0-9.%_-]+\.[a-z0-9.%_-]+)#i', // nasty.example.org
505- 'http://$2/?refer=$1 $0', // Preserve $0 or remove?
506- $string
507- );
508-
509- // Domain exposure (gate-big5)
510- // http://victim.example.org/gate/big5/nasty.example.org/path
511- // => http://nasty.example.org/?refer=victim.example.org and original
512- $string = preg_replace(
513- '#h?ttp://' .
514- '(' .
515- 'big5.51job.com' . '|' .
516- 'big5.china.com' . '|' .
517- 'big5.xinhuanet.com' . '|' .
518- ')' .
519- '/gate/big5' .
520- '/([a-z0-9.%_-]+\.[a-z0-9.%_-]+)' .
521- '#i', // nasty.example.org
522- 'http://$2/?refer=$1 $0', // Preserve $0 or remove?
523- $string
524- );
525-
526- // Domain exposure (See _preg_replace_callback_domain_exposure())
527- $string = preg_replace_callback(
528- array(
529- '#(http)://' .
530- '(' .
531- // Something Google: http://www.google.com/supported_domains
532- '(?:[a-z0-9.]+\.)?google\.[a-z]{2,3}(?:\.[a-z]{2})?' .
533- '|' .
534- // AltaVista
535- '(?:[a-z0-9.]+\.)?altavista.com' .
536-
537- ')' .
538- '/' .
539- '([a-z0-9?=&.%_/\'\\\+-]+)' . // path/?query=foo+bar+
540- '\bsite:([a-z0-9.%_-]+\.[a-z0-9.%_-]+)' . // site:nasty.example.com
541- //'()' . // Preserve or remove?
542- '#i',
543- ),
544- '_preg_replace_callback_domain_exposure',
545- $string
546- );
547-
548- // URI exposure (uriuri => uri uri)
549- $string = preg_replace(
550- array(
551- '#(?<! )(?:https?|ftp):/#i',
552- // '#[a-z][a-z0-9.+-]{1,8}://#i',
553- // '#[a-z][a-z0-9.+-]{1,8}://#i'
554- ),
555- ' $0',
556- $string
557- );
558-
559- return $string;
560-}
561-
562-// Main function of spam-uri pickup,
563-// A wrapper function of uri_pickup()
564-function spam_uri_pickup($string = '', $method = array())
565-{
566- if (! is_array($method) || empty($method)) {
567- $method = check_uri_spam_method();
568- }
569-
570- $string = spam_uri_pickup_preprocess($string, $method);
571-
572- $array = uri_pickup($string);
573-
574- // Area elevation of URIs, for '(especially external)link' intention
575- if (! empty($array)) {
576- $_method = array();
577- if (isset($method['uri_anchor'])) $_method['uri_anchor'] = & $method['uri_anchor'];
578- if (isset($method['uri_bbcode'])) $_method['uri_bbcode'] = & $method['uri_bbcode'];
579- $areas = area_pickup($string, $_method, TRUE);
580- if (! empty($areas)) {
581- $area_shadow = array();
582- foreach (array_keys($array) as $key) {
583- $area_shadow[$key] = & $array[$key]['area'];
584- foreach (array_keys($_method) as $_key) {
585- $area_shadow[$key][$_key] = 0;
586- }
587- }
588- foreach (array_keys($_method) as $_key) {
589- if (isset($areas[$_key])) {
590- area_measure($areas[$_key], $area_shadow, 1, $_key);
591- }
592- }
593- }
594- }
595-
596- // Remove 'offset's for area_measure()
597- foreach(array_keys($array) as $key)
598- unset($array[$key]['area']['offset']);
599-
600- return $array;
601-}
602-
603-
604-// ---------------------
605-// Normalization
606-
607-// Scheme normalization: Renaming the schemes
608-// snntp://example.org => nntps://example.org
609-// NOTE: Keep the static lists simple. See also port_normalize().
610-function scheme_normalize($scheme = '', $abbrevs_harmfull = TRUE)
611-{
612- // Abbreviations that have no intention of linking
613- static $abbrevs = array(
614- 'ttp' => 'http',
615- 'ttps' => 'https',
616- );
617-
618- // Aliases => normalized ones
619- static $aliases = array(
620- 'pop' => 'pop3',
621- 'news' => 'nntp',
622- 'imap4' => 'imap',
623- 'snntp' => 'nntps',
624- 'snews' => 'nntps',
625- 'spop3' => 'pop3s',
626- 'pops' => 'pop3s',
627- );
628-
629- if (! is_string($scheme)) return '';
630-
631- $scheme = strtolower($scheme);
632- if (isset($abbrevs[$scheme])) {
633- $scheme = $abbrevs_harmfull ? $abbrevs[$scheme] : '';
634- }
635- if (isset($aliases[$scheme])) {
636- $scheme = $aliases[$scheme];
637- }
638-
639- return $scheme;
640-}
641-
642-// Hostname normalization (Destructive)
643-// www.foo => www.foo ('foo' seems TLD)
644-// www.foo.bar => foo.bar
645-// www.10.20 => www.10.20 (Invalid hostname)
646-// NOTE:
647-// 'www' is mostly used as traditional hostname of WWW server.
648-// 'www.foo.bar' may be identical with 'foo.bar'.
649-function host_normalize($host = '')
650-{
651- if (! is_string($host)) return '';
652-
653- $host = strtolower($host);
654- $matches = array();
655- if (preg_match('/^www\.(.+\.[a-z]+)$/', $host, $matches)) {
656- return $matches[1];
657- } else {
658- return $host;
659- }
660-}
661-
662-// Port normalization: Suppress the (redundant) default port
663-// HTTP://example.org:80/ => http://example.org/
664-// HTTP://example.org:8080/ => http://example.org:8080/
665-// HTTPS://example.org:443/ => https://example.org/
666-function port_normalize($port, $scheme, $scheme_normalize = FALSE)
667-{
668- // Schemes that users _maybe_ want to add protocol-handlers
669- // to their web browsers. (and attackers _maybe_ want to use ...)
670- // Reference: http://www.iana.org/assignments/port-numbers
671- static $array = array(
672- // scheme => default port
673- 'ftp' => 21,
674- 'ssh' => 22,
675- 'telnet' => 23,
676- 'smtp' => 25,
677- 'tftp' => 69,
678- 'gopher' => 70,
679- 'finger' => 79,
680- 'http' => 80,
681- 'pop3' => 110,
682- 'sftp' => 115,
683- 'nntp' => 119,
684- 'imap' => 143,
685- 'irc' => 194,
686- 'wais' => 210,
687- 'https' => 443,
688- 'nntps' => 563,
689- 'rsync' => 873,
690- 'ftps' => 990,
691- 'telnets' => 992,
692- 'imaps' => 993,
693- 'ircs' => 994,
694- 'pop3s' => 995,
695- 'mysql' => 3306,
696- );
697-
698- // intval() converts '0-1' to '0', so preg_match() rejects these invalid ones
699- if (! is_numeric($port) || $port < 0 || preg_match('/[^0-9]/i', $port))
700- return '';
701-
702- $port = intval($port);
703- if ($scheme_normalize) $scheme = scheme_normalize($scheme);
704- if (isset($array[$scheme]) && $port == $array[$scheme])
705- $port = ''; // Ignore the defaults
706-
707- return $port;
708-}
709-
710-// Path normalization
711-// http://example.org => http://example.org/
712-// http://example.org#hoge => http://example.org/#hoge
713-// http://example.org/path/a/b/./c////./d => http://example.org/path/a/b/c/d
714-// http://example.org/path/../../a/../back => http://example.org/back
715-function path_normalize($path = '', $divider = '/', $add_root = TRUE)
210+// Remove redundant values from array()
211+function array_unique_recursive($array = array())
716212 {
717- if (! is_string($divider)) return is_string($path) ? $path : '';
718-
719- if ($add_root) {
720- $first_div = & $divider;
721- } else {
722- $first_div = '';
723- }
724- if (! is_string($path) || $path == '') return $first_div;
725-
726- if (strpos($path, $divider, strlen($path) - strlen($divider)) === FALSE) {
727- $last_div = '';
728- } else {
729- $last_div = & $divider;
730- }
731-
732- $array = explode($divider, $path);
733-
734- // Remove paddings ('//' and '/./')
735- foreach(array_keys($array) as $key) {
736- if ($array[$key] == '' || $array[$key] == '.') {
737- unset($array[$key]);
738- }
739- }
213+ if (! is_array($array)) return $array;
740214
741- // Remove back-tracks ('/../')
742215 $tmp = array();
743- foreach($array as $value) {
744- if ($value == '..') {
745- array_pop($tmp);
216+ foreach($array as $key => $value){
217+ if (is_array($value)) {
218+ $array[$key] = array_unique_recursive($value);
746219 } else {
747- array_push($tmp, $value);
748- }
749- }
750- $array = & $tmp;
751-
752- if (empty($array)) {
753- return $first_div;
754- } else {
755- return $first_div . implode($divider, $array) . $last_div;
756- }
757-}
758-
759-// DirectoryIndex normalize (Destructive and rough)
760-// TODO: sample.en.ja.html.gz => sample.html
761-function file_normalize($file = 'index.html.en')
762-{
763- static $simple_defaults = array(
764- 'default.htm' => TRUE,
765- 'default.html' => TRUE,
766- 'default.asp' => TRUE,
767- 'default.aspx' => TRUE,
768- 'index' => TRUE, // Some system can omit the suffix
769- );
770-
771- static $content_suffix = array(
772- // index.xxx, sample.xxx
773- 'htm' => TRUE,
774- 'html' => TRUE,
775- 'shtml' => TRUE,
776- 'jsp' => TRUE,
777- 'php' => TRUE,
778- 'php3' => TRUE,
779- 'php4' => TRUE,
780- 'pl' => TRUE,
781- 'py' => TRUE,
782- 'rb' => TRUE,
783- 'cgi' => TRUE,
784- 'xml' => TRUE,
785- );
786-
787- static $language_suffix = array(
788- // Reference: Apache 2.0.59 'AddLanguage' default
789- 'ca' => TRUE,
790- 'cs' => TRUE, // cs
791- 'cz' => TRUE, // cs
792- 'de' => TRUE,
793- 'dk' => TRUE, // da
794- 'el' => TRUE,
795- 'en' => TRUE,
796- 'eo' => TRUE,
797- 'es' => TRUE,
798- 'et' => TRUE,
799- 'fr' => TRUE,
800- 'he' => TRUE,
801- 'hr' => TRUE,
802- 'it' => TRUE,
803- 'ja' => TRUE,
804- 'ko' => TRUE,
805- 'ltz' => TRUE,
806- 'nl' => TRUE,
807- 'nn' => TRUE,
808- 'no' => TRUE,
809- 'po' => TRUE,
810- 'pt' => TRUE,
811- 'pt-br' => TRUE,
812- 'ru' => TRUE,
813- 'sv' => TRUE,
814- 'zh-cn' => TRUE,
815- 'zh-tw' => TRUE,
816-
817- // Reference: Apache 2.0.59 default 'index.html' variants
818- 'ee' => TRUE,
819- 'lb' => TRUE,
820- 'var' => TRUE,
821- );
822-
823- static $charset_suffix = array(
824- // Reference: Apache 2.0.59 'AddCharset' default
825- 'iso8859-1' => TRUE, // ISO-8859-1
826- 'latin1' => TRUE, // ISO-8859-1
827- 'iso8859-2' => TRUE, // ISO-8859-2
828- 'latin2' => TRUE, // ISO-8859-2
829- 'cen' => TRUE, // ISO-8859-2
830- 'iso8859-3' => TRUE, // ISO-8859-3
831- 'latin3' => TRUE, // ISO-8859-3
832- 'iso8859-4' => TRUE, // ISO-8859-4
833- 'latin4' => TRUE, // ISO-8859-4
834- 'iso8859-5' => TRUE, // ISO-8859-5
835- 'latin5' => TRUE, // ISO-8859-5
836- 'cyr' => TRUE, // ISO-8859-5
837- 'iso-ru' => TRUE, // ISO-8859-5
838- 'iso8859-6' => TRUE, // ISO-8859-6
839- 'latin6' => TRUE, // ISO-8859-6
840- 'arb' => TRUE, // ISO-8859-6
841- 'iso8859-7' => TRUE, // ISO-8859-7
842- 'latin7' => TRUE, // ISO-8859-7
843- 'grk' => TRUE, // ISO-8859-7
844- 'iso8859-8' => TRUE, // ISO-8859-8
845- 'latin8' => TRUE, // ISO-8859-8
846- 'heb' => TRUE, // ISO-8859-8
847- 'iso8859-9' => TRUE, // ISO-8859-9
848- 'latin9' => TRUE, // ISO-8859-9
849- 'trk' => TRUE, // ISO-8859-9
850- 'iso2022-jp'=> TRUE, // ISO-2022-JP
851- 'jis' => TRUE, // ISO-2022-JP
852- 'iso2022-kr'=> TRUE, // ISO-2022-KR
853- 'kis' => TRUE, // ISO-2022-KR
854- 'iso2022-cn'=> TRUE, // ISO-2022-CN
855- 'cis' => TRUE, // ISO-2022-CN
856- 'big5' => TRUE,
857- 'cp-1251' => TRUE, // ru, WINDOWS-1251
858- 'win-1251' => TRUE, // ru, WINDOWS-1251
859- 'cp866' => TRUE, // ru
860- 'koi8-r' => TRUE, // ru, KOI8-r
861- 'koi8-ru' => TRUE, // ru, KOI8-r
862- 'koi8-uk' => TRUE, // ru, KOI8-ru
863- 'ua' => TRUE, // ru, KOI8-ru
864- 'ucs2' => TRUE, // ru, ISO-10646-UCS-2
865- 'ucs4' => TRUE, // ru, ISO-10646-UCS-4
866- 'utf8' => TRUE,
867-
868- // Reference: Apache 2.0.59 default 'index.html' variants
869- 'euc-kr' => TRUE,
870- 'gb2312' => TRUE,
871- );
872-
873- // May uncompress by web browsers on the fly
874- // Must be at the last of the filename
875- // Reference: Apache 2.0.59 'AddEncoding'
876- static $encoding_suffix = array(
877- 'z' => TRUE,
878- 'gz' => TRUE,
879- );
880-
881- if (! is_string($file)) return '';
882- $_file = strtolower($file);
883- if (isset($simple_defaults[$_file])) return '';
884-
885-
886- // Roughly removing language/character-set/encoding suffixes
887- // References:
888- // * Apache 2 document about 'Content-negotiation', 'mod_mime' and 'mod_negotiation'
889- // http://httpd.apache.org/docs/2.0/content-negotiation.html
890- // http://httpd.apache.org/docs/2.0/mod/mod_mime.html
891- // http://httpd.apache.org/docs/2.0/mod/mod_negotiation.html
892- // * http://www.iana.org/assignments/character-sets
893- // * RFC3066: Tags for the Identification of Languages
894- // http://www.ietf.org/rfc/rfc3066.txt
895- // * ISO 639: codes of 'language names'
896- $suffixes = explode('.', $_file);
897- $body = array_shift($suffixes);
898- if ($suffixes) {
899- // Remove the last .gz/.z
900- $last_key = end(array_keys($suffixes));
901- if (isset($encoding_suffix[$suffixes[$last_key]])) {
902- unset($suffixes[$last_key]);
903- }
904- }
905- // Cut language and charset suffixes
906- foreach($suffixes as $key => $value){
907- if (isset($language_suffix[$value]) || isset($charset_suffix[$value])) {
908- unset($suffixes[$key]);
909- }
910- }
911- if (empty($suffixes)) return $body;
912-
913- // Index.xxx
914- $count = count($suffixes);
915- reset($suffixes);
916- $current = current($suffixes);
917- if ($body == 'index' && $count == 1 && isset($content_suffix[$current])) return '';
918-
919- return $file;
920-}
921-
922-// Sort query-strings if possible (Destructive and rough)
923-// [OK] &&&&f=d&b&d&c&a=0dd => a=0dd&b&c&d&f=d
924-// [OK] nothing==&eg=dummy&eg=padding&eg=foobar => eg=foobar
925-function query_normalize($string = '', $equal = TRUE, $equal_cutempty = TRUE, $stortolower = TRUE)
926-{
927- if (! is_string($string)) return '';
928- if ($stortolower) $string = strtolower($string);
929-
930- $array = explode('&', $string);
931-
932- // Remove '&' paddings
933- foreach(array_keys($array) as $key) {
934- if ($array[$key] == '') {
935- unset($array[$key]);
936- }
937- }
938-
939- // Consider '='-separated input and paddings
940- if ($equal) {
941- $equals = $not_equals = array();
942- foreach ($array as $part) {
943- if (strpos($part, '=') === FALSE) {
944- $not_equals[] = $part;
220+ if (isset($tmp[$value])) {
221+ unset($array[$key]);
945222 } else {
946- list($key, $value) = explode('=', $part, 2);
947- $value = ltrim($value, '=');
948- if (! $equal_cutempty || $value != '') {
949- $equals[$key] = $value;
950- }
223+ $tmp[$value] = TRUE;
951224 }
952225 }
953-
954- $array = & $not_equals;
955- foreach ($equals as $key => $value) {
956- $array[] = $key . '=' . $value;
957- }
958- unset($equals);
959226 }
960227
961- natsort($array);
962- return implode('&', $array);
228+ return $array;
963229 }
964230
231+
965232 // ---------------------
966233 // Part One : Checker
967234
@@ -999,21 +266,6 @@ function generate_glob_regex($string = '', $divider = '/')
999266 return $string;
1000267 }
1001268
1002-// Rough hostname checker
1003-// [OK] 192.168.
1004-// TODO: Strict digit, 0x, CIDR, IPv6
1005-function is_ip($string = '')
1006-{
1007- if (preg_match('/^' .
1008- '(?:[0-9]{1,3}\.){3}[0-9]{1,3}' . '|' .
1009- '(?:[0-9]{1,3}\.){1,3}' . '$/',
1010- $string)) {
1011- return 4; // Seems IPv4(dot-decimal)
1012- } else {
1013- return 0; // Seems not IP
1014- }
1015-}
1016-
1017269 // Generate host (FQDN, IPv4, ...) regex
1018270 // 'localhost' : Matches with 'localhost' only
1019271 // 'example.org' : Matches with 'example.org' only (See host_normalize() about 'www')
@@ -1048,6 +300,21 @@ function generate_host_regex($string = '', $divider = '/')
1048300 }
1049301 }
1050302
303+// Rough hostname checker
304+// [OK] 192.168.
305+// TODO: Strict digit, 0x, CIDR, IPv6
306+function is_ip($string = '')
307+{
308+ if (preg_match('/^' .
309+ '(?:[0-9]{1,3}\.){3}[0-9]{1,3}' . '|' .
310+ '(?:[0-9]{1,3}\.){1,3}' . '$/',
311+ $string)) {
312+ return 4; // Seems IPv4(dot-decimal)
313+ } else {
314+ return 0; // Seems not IP
315+ }
316+}
317+
1051318 function get_blocklist($list = '')
1052319 {
1053320 static $regexes;
@@ -1145,6 +412,10 @@ function blocklist_distiller(& $hosts, $keys = array('goodhost', 'badhost'), $as
1145412 return $blocked;
1146413 }
1147414
415+
416+// ---------------------
417+
418+
1148419 // Default (enabled) methods and thresholds (for content insertion)
1149420 function check_uri_spam_method($times = 1, $t_area = 0, $rule = TRUE)
1150421 {
@@ -1405,62 +676,6 @@ function check_uri_spam($target = '', $method = array())
1405676 return $progress;
1406677 }
1407678
1408-// Count leaves (A leaf = value that is not an array, or an empty array)
1409-function array_count_leaves($array = array(), $count_empty = FALSE)
1410-{
1411- if (! is_array($array) || (empty($array) && $count_empty)) return 1;
1412-
1413- // Recurse
1414- $count = 0;
1415- foreach ($array as $part) {
1416- $count += array_count_leaves($part, $count_empty);
1417- }
1418- return $count;
1419-}
1420-
1421-// An array-leaves to a flat array
1422-function array_flat_leaves($array, $unique = TRUE)
1423-{
1424- if (! is_array($array)) return $array;
1425-
1426- $tmp = array();
1427- foreach(array_keys($array) as $key) {
1428- if (is_array($array[$key])) {
1429- // Recurse
1430- foreach(array_flat_leaves($array[$key]) as $_value) {
1431- $tmp[] = $_value;
1432- }
1433- } else {
1434- $tmp[] = & $array[$key];
1435- }
1436- }
1437-
1438- return $unique ? array_values(array_unique($tmp)) : $tmp;
1439-}
1440-
1441-// An array() to an array leaf
1442-function array_leaf($array = array('A', 'B', 'C.D'), $stem = FALSE, $edge = TRUE)
1443-{
1444- if (! is_array($array)) return $array;
1445-
1446- $leaf = array();
1447- $tmp = & $leaf;
1448- foreach($array as $arg) {
1449- if (! is_string($arg) && ! is_int($arg)) continue;
1450- $tmp[$arg] = array();
1451- $parent = & $tmp;
1452- $tmp = & $tmp[$arg];
1453- }
1454- if ($stem) {
1455- $parent[key($parent)] = & $edge;
1456- } else {
1457- $parent = key($parent);
1458- }
1459-
1460- return $leaf; // array('A' => array('B' => 'C.D'))
1461-}
1462-
1463-
1464679 // ---------------------
1465680 // Reporting
1466681
@@ -1564,20 +779,6 @@ function summarize_detail_newtral($progress = array())
1564779 ')';
1565780 }
1566781
1567-// ksort() by domain
1568-function ksort_by_domain(& $array)
1569-{
1570- $sort = array();
1571- foreach(array_keys($array) as $key) {
1572- $sort[delimiter_reverse($key)] = $key;
1573- }
1574- ksort($sort, SORT_STRING);
1575- $result = array();
1576- foreach($sort as $key) {
1577- $result[$key] = & $array[$key];
1578- }
1579- $array = $result;
1580-}
1581782
1582783 // Check responsibility-root of the FQDN
1583784 // 'foo.bar.example.com' => 'example.com' (.com has the last whois for it)
@@ -1586,658 +787,22 @@ function ksort_by_domain(& $array)
1586787 // 'foo.bar.example.act.edu.au' => 'example.act.edu.au' (.act.edu.au has the last whois for it)
1587788 function whois_responsibility($fqdn = 'foo.bar.example.com', $parent = FALSE, $implicit = TRUE)
1588789 {
1589- // Domains who have 2nd and/or 3rd level domains
1590- static $domain = array(
1591-
1592- // ccTLD: Australia
1593- // http://www.auda.org.au/
1594- // NIC : http://www.aunic.net/
1595- // Whois: http://www.ausregistry.com.au/
1596- 'au' => array(
1597- // .au Second Level Domains
1598- // http://www.auda.org.au/domains/
1599- 'asn' => TRUE,
1600- 'com' => TRUE,
1601- 'conf' => TRUE,
1602- 'csiro' => TRUE,
1603- 'edu' => array( // http://www.domainname.edu.au/
1604- // Geographic
1605- 'act' => TRUE,
1606- 'nt' => TRUE,
1607- 'nsw' => TRUE,
1608- 'qld' => TRUE,
1609- 'sa' => TRUE,
1610- 'tas' => TRUE,
1611- 'vic' => TRUE,
1612- 'wa' => TRUE,
1613- ),
1614- 'gov' => array(
1615- // Geographic
1616- 'act' => TRUE, // Australian Capital Territory
1617- 'nt' => TRUE, // Northern Territory
1618- 'nsw' => TRUE, // New South Wales
1619- 'qld' => TRUE, // Queensland
1620- 'sa' => TRUE, // South Australia
1621- 'tas' => TRUE, // Tasmania
1622- 'vic' => TRUE, // Victoria
1623- 'wa' => TRUE, // Western Australia
1624- ),
1625- 'id' => TRUE,
1626- 'net' => TRUE,
1627- 'org' => TRUE,
1628- 'info' => TRUE,
1629- ),
1630-
1631- // ccTLD: Bahrain
1632- // NIC : http://www.inet.com.bh/ (.bh policies not found)
1633- // Whois: (Not available) http://www.inet.com.bh/
1634- 'bh' => array(
1635- // Observed
1636- 'com' => TRUE,
1637- 'edu' => TRUE,
1638- 'gov' => TRUE,
1639- 'org' => TRUE,
1640- ),
1641-
1642- // ccTLD: China
1643- // NIC : http://www.cnnic.net.cn/en/index/
1644- // Whois: http://ewhois.cnnic.cn/
1645- 'cn' => array(
1646- // Provisional Administrative Rules for Registration of Domain Names in China
1647- // http://www.cnnic.net.cn/html/Dir/2003/11/27/1520.htm
1648-
1649- // Organizational
1650- 'ac' => TRUE,
1651- 'com' => TRUE,
1652- 'edu' => TRUE,
1653- 'gov' => TRUE,
1654- 'net' => TRUE,
1655- 'org' => TRUE,
1656-
1657- // Geographic
1658- 'ah' => TRUE,
1659- 'bj' => TRUE,
1660- 'cq' => TRUE,
1661- 'fj' => TRUE,
1662- 'gd' => TRUE,
1663- 'gs' => TRUE,
1664- 'gx' => TRUE,
1665- 'gz' => TRUE,
1666- 'ha' => TRUE,
1667- 'hb' => TRUE,
1668- 'he' => TRUE,
1669- 'hi' => TRUE,
1670- 'hk' => TRUE,
1671- 'hl' => TRUE,
1672- 'hn' => TRUE,
1673- 'jl' => TRUE,
1674- 'js' => TRUE,
1675- 'jx' => TRUE,
1676- 'ln' => TRUE,
1677- 'mo' => TRUE,
1678- 'nm' => TRUE,
1679- 'nx' => TRUE,
1680- 'qh' => TRUE,
1681- 'sc' => TRUE,
1682- 'sd' => TRUE,
1683- 'sh' => TRUE,
1684- 'sn' => TRUE,
1685- 'sx' => TRUE,
1686- 'tj' => TRUE,
1687- 'tw' => TRUE,
1688- 'xj' => TRUE,
1689- 'xz' => TRUE,
1690- 'yn' => TRUE,
1691- 'zj' => TRUE,
1692- ),
1693-
1694- // ccTLD: India
1695- // NIC : http://www.inregistry.in/
1696- // Whois: http://www.inregistry.in/whois_search/
1697- 'in' => array(
1698- // Policies http://www.inregistry.in/policies/
1699- 'ac' => TRUE,
1700- 'co' => TRUE,
1701- 'firm' => TRUE,
1702- 'gen' => TRUE,
1703- 'gov' => TRUE,
1704- 'ind' => TRUE,
1705- 'mil' => TRUE,
1706- 'net' => TRUE,
1707- 'org' => TRUE,
1708- 'res' => TRUE,
1709- // Reserved Names by the government (for the 2nd level)
1710- // http://www.inregistry.in/policies/reserved_names
1711- ),
1712-
1713- // ccTLD: South Korea
1714- // NIC : http://www.nic.or.kr/english/
1715- // Whois: http://whois.nida.or.kr/english/
1716- 'kr' => array(
1717- // .kr domain policy [appendix 1] : Qualifications for Second Level Domains
1718- // http://domain.nida.or.kr/eng/policy.jsp
1719-
1720- // Organizational
1721- 'co' => TRUE,
1722- 'ne ' => TRUE,
1723- 'or ' => TRUE,
1724- 're ' => TRUE,
1725- 'pe' => TRUE,
1726- 'go ' => TRUE,
1727- 'mil' => TRUE,
1728- 'ac' => TRUE,
1729- 'hs' => TRUE,
1730- 'ms' => TRUE,
1731- 'es' => TRUE,
1732- 'sc' => TRUE,
1733- 'kg' => TRUE,
1734-
1735- // Geographic
1736- 'seoul' => TRUE,
1737- 'busan' => TRUE,
1738- 'daegu' => TRUE,
1739- 'incheon' => TRUE,
1740- 'gwangju' => TRUE,
1741- 'daejeon' => TRUE,
1742- 'ulsan' => TRUE,
1743- 'gyeonggi' => TRUE,
1744- 'gangwon' => TRUE,
1745- 'chungbuk' => TRUE,
1746- 'chungnam' => TRUE,
1747- 'jeonbuk' => TRUE,
1748- 'jeonnam' => TRUE,
1749- 'gyeongbuk' => TRUE,
1750- 'gyeongnam' => TRUE,
1751- 'jeju' => TRUE,
1752- ),
1753-
1754- // ccTLD: Japan
1755- // NIC : http://jprs.co.jp/en/
1756- // Whois: http://whois.jprs.jp/en/
1757- 'jp' => array(
1758- // Guide to JP Domain Name
1759- // http://jprs.co.jp/en/jpdomain.html
1760-
1761- // Organizational
1762- 'ac' => TRUE,
1763- 'ad' => TRUE,
1764- 'co' => TRUE,
1765- 'ed' => TRUE,
1766- 'go' => TRUE,
1767- 'gr' => TRUE,
1768- 'lg' => TRUE,
1769- 'ne' => TRUE,
1770- 'or' => TRUE,
1771-
1772- // Geographic
1773- //
1774- // Examples for 3rd level domains
1775- //'kumamoto' => array(
1776- // // http://www.pref.kumamoto.jp/link/list.asp#4
1777- // 'amakusa' => TRUE,
1778- // 'hitoyoshi' => TRUE,
1779- // 'jonan' => TRUE,
1780- // 'kumamoto' => TRUE,
1781- // ...
1782- //),
1783- 'aichi' => TRUE,
1784- 'akita' => TRUE,
1785- 'aomori' => TRUE,
1786- 'chiba' => TRUE,
1787- 'ehime' => TRUE,
1788- 'fukui' => TRUE,
1789- 'fukuoka' => TRUE,
1790- 'fukushima' => TRUE,
1791- 'gifu' => TRUE,
1792- 'gunma' => TRUE,
1793- 'hiroshima' => TRUE,
1794- 'hokkaido' => TRUE,
1795- 'hyogo' => TRUE,
1796- 'ibaraki' => TRUE,
1797- 'ishikawa' => TRUE,
1798- 'iwate' => TRUE,
1799- 'kagawa' => TRUE,
1800- 'kagoshima' => TRUE,
1801- 'kanagawa' => TRUE,
1802- 'kawasaki' => TRUE,
1803- 'kitakyushu'=> TRUE,
1804- 'kobe' => TRUE,
1805- 'kochi' => TRUE,
1806- 'kumamoto' => TRUE,
1807- 'kyoto' => TRUE,
1808- 'mie' => TRUE,
1809- 'miyagi' => TRUE,
1810- 'miyazaki' => TRUE,
1811- 'nagano' => TRUE,
1812- 'nagasaki' => TRUE,
1813- 'nagoya' => TRUE,
1814- 'nara' => TRUE,
1815- 'niigata' => TRUE,
1816- 'oita' => TRUE,
1817- 'okayama' => TRUE,
1818- 'okinawa' => TRUE,
1819- 'osaka' => TRUE,
1820- 'saga' => TRUE,
1821- 'saitama' => TRUE,
1822- 'sapporo' => TRUE,
1823- 'sendai' => TRUE,
1824- 'shiga' => TRUE,
1825- 'shimane' => TRUE,
1826- 'shizuoka' => TRUE,
1827- 'tochigi' => TRUE,
1828- 'tokushima' => TRUE,
1829- 'tokyo' => TRUE,
1830- 'tottori' => TRUE,
1831- 'toyama' => TRUE,
1832- 'wakayama' => TRUE,
1833- 'yamagata' => TRUE,
1834- 'yamaguchi' => TRUE,
1835- 'yamanashi' => TRUE,
1836- 'yokohama' => TRUE,
1837- ),
790+ static $domain;
1838791
1839- // ccTLD: Mexico
1840- // NIC : http://www.nic.mx/
1841- // Whois: http://www.nic.mx/es/Busqueda.Who_Is
1842- 'mx' => array(
1843- // Politicas Generales de Nombres de Dominio
1844- // http://www.nic.mx/es/Politicas?CATEGORY=INDICE
1845- 'com' => TRUE,
1846- 'edu' => TRUE,
1847- 'gob' => TRUE,
1848- 'net' => TRUE,
1849- 'org' => TRUE,
1850- ),
1851-
1852- // ccTLD: Russia
1853- // NIC : http://www.cctld.ru/en/
1854- // Whois: http://www.ripn.net:8080/nic/whois/en/
1855- 'ru' => array(
1856- // List of Reserved second-level Domain Names
1857- // http://www.cctld.ru/en/doc/detail.php?id21=20&i21=2
1858-
1859- // Organizational
1860- 'ac' => TRUE,
1861- 'com' => TRUE,
1862- 'edu' => TRUE,
1863- 'gov' => TRUE,
1864- 'int' => TRUE,
1865- 'mil' => TRUE,
1866- 'net' => TRUE,
1867- 'org' => TRUE,
1868- 'pp' => TRUE,
1869- //'test' => TRUE,
1870-
1871- // Geographic
1872- 'adygeya' => TRUE,
1873- 'altai' => TRUE,
1874- 'amur' => TRUE,
1875- 'amursk' => TRUE,
1876- 'arkhangelsk' => TRUE,
1877- 'astrakhan' => TRUE,
1878- 'baikal' => TRUE,
1879- 'bashkiria' => TRUE,
1880- 'belgorod' => TRUE,
1881- 'bir' => TRUE,
1882- 'bryansk' => TRUE,
1883- 'buryatia' => TRUE,
1884- 'cbg' => TRUE,
1885- 'chel' => TRUE,
1886- 'chelyabinsk' => TRUE,
1887- 'chita' => TRUE,
1888- 'chukotka' => TRUE,
1889- 'chuvashia' => TRUE,
1890- 'cmw' => TRUE,
1891- 'dagestan' => TRUE,
1892- 'dudinka' => TRUE,
1893- 'e-burg' => TRUE,
1894- 'fareast' => TRUE,
1895- 'grozny' => TRUE,
1896- 'irkutsk' => TRUE,
1897- 'ivanovo' => TRUE,
1898- 'izhevsk' => TRUE,
1899- 'jamal' => TRUE,
1900- 'jar' => TRUE,
1901- 'joshkar-ola' => TRUE,
1902- 'k-uralsk' => TRUE,
1903- 'kalmykia' => TRUE,
1904- 'kaluga' => TRUE,
1905- 'kamchatka' => TRUE,
1906- 'karelia' => TRUE,
1907- 'kazan' => TRUE,
1908- 'kchr' => TRUE,
1909- 'kemerovo' => TRUE,
1910- 'khabarovsk' => TRUE,
1911- 'khakassia' => TRUE,
1912- 'khv' => TRUE,
1913- 'kirov' => TRUE,
1914- 'kms' => TRUE,
1915- 'koenig' => TRUE,
1916- 'komi' => TRUE,
1917- 'kostroma' => TRUE,
1918- 'krasnoyarsk' => TRUE,
1919- 'kuban' => TRUE,
1920- 'kurgan' => TRUE,
1921- 'kursk' => TRUE,
1922- 'kustanai' => TRUE,
1923- 'kuzbass' => TRUE,
1924- 'lipetsk' => TRUE,
1925- 'magadan' => TRUE,
1926- 'magnitka' => TRUE,
1927- 'mari-el' => TRUE,
1928- 'mari' => TRUE,
1929- 'marine' => TRUE,
1930- 'mordovia' => TRUE,
1931- 'mosreg' => TRUE,
1932- 'msk' => TRUE,
1933- 'murmansk' => TRUE,
1934- 'mytis' => TRUE,
1935- 'nakhodka' => TRUE,
1936- 'nalchik' => TRUE,
1937- 'nkz' => TRUE,
1938- 'nnov' => TRUE,
1939- 'norilsk' => TRUE,
1940- 'nov' => TRUE,
1941- 'novosibirsk' => TRUE,
1942- 'nsk' => TRUE,
1943- 'omsk' => TRUE,
1944- 'orenburg' => TRUE,
1945- 'oryol' => TRUE,
1946- 'oskol' => TRUE,
1947- 'palana' => TRUE,
1948- 'penza' => TRUE,
1949- 'perm' => TRUE,
1950- 'pskov' => TRUE,
1951- 'ptz' => TRUE,
1952- 'pyatigorsk' => TRUE,
1953- 'rnd' => TRUE,
1954- 'rubtsovsk' => TRUE,
1955- 'ryazan' => TRUE,
1956- 'sakhalin' => TRUE,
1957- 'samara' => TRUE,
1958- 'saratov' => TRUE,
1959- 'simbirsk' => TRUE,
1960- 'smolensk' => TRUE,
1961- 'snz' => TRUE,
1962- 'spb' => TRUE,
1963- 'stavropol' => TRUE,
1964- 'stv' => TRUE,
1965- 'surgut' => TRUE,
1966- 'syzran' => TRUE,
1967- 'tambov' => TRUE,
1968- 'tatarstan' => TRUE,
1969- 'tom' => TRUE,
1970- 'tomsk' => TRUE,
1971- 'tsaritsyn' => TRUE,
1972- 'tsk' => TRUE,
1973- 'tula' => TRUE,
1974- 'tuva' => TRUE,
1975- 'tver' => TRUE,
1976- 'tyumen' => TRUE,
1977- 'udm' => TRUE,
1978- 'udmurtia' => TRUE,
1979- 'ulan-ude' => TRUE,
1980- 'vdonsk' => TRUE,
1981- 'vladikavkaz' => TRUE,
1982- 'vladimir' => TRUE,
1983- 'vladivostok' => TRUE,
1984- 'volgograd' => TRUE,
1985- 'vologda' => TRUE,
1986- 'voronezh' => TRUE,
1987- 'vrn' => TRUE,
1988- 'vyatka' => TRUE,
1989- 'yakutia' => TRUE,
1990- 'yamal' => TRUE,
1991- 'yaroslavl' => TRUE,
1992- 'yekaterinburg' => TRUE,
1993- 'yuzhno-sakhalinsk' => TRUE,
1994- 'zgrad' => TRUE,
1995- ),
1996-
1997- // ccTLD: Seychelles
1998- // NIC : http://www.nic.sc/
1999- // Whois: (Not available)
2000- 'sc' => array(
2001- // http://www.nic.sc/policies.html
2002- 'com' => TRUE,
2003- 'edu' => TRUE,
2004- 'gov' => TRUE,
2005- 'net' => TRUE,
2006- 'org' => TRUE,
2007- ),
2008-
2009- // ccTLD: Taiwan
2010- // NIC : http://www.twnic.net.tw/
2011- // Whois: http://www.twnic.net.tw/
2012- 'tw' => array(
2013- // Guidelines for Administration of Domain Name Registration
2014- // http://www.twnic.net.tw/english/dn/dn_02.htm
2015- // II. Types of TWNIC Domain Names and Application Requirements
2016- // http://www.twnic.net.tw/english/dn/dn_02_b.htm
2017- 'club' => TRUE,
2018- 'com' => TRUE,
2019- 'ebiz' => TRUE,
2020- 'edu' => TRUE,
2021- 'game' => TRUE,
2022- 'gov' => TRUE,
2023- 'idv' => TRUE,
2024- 'mil' => TRUE,
2025- 'net' => TRUE,
2026- 'org' => TRUE,
2027- // Reserved words for the 2nd level
2028- // http://mydn.twnic.net.tw/en/dn02/INDEX.htm
2029- ),
2030-
2031- // ccTLD: Tanzania
2032- // NIC : http://www.psg.com/dns/tz/
2033- // Whois: (Not available)
2034- 'tz' => array(
2035- // TZ DOMAIN NAMING STRUCTURE
2036- // http://www.psg.com/dns/tz/tz.txt
2037- 'ac' => TRUE,
2038- 'co' => TRUE,
2039- 'go' => TRUE,
2040- 'ne' => TRUE,
2041- 'or' => TRUE,
2042- ),
2043-
2044- // ccTLD: Ukraine
2045- // NIC : http://www.nic.net.ua/
2046- // Whois: http://whois.com.ua/
2047- 'ua' => array(
2048- // policy for alternative 2nd level domain names (a2ld)
2049- // http://www.nic.net.ua/doc/a2ld
2050- // http://whois.com.ua/
2051- 'cherkassy' => TRUE,
2052- 'chernigov' => TRUE,
2053- 'chernovtsy' => TRUE,
2054- 'ck' => TRUE,
2055- 'cn' => TRUE,
2056- 'com' => TRUE,
2057- 'crimea' => TRUE,
2058- 'cv' => TRUE,
2059- 'dn' => TRUE,
2060- 'dnepropetrovsk' => TRUE,
2061- 'donetsk' => TRUE,
2062- 'dp' => TRUE,
2063- 'edu' => TRUE,
2064- 'gov' => TRUE,
2065- 'if' => TRUE,
2066- 'ivano-frankivsk' => TRUE,
2067- 'kh' => TRUE,
2068- 'kharkov' => TRUE,
2069- 'kherson' => TRUE,
2070- 'kiev' => TRUE,
2071- 'kirovograd' => TRUE,
2072- 'km' => TRUE,
2073- 'kr' => TRUE,
2074- 'ks' => TRUE,
2075- 'lg' => TRUE,
2076- 'lugansk' => TRUE,
2077- 'lutsk' => TRUE,
2078- 'lviv' => TRUE,
2079- 'mk' => TRUE,
2080- 'net' => TRUE,
2081- 'nikolaev' => TRUE,
2082- 'od' => TRUE,
2083- 'odessa' => TRUE,
2084- 'org' => TRUE,
2085- 'pl' => TRUE,
2086- 'poltava' => TRUE,
2087- 'rovno' => TRUE,
2088- 'rv' => TRUE,
2089- 'sebastopol' => TRUE,
2090- 'sumy' => TRUE,
2091- 'te' => TRUE,
2092- 'ternopil' => TRUE,
2093- 'uz' => TRUE,
2094- 'uzhgorod' => TRUE,
2095- 'vinnica' => TRUE,
2096- 'vn' => TRUE,
2097- 'zaporizhzhe' => TRUE,
2098- 'zhitomir' => TRUE,
2099- 'zp' => TRUE,
2100- 'zt' => TRUE,
2101- ),
2102-
2103- // ccTLD: United Kingdom
2104- // NIC : http://www.nic.uk/
2105- 'uk' => array(
2106- // Second Level Domains
2107- // http://www.nic.uk/registrants/aboutdomainnames/sld/
2108- 'co' => TRUE,
2109- 'ltd' => TRUE,
2110- 'me' => TRUE,
2111- 'net' => TRUE,
2112- 'nic' => TRUE,
2113- 'org' => TRUE,
2114- 'plc' => TRUE,
2115- 'sch' => TRUE,
2116-
2117- // Delegated Second Level Domains
2118- // http://www.nic.uk/registrants/aboutdomainnames/sld/delegated/
2119- 'ac' => TRUE,
2120- 'gov' => TRUE,
2121- 'mil' => TRUE,
2122- 'mod' => TRUE,
2123- 'nhs' => TRUE,
2124- 'police' => TRUE,
2125- ),
2126-
2127- // ccTLD: United States of America
2128- // NIC : http://nic.us/
2129- // Whois: http://whois.us/
2130- 'us' => array(
2131- // See RFC1480
2132-
2133- // Organizational
2134- 'dni',
2135- 'fed',
2136- 'isa',
2137- 'kids',
2138- 'nsn',
2139-
2140- // Geographical
2141- // United States Postal Service: State abbreviations (for postal codes)
2142- // http://www.usps.com/ncsc/lookups/abbreviations.html
2143- 'ak' => TRUE, // Alaska
2144- 'al' => TRUE, // Alabama
2145- 'ar' => TRUE, // Arkansas
2146- 'as' => TRUE, // American samoa
2147- 'az' => TRUE, // Arizona
2148- 'ca' => TRUE, // California
2149- 'co' => TRUE, // Colorado
2150- 'ct' => TRUE, // Connecticut
2151- 'dc' => TRUE, // District of Columbia
2152- 'de' => TRUE, // Delaware
2153- 'fl' => TRUE, // Florida
2154- 'fm' => TRUE, // Federated states of Micronesia
2155- 'ga' => TRUE, // Georgia
2156- 'gu' => TRUE, // Guam
2157- 'hi' => TRUE, // Hawaii
2158- 'ia' => TRUE, // Iowa
2159- 'id' => TRUE, // Idaho
2160- 'il' => TRUE, // Illinois
2161- 'in' => TRUE, // Indiana
2162- 'ks' => TRUE, // Kansas
2163- 'ky' => TRUE, // Kentucky
2164- 'la' => TRUE, // Louisiana
2165- 'ma' => TRUE, // Massachusetts
2166- 'md' => TRUE, // Maryland
2167- 'me' => TRUE, // Maine
2168- 'mh' => TRUE, // Marshall Islands
2169- 'mi' => TRUE, // Michigan
2170- 'mn' => TRUE, // Minnesota
2171- 'mo' => TRUE, // Missouri
2172- 'mp' => TRUE, // Northern mariana islands
2173- 'ms' => TRUE, // Mississippi
2174- 'mt' => TRUE, // Montana
2175- 'nc' => TRUE, // North Carolina
2176- 'nd' => TRUE, // North Dakota
2177- 'ne' => TRUE, // Nebraska
2178- 'nh' => TRUE, // New Hampshire
2179- 'nj' => TRUE, // New Jersey
2180- 'nm' => TRUE, // New Mexico
2181- 'nv' => TRUE, // Nevada
2182- 'ny' => TRUE, // New York
2183- 'oh' => TRUE, // Ohio
2184- 'ok' => TRUE, // Oklahoma
2185- 'or' => TRUE, // Oregon
2186- 'pa' => TRUE, // Pennsylvania
2187- 'pr' => TRUE, // Puerto Rico
2188- 'pw' => TRUE, // Palau
2189- 'ri' => TRUE, // Rhode Island
2190- 'sc' => TRUE, // South Carolina
2191- 'sd' => TRUE, // South Dakota
2192- 'tn' => TRUE, // Tennessee
2193- 'tx' => TRUE, // Texas
2194- 'ut' => TRUE, // Utah
2195- 'va' => TRUE, // Virginia
2196- 'vi' => TRUE, // Virgin Islands
2197- 'vt' => TRUE, // Vermont
2198- 'wa' => TRUE, // Washington
2199- 'wi' => TRUE, // Wisconsin
2200- 'wv' => TRUE, // West Virginia
2201- 'wy' => TRUE, // Wyoming
2202- ),
792+ if ($fqdn === NULL) {
793+ $domain = NULL; // Unset
794+ return '';
795+ }
796+ if (! is_string($fqdn)) return '';
2203797
2204- // ccTLD: South Africa
2205- // NIC : http://www.zadna.org.za/
2206- // Whois:
2207- // ac.za http://www.tenet.ac.za/cgi/cgi_domainquery.exe
2208- // co.za http://co.za/whois.shtml
2209- // gov.za http://dnsadmin.gov.za/
2210- // org.za http://www.org.za/
2211- 'za' => array(
2212- // Second-level subdomains of .ZA
2213- // http://www.zadna.org.za/slds.html
2214- 'ac' => TRUE,
2215- 'city' => TRUE,
2216- 'co' => TRUE,
2217- 'edu' => TRUE,
2218- 'gov' => TRUE,
2219- 'law' => TRUE,
2220- 'mil' => TRUE,
2221- 'nom' => TRUE,
2222- 'org' => TRUE,
2223- 'school' => array(
2224- // Provincial Domains
2225- // http://www.esn.org.za/dns/
2226- 'ecape' => TRUE,
2227- 'fs.' => TRUE,
2228- 'gp' => TRUE,
2229- 'kzn' => TRUE,
2230- 'lp' => TRUE,
2231- 'mpm' => TRUE,
2232- 'ncape' => TRUE,
2233- 'nw' => TRUE,
2234- 'wcape' => TRUE,
2235- ),
2236- ),
2237- );
798+ if (is_ip($fqdn)) return $fqdn;
2238799
2239- if (! is_string($fqdn)) return '';
2240- if (is_ip($fqdn)) return $fqdn;
800+ if (! isset($domain)) {
801+ $domain = array();
802+ if (file_exists(DOMAIN_INI_FILE)) {
803+ include(DOMAIN_INI_FILE); // Set
804+ }
805+ }
2241806
2242807 $result = array();
2243808 $dcursor = & $domain;
@@ -2275,6 +840,7 @@ function whois_responsibility($fqdn = 'foo.bar.example.com', $parent = FALSE, $i
// Ask the caching functions of this library to drop what they hold.
// whois_responsibility(NULL) resets its static domain table; get_blocklist(NULL)
// is defined elsewhere and presumably does the same for the blocklist (confirm).
function spam_dispose()
{
	get_blocklist(NULL);
	whois_responsibility(NULL);
}
2279845
2280846 // Common behavior for blocking
--- /dev/null
+++ b/lib/spam_pickup.php
@@ -0,0 +1,788 @@
1+<?php
2+// $Id: spam_pickup.php,v 1.1 2007/07/03 14:47:20 henoheno Exp $
3+// Copyright (C) 2006-2007 PukiWiki Developers Team
4+// License: GPL v2 or (at your option) any later version
5+//
6+// Functions for Concept-work of spam-uri metrics
7+//
8+
9+// ---------------------
10+// URI pickup
11+
// Return an array of URIs found in the $string
// Each picked-up URI is an array of its parts (see $parts below) plus
// 'area' => array('offset' => N), where N is the byte offset of that URI
// inside $string (kept for area_measure()).
// Returns an empty array if $string is not a string or holds no URI.
// [OK] http://nasty.example.org#nasty_string
// [OK] http://nasty.example.org:80/foo/xxx#nasty_string/bar
// [OK] ftp://nasty.example.org:80/dfsdfs
// [OK] ftp://cnn.example.com&story=breaking_news@10.0.0.1/top_story.htm (from RFC3986)
function uri_pickup($string = '')
{
	// Captured group number => name of the URI part
	static $parts = array(
		1 => 'scheme', 2 => 'userinfo', 3 => 'host',  4 => 'port',
		5 => 'path',   6 => 'file',     7 => 'query', 8 => 'fragment'
	);

	if (! is_string($string)) return array();

	// Not available for: IDN(ignored)
	$array = array();
	preg_match_all(
		// scheme://userinfo@host:port/path/or/pathinfo/maybefile.and?query=string#fragment
		// Refer RFC3986 (Regex below is not strict)
		'#(\b[a-z][a-z0-9.+-]{1,8}):[/\\\]+' .	// 1: Scheme
		'(?:' .
			'([^\s<>"\'\[\]/\#?@]*)' .		// 2: Userinfo (Username)
		'@)?' .
		'(' .
			// 3: Host
			'\[[0-9a-f:.]+\]' . '|' .				// IPv6([colon-hex and dot]): RFC2732
			'(?:[0-9]{1,3}\.){3}[0-9]{1,3}' . '|' .	// IPv4(dot-decimal): 001.22.3.44
			'[a-z0-9_-][a-z0-9_.-]+[a-z0-9_-]' . 	// hostname(FQDN) : foo.example.org
		')' .
		'(?::([0-9]*))?' .					// 4: Port
		'((?:/+[^\s<>"\'\[\]/\#]+)*/+)?' .	// 5: Directory path or path-info
		'([^\s<>"\'\[\]\#?]+)?' .			// 6: File?
		'(?:\?([^\s<>"\'\[\]\#]+))?' .		// 7: Query string
		'(?:\#([a-z0-9._~%!$&\'()*+,;=:@-]*))?' .	// 8: Fragment
		'#i',
		$string, $array, PREG_SET_ORDER | PREG_OFFSET_CAPTURE
	);

	// Format the $array, one URI at a time.
	// NOTE: The offset must be taken and stored within the same iteration.
	//       Taking it in one loop and storing it in another gives every URI
	//       the offset of the last one.
	$default = array('');
	foreach (array_keys($array) as $uri) {
		$_uri = & $array[$uri];

		// array_rename_keys() is defined elsewhere; it is expected to rename
		// the group numbers to the names in $parts, and to fill the groups
		// missing from this match with $default (confirm against its source)
		array_rename_keys($_uri, $parts, TRUE, $default);

		// Scheme's offset = URI's offset
		$offset = isset($_uri['scheme'][1]) ? $_uri['scheme'][1] : 0;

		// Remove offsets: array(text, offset) => text
		foreach (array_keys($_uri) as $part) {
			$_uri[$part] = $_uri[$part][0];
		}

		if ($_uri['scheme'] === '') {
			unset($array[$uri]);	// Considered harmless
			continue;
		}

		unset($_uri[0]);	// Matched string itself
		$_uri['area']['offset'] = $offset;	// Area offset for area_measure()
	}
	unset($_uri);	// Break the reference to the last element

	return $array;
}
72+
// Picked-up URI array => An URI string (See uri_pickup())
// Parts that are not set, or are empty strings, are skipped.
// Returns NULL if $uri is empty or not an array.
// USAGE:
//	$pickups = uri_pickup('a string include some URIs');
//	$uris = array();
//	foreach (array_keys($pickups) as $key) {
//		$uris[$key] = uri_pickup_implode($pickups[$key]);
//	}
function uri_pickup_implode($uri = array())
{
	// part => array(text put before the part, text put after the part)
	// NOTE: The order of this list is the order of the output
	static $glue = array(
		'scheme'   => array('',  '://'),
		'userinfo' => array('',  '@'),
		'host'     => array('',  ''),
		'port'     => array(':', ''),
		'path'     => array('',  ''),
		'file'     => array('',  ''),
		'query'    => array('?', ''),
		'fragment' => array('#', ''),
	);

	if (empty($uri) || ! is_array($uri)) return NULL;

	$result = '';
	foreach ($glue as $part => $wrap) {
		if (! isset($uri[$part]) || $uri[$part] === '') continue;
		$result .= $wrap[0] . $uri[$part] . $wrap[1];
	}

	return $result;
}
117+
118+
119+// ---------------------
120+// URI normalization
121+
// Normalize every URI array inside $pickups, in place
// NOTE: Give me the uri_pickup() results
// $destructive = TRUE : scheme, host('www.' may be cut), port, path(lowercased),
//                       file, query and fragment are normalized
// $destructive = FALSE: scheme, host(lowercased only), port and path only
// A part that is not set becomes ''.
// Returns $pickups (untouched if it is not an array).
function uri_pickup_normalize(& $pickups, $destructive = TRUE)
{
	if (! is_array($pickups)) return $pickups;

	foreach (array_keys($pickups) as $index) {
		$uri = & $pickups[$index];

		$uri['scheme'] = isset($uri['scheme']) ? scheme_normalize($uri['scheme']) : '';

		if (! isset($uri['host'])) {
			$uri['host'] = '';
		} else if ($destructive) {
			$uri['host'] = host_normalize($uri['host']);
		} else {
			$uri['host'] = strtolower($uri['host']);
		}

		// The scheme has been normalized above, so port_normalize() need not redo it
		$uri['port'] = isset($uri['port']) ? port_normalize($uri['port'], $uri['scheme'], FALSE) : '';

		if (! isset($uri['path'])) {
			$uri['path'] = '';
		} else {
			$uri['path'] = path_normalize($uri['path']);
			if ($destructive) $uri['path'] = strtolower($uri['path']);
		}

		if ($destructive) {
			$uri['file']     = isset($uri['file'])     ? file_normalize($uri['file'])   : '';
			$uri['query']    = isset($uri['query'])    ? query_normalize($uri['query']) : '';
			$uri['fragment'] = isset($uri['fragment']) ? strtolower($uri['fragment'])   : '';
		}
	}
	unset($uri);	// Break the reference to the last element

	return $pickups;
}
151+
// Scheme normalization: Lowercasing and renaming the schemes
// snntp://example.org => nntps://example.org
// $abbrevs_harmfull = TRUE : 'ttp' and 'ttps' are expanded to 'http' and 'https'
// $abbrevs_harmfull = FALSE: 'ttp' and 'ttps' result in ''
// Returns '' if $scheme is not a string.
// NOTE: Keep the static lists simple. See also port_normalize().
function scheme_normalize($scheme = '', $abbrevs_harmfull = TRUE)
{
	// Abbreviations they have no intention of link
	static $abbrevs = array(
		'ttp'  => 'http',
		'ttps' => 'https',
	);

	// Aliases => normalized ones
	static $aliases = array(
		'pop'   => 'pop3',
		'news'  => 'nntp',
		'imap4' => 'imap',
		'snntp' => 'nntps',
		'snews' => 'nntps',
		'spop3' => 'pop3s',
		'pops'  => 'pop3s',
	);

	if (! is_string($scheme)) return '';

	$name = strtolower($scheme);

	if (isset($abbrevs[$name])) {
		if (! $abbrevs_harmfull) return '';
		$name = $abbrevs[$name];
	}

	return isset($aliases[$name]) ? $aliases[$name] : $name;
}
186+
// Hostname normalization (Destructive)
// Lowercases $host, then cuts a leading 'www.' if what remains still has
// a dot and ends with an alphabetical label.
// www.foo     => www.foo    ('foo' seems TLD)
// www.foo.bar => foo.bar
// www.10.20   => www.10.20  (Invalid hostname)
// Returns '' if $host is not a string.
// NOTE:
//   'www' is mostly used as traditional hostname of WWW server.
//   'www.foo.bar' may be identical with 'foo.bar'.
function host_normalize($host = '')
{
	if (! is_string($host)) return '';

	$lower = strtolower($host);
	$found = array();

	return preg_match('/^www\.(.+\.[a-z]+)$/', $lower, $found) ? $found[1] : $lower;
}
206+
// Port normalization: Suppress the (redundant) default port
// HTTP://example.org:80/   => http://example.org/
// HTTP://example.org:8080/ => http://example.org:8080/
// HTTPS://example.org:443/ => https://example.org/
// Returns the port as an integer, or '' if the port is invalid or is the
// default one of the $scheme.
// $scheme_normalize = TRUE: $scheme goes through scheme_normalize() before lookup
function port_normalize($port, $scheme, $scheme_normalize = FALSE)
{
	// Schemes that users _maybe_ want to add protocol-handlers
	// to their web browsers. (and attackers _maybe_ want to use ...)
	// Reference: http://www.iana.org/assignments/port-numbers
	static $defaults = array(
		// scheme => default port
		'ftp'     => 21,
		'ssh'     => 22,
		'telnet'  => 23,
		'smtp'    => 25,
		'tftp'    => 69,
		'gopher'  => 70,
		'finger'  => 79,
		'http'    => 80,
		'pop3'    => 110,
		'sftp'    => 115,
		'nntp'    => 119,
		'imap'    => 143,
		'irc'     => 194,
		'wais'    => 210,
		'https'   => 443,
		'nntps'   => 563,
		'rsync'   => 873,
		'ftps'    => 990,
		'telnets' => 992,
		'imaps'   => 993,
		'ircs'    => 994,
		'pop3s'   => 995,
		'mysql'   => 3306,
	);

	if (! is_numeric($port)) return '';
	if ($port < 0) return '';
	// intval() converts '0-1' to '0', so anything but digits is rejected here
	if (preg_match('/[^0-9]/i', $port)) return '';

	$number = intval($port);
	$name   = $scheme_normalize ? scheme_normalize($scheme) : $scheme;

	// Ignore the defaults
	if (isset($defaults[$name]) && $number == $defaults[$name]) return '';

	return $number;
}
254+
// Path normalization
// http://example.org                     => http://example.org/
// http://example.org#hoge                => http://example.org/#hoge
// http://example.org/path/a/b/./c////./d => http://example.org/path/a/b/c/d
// http://example.org/path/../../a/../back => http://example.org/back
// $divider : The separator of path elements. If it is not a string, or is
//            an empty string, nothing can be split: $path is returned as is
//            ('' if $path is not a string)
// $add_root: Put a $divider at the top of the result or not
// A trailing $divider of $path is kept, unless no element remains.
function path_normalize($path = '', $divider = '/', $add_root = TRUE)
{
	// An empty divider would make explode() fail, so reject it here
	if (! is_string($divider) || $divider === '') {
		return is_string($path) ? $path : '';
	}

	$first_div = $add_root ? $divider : '';
	if (! is_string($path) || $path == '') return $first_div;

	// Does the path end with the divider?
	// NOTE: substr() is safe even if the path is shorter than the divider,
	//       where strpos() with a negative offset is not
	$last_div = (substr($path, - strlen($divider)) === $divider) ? $divider : '';

	$result = array();
	foreach (explode($divider, $path) as $value) {
		if ($value === '' || $value === '.') {
			continue;	// Paddings ('//' and '/./')
		}
		if ($value === '..') {
			array_pop($result);	// Back-track ('/../'), never above the root
		} else {
			$result[] = $value;
		}
	}

	if (empty($result)) return $first_div;

	return $first_div . implode($divider, $result) . $last_div;
}
303+
// DirectoryIndex normalize (Destructive and rough)
// Returns '' for file names that seem to be a default document of the
// directory ('index.html', 'index.html.en', 'default.asp', 'index', ...).
// Returns the lowercased body if nothing but language/charset/encoding
// suffixes follow it ('sample.en' => 'sample'), or if there is no suffix.
// Otherwise returns $file itself, untouched.
// Returns '' if $file is not a string.
// TODO: sample.en.ja.html.gz => sample.html
function file_normalize($file = 'index.html.en')
{
	static $simple_defaults = array(
		'default.htm'	=> TRUE,
		'default.html'	=> TRUE,
		'default.asp'	=> TRUE,
		'default.aspx'	=> TRUE,
		'index'			=> TRUE,	// Some system can omit the suffix
	);

	static $content_suffix = array(
		// index.xxx, sample.xxx
		'htm'	=> TRUE,
		'html'	=> TRUE,
		'shtml'	=> TRUE,
		'jsp'	=> TRUE,
		'php'	=> TRUE,
		'php3'	=> TRUE,
		'php4'	=> TRUE,
		'pl'	=> TRUE,
		'py'	=> TRUE,
		'rb'	=> TRUE,
		'cgi'	=> TRUE,
		'xml'	=> TRUE,
	);

	static $language_suffix = array(
		// Reference: Apache 2.0.59 'AddLanguage' default
		'ca'	=> TRUE,
		'cs'	=> TRUE,	// cs
		'cz'	=> TRUE,	// cs
		'de'	=> TRUE,
		'dk'	=> TRUE,	// da
		'el'	=> TRUE,
		'en'	=> TRUE,
		'eo'	=> TRUE,
		'es'	=> TRUE,
		'et'	=> TRUE,
		'fr'	=> TRUE,
		'he'	=> TRUE,
		'hr'	=> TRUE,
		'it'	=> TRUE,
		'ja'	=> TRUE,
		'ko'	=> TRUE,
		'ltz'	=> TRUE,
		'nl'	=> TRUE,
		'nn'	=> TRUE,
		'no'	=> TRUE,
		'po'	=> TRUE,
		'pt'	=> TRUE,
		'pt-br'	=> TRUE,
		'ru'	=> TRUE,
		'sv'	=> TRUE,
		'zh-cn'	=> TRUE,
		'zh-tw'	=> TRUE,

		// Reference: Apache 2.0.59 default 'index.html' variants
		'ee'	=> TRUE,
		'lb'	=> TRUE,
		'var'	=> TRUE,
	);

	static $charset_suffix = array(
		// Reference: Apache 2.0.59 'AddCharset' default
		'iso8859-1'	=> TRUE,	// ISO-8859-1
		'latin1'	=> TRUE,	// ISO-8859-1
		'iso8859-2'	=> TRUE,	// ISO-8859-2
		'latin2'	=> TRUE,	// ISO-8859-2
		'cen'		=> TRUE,	// ISO-8859-2
		'iso8859-3'	=> TRUE,	// ISO-8859-3
		'latin3'	=> TRUE,	// ISO-8859-3
		'iso8859-4'	=> TRUE,	// ISO-8859-4
		'latin4'	=> TRUE,	// ISO-8859-4
		'iso8859-5'	=> TRUE,	// ISO-8859-5
		'latin5'	=> TRUE,	// ISO-8859-5
		'cyr'		=> TRUE,	// ISO-8859-5
		'iso-ru'	=> TRUE,	// ISO-8859-5
		'iso8859-6'	=> TRUE,	// ISO-8859-6
		'latin6'	=> TRUE,	// ISO-8859-6
		'arb'		=> TRUE,	// ISO-8859-6
		'iso8859-7'	=> TRUE,	// ISO-8859-7
		'latin7'	=> TRUE,	// ISO-8859-7
		'grk'		=> TRUE,	// ISO-8859-7
		'iso8859-8'	=> TRUE,	// ISO-8859-8
		'latin8'	=> TRUE,	// ISO-8859-8
		'heb'		=> TRUE,	// ISO-8859-8
		'iso8859-9'	=> TRUE,	// ISO-8859-9
		'latin9'	=> TRUE,	// ISO-8859-9
		'trk'		=> TRUE,	// ISO-8859-9
		'iso2022-jp'=> TRUE,	// ISO-2022-JP
		'jis'		=> TRUE,	// ISO-2022-JP
		'iso2022-kr'=> TRUE,	// ISO-2022-KR
		'kis'		=> TRUE,	// ISO-2022-KR
		'iso2022-cn'=> TRUE,	// ISO-2022-CN
		'cis'		=> TRUE,	// ISO-2022-CN
		'big5'		=> TRUE,
		'cp-1251'	=> TRUE,	// ru, WINDOWS-1251
		'win-1251'	=> TRUE,	// ru, WINDOWS-1251
		'cp866'		=> TRUE,	// ru
		'koi8-r'	=> TRUE,	// ru, KOI8-r
		'koi8-ru'	=> TRUE,	// ru, KOI8-r
		'koi8-uk'	=> TRUE,	// ru, KOI8-ru
		'ua'		=> TRUE,	// ru, KOI8-ru
		'ucs2'		=> TRUE,	// ru, ISO-10646-UCS-2
		'ucs4'		=> TRUE,	// ru, ISO-10646-UCS-4
		'utf8'		=> TRUE,

		// Reference: Apache 2.0.59 default 'index.html' variants
		'euc-kr'	=> TRUE,
		'gb2312'	=> TRUE,
	);

	// May uncompress by web browsers on the fly
	// Must be at the last of the filename
	// Reference: Apache 2.0.59 'AddEncoding'
	static $encoding_suffix = array(
		'z'		=> TRUE,
		'gz'	=> TRUE,
	);

	if (! is_string($file)) return '';
	$_file = strtolower($file);
	if (isset($simple_defaults[$_file])) return '';

	// Roughly removing language/character-set/encoding suffixes
	// References:
	//  * Apache 2 document about 'Content-negotiaton', 'mod_mime' and 'mod_negotiation'
	//    http://httpd.apache.org/docs/2.0/content-negotiation.html
	//    http://httpd.apache.org/docs/2.0/mod/mod_mime.html
	//    http://httpd.apache.org/docs/2.0/mod/mod_negotiation.html
	//  * http://www.iana.org/assignments/character-sets
	//  * RFC3066: Tags for the Identification of Languages
	//    http://www.ietf.org/rfc/rfc3066.txt
	//  * ISO 639: codes of 'language names'
	$suffixes = explode('.', $_file);
	$body     = array_shift($suffixes);

	// Remove the last .gz/.z
	// NOTE: end() takes its argument by reference, so it must be given a
	//       variable, never the result of a function call
	if ($suffixes && isset($encoding_suffix[end($suffixes)])) {
		array_pop($suffixes);
	}

	// Cut language and charset suffixes
	foreach ($suffixes as $key => $value) {
		if (isset($language_suffix[$value]) || isset($charset_suffix[$value])) {
			unset($suffixes[$key]);
		}
	}
	if (empty($suffixes)) return $body;

	// Index.xxx
	$first = reset($suffixes);
	if ($body == 'index' && count($suffixes) == 1 && isset($content_suffix[$first])) return '';

	return $file;
}
465+
// Sort query-strings if possible (Destructive and rough)
// [OK] &&&&f=d&b&d&c&a=0dd => a=0dd&b&c&d&f=d
// [OK] nothing==&eg=dummy&eg=padding&eg=foobar => eg=foobar
// $equal         : Treat 'key=value' specially: the last value of a key wins,
//                  and redundant '=' at the top of the value are removed
// $equal_cutempty: With $equal, drop the keys having an empty value
// $stortolower   : Lowercase the $string at first
// Returns '' if $string is not a string.
function query_normalize($string = '', $equal = TRUE, $equal_cutempty = TRUE, $stortolower = TRUE)
{
	if (! is_string($string)) return '';
	if ($stortolower) $string = strtolower($string);

	// Split by '&', throwing the '&' paddings away
	$parts = preg_split('/&+/', $string, -1, PREG_SPLIT_NO_EMPTY);

	// Consider '='-separated input and paddings
	if ($equal) {
		$plains = array();	// Parts without '='
		$pairs  = array();	// key => value
		foreach ($parts as $part) {
			$pair = explode('=', $part, 2);
			if (count($pair) < 2) {
				$plains[] = $part;
				continue;
			}
			$value = ltrim($pair[1], '=');
			if ($equal_cutempty && $value == '') continue;
			$pairs[$pair[0]] = $value;
		}

		$parts = $plains;
		foreach ($pairs as $key => $value) {
			$parts[] = $key . '=' . $value;
		}
	}

	natsort($parts);

	return implode('&', $parts);
}
508+
509+// ---------------------
510+// Area pickup
511+
// Pickup all of markup areas
// $method decides what to look for, by its keys:
//   'area_anchor', 'area_bbcode': Count the areas
//                                 (with 'asap': 1 if there is one at least)
//   'uri_anchor',  'uri_bbcode' : List the areas, each as
//                                 array(offset of the start, offset of the last character)
// The result has the same keys, but only for those found something.
function area_pickup($string = '', $method = array())
{
	// kind => array(regex, number of the group capturing the last character)
	static $markups = array(
		// Anchor tag pair by preg_match and preg_match_all()
		// [OK] <a href></a>
		// [OK] <a href=  >Good site!</a>
		// [OK] <a href= "#" >test</a>
		// [OK] <a href="http://nasty.example.com">visit http://nasty.example.com/</a>
		// [OK] <a href=\'http://nasty.example.com/\' >discount foobar</a>
		// [NG] <a href="http://ng.example.com">visit http://ng.example.com _not_ended_
		'anchor' => array('#<a\b[^>]*\bhref\b[^>]*>.*?</a\b[^>]*(>)#is', 1),

		// phpBB's "BBCode" pair by preg_match and preg_match_all()
		// [OK] [url][/url]
		// [OK] [url]http://nasty.example.com/[/url]
		// [OK] [link]http://nasty.example.com/[/link]
		// [OK] [url=http://nasty.example.com]visit http://nasty.example.com/[/url]
		// [OK] [link http://nasty.example.com/]buy something[/link]
		'bbcode' => array('#\[(url|link)\b[^\]]*\].*?\[/\1\b[^\]]*(\])#is', 2),

		// Various Wiki syntax (not implemented)
		// [text_or_uri>text_or_uri]
		// [text_or_uri:text_or_uri]
		// [text_or_uri|text_or_uri]
		// [text_or_uri->text_or_uri]
		// [text_or_uri text_or_uri] // MediaWiki
		// MediaWiki: [http://nasty.example.com/ visit http://nasty.example.com/]
	);

	$area = array();
	if (empty($method)) return $area;

	foreach ($markups as $kind => $markup) {
		list($regex, $closer) = $markup;

		// How many?
		$name = 'area_' . $kind;
		if (isset($method[$name])) {
			$found = array();
			if (isset($method['asap'])) {
				$count = preg_match($regex, $string);
			} else {
				$count = preg_match_all($regex, $string, $found);
			}
			if (! empty($count)) $area[$name] = $count;
		}

		// Where?
		$name = 'uri_' . $kind;
		if (isset($method[$name])) {
			$found = array();
			preg_match_all($regex, $string, $found, PREG_SET_ORDER | PREG_OFFSET_CAPTURE);
			$spans = array();
			foreach ($found as $index => $match) {
				$spans[$index] = array(
					$match[0][1],		// Area start (<a href> or [url])
					$match[$closer][1],	// Area end   (</a> or [/url])
				);
			}
			if (! empty($spans)) $area[$name] = $spans;
		}
	}

	return $area;
}
581+
// If in doubt, it's a little doubtful
// For each item of $array having the counter $a_key, add $belief to that
// counter once per area that has the item's offset ($o_key, 0 if not set)
// strictly inside it:
//   if (Area start < offset < Area end) counter += $belief
// $areas: array of array(start, end). $array is modified in place.
function area_measure($areas, & $array, $belief = -1, $a_key = 'area', $o_key = 'offset')
{
	if (! is_array($areas) || ! is_array($array)) return;

	foreach (array_keys($array) as $u_index) {
		// Nothing to count up
		if (! isset($array[$u_index][$a_key])) continue;

		$offset = isset($array[$u_index][$o_key]) ? intval($array[$u_index][$o_key]) : 0;

		foreach ($areas as $span) {
			$start = intval($span[0]);
			$end   = intval($span[1]);
			// [Area => inside <= Area]
			if ($start < $offset && $offset < $end) {
				$array[$u_index][$a_key] += $belief;
			}
		}
	}
}
604+
605+
606+// ---------------------
607+// Spam-uri pickup
608+
// Preprocess: Removing the parts of no interest for URI detection
// $binary goes through strings() (defined elsewhere) to drop sequential
// spaces and too short lines, then runs of plain words between spaces
// are collapsed into one space.
function spam_uri_removing_hocus_pocus($binary = '', $method = array())
{
	// Methods those need to keep the short pieces of markups:
	// '<a'(2) or 'href='(5) or '>'(1) or '</a>'(4)
	// '[uri'(4) or ']'(1) or '[/uri]'(6)
	static $markup_methods = array('area_anchor', 'uri_anchor', 'area_bbcode', 'uri_bbcode');

	// Minimum length of a line to keep:
	// 'http'(1) and '://'(2) and 'fqdn'(1)
	$length = 4;
	if (is_array($method)) {
		foreach ($markup_methods as $name) {
			if (isset($method[$name])) {
				$length = 1;	// Seems not effective
				break;
			}
		}
	}

	// Removing sequential spaces and too short lines
	$shorten = strings($binary, $length, TRUE, FALSE); // Multibyte NOT needed

	// Remove words (has no '<>[]:') between spaces
	return preg_replace('/[ \t][\w.,()\ \t]+[ \t]/', ' ', $shorten);
}
629+
// Preprocess: Domain exposure callback (See spam_uri_pickup_preprocess())
// http://victim.example.org/?foo+site:nasty.example.com+bar
// => http://nasty.example.com/?refer=victim.example.org
// $matches: 1:scheme 2:victim host 3:the rest before 'site:' 4:nasty host
//           5:(set = preserve the victim URI after the flipped one)
// NOTE: 'refer=' is not so good for (at this time).
//       Consider about using IP address of the victim, try to avoid that.
function _preg_replace_callback_domain_exposure($matches = array())
{
	// Preserve the victim URI as a complicity or ...
	$victim = '';
	if (isset($matches[5])) {
		$victim = $matches[1] . '://' . $matches[2] . '/' . $matches[3];
	}

	if (! isset($matches[4])) return $victim;

	// Flipped URI, then the victim (if preserved)
	$flipped = $matches[1] . '://' . $matches[4] . '/?refer=' . strtolower($matches[2]);

	return $flipped . ' ' . $victim;
}
658+
// Preprocess: rawurldecode() and adding space(s) and something
//   to detect/count some URIs _if possible_
// The decoded $string is cleaned by spam_uri_removing_hocus_pocus(), then
// the domains hidden behind redirectors, converting gateways and 'site:'
// searches are exposed as URIs of their own, and at last a space is put
// before each 'http(s):/' or 'ftp:/' not following one.
// Returns '' if $string is not a string.
// NOTE: It's maybe danger to var_dump(result). [e.g. 'javascript:']
// [OK] http://victim.example.org/?site:nasty.example.org
// [OK] http://victim.example.org/nasty.example.org
// [OK] http://victim.example.org/go?http%3A%2F%2Fnasty.example.org
// [OK] http://victim.example.org/http://nasty.example.org
function spam_uri_pickup_preprocess($string = '', $method = array())
{
	if (! is_string($string)) return '';

	$string = spam_uri_removing_hocus_pocus(rawurldecode($string), $method);
	//var_dump(htmlspecialchars($string));

	// Domain exposure (simple)
	// http://victim.example.org/nasty.example.org/path#frag
	// => http://nasty.example.org/?refer=victim.example.org and original
	$string = preg_replace(
		'#h?ttp://' .
		'(' .
			'ime\.nu' . '|' .	// 2ch.net
			'ime\.st' . '|' .	// 2ch.net
			'link\.toolbot\.com' . '|' .
			'urlx\.org' .
		')' .
		'/([a-z0-9.%_-]+\.[a-z0-9.%_-]+)#i',	// nasty.example.org
		'http://$2/?refer=$1 $0',				// Preserve $0 or remove?
		$string
	);

	// Domain exposure (gate-big5)
	// http://victim.example.org/gate/big5/nasty.example.org/path
	// => http://nasty.example.org/?refer=victim.example.org and original
	// NOTE: No '|' after the last host. It adds an empty alternative, and
	//       then 'http:///gate/big5/...' (no host at all) matches too.
	//       The dots are escaped to match the dot only.
	$string = preg_replace(
		'#h?ttp://' .
		'(' .
			'big5\.51job\.com' . '|' .
			'big5\.china\.com' . '|' .
			'big5\.xinhuanet\.com' .
		')' .
		'/gate/big5' .
		'/([a-z0-9.%_-]+\.[a-z0-9.%_-]+)' .
		'#i',	// nasty.example.org
		'http://$2/?refer=$1 $0',				// Preserve $0 or remove?
		$string
	);

	// Domain exposure (site:) See _preg_replace_callback_domain_exposure()
	$string = preg_replace_callback(
		array(
			'#(h?ttp)://' .	// 1:Scheme
			// 2:Host
			'(' .
				'(?:[a-z0-9_.-]+\.)?[a-z0-9_-]+\.[a-z0-9_-]+' .
				// Something Google: http://www.google.com/supported_domains
				// AltaVista: http://es.altavista.com/web/results?q=site%3Anasty.example.org+foobar
				// Live Search: search.live.com
				// MySpace: http://sads.myspace.com/Modules/Search/Pages/Search.aspx?_snip_&searchString=site:nasty.example.org
				// (also searchresults.myspace.com)
				// alltheweb.com
				// search.bbc.co.uk
				// search.orange.co.uk
				// ...
			')' .
			'/' .
			'([a-z0-9?=&.%_/\'\\\+-]+)' .				// 3:path/?query=foo+bar+
			'\bsite:([a-z0-9.%_-]+\.[a-z0-9.%_-]+)' .	// 4:site:nasty.example.com
			'()' .										// 5:Preserve or remove?
			'#i',
		),
		'_preg_replace_callback_domain_exposure',
		$string
	);

	// URI exposure (uriuri => uri uri)
	$string = preg_replace(
		array(
			'#(?<! )(?:https?|ftp):/#i',
		//	'#[a-z][a-z0-9.+-]{1,8}://#i',
		//	'#[a-z][a-z0-9.+-]{1,8}://#i'
		),
		' $0',
		$string
	);

	return $string;
}
746+
// Main function of spam-uri pickup,
// A wrapper function of uri_pickup()
// The $string is preprocessed by spam_uri_pickup_preprocess(), then URIs
// are picked up from the result. If 'uri_anchor' and/or 'uri_bbcode' are in
// $method and such markup areas exist, each URI gets a counter per method
// under 'area', counted up for every area having the URI inside.
// An empty or non-array $method is replaced with check_uri_spam_method().
function spam_uri_pickup($string = '', $method = array())
{
	if (! is_array($method) || empty($method)) {
		$method = check_uri_spam_method();
	}

	$string = spam_uri_pickup_preprocess($string, $method);

	$array = uri_pickup($string);

	// Area elevation of URIs, for '(especially external)link' intension
	if (! empty($array)) {
		// Methods of interest here
		$_method = array();
		foreach (array('uri_anchor', 'uri_bbcode') as $name) {
			if (isset($method[$name])) $_method[$name] = $method[$name];
		}
		$names = array_keys($_method);

		$areas = area_pickup($string, $_method);
		if (! empty($areas)) {
			// $area_shadow[N] is $array[N]['area'] itself, not a copy
			$area_shadow = array();
			foreach (array_keys($array) as $key) {
				$area_shadow[$key] = & $array[$key]['area'];
				foreach ($names as $name) {
					$area_shadow[$key][$name] = 0;
				}
			}
			foreach ($names as $name) {
				if (isset($areas[$name])) {
					area_measure($areas[$name], $area_shadow, 1, $name);
				}
			}
		}
	}

	// Remove 'offset's for area_measure()
	foreach (array_keys($array) as $key) {
		unset($array[$key]['area']['offset']);
	}

	return $array;
}
787+
788+?>
--- a/spam.ini.php
+++ b/spam.ini.php
@@ -1,5 +1,5 @@
11 <?php
2-// $Id: spam.ini.php,v 1.68 2007/06/23 15:22:55 henoheno Exp $
2+// $Id: spam.ini.php,v 1.69 2007/07/03 14:47:04 henoheno Exp $
33 // Spam-related setting
44
55 // NOTE FOR ADMINISTRATORS:
@@ -25,8 +25,10 @@
2525 // [3] IP address, if these hosts have the same ones
2626 // [4] Some unique idea of your own
2727 //
28-// Reference: Spamdexing http://en.wikipedia.org/wiki/Spamdexing
29-
28+// Reference:
29+// http://en.wikipedia.org/wiki/Spamdexing
30+// http://en.wikipedia.org/wiki/Domainers
31+// http://en.wikipedia.org/wiki/Typosquatting
3032
3133 $blocklist['list'] = array(
3234 // List of the lists
@@ -46,6 +48,8 @@ $blocklist['list'] = array(
4648 );
4749
4850
51+// ----
52+
4953 $blocklist['goodhost'] = array(
5054 // Sample setting of ignorance list
5155
@@ -152,6 +156,7 @@ $blocklist['A-1'] = array(
152156 '*.true.ws',
153157 '*.visit.ws',
154158 ),
159+ 'affilitool.com', // 125.206.117.91(right-way.org) by noboru hamada (info at isosupport.net)
155160 'aifam.com',
156161 'All4WebMasters.pl' => array(
157162 '*.ovp.pl',
@@ -258,6 +263,7 @@ $blocklist['A-1'] = array(
258263 '*.dvdonly.ru',
259264 '*.dynu.ca',
260265 'dwarf.name',
266+ '*.eadf.com',
261267 '*.easyurl.net',
262268 'elfurl.com',
263269 'eny.pl',
@@ -448,6 +454,7 @@ $blocklist['A-1'] = array(
448454 '*.i89.us',
449455 'iat.net', // 74.208.58.130 by Tony Carter
450456 '*.iceglow.com',
457+ 'go.id-tv.info', // 77.232.68.138(77-232-68-138.static.servage.net) by Max Million (max at id-tv.info)
451458 'Ideas para Nuevos Mercados SL' => array(
452459 // NOTE: 'i4nm.com' by 'Ideas para Nuevos Mercados SL' (i4nm at i4nm.com)
453460 // NOTE: 'dominiosfree.com' by 'Ideas para nuevos mercados,sl' (dominiosfree at i4nm.com)
@@ -855,6 +862,7 @@ $blocklist['A-1'] = array(
855862 'ourl.org',
856863 'ov2.net', // frame
857864 '*.ozonez.com',
865+ 'pagebang.com',
858866 'palurl.com',
859867 '*.paulding.net',
860868 'phpfaber.org',
@@ -871,6 +879,7 @@ $blocklist['A-1'] = array(
871879 'qrl.jp',
872880 'qurl.net',
873881 'qwer.org',
882+ 'readthisurl.com', // 67.15.58.36(win2k3.tuserver.com) by Zhe Hong Lim (zhehonglim at gmail.com)
874883 'radiobase.net',
875884 'RedirectFree.com' => array(
876885 '*.red.tc',
@@ -1338,6 +1347,7 @@ $blocklist['A-1'] = array(
13381347 '*.zwap.to',
13391348 );
13401349
1350+
13411351 $blocklist['A-2'] = array(
13421352
13431353 // A-2: Dynamic DNS, Dynamic IP services, DNS vulnerabilities, or another DNS cases
@@ -1396,8 +1406,10 @@ $blocklist['A-2'] = array(
13961406 '*.zapto.org',
13971407 ),
13981408 '*.zenno.info',
1399- '.cm', // 'Cameroon' ccTLD, sometimes used as typo of '.com'
1409+ '.cm', // 'Cameroon' ccTLD, sometimes used as typo of '.com',
14001410 // and all non-recorded domains redirect to 'agoga.com' now
1411+ // http://money.cnn.com/magazines/business2/business2_archive/2007/06/01/100050989/index.htm
1412+ // http://agoga.com/aboutus.html
14011413 );
14021414
14031415
@@ -1472,6 +1484,7 @@ $blocklist['B-1'] = array(
14721484 '*.aimoo.com',
14731485 '*.alkablog.com',
14741486 '*.alluwant.de',
1487+ '.amkbb.com',
14751488 'AOL.com' => // http://about.aol.com/international_services
14761489 '/^(?:chezmoi|home|homes|hometown|journals|user)\.' .
14771490 '(?:aol|americaonline)\.' .
@@ -1493,6 +1506,7 @@ $blocklist['B-1'] = array(
14931506 '*.blog-fx.com',
14941507 'blogas.lt',
14951508 'blogbud.com',
1509+ '*.blogburkinafaso.com',
14961510 '*.blogcu.com', // by info at nokta.com
14971511 'blogfreely.com',
14981512 '*.blogdrive.com',
@@ -1570,7 +1584,8 @@ $blocklist['B-1'] = array(
15701584 ),
15711585 'dotbb.be',
15721586 '*.dox.hu', // dns at 1b.hu
1573- '*.e-host.ws', // by dns at jomax.net, ns by 0catch.com
1587+ '*.e-host.ws', // by dns at jomax.net, ns by 0catch.com
1588+ '*.eadf.com',
15741589 '*.eblog.com.au',
15751590 '*.ekiwi.de',
15761591 '*.eamped.com', // Admin by Joe Hayes (joe_h_31028 at yahoo.com)
@@ -1588,6 +1603,7 @@ $blocklist['B-1'] = array(
15881603 // NOTE: 'blog.fc2.com' is not included
15891604 '*.h.fc2.com', // Adult
15901605 ),
1606+ '*.fizwig.com',
15911607 'forum.ezedia.net',
15921608 '*.extra.hu', // angelo at jasmin.hu
15931609 '*.fanforum.cc',
@@ -1598,7 +1614,9 @@ $blocklist['B-1'] = array(
15981614 'foroswebgratis.com',
15991615 '*.forum-on.de',
16001616 '*.forum5.com', // by Harry S (hsg944 at gmail.com)
1617+ '*.forum66.com',
16011618 'forumbolt.com',
1619+ 'phpbb.forumgratis.com',
16021620 '*.forumlivre.com',
16031621 'forumnow.com.br',
16041622 '*.forumppl.com',
@@ -1623,6 +1641,7 @@ $blocklist['B-1'] = array(
16231641 'freebb.nl',
16241642 '*.freeclans.de',
16251643 '*.freelinuxhost.com', // by 100webspace.com
1644+ '*.freehyperspace.com',
16261645 'freeforum.at', // by Sandro Wilhelmy
16271646 'freeforumshosting.com', // by Adam Roberts (admin at skaidon.co.uk)
16281647 '*.freeforums.org', // by 1&1 Internet, Inc. - 1and1.com
@@ -1718,6 +1737,7 @@ $blocklist['B-1'] = array(
17181737 '*.blog.livedoor.com', // redirection
17191738 ),
17201739 '*.livejournal.com',
1740+ '.load4.net', // 72.232.201.61(61.201.232.72.static.reverse.layeredtech.com), Says free web hosting but anonymous
17211741 '*.logme.nl',
17221742 'ltss.luton.ac.uk',
17231743 'Lycos.com' => array(
@@ -1881,6 +1901,7 @@ $blocklist['B-1'] = array(
18811901 '.www3.to',
18821902 ),
18831903 '*.spazioforum.it',
1904+ 'members.spboards.com',
18841905 'forums.speedguide.net',
18851906 '*.spicyblogger.com',
18861907 '*.spotbb.com',
@@ -2074,6 +2095,7 @@ $blocklist['B-2'] = array(
20742095 // (e.g. some sort of blog comments, BBSes, forums, wikis)
20752096 '*.3dm3.com',
20762097 '3gmicro.com', // by Dean Anderson (dean at nobullcomputing.com)
2098+ 'a4aid.org',
20772099 'aac.com',
20782100 '*.aamad.org',
20792101 'ad-pecjak.si',
@@ -2089,6 +2111,7 @@ $blocklist['B-2'] = array(
20892111 '*.canberra.net.au',
20902112 'castus.com',
20912113 'Case Western Reserve University' => array('case.edu'),
2114+ 'ceval.de',
20922115 'codespeak.net',
20932116 'Colorado School of Mines' => array('ticc.mines.edu'),
20942117 '*.colourware.co.uk',
@@ -2108,6 +2131,7 @@ $blocklist['B-2'] = array(
21082131 'deproduction.org',
21092132 'dc503.org',
21102133 'dre-centro.pt',
2134+ 'Duke University' => array('devel.linux.duke.edu'),
21112135 '*.esen.edu.sv',
21122136 'forums.drumcore.com',
21132137 'dundeeunited.org',
@@ -2130,6 +2154,7 @@ $blocklist['B-2'] = array(
21302154 'greendayvideo.net',
21312155 'espanol.greendayvideo.net',
21322156 ),
2157+ 'Hampton University' => array('calipsovalidation.hamptonu.edu'),
21332158 'Harvard Law School' => array('blogs.law.harvard.edu'),
21342159 'helpiammoving.com',
21352160 'homepage-dienste.com',
@@ -2197,6 +2222,7 @@ $blocklist['B-2'] = array(
21972222 'preform.dk',
21982223 'privatforum.de',
21992224 'publicityhound.net',
2225+ 'qea.com',
22002226 'rbkdesign.com',
22012227 'rehoboth.com',
22022228 'rodee.org',
@@ -2327,50 +2353,54 @@ $blocklist['C'] = array(
23272353 '.notiziacentro.info',
23282354 ),
23292355 'SomethingGen' => array(
2330- '.adult-chat-world.info', // 'CamsGen' by Lui Xeng Shou (camsgen at model-x.com),
2331- '.adult-chat-world.org', // 'CamsGen' by Lui Xeng Shou (camsgen at model-x.com)
2332- '.adult-sex-chat.info', // 'CamsGen' by Lui Xeng Shou (camsgen at model-x.com)
2333- '.adult-sex-chat.org', // 'CamsGen' by Lui Xeng Shou (camsgen at model-x.com)
2334- '.adult-cam-chat.info', // 'CamsGen' by Lui Xeng Shou (camsgen at model-x.com)
2335- '.adult-cam-chat.org', // 'CamsGen' by Lui Xeng Shou (camsgen at model-x.com)
2336- '.dildo-chat.org', // 'CamsGen' by Lui Xeng Shou (camsgen at model-x.com)
2337- '.dildo-chat.info', // 'CamsGen' by Lui Xeng Shou (camsgen at model-x.com)
2356+ // 'CamsGen' by Lui Xeng Shou (camsgen at model-x.com)
2357+ // 'CamsGen' by Sergey (buckster at hotpop.com)
2358+ // 'BucksoGen', by Pronin Sergey (buckster at list.ru)
2359+ // by Lee Chang (nebucha at model-x.com)
2360+ '.adult-chat-world.info', // by Lui
2361+ '.adult-chat-world.org', // by Lui
2362+ '.adult-sex-chat.info', // by Lui
2363+ '.adult-sex-chat.org', // by Lui
2364+ '.adult-cam-chat.info', // by Lui
2365+ '.adult-cam-chat.org', // by Lui
2366+ '.dildo-chat.org', // by Lui
2367+ '.dildo-chat.info', // by Lui
23382368 // flirt-online.info is not CamsGen
2339- '.flirt-online.org', // 'CamsGen' by Lui Xeng Shou (camsgen at model-x.com)
2340- '.live-adult-chat.info', // 'CamsGen' by Lui Xeng Shou (camsgen at model-x.com)
2341- '.live-adult-chat.org', // 'CamsGen' by Lui Xeng Shou (camsgen at model-x.com)
2342- '.sexy-chat-rooms.info', // 'CamsGen' by Lui Xeng Shou (camsgen at model-x.com)
2343- '.sexy-chat-rooms.org', // 'CamsGen' by Lui Xeng Shou (camsgen at model-x.com)
2344- '.swinger-sex-chat.info', // 'CamsGen' by Lui Xeng Shou (camsgen at model-x.com)
2345- '.swinger-sex-chat.org', // 'CamsGen' by Lui Xeng Shou (camsgen at model-x.com)
2346- '.nasty-sex-chat.info', // 'CamsGen' by Lui Xeng Shou (camsgen at model-x.com)
2347- '.nasty-sex-chat.org', // 'CamsGen' by Lui Xeng Shou (camsgen at model-x.com)
2369+ '.flirt-online.org', // by Lui
2370+ '.live-adult-chat.info', // by Lui
2371+ '.live-adult-chat.org', // by Lui
2372+ '.sexy-chat-rooms.info', // by Lui
2373+ '.sexy-chat-rooms.org', // by Lui
2374+ '.swinger-sex-chat.info', // by Lui
2375+ '.swinger-sex-chat.org', // by Lui
2376+ '.nasty-sex-chat.info', // by Lui
2377+ '.nasty-sex-chat.org', // by Lui
23482378
2349- '.camshost.info', // 'CamsGen' by Sergey (buckster at hotpop.com)
2350- '.camdoors.info', // 'CamsGen' by Sergey (buckster at hotpop.com)
2351- '.chatdoors.info', // 'CamsGen' by Sergey (buckster at hotpop.com)
2379+ '.camshost.info', // by Sergey
2380+ '.camdoors.info', // by Sergey
2381+ '.chatdoors.info', // by Sergey
23522382
2353- '.lebedi.info', // 'BucksoGen', by Pronin Sergey (buckster at list.ru),
2354- '.loshad.info', // 'BucksoGen', by Pronin Sergey (buckster at list.ru)
2355- '.porosenok.info', // 'BucksoGen', by Pronin Sergey (buckster at list.ru)
2356- '.indyushonok.info', // 'BucksoGen', by Pronin Sergey (buckster at list.ru)
2357- '.kotyonok.info', // 'BucksoGen', by Pronin Sergey (buckster at list.ru)
2358- '.kozlyonok.info', // 'BucksoGen', by Pronin Sergey (buckster at list.ru)
2359- '.magnoliya.info', // 'BucksoGen', by Pronin Sergey (buckster at list.ru)
2360- '.svinka.info', // 'BucksoGen', by Pronin Sergey (buckster at list.ru)
2361- '.svinya.info', // 'BucksoGen', by Pronin Sergey (buckster at list.ru)
2362- '.zherebyonok.info', // 89.149.206.225 'BucksoGen', by Pronin Sergey (buckster at list.ru)
2383+ '.lebedi.info', // by Pronin
2384+ '.loshad.info', // by Pronin
2385+ '.porosenok.info', // by Pronin
2386+ '.indyushonok.info', // by Pronin
2387+ '.kotyonok.info', // by Pronin
2388+ '.kozlyonok.info', // by Pronin
2389+ '.magnoliya.info', // by Pronin
2390+ '.svinka.info', // by Pronin
2391+ '.svinya.info', // by Pronin
2392+ '.zherebyonok.info', // 89.149.206.225 by Pronin
23632393
23642394 '.medvezhonok.org', // 89.149.206.225 "BucksoGen 1.2b"
23652395
2366- '.adult-cam-chat-sex.info', // by Lee Chang (nebucha at model-x.com)
2367- '.adult-chat-sex-cam.info', // 'CamsGen' by Lee Chang (nebucha at model-x.com)
2368- '.live-chat-cam-sex.info', // 'CamsGen' by Lee Chang (nebucha at model-x.com)
2369- '.live-nude-cam-chat.info', // 'CamsGen' by Lee Chang (nebucha at model-x.com)
2370- '.live-sex-cam-nude-chat.info', // 'CamsGen' by Lee Chang (nebucha at model-x.com)
2371- '.sex-cam-live-chat-web.info', // 'CamsGen' by Lee Chang (nebucha at model-x.com)
2372- '.sex-chat-live-cam-nude.info', // 'CamsGen' by Lee Chang (nebucha at model-x.com)
2373- '.sex-chat-porn-cam.info', // by Lee Chang (nebucha at model-x.com)
2396+ '.adult-cam-chat-sex.info', // by Lee
2397+ '.adult-chat-sex-cam.info', // 'CamsGen' by Lee
2398+ '.live-chat-cam-sex.info', // 'CamsGen' by Lee
2399+ '.live-nude-cam-chat.info', // 'CamsGen' by Lee
2400+ '.live-sex-cam-nude-chat.info', // 'CamsGen' by Lee
2401+ '.sex-cam-live-chat-web.info', // 'CamsGen' by Lee
2402+ '.sex-chat-live-cam-nude.info', // 'CamsGen' by Lee
2403+ '.sex-chat-porn-cam.info', // by Lee
23742404 ),
23752405 'mital at topo20.org' => array( // by Marcello Italianore
23762406 '.trevisos.org',
@@ -2832,11 +2862,15 @@ $blocklist['C'] = array(
28322862 '.yxyzauiq.info', // by robemuq8455 at cheerful.com
28332863 ),
28342864 'Carmodelrank.com etc' => array(
2835- '.carmodelrank.com',// by Brianna Dunlord (briasmi at yahoo.com)
2836- '.cutestories.net', // by Brianna Dunlord (briasmi at yahoo.com)
2865+ // by Brianna Dunlord (briasmi at yahoo.com)
2866+ // by Tim Rennei (TimRennei at yahoo.com), redirect to amaena.com (fake-antivirus)
2867+ // by Alice T. Horst (Alice.T.Horst at pookmail.com)
2868+ '.carmodelrank.com',// by Brianna
2869+ '.cutestories.net', // by Brianna
28372870 '.sturducs.com',
2838- '.bestother.info', // by Tim Rennei (TimRennei at yahoo.com), redirect to amaena.com (fake-antivirus)
2839- '.yaahooo.info', // by Alice T. Horst (Alice.T.Horst at pookmail.com), redirect to activefreehost.com
2871+ '.bestother.info', // by Tim
2872+ '.premiumcasinogames.com', // by Brianna
2873+ '.yaahooo.info', // by Alice
28402874 ),
28412875 'aliacsandr at yahoo.com' => array(
28422876 '.cubub.info', // "Free Web Hosting"
@@ -3881,6 +3915,7 @@ $blocklist['C'] = array(
38813915 'hostorgadmin at googlemail.com' => array( // Byethost Internet Ltd.
38823916 '.1sthost.org',
38833917 '.22web.net',
3918+ '.2kool4u.net',
38843919 '.4sql.net',
38853920 '.php0h.com',
38863921 '.php1h.com',
@@ -4098,62 +4133,53 @@ $blocklist['C'] = array(
40984133 '.sanartuk.ru', // by Vladimir I Noskov (hoskv2003 at gmail.ru)
40994134 ),
41004135 '208.70.75.153' => array(
4101- '.cerc-fi.info', // by Kon Bi (cerca-two at ya.ru), 208.70.75.153
4102- '.cerc-fo.info', // by Kon Bi (cerca-two at ya.ru), 208.70.75.153
4103- '.cerc-no.info', // by Ru Lee (cerca-tree at ya.ru), 208.70.75.153
4104- '.cerc-on.info', // 208.70.75.153
4105- '.cerc-sv.info', // by Ru Lee (cerca-tree at ya.ru)
4106- '.cerc-sx.org', // by Kon Bi (cerca-two at ya.ru), 208.70.75.153
4107- '.cerc-te.info', // by Ru Lee (cerca-tree at ya.ru), 208.70.75.153
4108- '.cerc-tr.info', // 208.70.75.153
4109- '.cerc-tw.info', // 208.70.75.153
4110-
4111- '.cerc-fi.org', // by Kon Bi (cerca-two at ya.ru), 208.70.75.153
4112- '.cerc-fo.org', // by Kon Bi (cerca-two at ya.ru), 208.70.75.153
4113- '.cerc-no.org', // by Ru Lee (cerca-tree at ya.ru), 208.70.75.153
4114- '.cerc-on.org', // by cerca-one at ya.ru, 208.70.75.153
4115- '.cerc-sv.org', // by Ru Lee (cerca-tree at ya.ru), 208.70.75.153
4116- '.cerc-sx.org', // by Kon Bi (cerca-two at ya.ru), 208.70.75.153
4117- '.cerc-te.org', // by Ru Lee (cerca-tree at ya.ru), 208.70.75.153
4118- '.cerc-tr.org', // by cerca-one at ya.ru, 208.70.75.153
4119- '.cerc-tw.org', // by cerca-one at ya.ru, 208.70.75.153
4120-
4121- '.cerca-fi.org', // by orgitaly1 at ya.ru, 208.70.75.153
4122- '.cerca-fo.info', // 208.70.75.153
4123- '.cerca-no.info', // 208.70.75.153
4124- '.cerca-on.info', // 208.70.75.153
4125- '.cerca-sv.info', // 208.70.75.153
4126- '.cerca-sx.org', // by orgitaly2 at ya.ru, 208.70.75.153
4127- '.cerca-te.info', // 208.70.75.153
4128- '.cerca-tr.info', // 208.70.75.153
4129-
4136+ '.cerc-fi.info', // by Kon Bi (cerca-two at ya.ru)
4137+ '.cerc-fo.info', // by Kon Bi (cerca-two at ya.ru)
4138+ '.cerc-no.info', // by Ru Lee (cerca-tree at ya.ru)
4139+ '.cerc-on.info',
4140+ '.cerc-sv.info', // by Ru Lee (cerca-tree at ya.ru)
4141+ '.cerc-sx.org', // by Kon Bi (cerca-two at ya.ru)
4142+ '.cerc-te.info', // by Ru Lee (cerca-tree at ya.ru)
4143+ '.cerc-tr.info',
4144+ '.cerc-tw.info',
4145+ '.cerc-fi.org', // by Kon Bi (cerca-two at ya.ru)
4146+ '.cerc-fo.org', // by Kon Bi (cerca-two at ya.ru)
4147+ '.cerc-no.org', // by Ru Lee (cerca-tree at ya.ru)
4148+ '.cerc-on.org', // by cerca-one at ya.ru
4149+ '.cerc-sv.org', // by Ru Lee (cerca-tree at ya.ru)
4150+ '.cerc-sx.org', // by Kon Bi (cerca-two at ya.ru)
4151+ '.cerc-te.org', // by Ru Lee (cerca-tree at ya.ru)
4152+ '.cerc-tr.org', // by cerca-one at ya.ru
4153+ '.cerc-tw.org', // by cerca-one at ya.ru
4154+ '.cerca-fi.org', // by orgitaly1 at ya.ru
4155+ '.cerca-fo.info',
4156+ '.cerca-no.info',
4157+ '.cerca-on.info',
4158+ '.cerca-sv.info',
4159+ '.cerca-sx.org', // by orgitaly2 at ya.ru
4160+ '.cerca-te.info',
4161+ '.cerca-tr.info',
41304162 '.cerca-sx.org',
4131- '.cerca-tr.org', // orgitaly1 at ya.ru
4132-
4133- '.ricerca-fiv.org', // orgitaly1 at ya.ru
4134-
4135- '.ricerca-fo.info', // 208.70.75.153
4136-
4137- '.ricerca-one.org', // 208.70.75.153
4138-
4163+ '.cerca-tr.org', // orgitaly1 at ya.ru
4164+ '.ricerca-fiv.org', // orgitaly1 at ya.ru
4165+ '.ricerca-fo.info',
4166+ '.ricerca-one.org',
41394167 '.ricerca-sv.org',
41404168 '.ricerca-sx.org',
41414169 '.ricerca-te.org',
4142- '.ricerca-tw.org', // orgitaly1 at ya.ru
4143-
4144- '.subit01.org', // 208.70.75.153
4145- '.subit02.org', // 208.70.75.153
4146- '.subit03.org', // 208.70.75.153
4147- '.subit04.org', // 208.70.75.153
4148- '.subit05.org', // 208.70.75.153
4149- '.subit06.org', // 208.70.75.153
4150-
4151- '.subit01.info', // 208.70.75.153
4152- '.subit02.info', // 208.70.75.153
4153- '.subit03.info', // 208.70.75.153
4154- '.subit04.info', // 208.70.75.153
4155- '.subit05.info', // 208.70.75.153
4156- '.subit06.info', // 208.70.75.153
4170+ '.ricerca-tw.org', // orgitaly1 at ya.ru
4171+ '.subit01.org',
4172+ '.subit02.org',
4173+ '.subit03.org',
4174+ '.subit04.org',
4175+ '.subit05.org',
4176+ '.subit06.org',
4177+ '.subit01.info',
4178+ '.subit02.info',
4179+ '.subit03.info',
4180+ '.subit04.info',
4181+ '.subit05.info',
4182+ '.subit06.info',
41574183 ),
41584184 'ernestppc at yahoo.com' => array( // by Anrey Markov (ernestppc at yahoo.com)
41594185 '.5-base.com',
@@ -4299,21 +4325,147 @@ $blocklist['C'] = array(
42994325 '.kliktop.org',
43004326 '.pharmatop.us',
43014327 '.supertop.us',
4328+ '.supervaizer.info',
43024329 ),
43034330 'infomed2004 at mail.ru' => array( // by Andrey Ushakov (infomed2004 at mail.ru)
43044331 '.freeamateursexx.info', // 81.0.195.228
43054332 '.freeanalsexx.info', // 217.11.233.97
43064333 ),
43074334 'support at dns4me.biz' => array( // 89.149.228.237 by John Black (support at dns4me.biz)
4335+ '.abbhi.info',
43084336 '.gayblogguide.biz',
4337+ '.huope.info',
43094338 '.thebdsmday.info',
4339+ '.zioprt.info', // 89.149.228.237
43104340 ),
43114341 'dzheker at yandex.ru' => array( // by dzheker at yandex.ru
4342+ '.boblisk.info',
43124343 '.factyri.info',
43134344 '.jorge1.info',
43144345 ),
4346+ 'lichincool at gmail.com' => array( // 72.232.229.115 by lichincool at gmail.com, / is meaningless
4347+ '.bestmindstorm.org',
4348+ '.redstoreonline.org',
4349+ ),
4350+ '59.106.24.2' => array( // 59.106.24.2, sakagutiryouta at yahoo.co.jp
4351+ '.8e8ae.net',
4352+ '.c-cock.com',
4353+ '.fa59eaf.com',
4354+ '.set-place.net',
4355+ '.sex-beauty.net',
4356+ ),
4357+ '84.252.148.140' => array( // 84.252.148.140(kratos.mchost.ru)
4358+ '.unefout.info',
4359+ '.unitfree.info',
4360+ '.votrefout.info',
4361+ ),
4362+ 'info at thecanadianmeds.com' => array( // by Andrey Smirnov (info at thecanadianmeds.com)
4363+ '.myviagrasite.com', // 80.74.153.2
4364+ '.thecanadianmeds.com', // 80.74.153.17
4365+ ),
4366+
4367+ // C-2: Affiliates, Hypes, Catalog retailers, Multi-level marketings, Resellers
4368+ '15-Mail.com related' => array(
4369+ '.15-mail.com', // 202.218.109.45(*.netassist.jp) by yukiyo yamamoto (sunkusu5268 at m4.ktplan.ne.jp)
4370+ '.1bloglog.com', // 210.253.115.159 by Yukiyo Yamamoto (info at 15-mail.com)
4371+ '.investment-school.com', // 210.253.115.159 by Yukiyo Yamamoto (info at 15-mail.com)
4372+ '.breakjuku.com', // 210.253.115.159 (service provider bet.co.jp = xserver.jp)
4373+ '.nambara.biz', // by Yukiyo Yamamoto (info at 15-mail.com)
4374+ ),
4375+ '.all-affiliater.com', // 202.222.30.18(sv125.lolipop.jp), ns *.lolipop.jp
4376+ 'E-brainers.com related' => array(
4377+ // 202.212.14.101
4378+ '.cyoto-morketing-club.com', // by Fujio Iwasaki (domain at sppd.co.jp)
4379+ '.e-brainers.com', // by Fujio Iwasaki (domain at sppd.co.jp)
4380+ '.my-tune.jp', // by brainers Inc.
4381+ '.technical-support-center.com',// by Fujio Iwasaki (domain at sppd.co.jp)
4382+ '.weekle.jp', // by brainers Inc.
4383+
4384+ // 210.136.111.56 by Masatoshi Kobayashi (domain at e-brainers.com)
4385+ // 210.136.111.56 by Fujio Iwasaki (domain at sppd.co.jp)
4386+ '.3minutes-marketing-club.com', // by Fujio
4387+ '.affiliate-vampire.com', // by Masatoshi
4388+ '.article-site-power-package.com', // by Masatoshi
4389+ '.audio-marketing-club.com', // by Fujio
4390+ '.brainers-task-manager.com', // by Masatoshi
4391+ '.brainers-troubleshooter-generator.com', // by Masatoshi
4392+ '.brainersbuzz.com', // by Masatoshi
4393+ '.den4renz-marketing-club.com', // by Fujio
4394+ '.english-contents-club.com', // by Masatoshi
4395+ '.fly-in-ads-japan.com', // by Fujio
4396+ '.free-resalerights-giveaway.com', // by Fujio
4397+ '.freegiveawaysecret.com', // by Masatoshi
4398+ '.guaranteedvisitorpro.com', // by Masatoshi
4399+ '.havads-japan.com', // by Masatoshi
4400+ '.info-business123.com', // by Fujio
4401+ '.instant-marketing-club.com', // by Fujio
4402+ '.marketing-force-japan.com', // by Fujio
4403+ '.masatoshikobayashi.com', // by Fujio
4404+ '.profitsinstigator.com', // by Masatoshi Kobayashi (akada@e-brainers.com)
4405+ '.replytomatt.com', // by Fujio
4406+ '.santa-deal.com', // by Fujio
4407+ '.santa-deal-summer.com', // by Fujio
4408+ '.scratch-card-factory.com', // by Masatoshi
4409+ '.script4you-japan.com', // by Fujio
4410+ '.sell1000000dollarinjapan.com',// by Fujio
4411+ '.squeeze-page-secret.com', // by Masatoshi
4412+ '.viral-blog-square.com', // by Fujio
4413+ '.viralarticle.com', // by Fujio
4414+ '.wowhoken.com', // by Fujio
43154415
4316- // C-2: Lonely domains (buddies not found yet)
4416+ // 202.212.14.104 by Fujio Iwasaki (domain@sppd.co.jp)
4417+ '.brainerstelevision.com',
4418+ '.demosite4you.com',
4419+ '.keywordcatcherpro.com',
4420+ '.script-marketing-club.com',
4421+
4422+ // 202.228.204.140(server.ultimate-marketing-weapon.com) by Masatoshi Kobayashi (akada at e-brainers.com)
4423+ // 202.228.204.140 by Masatoshi Kobayashi (domain at e-brainers.com)
4424+ '.brainers.ws', // 202.228.204.140 by info at key-systems.net, ns *.ultimate-marketing-weapon.com
4425+ '.brainerscode.com', // by akada
4426+ '.brainerslive.com', // by domain
4427+ '.brainersreview.com', // by domain
4428+ '.brainerstest.com', // by akada
4429+ '.otosecret.com', // by domain
4430+ '.ultimate-marketing-weapon.com', // by akada
4431+ '.planet-club.net', // 202.228.204.141(server.ultimate-marketing-weapon.com)
4432+ '.terk.jp', // by Tsuyoshi Tsukada, QHM
4433+
4434+ '.samuraiautoresponder.com', // 211.125.179.75(bq1.mm22.jp) by Masatoshi Kobayashi (kobayashi at wowhoken.com)
4435+ '.sppd.co.jp', // 210.136.106.122 by Studio Map Ltd., ns *.sppd.ne.jp, spam
4436+ ),
4437+ '.e2996.com', // 202.181.105.241(sv261.lolipop.jp)
4438+ '.fx4rich.com', // 219.94.128.161(www921.sakura.ne.jp) by Yuji Nakano (info at will76.com)
4439+ 'info at kobeweb.jp' => array(
4440+ '.soholife.jp', // 211.125.65.203 by Takashige Tabuchi (info at kobeweb.jp)
4441+ '.kobeweb.jp', // 59.106.13.51(www421.sakura.ne.jp)
4442+ '.sloters.tv', // 211.125.65.203 by Takashige Tabuchi (t-2 at white.interq.or.jp)
4443+ ),
4444+ '.info-affiliate.net', // 219.94.148.8(sv41.chicappa.jp)
4445+ '.infostore.jp', // 216.255.235.45, ns *.estore.co.jp
4446+ 'JunSuzuki.com' => array( // e-brainers.com related
4447+ '.junsuzuki.com', // 218.216.67.43(s92.xrea.com) by Jun Suzuki (jun_suzuki at compus.net)
4448+ '.globalswing.biz', // 210.188.217.109(sv27.xserverzero.net)
4449+ ),
4450+ 'Point-park.com' => array( // Tadahiro Ogawa (domain at wide.ne.jp)
4451+ '.11kanji.com', // 211.10.131.88
4452+ '.mlmsupport.jp', // 211.10.131.108 by info at point-park.com
4453+ '.point-park.com', // 211.10.131.88
4454+ '.point-park.jp', // 43.244.140.160(160.140.244.43.ap.yournet.ne.jp)
4455+ ),
4456+ '.potitto.info', // 219.94.132.89(sv450.lolipop.jp)
4457+ '.sedori-data.com', //
4458+ '.tool4success.com', // 210.188.201.31(sv70.xserver.jp) by Yukihiro Akada (ml at original-ehon.com)
4459+ 'tera at kirinn.com' => array( // 59.139.29.234(s240.xrea.com) by Naohsi Terada (tera at kirinn.com)
4460+ '.e123.info',
4461+ '.ialchemist.net',
4462+ '.j012.net',
4463+ '.xn--yckc2auxd4b6564dogvcf7g.biz',
4464+ ),
4465+ '.zakkuzaku.com', // 210.188.201.44(sv83.xserver.jp)
4466+
4467+
4468+ // C-3: Lonely domains (buddies not found yet)
43174469 '.0721-4404.com',
43184470 '.0nline-porno.info', // by Timyr (timyr at narod.ru)
43194471 '.1-click-clipart.com', // by Big Resources, Inc. (hostmaster at bigresources.com)
@@ -4328,7 +4480,7 @@ $blocklist['C'] = array(
43284480 '.50webs.com', // by LiquidNet Ltd. (support at propersupport.com), redirect to mpage.jp
43294481 '.6i6.de',
43304482 '.advancediet.com', // by Shonta Mojica (hostadmin at advancediet.com)
4331- '.adult-master-club.com', // by Alehander (mazyrkevich at cosmostv.by)
4483+ '.adult-master-club.com', // by Alehander (mazyrkevich at cosmostv.by)
43324484 '.adultpersonalsclubs.com', // by Peter (vaspet34 at yahoo.com)
43334485 '.akgame.com', // 72.32.79.100 by Howard Ke (gmtbank at gmail.com), rmt & pl
43344486 '.alfanetwork.info', // by dante (dantequick at gmail.com)
@@ -4340,12 +4492,12 @@ $blocklist['C'] = array(
43404492 '.banep.info', // by Mihailov Dmitriy (marokogadro at yahoo.com), iframe to this site
43414493 '.baurish.info',
43424494 '.bestop.name',
4343- '.bestmindstorm.org', // 72.232.229.115 by lichincool at gmail.com, / meanless
43444495 '.betmmo.com', // 63.223.98.182 by Huang Qiang (liuxing-wushi at hotmail.com), pl
43454496 '.bestrademark.info', // by victoria (niko16d at yahoo.com), redirect to majordomo.ru
43464497 '.bestshopfinder.info',
43474498 '.bloggerblast.com', // by B. Kadrie (domains at starwhitehosting.com)
4348- '.blogest.org', // 203.116.63.68 by Bobby.R.Kightlinger at pookmail.com, / seems blank
4499+ '.blogest.org', // 203.116.63.68 by Bobby.R.Kightlinger at pookmail.com, / seems blank
4500+ '.bookblogsite.org', // 217.11.233.58 by Eugene.E.Mather at mailinator.com
43494501 '.businessplace.biz', // by Grenchenko Ivan Petrovich (eurogogi at yandex.ru)
43504502 '.capital2u.info', // by Delbert.A.Henry at dodgeit.com
43514503 '.casa-olympus.com', // "UcoZ WEB-SERVICES"
@@ -4353,9 +4505,9 @@ $blocklist['C'] = array(
43534505 '.constitutionpartyofwa.org', // "UcoZ WEB-SERVICES"
43544506 '.covertarena.co.uk', // by Wayne Huxtable
43554507 '.d999.info', // by Peter Vayner (peter.vayner at inbox.ru)
4356- '.dinmo.cn', // 218.30.96.149 by dinso at 163.com, seo etc.
4357- //'.wow-gold.dinmo.cn', // 125.65.76.59, pl
4358- '.dinmoseo.com', // 210.51.168.102(winp2-web-g02.xinnetdns.com) by jianmin911 at 126.com, NS *.xinnetdns.com, seo
4508+ '.dinmo.cn', // 218.30.96.149 by dinso at 163.com, seo etc.
4509+ //'.wow-gold.dinmo.cn', // 125.65.76.59, pl
4510+ '.dinmoseo.com', // 210.51.168.102(winp2-web-g02.xinnetdns.com) by jianmin911 at 126.com, NS *.xinnetdns.com, seo
43594511 '.dlekei.info', // by Maxima Bucaro (webmaster at tts2f.info)
43604512 '.dollar4u.info', // by Carla (Carla.J.Merritt at mytrashmail.com), / is blank
43614513 '.drug-shop.us', // by Alexandr (matrixpro at mail.ru)
@@ -4367,7 +4519,7 @@ $blocklist['C'] = array(
43674519 '.fantasy-handjob-ra.com', // by Hose Pedro (hosepedro at gmail.com)
43684520 '.fast4me.info', // by Hakan Durov (poddubok at inbox.ru), / is blank
43694521 '.fastmoms.info', // by Pavel Golyshev (pogol at walla.com), / is blank
4370- '.fastppc.info', // by peter conor (fastppc at msn.com)
4522+ '.fastppc.info', // by peter conor (fastppc at msn.com)
43714523 '.ffxiforums.net', // by Zhang xiaolong (mail at 33986.com), hidden VBScript
43724524 '*.filthserver.com', // sales at onlinemarketingservices.biz
43734525 '.find-stuff.org', // by Alice Freedman (admin at ip-labs.ru), / 404 Not Found
@@ -4394,8 +4546,8 @@ $blocklist['C'] = array(
43944546 '.gm-exchange.jp', // RMT
43954547 '.goamoto.ru', // by Dmitry E Kotchnev (z2archive at gmail.com)
43964548 '.good1688.com', // by Wen Chien Lunz (wzk1219 at yahoo.com.tw), one of them frame to , and whoop.to
4397- '.google-pharmacy.com', // by alex (mdisign1997 at yahoo.com), hiding with urlx.org etc
4398- '.greatbestwestern.org', // by gao.wungao at gmail.com
4549+ '.google-pharmacy.com', // by alex (mdisign1997 at yahoo.com), hiding with urlx.org etc
4550+ '.greatbestwestern.org',// by gao.wungao at gmail.com
43994551 '.greatsexdate.com', // by Andreas Crablo (crablo at hotmail.com)
44004552 '.guild-wars-online.com', // by Fuzhou Tianmeng Touzi Zixun Co.,Ltd (welkin at skyunion.com)
44014553 '.happyhost.org', // by Paul Zamnov (paul at zamnov.be)
@@ -4403,14 +4555,15 @@ $blocklist['C'] = array(
44034555 '.honda168.net', // by tan tianfu (xueyihua at gmail.com), seems not used now
44044556 '.hostuju.cz', // ns banan.cz, banan.it
44054557 '.hot4buy.org', // by Hot Maker (jot at hot4buy.org)
4406- '.hotscriptonline.info', // by Psy Search (admin at psysearch.com)
4558+ '.hotscriptonline.info',// by Psy Search (admin at psysearch.com)
4559+ '.iinaa.net', // domain at ml.ninja.co.jp, ns *.shinobi.jp
44074560 '.incbuy.info', // by Diego T. Murphy (Diego.T.Murphy at incbuy.info)
44084561 '.infocart.jp', // Trying to earn money easily by selling 'earn-money-easiliy' tips
44094562 '.infradoc.com',
44104563 '.investorvillage.com', // by natalija puchkova (internet at internet.lv)
4411- '.ismarket.com', // Google-hiding. intercage.com related IP
4564+ '.ismarket.com', // Google-hiding. intercage.com related IP
44124565 '.italialiveonline.info', // by Silvio Cataloni (segooglemsn at yahoo.com), redirect to activefreehost.com
4413- '.italy-search.org', // by Alex Yablin (zaharov-alex at yandex.ru)
4566+ '.italy-search.org', // by Alex Yablin (zaharov-alex at yandex.ru)
44144567 '.itsexosit.net',
44154568 '.itxxxit.net',
44164569 '.jimmys21.com', // by Klen Kudryavii (telvid at shaw.ca)
@@ -4424,7 +4577,7 @@ $blocklist['C'] = array(
44244577 '.link-keeper.net', // 210.172.108.236 (257.xrea.com)
44254578 '.ls.la', // by Milton McLellan (McLellanMilton at yahoo.com)
44264579 '.mamaha.info', // by Alex Klimovsky (paganec at gmail.com), seems to be under construction now
4427- '.manseekingwomanx.com', // by Bill Peterson (coccooc at fastmail.fm)
4580+ '.manseekingwomanx.com',// by Bill Peterson (coccooc at fastmail.fm)
44284581 '.medpharmaworldguide.com', // by Nick Ivchenkov (signmark at gmail.com), / not found
44294582 '.megvideochatlive.info', // Bad seo
44304583 '.milfxxxpass.com', // by Morozov Pavlik (rulets at gmail.com)
@@ -4434,6 +4587,7 @@ $blocklist['C'] = array(
44344587 '.next-moneylife.com', // RMT
44354588 '.newalandirect.com', // by Alnoor Hirji, ns *.sablehost.com
44364589 '.ngfu2.info', // by Tara Lagrant (webmaster at ngfu2.info)
4590+ '.nucked-sex.com', // 203.223.150.222 by lis (noidlis2 at yahoo.com)
44374591 '.ok10000.com', // by zipeng hu (ldcs350003 at hotmail.com)
44384592 '.olimpmebel.info', // by pol (pauk_life at mail.ru), frame to bettersexmall.com
44394593 '.onlinetert.info', // by Jarod Hyde (grigorysch at gmail.com)
@@ -4467,6 +4621,7 @@ $blocklist['C'] = array(
44674621 '.searchadv.com', // by Jaan Randolph (searchadv at gmail.com)
44684622 '.seek-www.com', // by Adam Smit (pingpong at mail.md)
44694623 '.sessocities.net', // by info at secureserver3.com
4624+ '.seven-pharmacy.com', // 83.138.176.247 by Justin Timberlake (preved at gmail.com)
44704625 '.sexamoreit.com',
44714626 '.sexforit.com',
44724627 '.sexmaniacs.org', // by Yang Chong (chong at x-india.com)
@@ -4480,6 +4635,7 @@ $blocklist['C'] = array(
44804635 '.thehostcity.com', // Domains by Proxy
44814636 '.thetinyurl.com', // by Beth J. Carter (Beth.J.Carter at thetinyurl.com)
44824637 '.thetrendy.info', // by Harold (Harold.J.Craft at pookmail.com), / is blank
4638+ '.theusapills.com', // by Dr. Zarman (contactus at theusapills.com)
44834639 '.topmeds10.com',
44844640 '*.tv-reklama.info', // by Kozlov Maxim (m_koz at mail.ru)
44854641 '.twabout.com', // by qiu wenbing (qiuwenbing at 126.com), content from l2mpt.net
@@ -4504,13 +4660,14 @@ $blocklist['C'] = array(
45044660 '.xpacificpoker.com', // by Hubert Hoffman (support at xpacificpoker.com)
45054661 '.xphost.org', // by alex alex (alrusnac at hotmail.com)
45064662 '.xamorexxx.net',
4663+ '.xn--gmqt9gewhdnlyq9c.net', // 122.249.16.133(x016133.ppp.asahi-net.or.jp) by daizinazikanwo yahoo.co.jp
45074664 '.xsessox.com',
45084665 '.yoi4.net', // by Ryouhei Nakamura (888 at sympathys.com), tell me why so many blogs with popular issues and _diverted design from blog.livedoor.jp_ around here.
45094666 '.zlocorp.com', // by tonibcrus at hotpop.com, spammed well with "http ://zlocorp.com/"
45104667 '.zyguo.info', // ns globoxhost.net
45114668 '.zhuyiw.com', // by zhou yuntao (whzyt0122 at sohu.com)
45124669
4513- // C-3: Not classifiable (information wanted)
4670+ // C-4: Not classifiable (information wanted)
45144671 //
45154672 // Something incoming to pukiwiki related sites
45164673 'nana.co.il related' => array(
@@ -4519,7 +4676,6 @@ $blocklist['C'] = array(
45194676 ),
45204677 );
45214678
4522-
45234679 $blocklist['D'] = array(
45244680 // D: Sample setting of
45254681 // "third party in good faith"s