• R/O
  • HTTP
  • SSH
  • HTTPS

提交

標籤
無標籤

Frequently used words (click to add to your profile)

java, c++, android, linux, c#, windows, objective-c, cocoa, 誰得, qt, python, php, ruby, game, gui, bathyscaphe, c, 計画中(planning stage), 翻訳, omegat, framework, twitter, dom, test, vb.net, directx, ゲームエンジン, btron, arduino, previewer

Commit MetaInfo

修訂: a47991d4760cb6819bc1d032bc3873c3f495d306 (tree)
時間: 2007-01-02 16:09:21
作者: henoheno <henoheno>
Committer: henoheno

Log Message

$Id: spam.php,v 1.87 2007/01/02 05:57:51 henoheno Exp $
* Report badhost detail by mail
* Rename $method: 'non_uniq' => 'non_uniquri'
* Added $method['non_uniqhost'], which allows N duped (and normalized) hosts
NOTE: Not good for editing bookmarks (especially those that have many sites on _the_same_host_, e.g. a blog hosting service)
* uri_pickup(): FQDN does not have back-slashes and amps
* check_uri_spam(): Simplify

Change Summary

差異

--- a/lib/spam.php
+++ b/lib/spam.php
@@ -1,5 +1,5 @@
11 <?php
2-// $Id: spam.php,v 1.11 2006/12/23 04:36:40 henoheno Exp $
2+// $Id: spam.php,v 1.12 2007/01/02 07:09:06 henoheno Exp $
33 // Copyright (C) 2006 PukiWiki Developers Team
44 // License: GPL v2 or (at your option) any later version
55
@@ -33,7 +33,7 @@ function uri_pickup($string = '', $normalize = TRUE,
3333 // 3: Host
3434 '\[[0-9a-f:.]+\]' . '|' . // IPv6([colon-hex and dot]): RFC2732
3535 '(?:[0-9]{1-3}\.){3}[0-9]{1-3}' . '|' . // IPv4(dot-decimal): 001.22.3.44
36- '[^\s<>"\'\[\]:/\#?]+' . // FQDN: foo.example.org
36+ '[^\s<>"\'\[\]:/\#?&\\\]+' . // FQDN: foo.example.org
3737 ')' .
3838 '(?::([0-9]*))?' . // 4: Port
3939 '((?:/+[^\s<>"\'\[\]/\#]+)*/+)?' . // 5: Directory path or path-info
@@ -652,7 +652,7 @@ function is_badhost($hosts = '', $asap = TRUE)
652652 array('pressblog.jp', '*.pressblog.jp'),
653653 );
654654 foreach ($blocklist['badhost'] as $part) {
655- $_part = is_array($part) ? implode(', ', $part) : $part;
655+ $_part = is_array($part) ? implode('/', $part) : $part;
656656 $regex['badhost'][$_part] = '/^' . generate_glob_regex($part) . '$/i';
657657 }
658658 }
@@ -662,12 +662,11 @@ function is_badhost($hosts = '', $asap = TRUE)
662662 $blocklist = array();
663663 require(SPAM_INI_FILE);
664664 foreach ($blocklist['badhost'] as $part) {
665- $_part = is_array($part) ? implode(', ', $part) : $part;
665+ $_part = is_array($part) ? implode('/', $part) : $part;
666666 $regex['badhost'][$_part] = '/^' . generate_glob_regex($part) . '$/i';
667667 }
668668 }
669669 }
670- //var_dump($regex);
671670
672671 $result = array();
673672 if (! is_array($hosts)) $hosts = array($hosts);
@@ -690,7 +689,7 @@ function is_badhost($hosts = '', $asap = TRUE)
690689 return $result;
691690 }
692691
693-// Default (enabled) methods and thresholds
692+// Default (enabled) methods and thresholds (for content insertion)
694693 function check_uri_spam_method($times = 1, $t_area = 0, $rule = TRUE)
695694 {
696695 $times = intval($times);
@@ -698,21 +697,22 @@ function check_uri_spam_method($times = 1, $t_area = 0, $rule = TRUE)
698697
699698 $positive = array(
700699 // Thresholds
701- 'quantity' => 8 * $times, // Allow N URIs
702- 'non_uniq' => 3 * $times, // Allow N duped (and normalized) URIs
700+ 'quantity' => 8 * $times, // Allow N URIs
701+ 'non_uniqhost' => 7 * $times, // Allow N duped (and normalized) Hosts
702+ 'non_uniquri' => 3 * $times, // Allow N duped (and normalized) URIs
703703
704704 // Areas
705- 'area_anchor' => $t_area, // Using <a href> HTML tag
706- 'area_bbcode' => $t_area, // Using [url] or [link] BBCode
707- //'uri_anchor' => $t_area, // URI inside <a href> HTML tag
708- //'uri_bbcode' => $t_area, // URI inside [url] or [link] BBCode
705+ 'area_anchor' => $t_area, // Using <a href> HTML tag
706+ 'area_bbcode' => $t_area, // Using [url] or [link] BBCode
707+ //'uri_anchor' => $t_area, // URI inside <a href> HTML tag
708+ //'uri_bbcode' => $t_area, // URI inside [url] or [link] BBCode
709709 );
710710 if ($rule) {
711711 $bool = array(
712712 // Rules
713- //'asap' => TRUE, // Quit or return As Soon As Possible
714- 'uniqhost' => TRUE, // Show uniq host (at block notification mail)
715- 'badhost' => TRUE, // Check badhost
713+ //'asap' => TRUE, // Quit or return As Soon As Possible
714+ 'uniqhost' => TRUE, // Show uniq host (at block notification mail)
715+ 'badhost' => TRUE, // Check badhost
716716 );
717717 } else {
718718 $bool = array();
@@ -736,7 +736,8 @@ function check_uri_spam($target = '', $method = array())
736736 'sum' => array(
737737 'quantity' => 0,
738738 'uniqhost' => 0,
739- 'non_uniq' => 0,
739+ 'non_uniqhost'=> 0,
740+ 'non_uniquri' => 0,
740741 'badhost' => 0,
741742 'area_anchor' => 0,
742743 'area_bbcode' => 0,
@@ -755,11 +756,24 @@ function check_uri_spam($target = '', $method = array())
755756 foreach($target as $str) {
756757 // Recurse
757758 $_progress = check_uri_spam($str, $method);
758- foreach (array_keys($_progress['sum']) as $key) {
759- $sum[$key] += $_progress['sum'][$key];
759+ $_sum = & $_progress['sum'];
760+ $_is_spam = & $_progress['is_spam'];
761+ foreach (array_keys($_sum) as $key) {
762+ $sum[$key] += $_sum[$key];
760763 }
761- foreach(array_keys($_progress['is_spam']) as $key) {
762- $is_spam[$key] = TRUE;
764+ foreach(array_keys($_is_spam) as $key) {
765+ if (is_array($_is_spam[$key])) {
766+ // Marge keys (badhost)
767+ foreach(array_keys($_is_spam[$key]) as $_key) {
768+ if (! isset($is_spam[$key][$_key])) {
769+ $is_spam[$key][$_key] = $_is_spam[$key][$_key];
770+ } else {
771+ $is_spam[$key][$_key] += $_is_spam[$key][$_key];
772+ }
773+ }
774+ } else {
775+ $is_spam[$key] = TRUE;
776+ }
763777 }
764778 if ($asap && $is_spam) break;
765779 }
@@ -843,7 +857,7 @@ function check_uri_spam($target = '', $method = array())
843857 }
844858
845859 // URI: Uniqueness (and removing non-uniques)
846- if ((! $asap || ! $is_spam) && isset($method['non_uniq'])) {
860+ if ((! $asap || ! $is_spam) && isset($method['non_uniquri'])) {
847861
848862 // Destructive normalize of URIs
849863 uri_array_normalize($pickups);
@@ -854,9 +868,9 @@ function check_uri_spam($target = '', $method = array())
854868 }
855869 $count = count($uris);
856870 $uris = array_unique($uris);
857- $sum['non_uniq'] += $count - count($uris);
858- if ($sum['non_uniq'] > $method['non_uniq']) {
859- $is_spam['non_uniq'] = TRUE;
871+ $sum['non_uniquri'] += $count - count($uris);
872+ if ($sum['non_uniquri'] > $method['non_uniquri']) {
873+ $is_spam['non_uniquri'] = TRUE;
860874 }
861875 if (! $asap || ! $is_spam) {
862876 foreach (array_diff(array_keys($pickups),
@@ -872,17 +886,34 @@ function check_uri_spam($target = '', $method = array())
872886 return $progress;
873887 }
874888
875- // URI: Unique host
889+ // Host: Uniqueness (uniq / non-uniq)
876890 $hosts = array();
877891 foreach ($pickups as $pickup) $hosts[] = & $pickup['host'];
878892 $hosts = array_unique($hosts);
879893 $sum['uniqhost'] += count($hosts);
894+ if ((! $asap || ! $is_spam) && isset($method['non_uniqhost'])) {
895+ $sum['non_uniqhost'] = $sum['quantity'] - $sum['uniqhost'];
896+ if ($sum['non_uniqhost'] > $method['non_uniqhost']) {
897+ $is_spam['non_uniqhost'] = TRUE;
898+ }
899+ }
900+
901+ // Return if ...
902+ if ($asap && $is_spam) {
903+ return $progress;
904+ }
880905
881906 // URI: Bad host
882907 if ((! $asap || ! $is_spam) && isset($method['badhost'])) {
883- $count = array_count_leaves(is_badhost($hosts, $asap));
884- $sum['badhost'] += $count;
885- if ($count != 0) $is_spam['badhost'] = TRUE;
908+ $badhost = is_badhost($hosts, $asap);
909+ if (! empty($badhost)) {
910+ $sum['badhost'] += array_count_leaves($badhost);
911+ foreach(array_keys($badhost) as $keys) {
912+ $is_spam['badhost'][$keys] =
913+ array_count_leaves($badhost[$keys]);
914+ }
915+ unset($badhost);
916+ }
886917 }
887918
888919 return $progress;
@@ -981,6 +1012,13 @@ function pkwk_spamnotify($action, $page, $target = array('title' => ''), $progre
9811012 if (! $asap) {
9821013 $summary['METRICS'] = summarize_spam_progress($progress);
9831014 }
1015+ if (isset($progress['is_spam']['badhost'])) {
1016+ $badhost = array();
1017+ foreach($progress['is_spam']['badhost'] as $glob=>$number) {
1018+ $badhost[] = $glob . '(' . $number . ')';
1019+ }
1020+ $summary['BADHOST'] = implode(', ', $badhost);
1021+ }
9841022 $summary['COMMENT'] = $action;
9851023 $summary['PAGE'] = '[blocked] ' . (is_pagename($page) ? $page : '');
9861024 $summary['URI'] = get_script_uri() . '?' . rawurlencode($page);
--- a/pukiwiki.ini.php
+++ b/pukiwiki.ini.php
@@ -1,6 +1,6 @@
11 <?php
22 // PukiWiki - Yet another WikiWikiWeb clone
3-// $Id: pukiwiki.ini.php,v 1.146 2006/12/19 14:34:54 henoheno Exp $
3+// $Id: pukiwiki.ini.php,v 1.147 2007/01/02 07:09:21 henoheno Exp $
44 // Copyright (C)
55 // 2002-2006 PukiWiki Developers Team
66 // 2001-2002 Originally written by yu-ji
@@ -165,14 +165,15 @@ if ($spam) {
165165
166166 // Threshold and rules for insertion (default)
167167 $spam['method']['_default'] = array(
168- '_comment' => '_default',
169- 'quantity' => 8,
170- 'non_uniq' => 3,
171- 'area_anchor' => 0,
172- 'area_bbcode' => 0,
173- 'uniqhost' => TRUE,
174- 'badhost' => TRUE,
175- 'asap' => TRUE, // Stop as soon as possible (quick)
168+ '_comment' => '_default',
169+ 'quantity' => 8,
170+ //'non_uniquri' => 3,
171+ 'non_uniqhost' => 3,
172+ 'area_anchor' => 0,
173+ 'area_bbcode' => 0,
174+ 'uniqhost' => TRUE,
175+ 'badhost' => TRUE,
176+ 'asap' => TRUE, // Stop as soon as possible (quick but less-info)
176177 );
177178
178179 // For editing
@@ -182,14 +183,15 @@ if ($spam) {
182183 // Any rules will lock contents that have NG things already.
183184 $spam['method']['edit'] = array(
184185 // Supposed_by_you(n) * Edit_form_spec(2) * Margin(1.5)
185- '_comment' => 'edit',
186- //'quantity' => 60 * 3,
187- //'non_uniq' => 5 * 3,
188- //'area_anchor' => 30 * 3,
189- //'area_bbcode' => 15 * 3,
190- 'uniqhost' => TRUE,
191- 'badhost' => TRUE,
192- 'asap' => TRUE,
186+ '_comment' => 'edit',
187+ //'quantity' => 60 * 3,
188+ //'non_uniquri' => 5 * 3,
189+ //'non_uniqhost' => 50 * 3,
190+ //'area_anchor' => 30 * 3,
191+ //'area_bbcode' => 15 * 3,
192+ 'uniqhost' => TRUE,
193+ 'badhost' => TRUE,
194+ 'asap' => TRUE,
193195 );
194196
195197 //$spam['exitmode'] = 'dump'; // Dump progress