修訂 | a47991d4760cb6819bc1d032bc3873c3f495d306 (tree) |
---|---|
時間 | 2007-01-02 16:09:21 |
作者 | henoheno <henoheno> |
Committer | henoheno |
$Id: spam.php,v 1.87 2007/01/02 05:57:51 henoheno Exp $
* Report badhost detail by mail
* Rename $method: 'non_uniq' => 'non_uniquri'
* Added $method['non_uniqhost'], which allows N duped (and normalized) hosts
NOTE: Not good for editing bookmarks (especially those that have many sites on _the same host_, e.g. a blog hosting service)
* uri_pickup(): An FQDN does not contain backslashes or ampersands
* check_uri_spam(): Simplify
@@ -1,5 +1,5 @@ | ||
1 | 1 | <?php |
2 | -// $Id: spam.php,v 1.11 2006/12/23 04:36:40 henoheno Exp $ | |
2 | +// $Id: spam.php,v 1.12 2007/01/02 07:09:06 henoheno Exp $ | |
3 | 3 | // Copyright (C) 2006 PukiWiki Developers Team |
4 | 4 | // License: GPL v2 or (at your option) any later version |
5 | 5 |
@@ -33,7 +33,7 @@ function uri_pickup($string = '', $normalize = TRUE, | ||
33 | 33 | // 3: Host |
34 | 34 | '\[[0-9a-f:.]+\]' . '|' . // IPv6([colon-hex and dot]): RFC2732 |
35 | 35 | '(?:[0-9]{1-3}\.){3}[0-9]{1-3}' . '|' . // IPv4(dot-decimal): 001.22.3.44 |
36 | - '[^\s<>"\'\[\]:/\#?]+' . // FQDN: foo.example.org | |
36 | + '[^\s<>"\'\[\]:/\#?&\\\]+' . // FQDN: foo.example.org | |
37 | 37 | ')' . |
38 | 38 | '(?::([0-9]*))?' . // 4: Port |
39 | 39 | '((?:/+[^\s<>"\'\[\]/\#]+)*/+)?' . // 5: Directory path or path-info |
@@ -652,7 +652,7 @@ function is_badhost($hosts = '', $asap = TRUE) | ||
652 | 652 | array('pressblog.jp', '*.pressblog.jp'), |
653 | 653 | ); |
654 | 654 | foreach ($blocklist['badhost'] as $part) { |
655 | - $_part = is_array($part) ? implode(', ', $part) : $part; | |
655 | + $_part = is_array($part) ? implode('/', $part) : $part; | |
656 | 656 | $regex['badhost'][$_part] = '/^' . generate_glob_regex($part) . '$/i'; |
657 | 657 | } |
658 | 658 | } |
@@ -662,12 +662,11 @@ function is_badhost($hosts = '', $asap = TRUE) | ||
662 | 662 | $blocklist = array(); |
663 | 663 | require(SPAM_INI_FILE); |
664 | 664 | foreach ($blocklist['badhost'] as $part) { |
665 | - $_part = is_array($part) ? implode(', ', $part) : $part; | |
665 | + $_part = is_array($part) ? implode('/', $part) : $part; | |
666 | 666 | $regex['badhost'][$_part] = '/^' . generate_glob_regex($part) . '$/i'; |
667 | 667 | } |
668 | 668 | } |
669 | 669 | } |
670 | - //var_dump($regex); | |
671 | 670 | |
672 | 671 | $result = array(); |
673 | 672 | if (! is_array($hosts)) $hosts = array($hosts); |
@@ -690,7 +689,7 @@ function is_badhost($hosts = '', $asap = TRUE) | ||
690 | 689 | return $result; |
691 | 690 | } |
692 | 691 | |
693 | -// Default (enabled) methods and thresholds | |
692 | +// Default (enabled) methods and thresholds (for content insertion) | |
694 | 693 | function check_uri_spam_method($times = 1, $t_area = 0, $rule = TRUE) |
695 | 694 | { |
696 | 695 | $times = intval($times); |
@@ -698,21 +697,22 @@ function check_uri_spam_method($times = 1, $t_area = 0, $rule = TRUE) | ||
698 | 697 | |
699 | 698 | $positive = array( |
700 | 699 | // Thresholds |
701 | - 'quantity' => 8 * $times, // Allow N URIs | |
702 | - 'non_uniq' => 3 * $times, // Allow N duped (and normalized) URIs | |
700 | + 'quantity' => 8 * $times, // Allow N URIs | |
701 | + 'non_uniqhost' => 7 * $times, // Allow N duped (and normalized) Hosts | |
702 | + 'non_uniquri' => 3 * $times, // Allow N duped (and normalized) URIs | |
703 | 703 | |
704 | 704 | // Areas |
705 | - 'area_anchor' => $t_area, // Using <a href> HTML tag | |
706 | - 'area_bbcode' => $t_area, // Using [url] or [link] BBCode | |
707 | - //'uri_anchor' => $t_area, // URI inside <a href> HTML tag | |
708 | - //'uri_bbcode' => $t_area, // URI inside [url] or [link] BBCode | |
705 | + 'area_anchor' => $t_area, // Using <a href> HTML tag | |
706 | + 'area_bbcode' => $t_area, // Using [url] or [link] BBCode | |
707 | + //'uri_anchor' => $t_area, // URI inside <a href> HTML tag | |
708 | + //'uri_bbcode' => $t_area, // URI inside [url] or [link] BBCode | |
709 | 709 | ); |
710 | 710 | if ($rule) { |
711 | 711 | $bool = array( |
712 | 712 | // Rules |
713 | - //'asap' => TRUE, // Quit or return As Soon As Possible | |
714 | - 'uniqhost' => TRUE, // Show uniq host (at block notification mail) | |
715 | - 'badhost' => TRUE, // Check badhost | |
713 | + //'asap' => TRUE, // Quit or return As Soon As Possible | |
714 | + 'uniqhost' => TRUE, // Show uniq host (at block notification mail) | |
715 | + 'badhost' => TRUE, // Check badhost | |
716 | 716 | ); |
717 | 717 | } else { |
718 | 718 | $bool = array(); |
@@ -736,7 +736,8 @@ function check_uri_spam($target = '', $method = array()) | ||
736 | 736 | 'sum' => array( |
737 | 737 | 'quantity' => 0, |
738 | 738 | 'uniqhost' => 0, |
739 | - 'non_uniq' => 0, | |
739 | + 'non_uniqhost'=> 0, | |
740 | + 'non_uniquri' => 0, | |
740 | 741 | 'badhost' => 0, |
741 | 742 | 'area_anchor' => 0, |
742 | 743 | 'area_bbcode' => 0, |
@@ -755,11 +756,24 @@ function check_uri_spam($target = '', $method = array()) | ||
755 | 756 | foreach($target as $str) { |
756 | 757 | // Recurse |
757 | 758 | $_progress = check_uri_spam($str, $method); |
758 | - foreach (array_keys($_progress['sum']) as $key) { | |
759 | - $sum[$key] += $_progress['sum'][$key]; | |
759 | + $_sum = & $_progress['sum']; | |
760 | + $_is_spam = & $_progress['is_spam']; | |
761 | + foreach (array_keys($_sum) as $key) { | |
762 | + $sum[$key] += $_sum[$key]; | |
760 | 763 | } |
761 | - foreach(array_keys($_progress['is_spam']) as $key) { | |
762 | - $is_spam[$key] = TRUE; | |
764 | + foreach(array_keys($_is_spam) as $key) { | |
765 | + if (is_array($_is_spam[$key])) { | |
766 | + // Merge keys (badhost) | |
767 | + foreach(array_keys($_is_spam[$key]) as $_key) { | |
768 | + if (! isset($is_spam[$key][$_key])) { | |
769 | + $is_spam[$key][$_key] = $_is_spam[$key][$_key]; | |
770 | + } else { | |
771 | + $is_spam[$key][$_key] += $_is_spam[$key][$_key]; | |
772 | + } | |
773 | + } | |
774 | + } else { | |
775 | + $is_spam[$key] = TRUE; | |
776 | + } | |
763 | 777 | } |
764 | 778 | if ($asap && $is_spam) break; |
765 | 779 | } |
@@ -843,7 +857,7 @@ function check_uri_spam($target = '', $method = array()) | ||
843 | 857 | } |
844 | 858 | |
845 | 859 | // URI: Uniqueness (and removing non-uniques) |
846 | - if ((! $asap || ! $is_spam) && isset($method['non_uniq'])) { | |
860 | + if ((! $asap || ! $is_spam) && isset($method['non_uniquri'])) { | |
847 | 861 | |
848 | 862 | // Destructive normalize of URIs |
849 | 863 | uri_array_normalize($pickups); |
@@ -854,9 +868,9 @@ function check_uri_spam($target = '', $method = array()) | ||
854 | 868 | } |
855 | 869 | $count = count($uris); |
856 | 870 | $uris = array_unique($uris); |
857 | - $sum['non_uniq'] += $count - count($uris); | |
858 | - if ($sum['non_uniq'] > $method['non_uniq']) { | |
859 | - $is_spam['non_uniq'] = TRUE; | |
871 | + $sum['non_uniquri'] += $count - count($uris); | |
872 | + if ($sum['non_uniquri'] > $method['non_uniquri']) { | |
873 | + $is_spam['non_uniquri'] = TRUE; | |
860 | 874 | } |
861 | 875 | if (! $asap || ! $is_spam) { |
862 | 876 | foreach (array_diff(array_keys($pickups), |
@@ -872,17 +886,34 @@ function check_uri_spam($target = '', $method = array()) | ||
872 | 886 | return $progress; |
873 | 887 | } |
874 | 888 | |
875 | - // URI: Unique host | |
889 | + // Host: Uniqueness (uniq / non-uniq) | |
876 | 890 | $hosts = array(); |
877 | 891 | foreach ($pickups as $pickup) $hosts[] = & $pickup['host']; |
878 | 892 | $hosts = array_unique($hosts); |
879 | 893 | $sum['uniqhost'] += count($hosts); |
894 | + if ((! $asap || ! $is_spam) && isset($method['non_uniqhost'])) { | |
895 | + $sum['non_uniqhost'] = $sum['quantity'] - $sum['uniqhost']; | |
896 | + if ($sum['non_uniqhost'] > $method['non_uniqhost']) { | |
897 | + $is_spam['non_uniqhost'] = TRUE; | |
898 | + } | |
899 | + } | |
900 | + | |
901 | + // Return if ... | |
902 | + if ($asap && $is_spam) { | |
903 | + return $progress; | |
904 | + } | |
880 | 905 | |
881 | 906 | // URI: Bad host |
882 | 907 | if ((! $asap || ! $is_spam) && isset($method['badhost'])) { |
883 | - $count = array_count_leaves(is_badhost($hosts, $asap)); | |
884 | - $sum['badhost'] += $count; | |
885 | - if ($count != 0) $is_spam['badhost'] = TRUE; | |
908 | + $badhost = is_badhost($hosts, $asap); | |
909 | + if (! empty($badhost)) { | |
910 | + $sum['badhost'] += array_count_leaves($badhost); | |
911 | + foreach(array_keys($badhost) as $keys) { | |
912 | + $is_spam['badhost'][$keys] = | |
913 | + array_count_leaves($badhost[$keys]); | |
914 | + } | |
915 | + unset($badhost); | |
916 | + } | |
886 | 917 | } |
887 | 918 | |
888 | 919 | return $progress; |
@@ -981,6 +1012,13 @@ function pkwk_spamnotify($action, $page, $target = array('title' => ''), $progre | ||
981 | 1012 | if (! $asap) { |
982 | 1013 | $summary['METRICS'] = summarize_spam_progress($progress); |
983 | 1014 | } |
1015 | + if (isset($progress['is_spam']['badhost'])) { | |
1016 | + $badhost = array(); | |
1017 | + foreach($progress['is_spam']['badhost'] as $glob=>$number) { | |
1018 | + $badhost[] = $glob . '(' . $number . ')'; | |
1019 | + } | |
1020 | + $summary['BADHOST'] = implode(', ', $badhost); | |
1021 | + } | |
984 | 1022 | $summary['COMMENT'] = $action; |
985 | 1023 | $summary['PAGE'] = '[blocked] ' . (is_pagename($page) ? $page : ''); |
986 | 1024 | $summary['URI'] = get_script_uri() . '?' . rawurlencode($page); |
@@ -1,6 +1,6 @@ | ||
1 | 1 | <?php |
2 | 2 | // PukiWiki - Yet another WikiWikiWeb clone |
3 | -// $Id: pukiwiki.ini.php,v 1.146 2006/12/19 14:34:54 henoheno Exp $ | |
3 | +// $Id: pukiwiki.ini.php,v 1.147 2007/01/02 07:09:21 henoheno Exp $ | |
4 | 4 | // Copyright (C) |
5 | 5 | // 2002-2006 PukiWiki Developers Team |
6 | 6 | // 2001-2002 Originally written by yu-ji |
@@ -165,14 +165,15 @@ if ($spam) { | ||
165 | 165 | |
166 | 166 | // Threshold and rules for insertion (default) |
167 | 167 | $spam['method']['_default'] = array( |
168 | - '_comment' => '_default', | |
169 | - 'quantity' => 8, | |
170 | - 'non_uniq' => 3, | |
171 | - 'area_anchor' => 0, | |
172 | - 'area_bbcode' => 0, | |
173 | - 'uniqhost' => TRUE, | |
174 | - 'badhost' => TRUE, | |
175 | - 'asap' => TRUE, // Stop as soon as possible (quick) | |
168 | + '_comment' => '_default', | |
169 | + 'quantity' => 8, | |
170 | + //'non_uniquri' => 3, | |
171 | + 'non_uniqhost' => 3, | |
172 | + 'area_anchor' => 0, | |
173 | + 'area_bbcode' => 0, | |
174 | + 'uniqhost' => TRUE, | |
175 | + 'badhost' => TRUE, | |
176 | + 'asap' => TRUE, // Stop as soon as possible (quick but less-info) | |
176 | 177 | ); |
177 | 178 | |
178 | 179 | // For editing |
@@ -182,14 +183,15 @@ if ($spam) { | ||
182 | 183 | // Any rules will lock contents that have NG things already. |
183 | 184 | $spam['method']['edit'] = array( |
184 | 185 | // Supposed_by_you(n) * Edit_form_spec(2) * Margin(1.5) |
185 | - '_comment' => 'edit', | |
186 | - //'quantity' => 60 * 3, | |
187 | - //'non_uniq' => 5 * 3, | |
188 | - //'area_anchor' => 30 * 3, | |
189 | - //'area_bbcode' => 15 * 3, | |
190 | - 'uniqhost' => TRUE, | |
191 | - 'badhost' => TRUE, | |
192 | - 'asap' => TRUE, | |
186 | + '_comment' => 'edit', | |
187 | + //'quantity' => 60 * 3, | |
188 | + //'non_uniquri' => 5 * 3, | |
189 | + //'non_uniqhost' => 50 * 3, | |
190 | + //'area_anchor' => 30 * 3, | |
191 | + //'area_bbcode' => 15 * 3, | |
192 | + 'uniqhost' => TRUE, | |
193 | + 'badhost' => TRUE, | |
194 | + 'asap' => TRUE, | |
193 | 195 | ); |
194 | 196 | |
195 | 197 | //$spam['exitmode'] = 'dump'; // Dump progress |