• R/O
  • HTTP
  • SSH
  • HTTPS

提交

標籤
無標籤

Frequently used words (click to add to your profile)

java, c++, android, linux, c#, objective-c, qt, 誰得, windows, cocoa, python, php, ruby, game, gui, bathyscaphe, c, 翻訳, omegat, 計画中(planning stage), framework, twitter, test, dom, vb.net, directx, btron, arduino, previewer, ゲームエンジン

OpenTweenのfork


Commit MetaInfo

修訂: aba29fed5c6113a266cb022d8366946776ff15c4 (tree)
時間: 2012-05-06 06:15:12
作者: Ichinose Shogo <shogo82148@gmai...>
Committer: Kimura Youichi

Log Message

発言に含まれるURLの判定を厳格化

Change Summary

差異

--- a/OpenTween/Tween.cs
+++ b/OpenTween/Tween.cs
@@ -5080,7 +5080,44 @@ namespace OpenTween
50805080 //}
50815081 foreach (Match m in Regex.Matches(StatusText.Text, Twitter.rgUrl, RegexOptions.IgnoreCase))
50825082 {
5083- pLen += m.Result("${url}").Length - SettingDialog.TwitterConfiguration.ShortUrlLength;
5083+ string before = m.Result("${before}");
5084+ string url = m.Result("${url}");
5085+ string protocol = m.Result("${protocol}");
5086+ string domain = m.Result("${domain}");
5087+ string path = m.Result("${path}");
5088+ if (protocol.Length == 0)
5089+ {
5090+ if (Regex.IsMatch(before, Twitter.url_invalid_without_protocol_preceding_chars))
5091+ {
5092+ continue;
5093+ }
5094+
5095+ bool last_url_invalid_match = false;
5096+ string lasturl = null;
5097+ foreach (Match mm in Regex.Matches(domain, Twitter.url_valid_ascii_domain, RegexOptions.IgnoreCase))
5098+ {
5099+ lasturl = mm.ToString();
5100+ last_url_invalid_match = Regex.IsMatch(lasturl, Twitter.url_invalid_short_domain, RegexOptions.IgnoreCase);
5101+ if (!last_url_invalid_match)
5102+ {
5103+ pLen += lasturl.Length - SettingDialog.TwitterConfiguration.ShortUrlLength;
5104+ }
5105+ }
5106+
5107+ if (path.Length != 0)
5108+ {
5109+ if (last_url_invalid_match)
5110+ {
5111+ pLen += lasturl.Length - SettingDialog.TwitterConfiguration.ShortUrlLength;
5112+ }
5113+ pLen += path.Length;
5114+ }
5115+ }
5116+ else
5117+ {
5118+ pLen += url.Length - SettingDialog.TwitterConfiguration.ShortUrlLength;
5119+ }
5120+
50845121 //if (m.Result("${url}").Length > SettingDialog.TwitterConfiguration.ShortUrlLength)
50855122 //{
50865123 // pLen += m.Result("${url}").Length - SettingDialog.TwitterConfiguration.ShortUrlLength;
--- a/OpenTween/Twitter.cs
+++ b/OpenTween/Twitter.cs
@@ -66,7 +66,7 @@ namespace OpenTween
6666 // permissions and limitations under the License.
6767
6868 //Hashtag用正規表現
69- private const string LATIN_ACCENTS = @"\xc0-\xd6\xd8-\xf6\xf8-\xff";
69+ private const string LATIN_ACCENTS = @"\u00c0-\u00d6\u00d8-\u00f6\u00f8-\u00ff\u0100-\u024f\u0253\u0254\u0256\u0257\u0259\u025b\u0263\u0268\u026f\u0272\u0289\u028b\u02bb\u1e00-\u1eff";
7070 private const string NON_LATIN_HASHTAG_CHARS = @"\u0400-\u04ff\u0500-\u0527\u1100-\u11ff\u3130-\u3185\uA960-\uA97F\uAC00-\uD7AF\uD7B0-\uD7FF";
7171 //private const string CJ_HASHTAG_CHARACTERS = @"\u30A1-\u30FA\uFF66-\uFF9F\uFF10-\uFF19\uFF21-\uFF3A\uFF41-\uFF5A\u3041-\u3096\u3400-\u4DBF\u4E00-\u9FFF\u20000-\u2A6DF\u2A700-\u2B73F\u2B740-\u2B81F\u2F800-\u2FA1F";
7272 private const string CJ_HASHTAG_CHARACTERS = @"\u30A1-\u30FA\u30FC\u3005\uFF66-\uFF9F\uFF10-\uFF19\uFF21-\uFF3A\uFF41-\uFF5A\u3041-\u309A\u3400-\u4DBF\p{IsCJKUnifiedIdeographs}";
@@ -76,24 +76,36 @@ namespace OpenTween
7676 private const string HASHTAG_TERMINATOR = "[^a-z0-9_" + LATIN_ACCENTS + NON_LATIN_HASHTAG_CHARS + CJ_HASHTAG_CHARACTERS + "]";
7777 public const string HASHTAG = "(" + HASHTAG_BOUNDARY + ")(#|#)(" + HASHTAG_ALPHANUMERIC + "*" + HASHTAG_ALPHA + HASHTAG_ALPHANUMERIC + "*)(?=" + HASHTAG_TERMINATOR + "|" + HASHTAG_BOUNDARY + ")";
7878 //URL正規表現
79- private const string url_valid_domain = @"(?<domain>(?:[^\p{P}\s][\.\-_](?=[^\p{P}\s])|[^\p{P}\s]){1,}\.[a-z]{2,}(?::[0-9]+)?)";
80- private const string url_valid_general_path_chars = @"[a-z0-9!*';:=+$/%#\[\]\-_&,~]";
79+ private const string url_valid_preceding_chars = @"(?:[^A-Za-z0-9@@$##\ufffe\ufeff\uffff\u202a-\u202e]|^)";
80+ public const string url_invalid_without_protocol_preceding_chars = @"[-_./]$";
81+ private const string url_invalid_domain_chars = @"\!'#%&'\(\)*\+,\\\-\.\/:;<=>\?@\[\]\^_{|}~\$\u2000-\u200a\u0009-\u000d\u0020\u0085\u00a0\u1680\u180e\u2028\u2029\u202f\u205f\u3000\ufffe\ufeff\uffff\u202a-\u202e";
82+ private const string url_valid_domain_chars = @"[^" + url_invalid_domain_chars + "]";
83+ private const string url_valid_subdomain = @"(?:(?:" + url_valid_domain_chars + @"(?:[_-]|" + url_valid_domain_chars + @")*)?" + url_valid_domain_chars + @"\.)";
84+ private const string url_valid_domain_name = @"(?:(?:" + url_valid_domain_chars + @"(?:-|" + url_valid_domain_chars + @")*)?" + url_valid_domain_chars + @"\.)";
85+ private const string url_valid_GTLD = @"(?:(?:aero|asia|biz|cat|com|coop|edu|gov|info|int|jobs|mil|mobi|museum|name|net|org|pro|tel|travel|xxx)(?=[^0-9a-zA-Z]|$))";
86+ private const string url_valid_CCTLD = @"(?:(?:ac|ad|ae|af|ag|ai|al|am|an|ao|aq|ar|as|at|au|aw|ax|az|ba|bb|bd|be|bf|bg|bh|bi|bj|bm|bn|bo|br|bs|bt|bv|bw|by|bz|ca|cc|cd|cf|cg|ch|ci|ck|cl|cm|cn|co|cr|cs|cu|cv|cx|cy|cz|dd|de|dj|dk|dm|do|dz|ec|ee|eg|eh|er|es|et|eu|fi|fj|fk|fm|fo|fr|ga|gb|gd|ge|gf|gg|gh|gi|gl|gm|gn|gp|gq|gr|gs|gt|gu|gw|gy|hk|hm|hn|hr|ht|hu|id|ie|il|im|in|io|iq|ir|is|it|je|jm|jo|jp|ke|kg|kh|ki|km|kn|kp|kr|kw|ky|kz|la|lb|lc|li|lk|lr|ls|lt|lu|lv|ly|ma|mc|md|me|mg|mh|mk|ml|mm|mn|mo|mp|mq|mr|ms|mt|mu|mv|mw|mx|my|mz|na|nc|ne|nf|ng|ni|nl|no|np|nr|nu|nz|om|pa|pe|pf|pg|ph|pk|pl|pm|pn|pr|ps|pt|pw|py|qa|re|ro|rs|ru|rw|sa|sb|sc|sd|se|sg|sh|si|sj|sk|sl|sm|sn|so|sr|ss|st|su|sv|sy|sz|tc|td|tf|tg|th|tj|tk|tl|tm|tn|to|tp|tr|tt|tv|tw|tz|ua|ug|uk|us|uy|uz|va|vc|ve|vg|vi|vn|vu|wf|ws|ye|yt|za|zm|zw)(?=[^0-9a-zA-Z]|$))";
87+ private const string url_valid_punycode = @"(?:xn--[0-9a-z]+)";
88+ private const string url_valid_domain = @"(?<domain>" + url_valid_subdomain + "*" + url_valid_domain_name + "(?:" + url_valid_GTLD + "|" + url_valid_CCTLD + ")|" + url_valid_punycode + ")";
89+ public const string url_valid_ascii_domain = @"(?:(?:[a-z0-9" + LATIN_ACCENTS + @"]+)\.)+(?:" + url_valid_GTLD + "|" + url_valid_CCTLD + "|" + url_valid_punycode + ")";
90+ public const string url_invalid_short_domain = "^" + url_valid_domain_name + url_valid_CCTLD + "$";
91+ private const string url_valid_port_number = @"[0-9]+";
92+
93+ private const string url_valid_general_path_chars = @"[a-z0-9!*';:=+,.$/%#\[\]\-_~|&" + LATIN_ACCENTS + "]";
8194 private const string url_balance_parens = @"(?:\(" + url_valid_general_path_chars + @"+\))";
82- private const string url_valid_url_path_ending_chars = @"(?:[a-z0-9=_#/\-\+]+|" + url_balance_parens + ")";
83- private const string pth = "(?:" + url_balance_parens +
84- "|@" + url_valid_general_path_chars + "+/" +
85- "|[.,]?" + url_valid_general_path_chars + "+" +
95+ private const string url_valid_path_ending_chars = @"(?:[+\-a-z0-9=_#/" + LATIN_ACCENTS + "]|" + url_balance_parens + ")";
96+ private const string pth = "(?:" +
97+ "(?:" +
98+ url_valid_general_path_chars + "*" +
99+ "(?:" + url_balance_parens + url_valid_general_path_chars + "*)*" +
100+ url_valid_path_ending_chars +
101+ ")|(?:@" + url_valid_general_path_chars + "+/)" +
86102 ")";
87- private const string pth2 = "(/(?:" +
88- pth + "+" + url_valid_url_path_ending_chars + "|" +
89- pth + "+" + url_valid_url_path_ending_chars + "?|" +
90- url_valid_url_path_ending_chars +
91- ")?)?";
92- private const string qry = @"(?<query>\?[a-z0-9!*'();:&=+$/%#\[\]\-_.,~]*[a-z0-9_&=#])?";
93- public const string rgUrl = @"(?<before>(?:[^\""':!=#]|^|\:/))" +
94- "(?<url>(?<protocol>https?://)" +
95- url_valid_domain +
96- pth2 +
103+ private const string qry = @"(?<query>\?[a-z0-9!?*'();:&=+$/%#\[\]\-_.,~|]*[a-z0-9_&=#/])?";
104+ public const string rgUrl = @"(?<before>" + url_valid_preceding_chars + ")" +
105+ "(?<url>(?<protocol>https?://)?" +
106+ "(?<domain>" + url_valid_domain + ")" +
107+ "(?::" + url_valid_port_number + ")?" +
108+ "(?<path>/" + pth + "*)?" +
97109 qry +
98110 ")";
99111