Sutou Kouhei 2019-06-13 17:00:14 +0900 (Thu, 13 Jun 2019) Revision: 15062c35ed182e65903c9ee946b829ec7f8948e0 https://github.com/ranguba/chupa-text/commit/15062c35ed182e65903c9ee946b829ec7f8948e0 Message: Add support for timeout Modified files: lib/chupa-text/error.rb lib/chupa-text/extractor.rb test/test-extractor.rb Modified: lib/chupa-text/error.rb (+9 -0) =================================================================== --- lib/chupa-text/error.rb 2019-06-10 12:34:45 +0900 (cb7de87) +++ lib/chupa-text/error.rb 2019-06-13 17:00:14 +0900 (df87c9e) @@ -53,4 +53,13 @@ module ChupaText super("Unknown encoding data: <#{data.uri}>(#{data.mime_type}): <#{encoding}>") end end + + class TimeoutError < Error + attr_reader :data, :timeout + def initialize(data, timeout) + @data = data + @timeout = timeout + super("Timeout error: <#{data.uri}>(#{data.mime_type}): <#{timeout}>") + end + end end Modified: lib/chupa-text/extractor.rb (+27 -8) =================================================================== --- lib/chupa-text/extractor.rb 2019-06-10 12:34:45 +0900 (f08034a) +++ lib/chupa-text/extractor.rb 2019-06-13 17:00:14 +0900 (7e71038) @@ -16,6 +16,7 @@ require "pathname" require "uri" +require "timeout" module ChupaText class Extractor @@ -103,19 +104,37 @@ module ChupaText end else debug {"#{log_tag}[extract][decomposer] #{decomposer.class}"} - decomposer.decompose(target) do |decomposed| - debug do - "#{log_tag}[extract][decomposed] " + - "#{decomposer.class}: " + - "<#{target.uri}>: " + - "<#{target.mime_type}> -> <#{decomposed.mime_type}>" + with_timeout(target) do + decomposer.decompose(target) do |decomposed| + begin + debug do + "#{log_tag}[extract][decomposed] " + + "#{decomposer.class}: " + + "<#{target.uri}>: " + + "<#{target.mime_type}> -> <#{decomposed.mime_type}>" + end + extract_recursive(decomposed, &block) + ensure + decomposed.release + end end - extract_recursive(decomposed, &block) - decomposed.release end end end + def with_timeout(data, &block) + timeout = data.timeout + if timeout + begin + Timeout.timeout(timeout, &block) + rescue Timeout::Error + raise TimeoutError.new(data, timeout) + end + else + yield + end + end + def log_tag "[extractor]" end Modified: test/test-extractor.rb (+12 -0) =================================================================== --- test/test-extractor.rb 2019-06-10 12:34:45 +0900 (7e6f848) +++ test/test-extractor.rb 2019-06-13 17:00:14 +0900 (6039b8e) @@ -76,6 +76,7 @@ class TestExtractor < Test::Unit::TestCase extracted = ChupaText::Data.new extracted.mime_type = "text/plain" extracted.body = data.body.gsub(/<.+?>/, "") + sleep(data.timeout * 2) if data.timeout yield(extracted) end end @@ -92,6 +93,17 @@ class TestExtractor < Test::Unit::TestCase data.body = "<html><body>Hello</body></html>" assert_equal(["Hello"], extract(data)) end + + def test_timeout + data = ChupaText::Data.new + data.mime_type = "text/html" + data.body = "<html><body>Hello</body></html>" + data.timeout = 0.0001 + error = ChupaText::TimeoutError.new(data, data.timeout) + assert_raise(error) do + extract(data) + end + end end sub_test_case("multi decomposed") do -------------- next part -------------- An HTML attachment was scrubbed... URL: <https://lists.osdn.me/mailman/archives/groonga-commit/attachments/20190613/40ad4bb7/attachment-0001.html>