[Groonga-commit] ranguba/chupa-text at 15062c3 [master] Add support for timeout

Back to archive index
Sutou Kouhei null+****@clear*****
Thu Jun 13 17:00:14 JST 2019


Sutou Kouhei	2019-06-13 17:00:14 +0900 (Thu, 13 Jun 2019)

  Revision: 15062c35ed182e65903c9ee946b829ec7f8948e0
  https://github.com/ranguba/chupa-text/commit/15062c35ed182e65903c9ee946b829ec7f8948e0

  Message:
    Add support for timeout

  Modified files:
    lib/chupa-text/error.rb
    lib/chupa-text/extractor.rb
    test/test-extractor.rb

  Modified: lib/chupa-text/error.rb (+9 -0)
===================================================================
--- lib/chupa-text/error.rb    2019-06-10 12:34:45 +0900 (cb7de87)
+++ lib/chupa-text/error.rb    2019-06-13 17:00:14 +0900 (df87c9e)
@@ -53,4 +53,13 @@ module ChupaText
       super("Unknown encoding data: <#{data.uri}>(#{data.mime_type}): <#{encoding}>")
     end
   end
+
+  class TimeoutError < Error
+    attr_reader :data, :timeout
+    def initialize(data, timeout)
+      @data = data
+      @timeout = timeout
+      super("Timeout error: <#{data.uri}>(#{data.mime_type}): <#{timeout}>")
+    end
+  end
 end

  Modified: lib/chupa-text/extractor.rb (+27 -8)
===================================================================
--- lib/chupa-text/extractor.rb    2019-06-10 12:34:45 +0900 (f08034a)
+++ lib/chupa-text/extractor.rb    2019-06-13 17:00:14 +0900 (7e71038)
@@ -16,6 +16,7 @@
 
 require "pathname"
 require "uri"
+require "timeout"
 
 module ChupaText
   class Extractor
@@ -103,19 +104,37 @@ module ChupaText
         end
       else
         debug {"#{log_tag}[extract][decomposer] #{decomposer.class}"}
-        decomposer.decompose(target) do |decomposed|
-          debug do
-            "#{log_tag}[extract][decomposed] " +
-              "#{decomposer.class}: " +
-              "<#{target.uri}>: " +
-              "<#{target.mime_type}> -> <#{decomposed.mime_type}>"
+        with_timeout(target) do
+          decomposer.decompose(target) do |decomposed|
+            begin
+              debug do
+                "#{log_tag}[extract][decomposed] " +
+                  "#{decomposer.class}: " +
+                  "<#{target.uri}>: " +
+                  "<#{target.mime_type}> -> <#{decomposed.mime_type}>"
+              end
+              extract_recursive(decomposed, &block)
+            ensure
+              decomposed.release
+            end
           end
-          extract_recursive(decomposed, &block)
-          decomposed.release
         end
       end
     end
 
+    def with_timeout(data, &block)
+      timeout = data.timeout
+      if timeout
+        begin
+          Timeout.timeout(timeout, &block)
+        rescue Timeout::Error
+          raise TimeoutError.new(data, timeout)
+        end
+      else
+        yield
+      end
+    end
+
     def log_tag
       "[extractor]"
     end

  Modified: test/test-extractor.rb (+12 -0)
===================================================================
--- test/test-extractor.rb    2019-06-10 12:34:45 +0900 (7e6f848)
+++ test/test-extractor.rb    2019-06-13 17:00:14 +0900 (6039b8e)
@@ -76,6 +76,7 @@ class TestExtractor < Test::Unit::TestCase
           extracted = ChupaText::Data.new
           extracted.mime_type = "text/plain"
           extracted.body = data.body.gsub(/<.+?>/, "")
+          sleep(data.timeout * 2) if data.timeout
           yield(extracted)
         end
       end
@@ -92,6 +93,17 @@ class TestExtractor < Test::Unit::TestCase
         data.body = "<html><body>Hello</body></html>"
         assert_equal(["Hello"], extract(data))
       end
+
+      def test_timeout
+        data = ChupaText::Data.new
+        data.mime_type = "text/html"
+        data.body = "<html><body>Hello</body></html>"
+        data.timeout = 0.0001
+        error = ChupaText::TimeoutError.new(data, data.timeout)
+        assert_raise(error) do
+          extract(data)
+        end
+      end
     end
 
     sub_test_case("multi decomposed") do
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <https://lists.osdn.me/mailman/archives/groonga-commit/attachments/20190613/40ad4bb7/attachment-0001.html>


More information about the Groonga-commit mailing list
Back to archive index