[Groonga-commit] ranguba/chupa-text-http-server at 4925101 [master] Support extraction

Back to archive index

Kouhei Sutou null+****@clear*****
Wed Jul 5 15:39:13 JST 2017


Kouhei Sutou	2017-07-05 15:39:13 +0900 (Wed, 05 Jul 2017)

  New Revision: 4925101eb5ac86db183b64b99bb9bab044ddc20a
  https://github.com/ranguba/chupa-text-http-server/commit/4925101eb5ac86db183b64b99bb9bab044ddc20a

  Message:
    Support extraction

  Added files:
    Gemfile.local
    app/controllers/extractions_controller.rb
    app/helpers/extractions_helper.rb
    app/models/extraction.rb
    app/views/extractions/_form.html.erb
    app/views/extractions/create.html.erb
    app/views/extractions/new.html.erb
    config/chupa-text.rb
    config/initializers/chupa_text.rb
  Modified files:
    Gemfile
    Gemfile.lock
    app/controllers/application_controller.rb
    config/routes.rb

  Modified: Gemfile (+6 -0)
===================================================================
--- Gemfile    2017-07-05 10:57:50 +0900 (9eb3929)
+++ Gemfile    2017-07-05 15:39:13 +0900 (65a94e2)
@@ -53,3 +53,9 @@ gem 'tzinfo-data', platforms: [:mingw, :mswin, :x64_mingw, :jruby]
 
 gem 'bootstrap', github: 'twbs/bootstrap-rubygem'
 gem 'jquery-rails'
+
+gem 'chupa-text', path: '../chupa-text'
+local_gemfile = "#{__dir__}/Gemfile.local"
+if File.exist?(local_gemfile)
+  eval(File.read(local_gemfile), binding, local_gemfile, 1)
+end

  Added: Gemfile.local (+4 -0) 100644
===================================================================
--- /dev/null
+++ Gemfile.local    2017-07-05 15:39:13 +0900 (a2112a9)
@@ -0,0 +1,4 @@
+gem 'chupa-text-decomposer-html'
+gem 'chupa-text-decomposer-pdf'
+gem 'chupa-text-decomposer-spreadsheet'
+gem 'chupa-text-decomposer-libreoffice'

  Modified: Gemfile.lock (+42 -0)
===================================================================
--- Gemfile.lock    2017-07-05 10:57:50 +0900 (4a1cd68)
+++ Gemfile.lock    2017-07-05 15:39:13 +0900 (8046705)
@@ -7,6 +7,11 @@ GIT
       popper_js (>= 1.9.9, < 2.0)
       sass (>= 3.4.19)
 
+PATH
+  remote: ../chupa-text
+  specs:
+    chupa-text (1.0.6)
+
 GEM
   remote: https://rubygems.org/
   specs:
@@ -55,6 +60,9 @@ GEM
     bindex (0.5.0)
     builder (3.2.3)
     byebug (9.0.6)
+    cairo (1.15.9)
+      native-package-installer (>= 1.0.3)
+      pkg-config (>= 1.2.2)
     capybara (2.14.4)
       addressable
       mime-types (>= 1.16)
@@ -64,6 +72,17 @@ GEM
       xpath (~> 2.0)
     childprocess (0.7.1)
       ffi (~> 1.0, >= 1.0.11)
+    chupa-text-decomposer-html (1.0.1)
+      chupa-text
+      nokogiri
+    chupa-text-decomposer-libreoffice (1.0.0)
+      chupa-text-decomposer-pdf
+    chupa-text-decomposer-pdf (1.0.3)
+      chupa-text
+      poppler
+    chupa-text-decomposer-spreadsheet (1.0.0)
+      chupa-text
+      roo
     coffee-rails (4.2.2)
       coffee-script (>= 2.2.0)
       railties (>= 4.0.0)
@@ -75,8 +94,18 @@ GEM
     erubi (1.6.1)
     execjs (2.7.0)
     ffi (1.9.18)
+    gdk_pixbuf2 (3.1.6)
+      gio2 (= 3.1.6)
+    gio2 (3.1.6)
+      glib2 (= 3.1.6)
+      gobject-introspection (= 3.1.6)
+    glib2 (3.1.6)
+      native-package-installer (>= 1.0.3)
+      pkg-config (>= 1.2.2)
     globalid (0.4.0)
       activesupport (>= 4.2.0)
+    gobject-introspection (3.1.6)
+      glib2 (= 3.1.6)
     i18n (0.8.4)
     jbuilder (2.7.0)
       activesupport (>= 4.2.0)
@@ -100,10 +129,15 @@ GEM
     mini_portile2 (2.2.0)
     minitest (5.10.2)
     multi_json (1.12.1)
+    native-package-installer (1.0.4)
     nio4r (2.1.0)
     nokogiri (1.8.0)
       mini_portile2 (~> 2.2.0)
+    pkg-config (1.2.3)
     popper_js (1.9.9)
+    poppler (3.1.6)
+      cairo (>= 1.14.0)
+      gdk_pixbuf2 (= 3.1.6)
     public_suffix (2.0.5)
     puma (3.9.1)
     rack (2.0.3)
@@ -136,6 +170,9 @@ GEM
     rb-fsevent (0.10.2)
     rb-inotify (0.9.10)
       ffi (>= 0.5.0, < 2)
+    roo (2.7.1)
+      nokogiri (~> 1)
+      rubyzip (~> 1.1, < 2.0.0)
     ruby_dep (1.5.0)
     rubyzip (1.2.1)
     sass (3.4.24)
@@ -188,6 +225,11 @@ DEPENDENCIES
   bootstrap!
   byebug
   capybara (~> 2.13)
+  chupa-text!
+  chupa-text-decomposer-html
+  chupa-text-decomposer-libreoffice
+  chupa-text-decomposer-pdf
+  chupa-text-decomposer-spreadsheet
   coffee-rails (~> 4.2)
   jbuilder (~> 2.5)
   jquery-rails

  Modified: app/controllers/application_controller.rb (+1 -1)
===================================================================
--- app/controllers/application_controller.rb    2017-07-05 10:57:50 +0900 (1c07694)
+++ app/controllers/application_controller.rb    2017-07-05 15:39:13 +0900 (3e54214)
@@ -1,3 +1,3 @@
 class ApplicationController < ActionController::Base
-  protect_from_forgery with: :exception
+  protect_from_forgery with: :exception, unless: -> {request.format.json?}
 end

  Added: app/controllers/extractions_controller.rb (+28 -0) 100644
===================================================================
--- /dev/null
+++ app/controllers/extractions_controller.rb    2017-07-05 15:39:13 +0900 (ee9d85c)
@@ -0,0 +1,28 @@
+class ExtractionsController < ApplicationController
+  # GET /extractions/new
+  def new
+    @extraction = Extraction.new
+  end
+
+  # POST /extractions
+  # POST /extractions.json
+  def create
+    @extraction = Extraction.new(extraction_params)
+
+    respond_to do |format|
+      if @extraction.valid?
+        format.html { render :create }
+        format.json { render json: @extraction.extract }
+      else
+        format.html { render :new }
+        format.json { render json: @extraction.errors, status: :unprocessable_entity }
+      end
+    end
+  end
+
+  private
+    # Never trust parameters from the scary internet, only allow the white list through.
+    def extraction_params
+      params.permit(:input)
+    end
+end

  Added: app/helpers/extractions_helper.rb (+2 -0) 100644
===================================================================
--- /dev/null
+++ app/helpers/extractions_helper.rb    2017-07-05 15:39:13 +0900 (b35fbf5)
@@ -0,0 +1,2 @@
+module ExtractionsHelper
+end

  Added: app/models/extraction.rb (+31 -0) 100644
===================================================================
--- /dev/null
+++ app/models/extraction.rb    2017-07-05 15:39:13 +0900 (4c9ca12)
@@ -0,0 +1,31 @@
+class Extraction
+  include ActiveModel::Model
+
+  attr_accessor :input
+  validates :input, presence: true
+
+  def persisted?
+    false
+  end
+
+  def id
+    nil
+  end
+
+  def extract
+    extractor = ChupaText::Extractor.new
+    configuration = ChupaText::Configuration.new
+    configuration_loader = ChupaText::ConfigurationLoader.new(configuration)
+    configuration_loader.load(Rails.root + "config" + "chupa-text.rb")
+    extractor.apply_configuration(configuration)
+
+    data = ChupaText::VirtualFileData.new(Pathname(@input.original_filename),
+                                          @input.to_io)
+    formatter = ChupaText::Formatters::Hash.new
+    formatter.format_start(data)
+    extractor.extract(data) do |extracted|
+      formatter.format_extracted(extracted)
+    end
+    formatter.format_finish(data)
+  end
+end

  Added: app/views/extractions/_form.html.erb (+22 -0) 100644
===================================================================
--- /dev/null
+++ app/views/extractions/_form.html.erb    2017-07-05 15:39:13 +0900 (42886d4)
@@ -0,0 +1,22 @@
+<%= form_with(model: extraction, local: true, url: extraction_path, method: :post) do |form| %>
+  <% if extraction.errors.any? %>
+    <div id="error_explanation">
+      <h2><%= pluralize(extraction.errors.count, "error") %> prohibited this extraction from being saved:</h2>
+
+      <ul>
+      <% extraction.errors.full_messages.each do |message| %>
+        <li><%= message %></li>
+      <% end %>
+      </ul>
+    </div>
+  <% end %>
+
+  <div class="field">
+    <%= form.label :input %>
+    <%= form.file_field :input, id: :extraction_input, name: :input %>
+  </div>
+
+  <div class="actions">
+    <%= form.submit "Extract" %>
+  </div>
+<% end %>

  Added: app/views/extractions/create.html.erb (+5 -0) 100644
===================================================================
--- /dev/null
+++ app/views/extractions/create.html.erb    2017-07-05 15:39:13 +0900 (ec1a8f4)
@@ -0,0 +1,5 @@
+<h1>Extraction</h1>
+
+<%= render 'form', extraction: @extraction %>
+
+<pre><%= JSON.pretty_generate(@extraction.extract) %></pre>

  Added: app/views/extractions/new.html.erb (+3 -0) 100644
===================================================================
--- /dev/null
+++ app/views/extractions/new.html.erb    2017-07-05 15:39:13 +0900 (69a26e7)
@@ -0,0 +1,3 @@
+<h1>Extraction</h1>
+
+<%= render 'form', extraction: @extraction %>

  Added: config/chupa-text.rb (+5 -0) 100644
===================================================================
--- /dev/null
+++ config/chupa-text.rb    2017-07-05 15:39:13 +0900 (aff219b)
@@ -0,0 +1,5 @@
+decomposer.names = ["*"]
+
+Mime::EXTENSION_LOOKUP.each do |extension, mime_type|
+  mime_types[extension] = mime_type.to_s
+end

  Added: config/initializers/chupa_text.rb (+1 -0) 100644
===================================================================
--- /dev/null
+++ config/initializers/chupa_text.rb    2017-07-05 15:39:13 +0900 (b949701)
@@ -0,0 +1 @@
+ChupaText::Decomposers.load

  Modified: config/routes.rb (+2 -0)
===================================================================
--- config/routes.rb    2017-07-05 10:57:50 +0900 (787824f)
+++ config/routes.rb    2017-07-05 15:39:13 +0900 (13d8093)
@@ -1,3 +1,5 @@
 Rails.application.routes.draw do
   # For details on the DSL available within this file, see http://guides.rubyonrails.org/routing.html
+
+  resource :extraction, only: [:new, :create]
 end
-------------- next part --------------
HTMLの添付ファイルを保管しました...
下載 



More information about the Groonga-commit mailing list
Back to archive index