Kouhei Sutou
null+****@clear*****
Sun Feb 7 15:24:30 JST 2016
Kouhei Sutou 2016-02-07 15:24:30 +0900 (Sun, 07 Feb 2016) New Revision: e79d1b8a49f234ea3285561c2bdea499eb26d39e https://github.com/groonga/wikipedia-search/commit/e79d1b8a49f234ea3285561c2bdea499eb26d39e Message: Support CSV output Added files: lib/wikipedia-search/csv-converter.rb Modified files: bin/wikipedia-convert lib/wikipedia-search/path.rb lib/wikipedia-search/task.rb Modified: bin/wikipedia-convert (+4 -1) =================================================================== --- bin/wikipedia-convert 2015-05-29 12:23:33 +0900 (437da6e) +++ bin/wikipedia-convert 2016-02-07 15:24:30 +0900 (9eaaa60) @@ -12,8 +12,9 @@ $LOAD_PATH.unshift(lib_dir_path.to_s) require "wikipedia-search/groonga-converter" require "wikipedia-search/sql-converter" +require "wikipedia-search/csv-converter" -available_formats = [:groonga, :sql] +available_formats = [:groonga, :sql, :csv] options = OpenStruct.new options.output = "-" @@ -52,6 +53,8 @@ when :groonga converter_class = WikipediaSearch::GroongaConverter when :sql converter_class = WikipediaSearch::SQLConverter +when :csv + converter_class = WikipediaSearch::CSVConverter end converter = converter_class.new(ARGF, converter_options) if options.output == "-" Added: lib/wikipedia-search/csv-converter.rb (+45 -0) 100644 =================================================================== --- /dev/null +++ lib/wikipedia-search/csv-converter.rb 2016-02-07 15:24:30 +0900 (ae05a5e) @@ -0,0 +1,45 @@ +require "csv" + +require "wikipedia-search/converter" + +module WikipediaSearch + class CSVConverter < Converter + private + def create_listener(output) + CSVListener.new(output, @options) + end + + class CSVListener < Listener + def on_start + @csv = CSV.new(@output) + end + + def on_finish + @csv.close + end + + def on_page(page) + record_values = [ + @page.id, + escape_string(@page.title), + escape_string(shorten_text(@page.text)), + ] + @csv << record_values + end + + private + def escape_string(string) + string.gsub(/[\\\r\n]/) do 
|special_character| + case special_character + when "\r" + "\\r" + when "\n" + "\\n" + else + "\\#{special_character}" + end + end + end + end + end +end Modified: lib/wikipedia-search/path.rb (+23 -0) =================================================================== --- lib/wikipedia-search/path.rb 2015-05-29 12:23:33 +0900 (aedacf9) +++ lib/wikipedia-search/path.rb 2016-02-07 15:24:30 +0900 (ce8a54e) @@ -34,6 +34,10 @@ module WikipediaSearch def sql SQLPath.new(self, @language) end + + def csv + CSVPath.new(self, @language) + end end class WikipediaPath @@ -195,4 +199,23 @@ module WikipediaSearch data_dir + "#{@language}-all-pages.sql" end end + + class CSVPath + def initialize(base_path, language) + @base_path = base_path + @language = language + end + + def data_dir + @base_path.data_dir + "csv" + end + + def pages + data_dir + "#{@language}-pages.csv" + end + + def all_pages + data_dir + "#{@language}-all-pages.csv" + end + end end Modified: lib/wikipedia-search/task.rb (+11 -0) =================================================================== --- lib/wikipedia-search/task.rb 2015-05-29 12:23:33 +0900 (87a4b2c) +++ lib/wikipedia-search/task.rb 2016-02-07 15:24:30 +0900 (caf5bb7) @@ -65,6 +65,7 @@ module WikipediaSearch define_data_convert_groonga_tasks define_data_convert_droonga_tasks define_data_convert_sql_tasks + define_data_convert_csv_tasks end end @@ -159,6 +160,16 @@ module WikipediaSearch end end + def define_data_convert_csv_tasks + namespace :csv do + define_wikipedia_data_convert_tasks("csv", + @path.csv.pages, + @path.csv.all_pages) + desc "Convert Japanese Wikipedia page data to CSV data." + task :ja => @path.csv.pages.to_s + end + end + def define_local_tasks namespace :local do define_local_groonga_tasks -------------- next part -------------- HTMLの添付ファイルを保管しました... 下載