Kouhei Sutou 2019-04-26 10:30:45 +0900 (Fri, 26 Apr 2019) Revision: 3eb05abf4aab5585371f42f34a6f37092159ee1f https://github.com/groonga/groonga/commit/3eb05abf4aab5585371f42f34a6f37092159ee1f Message: Use ExpressionTree to estimate size Modified files: lib/mrb/scripts/expression_tree/binary_operation.rb lib/mrb/scripts/scan_info_data_size_estimator.rb Modified: lib/mrb/scripts/expression_tree/binary_operation.rb (+104 -15) =================================================================== --- lib/mrb/scripts/expression_tree/binary_operation.rb 2019-04-26 10:21:20 +0900 (052d1086a) +++ lib/mrb/scripts/expression_tree/binary_operation.rb 2019-04-26 10:30:45 +0900 (9962b9663) @@ -71,10 +71,16 @@ module Groonga column =****@left***** value =****@right***** - index_info = column.find_index(@operator) - return table.size if index_info.nil? - - index_column = index_info.index + case column + when Groonga::Accessor + return estimate_size_equal_accessor(table, column, value) + when Groonga::IndexColumn + index_column = column + else + index_info = column.find_index(@operator) + return table.size if index_info.nil? + index_column = index_info.index + end return table.size unless index_column.respond_to?(:lexicon) lexicon = index_column.lexicon @@ -91,18 +97,58 @@ module Groonga end end + def estimate_size_equal_accessor(table, accessor, value) + if accessor.id? + if table.id?(value.value) + return 1 + else + return 0 + end + else + if table[value] + return 1 + else + return 0 + end + end + end + def estimate_size_range(table) return table.size unles****@left*****_a?(Variable) return table.size unles****@right*****_a?(Constant) + is_table_search = false column =****@left***** value =****@right***** - index_info = column.find_index(@operator) - return table.size if index_info.nil? + case column + when Table + is_table_search = true + lexicon = column + when Groonga::Accessor + is_table_search = true + lexicon = column.object + case lexicon + when PatriciaTrie, DoubleArrayTrie + # range searchable + else + return table.size + end + when Groonga::IndexColumn + index_column = column + lexicon = column.lexicon + else + index_info = column.find_index(@operator) + return table.size if index_info.nil? - index_column = index_info.index - lexicon = index_column.lexicon - options = {} + index_column = index_info.index + lexicon = index_column.lexicon + end + n_terms = lexicon.size + return 0 if n_terms.zero? + + options = { + limit: sampling_cursor_limit(n_terms), + } case @operator when Operator::LESS options[:max] = value @@ -118,7 +164,13 @@ module Groonga options[:flags] = TableCursorFlags::GE end TableCursor.open(lexicon, options) do |cursor| - index_column.estimate_size(:lexicon_cursor => cursor) + if is_table_search + size = 1 + else + size = index_column.estimate_size(:lexicon_cursor => cursor) + end + size += 1 if cursor.next != ID::NIL + size end end @@ -144,6 +196,8 @@ module Groonga end end end + when Groonga::IndexColumn + indexes << column else index_info = column.find_index(@operator) return table.size if index_info.nil? @@ -164,12 +218,47 @@ module Groonga return table.size unles****@left*****_a?(Variable) return table.size unles****@right*****_a?(Constant) - key =****@left***** - return table.size unless key.is_a?(Groonga::Accessor) - return table.size unless key.key? + is_table_search = false + column =****@left***** + case column + when Groonga::Accessor + return table.size unless column.key? + is_table_search = true + lexicon = column.object + when Groonga::IndexColumn + lexicon = column.lexicon + else + return table.size + end + n_terms = lexicon.size + return 0 if n_terms.zero? + + value =****@right***** + options = { + min: value, + limit: sampling_cursor_limit(n_terms), + flags: TableCursorFlags::PREFIX, + } + TableCursor.open(lexicon, options) do |cursor| + if is_table_search + size = 1 + else + size = column.estimate_size(:lexicon_cursor => cursor) + end + size += 1 if cursor.next != ID::NIL + size + end + end - # TODO: Improve - table.size / 10 + def sampling_cursor_limit(n_terms) + limit = n_terms * 0.01 + if limit < 10 + 10 + elsif limit > 1000 + 1000 + else + limit.to_i + end end end end Modified: lib/mrb/scripts/scan_info_data_size_estimator.rb (+5 -137) =================================================================== --- lib/mrb/scripts/scan_info_data_size_estimator.rb 2019-04-26 10:21:20 +0900 (ec936f695) +++ lib/mrb/scripts/scan_info_data_size_estimator.rb 2019-04-26 10:30:45 +0900 (ca5a69c4b) @@ -15,22 +15,13 @@ module Groonga size = nil case****@data***** - when Operator::MATCH, - Operator::FUZZY - size = estimate_match(index_column) - when Operator::REGEXP - size = estimate_regexp(index_column) - when Operator::EQUAL - size = estimate_equal(index_column) - when Operator::LESS, - Operator::LESS_EQUAL, - Operator::GREATER, - Operator::GREATER_EQUAL - size = estimate_range(index_column) - when Operator::PREFIX - size = estimate_prefix(index_column) when Operator::CALL size = estimate_call(index_column) + else + left = ExpressionTree::Variable.new(index_column) + right = ExpressionTree::Constant.new(@data.query) + node = ExpressionTree::BinaryOperation.new(@data.op, left, right) + size = node.estimate_size(@table) end size || @table_size end @@ -47,129 +38,6 @@ module Groonga index_column end - def sampling_cursor_limit(n_terms) - limit = n_terms * 0.01 - if limit < 10 - 10 - elsif limit > 1000 - 1000 - else - limit.to_i - end - end - - def estimate_match(index_column) - index_column.estimate_size(:query => @data.query.value) - end - - def estimate_regexp(index_column) - index_column.estimate_size(:query => @data.query.value, - :mode => @data.op) - end - - def estimate_equal(index_column) - query =****@data***** - if index_column.is_a?(Accessor) - table = index_column.object - if index_column.name == "_id" - if table.id?(query.value) - 1 - else - 0 - end - else - if table[query.value] - 1 - else - 0 - end - end - else - lexicon = index_column.lexicon - if query.domain == lexicon.id - term = query.value - return 0 if term.nil? - index_column.estimate_size(:term => term) - else - term_id = lexicon[query] - return 0 if term_id.nil? - index_column.estimate_size(:term_id => term_id) - end - end - end - - def estimate_range(index_column) - if index_column.is_a?(Table) - is_table_search = true - lexicon = index_column - elsif index_column.is_a?(Groonga::Accessor) - is_table_search = true - lexicon = index_column.object - else - is_table_search = false - lexicon = index_column.lexicon - end - n_terms = lexicon.size - return 0 if n_terms.zero? - - value =****@data***** - options = { - :limit => sampling_cursor_limit(n_terms), - } - case****@data***** - when Operator::LESS - options[:max] = value - options[:flags] = TableCursorFlags::LT - when Operator::LESS_EQUAL - options[:max] = value - options[:flags] = TableCursorFlags::LE - when Operator::GREATER - options[:min] = value - options[:flags] = TableCursorFlags::GT - when Operator::GREATER_EQUAL - options[:min] = value - options[:flags] = TableCursorFlags::GE - end - TableCursor.open(lexicon, options) do |cursor| - if is_table_search - size = 1 - else - size = index_column.estimate_size(:lexicon_cursor => cursor) - end - size += 1 if cursor.next != ID::NIL - size - end - end - - def estimate_prefix(index_column) - is_table_search = - (index_column.is_a?(Accessor) and - index_column.name == "_key") - if is_table_search - lexicon = index_column.object - else - lexicon = index_column.lexicon - end - n_terms = lexicon.size - return 0 if n_terms.zero? - - value =****@data***** - options = { - :min => value, - :limit => sampling_cursor_limit(n_terms), - :flags => TableCursorFlags::PREFIX, - } - TableCursor.open(lexicon, options) do |cursor| - if is_table_search - size = 1 - else - size = index_column.estimate_size(:lexicon_cursor => cursor) - end - size += 1 if cursor.next != ID::NIL - size - end - end - def estimate_call(index_column) procedure =****@data*****[0] arguments =****@data*****[1..-1].collect do |arg| -------------- next part -------------- An HTML attachment was scrubbed... URL: <https://lists.osdn.me/mailman/archives/groonga-commit/attachments/20190426/fc63ec51/attachment-0001.html>