[Groonga-commit] groonga/groonga at 3eb05ab [master] Use ExpressionTree to estimate size

Back to archive index
Kouhei Sutou null+****@clear*****
Fri Apr 26 10:30:45 JST 2019


Kouhei Sutou	2019-04-26 10:30:45 +0900 (Fri, 26 Apr 2019)

  Revision: 3eb05abf4aab5585371f42f34a6f37092159ee1f
  https://github.com/groonga/groonga/commit/3eb05abf4aab5585371f42f34a6f37092159ee1f

  Message:
    Use ExpressionTree to estimate size

  Modified files:
    lib/mrb/scripts/expression_tree/binary_operation.rb
    lib/mrb/scripts/scan_info_data_size_estimator.rb

  Modified: lib/mrb/scripts/expression_tree/binary_operation.rb (+104 -15)
===================================================================
--- lib/mrb/scripts/expression_tree/binary_operation.rb    2019-04-26 10:21:20 +0900 (052d1086a)
+++ lib/mrb/scripts/expression_tree/binary_operation.rb    2019-04-26 10:30:45 +0900 (9962b9663)
@@ -71,10 +71,16 @@ module Groonga
 
         column =****@left*****
         value =****@right*****
-        index_info = column.find_index(@operator)
-        return table.size if index_info.nil?
-
-        index_column = index_info.index
+        case column
+        when Groonga::Accessor
+          return estimate_size_equal_accessor(table, column, value)
+        when Groonga::IndexColumn
+          index_column = column
+        else
+          index_info = column.find_index(@operator)
+          return table.size if index_info.nil?
+          index_column = index_info.index
+        end
         return table.size unless index_column.respond_to?(:lexicon)
 
         lexicon = index_column.lexicon
@@ -91,18 +97,58 @@ module Groonga
         end
       end
 
+      def estimate_size_equal_accessor(table, accessor, value)
+        if accessor.id?
+          if table.id?(value.value)
+            return 1
+          else
+            return 0
+          end
+        else
+          if table[value]
+            return 1
+          else
+            return 0
+          end
+        end
+      end
+
       def estimate_size_range(table)
         return table.size unles****@left*****_a?(Variable)
         return table.size unles****@right*****_a?(Constant)
 
+        is_table_search = false
         column =****@left*****
         value =****@right*****
-        index_info = column.find_index(@operator)
-        return table.size if index_info.nil?
+        case column
+        when Table
+          is_table_search = true
+          lexicon = column
+        when Groonga::Accessor
+          is_table_search = true
+          lexicon = column.object
+          case lexicon
+          when PatriciaTrie, DoubleArrayTrie
+            # range searchable
+          else
+            return table.size
+          end
+        when Groonga::IndexColumn
+          index_column = column
+          lexicon = column.lexicon
+        else
+          index_info = column.find_index(@operator)
+          return table.size if index_info.nil?
 
-        index_column = index_info.index
-        lexicon = index_column.lexicon
-        options = {}
+          index_column = index_info.index
+          lexicon = index_column.lexicon
+        end
+        n_terms = lexicon.size
+        return 0 if n_terms.zero?
+
+        options = {
+          limit: sampling_cursor_limit(n_terms),
+        }
         case @operator
         when Operator::LESS
           options[:max] = value
@@ -118,7 +164,13 @@ module Groonga
           options[:flags] = TableCursorFlags::GE
         end
         TableCursor.open(lexicon, options) do |cursor|
-          index_column.estimate_size(:lexicon_cursor => cursor)
+          if is_table_search
+            size = 1
+          else
+            size = index_column.estimate_size(:lexicon_cursor => cursor)
+          end
+          size += 1 if cursor.next != ID::NIL
+          size
         end
       end
 
@@ -144,6 +196,8 @@ module Groonga
               end
             end
           end
+        when Groonga::IndexColumn
+          indexes << column
         else
           index_info = column.find_index(@operator)
           return table.size if index_info.nil?
@@ -164,12 +218,47 @@ module Groonga
         return table.size unles****@left*****_a?(Variable)
         return table.size unles****@right*****_a?(Constant)
 
-        key =****@left*****
-        return table.size unless key.is_a?(Groonga::Accessor)
-        return table.size unless key.key?
+        is_table_search = false
+        column =****@left*****
+        case column
+        when Groonga::Accessor
+          return table.size unless column.key?
+          is_table_search = true
+          lexicon = column.object
+        when Groonga::IndexColumn
+          lexicon = column.lexicon
+        else
+          return table.size
+        end
+        n_terms = lexicon.size
+        return 0 if n_terms.zero?
+
+        value =****@right*****
+        options = {
+          min: value,
+          limit: sampling_cursor_limit(n_terms),
+          flags: TableCursorFlags::PREFIX,
+        }
+        TableCursor.open(lexicon, options) do |cursor|
+          if is_table_search
+            size = 1
+          else
+            size = column.estimate_size(:lexicon_cursor => cursor)
+          end
+          size += 1 if cursor.next != ID::NIL
+          size
+        end
+      end
 
-        # TODO: Improve
-        table.size / 10
+      def sampling_cursor_limit(n_terms)
+        limit = n_terms * 0.01
+        if limit < 10
+          10
+        elsif limit > 1000
+          1000
+        else
+          limit.to_i
+        end
       end
     end
   end

  Modified: lib/mrb/scripts/scan_info_data_size_estimator.rb (+5 -137)
===================================================================
--- lib/mrb/scripts/scan_info_data_size_estimator.rb    2019-04-26 10:21:20 +0900 (ec936f695)
+++ lib/mrb/scripts/scan_info_data_size_estimator.rb    2019-04-26 10:30:45 +0900 (ca5a69c4b)
@@ -15,22 +15,13 @@ module Groonga
 
       size = nil
       case****@data*****
-      when Operator::MATCH,
-           Operator::FUZZY
-        size = estimate_match(index_column)
-      when Operator::REGEXP
-        size = estimate_regexp(index_column)
-      when Operator::EQUAL
-        size = estimate_equal(index_column)
-      when Operator::LESS,
-           Operator::LESS_EQUAL,
-           Operator::GREATER,
-           Operator::GREATER_EQUAL
-        size = estimate_range(index_column)
-      when Operator::PREFIX
-        size = estimate_prefix(index_column)
       when Operator::CALL
         size = estimate_call(index_column)
+      else
+        left = ExpressionTree::Variable.new(index_column)
+        right = ExpressionTree::Constant.new(@data.query)
+        node = ExpressionTree::BinaryOperation.new(@data.op, left, right)
+        size = node.estimate_size(@table)
       end
       size || @table_size
     end
@@ -47,129 +38,6 @@ module Groonga
       index_column
     end
 
-    def sampling_cursor_limit(n_terms)
-      limit = n_terms * 0.01
-      if limit < 10
-        10
-      elsif limit > 1000
-        1000
-      else
-        limit.to_i
-      end
-    end
-
-    def estimate_match(index_column)
-      index_column.estimate_size(:query => @data.query.value)
-    end
-
-    def estimate_regexp(index_column)
-      index_column.estimate_size(:query => @data.query.value,
-                                 :mode => @data.op)
-    end
-
-    def estimate_equal(index_column)
-      query =****@data*****
-      if index_column.is_a?(Accessor)
-        table = index_column.object
-        if index_column.name == "_id"
-          if table.id?(query.value)
-            1
-          else
-            0
-          end
-        else
-          if table[query.value]
-            1
-          else
-            0
-          end
-        end
-      else
-        lexicon = index_column.lexicon
-        if query.domain == lexicon.id
-          term = query.value
-          return 0 if term.nil?
-          index_column.estimate_size(:term => term)
-        else
-          term_id = lexicon[query]
-          return 0 if term_id.nil?
-          index_column.estimate_size(:term_id => term_id)
-        end
-      end
-    end
-
-    def estimate_range(index_column)
-      if index_column.is_a?(Table)
-        is_table_search = true
-        lexicon = index_column
-      elsif index_column.is_a?(Groonga::Accessor)
-        is_table_search = true
-        lexicon = index_column.object
-      else
-        is_table_search = false
-        lexicon = index_column.lexicon
-      end
-      n_terms = lexicon.size
-      return 0 if n_terms.zero?
-
-      value =****@data*****
-      options = {
-        :limit => sampling_cursor_limit(n_terms),
-      }
-      case****@data*****
-      when Operator::LESS
-        options[:max] = value
-        options[:flags] = TableCursorFlags::LT
-      when Operator::LESS_EQUAL
-        options[:max] = value
-        options[:flags] = TableCursorFlags::LE
-      when Operator::GREATER
-        options[:min] = value
-        options[:flags] = TableCursorFlags::GT
-      when Operator::GREATER_EQUAL
-        options[:min] = value
-        options[:flags] = TableCursorFlags::GE
-      end
-      TableCursor.open(lexicon, options) do |cursor|
-        if is_table_search
-          size = 1
-        else
-          size = index_column.estimate_size(:lexicon_cursor => cursor)
-        end
-        size += 1 if cursor.next != ID::NIL
-        size
-      end
-    end
-
-    def estimate_prefix(index_column)
-      is_table_search =
-        (index_column.is_a?(Accessor) and
-        index_column.name == "_key")
-      if is_table_search
-        lexicon = index_column.object
-      else
-        lexicon = index_column.lexicon
-      end
-      n_terms = lexicon.size
-      return 0 if n_terms.zero?
-
-      value =****@data*****
-      options = {
-        :min => value,
-        :limit => sampling_cursor_limit(n_terms),
-        :flags => TableCursorFlags::PREFIX,
-      }
-      TableCursor.open(lexicon, options) do |cursor|
-        if is_table_search
-          size = 1
-        else
-          size = index_column.estimate_size(:lexicon_cursor => cursor)
-        end
-        size += 1 if cursor.next != ID::NIL
-        size
-      end
-    end
-
     def estimate_call(index_column)
       procedure =****@data*****[0]
       arguments =****@data*****[1..-1].collect do |arg|
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <https://lists.osdn.me/mailman/archives/groonga-commit/attachments/20190426/fc63ec51/attachment-0001.html>


More information about the Groonga-commit mailing list
Back to archive index