[Groonga-commit] groonga/groonga at 8a814ad [master] select: improve performance for range search and enough filtered case

Back to archive index

Kouhei Sutou null+****@clear*****
Fri May 6 16:02:00 JST 2016


Kouhei Sutou	2016-05-06 16:02:00 +0900 (Fri, 06 May 2016)

  New Revision: 8a814ad16f4b05bffd0616df3fc7fb354e3add4e
  https://github.com/groonga/groonga/commit/8a814ad16f4b05bffd0616df3fc7fb354e3add4e

  Message:
    select: improve performance for range search and enough filtered case
    
    This may improve performance when a table has many records and two
    conditions such as --filter 'content @ "keyword" && price < 1000'. If
    'content @ "keyword"' filters many records, 'price < 1000' may be
    evaluated as sequential search instead of index search. Sequential
    search is faster than index search when the number of remained records
    is less.
    
    It's disabled by default. You can enable it by defining
    GRN_TABLE_SELECT_ENOUGH_FILTERED_RATIO=0.1 environment variable.
    
    0.1 means that
    "the number of remained records by 'content @ "keyword"'" /
    "the number of original records"
    must be 10% or less to use this optimization.
    
    If "the number of remained records by 'content @ "keyword"'" is greater
    than 1000, this optimization isn't used. You can custom the threshold by
    defining GRN_TABLE_SELECT_MAX_N_ENOUGH_FILTERED_RECORDS=5000 environment
    variable. In the case, the optimization may be used when "the number of
    remained records by 'content @ "keyword"'" is equal to or less than than
    5000.
    
    This optimization is used when both of the two conditions are
    satisfied.

  Added files:
    test/command/suite/select/filter/index/range/optimization/use_sequetial_search.expected
    test/command/suite/select/filter/index/range/optimization/use_sequetial_search.test
  Modified files:
    lib/ctx.c
    lib/expr.c
    lib/grn_expr.h

  Modified: lib/ctx.c (+2 -0)
===================================================================
--- lib/ctx.c    2016-05-06 16:00:24 +0900 (c5550ab)
+++ lib/ctx.c    2016-05-06 16:02:00 +0900 (4b24a69)
@@ -34,6 +34,7 @@
 #include "grn_ctx_impl_mrb.h"
 #include "grn_logger.h"
 #include "grn_cache.h"
+#include "grn_expr.h"
 #include <stdio.h>
 #include <stdarg.h>
 #include <time.h>
@@ -89,6 +90,7 @@ grn_init_from_env(void)
   grn_io_init_from_env();
   grn_ii_init_from_env();
   grn_db_init_from_env();
+  grn_expr_init_from_env();
   grn_index_column_init_from_env();
   grn_proc_init_from_env();
   grn_plugin_init_from_env();

  Modified: lib/expr.c (+100 -0)
===================================================================
--- lib/expr.c    2016-05-06 16:00:24 +0900 (c8085a2)
+++ lib/expr.c    2016-05-06 16:02:00 +0900 (bb0d9b2)
@@ -39,6 +39,35 @@
 # include <oniguruma.h>
 #endif
 
+static double grn_table_select_enough_filtered_ratio = 0.0;
+static int grn_table_select_max_n_enough_filtered_records = 1000;
+
+void
+grn_expr_init_from_env(void)
+{
+  {
+    char grn_table_select_enough_filtered_ratio_env[GRN_ENV_BUFFER_SIZE];
+    grn_getenv("GRN_TABLE_SELECT_ENOUGH_FILTERED_RATIO",
+               grn_table_select_enough_filtered_ratio_env,
+               GRN_ENV_BUFFER_SIZE);
+    if (grn_table_select_enough_filtered_ratio_env[0]) {
+      grn_table_select_enough_filtered_ratio =
+        atof(grn_table_select_enough_filtered_ratio_env);
+    }
+  }
+
+  {
+    char grn_table_select_max_n_enough_filtered_records_env[GRN_ENV_BUFFER_SIZE];
+    grn_getenv("GRN_TABLE_SELECT_MAX_N_ENOUGH_FILTERED_RECORDS",
+               grn_table_select_max_n_enough_filtered_records_env,
+               GRN_ENV_BUFFER_SIZE);
+    if (grn_table_select_max_n_enough_filtered_records_env[0]) {
+      grn_table_select_max_n_enough_filtered_records =
+        atoi(grn_table_select_max_n_enough_filtered_records_env);
+    }
+  }
+}
+
 grn_obj *
 grn_expr_alloc(grn_ctx *ctx, grn_obj *expr, grn_id domain, grn_obj_flags flags)
 {
@@ -5765,6 +5794,66 @@ grn_table_select_index_report(grn_ctx *ctx, const char *tag, grn_obj *index)
   grn_report_index(ctx, "[table][select]", tag, index);
 }
 
+static inline void
+grn_table_select_index_not_used_report(grn_ctx *ctx,
+                                       const char *tag,
+                                       grn_obj *index,
+                                       const char *reason)
+{
+  grn_report_index_not_used(ctx, "[table][select]", tag, index, reason);
+}
+
+static inline grn_bool
+grn_table_select_index_use_sequential_search(grn_ctx *ctx,
+                                             grn_obj *table,
+                                             grn_obj *res,
+                                             grn_operator logical_op,
+                                             const char *tag,
+                                             grn_obj *index)
+{
+  int n_records;
+  int n_filtered_records;
+  double filtered_ratio;
+  grn_obj reason;
+
+  if (logical_op != GRN_OP_AND) {
+    return GRN_FALSE;
+  }
+
+  n_records = grn_table_size(ctx, table);
+  n_filtered_records = grn_table_size(ctx, res);
+  if (n_records == 0) {
+    filtered_ratio = 1.0;
+  } else {
+    filtered_ratio = (double)n_filtered_records / (double)n_records;
+  }
+
+  if (filtered_ratio >= grn_table_select_enough_filtered_ratio) {
+    return GRN_FALSE;
+  }
+
+  if (n_filtered_records > grn_table_select_max_n_enough_filtered_records) {
+    return GRN_FALSE;
+  }
+
+  GRN_TEXT_INIT(&reason, 0);
+  grn_text_printf(ctx, &reason,
+                  "enough filtered: %.2f%%(%d/%d) < %.2f%% && %d <= %d",
+                  filtered_ratio * 100,
+                  n_filtered_records,
+                  n_records,
+                  grn_table_select_enough_filtered_ratio * 100,
+                  n_filtered_records,
+                  grn_table_select_max_n_enough_filtered_records);
+  GRN_TEXT_PUTC(ctx, &reason, '\0');
+  grn_table_select_index_not_used_report(ctx,
+                                         tag,
+                                         index,
+                                         GRN_TEXT_VALUE(&reason));
+  GRN_OBJ_FIN(ctx, &reason);
+  return GRN_TRUE;
+}
+
 static inline grn_bool
 grn_table_select_index_equal(grn_ctx *ctx,
                              grn_obj *table,
@@ -6002,6 +6091,7 @@ grn_table_select_index_range_column(grn_ctx *ctx, grn_obj *table,
                                     scan_info *si, grn_operator logical_op,
                                     grn_obj *res)
 {
+  const char *tag = "[range]";
   grn_bool processed = GRN_FALSE;
   grn_obj *index_table;
   grn_obj range;
@@ -6011,6 +6101,16 @@ grn_table_select_index_range_column(grn_ctx *ctx, grn_obj *table,
     return GRN_FALSE;
   }
 
+  if (grn_table_select_index_use_sequential_search(ctx,
+                                                   table,
+                                                   res,
+                                                   logical_op,
+                                                   tag,
+                                                   index_table)) {
+    grn_obj_unlink(ctx, index_table);
+    return GRN_FALSE;
+  }
+
   GRN_OBJ_INIT(&range, GRN_BULK, 0, index_table->header.domain);
   if (grn_obj_cast(ctx, si->query, &range, GRN_FALSE) == GRN_SUCCESS) {
     grn_table_cursor *cursor;

  Modified: lib/grn_expr.h (+2 -0)
===================================================================
--- lib/grn_expr.h    2016-05-06 16:00:24 +0900 (76633a9)
+++ lib/grn_expr.h    2016-05-06 16:02:00 +0900 (c20821f)
@@ -40,6 +40,8 @@ typedef enum {
 typedef struct _grn_scan_info scan_info;
 typedef grn_bool (*grn_scan_info_each_arg_callback)(grn_ctx *ctx, grn_obj *obj, void *user_data);
 
+void grn_expr_init_from_env(void);
+
 scan_info **grn_scan_info_build(grn_ctx *ctx, grn_obj *expr, int *n,
                                 grn_operator op, grn_bool record_exist);
 

  Added: test/command/suite/select/filter/index/range/optimization/use_sequetial_search.expected (+166 -0) 100644
===================================================================
--- /dev/null
+++ test/command/suite/select/filter/index/range/optimization/use_sequetial_search.expected    2016-05-06 16:02:00 +0900 (01157b8)
@@ -0,0 +1,166 @@
+table_create Users TABLE_HASH_KEY ShortText
+[[0,0.0,0.0],true]
+column_create Users age COLUMN_SCALAR Int32
+[[0,0.0,0.0],true]
+table_create Ages TABLE_PAT_KEY Int32
+[[0,0.0,0.0],true]
+column_create Ages users_age COLUMN_INDEX Users age
+[[0,0.0,0.0],true]
+load --table Users
+[
+{"_key": "user00", "age":  0},
+{"_key": "user01", "age":  1},
+{"_key": "user02", "age":  2},
+{"_key": "user03", "age":  3},
+{"_key": "user04", "age":  4},
+{"_key": "user05", "age":  5},
+{"_key": "user06", "age":  6},
+{"_key": "user07", "age":  7},
+{"_key": "user08", "age":  8},
+{"_key": "user09", "age":  9},
+{"_key": "user10", "age": 10},
+{"_key": "user11", "age": 11},
+{"_key": "user12", "age": 12},
+{"_key": "user13", "age": 13},
+{"_key": "user14", "age": 14},
+{"_key": "user15", "age": 15},
+{"_key": "user16", "age": 16},
+{"_key": "user17", "age": 17},
+{"_key": "user18", "age": 18},
+{"_key": "user19", "age": 19},
+{"_key": "user20", "age": 20},
+{"_key": "user21", "age": 21},
+{"_key": "user22", "age": 22},
+{"_key": "user23", "age": 23},
+{"_key": "user24", "age": 24},
+{"_key": "user25", "age": 25},
+{"_key": "user26", "age": 26},
+{"_key": "user27", "age": 27},
+{"_key": "user28", "age": 28},
+{"_key": "user29", "age": 29},
+{"_key": "user30", "age": 30},
+{"_key": "user31", "age": 31},
+{"_key": "user32", "age": 32},
+{"_key": "user33", "age": 33},
+{"_key": "user34", "age": 34},
+{"_key": "user35", "age": 35},
+{"_key": "user36", "age": 36},
+{"_key": "user37", "age": 37},
+{"_key": "user38", "age": 38},
+{"_key": "user39", "age": 39},
+{"_key": "user40", "age": 40},
+{"_key": "user41", "age": 41},
+{"_key": "user42", "age": 42},
+{"_key": "user43", "age": 43},
+{"_key": "user44", "age": 44},
+{"_key": "user45", "age": 45},
+{"_key": "user46", "age": 46},
+{"_key": "user47", "age": 47},
+{"_key": "user48", "age": 48},
+{"_key": "user49", "age": 49},
+{"_key": "user50", "age": 50},
+{"_key": "user51", "age": 51},
+{"_key": "user52", "age": 52},
+{"_key": "user53", "age": 53},
+{"_key": "user54", "age": 54},
+{"_key": "user55", "age": 55},
+{"_key": "user56", "age": 56},
+{"_key": "user57", "age": 57},
+{"_key": "user58", "age": 58},
+{"_key": "user59", "age": 59},
+{"_key": "user60", "age": 60},
+{"_key": "user61", "age": 61},
+{"_key": "user62", "age": 62},
+{"_key": "user63", "age": 63},
+{"_key": "user64", "age": 64},
+{"_key": "user65", "age": 65},
+{"_key": "user66", "age": 66},
+{"_key": "user67", "age": 67},
+{"_key": "user68", "age": 68},
+{"_key": "user69", "age": 69},
+{"_key": "user70", "age": 70},
+{"_key": "user71", "age": 71},
+{"_key": "user72", "age": 72},
+{"_key": "user73", "age": 73},
+{"_key": "user74", "age": 74},
+{"_key": "user75", "age": 75},
+{"_key": "user76", "age": 76},
+{"_key": "user77", "age": 77},
+{"_key": "user78", "age": 78},
+{"_key": "user79", "age": 79},
+{"_key": "user80", "age": 80},
+{"_key": "user81", "age": 81},
+{"_key": "user82", "age": 82},
+{"_key": "user83", "age": 83},
+{"_key": "user84", "age": 84},
+{"_key": "user85", "age": 85},
+{"_key": "user86", "age": 86},
+{"_key": "user87", "age": 87},
+{"_key": "user88", "age": 88},
+{"_key": "user89", "age": 89},
+{"_key": "user90", "age": 90},
+{"_key": "user91", "age": 91},
+{"_key": "user92", "age": 92},
+{"_key": "user93", "age": 93},
+{"_key": "user94", "age": 94},
+{"_key": "user95", "age": 95},
+{"_key": "user96", "age": 96},
+{"_key": "user97", "age": 97},
+{"_key": "user98", "age": 98},
+{"_key": "user99", "age": 99}
+]
+[[0,0.0,0.0],100]
+log_level --level info
+[[0,0.0,0.0],true]
+select Users --filter '_key @^ "user0" && age > 5'
+[
+  [
+    0,
+    0.0,
+    0.0
+  ],
+  [
+    [
+      [
+        4
+      ],
+      [
+        [
+          "_id",
+          "UInt32"
+        ],
+        [
+          "_key",
+          "ShortText"
+        ],
+        [
+          "age",
+          "Int32"
+        ]
+      ],
+      [
+        7,
+        "user06",
+        6
+      ],
+      [
+        8,
+        "user07",
+        7
+      ],
+      [
+        9,
+        "user08",
+        8
+      ],
+      [
+        10,
+        "user09",
+        9
+      ]
+    ]
+  ]
+]
+#|i| [table][select][index-not-used][range] <Ages>: enough filtered: 10.00%(10/100) < 11.00% && 10 <= 1000
+log_level --level notice
+[[0,0.0,0.0],true]

  Added: test/command/suite/select/filter/index/range/optimization/use_sequetial_search.test (+117 -0) 100644
===================================================================
--- /dev/null
+++ test/command/suite/select/filter/index/range/optimization/use_sequetial_search.test    2016-05-06 16:02:00 +0900 (5eb1d50)
@@ -0,0 +1,117 @@
+#$GRN_TABLE_SELECT_ENOUGH_FILTERED_RATIO=0.11
+
+table_create Users TABLE_HASH_KEY ShortText
+column_create Users age COLUMN_SCALAR Int32
+
+table_create Ages TABLE_PAT_KEY Int32
+column_create Ages users_age COLUMN_INDEX Users age
+
+load --table Users
+[
+{"_key": "user00", "age":  0},
+{"_key": "user01", "age":  1},
+{"_key": "user02", "age":  2},
+{"_key": "user03", "age":  3},
+{"_key": "user04", "age":  4},
+{"_key": "user05", "age":  5},
+{"_key": "user06", "age":  6},
+{"_key": "user07", "age":  7},
+{"_key": "user08", "age":  8},
+{"_key": "user09", "age":  9},
+{"_key": "user10", "age": 10},
+{"_key": "user11", "age": 11},
+{"_key": "user12", "age": 12},
+{"_key": "user13", "age": 13},
+{"_key": "user14", "age": 14},
+{"_key": "user15", "age": 15},
+{"_key": "user16", "age": 16},
+{"_key": "user17", "age": 17},
+{"_key": "user18", "age": 18},
+{"_key": "user19", "age": 19},
+{"_key": "user20", "age": 20},
+{"_key": "user21", "age": 21},
+{"_key": "user22", "age": 22},
+{"_key": "user23", "age": 23},
+{"_key": "user24", "age": 24},
+{"_key": "user25", "age": 25},
+{"_key": "user26", "age": 26},
+{"_key": "user27", "age": 27},
+{"_key": "user28", "age": 28},
+{"_key": "user29", "age": 29},
+{"_key": "user30", "age": 30},
+{"_key": "user31", "age": 31},
+{"_key": "user32", "age": 32},
+{"_key": "user33", "age": 33},
+{"_key": "user34", "age": 34},
+{"_key": "user35", "age": 35},
+{"_key": "user36", "age": 36},
+{"_key": "user37", "age": 37},
+{"_key": "user38", "age": 38},
+{"_key": "user39", "age": 39},
+{"_key": "user40", "age": 40},
+{"_key": "user41", "age": 41},
+{"_key": "user42", "age": 42},
+{"_key": "user43", "age": 43},
+{"_key": "user44", "age": 44},
+{"_key": "user45", "age": 45},
+{"_key": "user46", "age": 46},
+{"_key": "user47", "age": 47},
+{"_key": "user48", "age": 48},
+{"_key": "user49", "age": 49},
+{"_key": "user50", "age": 50},
+{"_key": "user51", "age": 51},
+{"_key": "user52", "age": 52},
+{"_key": "user53", "age": 53},
+{"_key": "user54", "age": 54},
+{"_key": "user55", "age": 55},
+{"_key": "user56", "age": 56},
+{"_key": "user57", "age": 57},
+{"_key": "user58", "age": 58},
+{"_key": "user59", "age": 59},
+{"_key": "user60", "age": 60},
+{"_key": "user61", "age": 61},
+{"_key": "user62", "age": 62},
+{"_key": "user63", "age": 63},
+{"_key": "user64", "age": 64},
+{"_key": "user65", "age": 65},
+{"_key": "user66", "age": 66},
+{"_key": "user67", "age": 67},
+{"_key": "user68", "age": 68},
+{"_key": "user69", "age": 69},
+{"_key": "user70", "age": 70},
+{"_key": "user71", "age": 71},
+{"_key": "user72", "age": 72},
+{"_key": "user73", "age": 73},
+{"_key": "user74", "age": 74},
+{"_key": "user75", "age": 75},
+{"_key": "user76", "age": 76},
+{"_key": "user77", "age": 77},
+{"_key": "user78", "age": 78},
+{"_key": "user79", "age": 79},
+{"_key": "user80", "age": 80},
+{"_key": "user81", "age": 81},
+{"_key": "user82", "age": 82},
+{"_key": "user83", "age": 83},
+{"_key": "user84", "age": 84},
+{"_key": "user85", "age": 85},
+{"_key": "user86", "age": 86},
+{"_key": "user87", "age": 87},
+{"_key": "user88", "age": 88},
+{"_key": "user89", "age": 89},
+{"_key": "user90", "age": 90},
+{"_key": "user91", "age": 91},
+{"_key": "user92", "age": 92},
+{"_key": "user93", "age": 93},
+{"_key": "user94", "age": 94},
+{"_key": "user95", "age": 95},
+{"_key": "user96", "age": 96},
+{"_key": "user97", "age": 97},
+{"_key": "user98", "age": 98},
+{"_key": "user99", "age": 99}
+]
+
+log_level --level info
+#@add-important-log-levels info
+select Users --filter '_key @^ "user0" && age > 5'
+#@remove-important-log-levels info
+log_level --level notice
-------------- next part --------------
HTML����������������������������...
下載 



More information about the Groonga-commit mailing list
Back to archive index