[Groonga-commit] groonga/groonga at 675b51c [master] Improve index detection for match operator

Back to archive index

Kouhei Sutou null+****@clear*****
Sat Sep 26 19:24:41 JST 2015


Kouhei Sutou	2015-09-26 19:24:41 +0900 (Sat, 26 Sep 2015)

  New Revision: 675b51c2949a3a8e1430e1215e189e661f7638a7
  https://github.com/groonga/groonga/commit/675b51c2949a3a8e1430e1215e189e661f7638a7

  Message:
    Improve index detection for match operator
    
    Index column that belongs to lexicon that has tokenizer is preferred.

  Added files:
    test/command/suite/select/query/match/prefer_full_text_searchable_index.expected
    test/command/suite/select/query/match/prefer_full_text_searchable_index.test
  Modified files:
    lib/db.c

  Modified: lib/db.c (+60 -19)
===================================================================
--- lib/db.c    2015-09-26 13:52:22 +0900 (4b6f69d)
+++ lib/db.c    2015-09-26 19:24:41 +0900 (6db1daf)
@@ -11233,6 +11233,30 @@ grn_db_init_builtin_types(grn_ctx *ctx)
 
 #define MULTI_COLUMN_INDEXP(i) (DB_OBJ(i)->source_size > sizeof(grn_id))
 
+static inline grn_obj *
+grn_index_column_get_tokenizer(grn_ctx *ctx, grn_obj *index_column)
+{
+  grn_obj *tokenizer;
+  grn_obj *lexicon;
+
+  lexicon = grn_ctx_at(ctx, index_column->header.domain);
+  if (!lexicon) {
+    return NULL;
+  }
+
+  grn_table_get_info(ctx, lexicon, NULL, NULL, &tokenizer, NULL, NULL);
+  return tokenizer;
+}
+
+static inline grn_bool
+is_full_text_searchable_index(grn_ctx *ctx, grn_obj *index_column)
+{
+  grn_obj *tokenizer;
+
+  tokenizer = grn_index_column_get_tokenizer(ctx, index_column);
+  return tokenizer != NULL;
+}
+
 static inline int
 grn_column_find_index_data_column_equal(grn_ctx *ctx, grn_obj *obj,
                                         grn_operator op,
@@ -11250,14 +11274,11 @@ grn_column_find_index_data_column_equal(grn_ctx *ctx, grn_obj *obj,
     grn_obj *target = grn_ctx_at(ctx, data->target);
     int section;
     if (target->header.type != GRN_COLUMN_INDEX) { continue; }
-    section = (MULTI_COLUMN_INDEXP(target)) ? data->section : 0;
-    if (section_buf) { *section_buf = section; }
     if (obj->header.type != GRN_COLUMN_FIX_SIZE) {
-      grn_obj *tokenizer, *lexicon = grn_ctx_at(ctx, target->header.domain);
-      if (!lexicon) { continue; }
-      grn_table_get_info(ctx, lexicon, NULL, NULL, &tokenizer, NULL, NULL);
-      if (tokenizer) { continue; }
+      if (is_full_text_searchable_index(ctx, target)) { continue; }
     }
+    section = (MULTI_COLUMN_INDEXP(target)) ? data->section : 0;
+    if (section_buf) { *section_buf = section; }
     if (n < buf_size) {
       *ip++ = target;
     }
@@ -11275,21 +11296,10 @@ static inline grn_bool
 is_valid_regexp_index(grn_ctx *ctx, grn_obj *index_column)
 {
   grn_obj *tokenizer;
-  grn_obj *lexicon;
-
-  lexicon = grn_ctx_at(ctx, index_column->header.domain);
-  if (!lexicon) {
-    return GRN_FALSE;
-  }
-
-  grn_table_get_info(ctx, lexicon, NULL, NULL, &tokenizer, NULL, NULL);
-  grn_obj_unlink(ctx, lexicon);
-  if (!tokenizer) {
-    return GRN_FALSE;
-  }
 
+  tokenizer = grn_index_column_get_tokenizer(ctx, index_column);
   /* TODO: Restrict to TokenRegexp? */
-  return GRN_TRUE;
+  return tokenizer != NULL;
 }
 
 static inline int
@@ -11304,6 +11314,7 @@ grn_column_find_index_data_column_match(grn_ctx *ctx, grn_obj *obj,
   grn_obj **ip = index_buf;
   grn_hook_entry hook_entry;
   grn_hook *hooks;
+  grn_bool prefer_full_text_search_index = GRN_FALSE;
 
   switch (obj->header.type) {
   case GRN_TABLE_HASH_KEY :
@@ -11317,14 +11328,44 @@ grn_column_find_index_data_column_match(grn_ctx *ctx, grn_obj *obj,
     break;
   }
 
+  if (op != GRN_OP_REGEXP && !grn_column_is_vector(ctx, obj)) {
+    prefer_full_text_search_index = GRN_TRUE;
+  }
+
+  if (prefer_full_text_search_index) {
+    for (hooks = DB_OBJ(obj)->hooks[hook_entry]; hooks; hooks = hooks->next) {
+      default_set_value_hook_data *data = (void *)NEXT_ADDR(hooks);
+      grn_obj *target = grn_ctx_at(ctx, data->target);
+      int section;
+      if (target->header.type != GRN_COLUMN_INDEX) { continue; }
+      if (!is_full_text_searchable_index(ctx, target)) { continue; }
+      section = (MULTI_COLUMN_INDEXP(target)) ? data->section : 0;
+      if (section_buf) { *section_buf = section; }
+      if (n < buf_size) {
+        *ip++ = target;
+      }
+      if (n < n_index_data) {
+        index_data[n].index = target;
+        index_data[n].section = section;
+      }
+      n++;
+    }
+  }
+
   for (hooks = DB_OBJ(obj)->hooks[hook_entry]; hooks; hooks = hooks->next) {
     default_set_value_hook_data *data = (void *)NEXT_ADDR(hooks);
     grn_obj *target = grn_ctx_at(ctx, data->target);
     int section;
+
     if (target->header.type != GRN_COLUMN_INDEX) { continue; }
     if (op == GRN_OP_REGEXP && !is_valid_regexp_index(ctx, target)) {
       continue;
     }
+
+    if (prefer_full_text_search_index) {
+      if (is_full_text_searchable_index(ctx, target)) { continue; }
+    }
+
     section = (MULTI_COLUMN_INDEXP(target)) ? data->section : 0;
     if (section_buf) { *section_buf = section; }
     if (n < buf_size) {

  Added: test/command/suite/select/query/match/prefer_full_text_searchable_index.expected (+21 -0) 100644
===================================================================
--- /dev/null
+++ test/command/suite/select/query/match/prefer_full_text_searchable_index.expected    2015-09-26 19:24:41 +0900 (7f56f96)
@@ -0,0 +1,21 @@
+table_create Users TABLE_NO_KEY
+[[0,0.0,0.0],true]
+column_create Users name COLUMN_SCALAR ShortText
+[[0,0.0,0.0],true]
+table_create Terms TABLE_PAT_KEY ShortText --default_tokenizer TokenBigram
+[[0,0.0,0.0],true]
+column_create Terms index COLUMN_INDEX|WITH_POSITION Users name
+[[0,0.0,0.0],true]
+table_create Names TABLE_PAT_KEY ShortText
+[[0,0.0,0.0],true]
+column_create Names index COLUMN_INDEX Users name
+[[0,0.0,0.0],true]
+load --table Users
+[
+{"name": "Alice"},
+{"name": "Bob"},
+{"name": "Carlos"}
+]
+[[0,0.0,0.0],3]
+select Users --query name:@lic
+[[0,0.0,0.0],[[[1],[["_id","UInt32"],["name","ShortText"]],[1,"Alice"]]]]

  Added: test/command/suite/select/query/match/prefer_full_text_searchable_index.test (+17 -0) 100644
===================================================================
--- /dev/null
+++ test/command/suite/select/query/match/prefer_full_text_searchable_index.test    2015-09-26 19:24:41 +0900 (907a7bd)
@@ -0,0 +1,17 @@
+table_create Users TABLE_NO_KEY
+column_create Users name COLUMN_SCALAR ShortText
+
+table_create Terms TABLE_PAT_KEY ShortText --default_tokenizer TokenBigram
+column_create Terms index COLUMN_INDEX|WITH_POSITION Users name
+
+table_create Names TABLE_PAT_KEY ShortText
+column_create Names index COLUMN_INDEX Users name
+
+load --table Users
+[
+{"name": "Alice"},
+{"name": "Bob"},
+{"name": "Carlos"}
+]
+
+select Users --query name:@lic
-------------- next part --------------
HTMLの添付ファイルを保管しました...
下載 



More information about the Groonga-commit mailing list
Back to archive index