Kouhei Sutou
null+****@clear*****
Sat Sep 26 19:24:41 JST 2015
Kouhei Sutou 2015-09-26 19:24:41 +0900 (Sat, 26 Sep 2015) New Revision: 675b51c2949a3a8e1430e1215e189e661f7638a7 https://github.com/groonga/groonga/commit/675b51c2949a3a8e1430e1215e189e661f7638a7 Message: Improve index detection for match operator Index column that belongs to lexicon that has tokenizer is preferred. Added files: test/command/suite/select/query/match/prefer_full_text_searchable_index.expected test/command/suite/select/query/match/prefer_full_text_searchable_index.test Modified files: lib/db.c Modified: lib/db.c (+60 -19) =================================================================== --- lib/db.c 2015-09-26 13:52:22 +0900 (4b6f69d) +++ lib/db.c 2015-09-26 19:24:41 +0900 (6db1daf) @@ -11233,6 +11233,30 @@ grn_db_init_builtin_types(grn_ctx *ctx) #define MULTI_COLUMN_INDEXP(i) (DB_OBJ(i)->source_size > sizeof(grn_id)) +static inline grn_obj * +grn_index_column_get_tokenizer(grn_ctx *ctx, grn_obj *index_column) +{ + grn_obj *tokenizer; + grn_obj *lexicon; + + lexicon = grn_ctx_at(ctx, index_column->header.domain); + if (!lexicon) { + return NULL; + } + + grn_table_get_info(ctx, lexicon, NULL, NULL, &tokenizer, NULL, NULL); + return tokenizer; +} + +static inline grn_bool +is_full_text_searchable_index(grn_ctx *ctx, grn_obj *index_column) +{ + grn_obj *tokenizer; + + tokenizer = grn_index_column_get_tokenizer(ctx, index_column); + return tokenizer != NULL; +} + static inline int grn_column_find_index_data_column_equal(grn_ctx *ctx, grn_obj *obj, grn_operator op, @@ -11250,14 +11274,11 @@ grn_column_find_index_data_column_equal(grn_ctx *ctx, grn_obj *obj, grn_obj *target = grn_ctx_at(ctx, data->target); int section; if (target->header.type != GRN_COLUMN_INDEX) { continue; } - section = (MULTI_COLUMN_INDEXP(target)) ? 
data->section : 0; - if (section_buf) { *section_buf = section; } if (obj->header.type != GRN_COLUMN_FIX_SIZE) { - grn_obj *tokenizer, *lexicon = grn_ctx_at(ctx, target->header.domain); - if (!lexicon) { continue; } - grn_table_get_info(ctx, lexicon, NULL, NULL, &tokenizer, NULL, NULL); - if (tokenizer) { continue; } + if (is_full_text_searchable_index(ctx, target)) { continue; } } + section = (MULTI_COLUMN_INDEXP(target)) ? data->section : 0; + if (section_buf) { *section_buf = section; } if (n < buf_size) { *ip++ = target; } @@ -11275,21 +11296,10 @@ static inline grn_bool is_valid_regexp_index(grn_ctx *ctx, grn_obj *index_column) { grn_obj *tokenizer; - grn_obj *lexicon; - - lexicon = grn_ctx_at(ctx, index_column->header.domain); - if (!lexicon) { - return GRN_FALSE; - } - - grn_table_get_info(ctx, lexicon, NULL, NULL, &tokenizer, NULL, NULL); - grn_obj_unlink(ctx, lexicon); - if (!tokenizer) { - return GRN_FALSE; - } + tokenizer = grn_index_column_get_tokenizer(ctx, index_column); /* TODO: Restrict to TokenRegexp? 
*/ - return GRN_TRUE; + return tokenizer != NULL; } static inline int @@ -11304,6 +11314,7 @@ grn_column_find_index_data_column_match(grn_ctx *ctx, grn_obj *obj, grn_obj **ip = index_buf; grn_hook_entry hook_entry; grn_hook *hooks; + grn_bool prefer_full_text_search_index = GRN_FALSE; switch (obj->header.type) { case GRN_TABLE_HASH_KEY : @@ -11317,14 +11328,44 @@ grn_column_find_index_data_column_match(grn_ctx *ctx, grn_obj *obj, break; } + if (op != GRN_OP_REGEXP && !grn_column_is_vector(ctx, obj)) { + prefer_full_text_search_index = GRN_TRUE; + } + + if (prefer_full_text_search_index) { + for (hooks = DB_OBJ(obj)->hooks[hook_entry]; hooks; hooks = hooks->next) { + default_set_value_hook_data *data = (void *)NEXT_ADDR(hooks); + grn_obj *target = grn_ctx_at(ctx, data->target); + int section; + if (target->header.type != GRN_COLUMN_INDEX) { continue; } + if (!is_full_text_searchable_index(ctx, target)) { continue; } + section = (MULTI_COLUMN_INDEXP(target)) ? data->section : 0; + if (section_buf) { *section_buf = section; } + if (n < buf_size) { + *ip++ = target; + } + if (n < n_index_data) { + index_data[n].index = target; + index_data[n].section = section; + } + n++; + } + } + for (hooks = DB_OBJ(obj)->hooks[hook_entry]; hooks; hooks = hooks->next) { default_set_value_hook_data *data = (void *)NEXT_ADDR(hooks); grn_obj *target = grn_ctx_at(ctx, data->target); int section; + if (target->header.type != GRN_COLUMN_INDEX) { continue; } if (op == GRN_OP_REGEXP && !is_valid_regexp_index(ctx, target)) { continue; } + + if (prefer_full_text_search_index) { + if (is_full_text_searchable_index(ctx, target)) { continue; } + } + section = (MULTI_COLUMN_INDEXP(target)) ? 
data->section : 0; if (section_buf) { *section_buf = section; } if (n < buf_size) { Added: test/command/suite/select/query/match/prefer_full_text_searchable_index.expected (+21 -0) 100644 =================================================================== --- /dev/null +++ test/command/suite/select/query/match/prefer_full_text_searchable_index.expected 2015-09-26 19:24:41 +0900 (7f56f96) @@ -0,0 +1,21 @@ +table_create Users TABLE_NO_KEY +[[0,0.0,0.0],true] +column_create Users name COLUMN_SCALAR ShortText +[[0,0.0,0.0],true] +table_create Terms TABLE_PAT_KEY ShortText --default_tokenizer TokenBigram +[[0,0.0,0.0],true] +column_create Terms index COLUMN_INDEX|WITH_POSITION Users name +[[0,0.0,0.0],true] +table_create Names TABLE_PAT_KEY ShortText +[[0,0.0,0.0],true] +column_create Names index COLUMN_INDEX Users name +[[0,0.0,0.0],true] +load --table Users +[ +{"name": "Alice"}, +{"name": "Bob"}, +{"name": "Carlos"} +] +[[0,0.0,0.0],3] +select Users --query name:@lic +[[0,0.0,0.0],[[[1],[["_id","UInt32"],["name","ShortText"]],[1,"Alice"]]]] Added: test/command/suite/select/query/match/prefer_full_text_searchable_index.test (+17 -0) 100644 =================================================================== --- /dev/null +++ test/command/suite/select/query/match/prefer_full_text_searchable_index.test 2015-09-26 19:24:41 +0900 (907a7bd) @@ -0,0 +1,17 @@ +table_create Users TABLE_NO_KEY +column_create Users name COLUMN_SCALAR ShortText + +table_create Terms TABLE_PAT_KEY ShortText --default_tokenizer TokenBigram +column_create Terms index COLUMN_INDEX|WITH_POSITION Users name + +table_create Names TABLE_PAT_KEY ShortText +column_create Names index COLUMN_INDEX Users name + +load --table Users +[ +{"name": "Alice"}, +{"name": "Bob"}, +{"name": "Carlos"} +] + +select Users --query name:@lic -------------- next part -------------- [HTML attachment; the archive's description text was lost to an encoding error] Download