Kouhei Sutou
null+****@clear*****
Tue Sep 8 10:25:17 JST 2015
Kouhei Sutou 2015-09-08 10:25:17 +0900 (Tue, 08 Sep 2015) New Revision: acb9022f02235f5239dc55306ac952f21920b935 https://github.com/groonga/groonga/commit/acb9022f02235f5239dc55306ac952f21920b935 Message: regexp: make match target text normalized This is an incompatible change. It takes effect only when no index is available. If any index is available, the behavior isn't changed. This may be confusing, but we chose this behavior for the following reasons: * Consistency with other operators such as OP_MATCH and OP_PREFIX. * Keeping index search available. (If users use complex regexp syntax such as (?i), we can't use an index for fast search. That's not the expected usage pattern.) Added files: test/command/suite/select/filter/no_index/regexp/normalized_text.expected test/command/suite/select/filter/no_index/regexp/normalized_text.test Modified files: lib/operator.c test/command/suite/select/filter/no_index/regexp/text_text.expected test/command/suite/select/filter/no_index/regexp/text_text.test Modified: lib/operator.c (+15 -12) =================================================================== --- lib/operator.c 2015-09-07 17:22:23 +0900 (f399534) +++ lib/operator.c 2015-09-08 10:25:17 +0900 (c82708b) @@ -841,23 +841,24 @@ exec_text_operator_raw_text_raw_text(grn_ctx *ctx, return GRN_FALSE; } - if (op == GRN_OP_REGEXP) { - return exec_text_operator(ctx, op, - target, target_len, - query, query_len); - } - normalizer = grn_ctx_get(ctx, GRN_NORMALIZER_AUTO_NAME, -1); norm_target = grn_string_open(ctx, target, target_len, normalizer, 0); - norm_query = grn_string_open(ctx, query, query_len, normalizer, 0); grn_string_get_normalized(ctx, norm_target, &norm_target_raw, &norm_target_raw_length_in_bytes, NULL); - grn_string_get_normalized(ctx, norm_query, - &norm_query_raw, - &norm_query_raw_length_in_bytes, - NULL); + + if (op == GRN_OP_REGEXP) { + norm_query = NULL; + norm_query_raw = query; + norm_query_raw_length_in_bytes = query_len; + } else { + norm_query = grn_string_open(ctx, query, 
query_len, normalizer, 0); + grn_string_get_normalized(ctx, norm_query, + &norm_query_raw, + &norm_query_raw_length_in_bytes, + NULL); + } matched = exec_text_operator(ctx, op, norm_target_raw, @@ -866,7 +867,9 @@ exec_text_operator_raw_text_raw_text(grn_ctx *ctx, norm_query_raw_length_in_bytes); grn_obj_close(ctx, norm_target); - grn_obj_close(ctx, norm_query); + if (norm_query) { + grn_obj_close(ctx, norm_query); + } grn_obj_unlink(ctx, normalizer); return matched; Added: test/command/suite/select/filter/no_index/regexp/normalized_text.expected (+44 -0) 100644 =================================================================== --- /dev/null +++ test/command/suite/select/filter/no_index/regexp/normalized_text.expected 2015-09-08 10:25:17 +0900 (d288752) @@ -0,0 +1,44 @@ +table_create Memos TABLE_NO_KEY +[[0,0.0,0.0],true] +column_create Memos content COLUMN_SCALAR Text +[[0,0.0,0.0],true] +load --table Memos +[ +{"content": "groonga"}, +{"content": "Groonga"}, +{"content": "Mroonga"} +] +[[0,0.0,0.0],3] +select Memos --filter 'content @~ "\\\\Agr"' +[ + [ + 0, + 0.0, + 0.0 + ], + [ + [ + [ + 2 + ], + [ + [ + "_id", + "UInt32" + ], + [ + "content", + "Text" + ] + ], + [ + 1, + "groonga" + ], + [ + 2, + "Groonga" + ] + ] + ] +] Added: test/command/suite/select/filter/no_index/regexp/normalized_text.test (+11 -0) 100644 =================================================================== --- /dev/null +++ test/command/suite/select/filter/no_index/regexp/normalized_text.test 2015-09-08 10:25:17 +0900 (7ebed94) @@ -0,0 +1,11 @@ +table_create Memos TABLE_NO_KEY +column_create Memos content COLUMN_SCALAR Text + +load --table Memos +[ +{"content": "groonga"}, +{"content": "Groonga"}, +{"content": "Mroonga"} +] + +select Memos --filter 'content @~ "\\\\Agr"' Modified: test/command/suite/select/filter/no_index/regexp/text_text.expected (+5 -4) =================================================================== --- 
test/command/suite/select/filter/no_index/regexp/text_text.expected 2015-09-07 17:22:23 +0900 (fe6fd67) +++ test/command/suite/select/filter/no_index/regexp/text_text.expected 2015-09-08 10:25:17 +0900 (c9770a2) @@ -4,8 +4,9 @@ column_create Memos content COLUMN_SCALAR Text [[0,0.0,0.0],true] load --table Memos [ -{"content": "Groonga"} +{"content": "groonga"}, +{"content": "rroonga"} ] -[[0,0.0,0.0],1] -select Memos --filter 'content @~ "\\\\AGr"' -[[0,0.0,0.0],[[[1],[["_id","UInt32"],["content","Text"]],[1,"Groonga"]]]] +[[0,0.0,0.0],2] +select Memos --filter 'content @~ "\\\\Agr"' +[[0,0.0,0.0],[[[1],[["_id","UInt32"],["content","Text"]],[1,"groonga"]]]] Modified: test/command/suite/select/filter/no_index/regexp/text_text.test (+3 -2) =================================================================== --- test/command/suite/select/filter/no_index/regexp/text_text.test 2015-09-07 17:22:23 +0900 (54d8e2f) +++ test/command/suite/select/filter/no_index/regexp/text_text.test 2015-09-08 10:25:17 +0900 (aec3462) @@ -3,7 +3,8 @@ column_create Memos content COLUMN_SCALAR Text load --table Memos [ -{"content": "Groonga"} +{"content": "groonga"}, +{"content": "rroonga"} ] -select Memos --filter 'content @~ "\\\\AGr"' +select Memos --filter 'content @~ "\\\\Agr"' -------------- next part -------------- HTML����������������������������... 下載