[Groonga-commit] groonga/groonga at acb9022 [master] regexp: make match target text normalized

Back to archive index

Kouhei Sutou null+****@clear*****
Tue Sep 8 10:25:17 JST 2015


Kouhei Sutou	2015-09-08 10:25:17 +0900 (Tue, 08 Sep 2015)

  New Revision: acb9022f02235f5239dc55306ac952f21920b935
  https://github.com/groonga/groonga/commit/acb9022f02235f5239dc55306ac952f21920b935

  Message:
    regexp: make match target text normalized
    
    This is an incompatible change. It takes effect only when no index is
    available. If any index is available, the behavior isn't changed.
    
    This may be confusing, but we chose this behavior for the following
    reasons:
    
      * Consistency with other operators such as OP_MATCH and OP_PREFIX.
      * Keep index search availability. (If users use complex regexp
        syntax such as (?i), we can't use an index for fast search. That
        isn't an expected usage pattern.)

  Added files:
    test/command/suite/select/filter/no_index/regexp/normalized_text.expected
    test/command/suite/select/filter/no_index/regexp/normalized_text.test
  Modified files:
    lib/operator.c
    test/command/suite/select/filter/no_index/regexp/text_text.expected
    test/command/suite/select/filter/no_index/regexp/text_text.test

  Modified: lib/operator.c (+15 -12)
===================================================================
--- lib/operator.c    2015-09-07 17:22:23 +0900 (f399534)
+++ lib/operator.c    2015-09-08 10:25:17 +0900 (c82708b)
@@ -841,23 +841,24 @@ exec_text_operator_raw_text_raw_text(grn_ctx *ctx,
     return GRN_FALSE;
   }
 
-  if (op == GRN_OP_REGEXP) {
-    return exec_text_operator(ctx, op,
-                              target, target_len,
-                              query, query_len);
-  }
-
   normalizer = grn_ctx_get(ctx, GRN_NORMALIZER_AUTO_NAME, -1);
   norm_target = grn_string_open(ctx, target, target_len, normalizer, 0);
-  norm_query  = grn_string_open(ctx, query,  query_len,  normalizer, 0);
   grn_string_get_normalized(ctx, norm_target,
                             &norm_target_raw,
                             &norm_target_raw_length_in_bytes,
                             NULL);
-  grn_string_get_normalized(ctx, norm_query,
-                            &norm_query_raw,
-                            &norm_query_raw_length_in_bytes,
-                            NULL);
+
+  if (op == GRN_OP_REGEXP) {
+    norm_query = NULL;
+    norm_query_raw = query;
+    norm_query_raw_length_in_bytes = query_len;
+  } else {
+    norm_query = grn_string_open(ctx, query,  query_len,  normalizer, 0);
+    grn_string_get_normalized(ctx, norm_query,
+                              &norm_query_raw,
+                              &norm_query_raw_length_in_bytes,
+                              NULL);
+  }
 
   matched = exec_text_operator(ctx, op,
                                norm_target_raw,
@@ -866,7 +867,9 @@ exec_text_operator_raw_text_raw_text(grn_ctx *ctx,
                                norm_query_raw_length_in_bytes);
 
   grn_obj_close(ctx, norm_target);
-  grn_obj_close(ctx, norm_query);
+  if (norm_query) {
+    grn_obj_close(ctx, norm_query);
+  }
   grn_obj_unlink(ctx, normalizer);
 
   return matched;

  Added: test/command/suite/select/filter/no_index/regexp/normalized_text.expected (+44 -0) 100644
===================================================================
--- /dev/null
+++ test/command/suite/select/filter/no_index/regexp/normalized_text.expected    2015-09-08 10:25:17 +0900 (d288752)
@@ -0,0 +1,44 @@
+table_create Memos TABLE_NO_KEY
+[[0,0.0,0.0],true]
+column_create Memos content COLUMN_SCALAR Text
+[[0,0.0,0.0],true]
+load --table Memos
+[
+{"content": "groonga"},
+{"content": "Groonga"},
+{"content": "Mroonga"}
+]
+[[0,0.0,0.0],3]
+select Memos --filter 'content @~ "\\\\Agr"'
+[
+  [
+    0,
+    0.0,
+    0.0
+  ],
+  [
+    [
+      [
+        2
+      ],
+      [
+        [
+          "_id",
+          "UInt32"
+        ],
+        [
+          "content",
+          "Text"
+        ]
+      ],
+      [
+        1,
+        "groonga"
+      ],
+      [
+        2,
+        "Groonga"
+      ]
+    ]
+  ]
+]

  Added: test/command/suite/select/filter/no_index/regexp/normalized_text.test (+11 -0) 100644
===================================================================
--- /dev/null
+++ test/command/suite/select/filter/no_index/regexp/normalized_text.test    2015-09-08 10:25:17 +0900 (7ebed94)
@@ -0,0 +1,11 @@
+table_create Memos TABLE_NO_KEY
+column_create Memos content COLUMN_SCALAR Text
+
+load --table Memos
+[
+{"content": "groonga"},
+{"content": "Groonga"},
+{"content": "Mroonga"}
+]
+
+select Memos --filter 'content @~ "\\\\Agr"'

  Modified: test/command/suite/select/filter/no_index/regexp/text_text.expected (+5 -4)
===================================================================
--- test/command/suite/select/filter/no_index/regexp/text_text.expected    2015-09-07 17:22:23 +0900 (fe6fd67)
+++ test/command/suite/select/filter/no_index/regexp/text_text.expected    2015-09-08 10:25:17 +0900 (c9770a2)
@@ -4,8 +4,9 @@ column_create Memos content COLUMN_SCALAR Text
 [[0,0.0,0.0],true]
 load --table Memos
 [
-{"content": "Groonga"}
+{"content": "groonga"},
+{"content": "rroonga"}
 ]
-[[0,0.0,0.0],1]
-select Memos --filter 'content @~ "\\\\AGr"'
-[[0,0.0,0.0],[[[1],[["_id","UInt32"],["content","Text"]],[1,"Groonga"]]]]
+[[0,0.0,0.0],2]
+select Memos --filter 'content @~ "\\\\Agr"'
+[[0,0.0,0.0],[[[1],[["_id","UInt32"],["content","Text"]],[1,"groonga"]]]]

  Modified: test/command/suite/select/filter/no_index/regexp/text_text.test (+3 -2)
===================================================================
--- test/command/suite/select/filter/no_index/regexp/text_text.test    2015-09-07 17:22:23 +0900 (54d8e2f)
+++ test/command/suite/select/filter/no_index/regexp/text_text.test    2015-09-08 10:25:17 +0900 (aec3462)
@@ -3,7 +3,8 @@ column_create Memos content COLUMN_SCALAR Text
 
 load --table Memos
 [
-{"content": "Groonga"}
+{"content": "groonga"},
+{"content": "rroonga"}
 ]
 
-select Memos --filter 'content @~ "\\\\AGr"'
+select Memos --filter 'content @~ "\\\\Agr"'
-------------- next part --------------
HTML����������������������������...
下載 



More information about the Groonga-commit mailing list
Back to archive index