[Groonga-commit] groonga/groonga at fc579ad [master] Use struct for fuzzy related parameters

Back to archive index

naoa null+****@clear*****
Fri Feb 5 07:24:25 JST 2016


naoa	2016-02-05 07:24:25 +0900 (Fri, 05 Feb 2016)

  New Revision: fc579ad9f27fc852755341de32d43bfefee91607
  https://github.com/groonga/groonga/commit/fc579ad9f27fc852755341de32d43bfefee91607

  Merged a300aa1: Merge pull request #464 from naoa/master

  Message:
    Use struct for fuzzy related parameters

  Modified files:
    include/groonga/groonga.h
    lib/db.c
    lib/grn_db.h
    lib/grn_ii.h
    lib/grn_pat.h
    lib/ii.c
    lib/pat.c
    test/unit/core/test-patricia-trie-search.c

  Modified: include/groonga/groonga.h (+9 -5)
===================================================================
--- include/groonga/groonga.h    2016-02-05 00:12:00 +0900 (323d25f)
+++ include/groonga/groonga.h    2016-02-05 07:24:25 +0900 (247137a)
@@ -935,6 +935,14 @@ GRN_API grn_id grn_obj_id(grn_ctx *ctx, grn_obj *obj);
 
 #define GRN_TABLE_FUZZY_WITH_TRANSPOSITION                  (0x01)
 
+typedef struct _grn_fuzzy_optarg grn_fuzzy_optarg;
+
+struct _grn_fuzzy_optarg {
+  unsigned int max_distance;
+  unsigned int prefix_match_size;
+  int flags;
+};
+
 typedef struct _grn_search_optarg grn_search_optarg;
 
 struct _grn_search_optarg {
@@ -948,11 +956,7 @@ struct _grn_search_optarg {
   grn_obj *scorer;
   grn_obj *scorer_args_expr;
   unsigned int scorer_args_expr_offset;
-  struct {
-    unsigned int prefix_match_size;
-    unsigned int max_distance;
-    int flags;
-  } fuzzy;
+  grn_fuzzy_optarg *fuzzy_args;
 };
 
 GRN_API grn_rc grn_obj_search(grn_ctx *ctx, grn_obj *obj, grn_obj *query,

  Modified: lib/db.c (+3 -10)
===================================================================
--- lib/db.c    2016-02-05 00:12:00 +0900 (d305874)
+++ lib/db.c    2016-02-05 07:24:25 +0900 (19c4823)
@@ -2915,8 +2915,7 @@ grn_table_search(grn_ctx *ctx, grn_obj *table, const void *key, uint32_t key_siz
 
 grn_rc
 grn_table_fuzzy_search(grn_ctx *ctx, grn_obj *table, const void *key, uint32_t key_size,
-                       uint32_t prefix_match_size, uint32_t max_distance, int flags,
-                       grn_obj *res)
+                       grn_fuzzy_optarg *args, grn_obj *res)
 {
   grn_rc rc = GRN_SUCCESS;
   GRN_API_ENTER;
@@ -2926,10 +2925,7 @@ grn_table_fuzzy_search(grn_ctx *ctx, grn_obj *table, const void *key, uint32_t k
       grn_pat *pat = (grn_pat *)table;
       WITH_NORMALIZE(pat, key, key_size, {
         rc = grn_pat_fuzzy_search(ctx, pat, key, key_size,
-                                  prefix_match_size,
-                                  max_distance,
-                                  flags,
-                                  (grn_hash *)res);
+                                  args, (grn_hash *)res);
       });
     }
     break;
@@ -3466,10 +3462,7 @@ grn_obj_search(grn_ctx *ctx, grn_obj *obj, grn_obj *query,
           }
           if (optarg && optarg->mode == GRN_OP_FUZZY) {
             rc = grn_table_fuzzy_search(ctx, obj, key, key_size,
-                                        optarg->fuzzy.prefix_match_size,
-                                        optarg->fuzzy.max_distance,
-                                        optarg->fuzzy.flags,
-                                        res);
+                                        optarg->fuzzy_args, res);
           } else {
             rc = grn_table_search(ctx, obj, key, key_size, mode, res, op);
           }

  Modified: lib/grn_db.h (+1 -2)
===================================================================
--- lib/grn_db.h    2016-02-05 00:12:00 +0900 (a0a5662)
+++ lib/grn_db.h    2016-02-05 07:24:25 +0900 (5b30fd9)
@@ -85,8 +85,7 @@ grn_rc grn_table_search(grn_ctx *ctx, grn_obj *table,
 
 grn_rc grn_table_fuzzy_search(grn_ctx *ctx, grn_obj *table,
                               const void *key, uint32_t key_size,
-                              uint32_t prefix_match_size,
-                              uint32_t max_distance, int flags, grn_obj *res);
+                              grn_fuzzy_optarg *args, grn_obj *res);
 
 grn_id grn_table_next(grn_ctx *ctx, grn_obj *table, grn_id id);
 

  Modified: lib/grn_ii.h (+1 -5)
===================================================================
--- lib/grn_ii.h    2016-02-05 00:12:00 +0900 (6c2ff06)
+++ lib/grn_ii.h    2016-02-05 07:24:25 +0900 (a954a0a)
@@ -143,11 +143,7 @@ struct _grn_select_optarg {
   grn_obj *scorer;
   grn_obj *scorer_args_expr;
   unsigned int scorer_args_expr_offset;
-  struct {
-    unsigned int prefix_match_size;
-    unsigned int max_distance;
-    int flags;
-  } fuzzy;
+  grn_fuzzy_optarg *fuzzy_args;
 };
 
 GRN_API grn_rc grn_ii_column_update(grn_ctx *ctx, grn_ii *ii, grn_id id,

  Modified: lib/grn_pat.h (+2 -2)
===================================================================
--- lib/grn_pat.h    2016-02-05 00:12:00 +0900 (37f6aa4)
+++ lib/grn_pat.h    2016-02-05 07:24:25 +0900 (62b341e)
@@ -110,8 +110,8 @@ void grn_pat_cache_disable(grn_ctx *ctx, grn_pat *pat);
 
 GRN_API grn_rc grn_pat_fuzzy_search(grn_ctx *ctx, grn_pat *pat,
                                     const void *key, unsigned int key_size,
-                                    unsigned int prefix_match_size,
-                                    unsigned int max_distance, int flags, grn_hash *h);
+                                    grn_fuzzy_optarg *args, grn_hash *h);
+
 uint32_t grn_pat_total_key_size(grn_ctx *ctx, grn_pat *pat);
 
 #ifdef __cplusplus

  Modified: lib/ii.c (+20 -43)
===================================================================
--- lib/ii.c    2016-02-05 00:12:00 +0900 (c17d4a7)
+++ lib/ii.c    2016-02-05 07:24:25 +0900 (1549966)
@@ -5460,14 +5460,6 @@ typedef struct {
   grn_posting *p;
 } token_info;
 
-typedef struct {
-  struct {
-    unsigned int prefix_match_size;
-    unsigned int max_distance;
-    int flags;
-  } fuzzy;
-} token_info_optarg;
-
 #define EX_NONE   0
 #define EX_PREFIX 1
 #define EX_SUFFIX 2
@@ -5536,7 +5528,7 @@ token_info_close(grn_ctx *ctx, token_info *ti)
 inline static token_info *
 token_info_open(grn_ctx *ctx, grn_obj *lexicon, grn_ii *ii,
                 const char *key, unsigned int key_size, uint32_t offset, int mode,
-                token_info_optarg *arg)
+                grn_fuzzy_optarg *args)
 {
   int s = 0;
   grn_hash *h;
@@ -5602,10 +5594,7 @@ token_info_open(grn_ctx *ctx, grn_obj *lexicon, grn_ii *ii,
   case EX_FUZZY :
     if ((h = grn_hash_create(ctx, NULL, sizeof(grn_id), 0, 0))) {
       grn_table_fuzzy_search(ctx, lexicon, key, key_size,
-                             arg->fuzzy.prefix_match_size,
-                             arg->fuzzy.max_distance,
-                             arg->fuzzy.flags,
-                             (grn_obj *)h);
+                             args, (grn_obj *)h);
       if (GRN_HASH_SIZE(h)) {
         if ((ti->cursors = cursor_heap_open(ctx, GRN_HASH_SIZE(h)))) {
           GRN_HASH_EACH(ctx, h, id, &tp, NULL, NULL, {
@@ -5682,7 +5671,7 @@ token_compare(const void *a, const void *b)
 inline static grn_rc
 token_info_build(grn_ctx *ctx, grn_obj *lexicon, grn_ii *ii, const char *string, unsigned int string_len,
                  token_info **tis, uint32_t *n, grn_bool *only_skip_token,
-                 grn_operator mode, token_info_optarg *arg)
+                 grn_operator mode)
 {
   token_info *ti;
   const char *key;
@@ -5696,7 +5685,7 @@ token_info_build(grn_ctx *ctx, grn_obj *lexicon, grn_ii *ii, const char *string,
   *only_skip_token = GRN_FALSE;
   if (!token_cursor) { return GRN_NO_MEMORY_AVAILABLE; }
   if (mode == GRN_OP_UNSPLIT) {
-    if ((ti = token_info_open(ctx, lexicon, ii, (char *)token_cursor->orig, token_cursor->orig_blen, 0, EX_BOTH, arg))) {
+    if ((ti = token_info_open(ctx, lexicon, ii, (char *)token_cursor->orig, token_cursor->orig_blen, 0, EX_BOTH, NULL))) {
       tis[(*n)++] = ti;
       rc = GRN_SUCCESS;
     }
@@ -5722,21 +5711,21 @@ token_info_build(grn_ctx *ctx, grn_obj *lexicon, grn_ii *ii, const char *string,
     switch (token_cursor->status) {
     case GRN_TOKEN_CURSOR_DOING :
       key = _grn_table_key(ctx, lexicon, tid, &size);
-      ti = token_info_open(ctx, lexicon, ii, key, size, token_cursor->pos, ef & EX_SUFFIX, arg);
+      ti = token_info_open(ctx, lexicon, ii, key, size, token_cursor->pos, ef & EX_SUFFIX, NULL);
       break;
     case GRN_TOKEN_CURSOR_DONE :
       ti = token_info_open(ctx, lexicon, ii, (const char *)token_cursor->curr,
-                           token_cursor->curr_size, 0, ef, arg);
+                           token_cursor->curr_size, 0, ef, NULL);
       /*
       key = _grn_table_key(ctx, lexicon, tid, &size);
-      ti = token_info_open(ctx, lexicon, ii, token_cursor->curr, token_cursor->curr_size, token_cursor->pos, ef, arg);
+      ti = token_info_open(ctx, lexicon, ii, token_cursor->curr, token_cursor->curr_size, token_cursor->pos, ef, NULL);
       ti = token_info_open(ctx, lexicon, ii, (char *)token_cursor->orig,
-                           token_cursor->orig_blen, token_cursor->pos, ef, arg);
+                           token_cursor->orig_blen, token_cursor->pos, ef, NULL);
       */
       break;
     case GRN_TOKEN_CURSOR_NOT_FOUND :
       ti = token_info_open(ctx, lexicon, ii, (char *)token_cursor->orig,
-                           token_cursor->orig_blen, 0, ef, arg);
+                           token_cursor->orig_blen, 0, ef, NULL);
       break;
     case GRN_TOKEN_CURSOR_DONE_SKIP :
       *only_skip_token = GRN_TRUE;
@@ -5754,17 +5743,17 @@ token_info_build(grn_ctx *ctx, grn_obj *lexicon, grn_ii *ii, const char *string,
         continue;
       case GRN_TOKEN_CURSOR_DOING :
         key = _grn_table_key(ctx, lexicon, tid, &size);
-        ti = token_info_open(ctx, lexicon, ii, key, size, token_cursor->pos, EX_NONE, arg);
+        ti = token_info_open(ctx, lexicon, ii, key, size, token_cursor->pos, EX_NONE, NULL);
         break;
       case GRN_TOKEN_CURSOR_DONE :
         if (tid) {
           key = _grn_table_key(ctx, lexicon, tid, &size);
-          ti = token_info_open(ctx, lexicon, ii, key, size, token_cursor->pos, ef & EX_PREFIX, arg);
+          ti = token_info_open(ctx, lexicon, ii, key, size, token_cursor->pos, ef & EX_PREFIX, NULL);
           break;
         } /* else fallthru */
       default :
         ti = token_info_open(ctx, lexicon, ii, (char *)token_cursor->curr,
-                             token_cursor->curr_size, token_cursor->pos, ef & EX_PREFIX, arg);
+                             token_cursor->curr_size, token_cursor->pos, ef & EX_PREFIX, NULL);
         break;
       }
       if (!ti) {
@@ -5783,7 +5772,7 @@ inline static grn_rc
 token_info_build_fuzzy(grn_ctx *ctx, grn_obj *lexicon, grn_ii *ii,
                        const char *string, unsigned int string_len,
                        token_info **tis, uint32_t *n, grn_bool *only_skip_token,
-                       grn_operator mode, token_info_optarg *arg)
+                       grn_operator mode, grn_fuzzy_optarg *args)
 {
   token_info *ti;
   grn_rc rc = GRN_END_OF_DATA;
@@ -5802,7 +5791,7 @@ token_info_build_fuzzy(grn_ctx *ctx, grn_obj *lexicon, grn_ii *ii,
   case GRN_TOKEN_CURSOR_DOING :
   case GRN_TOKEN_CURSOR_DONE :
     ti = token_info_open(ctx, lexicon, ii, (const char *)token_cursor->curr,
-                         token_cursor->curr_size, token_cursor->pos, EX_FUZZY, arg);
+                         token_cursor->curr_size, token_cursor->pos, EX_FUZZY, args);
     break;
   default :
     break;
@@ -5819,7 +5808,7 @@ token_info_build_fuzzy(grn_ctx *ctx, grn_obj *lexicon, grn_ii *ii,
     case GRN_TOKEN_CURSOR_DOING :
     case GRN_TOKEN_CURSOR_DONE :
       ti = token_info_open(ctx, lexicon, ii, (const char *)token_cursor->curr,
-                           token_cursor->curr_size, token_cursor->pos, EX_FUZZY, arg);
+                           token_cursor->curr_size, token_cursor->pos, EX_FUZZY, args);
       break;
     default :
       break;
@@ -6553,7 +6542,6 @@ grn_ii_select(grn_ctx *ctx, grn_ii *ii,
   grn_obj *lexicon = ii->lexicon;
   grn_scorer_score_func *score_func = NULL;
   grn_scorer_matched_record record;
-  token_info_optarg token_info_arg = {0};
 
   if (!lexicon || !ii || !s) { return GRN_INVALID_ARGUMENT; }
   if (optarg) {
@@ -6563,11 +6551,6 @@ grn_ii_select(grn_ctx *ctx, grn_ii *ii,
     } else if (optarg->vector_size) {
       wvm = optarg->weight_vector ? grn_wv_static : grn_wv_constant;
     }
-    if (mode == GRN_OP_FUZZY) {
-      token_info_arg.fuzzy.prefix_match_size = optarg->fuzzy.prefix_match_size;
-      token_info_arg.fuzzy.max_distance = optarg->fuzzy.max_distance;
-      token_info_arg.fuzzy.flags = optarg->fuzzy.flags;
-    }
   }
   if (mode == GRN_OP_SIMILAR) {
     return grn_ii_similar_search(ctx, ii, string, string_len, s, op, optarg);
@@ -6589,9 +6572,9 @@ grn_ii_select(grn_ctx *ctx, grn_ii *ii,
     return GRN_NO_MEMORY_AVAILABLE;
   }
   if (mode == GRN_OP_FUZZY) {
-    if (token_info_build_fuzzy(ctx, lexicon, ii, string, string_len, tis, &n, &only_skip_token, mode, &token_info_arg) || !n) { goto exit; }
+    if (token_info_build_fuzzy(ctx, lexicon, ii, string, string_len, tis, &n, &only_skip_token, mode, optarg->fuzzy_args) || !n) { goto exit; }
   } else {
-    if (token_info_build(ctx, lexicon, ii, string, string_len, tis, &n, &only_skip_token, mode, &token_info_arg) || !n) { goto exit; }
+    if (token_info_build(ctx, lexicon, ii, string, string_len, tis, &n, &only_skip_token, mode) || !n) { goto exit; }
   }
   switch (mode) {
   case GRN_OP_NEAR2 :
@@ -6863,7 +6846,6 @@ grn_ii_estimate_size_for_query(grn_ctx *ctx, grn_ii *ii,
   grn_operator mode = GRN_OP_EXACT;
   double estimated_size = 0;
   double normalized_ratio = 1.0;
-  token_info_optarg token_info_arg = {0};
 
   if (query_len == 0) {
     return 0;
@@ -6883,9 +6865,6 @@ grn_ii_estimate_size_for_query(grn_ctx *ctx, grn_ii *ii,
       break;
     case GRN_OP_FUZZY :
       mode = optarg->mode;
-      token_info_arg.fuzzy.prefix_match_size = optarg->fuzzy.prefix_match_size;
-      token_info_arg.fuzzy.max_distance = optarg->fuzzy.max_distance;
-      token_info_arg.fuzzy.flags = optarg->fuzzy.flags;
     default :
       break;
     }
@@ -6904,11 +6883,11 @@ grn_ii_estimate_size_for_query(grn_ctx *ctx, grn_ii *ii,
   switch (mode) {
   case GRN_OP_FUZZY :
     rc = token_info_build_fuzzy(ctx, lexicon, ii, query, query_len,
-                                tis, &n_tis, &only_skip_token, mode, &token_info_arg);
+                                tis, &n_tis, &only_skip_token, mode, optarg->fuzzy_args);
     break;
   default :
     rc = token_info_build(ctx, lexicon, ii, query, query_len,
-                          tis, &n_tis, &only_skip_token, mode, &token_info_arg);
+                          tis, &n_tis, &only_skip_token, mode);
     break;
   }
 
@@ -6990,9 +6969,7 @@ grn_ii_sel(grn_ctx *ctx, grn_ii *ii, const char *string, unsigned int string_len
         break;
       case GRN_OP_FUZZY :
         arg.mode = optarg->mode;
-        arg.fuzzy.prefix_match_size = optarg->fuzzy.prefix_match_size;
-        arg.fuzzy.max_distance = optarg->fuzzy.max_distance;
-        arg.fuzzy.flags = optarg->fuzzy.flags;
+        arg.fuzzy_args = optarg->fuzzy_args;
         break;
       default :
         break;

  Modified: lib/pat.c (+9 -2)
===================================================================
--- lib/pat.c    2016-02-05 00:12:00 +0900 (88057be)
+++ lib/pat.c    2016-02-05 07:24:25 +0900 (7d3d9bd)
@@ -1384,8 +1384,7 @@ _grn_pat_fuzzy_search(grn_ctx *ctx, grn_pat *pat, grn_id id,
 grn_rc
 grn_pat_fuzzy_search(grn_ctx *ctx, grn_pat *pat,
                      const void *key, uint32_t key_size,
-                     uint32_t prefix_match_size,
-                     uint32_t max_distance, int flags, grn_hash *h)
+                     grn_fuzzy_optarg *args, grn_hash *h)
 {
   pat_node *node;
   grn_id id;
@@ -1394,10 +1393,18 @@ grn_pat_fuzzy_search(grn_ctx *ctx, grn_pat *pat,
   const char *s = key;
   const char *e = (const char *)key + key_size;
   fuzzy_node last_node;
+  uint32_t max_distance = 1;
+  uint32_t prefix_match_size = 0;
+  int flags = 0;
   grn_rc rc = grn_pat_error_if_truncated(ctx, pat);
   if (rc != GRN_SUCCESS) {
     return rc;
   }
+  if (args) {
+    max_distance = args->max_distance;
+    prefix_match_size = args->prefix_match_size;
+    flags = args->flags;
+  }
   if (key_size > GRN_TABLE_MAX_KEY_SIZE ||
       max_distance > GRN_TABLE_MAX_KEY_SIZE ||
       prefix_match_size > key_size) {

  Modified: test/unit/core/test-patricia-trie-search.c (+5 -2)
===================================================================
--- test/unit/core/test-patricia-trie-search.c    2016-02-05 00:12:00 +0900 (a0f26fd)
+++ test/unit/core/test-patricia-trie-search.c    2016-02-05 07:24:25 +0900 (34c3a6f)
@@ -426,6 +426,10 @@ test_fuzzy_search(gconstpointer data)
   const gchar key4[]  = "bbbb";
   const gchar key5[]  = "bbbbb";
   const gchar key6[]  = "cdefg";
+  grn_fuzzy_optarg args;
+  args.prefix_match_size = 0;
+  args.max_distance = 1;
+  args.flags = GRN_TABLE_FUZZY_WITH_TRANSPOSITION;
 
   trie_test_data_set_parameters(test_data);
 
@@ -443,8 +447,7 @@ test_fuzzy_search(gconstpointer data)
                            grn_pat_fuzzy_search(context, trie,
                                                 test_data->search_key,
                                                 strlen(test_data->search_key),
-                                                0, 1, 1,
-                                                hash));
+                                                &args, hash));
   gcut_assert_equal_list_string(test_data->expected_strings,
                                 retrieve_all_keys());
 }
-------------- next part --------------
An HTML attachment was scrubbed from this message and stored separately by the archive.
Download



More information about the Groonga-commit mailing list
Back to archive index