naoa
null+****@clear*****
Fri Feb 5 07:24:25 JST 2016
naoa 2016-02-05 07:24:25 +0900 (Fri, 05 Feb 2016) New Revision: fc579ad9f27fc852755341de32d43bfefee91607 https://github.com/groonga/groonga/commit/fc579ad9f27fc852755341de32d43bfefee91607 Merged a300aa1: Merge pull request #464 from naoa/master Message: Use struct for fuzzy related parameters Modified files: include/groonga/groonga.h lib/db.c lib/grn_db.h lib/grn_ii.h lib/grn_pat.h lib/ii.c lib/pat.c test/unit/core/test-patricia-trie-search.c Modified: include/groonga/groonga.h (+9 -5) =================================================================== --- include/groonga/groonga.h 2016-02-05 00:12:00 +0900 (323d25f) +++ include/groonga/groonga.h 2016-02-05 07:24:25 +0900 (247137a) @@ -935,6 +935,14 @@ GRN_API grn_id grn_obj_id(grn_ctx *ctx, grn_obj *obj); #define GRN_TABLE_FUZZY_WITH_TRANSPOSITION (0x01) +typedef struct _grn_fuzzy_optarg grn_fuzzy_optarg; + +struct _grn_fuzzy_optarg { + unsigned int max_distance; + unsigned int prefix_match_size; + int flags; +}; + typedef struct _grn_search_optarg grn_search_optarg; struct _grn_search_optarg { @@ -948,11 +956,7 @@ struct _grn_search_optarg { grn_obj *scorer; grn_obj *scorer_args_expr; unsigned int scorer_args_expr_offset; - struct { - unsigned int prefix_match_size; - unsigned int max_distance; - int flags; - } fuzzy; + grn_fuzzy_optarg *fuzzy_args; }; GRN_API grn_rc grn_obj_search(grn_ctx *ctx, grn_obj *obj, grn_obj *query, Modified: lib/db.c (+3 -10) =================================================================== --- lib/db.c 2016-02-05 00:12:00 +0900 (d305874) +++ lib/db.c 2016-02-05 07:24:25 +0900 (19c4823) @@ -2915,8 +2915,7 @@ grn_table_search(grn_ctx *ctx, grn_obj *table, const void *key, uint32_t key_siz grn_rc grn_table_fuzzy_search(grn_ctx *ctx, grn_obj *table, const void *key, uint32_t key_size, - uint32_t prefix_match_size, uint32_t max_distance, int flags, - grn_obj *res) + grn_fuzzy_optarg *args, grn_obj *res) { grn_rc rc = GRN_SUCCESS; GRN_API_ENTER; @@ -2926,10 +2925,7 @@ 
grn_table_fuzzy_search(grn_ctx *ctx, grn_obj *table, const void *key, uint32_t k grn_pat *pat = (grn_pat *)table; WITH_NORMALIZE(pat, key, key_size, { rc = grn_pat_fuzzy_search(ctx, pat, key, key_size, - prefix_match_size, - max_distance, - flags, - (grn_hash *)res); + args, (grn_hash *)res); }); } break; @@ -3466,10 +3462,7 @@ grn_obj_search(grn_ctx *ctx, grn_obj *obj, grn_obj *query, } if (optarg && optarg->mode == GRN_OP_FUZZY) { rc = grn_table_fuzzy_search(ctx, obj, key, key_size, - optarg->fuzzy.prefix_match_size, - optarg->fuzzy.max_distance, - optarg->fuzzy.flags, - res); + optarg->fuzzy_args, res); } else { rc = grn_table_search(ctx, obj, key, key_size, mode, res, op); } Modified: lib/grn_db.h (+1 -2) =================================================================== --- lib/grn_db.h 2016-02-05 00:12:00 +0900 (a0a5662) +++ lib/grn_db.h 2016-02-05 07:24:25 +0900 (5b30fd9) @@ -85,8 +85,7 @@ grn_rc grn_table_search(grn_ctx *ctx, grn_obj *table, grn_rc grn_table_fuzzy_search(grn_ctx *ctx, grn_obj *table, const void *key, uint32_t key_size, - uint32_t prefix_match_size, - uint32_t max_distance, int flags, grn_obj *res); + grn_fuzzy_optarg *args, grn_obj *res); grn_id grn_table_next(grn_ctx *ctx, grn_obj *table, grn_id id); Modified: lib/grn_ii.h (+1 -5) =================================================================== --- lib/grn_ii.h 2016-02-05 00:12:00 +0900 (6c2ff06) +++ lib/grn_ii.h 2016-02-05 07:24:25 +0900 (a954a0a) @@ -143,11 +143,7 @@ struct _grn_select_optarg { grn_obj *scorer; grn_obj *scorer_args_expr; unsigned int scorer_args_expr_offset; - struct { - unsigned int prefix_match_size; - unsigned int max_distance; - int flags; - } fuzzy; + grn_fuzzy_optarg *fuzzy_args; }; GRN_API grn_rc grn_ii_column_update(grn_ctx *ctx, grn_ii *ii, grn_id id, Modified: lib/grn_pat.h (+2 -2) =================================================================== --- lib/grn_pat.h 2016-02-05 00:12:00 +0900 (37f6aa4) +++ lib/grn_pat.h 2016-02-05 07:24:25 +0900 (62b341e) @@ 
-110,8 +110,8 @@ void grn_pat_cache_disable(grn_ctx *ctx, grn_pat *pat); GRN_API grn_rc grn_pat_fuzzy_search(grn_ctx *ctx, grn_pat *pat, const void *key, unsigned int key_size, - unsigned int prefix_match_size, - unsigned int max_distance, int flags, grn_hash *h); + grn_fuzzy_optarg *args, grn_hash *h); + uint32_t grn_pat_total_key_size(grn_ctx *ctx, grn_pat *pat); #ifdef __cplusplus Modified: lib/ii.c (+20 -43) =================================================================== --- lib/ii.c 2016-02-05 00:12:00 +0900 (c17d4a7) +++ lib/ii.c 2016-02-05 07:24:25 +0900 (1549966) @@ -5460,14 +5460,6 @@ typedef struct { grn_posting *p; } token_info; -typedef struct { - struct { - unsigned int prefix_match_size; - unsigned int max_distance; - int flags; - } fuzzy; -} token_info_optarg; - #define EX_NONE 0 #define EX_PREFIX 1 #define EX_SUFFIX 2 @@ -5536,7 +5528,7 @@ token_info_close(grn_ctx *ctx, token_info *ti) inline static token_info * token_info_open(grn_ctx *ctx, grn_obj *lexicon, grn_ii *ii, const char *key, unsigned int key_size, uint32_t offset, int mode, - token_info_optarg *arg) + grn_fuzzy_optarg *args) { int s = 0; grn_hash *h; @@ -5602,10 +5594,7 @@ token_info_open(grn_ctx *ctx, grn_obj *lexicon, grn_ii *ii, case EX_FUZZY : if ((h = grn_hash_create(ctx, NULL, sizeof(grn_id), 0, 0))) { grn_table_fuzzy_search(ctx, lexicon, key, key_size, - arg->fuzzy.prefix_match_size, - arg->fuzzy.max_distance, - arg->fuzzy.flags, - (grn_obj *)h); + args, (grn_obj *)h); if (GRN_HASH_SIZE(h)) { if ((ti->cursors = cursor_heap_open(ctx, GRN_HASH_SIZE(h)))) { GRN_HASH_EACH(ctx, h, id, &tp, NULL, NULL, { @@ -5682,7 +5671,7 @@ token_compare(const void *a, const void *b) inline static grn_rc token_info_build(grn_ctx *ctx, grn_obj *lexicon, grn_ii *ii, const char *string, unsigned int string_len, token_info **tis, uint32_t *n, grn_bool *only_skip_token, - grn_operator mode, token_info_optarg *arg) + grn_operator mode) { token_info *ti; const char *key; @@ -5696,7 +5685,7 @@ 
token_info_build(grn_ctx *ctx, grn_obj *lexicon, grn_ii *ii, const char *string, *only_skip_token = GRN_FALSE; if (!token_cursor) { return GRN_NO_MEMORY_AVAILABLE; } if (mode == GRN_OP_UNSPLIT) { - if ((ti = token_info_open(ctx, lexicon, ii, (char *)token_cursor->orig, token_cursor->orig_blen, 0, EX_BOTH, arg))) { + if ((ti = token_info_open(ctx, lexicon, ii, (char *)token_cursor->orig, token_cursor->orig_blen, 0, EX_BOTH, NULL))) { tis[(*n)++] = ti; rc = GRN_SUCCESS; } @@ -5722,21 +5711,21 @@ token_info_build(grn_ctx *ctx, grn_obj *lexicon, grn_ii *ii, const char *string, switch (token_cursor->status) { case GRN_TOKEN_CURSOR_DOING : key = _grn_table_key(ctx, lexicon, tid, &size); - ti = token_info_open(ctx, lexicon, ii, key, size, token_cursor->pos, ef & EX_SUFFIX, arg); + ti = token_info_open(ctx, lexicon, ii, key, size, token_cursor->pos, ef & EX_SUFFIX, NULL); break; case GRN_TOKEN_CURSOR_DONE : ti = token_info_open(ctx, lexicon, ii, (const char *)token_cursor->curr, - token_cursor->curr_size, 0, ef, arg); + token_cursor->curr_size, 0, ef, NULL); /* key = _grn_table_key(ctx, lexicon, tid, &size); - ti = token_info_open(ctx, lexicon, ii, token_cursor->curr, token_cursor->curr_size, token_cursor->pos, ef, arg); + ti = token_info_open(ctx, lexicon, ii, token_cursor->curr, token_cursor->curr_size, token_cursor->pos, ef, NULL); ti = token_info_open(ctx, lexicon, ii, (char *)token_cursor->orig, - token_cursor->orig_blen, token_cursor->pos, ef, arg); + token_cursor->orig_blen, token_cursor->pos, ef, NULL); */ break; case GRN_TOKEN_CURSOR_NOT_FOUND : ti = token_info_open(ctx, lexicon, ii, (char *)token_cursor->orig, - token_cursor->orig_blen, 0, ef, arg); + token_cursor->orig_blen, 0, ef, NULL); break; case GRN_TOKEN_CURSOR_DONE_SKIP : *only_skip_token = GRN_TRUE; @@ -5754,17 +5743,17 @@ token_info_build(grn_ctx *ctx, grn_obj *lexicon, grn_ii *ii, const char *string, continue; case GRN_TOKEN_CURSOR_DOING : key = _grn_table_key(ctx, lexicon, tid, &size); - ti = 
token_info_open(ctx, lexicon, ii, key, size, token_cursor->pos, EX_NONE, arg); + ti = token_info_open(ctx, lexicon, ii, key, size, token_cursor->pos, EX_NONE, NULL); break; case GRN_TOKEN_CURSOR_DONE : if (tid) { key = _grn_table_key(ctx, lexicon, tid, &size); - ti = token_info_open(ctx, lexicon, ii, key, size, token_cursor->pos, ef & EX_PREFIX, arg); + ti = token_info_open(ctx, lexicon, ii, key, size, token_cursor->pos, ef & EX_PREFIX, NULL); break; } /* else fallthru */ default : ti = token_info_open(ctx, lexicon, ii, (char *)token_cursor->curr, - token_cursor->curr_size, token_cursor->pos, ef & EX_PREFIX, arg); + token_cursor->curr_size, token_cursor->pos, ef & EX_PREFIX, NULL); break; } if (!ti) { @@ -5783,7 +5772,7 @@ inline static grn_rc token_info_build_fuzzy(grn_ctx *ctx, grn_obj *lexicon, grn_ii *ii, const char *string, unsigned int string_len, token_info **tis, uint32_t *n, grn_bool *only_skip_token, - grn_operator mode, token_info_optarg *arg) + grn_operator mode, grn_fuzzy_optarg *args) { token_info *ti; grn_rc rc = GRN_END_OF_DATA; @@ -5802,7 +5791,7 @@ token_info_build_fuzzy(grn_ctx *ctx, grn_obj *lexicon, grn_ii *ii, case GRN_TOKEN_CURSOR_DOING : case GRN_TOKEN_CURSOR_DONE : ti = token_info_open(ctx, lexicon, ii, (const char *)token_cursor->curr, - token_cursor->curr_size, token_cursor->pos, EX_FUZZY, arg); + token_cursor->curr_size, token_cursor->pos, EX_FUZZY, args); break; default : break; @@ -5819,7 +5808,7 @@ token_info_build_fuzzy(grn_ctx *ctx, grn_obj *lexicon, grn_ii *ii, case GRN_TOKEN_CURSOR_DOING : case GRN_TOKEN_CURSOR_DONE : ti = token_info_open(ctx, lexicon, ii, (const char *)token_cursor->curr, - token_cursor->curr_size, token_cursor->pos, EX_FUZZY, arg); + token_cursor->curr_size, token_cursor->pos, EX_FUZZY, args); break; default : break; @@ -6553,7 +6542,6 @@ grn_ii_select(grn_ctx *ctx, grn_ii *ii, grn_obj *lexicon = ii->lexicon; grn_scorer_score_func *score_func = NULL; grn_scorer_matched_record record; - token_info_optarg 
token_info_arg = {0}; if (!lexicon || !ii || !s) { return GRN_INVALID_ARGUMENT; } if (optarg) { @@ -6563,11 +6551,6 @@ grn_ii_select(grn_ctx *ctx, grn_ii *ii, } else if (optarg->vector_size) { wvm = optarg->weight_vector ? grn_wv_static : grn_wv_constant; } - if (mode == GRN_OP_FUZZY) { - token_info_arg.fuzzy.prefix_match_size = optarg->fuzzy.prefix_match_size; - token_info_arg.fuzzy.max_distance = optarg->fuzzy.max_distance; - token_info_arg.fuzzy.flags = optarg->fuzzy.flags; - } } if (mode == GRN_OP_SIMILAR) { return grn_ii_similar_search(ctx, ii, string, string_len, s, op, optarg); @@ -6589,9 +6572,9 @@ grn_ii_select(grn_ctx *ctx, grn_ii *ii, return GRN_NO_MEMORY_AVAILABLE; } if (mode == GRN_OP_FUZZY) { - if (token_info_build_fuzzy(ctx, lexicon, ii, string, string_len, tis, &n, &only_skip_token, mode, &token_info_arg) || !n) { goto exit; } + if (token_info_build_fuzzy(ctx, lexicon, ii, string, string_len, tis, &n, &only_skip_token, mode, optarg->fuzzy_args) || !n) { goto exit; } } else { - if (token_info_build(ctx, lexicon, ii, string, string_len, tis, &n, &only_skip_token, mode, &token_info_arg) || !n) { goto exit; } + if (token_info_build(ctx, lexicon, ii, string, string_len, tis, &n, &only_skip_token, mode) || !n) { goto exit; } } switch (mode) { case GRN_OP_NEAR2 : @@ -6863,7 +6846,6 @@ grn_ii_estimate_size_for_query(grn_ctx *ctx, grn_ii *ii, grn_operator mode = GRN_OP_EXACT; double estimated_size = 0; double normalized_ratio = 1.0; - token_info_optarg token_info_arg = {0}; if (query_len == 0) { return 0; @@ -6883,9 +6865,6 @@ grn_ii_estimate_size_for_query(grn_ctx *ctx, grn_ii *ii, break; case GRN_OP_FUZZY : mode = optarg->mode; - token_info_arg.fuzzy.prefix_match_size = optarg->fuzzy.prefix_match_size; - token_info_arg.fuzzy.max_distance = optarg->fuzzy.max_distance; - token_info_arg.fuzzy.flags = optarg->fuzzy.flags; default : break; } @@ -6904,11 +6883,11 @@ grn_ii_estimate_size_for_query(grn_ctx *ctx, grn_ii *ii, switch (mode) { case GRN_OP_FUZZY : rc = 
token_info_build_fuzzy(ctx, lexicon, ii, query, query_len, - tis, &n_tis, &only_skip_token, mode, &token_info_arg); + tis, &n_tis, &only_skip_token, mode, optarg->fuzzy_args); break; default : rc = token_info_build(ctx, lexicon, ii, query, query_len, - tis, &n_tis, &only_skip_token, mode, &token_info_arg); + tis, &n_tis, &only_skip_token, mode); break; } @@ -6990,9 +6969,7 @@ grn_ii_sel(grn_ctx *ctx, grn_ii *ii, const char *string, unsigned int string_len break; case GRN_OP_FUZZY : arg.mode = optarg->mode; - arg.fuzzy.prefix_match_size = optarg->fuzzy.prefix_match_size; - arg.fuzzy.max_distance = optarg->fuzzy.max_distance; - arg.fuzzy.flags = optarg->fuzzy.flags; + arg.fuzzy_args = optarg->fuzzy_args; break; default : break; Modified: lib/pat.c (+9 -2) =================================================================== --- lib/pat.c 2016-02-05 00:12:00 +0900 (88057be) +++ lib/pat.c 2016-02-05 07:24:25 +0900 (7d3d9bd) @@ -1384,8 +1384,7 @@ _grn_pat_fuzzy_search(grn_ctx *ctx, grn_pat *pat, grn_id id, grn_rc grn_pat_fuzzy_search(grn_ctx *ctx, grn_pat *pat, const void *key, uint32_t key_size, - uint32_t prefix_match_size, - uint32_t max_distance, int flags, grn_hash *h) + grn_fuzzy_optarg *args, grn_hash *h) { pat_node *node; grn_id id; @@ -1394,10 +1393,18 @@ grn_pat_fuzzy_search(grn_ctx *ctx, grn_pat *pat, const char *s = key; const char *e = (const char *)key + key_size; fuzzy_node last_node; + uint32_t max_distance = 1; + uint32_t prefix_match_size = 0; + int flags = 0; grn_rc rc = grn_pat_error_if_truncated(ctx, pat); if (rc != GRN_SUCCESS) { return rc; } + if (args) { + max_distance = args->max_distance; + prefix_match_size = args->prefix_match_size; + flags = args->flags; + } if (key_size > GRN_TABLE_MAX_KEY_SIZE || max_distance > GRN_TABLE_MAX_KEY_SIZE || prefix_match_size > key_size) { Modified: test/unit/core/test-patricia-trie-search.c (+5 -2) =================================================================== --- test/unit/core/test-patricia-trie-search.c 
2016-02-05 00:12:00 +0900 (a0f26fd) +++ test/unit/core/test-patricia-trie-search.c 2016-02-05 07:24:25 +0900 (34c3a6f) @@ -426,6 +426,10 @@ test_fuzzy_search(gconstpointer data) const gchar key4[] = "bbbb"; const gchar key5[] = "bbbbb"; const gchar key6[] = "cdefg"; + grn_fuzzy_optarg args; + args.prefix_match_size = 0; + args.max_distance = 1; + args.flags = GRN_TABLE_FUZZY_WITH_TRANSPOSITION; trie_test_data_set_parameters(test_data); @@ -443,8 +447,7 @@ test_fuzzy_search(gconstpointer data) grn_pat_fuzzy_search(context, trie, test_data->search_key, strlen(test_data->search_key), - 0, 1, 1, - hash)); + &args, hash)); gcut_assert_equal_list_string(test_data->expected_strings, retrieve_all_keys()); } -------------- next part -------------- [HTML attachment notice; original text lost to character-encoding errors]... 下載