naoa
null+****@clear*****
Sun Aug 10 03:41:10 JST 2014
naoa 2014-08-10 03:41:10 +0900 (Sun, 10 Aug 2014) New Revision: 1789121883dcfa264d60bdeb850d3dfe9ce16c21 https://github.com/groonga/groonga/commit/1789121883dcfa264d60bdeb850d3dfe9ce16c21 Merged c514768: Merge pull request #185 from naoa/add-highlight_html-function Message: Add highlight_full() function It tags output text. It can specify several options and multiple keywords. highlight_full(column, normalizer_name, html_escape_flag, keyword1, open_tag1, end_tag1 ... [keywordN, open_tagN, end_tagN]) TODO: Document it. Added files: test/command/suite/select/function/highlight_full/invalid_arguments.expected test/command/suite/select/function/highlight_full/invalid_arguments.test test/command/suite/select/function/highlight_full/no_normalize.expected test/command/suite/select/function/highlight_full/no_normalize.test test/command/suite/select/function/highlight_full/normalizer_failure.expected test/command/suite/select/function/highlight_full/normalizer_failure.test test/command/suite/select/function/highlight_full/not_enough_arguments.expected test/command/suite/select/function/highlight_full/not_enough_arguments.test test/command/suite/select/function/highlight_full/not_escaped.expected test/command/suite/select/function/highlight_full/not_escaped.test test/command/suite/select/function/highlight_full/one_keyword.expected test/command/suite/select/function/highlight_full/one_keyword.test test/command/suite/select/function/highlight_full/two_keywords.expected test/command/suite/select/function/highlight_full/two_keywords.test Modified files: lib/proc.c Modified: lib/proc.c (+180 -0) =================================================================== --- lib/proc.c 2014-08-10 00:35:17 +0900 (84480ab) +++ lib/proc.c 2014-08-10 03:41:10 +0900 (5eaacaf) @@ -4722,6 +4722,183 @@ exit : return rc; } +static grn_rc +grn_pat_tag_keys(grn_ctx *ctx, grn_obj *keywords, + const char *string, unsigned int string_length, + const char **open_tags, unsigned int *open_tag_lengths, + 
const char **close_tags, unsigned int *close_tag_lengths, + unsigned int n_tags, + grn_obj *highlighted, + grn_bool html_escape_flag) +{ + while (string_length > 0) { +#define MAX_N_HITS 1024 + grn_pat_scan_hit hits[MAX_N_HITS]; + const char *rest; + unsigned int i, n_hits; + unsigned int previous = 0; + + n_hits = grn_pat_scan(ctx, (grn_pat *)keywords, + string, string_length, + hits, MAX_N_HITS, &rest); + + for (i = 0; i < n_hits; i++) { + unsigned int nth_tag; + if (hits[i].offset - previous > 0) { + if (html_escape_flag) { + grn_text_escape_xml(ctx, highlighted, + string + previous, hits[i].offset - previous); + } else { + GRN_TEXT_PUT(ctx, highlighted, + string + previous, hits[i].offset - previous); + } + } + nth_tag = ((hits[i].id - 1) % n_tags); + GRN_TEXT_PUT(ctx, highlighted, + open_tags[nth_tag], open_tag_lengths[nth_tag]); + if (html_escape_flag) { + grn_text_escape_xml(ctx, highlighted, + string + hits[i].offset, hits[i].length); + } else { + GRN_TEXT_PUT(ctx, highlighted, + string + hits[i].offset, hits[i].length); + } + GRN_TEXT_PUT(ctx, highlighted, + close_tags[nth_tag], close_tag_lengths[nth_tag]); + previous = hits[i].offset + hits[i].length; + } + if (string_length - previous > 0) { + if (html_escape_flag) { + grn_text_escape_xml(ctx, highlighted, + string + previous, string_length - previous); + } else { + GRN_TEXT_PUT(ctx, highlighted, + string + previous, string_length - previous); + } + } + string_length -= rest - string; + string = rest; +#undef MAX_N_HITS + } + + return GRN_SUCCESS; +} + +static grn_obj * +func_highlight_full(grn_ctx *ctx, int nargs, grn_obj **args, + grn_user_data *user_data) +{ + grn_obj *highlighted = NULL; + +#define N_REQUIRED_ARGS 3 +#define KEYWORD_SET_SIZE 3 + if (nargs >= (N_REQUIRED_ARGS + KEYWORD_SET_SIZE) && + (nargs - N_REQUIRED_ARGS) % KEYWORD_SET_SIZE == 0) { + grn_obj *string = args[0]; + grn_obj *normalizer_name = args[1]; + grn_obj *html_escape_flag = args[2]; + grn_obj **keyword_set_args = args + 
N_REQUIRED_ARGS; + unsigned int n_keyword_sets = (nargs - N_REQUIRED_ARGS) / KEYWORD_SET_SIZE; + unsigned int i; + grn_obj open_tags; + grn_obj open_tag_lengths; + grn_obj close_tags; + grn_obj close_tag_lengths; + grn_obj *keywords; + + keywords = grn_table_create(ctx, NULL, 0, NULL, + GRN_OBJ_TABLE_PAT_KEY, + grn_ctx_at(ctx, GRN_DB_SHORT_TEXT), + NULL); + + if (GRN_TEXT_LEN(normalizer_name)) { + grn_obj *normalizer; + normalizer = grn_ctx_get(ctx, + GRN_TEXT_VALUE(normalizer_name), + GRN_TEXT_LEN(normalizer_name)); + if (!is_normalizer(ctx, normalizer)) { + grn_obj inspected; + GRN_TEXT_INIT(&inspected, 0); + grn_inspect(ctx, &inspected, normalizer); + ERR(GRN_INVALID_ARGUMENT, + "[highlight_full] not normalizer: %.*s", + (int)GRN_TEXT_LEN(&inspected), + GRN_TEXT_VALUE(&inspected)); + GRN_OBJ_FIN(ctx, &inspected); + grn_obj_unlink(ctx, normalizer); + return NULL; + } + grn_obj_set_info(ctx, keywords, GRN_INFO_NORMALIZER, normalizer); + grn_obj_unlink(ctx, normalizer); + } + + GRN_OBJ_INIT(&open_tags, GRN_BULK, 0, GRN_DB_VOID); + GRN_OBJ_INIT(&open_tag_lengths, GRN_BULK, 0, GRN_DB_VOID); + GRN_OBJ_INIT(&close_tags, GRN_BULK, 0, GRN_DB_VOID); + GRN_OBJ_INIT(&close_tag_lengths, GRN_BULK, 0, GRN_DB_VOID); + for (i = 0; i < n_keyword_sets; i++) { + grn_obj *keyword = keyword_set_args[i * KEYWORD_SET_SIZE + 0]; + grn_obj *open_tag = keyword_set_args[i * KEYWORD_SET_SIZE + 1]; + grn_obj *close_tag = keyword_set_args[i * KEYWORD_SET_SIZE + 2]; + + grn_table_add(ctx, keywords, + GRN_TEXT_VALUE(keyword), + GRN_TEXT_LEN(keyword), + NULL); + + { + const char *open_tag_content = GRN_TEXT_VALUE(open_tag); + grn_bulk_write(ctx, &open_tags, + (const char *)(&open_tag_content), + sizeof(char *)); + } + { + unsigned int open_tag_length = GRN_TEXT_LEN(open_tag); + grn_bulk_write(ctx, &open_tag_lengths, + (const char *)(&open_tag_length), + sizeof(unsigned int)); + } + { + const char *close_tag_content = GRN_TEXT_VALUE(close_tag); + grn_bulk_write(ctx, &close_tags, + (const char 
*)(&close_tag_content), + sizeof(char *)); + } + { + unsigned int close_tag_length = GRN_TEXT_LEN(close_tag); + grn_bulk_write(ctx, &close_tag_lengths, + (const char *)(&close_tag_length), + sizeof(unsigned int)); + } + } + + highlighted = GRN_PROC_ALLOC(GRN_DB_TEXT, 0); + grn_pat_tag_keys(ctx, keywords, + GRN_TEXT_VALUE(string), GRN_TEXT_LEN(string), + (const char **)GRN_BULK_HEAD(&open_tags), + (unsigned int *)GRN_BULK_HEAD(&open_tag_lengths), + (const char **)GRN_BULK_HEAD(&close_tags), + (unsigned int *)GRN_BULK_HEAD(&close_tag_lengths), + n_keyword_sets, + highlighted, + GRN_BOOL_VALUE(html_escape_flag)); + + grn_obj_unlink(ctx, keywords); + grn_obj_unlink(ctx, &open_tags); + grn_obj_unlink(ctx, &open_tag_lengths); + grn_obj_unlink(ctx, &close_tags); + grn_obj_unlink(ctx, &close_tag_lengths); + } +#undef N_REQUIRED_ARGS +#undef KEYWORD_SET_SIZE + + if (!highlighted) { + highlighted = GRN_PROC_ALLOC(GRN_DB_VOID, 0); + } + + return highlighted; +} + #define DEF_VAR(v,name_str) do {\ (v).name = (name_str);\ (v).name_size = GRN_STRLEN(name_str);\ @@ -4944,4 +5121,7 @@ grn_db_init_builtin_query(grn_ctx *ctx) func_between, NULL, NULL, 0, NULL); grn_proc_set_selector(ctx, selector_proc, selector_between); } + + grn_proc_create(ctx, "highlight_full", -1, GRN_PROC_FUNCTION, + func_highlight_full, NULL, NULL, 0, NULL); } Added: test/command/suite/select/function/highlight_full/invalid_arguments.expected (+11 -0) 100644 =================================================================== --- /dev/null +++ test/command/suite/select/function/highlight_full/invalid_arguments.expected 2014-08-10 03:41:10 +0900 (aa18b74) @@ -0,0 +1,11 @@ +table_create Entries TABLE_NO_KEY +[[0,0.0,0.0],true] +column_create Entries body COLUMN_SCALAR ShortText +[[0,0.0,0.0],true] +load --table Entries +[ +{"body": "Mroonga is a MySQL storage engine based on Groonga. 
<b>Rroonga</b> is a Ruby binding of Groonga."} +] +[[0,0.0,0.0],1] +select Entries --output_columns 'highlight_full(body, "NormalizerAuto", 1, "Groonga", "<span class=\\"keyword1\\">", "</span>", "mysql", "<span class=\\"keyword2\\">")' --command_version 2 +[[0,0.0,0.0],[[[1],[["highlight_full","null"]],[null]]]] Added: test/command/suite/select/function/highlight_full/invalid_arguments.test (+12 -0) 100644 =================================================================== --- /dev/null +++ test/command/suite/select/function/highlight_full/invalid_arguments.test 2014-08-10 03:41:10 +0900 (ce7ad79) @@ -0,0 +1,12 @@ +table_create Entries TABLE_NO_KEY +column_create Entries body COLUMN_SCALAR ShortText + +load --table Entries +[ +{"body": "Mroonga is a MySQL storage engine based on Groonga. <b>Rroonga</b> is a Ruby binding of Groonga."} +] + +select Entries --output_columns \ + 'highlight_full(body, "NormalizerAuto", 1, \ + "Groonga", "<span class=\\"keyword1\\">", "</span>", \ + "mysql", "<span class=\\"keyword2\\">")' --command_version 2 Added: test/command/suite/select/function/highlight_full/no_normalize.expected (+33 -0) 100644 =================================================================== --- /dev/null +++ test/command/suite/select/function/highlight_full/no_normalize.expected 2014-08-10 03:41:10 +0900 (1a02737) @@ -0,0 +1,33 @@ +table_create Entries TABLE_NO_KEY +[[0,0.0,0.0],true] +column_create Entries body COLUMN_SCALAR ShortText +[[0,0.0,0.0],true] +load --table Entries +[ +{"body": "Mroonga is a MySQL storage engine based on Groonga. 
<b>Rroonga</b> is a Ruby binding of Groonga."} +] +[[0,0.0,0.0],1] +select Entries --output_columns 'highlight_full(body, "", 1, "Groonga", "<span class=\\"keyword1\\">", "</span>", "mysql", "<span class=\\"keyword2\\">", "</span>")' --command_version 2 +[ + [ + 0, + 0.0, + 0.0 + ], + [ + [ + [ + 1 + ], + [ + [ + "highlight_full", + "null" + ] + ], + [ + "Mroonga is a MySQL storage engine based on <span class=\"keyword1\">Groonga</span>. <b>Rroonga</b> is a Ruby binding of <span class=\"keyword1\">Groonga</span>." + ] + ] + ] +] Added: test/command/suite/select/function/highlight_full/no_normalize.test (+12 -0) 100644 =================================================================== --- /dev/null +++ test/command/suite/select/function/highlight_full/no_normalize.test 2014-08-10 03:41:10 +0900 (828bca2) @@ -0,0 +1,12 @@ +table_create Entries TABLE_NO_KEY +column_create Entries body COLUMN_SCALAR ShortText + +load --table Entries +[ +{"body": "Mroonga is a MySQL storage engine based on Groonga. <b>Rroonga</b> is a Ruby binding of Groonga."} +] + +select Entries --output_columns \ + 'highlight_full(body, "", 1, \ + "Groonga", "<span class=\\"keyword1\\">", "</span>", \ + "mysql", "<span class=\\"keyword2\\">", "</span>")' --command_version 2 Added: test/command/suite/select/function/highlight_full/normalizer_failure.expected (+38 -0) 100644 =================================================================== --- /dev/null +++ test/command/suite/select/function/highlight_full/normalizer_failure.expected 2014-08-10 03:41:10 +0900 (687a514) @@ -0,0 +1,38 @@ +table_create Entries TABLE_NO_KEY +[[0,0.0,0.0],true] +column_create Entries body COLUMN_SCALAR ShortText +[[0,0.0,0.0],true] +load --table Entries +[ +{"body": "Mroonga is a MySQL storage engine based on Groonga. 
<b>Rroonga</b> is a Ruby binding of Groonga."} +] +[[0,0.0,0.0],1] +select Entries --output_columns 'highlight_full(body, "TokenBigram", 1, "Groonga", "<span class=\\"keyword1\\">", "</span>", "mysql", "<span class=\\"keyword2\\">", "</span>")' --command_version 2 +[ + [ + [ + -22, + 0.0, + 0.0 + ], + "[highlight_full] not normalizer: #<proc:tokenizer TokenBigram arguments:[$1, $2, $3]>" + ], + [ + [ + [ + 1 + ], + [ + [ + "highlight_full", + "null" + ] + ], + [ + "[highlight_full] not normalizer: #<proc:tokenizer TokenBigram arguments:[$1, $2, $3]>" + ] + ] + ] +] +#|e| [highlight_full] not normalizer: #<proc:tokenizer TokenBigram arguments:[$1, $2, $3]> +#|e| groonga() [0x404d05] Added: test/command/suite/select/function/highlight_full/normalizer_failure.test (+12 -0) 100644 =================================================================== --- /dev/null +++ test/command/suite/select/function/highlight_full/normalizer_failure.test 2014-08-10 03:41:10 +0900 (a91d377) @@ -0,0 +1,12 @@ +table_create Entries TABLE_NO_KEY +column_create Entries body COLUMN_SCALAR ShortText + +load --table Entries +[ +{"body": "Mroonga is a MySQL storage engine based on Groonga. <b>Rroonga</b> is a Ruby binding of Groonga."} +] + +select Entries --output_columns \ + 'highlight_full(body, "TokenBigram", 1, \ + "Groonga", "<span class=\\"keyword1\\">", "</span>", \ + "mysql", "<span class=\\"keyword2\\">", "</span>")' --command_version 2 Added: test/command/suite/select/function/highlight_full/not_enough_arguments.expected (+11 -0) 100644 =================================================================== --- /dev/null +++ test/command/suite/select/function/highlight_full/not_enough_arguments.expected 2014-08-10 03:41:10 +0900 (03c247e) @@ -0,0 +1,11 @@ +table_create Entries TABLE_NO_KEY +[[0,0.0,0.0],true] +column_create Entries body COLUMN_SCALAR ShortText +[[0,0.0,0.0],true] +load --table Entries +[ +{"body": "Mroonga is a MySQL storage engine based on Groonga. 
<b>Rroonga</b> is a Ruby binding of Groonga."} +] +[[0,0.0,0.0],1] +select Entries --output_columns 'highlight_full(body, 1, "Groonga", "<span class=\\"keyword1\\">", "</span>")' --command_version 2 +[[0,0.0,0.0],[[[1],[["highlight_full","null"]],[null]]]] Added: test/command/suite/select/function/highlight_full/not_enough_arguments.test (+11 -0) 100644 =================================================================== --- /dev/null +++ test/command/suite/select/function/highlight_full/not_enough_arguments.test 2014-08-10 03:41:10 +0900 (69363ff) @@ -0,0 +1,11 @@ +table_create Entries TABLE_NO_KEY +column_create Entries body COLUMN_SCALAR ShortText + +load --table Entries +[ +{"body": "Mroonga is a MySQL storage engine based on Groonga. <b>Rroonga</b> is a Ruby binding of Groonga."} +] + +select Entries --output_columns \ + 'highlight_full(body, 1, \ + "Groonga", "<span class=\\"keyword1\\">", "</span>")' --command_version 2 Added: test/command/suite/select/function/highlight_full/not_escaped.expected (+33 -0) 100644 =================================================================== --- /dev/null +++ test/command/suite/select/function/highlight_full/not_escaped.expected 2014-08-10 03:41:10 +0900 (0689687) @@ -0,0 +1,33 @@ +table_create Entries TABLE_NO_KEY +[[0,0.0,0.0],true] +column_create Entries body COLUMN_SCALAR ShortText +[[0,0.0,0.0],true] +load --table Entries +[ +{"body": "Mroonga is a MySQL storage engine based on Groonga. <b>Rroonga</b> is a Ruby binding of Groonga."} +] +[[0,0.0,0.0],1] +select Entries --output_columns 'highlight_full(body, "NormalizerAuto", 0, "Groonga", "<span class=\\"keyword1\\">", "</span>", "mysql", "<span class=\\"keyword2\\">", "</span>")' --command_version 2 +[ + [ + 0, + 0.0, + 0.0 + ], + [ + [ + [ + 1 + ], + [ + [ + "highlight_full", + "null" + ] + ], + [ + "Mroonga is a <span class=\"keyword2\">MySQL</span> storage engine based on <span class=\"keyword1\">Groonga</span>. 
<b>Rroonga</b> is a Ruby binding of <span class=\"keyword1\">Groonga</span>." + ] + ] + ] +] Added: test/command/suite/select/function/highlight_full/not_escaped.test (+12 -0) 100644 =================================================================== --- /dev/null +++ test/command/suite/select/function/highlight_full/not_escaped.test 2014-08-10 03:41:10 +0900 (8ab93f2) @@ -0,0 +1,12 @@ +table_create Entries TABLE_NO_KEY +column_create Entries body COLUMN_SCALAR ShortText + +load --table Entries +[ +{"body": "Mroonga is a MySQL storage engine based on Groonga. <b>Rroonga</b> is a Ruby binding of Groonga."} +] + +select Entries --output_columns \ + 'highlight_full(body, "NormalizerAuto", 0, \ + "Groonga", "<span class=\\"keyword1\\">", "</span>", \ + "mysql", "<span class=\\"keyword2\\">", "</span>")' --command_version 2 Added: test/command/suite/select/function/highlight_full/one_keyword.expected (+33 -0) 100644 =================================================================== --- /dev/null +++ test/command/suite/select/function/highlight_full/one_keyword.expected 2014-08-10 03:41:10 +0900 (82a9340) @@ -0,0 +1,33 @@ +table_create Entries TABLE_NO_KEY +[[0,0.0,0.0],true] +column_create Entries body COLUMN_SCALAR ShortText +[[0,0.0,0.0],true] +load --table Entries +[ +{"body": "Mroonga is a MySQL storage engine based on Groonga. <b>Rroonga</b> is a Ruby binding of Groonga."} +] +[[0,0.0,0.0],1] +select Entries --output_columns 'highlight_full(body, "NormalizerAuto", 1, "Groonga", "<span class=\\"keyword1\\">", "</span>")' --command_version 2 +[ + [ + 0, + 0.0, + 0.0 + ], + [ + [ + [ + 1 + ], + [ + [ + "highlight_full", + "null" + ] + ], + [ + "Mroonga is a MySQL storage engine based on <span class=\"keyword1\">Groonga</span>. <b>Rroonga</b> is a Ruby binding of <span class=\"keyword1\">Groonga</span>." 
+ ] + ] + ] +] Added: test/command/suite/select/function/highlight_full/one_keyword.test (+11 -0) 100644 =================================================================== --- /dev/null +++ test/command/suite/select/function/highlight_full/one_keyword.test 2014-08-10 03:41:10 +0900 (a4663cd) @@ -0,0 +1,11 @@ +table_create Entries TABLE_NO_KEY +column_create Entries body COLUMN_SCALAR ShortText + +load --table Entries +[ +{"body": "Mroonga is a MySQL storage engine based on Groonga. <b>Rroonga</b> is a Ruby binding of Groonga."} +] + +select Entries --output_columns \ + 'highlight_full(body, "NormalizerAuto", 1, \ + "Groonga", "<span class=\\"keyword1\\">", "</span>")' --command_version 2 Added: test/command/suite/select/function/highlight_full/two_keywords.expected (+33 -0) 100644 =================================================================== --- /dev/null +++ test/command/suite/select/function/highlight_full/two_keywords.expected 2014-08-10 03:41:10 +0900 (795b1c5) @@ -0,0 +1,33 @@ +table_create Entries TABLE_NO_KEY +[[0,0.0,0.0],true] +column_create Entries body COLUMN_SCALAR ShortText +[[0,0.0,0.0],true] +load --table Entries +[ +{"body": "Mroonga is a MySQL storage engine based on Groonga. <b>Rroonga</b> is a Ruby binding of Groonga."} +] +[[0,0.0,0.0],1] +select Entries --output_columns 'highlight_full(body, "NormalizerAuto", 1, "Groonga", "<span class=\\"keyword1\\">", "</span>", "mysql", "<span class=\\"keyword2\\">", "</span>")' --command_version 2 +[ + [ + 0, + 0.0, + 0.0 + ], + [ + [ + [ + 1 + ], + [ + [ + "highlight_full", + "null" + ] + ], + [ + "Mroonga is a <span class=\"keyword2\">MySQL</span> storage engine based on <span class=\"keyword1\">Groonga</span>. <b>Rroonga</b> is a Ruby binding of <span class=\"keyword1\">Groonga</span>." 
+ ] + ] + ] +] Added: test/command/suite/select/function/highlight_full/two_keywords.test (+12 -0) 100644 =================================================================== --- /dev/null +++ test/command/suite/select/function/highlight_full/two_keywords.test 2014-08-10 03:41:10 +0900 (f1b4dd9) @@ -0,0 +1,12 @@ +table_create Entries TABLE_NO_KEY +column_create Entries body COLUMN_SCALAR ShortText + +load --table Entries +[ +{"body": "Mroonga is a MySQL storage engine based on Groonga. <b>Rroonga</b> is a Ruby binding of Groonga."} +] + +select Entries --output_columns \ + 'highlight_full(body, "NormalizerAuto", 1, \ + "Groonga", "<span class=\\"keyword1\\">", "</span>", \ + "mysql", "<span class=\\"keyword2\\">", "</span>")' --command_version 2 -------------- next part -------------- HTMLの添付ファイルを保管しました... 下載