[Groonga-commit] groonga/groonga at ec87297 [master] Extract "normalize" command code

Back to archive index

Kouhei Sutou null+****@clear*****
Tue Apr 10 17:43:42 JST 2018


Kouhei Sutou	2018-04-10 17:43:42 +0900 (Tue, 10 Apr 2018)

  New Revision: ec87297d25b0fefa04995475ab61ca1e50b40b3b
  https://github.com/groonga/groonga/commit/ec87297d25b0fefa04995475ab61ca1e50b40b3b

  Message:
    Extract "normalize" command code

  Added files:
    lib/proc/proc_normalize.c
  Modified files:
    lib/grn_proc.h
    lib/proc.c
    lib/proc/sources.am

  Modified: lib/grn_proc.h (+2 -1)
===================================================================
--- lib/grn_proc.h    2018-04-10 17:13:49 +0900 (1ce12d7bf)
+++ lib/grn_proc.h    2018-04-10 17:43:42 +0900 (5f01ad58a)
@@ -1,6 +1,6 @@
 /* -*- c-basic-offset: 2 -*- */
 /*
-  Copyright(C) 2009-2017 Brazil
+  Copyright(C) 2009-2018 Brazil
 
   This library is free software; you can redistribute it and/or
   modify it under the terms of the GNU Lesser General Public
@@ -57,6 +57,7 @@ void grn_proc_init_in_records(grn_ctx *ctx);
 void grn_proc_init_lock_acquire(grn_ctx *ctx);
 void grn_proc_init_lock_clear(grn_ctx *ctx);
 void grn_proc_init_lock_release(grn_ctx *ctx);
+void grn_proc_init_normalize(grn_ctx *ctx);
 void grn_proc_init_object_exist(grn_ctx *ctx);
 void grn_proc_init_object_inspect(grn_ctx *ctx);
 void grn_proc_init_object_list(grn_ctx *ctx);

  Modified: lib/proc.c (+2 -166)
===================================================================
--- lib/proc.c    2018-04-10 17:13:49 +0900 (f89c05426)
+++ lib/proc.c    2018-04-10 17:43:42 +0900 (870c98b2a)
@@ -1,6 +1,6 @@
 /* -*- c-basic-offset: 2 -*- */
 /*
-  Copyright(C) 2009-2017 Brazil
+  Copyright(C) 2009-2018 Brazil
 
   This library is free software; you can redistribute it and/or
   modify it under the terms of the GNU Lesser General Public
@@ -1074,167 +1074,6 @@ exit :
   return NULL;
 }
 
-static int
-parse_normalize_flags(grn_ctx *ctx, grn_obj *flag_names)
-{
-  int flags = 0;
-  const char *names, *names_end;
-  int length;
-
-  names = GRN_TEXT_VALUE(flag_names);
-  length = GRN_TEXT_LEN(flag_names);
-  names_end = names + length;
-  while (names < names_end) {
-    if (*names == '|' || *names == ' ') {
-      names += 1;
-      continue;
-    }
-
-#define CHECK_FLAG(name)\
-    if (((names_end - names) >= (sizeof(#name) - 1)) &&\
-        (!memcmp(names, #name, sizeof(#name) - 1))) {\
-      flags |= GRN_STRING_ ## name;\
-      names += sizeof(#name) - 1;\
-      continue;\
-    }
-
-    CHECK_FLAG(REMOVE_BLANK);
-    CHECK_FLAG(WITH_TYPES);
-    CHECK_FLAG(WITH_CHECKS);
-    CHECK_FLAG(REMOVE_TOKENIZED_DELIMITER);
-
-#define GRN_STRING_NONE 0
-    CHECK_FLAG(NONE);
-#undef GRN_STRING_NONE
-
-    ERR(GRN_INVALID_ARGUMENT, "[normalize] invalid flag: <%.*s>",
-        (int)(names_end - names), names);
-    return 0;
-#undef CHECK_FLAG
-  }
-
-  return flags;
-}
-
-static grn_bool
-is_normalizer(grn_ctx *ctx, grn_obj *object)
-{
-  if (object->header.type != GRN_PROC) {
-    return GRN_FALSE;
-  }
-
-  if (grn_proc_get_type(ctx, object) != GRN_PROC_NORMALIZER) {
-    return GRN_FALSE;
-  }
-
-  return GRN_TRUE;
-}
-
-static grn_obj *
-proc_normalize(grn_ctx *ctx, int nargs, grn_obj **args, grn_user_data *user_data)
-{
-  grn_obj *normalizer_name;
-  grn_obj *string;
-  grn_obj *flag_names;
-
-  normalizer_name = VAR(0);
-  string = VAR(1);
-  flag_names = VAR(2);
-  if (GRN_TEXT_LEN(normalizer_name) == 0) {
-    ERR(GRN_INVALID_ARGUMENT, "normalizer name is missing");
-    return NULL;
-  }
-
-  {
-    grn_obj *normalizer;
-    grn_obj *grn_string;
-    int flags;
-    unsigned int normalized_length_in_bytes;
-    unsigned int normalized_n_characters;
-
-    flags = parse_normalize_flags(ctx, flag_names);
-    normalizer = grn_ctx_get(ctx,
-                             GRN_TEXT_VALUE(normalizer_name),
-                             GRN_TEXT_LEN(normalizer_name));
-    if (!normalizer) {
-      ERR(GRN_INVALID_ARGUMENT,
-          "[normalize] nonexistent normalizer: <%.*s>",
-          (int)GRN_TEXT_LEN(normalizer_name),
-          GRN_TEXT_VALUE(normalizer_name));
-      return NULL;
-    }
-
-    if (!is_normalizer(ctx, normalizer)) {
-      grn_obj inspected;
-      GRN_TEXT_INIT(&inspected, 0);
-      grn_inspect(ctx, &inspected, normalizer);
-      ERR(GRN_INVALID_ARGUMENT,
-          "[normalize] not normalizer: %.*s",
-          (int)GRN_TEXT_LEN(&inspected),
-          GRN_TEXT_VALUE(&inspected));
-      GRN_OBJ_FIN(ctx, &inspected);
-      grn_obj_unlink(ctx, normalizer);
-      return NULL;
-    }
-
-    grn_string = grn_string_open(ctx,
-                                 GRN_TEXT_VALUE(string), GRN_TEXT_LEN(string),
-                                 normalizer, flags);
-    grn_obj_unlink(ctx, normalizer);
-
-    GRN_OUTPUT_MAP_OPEN("RESULT", 3);
-    {
-      const char *normalized;
-
-      grn_string_get_normalized(ctx, grn_string,
-                                &normalized,
-                                &normalized_length_in_bytes,
-                                &normalized_n_characters);
-      GRN_OUTPUT_CSTR("normalized");
-      GRN_OUTPUT_STR(normalized, normalized_length_in_bytes);
-    }
-    {
-      const unsigned char *types;
-
-      types = grn_string_get_types(ctx, grn_string);
-      GRN_OUTPUT_CSTR("types");
-      if (types) {
-        unsigned int i;
-        GRN_OUTPUT_ARRAY_OPEN("types", normalized_n_characters);
-        for (i = 0; i < normalized_n_characters; i++) {
-          GRN_OUTPUT_CSTR(grn_char_type_to_string(types[i]));
-        }
-        GRN_OUTPUT_ARRAY_CLOSE();
-      } else {
-        GRN_OUTPUT_ARRAY_OPEN("types", 0);
-        GRN_OUTPUT_ARRAY_CLOSE();
-      }
-    }
-    {
-      const short *checks;
-
-      checks = grn_string_get_checks(ctx, grn_string);
-      GRN_OUTPUT_CSTR("checks");
-      if (checks) {
-        unsigned int i;
-        GRN_OUTPUT_ARRAY_OPEN("checks", normalized_length_in_bytes);
-        for (i = 0; i < normalized_length_in_bytes; i++) {
-          GRN_OUTPUT_INT32(checks[i]);
-        }
-        GRN_OUTPUT_ARRAY_CLOSE();
-      } else {
-        GRN_OUTPUT_ARRAY_OPEN("checks", 0);
-        GRN_OUTPUT_ARRAY_CLOSE();
-      }
-    }
-    GRN_OUTPUT_MAP_CLOSE();
-
-    grn_obj_unlink(ctx, grn_string);
-  }
-
-  return NULL;
-}
-
 static void
 list_proc(grn_ctx *ctx, grn_proc_type target_proc_type,
           const char *name, const char *plural_name)
@@ -4070,10 +3909,7 @@ grn_db_init_builtin_commands(grn_ctx *ctx)
   DEF_VAR(vars[1], "table");
   DEF_COMMAND("truncate", proc_truncate, 2, vars);
 
-  DEF_VAR(vars[0], "normalizer");
-  DEF_VAR(vars[1], "string");
-  DEF_VAR(vars[2], "flags");
-  DEF_COMMAND("normalize", proc_normalize, 3, vars);
+  grn_proc_init_normalize(ctx);
 
   grn_proc_init_tokenize(ctx);
   grn_proc_init_table_tokenize(ctx);

  Added: lib/proc/proc_normalize.c (+199 -0) 100644
===================================================================
--- /dev/null
+++ lib/proc/proc_normalize.c    2018-04-10 17:43:42 +0900 (998a8030c)
@@ -0,0 +1,199 @@
+/* -*- c-basic-offset: 2 -*- */
+/*
+  Copyright(C) 2009-2018 Brazil
+
+  This library is free software; you can redistribute it and/or
+  modify it under the terms of the GNU Lesser General Public
+  License version 2.1 as published by the Free Software Foundation.
+
+  This library is distributed in the hope that it will be useful,
+  but WITHOUT ANY WARRANTY; without even the implied warranty of
+  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+  Lesser General Public License for more details.
+
+  You should have received a copy of the GNU Lesser General Public
+  License along with this library; if not, write to the Free Software
+  Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA
+*/
+
+#include "../grn_proc.h"
+#include "../grn_ctx.h"
+#include "../grn_token_cursor.h"
+
+#include <groonga/plugin.h>
+
+static int
+parse_normalize_flags(grn_ctx *ctx, grn_obj *flag_names)
+{
+  int flags = 0;
+  const char *names, *names_end;
+  int length;
+
+  names = GRN_TEXT_VALUE(flag_names);
+  length = GRN_TEXT_LEN(flag_names);
+  names_end = names + length;
+  while (names < names_end) {
+    if (*names == '|' || *names == ' ') {
+      names += 1;
+      continue;
+    }
+
+#define CHECK_FLAG(name)\
+    if (((names_end - names) >= (sizeof(#name) - 1)) &&\
+        (!memcmp(names, #name, sizeof(#name) - 1))) {\
+      flags |= GRN_STRING_ ## name;\
+      names += sizeof(#name) - 1;\
+      continue;\
+    }
+
+    CHECK_FLAG(REMOVE_BLANK);
+    CHECK_FLAG(WITH_TYPES);
+    CHECK_FLAG(WITH_CHECKS);
+    CHECK_FLAG(REMOVE_TOKENIZED_DELIMITER);
+
+#define GRN_STRING_NONE 0
+    CHECK_FLAG(NONE);
+#undef GRN_STRING_NONE
+
+    ERR(GRN_INVALID_ARGUMENT, "[normalize] invalid flag: <%.*s>",
+        (int)(names_end - names), names);
+    return 0;
+#undef CHECK_FLAG
+  }
+
+  return flags;
+}
+
+static grn_bool
+is_normalizer(grn_ctx *ctx, grn_obj *object)
+{
+  if (object->header.type != GRN_PROC) {
+    return GRN_FALSE;
+  }
+
+  if (grn_proc_get_type(ctx, object) != GRN_PROC_NORMALIZER) {
+    return GRN_FALSE;
+  }
+
+  return GRN_TRUE;
+}
+
+static grn_obj *
+command_normalize(grn_ctx *ctx, int nargs, grn_obj **args, grn_user_data *user_data)
+{
+  grn_obj *normalizer_name;
+  grn_obj *string;
+  grn_obj *flag_names;
+
+  normalizer_name = grn_plugin_proc_get_var(ctx, user_data, "normalizer", -1);
+  string = grn_plugin_proc_get_var(ctx, user_data, "string", -1);
+  flag_names = grn_plugin_proc_get_var(ctx, user_data, "flags", -1);
+  if (GRN_TEXT_LEN(normalizer_name) == 0) {
+    ERR(GRN_INVALID_ARGUMENT, "normalizer name is missing");
+    return NULL;
+  }
+
+  {
+    grn_obj *normalizer;
+    grn_obj *grn_string;
+    int flags;
+    unsigned int normalized_length_in_bytes;
+    unsigned int normalized_n_characters;
+
+    flags = parse_normalize_flags(ctx, flag_names);
+    normalizer = grn_ctx_get(ctx,
+                             GRN_TEXT_VALUE(normalizer_name),
+                             GRN_TEXT_LEN(normalizer_name));
+    if (!normalizer) {
+      ERR(GRN_INVALID_ARGUMENT,
+          "[normalize] nonexistent normalizer: <%.*s>",
+          (int)GRN_TEXT_LEN(normalizer_name),
+          GRN_TEXT_VALUE(normalizer_name));
+      return NULL;
+    }
+
+    if (!is_normalizer(ctx, normalizer)) {
+      grn_obj inspected;
+      GRN_TEXT_INIT(&inspected, 0);
+      grn_inspect(ctx, &inspected, normalizer);
+      ERR(GRN_INVALID_ARGUMENT,
+          "[normalize] not normalizer: %.*s",
+          (int)GRN_TEXT_LEN(&inspected),
+          GRN_TEXT_VALUE(&inspected));
+      GRN_OBJ_FIN(ctx, &inspected);
+      grn_obj_unlink(ctx, normalizer);
+      return NULL;
+    }
+
+    grn_string = grn_string_open(ctx,
+                                 GRN_TEXT_VALUE(string), GRN_TEXT_LEN(string),
+                                 normalizer, flags);
+    grn_obj_unlink(ctx, normalizer);
+
+    grn_ctx_output_map_open(ctx, "RESULT", 3);
+    {
+      const char *normalized;
+
+      grn_string_get_normalized(ctx, grn_string,
+                                &normalized,
+                                &normalized_length_in_bytes,
+                                &normalized_n_characters);
+      grn_ctx_output_cstr(ctx, "normalized");
+      grn_ctx_output_str(ctx, normalized, normalized_length_in_bytes);
+    }
+    {
+      const unsigned char *types;
+
+      types = grn_string_get_types(ctx, grn_string);
+      grn_ctx_output_cstr(ctx, "types");
+      if (types) {
+        unsigned int i;
+        grn_ctx_output_array_open(ctx, "types", normalized_n_characters);
+        for (i = 0; i < normalized_n_characters; i++) {
+          grn_ctx_output_cstr(ctx, grn_char_type_to_string(types[i]));
+        }
+        grn_ctx_output_array_close(ctx);
+      } else {
+        grn_ctx_output_array_open(ctx, "types", 0);
+        grn_ctx_output_array_close(ctx);
+      }
+    }
+    {
+      const short *checks;
+
+      checks = grn_string_get_checks(ctx, grn_string);
+      grn_ctx_output_cstr(ctx, "checks");
+      if (checks) {
+        unsigned int i;
+        grn_ctx_output_array_open(ctx, "checks", normalized_length_in_bytes);
+        for (i = 0; i < normalized_length_in_bytes; i++) {
+          grn_ctx_output_int32(ctx, checks[i]);
+        }
+        grn_ctx_output_array_close(ctx);
+      } else {
+        grn_ctx_output_array_open(ctx, "checks", 0);
+        grn_ctx_output_array_close(ctx);
+      }
+    }
+    grn_ctx_output_map_close(ctx);
+
+    grn_obj_unlink(ctx, grn_string);
+  }
+
+  return NULL;
+}
+
+void
+grn_proc_init_normalize(grn_ctx *ctx)
+{
+  grn_expr_var vars[3];
+
+  grn_plugin_expr_var_init(ctx, &(vars[0]), "normalizer", -1);
+  grn_plugin_expr_var_init(ctx, &(vars[1]), "string", -1);
+  grn_plugin_expr_var_init(ctx, &(vars[2]), "flags", -1);
+  grn_plugin_command_create(ctx,
+                            "normalize", -1,
+                            command_normalize,
+                            3,
+                            vars);
+}

  Modified: lib/proc/sources.am (+1 -0)
===================================================================
--- lib/proc/sources.am    2018-04-10 17:13:49 +0900 (a945320ff)
+++ lib/proc/sources.am    2018-04-10 17:43:42 +0900 (48b411fa9)
@@ -6,6 +6,7 @@ libgrnproc_la_SOURCES =				\
 	proc_highlight.c			\
 	proc_in_records.c			\
 	proc_lock.c				\
+	proc_normalize.c			\
 	proc_object.c				\
 	proc_object_inspect.c			\
 	proc_object_list.c			\
-------------- next part --------------
HTMLの添付ファイルを保管しました...
URL: https://lists.osdn.me/mailman/archives/groonga-commit/attachments/20180410/d4634cbb/attachment-0001.htm 



More information about the Groonga-commit mailing list
Back to archive index