[Groonga-commit] groonga/groonga at 5869159 [master] grn_ts: update grn_ts_tokenize_output_columns() to skip empty tokens

Back to archive index

susumu.yata null+****@clear*****
Wed Sep 16 19:39:16 JST 2015


susumu.yata	2015-09-16 19:39:16 +0900 (Wed, 16 Sep 2015)

  New Revision: 5869159b738b825698aed6209d89656b99da2f66
  https://github.com/groonga/groonga/commit/5869159b738b825698aed6209d89656b99da2f66

  Message:
    grn_ts: update grn_ts_tokenize_output_columns() to skip empty tokens
    
    GitHub: #402

  Modified files:
    lib/ts.c

  Modified: lib/ts.c (+62 -57)
===================================================================
--- lib/ts.c    2015-09-16 13:35:25 +0900 (678daf1)
+++ lib/ts.c    2015-09-16 19:39:16 +0900 (528f8d4)
@@ -5448,80 +5448,85 @@ grn_ts_select_filter(grn_ctx *ctx, grn_obj *table,
 static grn_rc
 grn_ts_tokenize_output_columns(grn_ctx *ctx, grn_ts_str in,
                                grn_ts_str *token, grn_ts_str *rest) {
-  char stack_top;
   size_t i;
-  grn_rc rc;
-  grn_ts_str str;
+  char stack_top;
+  grn_rc rc = GRN_SUCCESS;
+  grn_ts_str str = in;
   grn_ts_buf stack;
 
-  str = grn_ts_str_trim_left(in);
-  if (!str.size) {
-    *token = str;
-    *rest = str;
-    return GRN_END_OF_DATA;
-  }
-
-  /* Find the end of the first token. */
+  /* Find a non-empty token. */
   grn_ts_buf_init(ctx, &stack);
-  for (i = 0; i < str.size; i++) {
-    if (stack.pos) {
-      if (str.ptr[i] == stack_top) {
-        if (--stack.pos) {
-          stack_top = ((char *)stack.ptr)[stack.pos - 1];
-        }
-        continue;
-      }
-      if (stack_top == '"') {
-        /* Skip the next byte of an escape character. */
-        if ((str.ptr[i] == '\\') && (i < (str.size - 1))) {
-          i++;
-        }
-        continue;
-      }
-    } else if (str.ptr[i] == ',') {
+  for ( ; ; ) {
+    str = grn_ts_str_trim_left(str);
+    if (!str.size) {
+      rc = GRN_END_OF_DATA;
       break;
     }
-    switch (str.ptr[i]) {
-      case '(': {
-        stack_top = ')';
-        rc = grn_ts_buf_write(ctx, &stack, &stack_top, 1);
-        break;
-      }
-      case '[': {
-        stack_top = ']';
-        rc = grn_ts_buf_write(ctx, &stack, &stack_top, 1);
+    for (i = 0; i < str.size; i++) {
+      if (stack.pos) {
+        if (str.ptr[i] == stack_top) {
+          if (--stack.pos) {
+            stack_top = ((char *)stack.ptr)[stack.pos - 1];
+          }
+          continue;
+        }
+        if (stack_top == '"') {
+          /* Skip the next byte of an escape character. */
+          if ((str.ptr[i] == '\\') && (i < (str.size - 1))) {
+            i++;
+          }
+          continue;
+        }
+      } else if (str.ptr[i] == ',') {
         break;
       }
-      case '{': {
-        stack_top = '}';
-        rc = grn_ts_buf_write(ctx, &stack, &stack_top, 1);
-        break;
+      switch (str.ptr[i]) {
+        case '(': {
+          stack_top = ')';
+          rc = grn_ts_buf_write(ctx, &stack, &stack_top, 1);
+          break;
+        }
+        case '[': {
+          stack_top = ']';
+          rc = grn_ts_buf_write(ctx, &stack, &stack_top, 1);
+          break;
+        }
+        case '{': {
+          stack_top = '}';
+          rc = grn_ts_buf_write(ctx, &stack, &stack_top, 1);
+          break;
+        }
+        case '"': {
+          stack_top = '"';
+          rc = grn_ts_buf_write(ctx, &stack, &stack_top, 1);
+          break;
+        }
       }
-      case '"': {
-        stack_top = '"';
-        rc = grn_ts_buf_write(ctx, &stack, &stack_top, 1);
+      if (rc != GRN_SUCCESS) {
         break;
       }
     }
     if (rc != GRN_SUCCESS) {
       break;
     }
+    if (i) {
+      /* Output the result. */
+      token->ptr = str.ptr;
+      token->size = i;
+      if (token->size == str.size) {
+        rest->ptr = str.ptr + str.size;
+        rest->size = 0;
+      } else {
+        rest->ptr = str.ptr + token->size + 1;
+        rest->size = str.size - token->size - 1;
+      }
+      break;
+    }
+    str.ptr++;
+    str.size--;
   }
   grn_ts_buf_fin(ctx, &stack);
-  if (rc != GRN_SUCCESS) {
-    return rc;
-  }
-
-  token->ptr = str.ptr;
-  token->size = i;
-  if (token->size == str.size) {
-    rest->ptr = str.ptr + str.size;
-    rest->size = 0;
-  } else {
-    rest->ptr = str.ptr + token->size + 1;
-    rest->size = str.size - token->size + 1;
-  }
-  return GRN_SUCCESS;
+  return rc;
 }
 
 /*
-------------- next part --------------
HTML����������������������������...
下載 



More information about the Groonga-commit mailing list
Back to archive index