[Groonga-commit] groonga/groonga at 59a843e [master] grn_ts: support sorting Text

Back to archive index

Susumu Yata null+****@clear*****
Fri Nov 27 19:38:05 JST 2015


Susumu Yata	2015-11-27 19:38:05 +0900 (Fri, 27 Nov 2015)

  New Revision: 59a843ea4e032eecee59e60687d3eda176fdde5b
  https://github.com/groonga/groonga/commit/59a843ea4e032eecee59e60687d3eda176fdde5b

  Message:
    grn_ts: support sorting Text
    
    GitHub: #429

  Modified files:
    lib/ts/ts_sorter.c

  Modified: lib/ts/ts_sorter.c (+460 -3)
===================================================================
--- lib/ts/ts_sorter.c    2015-11-27 03:44:08 +0900 (fa1c4a8)
+++ lib/ts/ts_sorter.c    2015-11-27 19:38:05 +0900 (3963693)
@@ -1011,6 +1011,448 @@ grn_ts_qsort_by_int(grn_ctx *ctx, grn_ts_sorter_node *node,
   return GRN_SUCCESS;
 }
 
+/* grn_ts_text_cmp() compares Text values. */
+inline static int
+grn_ts_text_cmp(grn_ts_text lhs, grn_ts_text rhs)
+{
+  size_t min_size = (lhs.size < rhs.size) ? lhs.size : rhs.size;
+  int result = memcmp(lhs.ptr, rhs.ptr, min_size);
+  if (result != 0) {
+    return result;
+  }
+  if (lhs.size == rhs.size) {
+    return 0;
+  }
+  return (lhs.size < rhs.size) ? -1 : 1;
+}
+
+/* grn_ts_text_swap() swaps Text values. */
+inline static void
+grn_ts_text_swap(grn_ts_text *lhs, grn_ts_text *rhs)
+{
+  grn_ts_text tmp = *lhs;
+  *lhs = *rhs;
+  *rhs = tmp;
+}
+
+/* grn_ts_move_pivot_by_text_asc() moves the pivot to the front. */
+static void
+grn_ts_move_pivot_by_text_asc(grn_ts_sorter_node *node, grn_ts_text *vals,
+                              grn_ts_record *recs, size_t n_recs)
+{
+  /* Choose the median from recs[1], recs[n_recs / 2], and recs[n_recs - 2]. */
+  size_t first = 1;
+  size_t middle = n_recs / 2;
+  size_t last = n_recs - 2;
+  if (grn_ts_text_cmp(vals[first], vals[middle]) < 0) {
+    /* first < middle. */
+    if (grn_ts_text_cmp(vals[middle], vals[last]) < 0) {
+      /* first < middle < last */
+      grn_ts_rec_swap(&recs[0], &recs[middle]);
+      grn_ts_text_swap(&vals[0], &vals[middle]);
+    } else if (grn_ts_text_cmp(vals[first], vals[last]) < 0) {
+      /* first < last < middle. */
+      grn_ts_rec_swap(&recs[0], &recs[last]);
+      grn_ts_text_swap(&vals[0], &vals[last]);
+    } else { /* last < first < middle. */
+      grn_ts_rec_swap(&recs[0], &recs[first]);
+      grn_ts_text_swap(&vals[0], &vals[first]);
+    }
+  } else if (grn_ts_text_cmp(vals[last], vals[middle]) < 0) {
+    /* last < middle < first. */
+    grn_ts_rec_swap(&recs[0], &recs[middle]);
+    grn_ts_text_swap(&vals[0], &vals[middle]);
+  } else if (grn_ts_text_cmp(vals[last], vals[first]) < 0) {
+    /* middle < last < first. */
+    grn_ts_rec_swap(&recs[0], &recs[last]);
+    grn_ts_text_swap(&vals[0], &vals[last]);
+  } else { /* middle < first < last. */
+    grn_ts_rec_swap(&recs[0], &recs[first]);
+    grn_ts_text_swap(&vals[0], &vals[first]);
+  }
+}
+
+/* grn_ts_isort_by_text_asc() sorts records. */
+static grn_rc
+grn_ts_isort_by_text_asc(grn_ctx *ctx, grn_ts_sorter_node *node,
+                         size_t offset, size_t limit,
+                         grn_ts_text *vals, grn_ts_record *recs, size_t n_recs)
+{
+  for (size_t i = 1; i < n_recs; ++i) {
+    for (size_t j = i; j > 0; --j) {
+      if (grn_ts_text_cmp(vals[j], vals[j - 1]) < 0) {
+        grn_ts_rec_swap(&recs[j], &recs[j - 1]);
+        grn_ts_text_swap(&vals[j], &vals[j - 1]);
+      } else {
+        break;
+      }
+    }
+  }
+  /* Apply the next sorting if there are score duplicates. */
+  if (node->next) {
+    grn_rc rc;
+    size_t begin = 0;
+    for (size_t i = 1; i < n_recs; ++i) {
+      if (grn_ts_text_cmp(vals[i], vals[begin]) != 0) {
+        if ((i - begin) >= 2) {
+          rc = grn_ts_sorter_node_sort(ctx, node->next, 0, i - begin,
+                                       recs + begin, i - begin);
+          if (rc != GRN_SUCCESS) {
+            return rc;
+          }
+        }
+        begin = i;
+      }
+    }
+    if ((n_recs - begin) >= 2) {
+      rc = grn_ts_sorter_node_sort(ctx, node->next, 0, n_recs - begin,
+                                   recs + begin, n_recs - begin);
+      if (rc != GRN_SUCCESS) {
+        return rc;
+      }
+    }
+  }
+  return GRN_SUCCESS;
+}
+
+/* grn_ts_qsort_by_text_asc() sorts records. */
+static grn_rc
+grn_ts_qsort_by_text_asc(grn_ctx *ctx, grn_ts_sorter_node *node,
+                         size_t offset, size_t limit,
+                         grn_ts_text *vals, grn_ts_record *recs, size_t n_recs)
+{
+  grn_rc rc;
+  /*
+   * FIXME: Currently, the threshold is 16.
+   *        This value should be optimized and replaced with a named constant.
+   */
+  while (n_recs >= 16) {
+    grn_ts_move_pivot_by_text_asc(node, vals, recs, n_recs);
+    grn_ts_text pivot = vals[0];
+    size_t left = 1, right = n_recs;
+    size_t pivot_left = 1, pivot_right = n_recs;
+    for ( ; ; ) {
+      /*
+       * Prior entries are moved to left. Less prior entries are moved to
+       * right. Entries which equal to the pivot are moved to the edges.
+       */
+      while (left < right) {
+        int result = grn_ts_text_cmp(pivot, vals[left]);
+        if (result < 0) {
+          break;
+        } else if (result == 0) {
+          grn_ts_rec_swap(&recs[left], &recs[pivot_left]);
+          grn_ts_text_swap(&vals[left], &vals[pivot_left]);
+          ++pivot_left;
+        }
+        ++left;
+      }
+      while (left < right) {
+        int result;
+        --right;
+        result = grn_ts_text_cmp(vals[right], pivot);
+        if (result < 0) {
+          break;
+        } else if (result == 0) {
+          --pivot_right;
+          grn_ts_rec_swap(&recs[right], &recs[pivot_right]);
+          grn_ts_text_swap(&vals[right], &vals[pivot_right]);
+        }
+      }
+      if (left >= right) {
+        break;
+      }
+      grn_ts_rec_swap(&recs[left], &recs[right]);
+      grn_ts_text_swap(&vals[left], &vals[right]);
+      ++left;
+    }
+    /* Move left pivot-equivalent entries to the left of the boundary. */
+    while (pivot_left > 0) {
+      --pivot_left;
+      --left;
+      grn_ts_rec_swap(&recs[pivot_left], &recs[left]);
+      grn_ts_text_swap(&vals[pivot_left], &vals[left]);
+    }
+    /* Move right pivot-equivalent entries to the right of the boundary. */
+    while (pivot_right < n_recs) {
+      grn_ts_rec_swap(&recs[pivot_right], &recs[right]);
+      grn_ts_text_swap(&vals[pivot_right], &vals[right]);
+      ++pivot_right;
+      ++right;
+    }
+    /* Apply the next sort condition to the pivot-equivalent recs. */
+    if (node->next) {
+      if (((right - left) >= 2) && (offset < right) && (limit > left)) {
+        size_t next_offset = (offset < left) ? 0 : (offset - left);
+        size_t next_limit = ((limit > right) ? right : limit) - left;
+        rc = grn_ts_sorter_node_sort(ctx, node->next, next_offset, next_limit,
+                                     recs + left, right - left);
+        if (rc != GRN_SUCCESS) {
+          return rc;
+        }
+      }
+    }
+    /*
+     * Use a recursive call to sort the smaller group so that the recursion
+     * depth is less than log_2(n_recs).
+     */
+    if (left < (n_recs - right)) {
+      if ((offset < left) && (left >= 2)) {
+        size_t next_limit = (limit < left) ? limit : left;
+        rc = grn_ts_qsort_by_text_asc(ctx, node, offset, next_limit,
+                                      vals, recs, left);
+        if (rc != GRN_SUCCESS) {
+          return rc;
+        }
+      }
+      if (limit <= right) {
+        return GRN_SUCCESS;
+      }
+      vals += right;
+      recs += right;
+      n_recs -= right;
+      offset = (offset < right) ? 0 : (offset - right);
+      limit -= right;
+    } else {
+      if ((limit > right) && ((n_recs - right) >= 2)) {
+        size_t next_offset = (offset < right) ? 0 : (offset - right);
+        size_t next_limit = limit - right;
+        rc = grn_ts_qsort_by_text_asc(ctx, node, next_offset, next_limit,
+                                      vals + right, recs + right,
+                                      n_recs - right);
+        if (rc != GRN_SUCCESS) {
+          return rc;
+        }
+      }
+      if (offset >= left) {
+        return GRN_SUCCESS;
+      }
+      n_recs = left;
+      if (limit > left) {
+        limit = left;
+      }
+    }
+  }
+  if (n_recs >= 2) {
+    rc = grn_ts_isort_by_text_asc(ctx, node, offset, limit,
+                                  vals, recs, n_recs);
+    if (rc != GRN_SUCCESS) {
+      return rc;
+    }
+  }
+  return GRN_SUCCESS;
+}
+
+/* grn_ts_move_pivot_by_text_desc() moves the pivot to the front. */
+static void
+grn_ts_move_pivot_by_text_desc(grn_ts_sorter_node *node, grn_ts_text *vals,
+                               grn_ts_record *recs, size_t n_recs)
+{
+  /* Choose the median from recs[1], recs[n_recs / 2], and recs[n_recs - 2]. */
+  size_t first = 1;
+  size_t middle = n_recs / 2;
+  size_t last = n_recs - 2;
+  if (grn_ts_text_cmp(vals[first], vals[middle]) > 0) {
+    /* first < middle. */
+    if (grn_ts_text_cmp(vals[middle], vals[last]) > 0) {
+      /* first < middle < last */
+      grn_ts_rec_swap(&recs[0], &recs[middle]);
+      grn_ts_text_swap(&vals[0], &vals[middle]);
+    } else if (grn_ts_text_cmp(vals[first], vals[last]) > 0) {
+      /* first < last < middle. */
+      grn_ts_rec_swap(&recs[0], &recs[last]);
+      grn_ts_text_swap(&vals[0], &vals[last]);
+    } else { /* last < first < middle. */
+      grn_ts_rec_swap(&recs[0], &recs[first]);
+      grn_ts_text_swap(&vals[0], &vals[first]);
+    }
+  } else if (grn_ts_text_cmp(vals[last], vals[middle]) > 0) {
+    /* last < middle < first. */
+    grn_ts_rec_swap(&recs[0], &recs[middle]);
+    grn_ts_text_swap(&vals[0], &vals[middle]);
+  } else if (grn_ts_text_cmp(vals[last], vals[first]) > 0) {
+    /* middle < last < first. */
+    grn_ts_rec_swap(&recs[0], &recs[last]);
+    grn_ts_text_swap(&vals[0], &vals[last]);
+  } else { /* middle < first < last. */
+    grn_ts_rec_swap(&recs[0], &recs[first]);
+    grn_ts_text_swap(&vals[0], &vals[first]);
+  }
+}
+
+/* grn_ts_isort_by_text_desc() sorts records. */
+static grn_rc
+grn_ts_isort_by_text_desc(grn_ctx *ctx, grn_ts_sorter_node *node,
+                          size_t offset, size_t limit,
+                          grn_ts_text *vals, grn_ts_record *recs,
+                          size_t n_recs)
+{
+  for (size_t i = 1; i < n_recs; ++i) {
+    for (size_t j = i; j > 0; --j) {
+      if (grn_ts_text_cmp(vals[j], vals[j - 1]) > 0) {
+        grn_ts_rec_swap(&recs[j], &recs[j - 1]);
+        grn_ts_text_swap(&vals[j], &vals[j - 1]);
+      } else {
+        break;
+      }
+    }
+  }
+  /* Apply the next sorting if there are score duplicates. */
+  if (node->next) {
+    grn_rc rc;
+    size_t begin = 0;
+    for (size_t i = 1; i < n_recs; ++i) {
+      if (grn_ts_text_cmp(vals[i], vals[begin]) != 0) {
+        if ((i - begin) >= 2) {
+          rc = grn_ts_sorter_node_sort(ctx, node->next, 0, i - begin,
+                                       recs + begin, i - begin);
+          if (rc != GRN_SUCCESS) {
+            return rc;
+          }
+        }
+        begin = i;
+      }
+    }
+    if ((n_recs - begin) >= 2) {
+      rc = grn_ts_sorter_node_sort(ctx, node->next, 0, n_recs - begin,
+                                   recs + begin, n_recs - begin);
+      if (rc != GRN_SUCCESS) {
+        return rc;
+      }
+    }
+  }
+  return GRN_SUCCESS;
+}
+
+/* grn_ts_qsort_by_text_desc() sorts records. */
+static grn_rc
+grn_ts_qsort_by_text_desc(grn_ctx *ctx, grn_ts_sorter_node *node,
+                          size_t offset, size_t limit,
+                          grn_ts_text *vals, grn_ts_record *recs,
+                          size_t n_recs)
+{
+  grn_rc rc;
+  /*
+   * FIXME: Currently, the threshold is 16.
+   *        This value should be optimized and replaced with a named constant.
+   */
+  while (n_recs >= 16) {
+    grn_ts_move_pivot_by_text_desc(node, vals, recs, n_recs);
+    grn_ts_text pivot = vals[0];
+    size_t left = 1, right = n_recs;
+    size_t pivot_left = 1, pivot_right = n_recs;
+    for ( ; ; ) {
+      /*
+       * Prior entries are moved to left. Less prior entries are moved to
+       * right. Entries which equal to the pivot are moved to the edges.
+       */
+      while (left < right) {
+        int result = grn_ts_text_cmp(pivot, vals[left]);
+        if (result > 0) {
+          break;
+        } else if (result == 0) {
+          grn_ts_rec_swap(&recs[left], &recs[pivot_left]);
+          grn_ts_text_swap(&vals[left], &vals[pivot_left]);
+          ++pivot_left;
+        }
+        ++left;
+      }
+      while (left < right) {
+        int result;
+        --right;
+        result = grn_ts_text_cmp(vals[right], pivot);
+        if (result > 0) {
+          break;
+        } else if (result == 0) {
+          --pivot_right;
+          grn_ts_rec_swap(&recs[right], &recs[pivot_right]);
+          grn_ts_text_swap(&vals[right], &vals[pivot_right]);
+        }
+      }
+      if (left >= right) {
+        break;
+      }
+      grn_ts_rec_swap(&recs[left], &recs[right]);
+      grn_ts_text_swap(&vals[left], &vals[right]);
+      ++left;
+    }
+    /* Move left pivot-equivalent entries to the left of the boundary. */
+    while (pivot_left > 0) {
+      --pivot_left;
+      --left;
+      grn_ts_rec_swap(&recs[pivot_left], &recs[left]);
+      grn_ts_text_swap(&vals[pivot_left], &vals[left]);
+    }
+    /* Move right pivot-equivalent entries to the right of the boundary. */
+    while (pivot_right < n_recs) {
+      grn_ts_rec_swap(&recs[pivot_right], &recs[right]);
+      grn_ts_text_swap(&vals[pivot_right], &vals[right]);
+      ++pivot_right;
+      ++right;
+    }
+    /* Apply the next sort condition to the pivot-equivalent recs. */
+    if (node->next) {
+      if (((right - left) >= 2) && (offset < right) && (limit > left)) {
+        size_t next_offset = (offset < left) ? 0 : (offset - left);
+        size_t next_limit = ((limit > right) ? right : limit) - left;
+        rc = grn_ts_sorter_node_sort(ctx, node->next, next_offset, next_limit,
+                                     recs + left, right - left);
+        if (rc != GRN_SUCCESS) {
+          return rc;
+        }
+      }
+    }
+    /*
+     * Use a recursive call to sort the smaller group so that the recursion
+     * depth is less than log_2(n_recs).
+     */
+    if (left < (n_recs - right)) {
+      if ((offset < left) && (left >= 2)) {
+        size_t next_limit = (limit < left) ? limit : left;
+        rc = grn_ts_qsort_by_text_desc(ctx, node, offset, next_limit,
+                                       vals, recs, left);
+        if (rc != GRN_SUCCESS) {
+          return rc;
+        }
+      }
+      if (limit <= right) {
+        return GRN_SUCCESS;
+      }
+      vals += right;
+      recs += right;
+      n_recs -= right;
+      offset = (offset < right) ? 0 : (offset - right);
+      limit -= right;
+    } else {
+      if ((limit > right) && ((n_recs - right) >= 2)) {
+        size_t next_offset = (offset < right) ? 0 : (offset - right);
+        size_t next_limit = limit - right;
+        rc = grn_ts_qsort_by_text_desc(ctx, node, next_offset, next_limit,
+                                       vals + right, recs + right,
+                                       n_recs - right);
+        if (rc != GRN_SUCCESS) {
+          return rc;
+        }
+      }
+      if (offset >= left) {
+        return GRN_SUCCESS;
+      }
+      n_recs = left;
+      if (limit > left) {
+        limit = left;
+      }
+    }
+  }
+  if (n_recs >= 2) {
+    rc = grn_ts_isort_by_text_desc(ctx, node, offset, limit,
+                                   vals, recs, n_recs);
+    if (rc != GRN_SUCCESS) {
+      return rc;
+    }
+  }
+  return GRN_SUCCESS;
+}
+
 /* grn_ts_sorter_node_sort_by_var() sorts records. */
 static grn_rc
 grn_ts_sorter_node_sort_by_var(grn_ctx *ctx, grn_ts_sorter_node *node,
@@ -1074,7 +1516,22 @@ grn_ts_sorter_node_sort_by_var(grn_ctx *ctx, grn_ts_sorter_node *node,
       }
       return grn_ts_qsort_by_int(ctx, node, offset, limit, vals, recs, n_recs);
     }
-    case GRN_TS_TEXT:
+    case GRN_TS_TEXT: {
+      grn_ts_text *vals;
+      rc = grn_ts_expr_evaluate_to_buf(ctx, node->expr, recs, n_recs,
+                                       &node->buf);
+      if (rc != GRN_SUCCESS) {
+        return rc;
+      }
+      vals = (grn_ts_text *)node->buf.ptr;
+      if (node->reverse) {
+        return grn_ts_qsort_by_text_desc(ctx, node, offset, limit,
+                                         vals, recs, n_recs);
+      } else {
+        return grn_ts_qsort_by_text_asc(ctx, node, offset, limit,
+                                        vals, recs, n_recs);
+      }
+    }
     case GRN_TS_INT_VECTOR:
     case GRN_TS_FLOAT_VECTOR:
     case GRN_TS_TIME_VECTOR:
@@ -1423,10 +1880,10 @@ grn_ts_sorter_builder_push(grn_ctx *ctx, grn_ts_sorter_builder *builder,
   switch (expr->data_kind) {
     case GRN_TS_INT:
     case GRN_TS_FLOAT:
-    case GRN_TS_TIME: {
+    case GRN_TS_TIME:
+    case GRN_TS_TEXT: {
       break;
     }
-    case GRN_TS_TEXT:
     case GRN_TS_INT_VECTOR:
     case GRN_TS_FLOAT_VECTOR:
     case GRN_TS_TIME_VECTOR:
-------------- next part --------------
HTML����������������������������...
下載 



More information about the Groonga-commit mailing list
Back to archive index