[Groonga-commit] groonga/groonga [master] work geo search with index again.

Back to archive index

null+****@clear***** null+****@clear*****
2010年 8月 3日 (火) 15:42:07 JST


Kouhei Sutou	2010-08-03 06:42:07 +0000 (Tue, 03 Aug 2010)

  New Revision: a440be4d479e5040562543a7836de595581f736a

  Log:
    work geo search with index again.

  Modified files:
    lib/db.c

  Modified: lib/db.c (+207 -155)
===================================================================
--- lib/db.c    2010-07-28 08:24:12 +0000 (1953fea)
+++ lib/db.c    2010-08-03 06:42:07 +0000 (38ffd5a)
@@ -6509,105 +6509,142 @@ range_is_idp(grn_obj *obj)
 #define GEO_GRS_C1       6335439
 #define GEO_GRS_C2       6378137
 #define GEO_GRS_C3       0.006694
-#define GEO_INT2RAD(x)   ((M_PI / (GEO_RESOLUTION * 180)) * x)
+#define GEO_INT2RAD(x)   ((M_PI / (GEO_RESOLUTION * 180)) * (x))
 
 typedef struct {
   grn_id id;
   double d;
 } geo_entry;
 
+static int
+compute_diff_bit(uint8_t *geo_key1, uint8_t *geo_key2)
+{
+  int i, diff_bit = 0;
+
+  for (i = 0; i < sizeof(grn_geo_point); i++) {
+    if (geo_key1[i] == geo_key2[i]) {
+      continue;
+    }
+
+    if ((geo_key1[i] & 0xc0) != (geo_key2[i] & 0xc0)) {
+      diff_bit = 0;
+      break;
+    } else if ((geo_key1[i] & 0x30) != (geo_key2[i] & 0x30)) {
+      diff_bit = 2;
+      break;
+    } else if ((geo_key1[i] & 0x0c) != (geo_key2[i] & 0x0c)) {
+      diff_bit = 4;
+      break;
+    } else if ((geo_key1[i] & 0x03) != (geo_key2[i] & 0x03)) {
+      diff_bit = 6;
+      break;
+    }
+  }
+
+  return i * 8 + diff_bit;
+}
+
 static void
+compute_min_and_max(grn_geo_point *base_point, int diff_bit,
+                    grn_geo_point *geo_min, grn_geo_point *geo_max)
+{
+  int diff_byte, diff_bit_mask;
+  uint8_t geo_key_base[sizeof(grn_geo_point)];
+  uint8_t geo_key_min[sizeof(grn_geo_point)];
+  uint8_t geo_key_max[sizeof(grn_geo_point)];
+
+  diff_byte = diff_bit / 8;
+  diff_bit_mask = 0xff >> (diff_bit % 8);
+  grn_gton(geo_key_base, base_point, sizeof(grn_geo_point));
+
+  memcpy(geo_key_min, geo_key_base, diff_byte + 1);
+  geo_key_min[diff_byte] &= ~diff_bit_mask;
+  memset(geo_key_min + diff_byte + 1, 0,
+         sizeof(grn_geo_point) - diff_byte - 1);
+
+  memcpy(geo_key_max, geo_key_base, diff_byte + 1);
+  geo_key_max[diff_byte] |= diff_bit_mask;
+  memset(geo_key_max + diff_byte + 1, 0xff,
+         sizeof(grn_geo_point) - diff_byte - 1);
+
+  grn_ntog((uint8_t *)geo_min, geo_key_min, sizeof(grn_geo_point));
+  grn_ntog((uint8_t *)geo_max, geo_key_max, sizeof(grn_geo_point));
+}
+
+static int
 grn_table_sort_geo_detect_far_point(grn_ctx *ctx, grn_obj *table, grn_obj *index,
-                                    grn_pat *pat,
+                                    grn_pat *pat, geo_entry *entries,
                                     grn_pat_cursor *pc, int n, int accessorp,
                                     grn_geo_point *base_point,
-                                    int *lng_far, int *lat_far, double *d_far)
+                                    double *d_far,
+                                    grn_geo_point *geo_min, grn_geo_point *geo_max,
+                                    int *diff_bit)
 {
-  int i = 0;
+  int i = 0, diff_bit_prev, diff_bit_current;
   grn_id tid;
+  geo_entry *ep, *p;
   double lng1, lat1, lng2, lat2, x, y, d;
+  uint8_t geo_key_prev[sizeof(grn_geo_point)];
+  uint8_t geo_key_curr[sizeof(grn_geo_point)];
+  grn_geo_point point;
 
-  *lng_far = base_point->longitude;
-  *lat_far = base_point->latitude;
-  lng1 = GEO_INT2RAD(*lng_far);
-  lat1 = GEO_INT2RAD(*lat_far);
+  lng1 = GEO_INT2RAD(base_point->longitude);
+  lat1 = GEO_INT2RAD(base_point->latitude);
   *d_far = 0.0;
-  while (i < n && (tid = grn_pat_cursor_next(ctx, pc))) {
+  grn_gton(geo_key_curr, base_point, sizeof(grn_geo_point));
+  *diff_bit = sizeof(grn_geo_point) * 8;
+  diff_bit_current = sizeof(grn_geo_point) * 8;
+  memcpy(&point, base_point, sizeof(grn_geo_point));
+  ep = entries;
+  while ((tid = grn_pat_cursor_next(ctx, pc))) {
     grn_ii_cursor *ic = grn_ii_cursor_open(ctx, (grn_ii *)index, tid, 0, 0, 1, 0);
     if (ic) {
-      grn_geo_point pos;
       grn_ii_posting *posting;
-      grn_pat_get_key(ctx, pat, tid, &pos, sizeof(grn_geo_point));
-      lng2 = GEO_INT2RAD(pos.longitude);
-      lat2 = GEO_INT2RAD(pos.latitude);
+      grn_gton(geo_key_prev, &point, sizeof(grn_geo_point));
+      grn_pat_get_key(ctx, pat, tid, &point, sizeof(grn_geo_point));
+      grn_gton(geo_key_curr, &point, sizeof(grn_geo_point));
+      lng2 = GEO_INT2RAD(point.longitude);
+      lat2 = GEO_INT2RAD(point.latitude);
       x = (lng2 - lng1) * cos((lat1 + lat2) * 0.5);
       y = (lat2 - lat1);
       d = ((x * x) + (y * y));
+
+      diff_bit_prev = diff_bit_current;
+      diff_bit_current = compute_diff_bit(geo_key_curr, geo_key_prev);
+      if (diff_bit_current < diff_bit_prev && *diff_bit > diff_bit_current) {
+        if (i == n) {
+          break;
+        }
+        *diff_bit = diff_bit_current;
+      }
+
       if (d > *d_far) {
-        *lng_far = pos.longitude;
-        *lat_far = pos.latitude;
         *d_far = d;
       }
-      while (i < n && (posting = grn_ii_cursor_next(ctx, ic))) {
+      while ((posting = grn_ii_cursor_next(ctx, ic))) {
         grn_id rid = accessorp
           ? grn_table_get(ctx, table, &posting->rid, sizeof(grn_id))
           : posting->rid;
         if (rid) {
-          i++;
+          for (p = ep; entries < p && p[-1].d > d; p--) {
+            p->id = p[-1].id;
+            p->d = p[-1].d;
+          }
+          p->id = rid;
+          p->d = d;
+          if (i < n) {
+            ep++;
+            i++;
+          }
         }
       }
       grn_ii_cursor_close(ctx, ic);
     }
   }
-}
 
-static void
-grn_table_sort_geo_detect_mesh(grn_ctx *ctx, grn_obj *table,
-                               grn_geo_point *base_point,
-                               int lng_far, int lat_far, double d_far,
-                               grn_geo_point *geo_min, grn_geo_point *geo_max,
-                               int *diff_byte, int *diff_bit)
-{
-  int i;
-  int diff_bit_mask = 0;
-  uint8_t geo_key_base[sizeof(grn_geo_point)];
-  uint8_t geo_key_far[sizeof(grn_geo_point)];
-  uint8_t geo_key_min[sizeof(grn_geo_point)];
-  uint8_t geo_key_max[sizeof(grn_geo_point)];
-  grn_geo_point point;
+  compute_min_and_max(base_point, *diff_bit, geo_min, geo_max);
 
-  grn_gton(geo_key_base, base_point, sizeof(grn_geo_point));
-  point.latitude = lat_far;
-  point.longitude = lng_far;
-  grn_gton(geo_key_far, &point, sizeof(grn_geo_point));
-  for (i = 0; i < sizeof(grn_geo_point); i++) {
-    if (((geo_key_base[i] >> 6) & 3) != ((geo_key_far[i] >> 6) & 3)) {
-      *diff_bit = 0;
-      diff_bit_mask = 0xff >> 0;
-      break;
-    } else if (((geo_key_base[i] >> 4) & 3) != ((geo_key_far[i] >> 4) & 3)) {
-      *diff_bit = 2;
-      diff_bit_mask = 0xff >> 2;
-      break;
-    } else if (((geo_key_base[i] >> 2) & 3) != ((geo_key_far[i] >> 2) & 3)) {
-      *diff_bit = 4;
-      diff_bit_mask = 0xff >> 4;
-      break;
-    } else if (((geo_key_base[i] >> 0) & 3) != ((geo_key_far[i] >> 0) & 3)) {
-      *diff_bit = 6;
-      diff_bit_mask = 0xff >> 6;
-      break;
-    }
-  }
-  *diff_byte = i;
-  memcpy(geo_key_min, geo_key_base, i + 1);
-  geo_key_min[i] &= ~diff_bit_mask;
-  memset(geo_key_min + i + 1, 0, sizeof(grn_geo_point) - i - 1);
-  memcpy(geo_key_max, geo_key_base, i + 1);
-  geo_key_max[i] |= diff_bit_mask;
-  memset(geo_key_max + i + 1, 0xff, sizeof(grn_geo_point) - i - 1);
-  grn_ntog((uint8_t *)geo_min, geo_key_min, sizeof(grn_geo_point));
-  grn_ntog((uint8_t *)geo_max, geo_key_max, sizeof(grn_geo_point));
+  return i;
 }
 
 typedef struct
@@ -6616,66 +6653,76 @@ typedef struct
   int key_size;
 } mesh_entry;
 
-static void
+static int
 grn_table_sort_geo_collect_points(grn_ctx *ctx, grn_obj *table, grn_obj *index,
                                   grn_pat *pat,
+                                  geo_entry *entries, int n_entries,
                                   int n, int accessorp,
                                   grn_geo_point *base_point,
-                                  int lng_far, int lat_far, double d_far,
-                                  geo_entry **entries, int *n_entries)
+                                  double d_far,
+                                  grn_geo_point *geo_min, grn_geo_point *geo_max,
+                                  int diff_bit)
 {
-  int n_meshes, e;
-  grn_geo_point geo_min, geo_max, geo_base;
-  mesh_entry meshes[20];
-  int diff_byte = 0, diff_bit = 0, key_size;
+  int n_meshes;
+  grn_geo_point geo_base;
+  mesh_entry meshes[19];
   int lat_diff, lng_diff;
   double lng1, lat1, lng2, lat2, x, y, d;
   geo_entry *ep, *p;
 
-  grn_table_sort_geo_detect_mesh(ctx, table, base_point, lng_far, lat_far, d_far,
-                                 &geo_min, &geo_max, &diff_byte, &diff_bit);
-  key_size = diff_byte * 8 + diff_bit;
-  lat1 = base_point->latitude;
-  lng1 = base_point->longitude;
-  lat_diff = geo_max.latitude - geo_min.latitude;
-  lng_diff = geo_max.longitude - geo_min.longitude;
-  if (lat1 >= geo_min.latitude + lat_diff / 2) {
-    geo_base.latitude = geo_max.latitude;
-    if (lng1 >= geo_max.longitude + lng_diff / 2) {
-      geo_base.longitude = geo_max.longitude;
+  lat1 = GEO_INT2RAD(base_point->latitude);
+  lng1 = GEO_INT2RAD(base_point->longitude);
+  lat_diff = geo_max->latitude - geo_min->latitude;
+  lng_diff = geo_max->longitude - geo_min->longitude;
+  if (base_point->latitude >= geo_min->latitude + lat_diff / 2) {
+    geo_base.latitude = geo_max->latitude;
+    if (base_point->longitude >= geo_min->longitude + lng_diff / 2) {
+      geo_base.longitude = geo_max->longitude;
     } else {
-      geo_base.longitude = geo_min.longitude;
+      geo_base.longitude = geo_min->longitude;
     }
   } else {
-    geo_base.latitude = geo_min.latitude;
-    if (lng1 >= geo_max.longitude + lng_diff / 2) {
-      geo_base.longitude = geo_max.longitude;
+    geo_base.latitude = geo_min->latitude;
+    if (base_point->longitude >= geo_min->longitude + lng_diff / 2) {
+      geo_base.longitude = geo_max->longitude;
     } else {
-      geo_base.longitude = geo_min.longitude;
+      geo_base.longitude = geo_min->longitude;
     }
   }
 
   n_meshes = 0;
 
 #define add_mesh(lat_diff_,lng_diff_,key_size_)\
-  meshes[n_meshes].key.latitude = geo_min.latitude + lat_diff_;\
-  meshes[n_meshes].key.longitude = geo_min.longitude + lng_diff_;\
+  meshes[n_meshes].key.latitude = geo_base.latitude + (lat_diff_);\
+  meshes[n_meshes].key.longitude = geo_base.longitude + (lng_diff_);\
   meshes[n_meshes].key_size = key_size_;\
   n_meshes++;
 
-  add_mesh(0, 0, key_size);
-  add_mesh(0, -lng_diff, key_size);
-  add_mesh(-lat_diff, -lng_diff, key_size);
-  add_mesh(-lat_diff, 0, key_size);
+  if (geo_base.latitude != geo_min->latitude ||
+      geo_base.longitude != geo_min->longitude) {
+    add_mesh(1, 1, diff_bit);
+  }
+  if (geo_base.latitude != geo_min->latitude ||
+      geo_base.longitude != geo_max->longitude) {
+    add_mesh(1, -lng_diff + 1, diff_bit);
+  }
+  if (geo_base.latitude != geo_max->latitude ||
+      geo_base.longitude != geo_max->longitude) {
+    add_mesh(-lat_diff + 1, -lng_diff + 1, diff_bit);
+  }
+  if (geo_base.latitude != geo_max->latitude ||
+      geo_base.longitude != geo_min->longitude) {
+    add_mesh(-lat_diff + 1, 1, diff_bit);
+  }
 
 #define add_sub_mesh(lat_diff_cmp,lng_diff_cmp,lat_diff_base,lng_diff_base)\
-  lat2 = geo_base.latitude + lat_diff_cmp;\
-  lng2 = geo_base.longitude + lng_diff_cmp;\
+  lat2 = GEO_INT2RAD(geo_base.latitude + lat_diff_cmp);\
+  lng2 = GEO_INT2RAD(geo_base.longitude + lng_diff_cmp);\
   x = (lng2 - lng1) * cos((lat1 + lat2) * 0.5);\
   y = (lat2 - lat1);\
   d = ((x * x) + (y * y));\
-  if (d > d_far) {\
-    add_mesh(lat_diff_base, lng_diff_base, key_size + 2);\
+  if (d < d_far) {\
+    add_mesh(lat_diff_base, lng_diff_base, diff_bit + 2);\
   }
 
   add_sub_mesh(lat_diff + 1, -lng_diff / 2,
@@ -6717,53 +6764,51 @@ grn_table_sort_geo_collect_points(grn_ctx *ctx, grn_obj *table, grn_obj *index,
 #undef add_sub_mesh
 #undef add_mesh
 
-  e = 0;
-  if ((ep = *entries = GRN_MALLOC(sizeof(geo_entry) * (n + 1)))) {
+  ep = entries + n_entries;
+  while (n_meshes--) {
     grn_id tid;
-    while (n_meshes--) {
-      grn_pat_cursor *pc = grn_pat_cursor_open(ctx, pat,
-                                               &(meshes[n_meshes].key),
-                                               meshes[n_meshes].key_size,
-                                               NULL, 0,
-                                               0, -1,
-                                               GRN_CURSOR_PREFIX|GRN_CURSOR_SIZE_BY_BIT);
-      if (pc) {
-        while ((tid = grn_pat_cursor_next(ctx, pc))) {
-          grn_ii_cursor *ic = grn_ii_cursor_open(ctx, (grn_ii *)index, tid, 0, 0, 1, 0);
-          if (ic) {
-            grn_geo_point pos;
-            grn_ii_posting *posting;
-            grn_pat_get_key(ctx, pat, tid, &pos, sizeof(grn_geo_point));
-            lng2 = GEO_INT2RAD(pos.longitude);
-            lat2 = GEO_INT2RAD(pos.latitude);
-            x = (lng2 - lng1) * cos((lat1 + lat2) * 0.5);
-            y = (lat2 - lat1);
-            d = ((x * x) + (y * y));
-            while ((posting = grn_ii_cursor_next(ctx, ic))) {
-              grn_id rid = accessorp
-                ? grn_table_get(ctx, table, &posting->rid, sizeof(grn_id))
-                : posting->rid;
-              if (rid) {
-                for (p = ep; *entries < p && p[-1].d > d; p--) {
-                  p->id = p[-1].id;
-                  p->d = p[-1].d;
-                }
-                p->id = rid;
-                p->d = d;
-                if (e < n) {
-                  ep++;
-                  e++;
-                }
+    grn_pat_cursor *pc = grn_pat_cursor_open(ctx, pat,
+                                             &(meshes[n_meshes].key),
+                                             meshes[n_meshes].key_size,
+                                             NULL, 0,
+                                             0, -1,
+                                             GRN_CURSOR_PREFIX|GRN_CURSOR_SIZE_BY_BIT);
+    if (pc) {
+      while ((tid = grn_pat_cursor_next(ctx, pc))) {
+        grn_ii_cursor *ic = grn_ii_cursor_open(ctx, (grn_ii *)index, tid, 0, 0, 1, 0);
+        if (ic) {
+          grn_geo_point pos;
+          grn_ii_posting *posting;
+          grn_pat_get_key(ctx, pat, tid, &pos, sizeof(grn_geo_point));
+          lng2 = GEO_INT2RAD(pos.longitude);
+          lat2 = GEO_INT2RAD(pos.latitude);
+          x = (lng2 - lng1) * cos((lat1 + lat2) * 0.5);
+          y = (lat2 - lat1);
+          d = ((x * x) + (y * y));
+          while ((posting = grn_ii_cursor_next(ctx, ic))) {
+            grn_id rid = accessorp
+              ? grn_table_get(ctx, table, &posting->rid, sizeof(grn_id))
+              : posting->rid;
+            if (rid) {
+              for (p = ep; entries < p && p[-1].d > d; p--) {
+                p->id = p[-1].id;
+                p->d = p[-1].d;
+              }
+              p->id = rid;
+              p->d = d;
+              if (n_entries < n) {
+                ep++;
+                n_entries++;
               }
             }
-            grn_ii_cursor_close(ctx, ic);
           }
+          grn_ii_cursor_close(ctx, ic);
         }
-        grn_pat_cursor_close(ctx, pc);
       }
+      grn_pat_cursor_close(ctx, pc);
     }
   }
-  *n_entries = e;
+  return n_entries;
 }
 
 static int
@@ -6797,32 +6842,39 @@ grn_table_sort_geo(grn_ctx *ctx, grn_obj *table, int offset, int limit,
             grn_ii_cursor_close(ctx, ic);
           }
         }
+        grn_pat_cursor_close(ctx, pc);
       } else {
-        int lng_far, lat_far;
-        double d_far;
-        grn_geo_point *base_point;
-        geo_entry *array = NULL, *ep;
-        int n = 0;
-        base_point = (grn_geo_point *)GRN_BULK_HEAD(arg);
-        grn_table_sort_geo_detect_far_point(ctx, table, index, pat,
-                                            pc, e, accessorp,
-                                            base_point,
-                                            &lat_far, &lng_far, &d_far);
-        grn_table_sort_geo_collect_points(ctx, table, index, pat,
-                                          e, accessorp,
-                                          base_point,
-                                          lat_far, lng_far, d_far,
-                                          &array, &n);
-        if (array) {
+        geo_entry *entries;
+
+        if ((entries = GRN_MALLOC(sizeof(geo_entry) * (e + 1)))) {
+          int n, diff_bit;
+          double d_far;
           grn_id *v;
-          for (i = 0, ep = array + offset; i < limit && ep < array + n; i++, ep++) {
+          grn_geo_point *base_point, geo_min, geo_max;
+          geo_entry *ep;
+
+          base_point = (grn_geo_point *)GRN_BULK_HEAD(arg);
+          n = grn_table_sort_geo_detect_far_point(ctx, table, index, pat,
+                                                  entries, pc, e, accessorp,
+                                                  base_point,
+                                                  &d_far,
+                                                  &geo_min, &geo_max,
+                                                  &diff_bit);
+          grn_pat_cursor_close(ctx, pc);
+          if (diff_bit > 0) {
+            n += grn_table_sort_geo_collect_points(ctx, table, index, pat,
+                                                   entries, n, e, accessorp,
+                                                   base_point, d_far,
+                                                   &geo_min, &geo_max,
+                                                   diff_bit);
+          }
+          for (i = 0, ep = entries + offset; i < limit && ep < entries + n; i++, ep++) {
             if (!grn_array_add(ctx, (grn_array *)result, (void **)&v)) { break; }
             *v = ep->id;
           }
-          GRN_FREE(array);
+          GRN_FREE(entries);
         }
       }
-      grn_pat_cursor_close(ctx, pc);
     }
   }
   return i;




Groonga-commit メーリングリストの案内
Back to archive index