[Groonga-commit] groonga/grnxx at 61f75f5 [new_data_types] Implement Index for Int, Float, and Text. (#121)

Back to archive index

susumu.yata null+****@clear*****
Fri Nov 28 15:08:25 JST 2014


susumu.yata	2014-11-28 15:08:25 +0900 (Fri, 28 Nov 2014)

  New Revision: 61f75f5fd13c709d50ac5cc8c58a1e11efae7edb
  https://github.com/groonga/grnxx/commit/61f75f5fd13c709d50ac5cc8c58a1e11efae7edb

  Message:
    Implement Index for Int, Float, and Text. (#121)

  Modified files:
    include/grnxx/index.hpp
    lib/grnxx/impl/column/base.cpp
    lib/grnxx/impl/index.cpp
    lib/grnxx/impl/index.hpp

  Modified: include/grnxx/index.hpp (+4 -4)
===================================================================
--- include/grnxx/index.hpp    2014-11-27 22:58:44 +0900 (b18df4a)
+++ include/grnxx/index.hpp    2014-11-28 15:08:25 +0900 (c6bbf81)
@@ -99,16 +99,16 @@ class Index {
   // Remove an entry.
   //
   // On failure, throws an exception.
-  virtual bool remove(Int row_id, const Datum &value) = 0;
+  virtual void remove(Int row_id, const Datum &value) = 0;
 
   // Return whether "value" is registered or not.
-  virtual bool contains(const Datum &value) const;
+  virtual bool contains(const Datum &value) const = 0;
 
   // Find "datum".
   //
   // If found, returns the row ID.
   // If not found, returns N/A.
-  virtual Int find_one(const Datum &value) const;
+  virtual Int find_one(const Datum &value) const = 0;
 
   // Create a cursor to get records.
   //
@@ -116,7 +116,7 @@ class Index {
   // On failure, throws an exception.
   virtual std::unique_ptr<Cursor> find(
       const Datum &value,
-      const CursorOptions &options = CursorOptions()) const;
+      const CursorOptions &options = CursorOptions()) const = 0;
 
  protected:
   virtual ~Index() = default;

  Modified: lib/grnxx/impl/column/base.cpp (+26 -32)
===================================================================
--- lib/grnxx/impl/column/base.cpp    2014-11-27 22:58:44 +0900 (667c17d)
+++ lib/grnxx/impl/column/base.cpp    2014-11-28 15:08:25 +0900 (0e8ff66)
@@ -33,44 +33,38 @@ Index *ColumnBase::create_index(
     const String &name,
     IndexType type,
     const IndexOptions &options) {
-  throw "Not supported yet";  // TODO
-
-//  if (find_index(name)) {
-//    throw "Index already exists";  // TODO
-//  }
-//  indexes_.reserve(indexes_.size() + 1);
-//  std::unique_ptr<Index> new_index = Index::create(this, name, type, options);
-//  indexes_.push_back(std::move(new_index));
-//  return indexes_.back().get();
+  if (find_index(name)) {
+    throw "Index already exists";  // TODO
+  }
+  indexes_.reserve(indexes_.size() + 1);
+  std::unique_ptr<Index> new_index(Index::create(this, name, type, options));
+  indexes_.push_back(std::move(new_index));
+  return indexes_.back().get();
 }
 
 void ColumnBase::remove_index(const String &name) {
-  throw "Not supported yet";  // TODO
-
-//  size_t index_id;
-//  if (!find_index_with_id(name, &index_id)) {
-//    throw "Index not found";  // TODO
-//  }
-//  if (!indexes_[index_id]->is_removable()) {
-//    throw "Index not removable";  // TODO
-//  }
-//  indexes_.erase(index_id);
+  size_t index_id;
+  if (!find_index_with_id(name, &index_id)) {
+    throw "Index not found";  // TODO
+  }
+  if (!indexes_[index_id]->is_removable()) {
+    throw "Index not removable";  // TODO
+  }
+  indexes_.erase(index_id);
 }
 
 void ColumnBase::rename_index(const String &name, const String &new_name) {
-  throw "Not supported yet";  // TODO
-
-//  size_t index_id;
-//  if (!find_index_with_id(name, &index_id)) {
-//    throw "Index not found";  // TODO
-//  }
-//  if (name == new_name) {
-//    return;
-//  }
-//  if (find_index(new_name)) {
-//    throw "Index already exists";  // TODO
-//  }
-//  indexes_[index_id]->rename(new_name);
+  size_t index_id;
+  if (!find_index_with_id(name, &index_id)) {
+    throw "Index not found";  // TODO
+  }
+  if (name == new_name) {
+    return;
+  }
+  if (find_index(new_name)) {
+    throw "Index already exists";  // TODO
+  }
+  indexes_[index_id]->rename(new_name);
 }
 
 void ColumnBase::reorder_index(const String &name, const String &prev_name) {

  Modified: lib/grnxx/impl/index.cpp (+487 -0)
===================================================================
--- lib/grnxx/impl/index.cpp    2014-11-27 22:58:44 +0900 (a9c5b36)
+++ lib/grnxx/impl/index.cpp    2014-11-28 15:08:25 +0900 (791df7e)
@@ -1,7 +1,494 @@
 #include "grnxx/impl/index.hpp"
 
+#include <map>
+#include <set>
+
+#include "grnxx/impl/column.hpp"
+#include "grnxx/impl/cursor.hpp"
+
 namespace grnxx {
 namespace impl {
+namespace index {
+
+// TODO: These are test implementations.
+
+// -- IteratorCursor --
+
+template <typename T>
+class IteratorCursor : public Cursor {
+ public:
+  using Iterator = T;
+
+  IteratorCursor(Iterator begin, Iterator end, size_t offset, size_t limit)
+      : Cursor(),
+        it_(begin),
+        end_(end),
+        offset_(offset),
+        limit_(limit) {}
+  ~IteratorCursor() = default;
+
+  size_t read(ArrayRef<Record> records);
+
+ private:
+  Iterator it_;
+  Iterator end_;
+  size_t offset_;
+  size_t limit_;
+};
+
+template <typename T>
+size_t IteratorCursor<T>::read(ArrayRef<Record> records) {
+  size_t max_count = records.size();
+  if (max_count > limit_) {
+    max_count = limit_;
+  }
+  if (max_count <= 0) {
+    return 0;
+  }
+  size_t count = 0;
+  while ((offset_ > 0) && (it_ != end_)) {
+    ++it_;
+    --offset_;
+  }
+  while ((count < max_count) && (it_ != end_)) {
+    records[count] = Record(*it_, Float(0.0));
+    ++count;
+    ++it_;
+  }
+  limit_ -= count;
+  return count;
+}
+
+// Helper function to create an iterator cursor.
+template <typename T>
+std::unique_ptr<Cursor> create_iterator_cursor(T begin,
+                                               T end,
+                                               size_t offset,
+                                               size_t limit) {
+  return std::unique_ptr<Cursor>(
+      new IteratorCursor<T>(begin, end, offset, limit));
+}
+
+// TODO: It's not clear that a reverse cursor should return row IDs in
+//       reverse order or not.
+//
+// Helper function to create a reverse iterator cursor.
+template <typename T>
+std::unique_ptr<Cursor> create_reverse_iterator_cursor(T begin,
+                                                       T end,
+                                                       size_t offset,
+                                                       size_t limit) {
+  using ReverseIterator = std::reverse_iterator<T>;
+  return std::unique_ptr<Cursor>(
+      new IteratorCursor<ReverseIterator>(ReverseIterator(end),
+                                          ReverseIterator(begin),
+                                          offset,
+                                          limit));
+}
+
+template <typename T> class TreeIndex;
+
+template <>
+class TreeIndex<Int> : public Index {
+ public:
+  struct Less {
+    bool operator()(Int lhs, Int rhs) const {
+      return lhs.raw() < rhs.raw();
+    }
+  };
+
+  using Value = Int;
+  using Set = std::set<Int, Less>;
+  using Map = std::map<Value, Set, Less>;
+
+  TreeIndex(ColumnBase *column,
+            const String &name,
+            const IndexOptions &options);
+  ~TreeIndex() = default;
+
+  IndexType type() const {
+    return TREE_INDEX;
+  }
+
+  void insert(Int row_id, const Datum &value);
+  void remove(Int row_id, const Datum &value);
+
+  std::unique_ptr<Cursor> find(const Datum &datum,
+                               const CursorOptions &options) const;
+
+ private:
+  mutable Map map_;
+};
+
+TreeIndex<Int>::TreeIndex(ColumnBase *column,
+                          const String &name,
+                          const IndexOptions &options)
+    : Index(column, name),
+      map_() {
+  auto cursor = column->table()->create_cursor();
+  auto typed_column = static_cast<Column<Int> *>(column);
+  Array<Record> records;
+  Array<Value> values;
+  for ( ; ; ) {
+    size_t count = cursor->read(1024, &records);
+    if (count == 0) {
+      break;
+    }
+    values.resize(records.size());
+    typed_column->read(records, values.ref());
+    for (size_t i = 0; i < count; ++i) {
+      insert(records[i].row_id, values[i]);
+    }
+    records.clear();
+  }
+}
+
+void TreeIndex<Int>::insert(Int row_id, const Datum &value) {
+  auto result = map_[value.as_int()].insert(row_id);
+  if (!result.second) {
+    throw "Entry already exists";  // TODO
+  }
+}
+
+void TreeIndex<Int>::remove(Int row_id, const Datum &value) {
+  auto map_it = map_.find(value.as_int());
+  if (map_it == map_.end()) {
+    throw "Entry not found";  // TODO
+  }
+  auto set_it = map_it->second.find(row_id);
+  if (set_it == map_it->second.end()) {
+    throw "Entry not found";  // TODO
+  }
+  map_it->second.erase(set_it);
+  if (map_it->second.size() == 0) {
+    map_.erase(map_it);
+  }
+}
+
+std::unique_ptr<Cursor> TreeIndex<Int>::find(
+    const Datum &datum,
+    const CursorOptions &options) const {
+  if (datum.type() != INT_DATA) {
+    throw "Data type conflict";  // TODO
+  }
+  auto map_it = map_.find(datum.as_int());
+  if (map_it == map_.end()) {
+    return std::unique_ptr<Cursor>(new EmptyCursor);
+  } else {
+    auto set_begin = map_it->second.begin();
+    auto set_end = map_it->second.end();
+    if (options.order_type == CURSOR_REGULAR_ORDER) {
+      return create_iterator_cursor(set_begin,
+                                    set_end,
+                                    options.offset,
+                                    options.limit);
+    } else {
+      return create_reverse_iterator_cursor(set_begin,
+                                            set_end,
+                                            options.offset,
+                                            options.limit);
+    }
+  }
+}
+
+template <>
+class TreeIndex<Float> : public Index {
+ public:
+  struct Less {
+    bool operator()(Int lhs, Int rhs) const {
+      return lhs.raw() < rhs.raw();
+    }
+    bool operator()(Float lhs, Float rhs) const {
+      return lhs.raw() < rhs.raw();
+    }
+  };
+
+  using Value = Float;
+  using Set = std::set<Int, Less>;
+  using Map = std::map<Value, Set, Less>;
+
+  TreeIndex(ColumnBase *column,
+            const String &name,
+            const IndexOptions &options);
+  ~TreeIndex() = default;
+
+  IndexType type() const {
+    return TREE_INDEX;
+  }
+
+  void insert(Int row_id, const Datum &value);
+  void remove(Int row_id, const Datum &value);
+
+  std::unique_ptr<Cursor> find(const Datum &datum,
+                               const CursorOptions &options) const;
+
+ private:
+  mutable Map map_;
+};
+
+TreeIndex<Float>::TreeIndex(ColumnBase *column,
+                            const String &name,
+                            const IndexOptions &options)
+    : Index(column, name),
+      map_() {
+  auto cursor = column->table()->create_cursor();
+  auto typed_column = static_cast<Column<Float> *>(column);
+  Array<Record> records;
+  Array<Value> values;
+  for ( ; ; ) {
+    size_t count = cursor->read(1024, &records);
+    if (count == 0) {
+      break;
+    }
+    values.resize(records.size());
+    typed_column->read(records, values.ref());
+    for (size_t i = 0; i < count; ++i) {
+      insert(records[i].row_id, values[i]);
+    }
+    records.clear();
+  }
+}
+
+void TreeIndex<Float>::insert(Int row_id, const Datum &value) {
+  auto result = map_[value.as_float()].insert(row_id);
+  if (!result.second) {
+    throw "Entry already exists";  // TODO
+  }
+}
+
+void TreeIndex<Float>::remove(Int row_id, const Datum &value) {
+  auto map_it = map_.find(value.as_float());
+  if (map_it == map_.end()) {
+    throw "Entry not found";  // TODO
+  }
+  auto set_it = map_it->second.find(row_id);
+  if (set_it == map_it->second.end()) {
+    throw "Entry not found";  // TODO
+  }
+  map_it->second.erase(set_it);
+  if (map_it->second.size() == 0) {
+    map_.erase(map_it);
+  }
+}
+
+std::unique_ptr<Cursor> TreeIndex<Float>::find(
+    const Datum &datum,
+    const CursorOptions &options) const {
+  if (datum.type() != FLOAT_DATA) {
+    throw "Data type conflict";  // TODO
+  }
+  auto map_it = map_.find(datum.as_float());
+  if (map_it == map_.end()) {
+    return std::unique_ptr<Cursor>(new EmptyCursor);
+  } else {
+    auto set_begin = map_it->second.begin();
+    auto set_end = map_it->second.end();
+    if (options.order_type == CURSOR_REGULAR_ORDER) {
+      return create_iterator_cursor(set_begin,
+                                    set_end,
+                                    options.offset,
+                                    options.limit);
+    } else {
+      return create_reverse_iterator_cursor(set_begin,
+                                            set_end,
+                                            options.offset,
+                                            options.limit);
+    }
+  }
+}
+
+template <>
+class TreeIndex<Text> : public Index {
+ public:
+  struct Less {
+    bool operator()(Int lhs, Int rhs) const {
+      return lhs.raw() < rhs.raw();
+    }
+  };
+
+  using Value = Text;
+  using Set = std::set<Int, Less>;
+  using Map = std::map<String, Set>;
+
+  TreeIndex(ColumnBase *column,
+            const String &name,
+            const IndexOptions &options);
+  ~TreeIndex() = default;
+
+  IndexType type() const {
+    return TREE_INDEX;
+  }
+
+  void insert(Int row_id, const Datum &value);
+  void remove(Int row_id, const Datum &value);
+
+  std::unique_ptr<Cursor> find(const Datum &datum,
+                               const CursorOptions &options) const;
+
+ private:
+  mutable Map map_;
+};
+
+TreeIndex<Text>::TreeIndex(ColumnBase *column,
+                           const String &name,
+                           const IndexOptions &options)
+    : Index(column, name),
+      map_() {
+  auto cursor = column->table()->create_cursor();
+  auto typed_column = static_cast<Column<Text> *>(column);
+  Array<Record> records;
+  Array<Value> values;
+  for ( ; ; ) {
+    size_t count = cursor->read(1024, &records);
+    if (count == 0) {
+      break;
+    }
+    values.resize(records.size());
+    typed_column->read(records, values.ref());
+    for (size_t i = 0; i < count; ++i) {
+      insert(records[i].row_id, values[i]);
+    }
+    records.clear();
+  }
+}
+
+void TreeIndex<Text>::insert(Int row_id, const Datum &value) {
+  Text text = value.as_text();
+  String string;
+  string.assign(text.raw_data(), text.raw_size());
+  auto result = map_[std::move(string)].insert(row_id);
+  if (!result.second) {
+    throw "Entry already exists";  // TODO
+  }
+}
+
+void TreeIndex<Text>::remove(Int row_id, const Datum &value) {
+  Text text = value.as_text();
+  String string(text.raw_data(), text.raw_size());
+  auto map_it = map_.find(string);
+  if (map_it == map_.end()) {
+    throw "Entry not found";  // TODO
+  }
+  auto set_it = map_it->second.find(row_id);
+  if (set_it == map_it->second.end()) {
+    throw "Entry not found";  // TODO
+  }
+  map_it->second.erase(set_it);
+  if (map_it->second.size() == 0) {
+    map_.erase(map_it);
+  }
+}
+
+std::unique_ptr<Cursor> TreeIndex<Text>::find(
+    const Datum &datum,
+    const CursorOptions &options) const {
+  if (datum.type() != TEXT_DATA) {
+    throw "Data type conflict";  // TODO
+  }
+  Text text = datum.as_text();
+  String string(text.raw_data(), text.raw_size());
+  auto map_it = map_.find(string);
+  if (map_it == map_.end()) {
+    return std::unique_ptr<Cursor>(new EmptyCursor);
+  } else {
+    auto set_begin = map_it->second.begin();
+    auto set_end = map_it->second.end();
+    if (options.order_type == CURSOR_REGULAR_ORDER) {
+      return create_iterator_cursor(set_begin,
+                                    set_end,
+                                    options.offset,
+                                    options.limit);
+    } else {
+      return create_reverse_iterator_cursor(set_begin,
+                                            set_end,
+                                            options.offset,
+                                            options.limit);
+    }
+  }
+}
+
+//std::unique_ptr<Cursor> TreeIndex<Text>::find(
+//    const Datum &datum,
+//    const CursorOptions &options) const;
+
+}  // namespace index
+
+using namespace index;
+
+Index::Index(ColumnBase *column, const String &name)
+    : column_(column),
+      name_(name.clone()) {}
+
+Index::~Index() {}
+
+ColumnInterface *Index::column() const {
+  return column_;
+}
+
+bool Index::contains(const Datum &value) const {
+  return !find_one(value).is_na();
+}
+
+Int Index::find_one(const Datum &value) const {
+  // TODO: This function should not fail, so Cursor should not be used.
+  auto cursor = find(value);
+  Array<Record> records;
+  auto count = cursor->read(1, &records);
+  if (count == 0) {
+    Int::na();
+  }
+  return records[0].row_id;
+}
+
+std::unique_ptr<Cursor> Index::find(
+    const Datum &value,
+    const CursorOptions &options) const {
+  throw "Not supported yet";  // TODO
+}
+
+Index *Index::create(ColumnBase *column,
+                     const String &name,
+                     IndexType type,
+                     const IndexOptions &options) try {
+  if (type != TREE_INDEX) {
+    throw "Not supoprted yet";  // TODO
+  }
+  switch (column->data_type()) {
+    case BOOL_DATA: {
+      throw "Not supported yet";  // TODO
+    }
+    case INT_DATA: {
+      return new TreeIndex<Int>(column, name, options);
+    }
+    case FLOAT_DATA: {
+      return new TreeIndex<Float>(column, name, options);
+    }
+    case GEO_POINT_DATA: {
+      throw "Not supported yet";  // TODO
+    }
+    case TEXT_DATA: {
+      return new TreeIndex<Text>(column, name, options);
+    }
+    case BOOL_VECTOR_DATA:
+    case INT_VECTOR_DATA:
+    case FLOAT_VECTOR_DATA:
+    case GEO_POINT_VECTOR_DATA:
+    case TEXT_VECTOR_DATA:
+    default: {
+      throw "Not supported yet";  // TODO
+    }
+  }
+} catch (const std::bad_alloc &) {
+  throw "Memory allocation failed";  // TODO
+}
+
+void Index::rename(const String &new_name) {
+  name_.assign(new_name);
+}
+
+bool Index::is_removable() const {
+  return true;
+}
 
 }  // namespace impl
 }  // namespace grnxx

  Modified: lib/grnxx/impl/index.hpp (+18 -10)
===================================================================
--- lib/grnxx/impl/index.hpp    2014-11-27 22:58:44 +0900 (77edc0f)
+++ lib/grnxx/impl/index.hpp    2014-11-28 15:08:25 +0900 (edc6cf4)
@@ -12,34 +12,42 @@ namespace impl {
 using ColumnInterface = grnxx::Column;
 using IndexInterface = grnxx::Index;
 
+class ColumnBase;
+
 class Index : public IndexInterface {
  public:
   // -- Public API (grnxx/index.hpp) --
 
-  Index(Column *column, const String &name);
+  Index(ColumnBase *column, const String &name);
   virtual ~Index();
 
-  // Return the owner column.
   ColumnInterface *column() const;
-  // Return the name.
   String name() const {
     return name_;
   }
 
+  virtual void insert(Int row_id, const Datum &value) = 0;
+  virtual void remove(Int row_id, const Datum &value) = 0;
+
+  virtual bool contains(const Datum &value) const;
+  virtual Int find_one(const Datum &value) const;
+  virtual std::unique_ptr<Cursor> find(
+      const Datum &value,
+      const CursorOptions &options = CursorOptions()) const;
+
   // -- Internal API --
 
   // Create a new index.
   //
   // On success, returns the column.
   // On failure, throws an exception.
-  static std::unique_ptr<Index> create(
-      Column *column,
-      const String &name,
-      IndexType type,
-      const IndexOptions &options);
+  static Index *create(ColumnBase *column,
+                       const String &name,
+                       IndexType type,
+                       const IndexOptions &options);
 
   // Return the owner table.
-  Column *_column() const {
+  ColumnBase *_column() const {
     return column_;
   }
 
@@ -52,7 +60,7 @@ class Index : public IndexInterface {
   bool is_removable() const;
 
  private:
-  Column *column_;
+  ColumnBase *column_;
   String name_;
 };
 
-------------- next part --------------
HTML����������������������������...
下載 



More information about the Groonga-commit mailing list
Back to archive index