susumu.yata
null+****@clear*****
Tue Sep 16 18:39:10 JST 2014
susumu.yata 2014-09-16 18:39:10 +0900 (Tue, 16 Sep 2014) New Revision: 306b6c336cef24f5397f8aaaf31a6696a2707067 https://github.com/groonga/grnxx/commit/306b6c336cef24f5397f8aaaf31a6696a2707067 Message: Add TreeIndex<Text>. (#52) Modified files: lib/grnxx/column.cpp lib/grnxx/index.cpp lib/grnxx/tree_index.hpp Modified: lib/grnxx/column.cpp (+45 -20) =================================================================== --- lib/grnxx/column.cpp 2014-09-16 15:39:54 +0900 (d94736e) +++ lib/grnxx/column.cpp 2014-09-16 18:39:10 +0900 (d0b3cca) @@ -426,29 +426,42 @@ bool ColumnImpl<Text>::set(Error *error, Int row_id, const Datum &datum) { if (!table_->test_row(error, row_id)) { return false; } - Text value = datum.force_text(); - if (value.size() == 0) { - headers_[row_id] = 0; - return true; - } - Int offset = bodies_.size(); - if (value.size() < 0xFFFF) { - if (!bodies_.resize(error, offset + value.size())) { - return false; + Text old_value = get(row_id); + Text new_value = datum.force_text(); + if (new_value != old_value) { + for (Int i = 0; i < num_indexes(); ++i) { + if (!indexes_[i]->insert(error, row_id, datum)) { + for (Int j = 0; j < i; ++i) { + indexes_[j]->remove(nullptr, row_id, datum); + } + return false; + } } - std::memcpy(&bodies_[offset], value.data(), value.size()); - headers_[row_id] = (offset << 16) | value.size(); - } else { - // The size of a long text is stored in front of the body. - if ((offset % sizeof(Int)) != 0) { - offset += sizeof(Int) - (offset % sizeof(Int)); + Int offset = bodies_.size(); + UInt new_header; + if (new_value.size() < 0xFFFF) { + if (!bodies_.resize(error, offset + new_value.size())) { + return false; + } + std::memcpy(&bodies_[offset], new_value.data(), new_value.size()); + new_header = (offset << 16) | new_value.size(); + } else { + // The size of a long text is stored in front of the body. + if ((offset % sizeof(Int)) != 0) { + offset += sizeof(Int) - (offset % sizeof(Int)); + } + if (!bodies_.resize(error, offset + sizeof(Int) + new_value.size())) { + return false; + } + *reinterpret_cast<Int *>(&bodies_[offset]) = new_value.size(); + std::memcpy(&bodies_[offset + sizeof(Int)], + new_value.data(), new_value.size()); + new_header = (offset << 16) | 0xFFFF; } - if (!bodies_.resize(error, offset + sizeof(Int) + value.size())) { - return false; + for (Int i = 0; i < num_indexes(); ++i) { + indexes_[i]->remove(nullptr, row_id, old_value); } - *reinterpret_cast<Int *>(&bodies_[offset]) = value.size(); - std::memcpy(&bodies_[offset + sizeof(Int)], value.data(), value.size()); - headers_[row_id] = (offset << 16) | 0xFFFF; + headers_[row_id] = new_header; } return true; } @@ -488,11 +501,23 @@ bool ColumnImpl<Text>::set_default_value(Error *error, Int row_id) { return false; } } + Text value = TypeTraits<Text>::default_value(); + for (Int i = 0; i < num_indexes(); ++i) { + if (!indexes_[i]->insert(error, row_id, value)) { + for (Int j = 0; j < i; ++j) { + indexes_[j]->remove(nullptr, row_id, value); + } + return false; + } + } headers_[row_id] = 0; return true; } void ColumnImpl<Text>::unset(Int row_id) { + for (Int i = 0; i < num_indexes(); ++i) { + indexes_[i]->remove(nullptr, row_id, get(row_id)); + } headers_[row_id] = 0; } Modified: lib/grnxx/index.cpp (+170 -2) =================================================================== --- lib/grnxx/index.cpp 2014-09-16 15:39:54 +0900 (6e6fdf0) +++ lib/grnxx/index.cpp 2014-09-16 18:39:10 +0900 (4af5a41) @@ -65,8 +65,13 @@ unique_ptr<Index> Index::create(Error *error, case FLOAT_DATA: { return TreeIndex<Float>::create(error, column, name, options); } - case GEO_POINT_DATA: - case TEXT_DATA: + case GEO_POINT_DATA: { + GRNXX_ERROR_SET(error, NOT_SUPPORTED_YET, "Not supoprted yet"); + return nullptr; + } + case TEXT_DATA: { + return TreeIndex<Text>::create(error, column, name, options); + } case BOOL_VECTOR_DATA: case INT_VECTOR_DATA: case FLOAT_VECTOR_DATA: @@ -832,4 +837,167 @@ bool TreeIndex<Float>::remove(Error *, Int row_id, const Datum &value) { return true; } +// -- TreeIndex<Text> -- + +unique_ptr<TreeIndex<Text>> TreeIndex<Text>::create( + Error *error, + Column *column, + String name, + const IndexOptions &options) { + unique_ptr<TreeIndex> index(new (nothrow) TreeIndex); + if (!index) { + GRNXX_ERROR_SET(error, NO_MEMORY, "Memory allocation failed"); + return nullptr; + } + if (!index->initialize_base(error, column, name, TREE_INDEX, options)) { + return nullptr; + } + auto cursor = column->table()->create_cursor(error); + if (!cursor) { + return nullptr; + } + auto typed_column = static_cast<ColumnImpl<Text> *>(column); + Array<Record> records; + for ( ; ; ) { + Int count = cursor->read(error, 1024, &records); + if (count < 0) { + return nullptr; + } else if (count == 0) { + break; + } + for (Int i = 0; i < count; ++i) { + Int row_id = records.get_row_id(i); + if (!index->insert(error, row_id, typed_column->get(row_id))) { + return nullptr; + } + } + records.clear(); + } + return index; +} + +TreeIndex<Text>::~TreeIndex() {} + +unique_ptr<Cursor> TreeIndex<Text>::create_cursor( + Error *error, + const Datum &datum, + const CursorOptions &options) const { + if (datum.type() != TEXT_DATA) { + GRNXX_ERROR_SET(error, INVALID_ARGUMENT, "Data type conflict"); + return nullptr; + } + + Text text = datum.force_text(); + std::string string(text.data(), text.size()); + auto map_it = map_.find(string); + if (map_it == map_.end()) { + return create_empty_cursor(error, column_->table()); + } + auto set_begin = map_it->second.begin(); + auto set_end = map_it->second.end(); + if (options.order_type == REGULAR_ORDER) { + return create_iterator_cursor(error, + column_->table(), + set_begin, + set_end, + options.offset, + options.limit); + } else { + return create_reverse_iterator_cursor(error, + column_->table(), + set_begin, + set_end, + options.offset, + options.limit); + } +} + +unique_ptr<Cursor> TreeIndex<Text>::create_cursor( + Error *error, + const IndexRange &range, + const CursorOptions &options) const { + std::string lower_bound_value = ""; + if (range.has_lower_bound()) { + const EndPoint &lower_bound = range.lower_bound(); + if (lower_bound.value.type() != TEXT_DATA) { + GRNXX_ERROR_SET(error, INVALID_ARGUMENT, "Data type conflict"); + return nullptr; + } + Text text = lower_bound.value.force_text(); + lower_bound_value.assign(text.data(), text.size()); + if (lower_bound.type == EXCLUSIVE_END_POINT) { + lower_bound_value += '\0'; + } + } + + std::string upper_bound_value = ""; + if (range.has_upper_bound()) { + const EndPoint &upper_bound = range.upper_bound(); + if (upper_bound.value.type() != TEXT_DATA) { + GRNXX_ERROR_SET(error, INVALID_ARGUMENT, "Data type conflict"); + return nullptr; + } + Text text = upper_bound.value.force_text(); + upper_bound_value.assign(text.data(), text.size()); + if (upper_bound.type == INCLUSIVE_END_POINT) { + upper_bound_value += '\0'; + } + } + + if (range.has_upper_bound()) { + if (lower_bound_value >= upper_bound_value) { + // TODO: Invalid range. + return nullptr; + } + } + + auto begin = map_.lower_bound(lower_bound_value); + auto end = range.has_upper_bound() ? + map_.lower_bound(upper_bound_value) : map_.end(); + if (options.order_type == REGULAR_ORDER) { + return create_map_set_cursor(error, + column_->table(), + begin, + end, + options.offset, + options.limit); + } else { + return create_reverse_map_set_cursor(error, + column_->table(), + begin, + end, + options.offset, + options.limit); + } +} + +bool TreeIndex<Text>::insert(Error *, Int row_id, const Datum &value) { + Text text = value.force_text(); + std::string string(text.data(), text.size()); + auto result = map_[string].insert(row_id); + if (!result.second) { + // Already exists. + return false; + } + return true; +} + +bool TreeIndex<Text>::remove(Error *, Int row_id, const Datum &value) { + Text text = value.force_text(); + std::string string(text.data(), text.size()); + auto map_it = map_.find(string); + if (map_it == map_.end()) { + return false; + } + auto set_it = map_it->second.find(row_id); + if (set_it == map_it->second.end()) { + return false; + } + map_it->second.erase(set_it); + if (map_it->second.size() == 0) { + map_.erase(map_it); + } + return true; +} + } // namespace grnxx Modified: lib/grnxx/tree_index.hpp (+36 -2) =================================================================== --- lib/grnxx/tree_index.hpp 2014-09-16 15:39:54 +0900 (3f63270) +++ lib/grnxx/tree_index.hpp 2014-09-16 18:39:10 +0900 (29923f4) @@ -46,7 +46,7 @@ class TreeIndex<Bool> : public Index { TreeIndex() : Index(), map_() {} }; -// TODO: This is just a test implementation. +// TODO: This is a test implementation. template <> class TreeIndex<Int> : public Index { public: @@ -80,7 +80,7 @@ class TreeIndex<Int> : public Index { TreeIndex() : Index(), map_() {} }; -// TODO: This is just a test implementation. +// TODO: This is a test implementation. template <> class TreeIndex<Float> : public Index { public: @@ -126,6 +126,40 @@ class TreeIndex<Float> : public Index { TreeIndex() : Index(), map_() {} }; +// TODO: This is a test implementation. +template <> +class TreeIndex<Text> : public Index { + public: + using Value = Text; + using Set = std::set<Int>; + using Map = std::map<std::string, Set>; + + static unique_ptr<TreeIndex> create(Error *error, + Column *column, + String name, + const IndexOptions &options); + + ~TreeIndex(); + + unique_ptr<Cursor> create_cursor( + Error *error, + const Datum &datum, + const CursorOptions &options = CursorOptions()) const; + + unique_ptr<Cursor> create_cursor( + Error *error, + const IndexRange &range, + const CursorOptions &options) const; + + bool insert(Error *error, Int row_id, const Datum &value); + bool remove(Error *error, Int row_id, const Datum &value); + + private: + mutable Map map_; + + TreeIndex() : Index(), map_() {} +}; + } // namespace grnxx #endif // GRNXX_TREE_INDEX_HPP -------------- next part -------------- HTML����������������������������...下載