BitMagic-C++
bmsparsevec_compr.h
Go to the documentation of this file.
1#ifndef BMSPARSEVEC_COMPR_H__INCLUDED__
2#define BMSPARSEVEC_COMPR_H__INCLUDED__
3/*
4Copyright(c) 2002-2017 Anatoliy Kuznetsov(anatoliy_kuznetsov at yahoo.com)
5
6Licensed under the Apache License, Version 2.0 (the "License");
7you may not use this file except in compliance with the License.
8You may obtain a copy of the License at
9
10 http://www.apache.org/licenses/LICENSE-2.0
11
12Unless required by applicable law or agreed to in writing, software
13distributed under the License is distributed on an "AS IS" BASIS,
14WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15See the License for the specific language governing permissions and
16limitations under the License.
17
18For more information please visit: http://bitmagic.io
19*/
20
21/*! \file bmsparsevec_compr.h
22 \brief Compressed sparse container rsc_sparse_vector<> for integer types
23*/
24
25#include <memory.h>
26
27#ifndef BM_NO_STL
28#include <stdexcept>
29#endif
30
31#ifndef BM__H__INCLUDED__
32// BitMagic utility headers do not include main "bm.h" declaration
33// #include "bm.h" or "bm64.h" explicitly
34# error missing include (bm.h or bm64.h)
35#endif
36
37
38#include "bmsparsevec.h"
39#include "bmdef.h"
40
41namespace bm
42{
43
44
45/*!
46 \brief Rank-Select compressed sparse vector
47
48 Container uses Rank-Select method of compression, where
49 all NULL columns gets dropped, effective address of columns is computed
50 using address bit-vector.
51
52 Use for cases, where memory efficiency is preferable over
53 single element access latency.
54
55 \ingroup sv
56*/
57template<class Val, class SV>
59{
60public:
62 {
63 sv_slices = (sizeof(Val) * 8 + 1),
64 sv_value_slices = (sizeof(Val) * 8)
65 };
66
67 typedef Val value_type;
69 typedef typename SV::size_type size_type;
71 typedef typename SV::const_iterator sparse_vector_const_iterator;
72 typedef typename SV::bvector_type bvector_type;
79 typedef typename SV::bmatrix_type bmatrix_type;
80 typedef typename SV::unsigned_value_type unsigned_value_type;
81
86
87 struct is_remap_support { enum trait { value = false }; };
88 struct is_rsc_support { enum trait { value = true }; };
89 struct is_dynamic_splices { enum trait { value = false }; };
90
91public:
92 /*! Statistical information about memory allocation details. */
93 struct statistics : public bv_statistics
94 {};
95
96public:
97 /**
98 Reference class to access elements via common [] operator
99 */
101 {
102 public:
104 : csv_(csv), idx_(idx)
105 {}
106 operator value_type() const BMNOEXCEPT { return csv_.get(idx_); }
107 bool operator==(const reference& ref) const BMNOEXCEPT
108 { return bool(*this) == bool(ref); }
109 bool is_null() const BMNOEXCEPT { return csv_.is_null(idx_); }
110 private:
112 size_type idx_;
113 };
114
115 /**
116 Const iterator to traverse the rsc sparse vector.
117
118 Implementation uses buffer for decoding so, competing changes
119 to the original vector may not match the iterator returned values.
120
121 This iterator keeps an operational buffer, memory footprint is not
122 negligable
123
124 @ingroup sv
125 */
127 {
128 public:
129 friend class rsc_sparse_vector;
130
131#ifndef BM_NO_STL
132 typedef std::input_iterator_tag iterator_category;
133#endif
140 typedef typename
142 typedef bm::byte_buffer<allocator_type> buffer_type;
143
144 typedef unsigned difference_type;
145 typedef unsigned* pointer;
147
148 public:
153
154 bool operator==(const const_iterator& it) const BMNOEXCEPT
155 { return (pos_ == it.pos_) && (csv_ == it.csv_); }
157 { return ! operator==(it); }
159 { return pos_ < it.pos_; }
161 { return pos_ <= it.pos_; }
163 { return pos_ > it.pos_; }
165 { return pos_ >= it.pos_; }
166
167 /// \brief Get current position (value)
168 value_type operator*() const { return this->value(); }
169
170
171 /// \brief Advance to the next available value
172 const_iterator& operator++() BMNOEXCEPT { this->advance(); return *this; }
173
174 /// \brief Advance to the next available value
176 { const_iterator tmp(*this);this->advance(); return tmp; }
177
178
179 /// \brief Get current position (value)
181
182 /// \brief Get NULL status
183 bool is_null() const BMNOEXCEPT;
184
185 /// Returns true if iterator is at a valid position
186 bool valid() const BMNOEXCEPT { return pos_ != bm::id_max; }
187
188 /// Invalidate current iterator
190
191 /// Current position (index) in the vector
192 size_type pos() const BMNOEXCEPT{ return pos_; }
193
194 /// re-position to a specified position
196
197 /// advance iterator forward by one
198 /// @return true if it is still valid
200
202 private:
203 enum buf_size_e
204 {
205 n_buf_size = 1024 * 8
206 };
207
208 private:
209 const rsc_sparse_vector_type* csv_; ///!< ptr to parent
210 size_type pos_; ///!< Position
211 mutable buffer_type vbuffer_; ///!< value buffer
212 mutable buffer_type tbuffer_; ///!< temp buffer
213 mutable value_type* buf_ptr_; ///!< position in the buffer
214 };
215
216
217
218 /**
219 Back insert iterator implements buffered insert, faster than generic
220 access assignment.
221
222 Limitations for buffered inserter:
223 1. Do not use more than one inserter per vector at a time
224 2. Use method flush() at the end to send the rest of accumulated buffer
225 flush is happening automatically on destruction, but if flush produces an
226 exception (for whatever reason) it will be an exception in destructor.
227 As such, explicit flush() is safer way to finilize the sparse vector load.
228
229 @ingroup sv
230 */
232 {
233 public:
234#ifndef BM_NO_STL
235 typedef std::output_iterator_tag iterator_category;
236#endif
242
243 typedef void difference_type;
244 typedef void pointer;
245 typedef void reference;
246
247 public:
248
249 /*! @name Construction and assignment */
250 ///@{
251
254
256 void operator=(const back_insert_iterator& bi)
257 {
258 BM_ASSERT(bi.empty());
259 this->flush(); sv_bi_ = bi.sv_bi_;
260 }
261
263 ///@}
264
265 /** push value to the vector */
267 { this->add(v); return *this; }
268 /** noop */
269 back_insert_iterator& operator*() { return *this; }
270 /** noop */
271 back_insert_iterator& operator++() { return *this; }
272 /** noop */
273 back_insert_iterator& operator++( int ) { return *this; }
274
275 /** add value to the container*/
277
278 /** add NULL (no-value) to the container */
280
281 /** add a series of consequitve NULLs (no-value) to the container */
283
284 /** flush the accumulated buffer */
285 void flush();
286 protected:
287
288 /** add value to the buffer without changing the NULL vector
289 @param v - value to push back
290 @return index of added value in the internal buffer
291 @internal
292 */
293 ///size_type add_value(value_type v);
294
296 typedef
298 private:
299 rsc_sparse_vector_type* csv_; ///!< pointer on the parent vector
300 sparse_vector_bi sv_bi_;
301 };
302
303public:
304 // ------------------------------------------------------------
305 /*! @name Construction and assignment */
306
307 //@{
308
311 size_type bv_max_size = bm::id_max,
312 const allocator_type& alloc = allocator_type());
313
314 /**
315 Contructor to pre-initialize the list of assigned (not NULL) elements.
316
317 If the list of not NULL elements is known upfront it can help to
318 pre-declare it, enable rank-select index and then use set function.
319 This scenario gives significant speed boost, comparing random assignment
320
321 @param bv_null - not NULL vector for the container
322 */
324
326
327 /*! copy-ctor */
329
330
331 /*! copy assignmment operator */
332 rsc_sparse_vector<Val,SV>& operator=(const rsc_sparse_vector<Val, SV>& csv)
333 {
334 if (this != &csv)
335 {
336 sv_ = csv.sv_;
337 size_ = csv.size_; max_id_ = csv.max_id_;
338 in_sync_ = csv.in_sync_;
339 if (in_sync_)
340 {
341 rs_idx_->copy_from(*(csv.rs_idx_));
342 }
343 }
344 return *this;
345 }
346
347#ifndef BM_NO_CXX11
348 /*! move-ctor */
350
351 /*! move assignmment operator */
353 {
354 if (this != &csv)
355 {
356 clear_all(true, 0);
357 sv_.swap(csv.sv_);
358 size_ = csv.size_; max_id_ = csv.max_id_; in_sync_ = csv.in_sync_;
359 if (in_sync_)
360 rs_idx_->copy_from(*(csv.rs_idx_));
361 }
362 return *this;
363 }
364#endif
365
366 //@}
367 // ------------------------------------------------------------
368 /*! @name Size, etc */
369 ///@{
370
371 /*! \brief return size of the vector
372 \return size of sparse vector
373 */
375
376 /*! \brief return true if vector is empty
377 \return true if empty
378 */
379 bool empty() const BMNOEXCEPT { return sv_.empty(); }
380
381 /**
382 \brief recalculate size to exclude tail NULL elements
383 After this call size() will return the true size of the vector
384 */
386
387 /*! \brief change vector size
388 \param new_size - new vector size
389 */
390 void resize(size_type new_size);
391
392 /*
393 \brief Returns count of not NULL elements (population)
394 in the given range [left..right]
395 Uses rank-select index to accelerate the search (after sync())
396
397 \param left - index of first bit start counting from
398 \param right - index of last bit
399
400 @sa sync
401 */
404
405 ///@}
406
407 // ------------------------------------------------------------
408 /*! @name Element access */
409 //@{
410
411 /*!
412 \brief get specified element without bounds checking
413 \param idx - element index
414 \return value of the element
415 */
416 value_type operator[](size_type idx) const { return this->get(idx); }
417
418 /*!
419 \brief access specified element with bounds checking
420 \param idx - element index
421 \return value of the element
422 */
424
425 /*!
426 \brief get specified element without bounds checking
427 \param idx - element index
428 \return value of the element
429 */
431
432 /** Get raw unsigned value first N bits
433 \param idx - element index in the vector
434 \param N_bits - number of bits to be extracted (should be > 0)
435 @return unsigned value for
436 */
438 size_type N_bits) const BMNOEXCEPT;
439
440
441 /**
442 \brief get specified element with NOT NULL check
443 \param idx - element index
444 \param v - [out] value to get
445 \return true if value was aquired (NOT NULL), false otherwise
446 @sa is_null, get
447 */
449
450 /**
451 \brief get specified element with NOT NULL check
452 Caller guarantees that the vector is in sync mode (RS-index access).
453 \param idx - element index
454 \param v - [out] value to get
455 \return true if value was aquired (NOT NULL), false otherwise
456 @sa is_null, get, sync
457 */
459
460
461 /*!
462 \brief set specified element with bounds checking and automatic resize
463
464 Method cannot insert elements, so every new idx has to be greater or equal
465 than what it used before. Elements must be loaded in a sorted order.
466
467 \param idx - element index
468 \param v - element value
469 */
471
472
473 /*!
474 \brief add element with automatic resize
475 \param v - element value
476 */
478 { this->push_back(size_, v); }
479
480 /*!
481 \brief push back specified amount of NULL values
482 \param count - number of NULLs to push back
483 */
485
486 /*!
487 \brief push back NULL value
488 */
490
491 /*!
492 \brief set specified element with bounds checking and automatic resize
493 \param idx - element index
494 \param v - element value
495 */
497
498
499 /*!
500 \brief increment specified element by one
501 \param idx - element index
502 */
503 void inc(size_type idx);
504
505 /*!
506 \brief increment specified element by one
507 \param idx - element index
508 \param v - increment value
509 */
511
512 /*!
513 \brief increment specified element by one, element MUST be NOT NULL
514 Faster than just inc() if element is NULL - behavior is undefined
515 \param idx - element index
516 \param v - increment value
517 @sa inc
518 */
520
521 /*!
522 \brief set specified element to NULL
523 RSC vector actually erases element when it is set to NULL (expensive).
524 \param idx - element index
525 */
527
528 /**
529 Set NULL all elements set as 1 in the argument vector.
530 Function also clears all the values to 0.
531 Note: this can be a very expensive function for an RS container.
532 \param bv_idx - index bit-vector for elements which to be set to NULL
533 */
534 void set_null(const bvector_type& bv_idx);
535
536
537 /**
538 Set vector elements spcified by argument bit-vector to zero
539 Note that set to 0 elements are NOT going to tuned to NULL (NULL qualifier is preserved)
540 \param bv_idx - index bit-vector for elements which to be set to 0
541 */
542 void clear(const bvector_type& bv_idx);
543
544
545
546 /** \brief test if specified element is NULL
547 \param idx - element index
548 \return true if it is NULL false if it was assigned or container
549 is not configured to support assignment flags
550 */
551 bool is_null(size_type idx) const BMNOEXCEPT;
552
553 /**
554 \brief Get bit-vector of assigned values (or NULL)
555 */
557
558 /**
559 \brief find position of compressed element by its rank
560 \param rank - rank (virtual index in sparse vector)
561 \param idx - index (true position)
562 */
563 bool find_rank(size_type rank, size_type& idx) const BMNOEXCEPT;
564
565 //@}
566
567 // ------------------------------------------------------------
568 /*! @name Export content to C-stype array */
569 ///@{
570
571 /**
572 \brief C-style decode
573 \param arr - decode target array (must be properly sized)
574 \param idx_from - start address to decode
575 \param size - number of elements to decode
576 \param zero_mem - flag if array needs to beset to zeros first
577
578 @return actual decoded size
579 @sa decode_buf
580 */
582 size_type idx_from,
584 bool zero_mem = true) const;
585
586
587 /**
588 \brief C-style decode (variant with external memory)
589 Analog of decode, but requires two arrays.
590 Faster than decode in many cases.
591
592 \param arr - decode target array (must be properly sized)
593 \param arr_buf_tmp - decode temp bufer (must be same size of arr)
594 \param idx_from - start address to decode
595 \param size - number of elements to decode
596 \param zero_mem - flag if array needs to beset to zeros first
597
598 @return actual decoded size
599 @sa decode
600 */
602 value_type* arr_buf_tmp,
603 size_type idx_from,
605 bool zero_mem = true) const BMNOEXCEPT;
606
607 /*!
608 \brief Gather elements to a C-style array
609
610 Gather collects values from different locations, for best
611 performance feed it with sorted list of indexes.
612
613 Faster than one-by-one random access.
614
615 For efficiency, this is left as a low level function,
616 it does not do any bounds checking on the target array, it will
617 override memory and crash if you are not careful with allocation
618 and request size.
619
620 \param arr - destination array
621 \param idx - index list to gather elements (read only)
622 \param idx_tmp_buf - temp scratch buffer for index rank-select index translation
623 must be correctly allocated to match size. No value initialization requirement.
624 \param size - decoding index list size (array allocation should match)
625 \param sorted_idx - sort order directive for the idx array
626 (BM_UNSORTED, BM_SORTED, BM_UNKNOWN)
627 Sort order affects both performance and correctness(!), use BM_UNKNOWN
628 if not sure.
629
630 \return number of actually exported elements (can be less than requested)
631
632 \sa decode
633 */
635 const size_type* idx,
636 size_type* idx_tmp_buf,
638 bm::sort_order sorted_idx) const;
639
640
641 ///@}
642
643
644 // ------------------------------------------------------------
645 /*! @name Various traits */
646 ///@{
647 /**
648 \brief if container supports NULL(unassigned) values (true)
649 */
650 bool is_nullable() const { return true; }
651
652 /** \brief various type traits
653 */
654 static constexpr
655 bool is_compressed() BMNOEXCEPT { return true; }
656
657 static constexpr
658 bool is_str() BMNOEXCEPT { return false; }
659
660 ///@}
661
662
663 // ------------------------------------------------------------
664 /*! @name Comparison */
665 ///@{
666
667 /*!
668 \brief check if another vector has the same content
669 \return true, if it is the same
670 */
672 //@}
673
674
675 // ------------------------------------------------------------
676 /*! @name Load-Export compressed vector with data */
677 ///@{
678 /*!
679 \brief Load compressed vector from a sparse vector (with NULLs)
680 \param sv_src - source sparse vector
681 */
682 void load_from(const sparse_vector_type& sv_src);
683
684 /*!
685 \brief Exort compressed vector to a sparse vector (with NULLs)
686 \param sv - target sparse vector
687 */
689
690 ///@}
691
692 // ------------------------------------------------------------
693 /*! @name Iterator access */
694 ///@{
695
696 /** Provide const iterator access to container content */
697 const_iterator begin() const
698 {
699 if (!in_sync_)
700 throw_no_rsc_index(); // call sync() to build RSC fast access index
701 return const_iterator(this);
702 }
703
704 /** Provide const iterator access to the end */
705 const_iterator end() const BMNOEXCEPT
706 { return const_iterator(this, bm::id_max); }
707
708 /** Get const_itertor re-positioned to specific element
709 @param idx - position in the sparse vector
710 */
711 const_iterator get_const_iterator(size_type idx) const BMNOEXCEPT
712 { return const_iterator(this, idx); }
713
714 back_insert_iterator get_back_inserter() { return back_insert_iterator(this); }
715 ///@}
716
717 // ------------------------------------------------------------
718 /*! @name Memory optimization */
719 ///@{
720
721 /*!
722 \brief run memory optimization for all vector slices
723 \param temp_block - pre-allocated memory block to avoid unnecessary re-allocs
724 \param opt_mode - requested compression depth
725 \param stat - memory allocation statistics after optimization
726 */
728 bm::word_t* temp_block = 0,
730 statistics* stat = 0);
731
732 /*! \brief resize to zero, free memory
733 @param free_mem - free bit vector slices if true
734 */
735 void clear_all(bool free_mem, unsigned) BMNOEXCEPT;
736
737 /*! \brief resize to zero, free memory
738 @param free_mem - free bit vector slices if true
739 */
740 void clear() BMNOEXCEPT { clear_all(true, 0); }
741
742 /*!
743 @brief Calculates memory statistics.
744
745 Function fills statistics structure containing information about how
746 this vector uses memory and estimation of max. amount of memory
747 bvector needs to serialize itself.
748
749 @param st - pointer on statistics structure to be filled in.
750
751 @sa statistics
752 */
755
756 /**
757 @brief Turn sparse vector into immutable mode
758 Read-only (immutable) vector uses less memory and allows faster searches.
759 Before freezing it is recommenede to call optimize() to get full memory saving effect
760 @sa optimize
761 */
762 void freeze() { sv_.freeze(); }
763
764 /** Returns true if vector is read-only */
765 bool is_ro() const BMNOEXCEPT { return sv_.is_ro_; }
766
767
768
769 ///@}
770
771 // ------------------------------------------------------------
772 /*! @name Merge, split, partition data */
773 ///@{
774
775 /**
776 @brief copy range of values from another sparse vector
777
778 Copy [left..right] values from the source vector,
779 clear everything outside the range.
780
781 \param csv - source vector
782 \param left - index from in losed diapason of [left..right]
783 \param right - index to in losed diapason of [left..right]
784 */
786 size_type left, size_type right);
787
788 /**
789 @brief merge two vectors (argument gets destroyed)
790 It is important that both vectors have the same NULL vectors
791 @param csv - [in,out] argumnet vector to merge
792 (works like move so arg should not be used after the merge)
793 */
795
796 ///@}
797
798 // ------------------------------------------------------------
799 /*! @name Fast access structues sync */
800 ///@{
801 /*!
802 \brief Re-calculate rank-select index for faster access to vector
803 \param force - force recalculation even if it is already recalculated
804 */
805 void sync(bool force);
806
807 /*!
808 \brief Re-calculate prefix sum table used for rank search (if necessary)
809 */
810 void sync() { sync(false); }
811
812 /*!
813 \brief returns true if prefix sum table is in sync with the vector
814 */
815 bool in_sync() const BMNOEXCEPT { return in_sync_; }
816 /*!
817 \brief returns true if prefix sum table is in sync with the vector
818 */
819 bool is_sync() const BMNOEXCEPT { return in_sync_; }
820
821 /*!
822 \brief Unsync the prefix sum table
823 */
824 void unsync() BMNOEXCEPT { in_sync_ = is_dense_ = false; }
825 ///@}
826
827 // ------------------------------------------------------------
828 /*! @name Access to internals */
829 ///@{
830
831 /*!
832 \brief get access to bit-plane, function checks and creates a plane
833 \return bit-vector for the bit slice
834 */
836 { return sv_.get_slice(i); }
837
839 { return sv_.get_create_slice(i); }
840
842 { return sv_.slice(i); }
843
844 /*!
845 Number of effective bit-slices in the value type
846 */
848 { return sv_.effective_slices(); }
849
850 /*!
851 \brief get total number of bit-slices in the vector
852 */
853 static unsigned slices() BMNOEXCEPT
854 { return sparse_vector_type::slices(); }
855
856 /** Number of stored bit-slices (value slices + extra */
857 static unsigned stored_slices()
859
860 /*!
861 \brief access dense vector
862 */
863 const sparse_vector_type& get_sv() const BMNOEXCEPT { return sv_; }
864
865 /*!
866 \brief size of internal dense vector
867 */
868 size_type effective_size() const BMNOEXCEPT { return sv_.size(); }
869
870 /**
871 \brief Always 1 (non-matrix type)
872 */
874
875 /*!
876 get read-only access to inetrnal bit-matrix
877 */
879 { return sv_.get_bmatrix(); }
880
881 /*!
882 get read-only access to inetrnal bit-matrix
883 */
885 { return sv_.get_bmatrix(); }
886
887
888 /*! Get Rank-Select index pointer
889 @return NULL if sync() was not called to construct the index
890 @sa sync()
891 */
893 { return in_sync_ ? rs_idx_ : 0; }
894
895 void mark_null_idx(unsigned null_idx) BMNOEXCEPT
896 { sv_.mark_null_idx(null_idx); }
897
898 /*!
899 \brief Resolve logical address to access via rank compressed address
900
901 \param idx - input id to resolve
902 \param idx_to - output id
903
904 \return true if id is known and resolved successfully
905 @internal
906 */
907 bool resolve(size_type idx, size_type* idx_to) const BMNOEXCEPT;
908
909 ///@}
910
911protected:
913 {
915 };
916
917
918 bool resolve_sync(size_type idx, size_type* idx_to) const BMNOEXCEPT;
919
921 size_type* idx_from, size_type* idx_to) const BMNOEXCEPT;
922
924 {
925 sv_.resize_internal(sz);
926 }
927 size_type size_internal() const BMNOEXCEPT { return sv_.size(); }
928
929 constexpr bool is_remap() const BMNOEXCEPT { return false; }
930 size_t remap_size() const BMNOEXCEPT { return 0; }
931 const unsigned char* get_remap_buffer() const BMNOEXCEPT { return 0; }
932 unsigned char* init_remap_buffer() BMNOEXCEPT { return 0; }
934
935 /// unused remap matrix type for compatibility with the sparse serializer
936 typedef
937 bm::heap_matrix<unsigned char, 1, 1,
939
940 const remap_matrix_type* get_remap_matrix() const { return 0; }
942
944
945 /**
946 Convert signed value type to unsigned representation
947 */
948 static
951 static
954
955private:
956
957 /// Allocate memory for RS index
958 void construct_rs_index();
959 /// Free rs-index
960 void free_rs_index();
961
962 /**
963 \brief throw error that RSC index not constructed (call sync())
964 \internal
965 @sa sync
966 */
967 static
968 void throw_no_rsc_index();
969
970protected:
971 template<class SVect, unsigned S_FACTOR> friend class sparse_vector_scanner;
972 template<class SVect> friend class sparse_vector_serializer;
973 template<class SVect> friend class sparse_vector_deserializer;
974
975
976private:
977 sparse_vector_type sv_; ///< transpose-sparse vector for "dense" packing
978 size_type size_; ///< vector size (logical)
979 size_type max_id_; ///< control variable for sorted load
980 bool in_sync_; ///< flag if prefix sum is in-sync with vector
981 rs_index_type* rs_idx_ = 0; ///< prefix sum for rank-select translation
982 bool is_dense_ = false; ///< flag if vector is dense
983};
984
985//---------------------------------------------------------------------
986//
987//---------------------------------------------------------------------
988
989template<class Val, class SV>
992 size_type bv_max_size,
993 const allocator_type& alloc)
994: sv_(bm::use_null, ap, bv_max_size, alloc), in_sync_(false)
995{
996 (void) null_able;
997 BM_ASSERT(null_able == bm::use_null);
998 BM_ASSERT(int(sv_value_slices) == int(SV::sv_value_slices));
999 size_ = max_id_ = 0;
1000 construct_rs_index();
1001}
1002
1003//---------------------------------------------------------------------
1004
1005template<class Val, class SV>
1007: sv_(bm::use_null), in_sync_(false)
1008{
1009 construct_rs_index();
1010 bvector_type* bv = sv_.get_null_bvect();
1011 BM_ASSERT(bv);
1012 *bv = bv_null;
1013
1014 bool found = bv->find_reverse(max_id_);
1015 if (found)
1016 {
1017 size_ = max_id_ + 1;
1018 size_type sz = bv->count();
1019 sv_.resize(sz);
1020 }
1021 else
1022 {
1023 BM_ASSERT(!bv->any());
1024 size_ = max_id_ = 0;
1025 }
1026}
1027
1028//---------------------------------------------------------------------
1029
1030template<class Val, class SV>
1032{
1033 free_rs_index();
1034}
1035
1036//---------------------------------------------------------------------
1037
1038template<class Val, class SV>
1040 const rsc_sparse_vector<Val, SV>& csv)
1041: sv_(csv.sv_), size_(csv.size_), max_id_(csv.max_id_), in_sync_(csv.in_sync_)
1042{
1043 BM_ASSERT(int(sv_value_slices) == int(SV::sv_value_slices));
1044
1045 construct_rs_index();
1046 if (in_sync_)
1047 rs_idx_->copy_from(*(csv.rs_idx_));
1048}
1049
1050//---------------------------------------------------------------------
1051
1052template<class Val, class SV>
1055: sv_(bm::use_null),
1056 size_(0),
1057 max_id_(0), in_sync_(false)
1058{
1059 if (this != &csv)
1060 {
1061 sv_.swap(csv.sv_);
1062 size_ = csv.size_; max_id_ = csv.max_id_; in_sync_ = csv.in_sync_;
1063 rs_idx_ = csv.rs_idx_; csv.rs_idx_ = 0;
1064 }
1065}
1066
1067//---------------------------------------------------------------------
1068
1069template<class Val, class SV>
1072{
1073 return size_;
1074}
1075
1076//---------------------------------------------------------------------
1077
1078template<class Val, class SV>
1080{
1081 BM_ASSERT(new_size < bm::id_max);
1082 if (!new_size) // clear memory
1083 {
1084 sv_.resize(0);
1085 BM_ASSERT(sv_.get_null_bvect()->none());
1086 in_sync_ = false;
1087 size_ = max_id_ = 0;
1088 return;
1089 }
1090 if (new_size >= size_) // vector grows
1091 {
1092 size_ = new_size;
1093 max_id_ = new_size - 1;
1094 return;
1095 }
1096
1097 // vector shrinks
1098 // compute tail rank
1099 bvector_type* bv_null = sv_.get_null_bvect();
1100 size_type clear_size = bv_null->count_range(new_size, bm::id_max-1);
1101
1102 if (!clear_size) // tail set/rank is empty
1103 {
1104 size_ = new_size;
1105 max_id_ = new_size - 1;
1106 BM_ASSERT(!bv_null->any_range(size_, bm::id_max-1));
1107 return;
1108 }
1109
1110 BM_ASSERT(sv_.size() >= clear_size);
1111 size_type new_sv_size = sv_.size() - clear_size;
1112 sv_.resize_internal(new_sv_size, false); // without touching NULL plane
1113 bv_null->clear_range(new_size, bm::id_max-1);
1114
1115 size_ = new_size;
1116 max_id_ = new_size - 1;
1117
1118 BM_ASSERT(!bv_null->any_range(size_, bm::id_max-1));
1119
1120 in_sync_ = false;
1121}
1122
1123//---------------------------------------------------------------------
1124
1125template<class Val, class SV>
1127{
1128 if (sv_.empty())
1129 {}
1130 else
1131 if (idx <= max_id_ && size_)
1132 {
1133 sv_.throw_range_error("compressed sparse vector push_back() range error");
1134 }
1135 push_back_no_check(idx, v);
1136}
1137
1138//---------------------------------------------------------------------
1139
1140template<class Val, class SV>
1142{
1143 BM_ASSERT(size_ < bm::id_max - count); // overflow assert
1144 size_ += count;
1145 max_id_ = size_-1;
1146}
1147
1148//---------------------------------------------------------------------
1149
1150template<class Val, class SV>
1152{
1153 bvector_type* bv_null = sv_.get_null_bvect();
1154 BM_ASSERT(bv_null);
1155
1156 bv_null->set_bit_no_check(idx);
1157 sv_.push_back_no_null(v);
1158
1159 max_id_ = idx;
1160 size_ = idx + 1;
1161 in_sync_ = false;
1162}
1163
1164//---------------------------------------------------------------------
1165
1166template<class Val, class SV>
1168{
1169 bvector_type* bv_null = sv_.get_null_bvect();
1170 BM_ASSERT(bv_null);
1171
1172 bool found = bv_null->test(idx); // TODO: use extract bit
1173 if (found)
1174 {
1175 // TODO: maybe RS-index is available
1176 size_type sv_idx = bv_null->count_range(0, idx);
1177 bv_null->clear_bit_no_check(idx);
1178 sv_.erase(--sv_idx, false/*not delete NULL vector*/);
1179 in_sync_ = false;
1180 }
1181 else
1182 {
1183 if (idx > max_id_)
1184 {
1185 max_id_ = idx;
1186 size_ = max_id_ + 1;
1187 }
1188 }
1189}
1190
1191//---------------------------------------------------------------------
1192
1193template<class Val, class SV>
1195{
1196 bvector_type* bv_null = sv_.get_null_bvect();
1197 bvector_type bv_sub; // subtraction vector cleared from NOT NULLs
1198 bv_sub.bit_and(bv_idx, *bv_null);
1199 // clear the main matrix to accelerate the erase in all bit-slices
1200 {
1202 bvector_type bv_sub_rsc;
1203 rank_compr.compress(bv_sub_rsc, *bv_null, bv_sub);
1204 sv_.clear(bv_sub_rsc);
1205 }
1206
1207 in_sync_ = false;
1208 typename bvector_type::enumerator en(&bv_sub, 0);
1209 for (size_type cnt = 0; en.valid(); ++en, ++cnt)
1210 {
1211 auto idx = *en;
1212
1213 size_type sv_idx = bv_null->count_range(0, idx);
1214 sv_idx -= cnt; // correct rank for what we deleted previously
1215 sv_.erase(--sv_idx, false/*not delete the NULL vector*/);
1216 }
1217 bv_null->bit_sub(bv_sub);
1218}
1219
1220//---------------------------------------------------------------------
1221
1222template<class Val, class SV>
1224{
1225 const bvector_type* bv_null = sv_.get_null_bvector();
1226
1227 bvector_type bv_sub; // subtraction vector cleared from NOT NULLs
1228 bv_sub.bit_and(bv_idx, *bv_null);
1229
1231 bvector_type bv_sub_rsc;
1232 rank_compr.compress(bv_sub_rsc, *bv_null, bv_sub);
1233
1234 sv_.clear(bv_sub_rsc);
1235}
1236
1237//---------------------------------------------------------------------
1238
1239template<class Val, class SV>
1241{
1242 bvector_type* bv_null = sv_.get_null_bvect();
1243 BM_ASSERT(bv_null);
1244
1245 size_type sv_idx;
1246 bool found = bv_null->test(idx);
1247
1248 sv_idx = in_sync_ ? bv_null->count_to(idx, *rs_idx_)
1249 : bv_null->count_range(0, idx); // TODO: make test'n'count
1250
1251 if (found)
1252 {
1253 sv_.inc_no_null(--sv_idx);
1254 }
1255 else
1256 {
1257 sv_.insert_value_no_null(sv_idx, 1);
1258 bv_null->set_bit_no_check(idx);
1259
1260 if (idx > max_id_)
1261 {
1262 max_id_ = idx;
1263 size_ = max_id_ + 1;
1264 }
1265 in_sync_ = false;
1266 }
1267}
1268
1269//---------------------------------------------------------------------
1270
1271template<class Val, class SV>
1273{
1274 bvector_type* bv_null = sv_.get_null_bvect();
1275 BM_ASSERT(bv_null);
1276
1277 size_type sv_idx;
1278 bool found = bv_null->test(idx);
1279
1280 sv_idx = in_sync_ ? bv_null->count_to(idx, *rs_idx_)
1281 : bv_null->count_range(0, idx); // TODO: make test'n'count
1282
1283 if (found)
1284 {
1285 sv_.inc_no_null(--sv_idx, v);
1286 }
1287 else
1288 {
1289 sv_.insert_value_no_null(sv_idx, v);
1290 bv_null->set_bit_no_check(idx);
1291
1292 if (idx > max_id_)
1293 {
1294 max_id_ = idx;
1295 size_ = max_id_ + 1;
1296 }
1297 in_sync_ = false;
1298 }
1299}
1300
1301//---------------------------------------------------------------------
1302
1303template<class Val, class SV>
1305{
1306 bvector_type* bv_null = sv_.get_null_bvect();
1307 BM_ASSERT(bv_null->test(idx)); // idx must be NOT NULL
1308
1309 size_type sv_idx;
1310 sv_idx = in_sync_ ? bv_null->count_to(idx, *rs_idx_)
1311 : bv_null->count_range(0, idx); // TODO: make test'n'count
1312 --sv_idx;
1313 if (v == 1)
1314 sv_.inc_no_null(sv_idx);
1315 else
1316 sv_.inc_no_null(sv_idx, v);
1317}
1318
1319
1320//---------------------------------------------------------------------
1321
1322template<class Val, class SV>
1324{
1325 bvector_type* bv_null = sv_.get_null_bvect();
1326 BM_ASSERT(bv_null);
1327
1328 size_type sv_idx;
1329 bool found = bv_null->test(idx);
1330
1331 sv_idx = in_sync_ ? bv_null->count_to(idx, *rs_idx_)
1332 : bv_null->count_range(0, idx); // TODO: make test'n'count
1333
1334 if (found)
1335 {
1336 sv_.set_value_no_null(--sv_idx, v, true);
1337 }
1338 else
1339 {
1340 sv_.insert_value_no_null(sv_idx, v);
1341 bv_null->set_bit_no_check(idx);
1342
1343 if (idx > max_id_)
1344 {
1345 max_id_ = idx;
1346 size_ = max_id_ + 1;
1347 }
1348 in_sync_ = false;
1349 }
1350}
1351
1352//---------------------------------------------------------------------
1353
1354template<class Val, class SV>
1356 const rsc_sparse_vector<Val, SV>& csv) const BMNOEXCEPT
1357{
1358 if (this == &csv)
1359 return true;
1360 if (max_id_ != csv.max_id_ || size_ != csv.size_)
1361 return false;
1362 bool same_sv = sv_.equal(csv.sv_);
1363 return same_sv;
1364}
1365
1366//---------------------------------------------------------------------
1367
1368template<class Val, class SV>
1370 const sparse_vector_type& sv_src)
1371{
1372 max_id_ = size_ = 0;
1373
1374 bvector_type* bv_null = sv_.get_null_bvect();
1375 BM_ASSERT(bv_null);
1376
1377 const bvector_type* bv_null_src = sv_src.get_null_bvector();
1378 if (!bv_null_src) // dense vector (no NULL columns)
1379 {
1380 sv_ = sv_src;
1381 BM_ASSERT(sv_.get_null_bvect());
1382 }
1383 else
1384 {
1385 sv_.clear_all(true, 0);
1386 *bv_null = *bv_null_src;
1387
1388 bm::rank_compressor<bvector_type> rank_compr; // re-used for planes
1389
1390 unsigned src_planes = sv_src.slices();
1391 for (unsigned i = 0; i < src_planes; ++i)
1392 {
1393 const bvector_type* bv_src_plane = sv_src.get_slice(i);
1394 if (bv_src_plane)
1395 {
1396 bvector_type* bv_plane = sv_.get_create_slice(i);
1397 rank_compr.compress(*bv_plane, *bv_null, *bv_src_plane);
1398 }
1399 } // for
1400 size_type count = bv_null->count(); // set correct sizes
1401 sv_.resize(count);
1402 }
1403
1404 sync(true);
1405}
1406
1407//---------------------------------------------------------------------
1408
1409template<class Val, class SV>
1411{
1412 sv.clear_all(true, 0);
1413
1414 const bvector_type* bv_null_src = this->get_null_bvector();
1415 if (!bv_null_src)
1416 {
1417 BM_ASSERT(bv_null_src);
1418 return;
1419 }
1420
1421 bvector_type* bv_null = sv.get_null_bvect();
1422 BM_ASSERT(bv_null);
1423 *bv_null = *bv_null_src;
1424
1425 bm::rank_compressor<bvector_type> rank_compr; // re-used for planes
1426
1427 unsigned src_planes = sv_.slices();
1428 for (unsigned i = 0; i < src_planes; ++i)
1429 {
1430 if (const bvector_type* bv_src_plane = sv_.get_slice(i))
1431 {
1432 bvector_type* bv_plane = sv.get_create_slice(i);
1433 rank_compr.decompress(*bv_plane, *bv_null, *bv_src_plane);
1434 }
1435 } // for i
1436 sv.resize(this->size());
1437}
1438
1439//---------------------------------------------------------------------
1440
1441template<class Val, class SV>
1443{
1444 if (in_sync_ && force == false)
1445 return; // nothing to do
1446 const bvector_type* bv_null = sv_.get_null_bvector();
1447 BM_ASSERT(bv_null);
1448 bv_null->build_rs_index(rs_idx_); // compute popcount prefix list
1449 sv_.is_ro_ = bv_null->is_ro();
1450
1451 if (force)
1452 sync_size();
1453
1454 size_type cnt = size_ ? bv_null->count_range(0, size_-1, *rs_idx_)
1455 : 0;
1456 is_dense_ = (cnt == size_); // dense vector?
1457 BM_ASSERT(size_ >= bv_null->count());
1458
1459 in_sync_ = true;
1460}
1461
1462//---------------------------------------------------------------------
1463
1464template<class Val, class SV>
1466{
1467 const bvector_type* bv_null = sv_.get_null_bvector();
1468 BM_ASSERT(bv_null);
1469 // sync the max-id
1470 bool found = bv_null->find_reverse(max_id_);
1471 if (!found)
1472 max_id_ = size_ = 0;
1473 else
1474 size_ = max_id_ + 1;
1475 sync(false);
1476}
1477
1478//---------------------------------------------------------------------
1479
1480template<class Val, class SV>
1482 size_type* idx_to) const BMNOEXCEPT
1483{
1484 BM_ASSERT(idx_to);
1485 const bvector_type* bv_null = sv_.get_null_bvector();
1486 if (idx >= size_)
1487 {
1488 BM_ASSERT(bv_null->get_bit(idx) == false);
1489 return false;
1490 }
1491 if (in_sync_)
1492 return resolve_sync(idx, idx_to);
1493
1494 // not in-sync: slow access
1495 bool found = bv_null->test(idx);
1496 if (!found)
1497 return found;
1498 *idx_to = bv_null->count_range_no_check(0, idx);
1499 return found;
1500}
1501
1502//---------------------------------------------------------------------
1503
1504template<class Val, class SV>
1506 size_type idx,
1507 size_type* idx_to) const BMNOEXCEPT
1508{
1509 BM_ASSERT(idx_to);
1510 BM_ASSERT(in_sync_);
1511
1512 const bvector_type* bv_null = sv_.get_null_bvector();
1513 if (is_dense_)
1514 {
1515 *idx_to = idx+1;
1516 if (idx <= size_)
1517 return true;
1518 *idx_to = bm::id_max;
1519 BM_ASSERT(bv_null->get_bit(idx) == false);
1520 return false;
1521 }
1522 *idx_to = bv_null->count_to_test(idx, *rs_idx_);
1523 return bool(*idx_to);
1524}
1525
1526//---------------------------------------------------------------------
1527
1528template<class Val, class SV>
1530 size_type from, size_type to,
1531 size_type* idx_from, size_type* idx_to) const BMNOEXCEPT
1532{
1533 BM_ASSERT(idx_to && idx_from);
1534 const bvector_type* bv_null = sv_.get_null_bvector();
1535 size_type copy_sz, sv_left;
1536 if (in_sync_)
1537 copy_sz = bv_null->count_range(from, to, *rs_idx_);
1538 else // slow access
1539 copy_sz = bv_null->count_range(from, to);
1540 if (!copy_sz)
1541 return false;
1542
1543 if (in_sync_)
1544 sv_left = bv_null->rank_corrected(from, *rs_idx_);
1545 else
1546 {
1547 sv_left = bv_null->count_range(0, from);
1548 bool tl = bv_null->test(from); // TODO: add count and test
1549 sv_left -= tl; // rank correction
1550 }
1551
1552 *idx_from = sv_left; *idx_to = sv_left + copy_sz - 1;
1553 return true;
1554}
1555
1556
1557//---------------------------------------------------------------------
1558
1559template<class Val, class SV>
1562{
1563 size_type sv_idx;
1564 if (idx >= size())
1565 sv_.throw_range_error("compressed collection access error");
1566
1567 bool found = resolve(idx, &sv_idx);
1568 if (!found)
1569 {
1570 sv_.throw_range_error("compressed collection item not found");
1571 }
1572 return sv_.at(--sv_idx);
1573}
1574
1575//---------------------------------------------------------------------
1576
1577template<class Val, class SV>
1580{
1581 size_type sv_idx;
1582 bool found = resolve(idx, &sv_idx);
1583 if (!found)
1584 return value_type(0);
1585 BM_ASSERT(!is_null(idx));
1586 return sv_.get(--sv_idx);
1587}
1588
1589//---------------------------------------------------------------------
1590
1591template<class Val, class SV>
1594 size_type N_bits) const BMNOEXCEPT
1595{
1596 size_type sv_idx;
1597 bool found = resolve(idx, &sv_idx);
1598 if (!found)
1599 return unsigned_value_type(0);
1600 BM_ASSERT(!is_null(idx));
1601 return sv_.get_unsigned_bits(--sv_idx, N_bits);
1602}
1603
1604//---------------------------------------------------------------------
1605
1606template<class Val, class SV>
1608 size_type idx, value_type& v) const BMNOEXCEPT
1609{
1610 size_type sv_idx;
1611 if (!resolve(idx, &sv_idx))
1612 return false;
1613 v = sv_.get(--sv_idx);
1614 return true;
1615}
1616
1617//---------------------------------------------------------------------
1618
1619template<class Val, class SV>
1621 size_type idx, value_type& v) const BMNOEXCEPT
1622{
1623 size_type sv_idx;
1624 bool found = resolve_sync(idx, &sv_idx);
1625 if (!found)
1626 return found;
1627 v = sv_.get(--sv_idx);
1628 return true;
1629}
1630
1631//---------------------------------------------------------------------
1632
1633template<class Val, class SV>
1635{
1636 const bvector_type* bv_null = sv_.get_null_bvector();
1637 BM_ASSERT(bv_null);
1638 return !(bv_null->test(idx));
1639}
1640
1641//---------------------------------------------------------------------
1642
1643template<class Val, class SV>
1645 typename bvector_type::optmode opt_mode,
1646 statistics* st)
1647{
1648 sv_.optimize(temp_block, opt_mode, (typename sparse_vector_type::statistics*)st);
1649 if (st)
1650 {
1651 st->memory_used += sizeof(rs_index_type);
1652 st->memory_used += rs_idx_->get_total() *
1653 (sizeof(typename rs_index_type::size_type)
1654 + sizeof(typename rs_index_type::sb_pair_type));
1655 }
1656 if (is_sync()) // must rebuild the sync index after optimization
1657 this->sync(true);
1658}
1659
1660//---------------------------------------------------------------------
1661
1662template<class Val, class SV>
1664{
1665 sv_.clear_all(free_mem, 0);
1666 in_sync_ = false; max_id_ = size_ = 0;
1667}
1668
1669//---------------------------------------------------------------------
1670
1671template<class Val, class SV>
1674{
1675 BM_ASSERT(st);
1676 sv_.calc_stat((typename sparse_vector_type::statistics*)st);
1677 if (st)
1678 {
1679 st->memory_used += sizeof(rs_index_type);
1680 st->memory_used += rs_idx_->get_total() *
1681 (sizeof(typename rs_index_type::size_type)
1682 + sizeof(typename rs_index_type::sb_pair_type));
1683 }
1684}
1685
1686//---------------------------------------------------------------------
1687
1688template<class Val, class SV>
1691{
1692 return sv_.get_null_bvector();
1693}
1694
1695//---------------------------------------------------------------------
1696
1697template<class Val, class SV>
1698bool
1700 size_type& idx) const BMNOEXCEPT
1701{
1702 BM_ASSERT(rank);
1703 bool b;
1704 const bvector_type* bv_null = get_null_bvector();
1705 if (in_sync())
1706 b = bv_null->select(rank, idx, *rs_idx_);
1707 else
1708 b = bv_null->find_rank(rank, 0, idx);
1709 return b;
1710}
1711
1712//---------------------------------------------------------------------
1713
1714template<class Val, class SV>
1715void rsc_sparse_vector<Val, SV>::throw_no_rsc_index()
1716{
1717#ifndef BM_NO_STL
1718 throw std::domain_error("Rank-Select index not constructed, call sync() first");
1719#else
1720 BM_ASSERT_THROW(false, BM_ERR_RANGE);
1721#endif
1722}
1723
1724//---------------------------------------------------------------------
1725
1726
1727template<class Val, class SV>
1730 size_type idx_from,
1732 bool zero_mem) const
1733{
1734 if (size == 0)
1735 return 0;
1736 if (!in_sync_)
1737 throw_no_rsc_index(); // call sync() to build RSC fast access index
1738
1739 BM_ASSERT(arr);
1740 BM_ASSERT(rs_idx_);
1741
1742 if (idx_from >= this->size())
1743 return 0;
1744
1745 if ((bm::id_max - size) <= idx_from)
1746 size = bm::id_max - idx_from;
1747 if ((idx_from + size) > this->size())
1748 size = this->size() - idx_from;
1749
1750 const bvector_type* bv_null = sv_.get_null_bvector();
1751 size_type rank = bv_null->rank_corrected(idx_from, *rs_idx_);
1752
1753 BM_ASSERT(rank == bv_null->count_range(0, idx_from) - bv_null->test(idx_from));
1754
1755 size_type i = 0;
1756
1757 bvector_enumerator_type en_i = bv_null->get_enumerator(idx_from);
1758 if (!en_i.valid())
1759 return i;
1760
1761 if (zero_mem)
1762 ::memset(arr, 0, sizeof(value_type)*size);
1763
1764 sparse_vector_const_iterator it = sv_.get_const_iterator(rank);
1765 if (it.valid())
1766 {
1767 do
1768 {
1769 size_type en_idx = *en_i;
1770 size_type delta = en_idx - idx_from;
1771 idx_from += delta;
1772 i += delta;
1773 if (i >= size)
1774 return size;
1775 arr[i++] = it.value();
1776 if (!en_i.advance())
1777 break;
1778 if (!it.advance())
1779 break;
1780 ++idx_from;
1781 } while (i < size);
1782 }
1783 return i;
1784}
1785
1786
1787template<class Val, class SV>
1790 value_type* arr_buf_tmp,
1791 size_type idx_from,
1793 bool zero_mem) const BMNOEXCEPT
1794{
1795 if (!size || (idx_from >= this->size()))
1796 return 0;
1797
1798 BM_ASSERT(arr && arr_buf_tmp);
1799 BM_ASSERT(arr != arr_buf_tmp);
1800 BM_ASSERT(in_sync_); // call sync() to build RSC fast access index
1801 BM_ASSERT(rs_idx_);
1802
1803 if ((bm::id_max - size) <= idx_from)
1804 size = bm::id_max - idx_from;
1805 if ((idx_from + size) > this->size())
1806 size = this->size() - idx_from;
1807
1808 if (zero_mem)
1809 ::memset(arr, 0, sizeof(value_type)*size);
1810
1811 const bvector_type* bv_null = sv_.get_null_bvector();
1812 size_type rank = bv_null->rank_corrected(idx_from, *rs_idx_);
1813
1814 BM_ASSERT(rank == bv_null->count_range(0, idx_from) - bv_null->test(idx_from));
1815
1816 bvector_enumerator_type en_i = bv_null->get_enumerator(idx_from);
1817 if (!en_i.valid())
1818 return size;
1819
1820 size_type i = en_i.value();
1821 if (idx_from + size <= i) // empty space (all zeros)
1822 return size;
1823
1824 size_type extract_cnt =
1825 bv_null->count_range_no_check(idx_from, idx_from + size - 1, *rs_idx_);
1826
1827 BM_ASSERT(extract_cnt <= this->size());
1828 auto ex_sz = sv_.decode(arr_buf_tmp, rank, extract_cnt, true);
1829 BM_ASSERT(ex_sz == extract_cnt); (void) ex_sz;
1830
1831 for (i = 0; i < extract_cnt; ++i)
1832 {
1833 BM_ASSERT(en_i.valid());
1834 size_type en_idx = *en_i;
1835 arr[en_idx-idx_from] = arr_buf_tmp[i];
1836 en_i.advance();
1837 } // for i
1838
1839 return size;
1840}
1841
1842
1843//---------------------------------------------------------------------
1844
1845template<class Val, class SV>
1848 const size_type* idx,
1849 size_type* idx_tmp_buf,
1851 bm::sort_order sorted_idx) const
1852{
1853 BM_ASSERT(arr);
1854 BM_ASSERT(idx);
1855 BM_ASSERT(idx_tmp_buf);
1856 BM_ASSERT(size);
1857
1858 if (size == 1) // corner case: get 1 value
1859 {
1860 arr[0] = this->get(idx[0]);
1861 return size;
1862 }
1863
1864 if (is_dense_) // rank-select idx recalc is not needed (with bounds check)
1865 {
1866 BM_ASSERT(in_sync_);
1867 for (size_type i = 0; i < size; ++i)
1868 {
1869 if (idx[i] < size_)
1870 idx_tmp_buf[i] = idx[i];
1871 else
1872 {
1873 idx_tmp_buf[i] = bm::id_max;
1874 if (sorted_idx == bm::BM_SORTED)
1875 sorted_idx = bm::BM_UNKNOWN; // UNK will evaluate the sort-order
1876 }
1877 } // for i
1878 }
1879 else
1880 {
1881 // validate index, resolve rank addresses
1882 //
1883 for (size_type i = 0; i < size; ++i)
1884 {
1885 size_type sv_idx;
1886 if (resolve(idx[i], &sv_idx))
1887 {
1888 idx_tmp_buf[i] = sv_idx-1;
1889 }
1890 else
1891 {
1892 if (sorted_idx == bm::BM_SORTED)
1893 sorted_idx = bm::BM_UNKNOWN; // UNK will evaluate the sort-order
1894 idx_tmp_buf[i] = bm::id_max;
1895 }
1896 } // for i
1897 }
1898
1899 // gather the data using resolved indexes
1900 //
1901 size = sv_.gather(arr, idx_tmp_buf, size, sorted_idx);
1902 return size;
1903}
1904
1905
1906//---------------------------------------------------------------------
1907
1908template<class Val, class SV>
1909void rsc_sparse_vector<Val, SV>::construct_rs_index()
1910{
1911 if (rs_idx_)
1912 return;
1913 rs_idx_ = (rs_index_type*) bm::aligned_new_malloc(sizeof(rs_index_type));
1914 rs_idx_ = new(rs_idx_) rs_index_type(); // placement new
1915}
1916
1917//---------------------------------------------------------------------
1918
1919template<class Val, class SV>
1920void rsc_sparse_vector<Val, SV>::free_rs_index()
1921{
1922 if (rs_idx_)
1923 {
1924 rs_idx_->~rs_index_type();
1925 bm::aligned_free(rs_idx_);
1926 }
1927}
1928
1929//---------------------------------------------------------------------
1930
1931template<class Val, class SV>
1933 const rsc_sparse_vector<Val, SV>& csv,
1934 size_type left, size_type right)
1935{
1936 if (left > right)
1937 bm::xor_swap(left, right);
1938
1939 if (left >= csv.size())
1940 return;
1941
1942 size_ = csv.size_; max_id_ = csv.max_id_;
1943 in_sync_ = false;
1944
1945 const bvector_type* arg_bv_null = csv.sv_.get_null_bvector();
1946 size_type sv_left, sv_right;
1947 bool range_valid = csv.resolve_range(left, right, &sv_left, &sv_right);
1948 if (!range_valid)
1949 {
1950 sv_.clear_all(true, 0); sv_.resize(size_);
1951 bvector_type* bv_null = sv_.get_null_bvect();
1952 bv_null->copy_range(*arg_bv_null, 0, right);
1953 return;
1954 }
1955 bvector_type* bv_null = sv_.get_null_bvect();
1956 bv_null->copy_range(*arg_bv_null, 0, right); // not NULL vector gets a full copy
1957 sv_.copy_range(csv.sv_, sv_left, sv_right, bm::no_null); // don't copy NULL
1958}
1959
1960
1961//---------------------------------------------------------------------
1962
1963template<class Val, class SV>
1965{
1966 // MUST have the same NULL to work
1967 BM_ASSERT(sv_.get_null_bvector()->equal(*csv.sv_.get_null_bvector()));
1968
1969 sv_.merge(csv.sv_);
1970}
1971
1972//---------------------------------------------------------------------
1973
1974template<class Val, class SV>
1977 size_type left,
1978 size_type right) const BMNOEXCEPT
1979{
1980 if (left > right)
1981 bm::xor_swap(left, right);
1982
1983 const bvector_type* bv_null = sv_.get_null_bvector();
1984 size_type range = right - left;
1985 if ((range < rs3_border0) || !in_sync_)
1986 {
1987 return bv_null->count_range_no_check(left, right);
1988 }
1989 return bv_null->count_range_no_check(left, right, *rs_idx_);
1990}
1991
1992//---------------------------------------------------------------------
1993//
1994//---------------------------------------------------------------------
1995
1996
1997template<class Val, class SV>
2001
2002
2003//---------------------------------------------------------------------
2004
2005template<class Val, class SV>
2008: csv_(csv),
2009 sv_bi_(csv->sv_.get_back_inserter())
2010{
2011 sv_bi_.disable_set_null(); // NULL will be handled outside
2012}
2013
2014//---------------------------------------------------------------------
2015
2016template<class Val, class SV>
2018 const back_insert_iterator& bi)
2019: csv_(bi.csv_),
2020 sv_bi_(bi.sv_bi_)
2021{
2022}
2023
2024
2025//---------------------------------------------------------------------
2026
2027template<class Val, class SV>
2032
2033//---------------------------------------------------------------------
2034
2035template<class Val, class SV>
2038{
2039 BM_ASSERT(csv_);
2040 BM_ASSERT(bm::id64_t(csv_->size_) + 1 < bm::id64_t(bm::id_max));
2041
2042 sv_bi_.add_value_no_null(v);
2043 bvector_type* bv_null = sv_bi_.get_null_bvect();
2044 BM_ASSERT(bv_null);
2045 bv_null->set_bit_no_check(csv_->size_);
2046
2047 csv_->max_id_++;
2048 csv_->size_++;
2049}
2050
2051//---------------------------------------------------------------------
2052
2053template<class Val, class SV>
2055{
2056 BM_ASSERT(csv_);
2057 BM_ASSERT(bm::id64_t(csv_->size_) + 1 < bm::id64_t(bm::id_max));
2058
2059 csv_->max_id_++; csv_->size_++;
2060}
2061
2062//---------------------------------------------------------------------
2063
2064template<class Val, class SV>
2067{
2068 BM_ASSERT(csv_);
2069 BM_ASSERT(bm::id64_t(csv_->size_) + count < bm::id64_t(bm::id_max));
2070
2071 csv_->max_id_+=count;
2072 csv_->size_+=count;
2073}
2074
2075//---------------------------------------------------------------------
2076
2077template<class Val, class SV>
2079{
2080 if (sv_bi_.flush())
2081 csv_->in_sync_ = false;
2082}
2083
2084//---------------------------------------------------------------------
2085//
2086//---------------------------------------------------------------------
2087
2088template<class Val, class BV>
2092
2093//---------------------------------------------------------------------
2094
2095template<class Val, class SV>
2098: csv_(it.csv_), pos_(it.pos_), buf_ptr_(0)
2099{}
2100
2101//---------------------------------------------------------------------
2102
2103template<class Val, class SV>
2106 ) BMNOEXCEPT
2107: csv_(csv), buf_ptr_(0)
2108{
2109 BM_ASSERT(csv_);
2110 pos_ = csv_->empty() ? bm::id_max : 0u;
2111}
2112
2113//---------------------------------------------------------------------
2114
2115template<class Val, class SV>
2119: csv_(csv), buf_ptr_(0)
2120{
2121 BM_ASSERT(csv_);
2122 this->go_to(pos);
2123}
2124
2125//---------------------------------------------------------------------
2126
2127template<class Val, class SV>
2129{
2130 pos_ = (!csv_ || pos >= csv_->size()) ? bm::id_max : pos;
2131 buf_ptr_ = 0;
2132}
2133
2134//---------------------------------------------------------------------
2135
2136template<class Val, class SV>
2138{
2139 if (pos_ == bm::id_max) // nothing to do, we are at the end
2140 return false;
2141 ++pos_;
2142 if (pos_ >= csv_->size())
2143 {
2144 this->invalidate();
2145 return false;
2146 }
2147 if (buf_ptr_)
2148 {
2149 ++buf_ptr_;
2150 if (buf_ptr_ - ((value_type*)vbuffer_.data()) >= n_buf_size)
2151 buf_ptr_ = 0;
2152 }
2153 return true;
2154}
2155
2156//---------------------------------------------------------------------
2157
2158template<class Val, class SV>
2161{
2162 BM_ASSERT(this->valid());
2163 value_type v;
2164
2165 if (!buf_ptr_)
2166 {
2167 vbuffer_.reserve(n_buf_size * sizeof(value_type));
2168 tbuffer_.reserve(n_buf_size * sizeof(value_type));
2169 buf_ptr_ = (value_type*)(vbuffer_.data());
2170 value_type* tmp_buf_ptr = (value_type*) (tbuffer_.data());
2171
2172 csv_->decode_buf(buf_ptr_, tmp_buf_ptr, pos_, n_buf_size, true);
2173 }
2174 v = *buf_ptr_;
2175 return v;
2176}
2177
2178//---------------------------------------------------------------------
2179
2180template<class Val, class SV>
2182{
2183 value_type v = value();
2184 if (buf_ptr_)
2185 {
2186 v = *buf_ptr_;
2187 value_type* buf_end = ((value_type*)vbuffer_.data()) + n_buf_size;
2188 while(!v)
2189 {
2190 ++pos_;
2191 if (++buf_ptr_ < buf_end)
2192 v = *buf_ptr_;
2193 else
2194 break;
2195 }
2196 if (pos_ >= csv_->size())
2197 {
2198 pos_ = bm::id_max;
2199 return;
2200 }
2201 if (buf_ptr_ >= buf_end)
2202 buf_ptr_ = 0;
2203 }
2204}
2205
2206//---------------------------------------------------------------------
2207
2208template<class Val, class SV>
2210{
2211 return csv_->is_null(pos_);
2212}
2213
2214
2215//---------------------------------------------------------------------
2216
2217
2218
2219} // namespace bm
2220
2221
2222
2223#endif
Definitions(internal).
#define BMNOEXCEPT
Definition bmdef.h:82
#define BM_ASSERT
Definition bmdef.h:139
#define BM_ASSERT_THROW(x, xerrcode)
Definition bmdef.h:338
Sparse constainer sparse_vector<> for integer types using bit-transposition transform.
static unsigned slices() BMNOEXCEPT
get total number of bit-planes in the vector
Definition bmbmatrix.h:503
static value_type u2s(unsigned_value_type v) BMNOEXCEPT
Convert unsigned value type to signed representation.
Definition bmbmatrix.h:2108
static unsigned_value_type s2u(value_type v) BMNOEXCEPT
Convert signed value type to unsigned representation.
Definition bmbmatrix.h:2087
static unsigned stored_slices() BMNOEXCEPT
Number of stored bit-planes (value planes + extra.
Definition bmbmatrix.h:508
Constant iterator designed to enumerate "ON" bits.
Definition bm.h:603
size_type value() const BMNOEXCEPT
Get current position (value).
Definition bm.h:659
bool advance() BMNOEXCEPT
Definition bm.h:680
bool valid() const BMNOEXCEPT
Checks if iterator is still valid.
Definition bm.h:283
optmode
Optimization mode Every next level means additional checks (better compression vs time).
Definition bm.h:133
@ opt_compress
compress blocks when possible (GAP/prefix sum)
Definition bm.h:137
allocator_type::allocator_pool_type allocator_pool_type
Definition bm.h:118
rs_index< allocator_type > rs_index_type
Definition bm.h:816
Alloc allocator_type
Definition bm.h:117
Algorithms for rank compression of bit-vector.
Definition bmalgo.h:453
void decompress(BV &bv_target, const BV &bv_idx, const BV &bv_src)
Rank decompression.
Definition bmalgo.h:570
void compress(BV &bv_target, const BV &bv_idx, const BV &bv_src)
Rank compression algorithm based on two palallel iterators/enumerators set of source vector gets re-m...
Definition bmalgo.h:497
void flush()
flush the accumulated buffer
rsc_sparse_vector_type::sparse_vector_type sparse_vector_type
add value to the buffer without changing the NULL vector
void add(value_type v)
add value to the container
rsc_sparse_vector_type::bvector_type bvector_type
back_insert_iterator & operator=(value_type v)
push value to the vector
rsc_sparse_vector< Val, SV > rsc_sparse_vector_type
rsc_sparse_vector_type::size_type size_type
rsc_sparse_vector_type::value_type value_type
back_insert_iterator & operator++(int)
noop
void add_null() BMNOEXCEPT
add NULL (no-value) to the container
sparse_vector_type::back_insert_iterator sparse_vector_bi
Const iterator to traverse the rsc sparse vector.
const_iterator & operator++() BMNOEXCEPT
Advance to the next available value.
bool advance() BMNOEXCEPT
advance iterator forward by one
rsc_sparse_vector< Val, SV > rsc_sparse_vector_type
size_type pos() const BMNOEXCEPT
Current position (index) in the vector.
rsc_sparse_vector_type::value_type value_type
bool operator!=(const const_iterator &it) const BMNOEXCEPT
bool operator<=(const const_iterator &it) const BMNOEXCEPT
bool operator>(const const_iterator &it) const BMNOEXCEPT
bm::byte_buffer< allocator_type > buffer_type
void go_to(size_type pos) BMNOEXCEPT
re-position to a specified position
bool operator<(const const_iterator &it) const BMNOEXCEPT
bool operator==(const const_iterator &it) const BMNOEXCEPT
bool is_null() const BMNOEXCEPT
Get NULL status.
rsc_sparse_vector_type::bvector_type bvector_type
rsc_sparse_vector_type * rsc_sparse_vector_type_ptr
const_iterator & operator++(int)
Advance to the next available value.
bool valid() const BMNOEXCEPT
Returns true if iterator is at a valid position.
void invalidate() BMNOEXCEPT
Invalidate current iterator.
bool operator>=(const const_iterator &it) const BMNOEXCEPT
rsc_sparse_vector_type::size_type size_type
bvector_type::allocator_type::allocator_pool_type allocator_pool_type
value_type operator*() const
Get current position (value).
value_type value() const
Get current position (value).
bvector_type::allocator_type allocator_type
bool operator==(const reference &ref) const BMNOEXCEPT
reference(rsc_sparse_vector< Val, SV > &csv, size_type idx) BMNOEXCEPT
static constexpr bool is_str() BMNOEXCEPT
static constexpr bool is_compressed() BMNOEXCEPT
various type traits
static unsigned stored_slices()
Number of stored bit-slices (value slices + extra.
bvector_type * bvector_type_ptr
bool try_get(size_type idx, value_type &v) const BMNOEXCEPT
get specified element with NOT NULL check
void clear_all(bool free_mem, unsigned) BMNOEXCEPT
resize to zero, free memory
bool resolve(size_type idx, size_type *idx_to) const BMNOEXCEPT
Resolve logical address to access via rank compressed address.
void load_from(const sparse_vector_type &sv_src)
Load compressed vector from a sparse vector (with NULLs).
rsc_sparse_vector< Val, SV > & operator=(rsc_sparse_vector< Val, SV > &&csv) BMNOEXCEPT
const sparse_vector_type & get_sv() const BMNOEXCEPT
access dense vector
bool resolve_sync(size_type idx, size_type *idx_to) const BMNOEXCEPT
const bmatrix_type & get_bmatrix() const BMNOEXCEPT
void merge_not_null(rsc_sparse_vector< Val, SV > &csv)
merge two vectors (argument gets destroyed) It is important that both vectors have the same NULL vect...
size_type decode(value_type *arr, size_type idx_from, size_type size, bool zero_mem=true) const
void freeze()
Turn sparse vector into immutable mode Read-only (immutable) vector uses less memory and allows faste...
void push_back(size_type idx, value_type v)
set specified element with bounds checking and automatic resize
void clear(const bvector_type &bv_idx)
Set vector elements spcified by argument bit-vector to zero Note that set to 0 elements are NOT going...
back_insert_iterator get_back_inserter()
void load_to(sparse_vector_type &sv) const
Exort compressed vector to a sparse vector (with NULLs).
SV::bmatrix_type bmatrix_type
void set(size_type idx, value_type v)
set specified element with bounds checking and automatic resize
bm::heap_matrix< unsigned char, 1, 1, typename bvector_type::allocator_type > remap_matrix_type
unused remap matrix type for compatibility with the sparse serializer
const_iterator end() const BMNOEXCEPT
Provide const iterator access to the end.
value_type at(size_type idx) const
access specified element with bounds checking
void optimize(bm::word_t *temp_block=0, typename bvector_type::optmode opt_mode=bvector_type::opt_compress, statistics *stat=0)
run memory optimization for all vector slices
void sync(bool force)
Re-calculate rank-select index for faster access to vector.
size_type count_range_notnull(size_type left, size_type right) const BMNOEXCEPT
size_type effective_vector_max() const BMNOEXCEPT
Always 1 (non-matrix type).
SV::unsigned_value_type unsigned_value_type
bvector_type_ptr slice(unsigned i)
rsc_sparse_vector(bm::null_support null_able=bm::use_null, allocation_policy_type ap=allocation_policy_type(), size_type bv_max_size=bm::id_max, const allocator_type &alloc=allocator_type())
void push_back_null()
push back NULL value
void unsync() BMNOEXCEPT
Unsync the prefix sum table.
bool resolve_range(size_type from, size_type to, size_type *idx_from, size_type *idx_to) const BMNOEXCEPT
size_type effective_size() const BMNOEXCEPT
size of internal dense vector
static unsigned_value_type s2u(value_type v) BMNOEXCEPT
Convert signed value type to unsigned representation.
void set_remap() BMNOEXCEPT
void set_null(size_type idx)
set specified element to NULL RSC vector actually erases element when it is set to NULL (expensive).
void inc_not_null(size_type idx, value_type v)
increment specified element by one, element MUST be NOT NULL Faster than just inc() if element is NUL...
const remap_matrix_type * get_remap_matrix() const
bvector_type_const_ptr get_slice(unsigned i) const BMNOEXCEPT
get access to bit-plane, function checks and creates a plane
void resize_internal(size_type sz)
size_type decode_buf(value_type *arr, value_type *arr_buf_tmp, size_type idx_from, size_type size, bool zero_mem=true) const BMNOEXCEPT
size_type size_internal() const BMNOEXCEPT
SV::const_iterator sparse_vector_const_iterator
bvector_type_ptr get_create_slice(unsigned i)
constexpr bool is_remap() const BMNOEXCEPT
bool equal(const rsc_sparse_vector< Val, SV > &csv) const BMNOEXCEPT
check if another vector has the same content
value_type get(size_type idx) const BMNOEXCEPT
get specified element without bounds checking
const value_type & const_reference
bmatrix_type & get_bmatrix() BMNOEXCEPT
const_iterator get_const_iterator(size_type idx) const BMNOEXCEPT
Get const_itertor re-positioned to specific element.
const bvector_type * bvector_type_const_ptr
size_t remap_size() const BMNOEXCEPT
void sync()
Re-calculate prefix sum table used for rank search (if necessary).
const unsigned char * get_remap_buffer() const BMNOEXCEPT
unsigned char * init_remap_buffer() BMNOEXCEPT
void set_null(const bvector_type &bv_idx)
Set NULL all elements set as 1 in the argument vector.
bool is_null(size_type idx) const BMNOEXCEPT
test if specified element is NULL
static value_type u2s(unsigned_value_type v) BMNOEXCEPT
void mark_null_idx(unsigned null_idx) BMNOEXCEPT
void calc_stat(struct rsc_sparse_vector< Val, SV >::statistics *st) const BMNOEXCEPT
Calculates memory statistics.
bvector_type::allocator_type allocator_type
rsc_sparse_vector(rsc_sparse_vector< Val, SV > &&csv) BMNOEXCEPT
const bvector_type * get_null_bvector() const BMNOEXCEPT
Get bit-vector of assigned values (or NULL).
void clear() BMNOEXCEPT
resize to zero, free memory
void push_back(value_type v)
add element with automatic resize
unsigned effective_slices() const BMNOEXCEPT
bool try_get_sync(size_type idx, value_type &v) const BMNOEXCEPT
get specified element with NOT NULL check Caller guarantees that the vector is in sync mode (RS-index...
bvector_type::rs_index_type rs_index_type
bool is_sync() const BMNOEXCEPT
returns true if prefix sum table is in sync with the vector
bool find_rank(size_type rank, size_type &idx) const BMNOEXCEPT
remap_matrix_type * get_remap_matrix()
size_type size() const BMNOEXCEPT
return size of the vector
bvector_type::enumerator bvector_enumerator_type
const rs_index_type * get_RS() const BMNOEXCEPT
static unsigned slices() BMNOEXCEPT
get total number of bit-slices in the vector
unsigned_value_type get_unsigned_bits(size_type idx, size_type N_bits) const BMNOEXCEPT
Get raw unsigned value first N bits.
void push_back_null(size_type count)
push back specified amount of NULL values
bool in_sync() const BMNOEXCEPT
returns true if prefix sum table is in sync with the vector
void copy_range(const rsc_sparse_vector< Val, SV > &csv, size_type left, size_type right)
copy range of values from another sparse vector
void inc(size_type idx)
increment specified element by one
size_type gather(value_type *arr, const size_type *idx, size_type *idx_tmp_buf, size_type size, bm::sort_order sorted_idx) const
void push_back_no_check(size_type idx, value_type v)
bool is_ro() const BMNOEXCEPT
Returns true if vector is read-only.
bvector_type::allocation_policy allocation_policy_type
SV::bvector_type bvector_type
void inc(size_type idx, value_type v)
increment specified element by one
void sync_size() BMNOEXCEPT
recalculate size to exclude tail NULL elements After this call size() will return the true size of th...
const_iterator begin() const
Provide const iterator access to container content.
sort_order
Sort order declaration.
Definition bmconst.h:204
null_support
NULL-able value support.
Definition bmconst.h:229
@ BM_SORTED
input set is sorted (ascending order)
Definition bmconst.h:206
@ BM_UNKNOWN
sort order unknown
Definition bmconst.h:208
@ use_null
support "non-assigned" or "NULL" logic
Definition bmconst.h:230
@ no_null
do not support NULL values
Definition bmconst.h:231
Definition bm.h:78
const unsigned id_max
Definition bmconst.h:109
unsigned int word_t
Definition bmconst.h:39
void aligned_free(void *ptr) BMNOEXCEPT
Aligned free.
Definition bmalloc.h:464
void * aligned_new_malloc(size_t size)
Aligned malloc (unlike classic malloc it throws bad_alloc exception).
Definition bmalloc.h:436
BMFORCEINLINE void xor_swap(W &x, W &y) BMNOEXCEPT
XOR swap two variables.
Definition bmutil.h:534
unsigned long long int id64_t
Definition bmconst.h:35
const unsigned rs3_border0
Definition bmconst.h:119
bv_statistics() BMNOEXCEPT
Definition bmfunc.h:67
size_t memory_used
memory usage for all blocks and service tables
Definition bmfunc.h:62
memory allocation policy
Definition bm.h:805