1#ifndef BMSPARSEVEC_SERIAL__H__INCLUDED__
2#define BMSPARSEVEC_SERIAL__H__INCLUDED__
26#ifndef BM__H__INCLUDED__
29# error missing include (bm.h or bm64.h)
208 {
bvs_.set_bookmarks(enable, bm_interval); }
371 const unsigned char* buf,
372 bool clear_sv =
true);
386 bool clear_sv =
true);
410 const unsigned char* buf,
423 const unsigned char* buf);
443 const unsigned char* buf,
452 const unsigned char* buf,
478 typedef bm::heap_vector<unsigned, alloc_type, true> rlen_vector_type;
552 const unsigned char* buf,
599 const unsigned char* buf,
643 buffer_coll.calc_stat(&st);
645 buf.resize(st.max_serialize_mem);
648 unsigned char* buf_ptr = buf.data();
654 enc.
put_8((
unsigned char)bo);
656 unsigned char* mbuf1 = enc.
get_pos();
667 size_t addr_bv_size = bvs.
serialize(bv, buf_ptr, buf.size());
668 buf_ptr += addr_bv_size;
674 size_t coll_size = buffer_coll.size();
680 for (
unsigned i = 0; i < buffer_coll.size(); ++i)
683 size_t sz = cbuf.size();
689 for (
unsigned i = 0; i < buffer_coll.size(); ++i)
692 size_t sz = cbuf.size();
693 enc.
memcpy(cbuf.buf(), sz);
696 buf.resize(enc.
size());
703 const unsigned char* buf,
710 unsigned char h1 = dec.
get_8();
711 unsigned char h2 = dec.
get_8();
714 if (h1 !=
'B' && h2 !=
'C')
726 const unsigned char* bv_buf_ptr = dec.
get_pos();
736 dec.
seek((
int)addr_bv_size);
742 if (coll_size != addr_cnt)
747 typedef size_t vect_size_type;
748 bm::heap_vector<bm::id64_t, allocator_type, true> buf_size_vec;
750 buf_size_vec.resize(vect_size_type(coll_size));
752 for (
unsigned i = 0; i < coll_size; ++i)
755 buf_size_vec[i] = sz;
761 buf_vect.resize(vect_size_type(coll_size));
762 for (
unsigned i = 0; i < coll_size; ++i)
767 dec.
memcpy(b.data(),
size_t(sz));
782 bvs_.gap_length_serialization(
false);
818 bvs_.compute_sim_model(sim_model, ref_vect, params);
836 bv_ref_.build(sv.get_bmatrix());
845 const typename SV::remap_matrix_type* rm = sv.get_remap_matrix();
850 size_t rows = rmatr.rows();
851 size_t cols = rmatr.cols();
858 for (
size_t r = 0; r < rows; ++r)
860 const unsigned char*
BMRESTRICT remap_row = rmatr.row(r);
870 for (
size_t r = 0; r < rows; ++r)
873 csr_size_max += rl * 2;
876 size_t remap_size = sv.remap_size();
878 if (remap_size < csr_size_max)
880 const unsigned char* matrix_buf = sv.get_remap_buffer();
886 enc.
memcpy(matrix_buf,
size_t(remap_size));
891 enc.
put_32(
unsigned(rows));
896 for (
size_t r = 0; r < rows; ++r)
903 for (
size_t r = 0; r < rows; ++r)
905 const unsigned char*
BMRESTRICT row = rmatr.row(r);
906 for (
size_t j = 0; j < cols; ++j)
908 unsigned char v = row[j];
911 enc.
put_8((
unsigned char)j);
928 digest_bv.clear(
false);
929 unsigned planes = (unsigned)sv.get_bmatrix().rows();
930 for (
unsigned i = 0; i < planes; ++i)
932 typename SV::bvector_type_const_ptr bv = sv.get_slice(i);
934 digest_bv.set_bit_no_check(i);
944 bvs_.allow_stat_reset(
false);
945 bvs_.reset_compression_stats();
949 unsigned char* buf = sv_layout.
reserve(4);
950 buf[0]=
'B'; buf[1] =
'Z';
956 bvs_.set_ref_vectors(0);
959 unsigned planes = (unsigned)sv.get_bmatrix().rows();
965 typename SV::statistics sv_stat;
966 sv.calc_stat(&sv_stat);
968 unsigned char* buf = sv_layout.
reserve(sv_stat.max_serialize_mem);
975 unsigned h_size = 1 + 1 +
1013 ::memset(buf, 0, h_size);
1014 unsigned char* buf_ptr = buf + h_size;
1016 for (
unsigned i = 0; i < planes; ++i)
1018 typename SV::bvector_type_const_ptr bv = sv.get_slice(i);
1030 idx = (unsigned)
bv_ref_.find_bv(bv);
1032 bvs_.set_curr_ref_idx(idx);
1034 size_t buf_size = (size_t)
1035 bvs_.serialize(*bv, buf_ptr, sv_stat.max_serialize_mem);
1037 sv_layout.
set_plane(i, buf_ptr, buf_size);
1038 buf_ptr += buf_size;
1039 if (sv_stat.max_serialize_mem > buf_size)
1041 sv_stat.max_serialize_mem -= buf_size;
1047 bvs_.set_ref_vectors(0);
1054 bm::encoder enc_m(buf_ptr, sv_stat.max_serialize_mem);
1059 buf_ptr += enc_m.
size();
1065 size_t digest_offset = size_t(buf_ptr - buf);
1069 bool use_64bit =
false;
1071 for (
unsigned i = 0; i < planes; ++i)
1073 const unsigned char* p = sv_layout.
get_plane(i);
1076 size_t offset = size_t(p - buf);
1085 bm::encoder enc_o(buf_ptr, sv_stat.max_serialize_mem);
1091 for (
unsigned i = 0; i < planes; ++i)
1093 const unsigned char* p = sv_layout.
get_plane(i);
1096 size_t offset = size_t(p - buf);
1116 buf_ptr += enc_o.
size();
1121 sv_layout.
resize(
size_t(buf_ptr - buf));
1129 if (sv.is_compressed())
1134 enc.
put_8((
unsigned char)bo);
1136 unsigned char matr_s_ser = 1;
1142 enc.
put_8(matr_s_ser);
1144 bm::id64_t planes_code = planes | (1ull << 63);
1147 enc.
put_64(sv.size_internal());
1155template<
typename SV>
1165template<
typename SV>
1174template<
typename SV>
1183template<
typename SV>
1194template<
typename SV>
1204template<
typename SV>
1221template<
typename SV>
1223 const unsigned char* buf,
1234template<
typename SV>
1236 const unsigned char* buf)
1240 unsigned char matr_s_ser = 0;
1241 unsigned planes =
load_header(dec, sv, matr_s_ser);
1247 sv.get_bmatrix().allocate_rows(planes);
1249 for (
unsigned i = 0; i < planes; ++i)
1260template<
typename SV>
1262 const unsigned char* buf,
1268 sv.clear_all(
true, 1);
1275 unsigned char matr_s_ser = 0;
1276 unsigned planes =
load_header(dec, sv, matr_s_ser);
1288 sv.get_bmatrix().allocate_rows(planes);
1298 bool range_valid = sv.resolve_range(from, to, &sv_left, &sv_right);
1332template<
typename SV>
1334 const unsigned char* buf,
1339 sv.clear_all(
true, 1);
1344 unsigned char matr_s_ser = 0;
1345 unsigned planes =
load_header(dec, sv, matr_s_ser);
1356 sv.get_bmatrix().allocate_rows(planes);
1385#pragma warning( push )
1386#pragma warning( disable : 4127)
1388 if (sv.max_vector_size == 1)
1391 const bvector_type* bv_null = sv.get_slice(sv.sv_value_slices);
1393 sv.mark_null_idx(sv.sv_value_slices);
1396#pragma warning( pop )
1416template<
typename SV>
1418 bm::decoder& dec, SV& sv,
unsigned char& matr_s_ser)
1422 unsigned char h1 = dec.
get_8();
1423 unsigned char h2 = dec.
get_8();
1425 BM_ASSERT(h1 ==
'B' && (h2 ==
'M' || h2 ==
'C' || h2 ==
'Z'));
1427 bool sig2_ok = (h2 ==
'M' || h2 ==
'C' || h2 ==
'Z');
1428 if (h1 !=
'B' || !sig2_ok)
1430 unsigned planes = 0;
1437 unsigned char bv_bo = dec.
get_8(); (void) bv_bo;
1438 planes = dec.
get_8();
1441 matr_s_ser = dec.
get_8();
1442 planes_code = dec.
get_64();
1443 planes = (unsigned) planes_code;
1447 if (matr_s_ser == 2)
1451 if constexpr (SV::is_dynamic_splices::value ==
false)
1453 unsigned sv_planes = sv.stored_slices();
1454 if (!planes || planes > sv_planes)
1461 if (planes_code & (1ull << 63))
1471template<
typename SV>
1475 const unsigned char* buf,
1484 for (
int i =
int(planes-1); i >= 0; --i)
1489 const unsigned char* bv_buf_ptr = buf + offset;
1558template<
typename SV>
1561 const unsigned char* buf,
1565 if (!sv.is_nullable())
1574 const unsigned char* bv_buf_ptr = buf + offset;
1630template<
typename SV>
1632 const unsigned char* buf,
bm::decoder& dec,
unsigned planes)
1641 buf_ptr += read_bytes;
1645 unsigned char dtype = dec_o.
get_8();
1649 for (
unsigned i = 0; i < planes; ++i)
1653 offset = (size_t) dec_o.
get_64();
1663 unsigned min_v = dec_o.
get_32();
1664 unsigned max_v = dec_o.
get_32();
1673 for (
unsigned i = 0; i < planes; ++i)
1692 for (
unsigned i = 0; i < planes; ++i)
1694 size_t offset = (size_t) dec.
get_64();
1702template<
typename SV>
1704 const unsigned char* remap_buf_ptr)
1711 unsigned char rh = dec_m.
get_8();
1718 size_t remap_size = (size_t) dec_m.
get_64();
1719 unsigned char* remap_buf = sv.init_remap_buffer();
1721 size_t target_remap_size = sv.remap_size();
1722 if (!remap_size || !remap_buf || remap_size != target_remap_size)
1726 dec_m.
memcpy(remap_buf, remap_size);
1733 typename SV::remap_matrix_type* rmatr = sv.get_remap_matrix();
1738 size_t rows = (size_t) dec_m.
get_32();
1739 size_t cols = dec_m.
get_16();
1744 rmatr->resize(rows, cols,
false);
1753 for (
size_t r = 0; r < rows; ++r)
1755 unsigned rl = bi.
gamma();
1760 for (
size_t r = 0; r < rows; ++r)
1762 unsigned char*
BMRESTRICT row = rmatr->row(r);
1764 if (!cnt || cnt > 256)
1768 for (
size_t j = 0; j < cnt; ++j)
1770 unsigned idx = dec_m.
get_8();
1771 unsigned char v = dec_m.
get_8();
1785 unsigned char end_tok = dec_m.
get_8();
1795template<
typename SV>
1799 throw std::logic_error(
"BitMagic: Invalid serialization signature header");
1801 BM_THROW(BM_ERR_SERIALFORMAT);
1807template<
typename SV>
1811 throw std::logic_error(
"BitMagic: Invalid serialization target (64-bit BLOB)");
1813 BM_THROW(BM_ERR_SERIALFORMAT);
1819template<
typename SV>
1823 throw std::logic_error(
"BitMagic: Invalid serialization target (bit depth)");
1825 BM_THROW(BM_ERR_SERIALFORMAT);
1831template<
typename SV>
1835 throw std::logic_error(
"BitMagic: Invalid serialization fromat (BLOB corruption?)");
1837 BM_THROW(BM_ERR_SERIALFORMAT);
1843template<
typename SV>
1847 throw std::logic_error(
"BitMagic: Invalid serialization format (remap matrix)");
1849 BM_THROW(BM_ERR_SERIALFORMAT);
Serialization / compression of bvector<>. Set theoretical operations on compressed BLOBs.
Sparse container sparse_vector<> for integer types using bit-transposition transform.
Byte based reader for un-aligned bit streaming.
unsigned gamma() BMNOEXCEPT
decode unsigned value using Elias Gamma coding
void bic_decode_u32_cm(bm::word_t *arr, unsigned sz, bm::word_t lo, bm::word_t hi) BMNOEXCEPT
Binary Interpolative array decode (32-bit).
Byte based writer for un-aligned bit streaming.
void bic_encode_u32_cm(const bm::word_t *arr, unsigned sz, bm::word_t lo, bm::word_t hi) BMNOEXCEPT
Binary Interpolative encoding (array of 32-bit ints) cm - "center-minimal".
void gamma(unsigned value) BMNOEXCEPT
Elias Gamma encode the specified value.
@ opt_compress
compress blocks when possible (GAP/prefix sum)
allocator_type::allocator_pool_type allocator_pool_type
bvector_size_type size_type
Deserializer for compressed collections.
bvector_type::allocator_type allocator_type
CBC::buffer_type buffer_type
CBC::bvector_type bvector_type
CBC::statistics statistics_type
CBC::container_type container_type
CBC compressed_collection_type
int deserialize(CBC &buffer_coll, const unsigned char *buf, bm::word_t *temp_block=0)
CBC::address_resolver_type address_resolver_type
Serializer for compressed collections.
void serialize(const CBC &buffer_coll, buffer_type &buf, bm::word_t *temp_block=0)
CBC compressed_collection_type
CBC::bvector_type bvector_type
CBC::statistics statistics_type
CBC::address_resolver_type address_resolver_type
CBC::buffer_type buffer_type
const unsigned char * get_pos() const BMNOEXCEPT
Return current buffer pointer.
void seek(int delta) BMNOEXCEPT
change current position
unsigned char get_8() BMNOEXCEPT
Reads character from the decoding buffer.
void memcpy(unsigned char *dst, size_t count) BMNOEXCEPT
read bytes from the decode buffer
Class for decoding data from memory buffer.
bm::word_t get_32() BMNOEXCEPT
Reads 32-bit word from the decoding buffer.
bm::id64_t get_64() BMNOEXCEPT
Reads 64-bit word from the decoding buffer.
bm::short_t get_16() BMNOEXCEPT
Reads 16-bit word from the decoding buffer.
Deserializer for bit-vector.
size_t size() const BMNOEXCEPT
Returns size of the current encoding stream.
unsigned char * get_pos() const BMNOEXCEPT
Get current memory stream position.
void put_64(bm::id64_t w) BMNOEXCEPT
Puts 64 bits word into encoding buffer.
void put_8(unsigned char c) BMNOEXCEPT
Puts one character into the encoding buffer.
void set_pos(unsigned char *buf_pos) BMNOEXCEPT
Set current memory stream position.
void memcpy(const unsigned char *src, size_t count) BMNOEXCEPT
copy bytes into target buffer or just rewind if src is NULL
void put_32(bm::word_t w) BMNOEXCEPT
Puts 32 bits word into encoding buffer.
void put_16(bm::short_t s) BMNOEXCEPT
Puts short word (16 bits) into the encoding buffer.
Deserializer, performs logical operations between bit-vector and serialized bit-vector.
Algorithms for rank compression of bit-vector.
Bit-vector serialization class.
void gap_length_serialization(bool value) BMNOEXCEPT
Set GAP length serialization (serializes GAP levels of the original vector).
bm::bv_ref_vector< BV > bv_ref_vector_type
byte_buffer< allocator_type > buffer
bm::xor_sim_model< BV > xor_sim_model_type
size_type serialize(const BV &bv, unsigned char *buf, size_t buf_size)
Bitvector serialization into memory block.
sparse vector de-serializer
sparse_vector_deserializer()
void deserialize_planes(SV &sv, unsigned planes, const unsigned char *buf, const bvector_type *mask_bv=0)
deserialize bit-vector planes
void setup_xor_compression()
setup deserializers
allocator_pool_type pool_
bm::operation_deserializer< bvector_type > op_deserial_
void deserialize(SV &sv, const unsigned char *buf, bool clear_sv=true)
bm::serializer< bvector_type >::bv_ref_vector_type bv_ref_vector_type
void deserialize(SV &sv, const unsigned char *buf, const bvector_type &mask_bv)
bm::rank_compressor< bvector_type > rsc_compressor_
void deserialize_structure(SV &sv, const unsigned char *buf)
bm::id64_t digest_offset_
void deserialize_sv(SV &sv, const unsigned char *buf, const bvector_type *mask_bv, bool clear_sv)
static void raise_invalid_bitdepth()
throw error on incorrect deserialization
static void raise_invalid_header()
throw error on incorrect deserialization
SV::bvector_type bvector_type
size_type idx_range_from_
bvector_type not_null_mask_bv_
bvector_type::allocator_type::allocator_pool_type allocator_pool_type
bvector_type::allocator_type alloc_type
static void raise_invalid_format()
throw error on incorrect deserialization
unsigned load_header(bm::decoder &dec, SV &sv, unsigned char &matr_s_ser)
Deserialize header/version and other common info.
void deserialize(SV &sv, const unsigned char *buf, size_type from, size_type to)
bm::heap_vector< size_t, alloc_type, true > off_vect_
~sparse_vector_deserializer()
bm::heap_vector< unsigned, alloc_type, true > off32_vect_
bvector_type * bvector_type_ptr
static void raise_invalid_64bit()
throw error on incorrect deserialization
bvector_type plane_digest_bv_
static void raise_missing_remap_matrix()
throw error on incorrect deserialization
const bvector_type * bvector_type_const_ptr
void set_xor_ref(bv_ref_vector_type *bv_ref_ptr)
Set external XOR reference vectors (data frame reference vectors).
bv_ref_vector_type * bv_ref_ptr_
rlen_vector_type remap_rlen_vect_
void set_finalization(bm::finalization is_final)
Set deserialization finalization to force deserialized vectors into READONLY (or READWRITE) mode.
bvector_type rsc_mask_bv_
bm::deserializer< bvector_type, bm::decoder > deserial_
const unsigned char * remap_buf_ptr_
bv_ref_vector_type bv_ref_
int load_null_plane(SV &sv, int planes, const unsigned char *buf, const bvector_type *mask_bv)
load NULL bit-plane (returns new planes count)
void deserialize_range(SV &sv, const unsigned char *buf, size_type from, size_type to, bool clear_sv=true)
void load_planes_off_table(const unsigned char *buf, bm::decoder &dec, unsigned planes)
load offset table
void clear_xor_compression()
unset XOR compression vectors
void load_remap(SV &sv, const unsigned char *remap_buf_ptr)
load string remap dict
bm::finalization is_final_
SV::value_type value_type
sparse_vector_serializer()
bvector_type plane_digest_bv_
u32_vector_type remap_rlen_vect_
void encode_remap_matrix(bm::encoder &enc, const SV &sv)
serialize the remap matrix used for SV encoding
void set_xor_ref(const bv_ref_vector_type *bv_ref_ptr) BMNOEXCEPT
Enable external XOR serialization via external reference vectors (data frame ref.
serializer_type::buffer buffer_type
bm::serializer< bvector_type >::bv_ref_vector_type bv_ref_vector_type
static void build_plane_digest(bvector_type &digest_bv, const SV &sv)
void set_bookmarks(bool enable, unsigned bm_interval=256) BMNOEXCEPT
Add skip-markers for faster range deserialization.
const bv_ref_vector_type * bv_ref_ptr_
void set_xor_ref(bool is_enabled) BMNOEXCEPT
Turn ON and OFF XOR compression of sparse vectors. Enables XOR reference compression for the sparse ve...
bvector_type * bvector_type_ptr
void build_xor_ref_vector(const SV &sv)
SV::value_type value_type
const bvector_type * bvector_type_const_ptr
void compute_sim_model(xor_sim_model_type &sim_model, const bv_ref_vector_type &ref_vect, const bm::xor_sim_params &params)
Calculate XOR similarity model for ref_vector; the reference vector must be associated beforehand.
alloc_type::allocator_pool_type allocator_pool_type
buffer_type plane_digest_buf_
void set_sim_model(const xor_sim_model_type *sim_model) BMNOEXCEPT
Attach serializer to a pre-computed similarity model.
u32_vector_type plane_off_vect_
bm::serializer< bvector_type > serializer_type
SV::remap_matrix_type remap_matrix_type
void serialize(const SV &sv, sparse_vector_serial_layout< SV > &sv_layout)
Serialize sparse vector into a memory buffer(s) structure.
bm::serializer< bvector_type > & get_bv_serializer() BMNOEXCEPT
Get access to the underlying bit-vector serializer. This access can be used to fine tune compression s...
xor_sim_model_type sim_model_
bool is_xor_ref() const BMNOEXCEPT
Returns the XOR reference compression status (enabled/disabled).
void disable_xor_compression() BMNOEXCEPT
Disable XOR compression on serialization.
void enable_xor_compression() BMNOEXCEPT
Enable XOR compression on vector serialization.
const xor_sim_model_type * sim_model_ptr_
bm::heap_vector< unsigned, alloc_type, true > u32_vector_type
SV::bvector_type bvector_type
bm::serializer< bvector_type > bvs_
bvector_type::allocator_type alloc_type
bv_ref_vector_type bv_ref_
bm::serializer< bvector_type >::xor_sim_model_type xor_sim_model_type
bm::alloc_pool_guard< allocator_pool_type, bvector< Alloc > > mem_pool_guard
finalization
copy strategy
@ READONLY
immutable (read-only object)
size_t deserialize(BV &bv, const unsigned char *buf, bm::word_t *temp_block=0, const bm::bv_ref_vector< BV > *ref_vect=0)
Bitvector deserialization from a memory BLOB.
void sparse_vector_serialize(const SV &sv, sparse_vector_serial_layout< SV > &sv_layout, bm::word_t *temp_block=0)
Serialize sparse vector into a memory buffer(s) structure.
int sparse_vector_deserialize(SV &sv, const unsigned char *buf, bm::word_t *temp_block=0)
Deserialize sparse vector.
SZ count_nz(const VT *arr, SZ arr_size) BMNOEXCEPT
Find count of non-zero elements in the array.
ByteOrder
Byte orders recognized by the library.
unsigned long long int id64_t
unsigned short gap_word_t
static ByteOrder byte_order()
layout class for serialization buffer structure
SV::bvector_type bvector_type
const unsigned char * buf() const BMNOEXCEPT
Return serialization buffer pointer.
sizet_vector_type plane_size_
serialized plane size
serializer< bvector_type >::buffer buffer_type
void resize(size_t ssize)
Set new serialized size.
unsigned char * reserve(size_t capacity)
resize capacity
size_t capacity() const BMNOEXCEPT
return serialization buffer capacity
size_t size() const BMNOEXCEPT
return current serialized size
void resize_slices(unsigned new_slices_size)
Resize for the target number of planes / bit-slices.
bm::heap_vector< unsigned char *, allocator_type, true > ptr_vector_type
sparse_vector_serial_layout() BMNOEXCEPT
ptr_vector_type plane_ptrs_
pointers on serialized bit-planes
buffer_type buf_
serialization buffer
void set_plane(unsigned i, unsigned char *ptr, size_t buf_size) BMNOEXCEPT
Set plane output pointer and size.
const unsigned char * data() const BMNOEXCEPT
Return serialization buffer pointer.
bvector_type::allocator_type allocator_type
const unsigned char * get_plane(unsigned i) const BMNOEXCEPT
Get plane pointer.
bm::heap_vector< size_t, allocator_type, true > sizet_vector_type
void freemem() BMNOEXCEPT
free memory
SV::value_type value_type
~sparse_vector_serial_layout()
Parameters for XOR similarity search.