1#ifndef BMENCODING_H__INCLUDED__
2#define BMENCODING_H__INCLUDED__
32#pragma warning (disable : 4702)
66 unsigned char c8,
unsigned char c16,
unsigned char c32)
BMNOEXCEPT;
78 unsigned char* start_;
181template<
class TEncoder>
186 : dest_(dest), used_bits_(0), accum_(0)
235 dest_.put_32(accum_);
236 used_bits_ = accum_ = 0;
239 bit_out(
const bit_out&);
240 bit_out& operator=(
const bit_out&);
255template<
class TDecoder>
261 used_bits_(
unsigned(
sizeof(accum_) * 8)),
339template<
typename T,
typename TBitIO>
360template<
typename T,
typename TBitIO>
399: buf_(buf), start_(buf)
446#if (BM_UNALIGNED_ACCESS_OK == 1)
450 *buf_++ = (
unsigned char) s;
452 *buf_++ = (
unsigned char) s;
462#if (BM_UNALIGNED_ACCESS_OK == 1)
466 unsigned char* buf = buf_;
471 unsigned char a = (
unsigned char) w16;
472 unsigned char b = (
unsigned char) (w16 >> 8);
479 buf_ = (
unsigned char*)buf;
495 put_8((
unsigned char)w);
502 put_16((
unsigned short) w);
518 BM_ASSERT((buf_ + count) < (start_ + size_));
531 return size_t(buf_ - start_);
559 buf_[0] = (
unsigned char)w;
560 buf_[1] = (
unsigned char)(w >> 8);
561 buf_[2] = (
unsigned char)(w >> 16);
573#if (BM_UNALIGNED_ACCESS_OK == 1)
577 *buf_++ = (
unsigned char) w;
578 *buf_++ = (
unsigned char) (w >> 8);
579 *buf_++ = (
unsigned char) (w >> 16);
580 *buf_++ = (
unsigned char) (w >> 24);
591 BM_ASSERT((w & ~(0xFFFFFFFFFFFFUL)) == 0);
592 *buf_++ = (
unsigned char)w;
593 *buf_++ = (
unsigned char)(w >> 8);
594 *buf_++ = (
unsigned char)(w >> 16);
595 *buf_++ = (
unsigned char)(w >> 24);
596 *buf_++ = (
unsigned char)(w >> 32);
597 *buf_++ = (
unsigned char)(w >> 40);
608#if (BM_UNALIGNED_ACCESS_OK == 1)
612 *buf_++ = (
unsigned char) w;
613 *buf_++ = (
unsigned char) (w >> 8);
614 *buf_++ = (
unsigned char) (w >> 16);
615 *buf_++ = (
unsigned char) (w >> 24);
616 *buf_++ = (
unsigned char) (w >> 32);
617 *buf_++ = (
unsigned char) (w >> 40);
618 *buf_++ = (
unsigned char) (w >> 48);
619 *buf_++ = (
unsigned char) (w >> 56);
631 put_8((
unsigned char) h_mask);
632 for (
unsigned i = 0; w && (i < 8); ++i, w >>= 8)
634 if ((
unsigned char) w)
635 put_8((
unsigned char) w);
646#if (BM_UNALIGNED_ACCESS_OK == 1)
651 unsigned char* buf = buf_;
656 unsigned char a = (
unsigned char) w32;
657 unsigned char b = (
unsigned char) (w32 >> 8);
658 unsigned char c = (
unsigned char) (w32 >> 16);
659 unsigned char d = (
unsigned char) (w32 >> 24);
667 buf_ = (
unsigned char*)buf;
694 unsigned h_mask = (
unsigned char) *
buf_++;
695 for (
unsigned i = 0; h_mask && (i < 8); ++i)
697 if (h_mask & (1u<<i))
700 unsigned char a = (
unsigned char) *
buf_++;
724#if (BM_UNALIGNED_ACCESS_OK == 1)
741 ((
unsigned)
buf_[2] << 16);
753#if (BM_UNALIGNED_ACCESS_OK == 1)
758 ((
unsigned)
buf_[2] << 16) + ((unsigned)
buf_[3] << 24);
788#if (BM_UNALIGNED_ACCESS_OK == 1)
819#if (BM_UNALIGNED_ACCESS_OK == 1)
824 const unsigned char* buf =
buf_;
828 bm::word_t a = buf[0]+ ((unsigned)buf[1] << 8) +
829 ((
unsigned)buf[2] << 16) + ((unsigned)buf[3] << 24);
833 buf_ = (
unsigned char*)buf;
851#if defined(BMAVX2OPT)
852 __m256i* buf_start = (__m256i*)
buf_;
854 __m256i* buf_end = (__m256i*)
buf_;
857#elif defined(BMSSE42OPT) || defined(BMSSE2OPT)
858 __m128i* buf_start = (__m128i*)
buf_;
860 __m128i* buf_end = (__m128i*)
buf_;
867 for (
unsigned i = 0; i < count; i+=4)
869 acc &= (w[i+0] |=
get_32());
870 acc &= (w[i+1] |=
get_32());
871 acc &= (w[i+2] |=
get_32());
872 acc &= (w[i+3] |=
get_32());
874 return acc == not_acc;
892#if defined(BMAVX2OPT)
893 __m256i* buf_start = (__m256i*)
buf_;
895 __m256i* buf_end = (__m256i*)
buf_;
898#elif defined(BMSSE42OPT) || defined(BMSSE2OPT)
899 __m128i* buf_start = (__m128i*)
buf_;
901 __m128i* buf_end = (__m128i*)
buf_;
905 for (
unsigned i = 0; i < count; i+=4)
931#if (BM_UNALIGNED_ACCESS_OK == 1)
935 const unsigned char* buf =
buf_;
943 buf_ = (
unsigned char*)buf;
971 ((
unsigned)
buf_[2] << 16);
980 ((unsigned)
buf_[2] << 8) + ((
unsigned)
buf_[3]);
1022 const unsigned char* buf =
buf_;
1026 bm::word_t a = ((unsigned)buf[0] << 24)+ ((
unsigned)buf[1] << 16) +
1027 ((unsigned)buf[2] << 8) + ((
unsigned)buf[3]);
1030 }
while (w < w_end);
1031 buf_ = (
unsigned char*)buf;
1046 for (
unsigned i = 0; i < count; i+=4)
1048 acc &= (w[i+0] |=
get_32());
1049 acc &= (w[i+1] |=
get_32());
1050 acc &= (w[i+2] |=
get_32());
1051 acc &= (w[i+3] |=
get_32());
1053 return acc == not_acc;
1059 for (
unsigned i = 0; i < count; i+=4)
1078 const unsigned char* buf =
buf_;
1087 }
while (s < s_end);
1088 buf_ = (
unsigned char*)buf;
1094template<
typename TEncoder>
1098 accum_ |= (value << used_bits_);
1099 if (++used_bits_ == (
sizeof(accum_) * 8))
1105template<
typename TEncoder>
1108 unsigned used = used_bits_;
1109 unsigned acc = accum_;
1112 unsigned mask = ~0u;
1113 mask >>= (
sizeof(accum_) * 8) - count;
1118 unsigned free_bits = unsigned(
sizeof(accum_) * 8) - used;
1120 acc |= value << used;
1122 if (count <= free_bits)
1129 value >>= free_bits;
1136 if (used == (
sizeof(accum_) * 8))
1147template<
typename TEncoder>
1150 if (++used_bits_ == (
sizeof(accum_) * 8))
1156template<
typename TEncoder>
1159 unsigned used = used_bits_;
1160 unsigned free_bits = (
sizeof(accum_) * 8) - used;
1161 if (count >= free_bits)
1167 for ( ;count >=
sizeof(accum_) * 8; count -=
sizeof(accum_) * 8)
1177 accum_ |= (1u << used);
1178 if (++used == (
sizeof(accum_) * 8))
1186template<
typename TEncoder>
1195 unsigned used = used_bits_;
1196 unsigned acc = accum_;
1197 const unsigned acc_bits = (
sizeof(acc) * 8);
1198 unsigned free_bits = acc_bits - used;
1201 unsigned count = logv;
1202 if (count >= free_bits)
1208 for ( ;count >= acc_bits; count -= acc_bits)
1219 if (++used == acc_bits)
1229 unsigned mask = (~0u);
1230 mask >>= acc_bits - logv;
1235 acc |= value << used;
1236 free_bits = acc_bits - used;
1237 if (logv <= free_bits)
1244 value >>= free_bits;
1258template<
typename TEncoder>
1267 unsigned mid_idx = sz >> 1;
1273 unsigned r = hi - lo - sz + 1;
1276 unsigned value = val - lo - mid_idx;
1293template<
typename TEncoder>
1302 unsigned mid_idx = sz >> 1;
1308 unsigned r = hi - lo - sz + 1;
1311 unsigned value = val - lo - mid_idx;
1315 unsigned c = (unsigned)(1ull << (logv + 1)) - n;
1316 int64_t half_c = c >> 1;
1317 int64_t half_r = r >> 1;
1318 int64_t lo1 = half_r - half_c;
1319 int64_t hi1 = half_r + half_c + 1;
1321 logv += (value <= lo1 || value >= hi1);
1344struct bic_encode_stack_u16
1351 unsigned stack_size_ = 0;
1361 unsigned r = hi - lo - sz + 1;
1364 unsigned s = stack_size_++;
1381template<
typename TEncoder>
1389 bic_encode_stack_u16<bm::bie_cut_off> u16_stack;
1393 BM_ASSERT(sz_i == u16_stack.stack_size_);
1394 for (
unsigned i = 0; i < sz_i; ++i)
1399 unsigned r = u16_stack.r_[i];
1401 unsigned value = val - lo - mid_idx;
1404 unsigned c = (unsigned)(1ull << (logv + 1)) - n;
1406 int64_t half_c = c >> 1;
1407 int64_t half_r = r >> 1;
1408 int64_t lo1 = half_r - half_c;
1409 int64_t hi1 = half_r + half_c + 1;
1411 logv += (value <= lo1 || value >= hi1);
1413 put_bits(value, logv);
1419template<
typename TEncoder>
1428 unsigned mid_idx = sz >> 1;
1434 unsigned r = hi - lo - sz + 1;
1437 unsigned value = val - lo - mid_idx;
1441 unsigned c = (unsigned)(1ull << (logv + 1)) - n;
1442 unsigned half_c = c >> 1;
1443 unsigned half_r = r >> 1;
1444 int64_t lo1 = (int64_t(half_r) - half_c - (n & 1u));
1445 unsigned hi1 = (half_r + half_c);
1446 logv += (value <= lo1 || value > hi1);
1471template<
class TDecoder>
1484 unsigned r = hi - lo - sz + 1;
1496 unsigned mid_idx = sz >> 1;
1497 val += lo + mid_idx;
1515template<
class TDecoder>
1526 unsigned val = hi - lo - sz + 1;
1531 unsigned c = unsigned((1ull << (logv + 1)) - val - 1);
1532 int64_t half_c = c >> 1;
1533 int64_t half_r = val >> 1;
1534 int64_t lo1 = half_r - half_c - ((val + 1) & 1);
1535 int64_t hi1 = half_r + half_c + 1;
1538 if (val <= lo1 || val >= hi1)
1542 unsigned mid_idx = sz >> 1;
1543 val += lo + mid_idx;
1560template<
class TDecoder>
1571 unsigned val = hi - lo - sz + 1;
1576 unsigned c = unsigned((1ull << (logv + 1)) - val - 1);
1577 int64_t half_c = c >> 1;
1578 int64_t half_r = val >> 1;
1579 int64_t lo1 = half_r - half_c - ((val + 1) & 1);
1580 int64_t hi1 = half_r + half_c + 1;
1582 if (val <= lo1 || val >= hi1)
1586 unsigned mid_idx = sz >> 1;
1587 val += lo + mid_idx;
1604template<
class TDecoder>
1615 unsigned val = hi - lo - sz + 1;
1620 unsigned c = unsigned((1ull << (logv + 1)) - val - 1);
1621 int64_t half_c = c >> 1;
1622 int64_t half_r = val >> 1;
1623 int64_t lo1 = half_r - half_c - ((val + 1) & 1);
1624 int64_t hi1 = half_r + half_c + 1;
1627 if (val <= lo1 || val >= hi1)
1631 unsigned mid_idx = sz >> 1;
1632 val += lo + mid_idx;
1653template<
class TDecoder>
1668 unsigned r = hi - lo - sz + 1;
1673 unsigned c = unsigned((1ull << (logv + 1)) - r - 1);
1674 int64_t half_c = c >> 1;
1675 int64_t half_r = r >> 1;
1676 int64_t lo1 = half_r - half_c - ((r + 1) & 1);
1677 int64_t hi1 = half_r + half_c + 1;
1679 if (r <= lo1 || r >= hi1)
1685 unsigned mid_idx = sz >> 1;
1686 val += lo + mid_idx;
1702template<
class TDecoder>
1715 unsigned r = hi - lo - sz + 1;
1727 unsigned mid_idx = sz >> 1;
1728 val += lo + mid_idx;
1750template<
class TDecoder>
1762 unsigned r = hi - lo - sz + 1;
1774 unsigned mid_idx = sz >> 1;
1775 val += lo + mid_idx;
1791template<
class TDecoder>
1794 unsigned acc = accum_;
1795 unsigned used = used_bits_;
1797 if (used == (
sizeof(acc) * 8))
1799 acc = src_.get_32();
1802 unsigned zero_bits = 0;
1807 zero_bits = unsigned(zero_bits +(
sizeof(acc) * 8) - used);
1809 acc = src_.get_32();
1812 unsigned first_bit_idx =
1813 #if defined(BM_x86) && (defined(__GNUG__) || defined(_MSC_VER)) && !(defined(__arm64__) || defined(__arm__))
1818 acc >>= first_bit_idx;
1819 zero_bits += first_bit_idx;
1820 used += first_bit_idx;
1826 if (used == (
sizeof(acc) * 8))
1828 acc = src_.get_32();
1840 unsigned free_bits = unsigned((
sizeof(acc) * 8) - used);
1841 if (zero_bits <= free_bits)
1851 if (used == (
sizeof(acc) * 8))
1853 acc = src_.get_32();
1861 acc = src_.get_32();
1862 used = zero_bits - free_bits;
1876template<
class TDecoder>
1880 const unsigned maskFF = ~0u;
1881 unsigned acc = accum_;
1882 unsigned used = used_bits_;
1885 unsigned free_bits = unsigned((
sizeof(acc) * 8) - used);
1886 if (count <= free_bits)
1889 value = acc & (maskFF >> (32 - count));
1894 if (used == (
sizeof(acc) * 8))
1896 acc = src_.get_32();
1901 acc = src_.get_32();
1902 used = count - free_bits;
1903 value |= ((acc & (maskFF >> (32 - used))) << free_bits);
1913template<
class TDecoder>
1916 const unsigned mask = (~0u) >> (32 - 1);
1917 unsigned value = accum_ & mask;
1918 if (
unsigned free_bits =
unsigned(32u - used_bits_); free_bits)
1920 accum_ >>= 1; ++used_bits_;
1924 BM_ASSERT(used_bits_ == (
sizeof(accum_) * 8));
1925 unsigned a = src_.get_32();
Bit manipulation primitives (internal).
unsigned gamma() BMNOEXCEPT
decode unsigned value using Elias Gamma coding
void bic_decode_u16(bm::gap_word_t *arr, unsigned sz, bm::gap_word_t lo, bm::gap_word_t hi) BMNOEXCEPT
void bic_decode_u16_cm(bm::gap_word_t *arr, unsigned sz, bm::gap_word_t lo, bm::gap_word_t hi) BMNOEXCEPT
void bic_decode_u16_bitset(bm::word_t *block, unsigned sz, bm::gap_word_t lo, bm::gap_word_t hi) BMNOEXCEPT
unsigned get_bit() BMNOEXCEPT
void bic_decode_u16_cm_dry(unsigned sz, bm::gap_word_t lo, bm::gap_word_t hi) BMNOEXCEPT
bit_in(TDecoder &decoder) BMNOEXCEPT
void bic_decode_u32_cm(bm::word_t *arr, unsigned sz, bm::word_t lo, bm::word_t hi) BMNOEXCEPT
Binary Interpolative array decode (32-bit).
void bic_decode_u16_rg(bm::gap_word_t *arr, unsigned sz, bm::gap_word_t lo, bm::gap_word_t hi) BMNOEXCEPT
Binary Interpolative array decode.
void bic_decode_u16_dry(unsigned sz, bm::gap_word_t lo, bm::gap_word_t hi) BMNOEXCEPT
void bic_decode_u16_rg_dry(unsigned sz, bm::gap_word_t lo, bm::gap_word_t hi) BMNOEXCEPT
Binary Interpolative array decode into /dev/null.
unsigned get_bits(unsigned count) BMNOEXCEPT
void bic_decode_u16_cm_bitset(bm::word_t *block, unsigned sz, bm::gap_word_t lo, bm::gap_word_t hi) BMNOEXCEPT
void bic_decode_u16_rg_bitset(bm::word_t *block, unsigned sz, bm::gap_word_t lo, bm::gap_word_t hi) BMNOEXCEPT
Binary Interpolative array decode into bitset (32-bit based).
void bic_encode_u16_rg(const bm::gap_word_t *arr, unsigned sz, bm::gap_word_t lo, bm::gap_word_t hi) BMNOEXCEPT
Binary Interpolative encoding (array of 16-bit ints).
void put_zero_bits(unsigned count) BMNOEXCEPT
void bic_encode_u32_cm(const bm::word_t *arr, unsigned sz, bm::word_t lo, bm::word_t hi) BMNOEXCEPT
Binary Interpolative encoding (array of 32-bit ints) cm - "center-minimal".
void put_bit(unsigned value) BMNOEXCEPT
issue single bit into encode bit-stream
void put_bits(unsigned value, unsigned count) BMNOEXCEPT
issue count bits out of value
void gamma(unsigned value) BMNOEXCEPT
void put_zero_bit() BMNOEXCEPT
issue 0 into output stream
void bic_encode_u16_cm(const bm::gap_word_t *arr, unsigned sz, bm::gap_word_t lo, bm::gap_word_t hi) BMNOEXCEPT
void bic_encode_u16(const bm::gap_word_t *arr, unsigned sz, bm::gap_word_t lo, bm::gap_word_t hi) BMNOEXCEPT
const unsigned char * start_
decoder_base(const unsigned char *buf) BMNOEXCEPT
const unsigned char * buf_
bm::id64_t get_h64() BMNOEXCEPT
Read h-64-bit.
const unsigned char * get_pos() const BMNOEXCEPT
Return current buffer pointer.
void seek(int delta) BMNOEXCEPT
change current position
size_t size() const BMNOEXCEPT
Returns size of the current decoding stream.
unsigned char get_8() BMNOEXCEPT
Reads character from the decoding buffer.
void memcpy(unsigned char *dst, size_t count) BMNOEXCEPT
read bytes from the decode buffer
void set_pos(const unsigned char *pos) BMNOEXCEPT
Set current buffer pointer.
decoder_little_endian(const unsigned char *buf)
bool get_32_OR(bm::word_t *w, unsigned count)
void get_32_AND(bm::word_t *w, unsigned count)
Class for decoding data from memory buffer.
bm::word_t get_32() BMNOEXCEPT
Reads 32-bit word from the decoding buffer.
bm::word_t get_24() BMNOEXCEPT
Reads 32-bit word from the decoding buffer.
bool get_32_OR(bm::word_t *w, unsigned count) BMNOEXCEPT
Reads block of 32-bit words from the decoding buffer and ORs to the destination.
bm::id64_t get_64() BMNOEXCEPT
Reads 64-bit word from the decoding buffer.
void get_32_AND(bm::word_t *w, unsigned count) BMNOEXCEPT
Reads block of 32-bit words from the decoding buffer and ANDs to the destination.
bm::short_t get_16() BMNOEXCEPT
Reads 16-bit word from the decoding buffer.
decoder(const unsigned char *buf) BMNOEXCEPT
Construction.
bm::id64_t get_48() BMNOEXCEPT
Reads 64-bit word from the decoding buffer.
unsigned char * position_type
size_t size() const BMNOEXCEPT
Returns size of the current encoding stream.
void put_48(bm::id64_t w) BMNOEXCEPT
Puts 48 bits word into encoding buffer.
unsigned char * get_pos() const BMNOEXCEPT
Get current memory stream position.
void put_64(bm::id64_t w) BMNOEXCEPT
Puts 64 bits word into encoding buffer.
encoder(unsigned char *buf, size_t size) BMNOEXCEPT
Construction.
void put_8(unsigned char c) BMNOEXCEPT
Puts one character into the encoding buffer.
void set_pos(unsigned char *buf_pos) BMNOEXCEPT
Set current memory stream position.
void put_prefixed_array_16(unsigned char c, const bm::short_t *s, unsigned count, bool encode_count) BMNOEXCEPT
Encode 8-bit prefix + an array.
void put_h64(bm::id64_t w) BMNOEXCEPT
Puts 64 bits word into encoding buffer with h-compression.
void memcpy(const unsigned char *src, size_t count) BMNOEXCEPT
copy bytes into target buffer or just rewind if src is NULL
void put_32(bm::word_t w) BMNOEXCEPT
Puts 32 bits word into encoding buffer.
void put_24(bm::word_t w) BMNOEXCEPT
Puts 24 bits word into encoding buffer.
void put_16(bm::short_t s) BMNOEXCEPT
Puts short word (16 bits) into the encoding buffer.
void put_prefixed_array_32(unsigned char c, const bm::word_t *w, unsigned count) BMNOEXCEPT
Encode 8-bit prefix + an array.
void put_8_16_32(unsigned w, unsigned char c8, unsigned char c16, unsigned char c32) BMNOEXCEPT
but gat plus value based on its VBR evaluation
void stop()
Stop decoding sequence.
void start()
Start encoding sequence.
T operator()(void)
Decode word.
gamma_decoder(TBitIO &bin)
Functor for Elias Gamma encoding.
gamma_encoder(TBitIO &bout)
void operator()(T value)
Encode word.
bool avx2_or_arr_unal(__m256i *BMRESTRICT dst, const __m256i *BMRESTRICT src, const __m256i *BMRESTRICT src_end)
OR array elements against another unaligned array dst |= *src.
unsigned avx2_and_arr_unal(__m256i *BMRESTRICT dst, const __m256i *BMRESTRICT src, const __m256i *BMRESTRICT src_end)
AND array elements against another array (unaligned) dst &= *src.
bool sse2_or_arr_unal(__m128i *BMRESTRICT dst, const __m128i *BMRESTRICT src, const __m128i *BMRESTRICT src_end) BMNOEXCEPT
OR array elements against another array (unaligned) dst |= *src.
unsigned sse2_and_arr_unal(__m128i *BMRESTRICT dst, const __m128i *BMRESTRICT src, const __m128i *BMRESTRICT src_end) BMNOEXCEPT
AND array elements against another array (unaligned) dst &= *src.
decoder decoder_big_endian
Class for decoding data from memory buffer.
unsigned compute_h64_mask(unsigned long long w) BMNOEXCEPT
Сompute mask of bytes presense in 64-bit word.
BMFORCEINLINE T bit_scan_fwd(T v) BMNOEXCEPT
BMFORCEINLINE unsigned bit_scan_reverse32(unsigned w) BMNOEXCEPT
const unsigned set_word_shift
unsigned long long int id64_t
unsigned short gap_word_t
const unsigned set_word_mask
static const unsigned _left[32]