Skip to content

Commit

Permalink
Merge pull request #3 from abreslow/master
Browse files Browse the repository at this point in the history
Fixes bugs and warnings.  Specific fixes include https://github.com/A…
  • Loading branch information
jlgreathouse authored Dec 16, 2020
2 parents 5492090 + 6c57563 commit cb7b788
Show file tree
Hide file tree
Showing 5 changed files with 27 additions and 13 deletions.
12 changes: 10 additions & 2 deletions benchmarking/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -34,12 +34,20 @@


ifndef CXX
CXX=g++ # Code uses GCC builtins. I do not recommend changing the compiler.
CXX=g++ # Code uses GCC builtins. I recommend using either g++ or clang++.
# GCC's g++ is probably preferred since it appears to yield better
# throughput numbers. However, the results are roughly comparable.
endif

# Comment the line below back in if you want to instrument the code with the
# sanitizer functionality
#SANITIZE:=-fsanitize=address,undefined,leak

OPT=-Ofast -march=native -mpopcnt

FLAGS:=-Wall -Winline -g -std=c++11 $(OPT)
FLAGS:=-Wall -Winline -g -std=c++11 $(OPT) $(SANITIZE)

# Need to routinely check for bugs with -fsanitize=address -fsanitize=undefined

TARGETS=benchmark \
benchmark_cf benchmark_mf \
Expand Down
2 changes: 1 addition & 1 deletion bf.h
Original file line number Diff line number Diff line change
Expand Up @@ -60,7 +60,7 @@ namespace BlockedBF{
inline bool contains_and_update(const hash_t item){
constexpr slot_type one = 1;
constexpr slot_type slot_width_bits = sizeof(slot_type) * 8;
constexpr slot_type log2_slot_width = log2(slot_width_bits);
constexpr slot_type log2_slot_width = util::log2ceil(slot_width_bits);
constexpr slot_type shift0 = 0;
constexpr slot_type shift1 = log2_slot_width;
constexpr slot_type shift2 = log2_slot_width * 2;
Expand Down
1 change: 0 additions & 1 deletion block.h
Original file line number Diff line number Diff line change
Expand Up @@ -413,7 +413,6 @@ struct Block{ // Assuming block size is a multiple of atom_t's size in bytes
memcpy(source_address, &item, field_width_bytes);
}

__attribute__((optimize("unroll-loops")))
INLINE void add_cross_left_displace(uint64_t raw_offset_bits,
uint64_t field_width_bits, uint64_t index, atom_t item){
uint64_t global_index = index * field_width_bits + raw_offset_bits;
Expand Down
22 changes: 14 additions & 8 deletions compressed_cuckoo_filter.h
Original file line number Diff line number Diff line change
Expand Up @@ -457,7 +457,9 @@ namespace CompressedCuckoo{
// Currently set to false because clear_swath hasn't been rigorously tested
constexpr bool _only_clear_ota_and_fca = false;
if(!_only_clear_ota_and_fca){ // Competitive with the code in the loop below
memset(storage, 0x0, allocation_size);
for(hash_t i = 0; i < total_blocks; i++){
storage[i] = block_t{};
}
return storage;
} // ELSE
for(hash_t i = 0; i < total_blocks; i++){
Expand Down Expand Up @@ -496,6 +498,9 @@ namespace CompressedCuckoo{

size_t allocation_size = sizeof(*_summed_counters) *
(_buckets_per_block + 1);
// Round allocation size up to a multiple of 64.
allocation_size = (allocation_size + g_cache_line_size_bytes - 1) &
~(g_cache_line_size_bytes - 1);
_summed_counters = static_cast<decltype(_summed_counters)>(aligned_alloc(g_cache_line_size_bytes, allocation_size));
// Memset not required, initialized by full_exclusive_scan

Expand Down Expand Up @@ -665,8 +670,8 @@ namespace CompressedCuckoo{
constexpr uint_fast8_t num_passes = util::log2ceil(_buckets_per_block);
static_assert(_max_fingerprints_per_block > 0, "The number of fingerprints "
"per block must be one or more.");
constexpr uint_fast8_t masked_count = static_cast<uint_fast8_t>(ceil(log2(
_max_fingerprints_per_block) / _fullness_counter_width)) - 1;
constexpr uint_fast8_t masked_count = static_cast<uint_fast8_t>(util::log2ceil(
_max_fingerprints_per_block) / _fullness_counter_width) - 1;
for(int8_t i = 0; i < masked_count; i++){ // Masked to avoid overflows
sum = (sum & _reduction_masks[i]) + ((sum >> (_fullness_counter_width << i)) & _reduction_masks[i]);
}
Expand Down Expand Up @@ -730,8 +735,9 @@ namespace CompressedCuckoo{
INLINE uint16_t exclusive_reduce_with_popcount128(const block_t& b,
uint8_t counter_index) const{
constexpr __uint128_t one = 1;
const __uint128_t mask = (one << (_fullness_counter_width * counter_index))
- one;
// Thanks to @asl for the bugfix https://github.com/AMDComputeLibraries/morton_filter/issues/2#issuecomment-568480311
const uint64_t shift = _fullness_counter_width * counter_index;
const __uint128_t mask = shift == 128 ? __uint128_t(-1) : (one << shift) - one;
uint8_t sum = 0u;
__uint128_t counters;
memcpy(&counters, &b, sizeof(__uint128_t));
Expand Down Expand Up @@ -1192,12 +1198,12 @@ namespace CompressedCuckoo{

// Reports the C parameter from the VLDB'18 paper. This is the ratio of physical slots in the FSA
// to logical slots per block.
constexpr double report_compression_ratio(){
constexpr double report_compression_ratio() const{
return static_cast<double>(_max_fingerprints_per_block) / (_buckets_per_block * _slots_per_bucket);
}

// This is the $\alpha_C$ term in the paper.
inline double report_block_occupancy(){
inline double report_block_occupancy() const{
uint64_t full_slots_count = 0;
for(uint64_t block_id = 0; block_id < _total_blocks; block_id++){
full_slots_count += get_bucket_start_index(block_id, _buckets_per_block);
Expand All @@ -1208,7 +1214,7 @@ namespace CompressedCuckoo{
// Methods specific to Morton filters

// Reports what fraction of the bits of the Overflow Tracking Array are set
double report_ota_occupancy(){
double report_ota_occupancy() const{
uint64_t set_bit_count = 0;
if(_morton_filter_functionality_enabled){
for(uint64_t i = 0; i < _total_blocks; i++){
Expand Down
3 changes: 2 additions & 1 deletion util.h
Original file line number Diff line number Diff line change
Expand Up @@ -74,7 +74,8 @@ namespace util{
// This could be implemented using fancy binary arithmatic or builtins,
// but this probably suffices if the integer is known at compile time.
constexpr inline uint32_t log2ceil(uint32_t integer){
//return ceil(log2(integer));
// I'm doing it this way because log2 is not a constexpr function in the
// Clang toolchain whereas __builtin_clz is.
return 32u - __builtin_clz(integer - 1u);
}

Expand Down

0 comments on commit cb7b788

Please sign in to comment.