Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add PageView get_keys function; Introduce StableStringStore #174

Merged
merged 3 commits into from
Nov 26, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 8 additions & 0 deletions src/llfs/page_view.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -37,4 +37,12 @@ Status PageView::validate(PageId expected_id)
return OkStatus();
}

StatusOr<usize> PageView::get_keys([[maybe_unused]] LowerBoundParam lower_bound,
                                   [[maybe_unused]] KeyView* key_buffer_out,
                                   [[maybe_unused]] usize key_buffer_size,
                                   [[maybe_unused]] StableStringStore& storage) const
{
  // Default implementation of the virtual `get_keys`: none of the arguments are read and no keys
  // are produced; the call always reports `kUnimplemented`.
  //
  StatusOr<usize> not_supported{batt::StatusCode::kUnimplemented};
  return not_supported;
}

} // namespace llfs
26 changes: 26 additions & 0 deletions src/llfs/page_view.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@
#include <llfs/page_layout.hpp>
//#include <llfs/page_loader.hpp>
#include <llfs/seq.hpp>
#include <llfs/stable_string_store.hpp>
#include <llfs/user_data.hpp>

#include <batteries/async/mutex.hpp>
Expand All @@ -32,6 +33,7 @@ class PageView
{
public:
//==#==========+==+=+=++=+++++++++++-+-+--+----- --- -- - - - -
using LowerBoundParam = std::variant<KeyView, usize>;

explicit PageView(std::shared_ptr<const PageBuffer>&& data) noexcept
: data_{std::move(data)}
Expand Down Expand Up @@ -92,6 +94,30 @@ class PageView
*/
virtual Optional<KeyView> max_key() const = 0;

/** \brief Retrieves at most `key_buffer_size` number of keys contained in this page.
*
* \param lower_bound This parameter allows for "skipping" to an arbitrary place in the page's key
* set. The caller can provide either a `KeyView` value or an index into the key set; either one
* identifies the starting key from which this function will begin collecting keys to return.
*
* \param key_buffer_out The output buffer that will be filled by this function with the requested
* keys.
*
* \param key_buffer_size The size of the output buffer holding the returned keys.
*
* \param storage A `StableStringStore` instance that the caller can provide so that the returned
* keys can still be a list of `KeyView` even if the keys in the page are stored in a way that
* isn't contiguous or are compressed. Specific implementations of `PageView` will choose to use
* this based on their key storage.
*
* \return The number of keys filled into `key_buffer_out`. This value will either be
* `key_buffer_size` or the number of keys between `lower_bound` and the end of the key set,
* whichever is smaller. In the event that the `lower_bound` parameter provided does not exist in
* the key set (or is out of the range of the key set), this function will return 0.
*/
virtual StatusOr<usize> get_keys(LowerBoundParam lower_bound, KeyView* key_buffer_out,
usize key_buffer_size, StableStringStore& storage) const;

// Builds a key-based approximate member query (AMQ) filter for the page, to answer the question
// whether a given key *might* be contained by the page.
//
Expand Down
86 changes: 86 additions & 0 deletions src/llfs/stable_string_store.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,86 @@
//#=##=##=#==#=#==#===#+==#+==========+==+=+=+=+=+=++=+++=+++++=-++++=-+++++++++++
//
// Part of the LLFS Project, under Apache License v2.0.
// See https://www.apache.org/licenses/LICENSE-2.0 for license information.
// SPDX short identifier: Apache-2.0
//
//+++++++++++-+-+--+----- --- -- - - - -

#include <llfs/stable_string_store.hpp>
//

#include <llfs/data_packer.hpp>

#include <batteries/algo/parallel_copy.hpp>
#include <batteries/assert.hpp>
#include <batteries/math.hpp>

namespace llfs {
//==#==========+==+=+=++=+++++++++++-+-+--+----- --- -- - - - -
//
StableStringStore::StableStringStore()
    : free_chunk_{MutableBuffer{this->chunk0_.data(), this->chunk0_.size()}}
{
  // Initially, all free space is the in-object array `chunk0_`; no heap chunk exists yet.
}

//==#==========+==+=+=++=+++++++++++-+-+--+----- --- -- - - - -
//
MutableBuffer StableStringStore::allocate(usize n)
{
  // Hands out the next `n` bytes of the current free chunk.  When the free chunk is too small, a
  // fresh heap chunk is created first; whatever remained of the old free chunk is abandoned.
  //
  const bool needs_new_chunk = n > this->free_chunk_.size();

  if (needs_new_chunk) {
    // Size the new chunk by rounding `n` up with `round_up_bits(log2_ceil(kDynamicAllocSize), n)`.
    //
    const usize chunk_size = batt::round_up_bits(batt::log2_ceil(kDynamicAllocSize), n);

    // Take ownership before touching the vector so the memory is released if push_back throws.
    //
    std::unique_ptr<char[]> owner{new char[chunk_size]};
    this->chunks_.push_back(std::move(owner));

    // The whole new chunk is now the free region.
    //
    this->free_chunk_ = MutableBuffer{this->chunks_.back().get(), chunk_size};
  }

  BATT_CHECK_GE(this->free_chunk_.size(), n);

  // Remember where the caller's region starts, then advance the free region past it to mark those
  // `n` bytes as occupied.
  //
  void* const region_begin = this->free_chunk_.data();
  this->free_chunk_ += n;

  return MutableBuffer{region_begin, n};
}

//==#==========+==+=+=++=+++++++++++-+-+--+----- --- -- - - - -
//
std::string_view StableStringStore::store(const std::string_view& s, batt::WorkerPool& worker_pool)
{
  // Copies `s` into memory owned by this store and returns a view of the copy.
  //
  //  - `s`: the characters to copy; may be empty (including a default-constructed view).
  //  - `worker_pool`: used to split the copy across workers when the pool is non-empty and `s` is
  //    at least `DataPacker::min_parallel_copy_size()` bytes long.
  //
  // Returns a view of exactly `s.size()` bytes located in this store's memory.
  //

  // Allocate a buffer the size of the input string data.
  //
  MutableBuffer stable_buffer = this->allocate(s.size());

  BATT_CHECK_EQ(stable_buffer.size(), s.size());

  if (s.empty()) {
    // Nothing to copy.  This case is handled explicitly because `s.data()` may be null for an
    // empty view, and passing a null pointer to `std::memcpy` is undefined behavior even when the
    // byte count is zero.
    //
  } else if (worker_pool.size() == 0 || s.size() < llfs::DataPacker::min_parallel_copy_size()) {
    // No workers available, or the data is too small to be worth splitting up: copy inline.
    //
    std::memcpy(stable_buffer.data(), s.data(), s.size());
  } else {
    // Parallel copy; `work_context` is scoped to this branch, so it is destroyed before the copied
    // data is returned to the caller.
    //
    batt::ScopedWorkContext work_context{worker_pool};

    const batt::TaskCount max_tasks{worker_pool.size() + 1};
    const batt::TaskSize min_task_size{llfs::DataPacker::min_parallel_copy_size()};

    const char* const src_begin = s.data();
    const char* const src_end = src_begin + s.size();
    char* const dst_begin = static_cast<char*>(stable_buffer.data());

    batt::parallel_copy(work_context, src_begin, src_end, dst_begin, min_task_size, max_tasks);
  }

  // Return the copy.
  //
  return std::string_view{static_cast<const char*>(stable_buffer.data()), stable_buffer.size()};
}

} // namespace llfs
120 changes: 120 additions & 0 deletions src/llfs/stable_string_store.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,120 @@
//#=##=##=#==#=#==#===#+==#+==========+==+=+=+=+=+=++=+++=+++++=-++++=-+++++++++++
//
// Part of the LLFS Project, under Apache License v2.0.
// See https://www.apache.org/licenses/LICENSE-2.0 for license information.
// SPDX short identifier: Apache-2.0
//
//+++++++++++-+-+--+----- --- -- - - - -

#pragma once
#ifndef LLFS_STABLE_STRING_STORE_HPP
#define LLFS_STABLE_STRING_STORE_HPP

#include <llfs/buffer.hpp>
#include <llfs/int_types.hpp>

#include <batteries/async/worker_pool.hpp>
#include <batteries/buffer.hpp>

#include <array>
#include <cstring>
#include <memory>
#include <string_view>
#include <vector>

namespace llfs {

//=#=#==#==#===============+=+=+=+=++=++++++++++++++-++-+--+-+----+---------------
tonyastolfi marked this conversation as resolved.
Show resolved Hide resolved
/** \brief A class that allows the user to efficiently allocate and copy string data in memory that
* is scoped to the lifetime of the object itself.
*/
class StableStringStore
{
public:
static constexpr usize kStaticAllocSize = 32;
static constexpr usize kDynamicAllocSize = 4096;

StableStringStore();

StableStringStore(const StableStringStore&) = delete;
StableStringStore& operator=(const StableStringStore&) = delete;

/** \brief Allocates a buffer of size `n` bytes.
*/
MutableBuffer allocate(usize n);

/** \brief Copies the given `string_view` into a memory location managed by this
* `StableStringStore` instance, and returns a `string_view` pointing to the stored data. The
* `worker_pool`, if provided, is used the parallelize the copying process if necessary.
*/
tonyastolfi marked this conversation as resolved.
Show resolved Hide resolved
std::string_view store(const std::string_view& s,
batt::WorkerPool& worker_pool = batt::WorkerPool::null_pool());

/** \brief Copies the given `ConstBuffer` into a memory location managed by this
* `StableStringStore` instance as string data, and returns a `ConstBuffer` pointing to the stored
* data.
*/
ConstBuffer store(const ConstBuffer& buffer,
batt::WorkerPool& worker_pool = batt::WorkerPool::null_pool())
{
const std::string_view s = this->store(
std::string_view{static_cast<const char*>(buffer.data()), buffer.size()}, worker_pool);

return ConstBuffer{s.data(), s.size()};
}

/** \brief Concatenates multiple chunks of data and copies the concatenation into a contiguous
* buffer of memory.
*/
template <typename... Parts>
ConstBuffer concat(Parts&&... parts)
{
usize total_size = 0;

// Compute the total amount of memory needed to be allocated for the result of the
// concatenation.
//
const auto add_to_total = [&total_size](auto&& part) {
total_size += batt::as_const_buffer(part).size();
return 0;
};

(add_to_total(parts), ...);

MutableBuffer mbuf = this->allocate(total_size);
MutableBuffer cbuf = mbuf;

// Copy each part to memory.
//
const auto copy_part = [&mbuf](auto&& part) {
auto src = batt::as_const_buffer(part);
std::memcpy(mbuf.data(), src.data(), src.size());
mbuf += src.size();
return 0;
};

(copy_part(parts), ...);

return cbuf;
}

private:
/** \brief The statically allocated block of memory that is initialized when this
* `StableStringStore` instance is created, used as a starting point for memory allocations done
* by this instance.
*/
std::array<char, kStaticAllocSize> chunk0_;

/** \brief A collection of dynamically allocated memory blocks, managing the chunks allocated
* beyond `chunk0_`.
*/
std::vector<std::unique_ptr<char[]>> chunks_;

/** \brief A buffer representing the current chunk of memory that has free space available for
* allocation.
*/
MutableBuffer free_chunk_;
};

} // namespace llfs

#endif // LLFS_STABLE_STRING_STORE_HPP
91 changes: 91 additions & 0 deletions src/llfs/stable_string_store.test.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,91 @@
//#=##=##=#==#=#==#===#+==#+==========+==+=+=+=+=+=++=+++=+++++=-++++=-+++++++++++
//
// Part of the LLFS Project, under Apache License v2.0.
// See https://www.apache.org/licenses/LICENSE-2.0 for license information.
// SPDX short identifier: Apache-2.0
//
//+++++++++++-+-+--+----- --- -- - - - -

#include <llfs/stable_string_store.hpp>
//

#include <gmock/gmock.h>
#include <gtest/gtest.h>

#include <string>

namespace {

using namespace batt::int_types;

// Verifies that a short concatenation is served from the store's in-object buffer.
//
TEST(StableStringStore, StaticAllocationTest)
{
  llfs::StableStringStore strings;

  auto out =
      strings.concat(std::string_view{"Hello"}, std::string_view{", "}, std::string_view{"World!"});

  // `out.data()` is a `const void*`; use a named cast (not a C-style cast) to view it as chars.
  //
  std::string_view out_str{static_cast<const char*>(out.data()), out.size()};

  EXPECT_THAT(out_str, ::testing::StrEq("Hello, World!"));

  // Since "Hello, World!" is less than kStaticAllocSize, test to see that the string's data was
  // allocated statically, i.e., it "lives" inside the bounds of the StableStringStore object
  // itself.
  //
  EXPECT_TRUE(out.data() >= static_cast<const void*>(&strings) &&
              out.data() < static_cast<const void*>(&strings + 1));
}

// Verifies that stores are served from the in-object buffer until it is full, and from memory
// outside the object afterwards.
//
TEST(StableStringStore, DynamicAllocationTest)
{
  llfs::StableStringStore strings;

  // Address range [store_begin, store_end) occupied by the StableStringStore object itself.
  //
  const void* const store_begin = static_cast<const void*>(&strings);
  const void* const store_end = static_cast<const void*>(&strings + 1);

  const auto lives_inside_store = [store_begin, store_end](const std::string_view& stored) {
    const void* const address = static_cast<const void*>(stored.data());
    return address >= store_begin && address < store_end;
  };

  const usize data_size = 1;
  const usize static_store_count = strings.kStaticAllocSize / data_size;

  // Fill the static allocation exactly, one single-character string at a time; every copy must
  // land inside the object.
  //
  for (usize i = 0; i < static_store_count; ++i) {
    std::string_view one_char{"a"};
    std::string_view stored_copy = strings.store(one_char);
    EXPECT_TRUE(lives_inside_store(stored_copy));
  }

  // The static buffer is now exhausted, so one more store must be dynamically allocated, i.e. its
  // data must live outside the bounds of the object.
  //
  std::string_view overflow_string{"b"};
  std::string_view overflow_copy = strings.store(overflow_string);
  EXPECT_FALSE(lives_inside_store(overflow_copy));
}

// Verifies that previously stored data stays intact while further large strings are stored.
//
TEST(StableStringStore, LargeDynamicAllocationTest)
{
  llfs::StableStringStore strings;
  const usize data_size = strings.kDynamicAllocSize + 1;
  const usize num_allocations = 10;

  // Payload for round `round`: `data_size` copies of the letter `round` places after 'a'.
  //
  const auto make_payload = [data_size](usize round) {
    return std::string(data_size, 'a' + round);
  };

  // Store large strings, each with a size greater than kDynamicAllocSize, so that the store has
  // to perform multiple dynamic memory allocations.
  //
  std::string_view previous_string;
  for (usize i = 0; i < num_allocations; ++i) {
    if (i != 0) {
      // The copy made in the previous round must still be readable and unchanged; stored string
      // data is scoped to the lifetime of the StableStringStore, not to the source string.
      //
      const std::string expected_previous_string = make_payload(i - 1);
      EXPECT_EQ(previous_string, expected_previous_string);
    }

    const std::string payload = make_payload(i);
    const std::string_view payload_view{payload};
    previous_string = strings.store(payload_view);
  }
}
tonyastolfi marked this conversation as resolved.
Show resolved Hide resolved

} // namespace