Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add support for exegesis annotator in BHive conversion script #41

1 change: 1 addition & 0 deletions gematria/datasets/BUILD
Original file line number Diff line number Diff line change
Expand Up @@ -60,6 +60,7 @@ cc_binary(
deps = [
":bhive_importer",
":find_accessed_addrs",
":find_accessed_addrs_exegesis",
"//gematria/llvm:canonicalizer",
"//gematria/llvm:llvm_architecture_support",
"//gematria/utils:string",
Expand Down
72 changes: 71 additions & 1 deletion gematria/datasets/convert_bhive_to_llvm_exegesis_input.cc
Original file line number Diff line number Diff line change
Expand Up @@ -27,12 +27,15 @@
#include "absl/flags/parse.h"
#include "gematria/datasets/bhive_importer.h"
#include "gematria/datasets/find_accessed_addrs.h"
#include "gematria/datasets/find_accessed_addrs_exegesis.h"
#include "gematria/llvm/canonicalizer.h"
#include "gematria/llvm/llvm_architecture_support.h"
#include "gematria/llvm/llvm_to_absl.h"
#include "gematria/utils/string.h"
#include "llvm/Support/JSON.h"
#include "llvm/Support/Path.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/tools/llvm-exegesis/lib/TargetSelect.h"

// Use the constants from the BHive paper for setting initial register and
// memory values. These constants are set to a high enough value to avoid
Expand All @@ -46,10 +49,38 @@ constexpr std::string_view kMemDefPrefix = "# LLVM-EXEGESIS-MEM-DEF ";
constexpr std::string_view kMemMapPrefix = "# LLVM-EXEGESIS-MEM-MAP ";
constexpr std::string_view kMemNamePrefix = "MEM";

// Address-annotator backends that can be selected on the command line.
enum class AnnotatorType {
  kExegesis,
  kFast,
};

// Associates each annotator backend with its textual spelling. The flag
// parsing and unparsing hooks both iterate over this table.
constexpr std::pair<AnnotatorType, std::string_view> kAnnotatorTypeNames[] = {
    {AnnotatorType::kExegesis, "exegesis"},
    {AnnotatorType::kFast, "fast"},
};

// Abseil flag hook: converts the textual flag value `text` into an
// AnnotatorType. On a match, stores the value in `*type` and returns true;
// otherwise writes a message to `*error` and returns false.
bool AbslParseFlag(absl::string_view text, AnnotatorType* type,
                   std::string* error) {
  for (const auto& name_entry : kAnnotatorTypeNames) {
    if (text != name_entry.second) continue;
    *type = name_entry.first;
    return true;
  }

  // None of the known spellings matched.
  *error = "unknown annotator type";
  return false;
}

// Abseil flag hook: returns the textual spelling of `type` as listed in
// kAnnotatorTypeNames.
//
// A value that has no entry in the table (e.g. an enumerator added without
// updating the table) is rendered as its underlying integer instead of
// invoking undefined behaviour.
std::string AbslUnparseFlag(AnnotatorType type) {
  for (const auto& [annotator_type, type_string] : kAnnotatorTypeNames) {
    if (annotator_type == type) return std::string(type_string);
  }

  // Not in the table: fall back to the numeric value so the function always
  // returns a well-defined string.
  return std::to_string(static_cast<int>(type));
}

// Command-line flags of the conversion tool.

// Input: path of the BHive CSV file to convert.
ABSL_FLAG(std::string, bhive_csv, "", "Filename of the input BHive CSV file");
// Output: directory that receives the llvm-exegesis input files.
ABSL_FLAG(
std::string, asm_output_dir, "",
"Directory containing output files that can be executed by llvm-exegesis");
// Backend used to find accessed addresses. Parsed through AbslParseFlag, so
// the accepted spellings are those in kAnnotatorTypeNames ("exegesis",
// "fast"); defaults to the fast annotator.
ABSL_FLAG(AnnotatorType, annotator_implementation, AnnotatorType::kFast,
"The annotator implementation to use.");
// Output: directory that receives the JSON files.
ABSL_FLAG(std::string, json_output_dir, "",
"Directory containing JSON output files");
ABSL_FLAG(
Expand All @@ -58,6 +89,23 @@ ABSL_FLAG(
ABSL_FLAG(unsigned, max_bb_count, std::numeric_limits<unsigned>::max(),
"The maximum number of basic blocks to process");

// Finds the addresses accessed by the basic block whose bytes are given in
// `basic_block`, using the backend selected by --annotator_implementation.
//
// `exegesis_annotator` is only dereferenced by the exegesis backend and may be
// null when the fast backend is selected. Returns an InvalidArgumentError when
// the exegesis backend is selected but no annotator was supplied, or when the
// flag holds a value that is not handled below; otherwise returns whatever the
// selected backend returns (an llvm::Expected result is converted to
// absl::StatusOr).
absl::StatusOr<gematria::AccessedAddrs> GetAccessedAddrs(
    absl::Span<const uint8_t> basic_block,
    gematria::ExegesisAnnotator* exegesis_annotator) {
  const AnnotatorType annotator_implementation =
      absl::GetFlag(FLAGS_annotator_implementation);
  switch (annotator_implementation) {
    case AnnotatorType::kFast:
      // This will only get the first segfault address.
      return gematria::FindAccessedAddrs(basic_block);
    case AnnotatorType::kExegesis:
      // Guard against callers that selected the exegesis backend without
      // constructing the annotator, instead of dereferencing a null pointer.
      if (exegesis_annotator == nullptr) {
        return absl::InvalidArgumentError(
            "exegesis annotator was requested but not initialized");
      }
      return gematria::LlvmExpectedToStatusOr(
          exegesis_annotator->findAccessedAddrs(
              llvm::ArrayRef(basic_block.begin(), basic_block.end())));
  }
  // Only reachable if the flag holds a value outside the enumerators above.
  return absl::InvalidArgumentError("unknown annotator type");
}

bool WriteJsonFile(llvm::json::Array to_write, size_t json_file_number,
llvm::StringRef json_output_dir) {
llvm::SmallString<40> json_output_file_path(json_output_dir);
Expand Down Expand Up @@ -96,6 +144,9 @@ int main(int argc, char* argv[]) {
return 1;
}

const AnnotatorType annotator_implementation =
absl::GetFlag(FLAGS_annotator_implementation);

std::string initial_reg_val_str =
gematria::ConvertHexToString(kInitialRegVal);
std::string initial_mem_val_str =
Expand Down Expand Up @@ -144,6 +195,25 @@ int main(int argc, char* argv[]) {
gematria::X86Canonicalizer canonicalizer(&llvm_support->target_machine());
gematria::BHiveImporter bhive_importer(&canonicalizer);

llvm::exegesis::InitializeX86ExegesisTarget();

auto llvm_state_or_error = llvm::exegesis::LLVMState::Create("", "native");
if (!llvm_state_or_error) {
std::cerr << "Failed to create LLVMState\n";
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Don't we have absl::log available here?

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We could, but the rest of the file uses standard library output, so I'm staying consistent with that for now. Not sure what would be preferred though. I think within Google, everything would use absl::log, and LLVM has its own output facilities, so this is sort of in no man's land.

return 1;
}

std::unique_ptr<gematria::ExegesisAnnotator> exegesis_annotator = nullptr;
if (annotator_implementation == AnnotatorType::kExegesis) {
auto exegesis_annotator_or_error =
gematria::ExegesisAnnotator::create(*llvm_state_or_error);
if (!exegesis_annotator_or_error) {
std::cerr << "Failed to create exegesis annotator\n";
return 1;
}
exegesis_annotator = std::move(*exegesis_annotator_or_error);
}

std::ifstream bhive_csv_file(bhive_filename);
llvm::json::Array processed_snippets;
const unsigned max_bb_count = absl::GetFlag(FLAGS_max_bb_count);
Expand Down Expand Up @@ -175,7 +245,7 @@ int main(int argc, char* argv[]) {
}

// This will only get the first segfault address.
auto addrs = gematria::FindAccessedAddrs(*bytes);
auto addrs = GetAccessedAddrs(*bytes, exegesis_annotator.get());

if (!addrs.ok()) {
std::cerr << "Failed to find addresses for block '" << hex
Expand Down
7 changes: 7 additions & 0 deletions gematria/llvm/llvm_to_absl.h
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@
#ifndef THIRD_PARTY_GEMATRIA_GEMATRIA_LLVM_LLVM_TO_ABSL_H_
#define THIRD_PARTY_GEMATRIA_GEMATRIA_LLVM_LLVM_TO_ABSL_H_

#include <optional>
#include <string>

#include "absl/status/status.h"
Expand All @@ -37,6 +38,12 @@ absl::StatusOr<T> LlvmExpectedToStatusOr(llvm::Expected<T> expected) {
if (expected) return std::move(*expected);
return LlvmErrorToStatus(expected.takeError());
}

// Converts an absl::StatusOr<T> into a std::optional<T>: the contained value
// is moved out when the status is OK; otherwise the result is std::nullopt
// and the error status is dropped.
template <typename T>
std::optional<T> StatusOrToOptional(absl::StatusOr<T> status_or) {
  if (!status_or.ok()) return std::nullopt;
  return *std::move(status_or);
}
} // namespace gematria

#endif // THIRD_PARTY_GEMATRIA_GEMATRIA_LLVM_LLVM_TO_ABSL_H_
Loading