Skip to content

Commit

Permalink
Add JSON output option to BHive converter
Browse files Browse the repository at this point in the history
This patch adds a JSON output option to the BHive converter. This makes
it significantly easier to implement other scripts down the line that
ingest this data. It also significantly reduces the number of inodes
that a large data set uses; running out of inodes can be a problem on
some file systems.
  • Loading branch information
boomanaiden154 committed Jan 27, 2024
1 parent 9064608 commit 050b9c2
Showing 1 changed file with 132 additions and 29 deletions.
161 changes: 132 additions & 29 deletions gematria/datasets/convert_bhive_to_llvm_exegesis_input.cc
Original file line number Diff line number Diff line change
Expand Up @@ -14,10 +14,12 @@

#include <fstream>
#include <iostream>
#include <limits>
#include <memory>
#include <sstream>
#include <string>
#include <string_view>
#include <vector>

#include "X86RegisterInfo.h"
#include "X86Subtarget.h"
Expand All @@ -28,6 +30,8 @@
#include "gematria/llvm/canonicalizer.h"
#include "gematria/llvm/llvm_architecture_support.h"
#include "gematria/utils/string.h"
#include "llvm/Support/JSON.h"
#include "llvm/Support/raw_ostream.h"

constexpr uint64_t kInitialRegVal = 0x10000;
constexpr uint64_t kInitialMemVal = 0x7FFFFFFF;
Expand All @@ -44,6 +48,34 @@ ABSL_FLAG(std::string, bhive_csv, "", "Filename of the input BHive CSV file");
ABSL_FLAG(
std::string, output_dir, "",
"Directory containing output files that can be executed by llvm-exegesis");
// Destination directory for the JSON output files. main() rejects an empty
// value when "json" is one of the requested --output_types.
ABSL_FLAG(std::string, json_output_dir, "",
"Directory containing JSON output files");
// Number of basic blocks stored per JSON file. The default is the largest
// value representable by `unsigned`.
ABSL_FLAG(
unsigned, json_split_count, std::numeric_limits<unsigned>::max(),
"The number of annotated basic blocks to include in a single JSON file");
// Output kinds to generate. main() recognizes "asm" and "json"; any other
// entry is silently ignored. Only "asm" is enabled by default.
ABSL_FLAG(std::vector<std::string>, output_types,
std::vector<std::string>({"asm"}),
"A comma separated list of output types to generate");

// Serializes `to_write` as indented JSON into the file
// `<json_output_dir>/<json_file_number>.json`.
//
// Parameters:
//   to_write         - the JSON array to serialize; consumed by this call.
//   json_file_number - number used as the base name of the output file.
//   json_output_dir  - directory in which the file is created.
//
// Returns true when the file was opened and written without error. On failure
// a message is printed to stderr and false is returned.
bool write_json_file(llvm::json::Array to_write, size_t json_file_number,
                     std::string json_output_dir) {
  // Build the path as an owning std::string. An llvm::Twine must never be
  // stored in a named variable: it only holds pointers to the temporaries
  // created while concatenating, and those are destroyed at the end of the
  // full expression, leaving the stored Twine dangling.
  const std::string json_output_file_path =
      json_output_dir + "/" + std::to_string(json_file_number) + ".json";

  std::error_code file_ec;
  llvm::raw_fd_ostream json_output_file(json_output_file_path, file_ec);

  if (file_ec) {
    std::cerr << "Failed to open output file: " << json_output_file_path
              << "\n";
    return false;
  }

  // "{0:2}" pretty-prints the JSON value with an indentation of two spaces.
  json_output_file
      << llvm::formatv("{0:2}", llvm::json::Value(std::move(to_write))).str();

  // Flush now so that write errors (e.g. a full disk) are observable here.
  // An unhandled error would otherwise make the raw_fd_ostream destructor
  // abort the process instead of letting the caller handle the failure.
  json_output_file.flush();
  if (json_output_file.has_error()) {
    std::cerr << "Failed to write output file: " << json_output_file_path
              << "\n";
    json_output_file.clear_error();
    return false;
  }

  return true;
}

int main(int argc, char* argv[]) {
absl::ParseCommandLine(argc, argv);
Expand All @@ -54,9 +86,26 @@ int main(int argc, char* argv[]) {
return 1;
}

const std::string json_output_dir = absl::GetFlag(FLAGS_json_output_dir);
const std::string output_dir = absl::GetFlag(FLAGS_output_dir);
if (output_dir.empty()) {
std::cerr << "Error: --output_dir is required\n";
const std::vector<std::string> output_types =
absl::GetFlag(FLAGS_output_types);
bool json_output_enabled = false;
bool asm_output_enabled = false;
for (const std::string& output_type : output_types) {
if (output_type == "json")
json_output_enabled = true;
else if (output_type == "asm")
asm_output_enabled = true;
}

if (json_output_enabled && json_output_dir.empty()) {
std::cerr << "Error: --json_output_dir is required when the json output "
"type is requested\n";
return 1;
} else if (asm_output_enabled && output_dir.empty()) {
std::cerr << "Error: --output_dir is required when the asm output type is "
"requested\n";
return 1;
}

Expand Down Expand Up @@ -102,6 +151,8 @@ int main(int argc, char* argv[]) {
gematria::BHiveImporter bhive_importer(&canonicalizer);

std::ifstream bhive_csv_file(bhive_filename);
llvm::json::Array processed_snippets;
const int json_split_count = absl::GetFlag(FLAGS_json_split_count);
for (std::string line; std::getline(bhive_csv_file, line);) {
auto comma_index = line.find(',');
if (comma_index == std::string::npos) {
Expand Down Expand Up @@ -139,38 +190,90 @@ int main(int argc, char* argv[]) {
continue;
}

// Create output file path.
llvm::Twine output_file_path = llvm::Twine(output_dir)
.concat("/")
.concat(llvm::Twine(file_counter))
.concat(".test");

// Open output file for writing.
std::ofstream output_file(output_file_path.str());
if (!output_file.is_open()) {
std::cerr << "Failed to open output file: " << output_file_path.str()
<< "\n";
return 4;
}

// Write the register definition lines into the output file.
output_file << register_defs_lines;
if (asm_output_enabled) {
// Create output file path.
llvm::Twine output_file_path = llvm::Twine(output_dir)
.concat("/")
.concat(llvm::Twine(file_counter))
.concat(".test");

// Multiple mappings can point to the same definition.
if (addrs->accessed_blocks.size() > 0) {
output_file << kMemDefPrefix << kMemNamePrefix << " " << addrs->block_size
<< " " << initial_mem_val_str << "\n";
}
for (const auto& addr : addrs->accessed_blocks) {
output_file << kMemMapPrefix << kMemNamePrefix << " " << std::dec << addr
// Open output file for writing.
std::ofstream output_file(output_file_path.str());
if (!output_file.is_open()) {
std::cerr << "Failed to open output file: " << output_file_path.str()
<< "\n";
return 4;
}

// Write the register definition lines into the output file.
output_file << register_defs_lines;

// Multiple mappings can point to the same definition.
if (addrs->accessed_blocks.size() > 0) {
output_file << kMemDefPrefix << kMemNamePrefix << " "
<< addrs->block_size << " " << initial_mem_val_str << "\n";
}
for (const auto& addr : addrs->accessed_blocks) {
output_file << kMemMapPrefix << kMemNamePrefix << " " << std::dec
<< addr << "\n";
}

// Append disassembled instructions.
for (const auto& instr : proto->machine_instructions()) {
output_file << instr.assembly() << "\n";
}
}

// Append disassembled instructions.
for (const auto& instr : proto->machine_instructions()) {
output_file << instr.assembly() << "\n";
if (json_output_enabled) {
llvm::json::Object current_snippet;

if (addrs->accessed_blocks.size() > 0) {
llvm::json::Array memory_definitions;
llvm::json::Object current_memory_definition;
current_memory_definition["Name"] = llvm::json::Value(kMemNamePrefix);
current_memory_definition["Size"] =
llvm::json::Value(addrs->block_size);
current_memory_definition["Value"] = llvm::json::Value(kInitialMemVal);
memory_definitions.push_back(std::move(current_memory_definition));
current_snippet["MemoryDefinitions"] =
llvm::json::Value(std::move(memory_definitions));

llvm::json::Array memory_mappings;
for (const uintptr_t addr : addrs->accessed_blocks) {
llvm::json::Object current_memory_mapping;
current_memory_mapping["Value"] = llvm::json::Value(kMemNamePrefix);
current_memory_mapping["Address"] = llvm::json::Value(addr);
memory_mappings.push_back(std::move(current_memory_mapping));
}
current_snippet["MemoryMappings"] =
llvm::json::Value(std::move(memory_mappings));
} else {
current_snippet["MemoryDefinitions"] = llvm::json::Array();
current_snippet["MemoryMappings"] = llvm::json::Array();
}

std::string hex_string = {hex.begin(), hex.end()};
current_snippet["Hex"] = llvm::json::Value(hex_string);

processed_snippets.push_back(
llvm::json::Value(std::move(current_snippet)));

if (file_counter % json_split_count == 0) {
size_t json_file_number = file_counter / json_split_count;
bool write_successfully = write_json_file(
std::move(processed_snippets), json_file_number, json_output_dir);
if (!write_successfully) return 4;
processed_snippets.clear();
}
}

file_counter++;
}
}

if (json_output_enabled) {
size_t json_file_number = file_counter / json_split_count;
bool write_successfully = write_json_file(
std::move(processed_snippets), json_file_number, json_output_dir);
if (!write_successfully) return 4;
}
}

0 comments on commit 050b9c2

Please sign in to comment.