-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathmain.cpp
86 lines (73 loc) · 2.58 KB
/
main.cpp
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
// Copyright (C) 2021 Duc Thanh Tran
// Distributed under the MIT License.
// For further information, see LICENSE file or visit https://opensource.org/licenses/MIT
#include <algorithm>
#include <cstddef>
#include <exception>
#include <filesystem>
#include <iostream>
#include <string>
#include <vector>
#include "greedy_outlier_entropy.hpp"
#include "ProgramOptions.hxx"
/// @brief Command-line entry point.
///
/// Parses the options `--outliers/-k`, `--data/-d`, the optional `--output/-o`
/// and `--help/-h`, loads the dataset via `read`, prints its entropy, runs
/// `greedy_outliers` for the requested number of outliers, and prints the
/// resulting entropy together with the sorted outlier values. When an output
/// path was supplied, the result is handed to `write_data`.
///
/// @param argc Number of command-line arguments.
/// @param argv Command-line argument strings.
/// @return 0 on success or when help was requested; 1 when the arguments cannot
///         be parsed, a required option is missing, the dataset is empty, or an
///         exception is raised while processing.
int main(int argc, char* argv[])
{
    po::parser parser;

    // The help callback prints the usage text as soon as the option is seen.
    auto& help = parser["help"]
        .abbreviation('h')
        .description("Print help")
        .callback([&]{ std::cout << parser << '\n'; });
    auto& outliers = parser["outliers"]
        .abbreviation('k')
        .description("Number of outliers")
        .type(po::u64);
    auto& data = parser["data"]
        .abbreviation('d')
        .description("Path to input data file")
        .type(po::string);
    auto& output = parser["output"]
        .abbreviation('o')
        .description("OPTIONAL: Path to output file which contains non-outlier data")
        .type(po::string);

    if(!parser(argc, argv))
    {
        std::cout << "Could not parse arguments\n";
        std::cout << parser << '\n';
        return 1;
    }

    // Usage text was already emitted by the callback above; nothing left to do.
    if(help.was_set())
    {
        return 0;
    }

    if(!outliers.available() || !data.available())
    {
        std::cout << "Number of outliers and data file are required\n";
        std::cout << parser << '\n';
        return 1;
    }

    try
    {
        const std::filesystem::path file_path{data.get().string};
        auto dataset = read(file_path);

        // front() on an empty container is undefined behaviour, so reject an
        // empty dataset before touching its first row.
        if(dataset.data.size() == 0)
        {
            std::cout << "Error: input data file contains no datapoints\n";
            return 1;
        }

        std::cout << "Loaded " << dataset.data.size() << " datapoints, each with " << dataset.data.front().size() << " dimensions.\n";
        std::cout << "Entropy of original data: " << entropy(dataset.occurences, dataset.data.size()) << '\n';

        const auto numOutliers{static_cast<std::size_t>(outliers.get().u64)};
        const auto result = greedy_outliers(dataset, numOutliers);
        std::cout << "Entropy of data without outliers: " << result.entropy << '\n';

        // Copy into a vector so the outliers can be reported in ascending order.
        std::vector<std::size_t> sorted_outliers(result.outliers.cbegin(), result.outliers.cend());
        std::ranges::sort(sorted_outliers);

        std::cout << "Outliers: ";
        for(auto first{true}; const auto out : sorted_outliers)
        {
            // Comma-separate every element except the first.
            if(!first)
            {
                std::cout << ", ";
            }
            first = false;
            std::cout << out;
        }
        std::cout << '\n';

        if(output.available())
        {
            const std::filesystem::path out_file_path(output.get().string);
            write_data(out_file_path, result, dataset.data);
        }
    }
    catch(const std::exception &e)
    {
        std::cout << "Error: " << e.what() << '\n';
        // Report the failure to the caller instead of exiting with success.
        return 1;
    }
    return 0;
}