Skip to content

Commit

Permalink
Output Markdown and summarize in job action
Browse files (browse the repository at this point in the history)
  • Loading branch information
russcam committed May 1, 2024
1 parent 907df63 commit 35086b7
Show file tree
Hide file tree
Showing 4 changed files with 64 additions and 162 deletions.
5 changes: 5 additions & 0 deletions .github/workflows/dotnet.yml
Original file line number Diff line number Diff line change
Expand Up @@ -26,3 +26,8 @@ jobs:
run: dotnet build --no-restore
- name: Test
run: dotnet test --no-build --verbosity normal
- name: Accuracy Report
run: |
echo "# Lingua Language Accuracy report" >> $GITHUB_STEP_SUMMARY
cat accuracy-reports/lingua/*.md >> $GITHUB_STEP_SUMMARY
1 change: 1 addition & 0 deletions Lingua.sln
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@ Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "Solution Root", "Solution R
dotnet-tools.json = dotnet-tools.json
build.bat = build.bat
build.sh = build.sh
.github\workflows\dotnet.yml = .github\workflows\dotnet.yml
EndProjectSection
EndProject
Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "Build", "build\Build\Build.csproj", "{E8C232A5-D100-47D5-A8CA-8E8927E52CD8}"
Expand Down
133 changes: 0 additions & 133 deletions accuracy-reports/lingua/Afrikaans.txt

This file was deleted.

87 changes: 58 additions & 29 deletions tests/Lingua.AccuracyReport.Tests/LanguageDetectionStatistics.cs
Original file line number Diff line number Diff line change
Expand Up @@ -64,7 +64,7 @@ public void Dispose()
);
var accuracyReportFilePath = Path.Combine(
accuracyReportsDirectoryPath,
$"{Language}.txt"
$"{Language}.md"
);

var statisticsReport = StatisticsReport();
Expand All @@ -75,6 +75,9 @@ public void Dispose()
}
private string StatisticsReport()
{
var newlines = new string('\n', 2);
var report = new StringBuilder($"## {Language}");

var singleWordsAccuracyvalues = MapCountsToAccuracies(_singleWordsStatistics);
var wordPairsAccuracyvalues = MapCountsToAccuracies(_wordPairsStatistics);
var sentencesAccuracyvalues = MapCountsToAccuracies(_sentencesStatistics);
Expand Down Expand Up @@ -103,47 +106,53 @@ private string StatisticsReport()
var averageAccuracyInHighAccuracyMode =
(singleWordAccuracies.HighAccuracy + wordPairAccuracies.HighAccuracy + sentenceAccuracies.HighAccuracy) / 3;

var averageAccuracyReport = Implementation == Implementation.Lingua
? $">>> Accuracy on average: {FormatAccuracy(averageAccuracyInLowAccuracyMode)} | " +
FormatAccuracy(averageAccuracyInHighAccuracyMode)
: $">>> Accuracy on average: {FormatAccuracy(averageAccuracyInHighAccuracyMode)}";
if (Implementation == Implementation.Lingua)
{
report.AppendLine($"{newlines}Overall average accuracy");
report.AppendLine();
report.AppendLine("| Low Accuracy Mode | High Accuracy Mode |");
report.AppendLine("| ----------------- | ------------------ |");
report.AppendLine(
$"| {FormatAccuracy(averageAccuracyInLowAccuracyMode)} | {FormatAccuracy(averageAccuracyInHighAccuracyMode)} |");
}
else
{
report.AppendLine($"{newlines}Overall average accuracy");
report.AppendLine();
report.AppendLine("| High Accuracy Mode |");
report.AppendLine("| ------------------ |");
report.AppendLine(
$"| {FormatAccuracy(averageAccuracyInHighAccuracyMode)} |");
}

var reportParts = new[]
{
averageAccuracyReport,
singleWordAccuracyReport,
wordPairAccuracyReport,
sentenceAccuracyReport
};
var newlines = new string('\n', 2);
var report = new StringBuilder($"##### {Language} #####");

if (Implementation == Implementation.Lingua)
{
report.Append(newlines);
report.Append("Legend: 'low accuracy mode | high accuracy mode'");
}

foreach (var reportPart in reportParts)
{
if (!string.IsNullOrEmpty(reportPart))
report.Append($"{newlines}{reportPart}");
}

report.Append($"{newlines}>> Exact values:");
report.Append($"{newlines}> Exact values:");
if (Implementation == Implementation.Lingua)
{
report.Append($" {averageAccuracyInLowAccuracyMode} {singleWordAccuracies.LowAccuracy} " +
$"{wordPairAccuracies.LowAccuracy} {sentenceAccuracies.LowAccuracy}");
report.Append($" {averageAccuracyInHighAccuracyMode} {singleWordAccuracies.HighAccuracy} " +
report.AppendLine($" {averageAccuracyInHighAccuracyMode} {singleWordAccuracies.HighAccuracy} " +
$"{wordPairAccuracies.HighAccuracy} {sentenceAccuracies.HighAccuracy}");
}
else
{
report.Append($" {averageAccuracyInHighAccuracyMode} {singleWordAccuracies.HighAccuracy} " +
report.AppendLine($" {averageAccuracyInHighAccuracyMode} {singleWordAccuracies.HighAccuracy} " +
$"{wordPairAccuracies.HighAccuracy} {sentenceAccuracies.HighAccuracy}");
}

report.AppendLine();
return report.ToString();
}

Expand Down Expand Up @@ -220,42 +229,62 @@ private static double ComputeAccuracy(int languageCount, int totalLanguagesCount
string description)
{
var accuracies = statistics.GetValueOrDefault(Language, (0d, 0d));
var report = new StringBuilder(
$">> Detection of {count} {description} (average length: {(int)((double)length / count)} chars)\n");
var report = new StringBuilder($"### {Language} {description}");
report.AppendLine();
report.AppendLine();
report.AppendLine($"Detection of {count} {description} (average length: {(int)((double)length / count)} chars)");
report.AppendLine();

bool errors;
if (Implementation == Implementation.Lingua)
report.Append($"Accuracy: {FormatAccuracy(accuracies.Item1)} | {FormatAccuracy(accuracies.Item2)}\n");
{
report.AppendLine("| Low Accuracy Mode | High Accuracy Mode |");
report.AppendLine("| ----------------- | ------------------ |");
report.AppendLine($"| {FormatAccuracy(accuracies.Item1)} | {FormatAccuracy(accuracies.Item2)} |");
errors = accuracies.Item1 < 100 || accuracies.Item2 < 100;
}
else
report.Append($"Accuracy: {FormatAccuracy(accuracies.Item2)}\n");
{
report.AppendLine("| High Accuracy Mode |");
report.AppendLine("| ------------------ |");
report.AppendLine($"| {FormatAccuracy(accuracies.Item2)} |");
errors = accuracies.Item2 < 100;
}

report.AppendLine("Erroneously classified as");
report.Append(FormatStatistics(statistics, Language));
if (errors)
{
report.AppendLine();
report.AppendLine("Erroneously classified as");
report.AppendLine();
FormatStatistics(statistics, Language, report);
}

return (accuracies, report.ToString());
}

private string FormatStatistics(Dictionary<Language,(double, double)> statistics, Language language)
private string FormatStatistics(Dictionary<Language,(double, double)> statistics, Language language, StringBuilder builder)
{
var sorted = statistics
.Where(s => s.Key != language)
.OrderByDescending(s => s.Value.Item2)
.ToList();

var builder = new StringBuilder();
if (Implementation == Implementation.Lingua)
{
builder.AppendLine("| Language | Low Accuracy Mode | High Accuracy Mode |");
builder.AppendLine("| -------- | ----------------- | ------------------ |");
foreach (var statistic in sorted)
{
builder.AppendLine(
$"{statistic.Key}: {FormatAccuracy(statistic.Value.Item1)} | {FormatAccuracy(statistic.Value.Item2)}");
$"| {statistic.Key} | {FormatAccuracy(statistic.Value.Item1)} | {FormatAccuracy(statistic.Value.Item2)} |");
}
}
else
{
builder.AppendLine("| Language | High Accuracy Mode |");
builder.AppendLine("| -------- | ------------------ |");
foreach (var statistic in sorted)
{
builder.AppendLine($"{statistic.Key}: {FormatAccuracy(statistic.Value.Item2)}");
}
builder.AppendLine($"| {statistic.Key} | {FormatAccuracy(statistic.Value.Item2)} |");
}

return builder.ToString();
Expand Down

0 comments on commit 35086b7

Please sign in to comment.