Skip to content

Commit

Permalink
add japanese, finally fix book lang stuff, format lang everything
Browse files Browse the repository at this point in the history
  • Loading branch information
eerussianguy committed Jan 28, 2024
1 parent a60bfda commit 12d44f0
Show file tree
Hide file tree
Showing 78 changed files with 4,827 additions and 1,573 deletions.
89 changes: 89 additions & 0 deletions resources/format_lang.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,89 @@
import difflib
import json

from typing import Tuple


def main(validate: bool, namespace: str, langs: Tuple[str, ...]):
    """Format every non-default language file of ``namespace`` against ``en_us``.

    :param validate: forwarded unchanged to ``format_lang`` for each language.
    :param namespace: asset namespace whose lang files are processed.
    :param langs: language codes to process; ``'en_us'`` itself is skipped.
    """
    # en_us is the reference that every other language is formatted against.
    reference = load(namespace, 'en_us')
    for code in langs:
        if code == 'en_us':
            continue
        format_lang(namespace, reference, code, validate)


def update(namespace: str, langs: Tuple[str, ...]):
    """Detect ``en_us`` values that changed and optionally reset them in other languages.

    The current ``en_us`` file is compared with the old copy returned by
    ``load_old``. Keys present in both whose values differ are listed, and the
    user is asked interactively whether to drop them. On the answer ``yes``
    the keys are removed from the in-memory ``en_us`` data and every language
    in ``langs`` other than ``'en_us'`` is re-formatted (without validation).

    :param namespace: asset namespace whose lang files are processed.
    :param langs: language codes to re-format.
    """
    current = load(namespace, 'en_us')
    previous = load_old(namespace, 'en_us')
    updated_keys = {key for key, value in current.items() if key in previous and value != previous[key]}

    if not updated_keys:
        print('No differences found')
        return

    print('Found %d modified values:' % len(updated_keys))
    for key in updated_keys:
        print('Modified: %s : "%s" -> "%s"' % (key, previous[key], current[key]))

    answer = input('Remove these keys from other translations?\n(yes|no) >')
    print('Answer: %s' % answer)
    if answer != 'yes':
        return

    # Strip these keys from en_us, so they don't show up in translations
    for key in updated_keys:
        del current[key]
    for code in langs:
        if code == 'en_us':
            continue
        format_lang(namespace, current, code, False)


def format_lang(namespace: str, en_us, lang: str, validate: bool):
    """Rebuild the ``lang`` file of ``namespace`` so it follows the keys of ``en_us``.

    The result contains, in order: the language's own ``__comment`` entries
    (except the mcresources auto-generated banner), then one entry per
    non-comment ``en_us`` key, and - for the ``minecraft`` namespace only -
    any keys that exist solely in the translation. For each ``en_us`` key the
    existing translation is kept when it differs from the ``en_us`` value;
    otherwise the ``en_us`` value is used. A progress line is printed and the
    result is handed to ``save``.

    :param namespace: asset namespace whose lang file is processed.
    :param en_us: mapping of key -> default text used as the reference.
    :param lang: language code of the file to format.
    :param validate: forwarded unchanged to ``save``.
    """
    lang_data = load(namespace, lang)

    # Comments owned by the translation come first; the auto-generated banner is dropped.
    formatted_lang_data = {
        k: v for k, v in lang_data.items()
        if '__comment' in k and v != 'This file was automatically created by mcresources'
    }
    lang_data = {k: v for k, v in lang_data.items() if '__comment' not in k}

    translated = 0
    for k, v in en_us.items():
        if '__comment' in k:
            continue  # Exclude comments in en_us
        if k in lang_data and lang_data[k] != v:
            # A value that differs from the default counts as an actual translation.
            translated += 1
            formatted_lang_data[k] = lang_data[k]
        else:
            formatted_lang_data[k] = v

    # Unique keys to this language, only allowed in the default vanilla overrides. It makes no sense for a language to have uniquely named TFC keys
    # But, for vanilla minecraft, we may have to override for vanilla items we rename without renaming.
    # e.g. we use 'Egg' but if a translation is 'Chicken Egg', that might be renamed for other languages only.
    if namespace == 'minecraft':
        for k, v in lang_data.items():
            if k not in en_us:
                formatted_lang_data[k] = v

    # Guard the percentage: an empty en_us would otherwise raise ZeroDivisionError.
    total = len(en_us)
    percent = 100 * translated / total if total else 0.0
    print('Translation progress for %s (%s): %d / %d (%.1f%%)' % (lang, namespace, translated, total, percent))
    save(namespace, lang, formatted_lang_data, validate)


def load(namespace: str, lang: str):
    """Read and parse the JSON lang file of ``lang`` from the ``namespace`` assets.

    :param namespace: asset namespace (directory under ``assets``).
    :param lang: language code, used as the file name without extension.
    :return: the decoded JSON content of the file.
    """
    path = './src/main/resources/assets/%s/lang/%s.json' % (namespace, lang)
    with open(path, 'r', encoding='utf-8') as handle:
        contents = json.load(handle)
    return contents


def load_old(namespace: str, lang: str):
    """Read and parse a previous revision of a lang file from the project root.

    The old lang file needs to be placed manually under the project root and
    be named exactly `<lang>.<namespace>.old.json`, where <lang> is the
    language code and <namespace> is usually either 'minecraft' or 'tfc'.

    :return: the decoded JSON content of the file.
    """
    path = './%s.%s.old.json' % (lang, namespace)
    with open(path, 'r', encoding='utf-8') as handle:
        contents = json.load(handle)
    return contents


def save(namespace: str, lang: str, lang_data, validate: bool):
    """Write ``lang_data`` to the lang file, or verify the file already matches it.

    :param namespace: asset namespace (directory under ``assets``).
    :param lang: language code, used as the file name without extension.
    :param lang_data: JSON-serializable data expected in / written to the file.
    :param validate: when true, nothing is written; the file on disk is read
        and compared with ``lang_data`` instead.
    :raises AssertionError: in validate mode, when the file content differs
        from ``lang_data``. The message contains a unified diff.
    """
    path = './src/main/resources/assets/%s/lang/%s.json' % (namespace, lang)
    if validate:
        with open(path, 'r', encoding='utf-8') as f:
            old_lang_data = json.load(f)
        # Raise explicitly instead of using an `assert` statement: asserts are
        # removed under `python -O`, which would make validation always pass.
        if old_lang_data != lang_data:
            diff = '\n'.join(difflib.unified_diff(
                json.dumps(lang_data, ensure_ascii=False, indent=2).split('\n'),
                json.dumps(old_lang_data, ensure_ascii=False, indent=2).split('\n'),
            ))
            raise AssertionError('Validation error in mod localization for %s:\n\n=== Diff (expected vs. actual) ===\n\n%s' % (lang, diff))
    else:
        with open(path, 'w', encoding='utf-8') as f:
            json.dump(lang_data, f, ensure_ascii=False, indent=2)
142 changes: 94 additions & 48 deletions resources/generate_book.py

Large diffs are not rendered by default.

68 changes: 45 additions & 23 deletions resources/i18n.py
Original file line number Diff line number Diff line change
@@ -1,36 +1,27 @@
import os
import json
import os

import Levenshtein

class I18n:

@staticmethod
def create(lang: str):
return I18n(lang) if lang == 'en_us' else ForLanguage(lang)
class I18n:

lang: str

def __init__(self, lang: str):
def __init__(self, lang: str, validate: bool = False):
self.lang = lang

def translate(self, text: str) -> str:
""" Translates the string into the current domain """
return text

def flush(self):
""" Updates the local translation file, if needed """
pass


class ForLanguage(I18n):
def __init__(self, lang: str):
super().__init__(lang)
self.before = {}
self.after = {}
self.lang_path = './lang/%s.json' % lang
self.validate = validate
self.lang_path = './resources/lang/%s.json' % lang

self.fuzzy_matches = 0
self.fuzzy_non_matches = 0

# Default translation
if not os.path.isfile(self.lang_path):
if validate:
raise ValueError('Cannot validate book for lang %s, as resources/lang/%s.json does not exist' % (lang, lang))
print('Writing default translation for language %s to %s' % (self.lang, self.lang_path))
with open(self.lang_path, 'w', encoding='utf-8') as f:
f.write('{}\n')
Expand All @@ -47,17 +38,48 @@ def __init__(self, lang: str):
exit(-1)
self.before[key] = value

def is_root(self) -> bool:
""" Return true if we are in the root language (en_us) """
return self.lang == 'en_us'

def translate(self, text: str) -> str:
if text in self.before:
""" Translates the string into the current domain """
if self.is_root():
# For en_us, always keep the current text (read only)
translated = text
elif text in self.before:
translated = self.before[text] # Translate if available
else:
translated = text # Not available, but record and output anyway
# Try a fuzzy matcher (if we're not in en_us)
# Use the lowercase of both keys, as difference in capitalization is almost surely not a translation issue
distance, match = min(((Levenshtein.distance(text.lower(), key.lower()), key) for key in self.before.keys()))
if distance / len(text) < 0.1 and distance < 20: # Heuristic: < 5% of text, and < 20 overall distance
if self.before[match] == match:
# This has just matched a default key that was inserted in the translated files
# So if we slightly modify the en_us default, we should change this value as well.
self.fuzzy_non_matches += 1
translated = text
else:
# Use the fuzzy match
self.fuzzy_matches += 1
translated = self.before[match]
else:
# Not available, but record and output anyway
self.fuzzy_non_matches += 1
translated = text

self.after[text] = translated
return translated

def flush(self):
""" Updates the local translation file, if needed """
if not self.is_root() and self.fuzzy_matches + self.fuzzy_non_matches > 0:
print('Matched %d / %d entries (%.1f%%). Updated %d entries for lang %s.' % (self.fuzzy_matches, self.fuzzy_matches + self.fuzzy_non_matches, 100 * self.fuzzy_matches / (self.fuzzy_matches + self.fuzzy_non_matches), self.fuzzy_non_matches, self.lang))
if self.validate:
assert self.before == self.after, 'Validation error translating book to lang \'%s\'' % self.lang
with open(self.lang_path, 'w', encoding='utf-8') as f:
print('Writing updated translation for language %s' % self.lang)
unique_count = len(self.after) if self.is_root() else sum(k != v for k, v in self.after.items())
if unique_count > 0:
print('Writing updated translation for language %s: %d / %d (%.2f%%)' % (self.lang, unique_count, len(self.after), 100 * unique_count / len(self.after)))
json.dump(self.after, f, indent=2, ensure_ascii=False)

Loading

0 comments on commit 12d44f0

Please sign in to comment.