Skip to content

Commit

Permalink
Merge pull request #313 from cvzi/branch-2.14.1
Browse files Browse the repository at this point in the history
  • Loading branch information
TahirJalilov authored Jan 16, 2025
2 parents 3209717 + 75c263b commit 08c5cc4
Show file tree
Hide file tree
Showing 19 changed files with 1,273 additions and 1,251 deletions.
5 changes: 5 additions & 0 deletions CHANGES.md
Original file line number Diff line number Diff line change
@@ -1,6 +1,11 @@
emoji
=====

v2.14.1 (2025-01-10)
-----
* Use `importlib.resources` to load json files #311
* Update translations to Unicode release-46-1

v2.14.0 (2024-10-02)
-----
* Update to Unicode 16.0
Expand Down
2 changes: 1 addition & 1 deletion LICENSE.txt
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
New BSD License

Copyright (c) 2014-2024, Taehoon Kim, Kevin Wurster
Copyright (c) 2014-2025, Taehoon Kim, Kevin Wurster
All rights reserved.

Redistribution and use in source and binary forms, with or without
Expand Down
4 changes: 2 additions & 2 deletions emoji/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,15 +21,15 @@
'LANGUAGES',
]

__version__ = '2.14.0'
__version__ = '2.14.1'
__author__ = 'Taehoon Kim, Kevin Wurster'
__email__ = '[email protected]'
# and [email protected], [email protected]
__source__ = 'https://github.com/carpedm20/emoji/'
__license__ = """
New BSD License
Copyright (c) 2014-2024, Taehoon Kim, Kevin Wurster
Copyright (c) 2014-2025, Taehoon Kim, Kevin Wurster
All rights reserved.
Redistribution and use in source and binary forms, with or without
Expand Down
18 changes: 12 additions & 6 deletions emoji/unicode_codes/__init__.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,10 @@
import sys
import importlib.resources
import json
from functools import lru_cache
from pathlib import Path
from warnings import warn

from typing import Optional, Dict, List, Any
from typing import Any, BinaryIO, Dict, List, Optional

from emoji.unicode_codes.data_dict import STATUS, LANGUAGES

Expand Down Expand Up @@ -75,12 +76,18 @@ def __missing__(self, key: str) -> str:
EMOJI_DATA: Dict[str, Dict[str, Any]]


def _open_file(name: str) -> BinaryIO:
    """Open a data file bundled in the ``emoji.unicode_codes`` package.

    The file is opened for binary reading via ``importlib.resources``
    rather than through a filesystem path built from ``__file__``.

    :param name: File name of the resource inside the package,
        e.g. ``'emoji.json'``.
    :return: A binary file object; the caller is responsible for closing it
        (typically by using it in a ``with`` statement).
    """
    package = 'emoji.unicode_codes'

    # ``importlib.resources.files`` is only used on Python 3.9 and newer;
    # older interpreters take the ``open_binary`` route.
    if sys.version_info < (3, 9):
        return importlib.resources.open_binary(package, name)

    resource = importlib.resources.files(package).joinpath(name)
    return resource.open('rb')


def _load_default_from_json():
global EMOJI_DATA
global _loaded_keys

file = Path(__file__).with_name('emoji.json')
with open(file, 'rb') as f:
with _open_file('emoji.json') as f:
EMOJI_DATA = dict(json.load(f, object_pairs_hook=EmojiDataDict)) # type: ignore
_loaded_keys = list(_DEFAULT_KEYS)

Expand All @@ -94,8 +101,7 @@ def load_from_json(key: str):
if key not in LANGUAGES:
raise NotImplementedError('Language not supported', key)

file = Path(__file__).with_name(f'emoji_{key}.json')
with open(file, 'rb') as f:
with _open_file(f'emoji_{key}.json') as f:
for emj, value in json.load(f).items():
EMOJI_DATA[emj][key] = value # type: ignore

Expand Down
3 changes: 2 additions & 1 deletion emoji/unicode_codes/emoji.json
Original file line number Diff line number Diff line change
Expand Up @@ -1675,7 +1675,8 @@
"🇨🇶": {
"en": ":Sark:",
"status": 2,
"E": 16
"E": 16,
"alias": [":flag_for_Sark:"]
},
"🇸🇦": {
"en": ":Saudi_Arabia:",
Expand Down
181 changes: 91 additions & 90 deletions emoji/unicode_codes/emoji_ar.json

Large diffs are not rendered by default.

181 changes: 91 additions & 90 deletions emoji/unicode_codes/emoji_de.json

Large diffs are not rendered by default.

181 changes: 91 additions & 90 deletions emoji/unicode_codes/emoji_es.json

Large diffs are not rendered by default.

181 changes: 91 additions & 90 deletions emoji/unicode_codes/emoji_fa.json

Large diffs are not rendered by default.

181 changes: 91 additions & 90 deletions emoji/unicode_codes/emoji_fr.json

Large diffs are not rendered by default.

181 changes: 91 additions & 90 deletions emoji/unicode_codes/emoji_id.json

Large diffs are not rendered by default.

181 changes: 91 additions & 90 deletions emoji/unicode_codes/emoji_it.json

Large diffs are not rendered by default.

181 changes: 91 additions & 90 deletions emoji/unicode_codes/emoji_ja.json

Large diffs are not rendered by default.

217 changes: 109 additions & 108 deletions emoji/unicode_codes/emoji_ko.json

Large diffs are not rendered by default.

181 changes: 91 additions & 90 deletions emoji/unicode_codes/emoji_pt.json

Large diffs are not rendered by default.

181 changes: 91 additions & 90 deletions emoji/unicode_codes/emoji_ru.json

Large diffs are not rendered by default.

217 changes: 109 additions & 108 deletions emoji/unicode_codes/emoji_tr.json

Large diffs are not rendered by default.

217 changes: 109 additions & 108 deletions emoji/unicode_codes/emoji_zh.json

Large diffs are not rendered by default.

31 changes: 14 additions & 17 deletions utils/generate_emoji_translations.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,23 +27,26 @@
emoji_pkg.config.load_language() # Make all languages available in EMOJI_DATA



def get_emojiterra_from_url(url: str) -> Dict[str, str]:
html = get_text_from_url(url)

soup = bs4.BeautifulSoup(html, 'html.parser')
emojis: Dict[str, str] = {}

data = soup.find_all('li')
lis = soup.find_all('li')

data = [
i
for i in data
if 'href' not in i.attrs and 'data-e' in i.attrs and i['data-e'].strip()
li
for li in lis
if 'class' in li.attrs
and 'href' not in li.attrs
and 'e-' in str(li['class'])
and li['title'].strip()
and li.text.strip()
]

for i in data:
code = i['data-e']
code = i.text.strip()
emojis[code] = i['title'].strip()

assert len(data) > 100, f'emojiterra data from {url} has only {len(data)} entries'
Expand All @@ -68,21 +71,14 @@ def add_unicode_annotations(data: Dict[str, str], lang: str, url: str):
for annotation in annotations:
if annotation.get('type') == 'tts':
emj = annotation.get('cp')
assert annotation.text is not None, "Empty annotation text"
assert annotation.text is not None, 'Empty annotation text'
text = annotation.text.strip()
assert emj is not None, f'No code point found in {url} for {annotation}'
assert text is not None, f'No text found in {url} for {annotation}'

emoji_name = adapt_emoji_name(text, lang, emj)

if emj in data and data[emj] != emoji_name:
if '\U0000200d\U000027a1' in emj:
# TODO Skip right-facing emoji (i.e. 🧑🏻‍🦽 vs 🧑🏻‍🦽‍➡️) for now because they are not correctly translated yet
# TODO They are currently missing the skin-colour information in the translations.
print(
f'# {lang}: {emj} SKIPPED CHANGE FROM {data[emj]} TO {emoji_name} \t\t(Source: {text})'
)
continue
print(
f'# {lang}: {emj} CHANGED {data[emj]} TO {emoji_name} \t\t(Source: {text})'
)
Expand Down Expand Up @@ -162,17 +158,18 @@ def extract_names(
return data



if __name__ == '__main__':
logging.warning('Please run generate_emoji.py before this script to update the list of emojis (emoji.json file).')
logging.warning(
'Please run generate_emoji.py before this script to update the list of emojis (emoji.json file).'
)

logging.info(' Downloading...\n')

emojis = emoji_pkg.EMOJI_DATA

# Find latest release tag at https://cldr.unicode.org/index/downloads
# or https://github.com/unicode-org/cldr/releases
github_tag = 'release-46-beta2'
github_tag = 'release-46-1'
languages = {
# Update names in other languages:
'de': extract_names(github_tag, 'de', 'de', get_emojiterra_from_url('https://emojiterra.com/de/tastatur/')),
Expand Down

0 comments on commit 08c5cc4

Please sign in to comment.