Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Version 2.14.1 #313

Merged
merged 4 commits into from
Jan 16, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions CHANGES.md
Original file line number Diff line number Diff line change
@@ -1,6 +1,11 @@
emoji
=====

v2.14.1 (2025-01-10)
-----
* Use `importlib.resources` to load json files #311
* Update translations to Unicode release-46-1

v2.14.0 (2024-10-02)
-----
* Update to Unicode 16.0
Expand Down
2 changes: 1 addition & 1 deletion LICENSE.txt
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
New BSD License

Copyright (c) 2014-2024, Taehoon Kim, Kevin Wurster
Copyright (c) 2014-2025, Taehoon Kim, Kevin Wurster
All rights reserved.

Redistribution and use in source and binary forms, with or without
Expand Down
4 changes: 2 additions & 2 deletions emoji/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,15 +21,15 @@
'LANGUAGES',
]

__version__ = '2.14.0'
__version__ = '2.14.1'
__author__ = 'Taehoon Kim, Kevin Wurster'
__email__ = '[email protected]'
# and [email protected], [email protected]
__source__ = 'https://github.com/carpedm20/emoji/'
__license__ = """
New BSD License
Copyright (c) 2014-2024, Taehoon Kim, Kevin Wurster
Copyright (c) 2014-2025, Taehoon Kim, Kevin Wurster
All rights reserved.
Redistribution and use in source and binary forms, with or without
Expand Down
18 changes: 12 additions & 6 deletions emoji/unicode_codes/__init__.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,10 @@
import sys
import importlib.resources
import json
from functools import lru_cache
from pathlib import Path
from warnings import warn

from typing import Optional, Dict, List, Any
from typing import Any, BinaryIO, Dict, List, Optional

from emoji.unicode_codes.data_dict import STATUS, LANGUAGES

Expand Down Expand Up @@ -75,12 +76,18 @@ def __missing__(self, key: str) -> str:
EMOJI_DATA: Dict[str, Dict[str, Any]]


def _open_file(name: str) -> BinaryIO:
    """Open a resource bundled with the ``emoji.unicode_codes`` package.

    The resource is located through ``importlib.resources`` and opened for
    binary reading.

    :param name: File name of the resource inside ``emoji.unicode_codes``,
        e.g. ``'emoji.json'``.
    :return: A binary file object positioned at the start of the resource.
        The caller is responsible for closing it (typically via ``with``).
    """
    package = 'emoji.unicode_codes'

    if sys.version_info < (3, 9):
        # Interpreters older than 3.9 take the legacy ``open_binary`` path
        # instead of the ``files()`` API used below.
        return importlib.resources.open_binary(package, name)

    resource = importlib.resources.files(package).joinpath(name)
    return resource.open('rb')


def _load_default_from_json():
global EMOJI_DATA
global _loaded_keys

file = Path(__file__).with_name('emoji.json')
with open(file, 'rb') as f:
with _open_file('emoji.json') as f:
EMOJI_DATA = dict(json.load(f, object_pairs_hook=EmojiDataDict)) # type: ignore
_loaded_keys = list(_DEFAULT_KEYS)

Expand All @@ -94,8 +101,7 @@ def load_from_json(key: str):
if key not in LANGUAGES:
raise NotImplementedError('Language not supported', key)

file = Path(__file__).with_name(f'emoji_{key}.json')
with open(file, 'rb') as f:
with _open_file(f'emoji_{key}.json') as f:
for emj, value in json.load(f).items():
EMOJI_DATA[emj][key] = value # type: ignore

Expand Down
3 changes: 2 additions & 1 deletion emoji/unicode_codes/emoji.json
Original file line number Diff line number Diff line change
Expand Up @@ -1675,7 +1675,8 @@
"🇨🇶": {
"en": ":Sark:",
"status": 2,
"E": 16
"E": 16,
"alias": [":flag_for_Sark:"]
},
"🇸🇦": {
"en": ":Saudi_Arabia:",
Expand Down
181 changes: 91 additions & 90 deletions emoji/unicode_codes/emoji_ar.json

Large diffs are not rendered by default.

181 changes: 91 additions & 90 deletions emoji/unicode_codes/emoji_de.json

Large diffs are not rendered by default.

181 changes: 91 additions & 90 deletions emoji/unicode_codes/emoji_es.json

Large diffs are not rendered by default.

181 changes: 91 additions & 90 deletions emoji/unicode_codes/emoji_fa.json

Large diffs are not rendered by default.

181 changes: 91 additions & 90 deletions emoji/unicode_codes/emoji_fr.json

Large diffs are not rendered by default.

181 changes: 91 additions & 90 deletions emoji/unicode_codes/emoji_id.json

Large diffs are not rendered by default.

181 changes: 91 additions & 90 deletions emoji/unicode_codes/emoji_it.json

Large diffs are not rendered by default.

181 changes: 91 additions & 90 deletions emoji/unicode_codes/emoji_ja.json

Large diffs are not rendered by default.

217 changes: 109 additions & 108 deletions emoji/unicode_codes/emoji_ko.json

Large diffs are not rendered by default.

181 changes: 91 additions & 90 deletions emoji/unicode_codes/emoji_pt.json

Large diffs are not rendered by default.

181 changes: 91 additions & 90 deletions emoji/unicode_codes/emoji_ru.json

Large diffs are not rendered by default.

217 changes: 109 additions & 108 deletions emoji/unicode_codes/emoji_tr.json

Large diffs are not rendered by default.

217 changes: 109 additions & 108 deletions emoji/unicode_codes/emoji_zh.json

Large diffs are not rendered by default.

31 changes: 14 additions & 17 deletions utils/generate_emoji_translations.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,23 +27,26 @@
emoji_pkg.config.load_language() # Make all languages available in EMOJI_DATA



def get_emojiterra_from_url(url: str) -> Dict[str, str]:
html = get_text_from_url(url)

soup = bs4.BeautifulSoup(html, 'html.parser')
emojis: Dict[str, str] = {}

data = soup.find_all('li')
lis = soup.find_all('li')

data = [
i
for i in data
if 'href' not in i.attrs and 'data-e' in i.attrs and i['data-e'].strip()
li
for li in lis
if 'class' in li.attrs
and 'href' not in li.attrs
and 'e-' in str(li['class'])
and li['title'].strip()
and li.text.strip()
]

for i in data:
code = i['data-e']
code = i.text.strip()
emojis[code] = i['title'].strip()

assert len(data) > 100, f'emojiterra data from {url} has only {len(data)} entries'
Expand All @@ -68,21 +71,14 @@ def add_unicode_annotations(data: Dict[str, str], lang: str, url: str):
for annotation in annotations:
if annotation.get('type') == 'tts':
emj = annotation.get('cp')
assert annotation.text is not None, "Empty annotation text"
assert annotation.text is not None, 'Empty annotation text'
text = annotation.text.strip()
assert emj is not None, f'No code point found in {url} for {annotation}'
assert text is not None, f'No text found in {url} for {annotation}'

emoji_name = adapt_emoji_name(text, lang, emj)

if emj in data and data[emj] != emoji_name:
if '\U0000200d\U000027a1' in emj:
# TODO Skip right-facing emoji (i.e. 🧑🏻‍🦽 vs 🧑🏻‍🦽‍➡️) for now because they are not correctly translated yet
# TODO They are currently missing the skin-colour information in the translations.
print(
f'# {lang}: {emj} SKIPPED CHANGE FROM {data[emj]} TO {emoji_name} \t\t(Source: {text})'
)
continue
print(
f'# {lang}: {emj} CHANGED {data[emj]} TO {emoji_name} \t\t(Source: {text})'
)
Expand Down Expand Up @@ -162,17 +158,18 @@ def extract_names(
return data



if __name__ == '__main__':
logging.warning('Please run generate_emoji.py before this script to update the list of emojis (emoji.json file).')
logging.warning(
'Please run generate_emoji.py before this script to update the list of emojis (emoji.json file).'
)

logging.info(' Downloading...\n')

emojis = emoji_pkg.EMOJI_DATA

# Find latest release tag at https://cldr.unicode.org/index/downloads
# or https://github.com/unicode-org/cldr/releases
github_tag = 'release-46-beta2'
github_tag = 'release-46-1'
languages = {
# Update names in other languages:
'de': extract_names(github_tag, 'de', 'de', get_emojiterra_from_url('https://emojiterra.com/de/tastatur/')),
Expand Down
Loading