Merge pull request #313 from cvzi/branch-2.14.1

carpedm20 · Jan 16, 2025 · 08c5cc4
2 parents 3209717 + 75c263b
commit 08c5cc4
Show file tree

Hide file tree

Showing 19 changed files with 1,273 additions and 1,251 deletions.
diff --git a/CHANGES.md b/CHANGES.md
@@ -1,6 +1,11 @@
 emoji
 =====
 
+v2.14.1 (2025-01-10)
+-----
+* Use `importlib.resources` to load json files #311
+* Update translations to Unicode release-46-1
+
 v2.14.0 (2024-10-02)
 -----
 * Update to Unicode 16.0

diff --git a/LICENSE.txt b/LICENSE.txt
@@ -1,6 +1,6 @@
 New BSD License
 
-Copyright (c) 2014-2024, Taehoon Kim, Kevin Wurster
+Copyright (c) 2014-2025, Taehoon Kim, Kevin Wurster
 All rights reserved.
 
 Redistribution and use in source and binary forms, with or without

diff --git a/emoji/__init__.py b/emoji/__init__.py
@@ -21,15 +21,15 @@
     'LANGUAGES',
 ]
 
-__version__ = '2.14.0'
+__version__ = '2.14.1'
 __author__ = 'Taehoon Kim, Kevin Wurster'
 __email__ = '[email protected]'
 # and [email protected], [email protected]
 __source__ = 'https://github.com/carpedm20/emoji/'
 __license__ = """
 New BSD License
 
-Copyright (c) 2014-2024, Taehoon Kim, Kevin Wurster
+Copyright (c) 2014-2025, Taehoon Kim, Kevin Wurster
 All rights reserved.
 
 Redistribution and use in source and binary forms, with or without

diff --git a/emoji/unicode_codes/__init__.py b/emoji/unicode_codes/__init__.py
@@ -1,9 +1,10 @@
+import sys
+import importlib.resources
 import json
 from functools import lru_cache
-from pathlib import Path
 from warnings import warn
 
-from typing import Optional, Dict, List, Any
+from typing import Any, BinaryIO, Dict, List, Optional
 
 from emoji.unicode_codes.data_dict import STATUS, LANGUAGES
 
@@ -75,12 +76,18 @@ def __missing__(self, key: str) -> str:
 EMOJI_DATA: Dict[str, Dict[str, Any]]
 
 
+def _open_file(name: str) -> BinaryIO:
+    if sys.version_info >= (3, 9):
+        return importlib.resources.files('emoji.unicode_codes').joinpath(name).open('rb')
+    else:
+        return importlib.resources.open_binary('emoji.unicode_codes', name)
+
+
 def _load_default_from_json():
     global EMOJI_DATA
     global _loaded_keys
 
-    file = Path(__file__).with_name('emoji.json')
-    with open(file, 'rb') as f:
+    with _open_file('emoji.json') as f:
         EMOJI_DATA = dict(json.load(f, object_pairs_hook=EmojiDataDict))  # type: ignore
     _loaded_keys = list(_DEFAULT_KEYS)
 
@@ -94,8 +101,7 @@ def load_from_json(key: str):
     if key not in LANGUAGES:
         raise NotImplementedError('Language not supported', key)
 
-    file = Path(__file__).with_name(f'emoji_{key}.json')
-    with open(file, 'rb') as f:
+    with _open_file(f'emoji_{key}.json') as f:
         for emj, value in json.load(f).items():
             EMOJI_DATA[emj][key] = value  # type: ignore
 

diff --git a/emoji/unicode_codes/emoji.json b/emoji/unicode_codes/emoji.json
@@ -1675,7 +1675,8 @@
 "🇨🇶": {
   "en": ":Sark:",
   "status": 2,
-  "E": 16
+  "E": 16,
+  "alias": [":flag_for_Sark:"]
 },
 "🇸🇦": {
   "en": ":Saudi_Arabia:",

diff --git a/emoji/unicode_codes/emoji_ar.json b/emoji/unicode_codes/emoji_ar.json
diff --git a/emoji/unicode_codes/emoji_de.json b/emoji/unicode_codes/emoji_de.json
diff --git a/emoji/unicode_codes/emoji_es.json b/emoji/unicode_codes/emoji_es.json
diff --git a/emoji/unicode_codes/emoji_fa.json b/emoji/unicode_codes/emoji_fa.json
diff --git a/emoji/unicode_codes/emoji_fr.json b/emoji/unicode_codes/emoji_fr.json
diff --git a/emoji/unicode_codes/emoji_id.json b/emoji/unicode_codes/emoji_id.json
diff --git a/emoji/unicode_codes/emoji_it.json b/emoji/unicode_codes/emoji_it.json
diff --git a/emoji/unicode_codes/emoji_ja.json b/emoji/unicode_codes/emoji_ja.json
diff --git a/emoji/unicode_codes/emoji_ko.json b/emoji/unicode_codes/emoji_ko.json
diff --git a/emoji/unicode_codes/emoji_pt.json b/emoji/unicode_codes/emoji_pt.json
diff --git a/emoji/unicode_codes/emoji_ru.json b/emoji/unicode_codes/emoji_ru.json
diff --git a/emoji/unicode_codes/emoji_tr.json b/emoji/unicode_codes/emoji_tr.json
diff --git a/emoji/unicode_codes/emoji_zh.json b/emoji/unicode_codes/emoji_zh.json
diff --git a/utils/generate_emoji_translations.py b/utils/generate_emoji_translations.py
@@ -27,23 +27,26 @@
 emoji_pkg.config.load_language()  # Make all languages available in EMOJI_DATA
 
 
-
 def get_emojiterra_from_url(url: str) -> Dict[str, str]:
     html = get_text_from_url(url)
 
     soup = bs4.BeautifulSoup(html, 'html.parser')
     emojis: Dict[str, str] = {}
 
-    data = soup.find_all('li')
+    lis = soup.find_all('li')
 
     data = [
-        i
-        for i in data
-        if 'href' not in i.attrs and 'data-e' in i.attrs and i['data-e'].strip()
+        li
+        for li in lis
+        if 'class' in li.attrs
+        and 'href' not in li.attrs
+        and 'e-' in str(li['class'])
+        and li['title'].strip()
+        and li.text.strip()
     ]
 
     for i in data:
-        code = i['data-e']
+        code = i.text.strip()
         emojis[code] = i['title'].strip()
 
     assert len(data) > 100, f'emojiterra data from {url} has only {len(data)} entries'
@@ -68,21 +71,14 @@ def add_unicode_annotations(data: Dict[str, str], lang: str, url: str):
     for annotation in annotations:
         if annotation.get('type') == 'tts':
             emj = annotation.get('cp')
-            assert annotation.text is not None, "Empty annotation text"
+            assert annotation.text is not None, 'Empty annotation text'
             text = annotation.text.strip()
             assert emj is not None, f'No code point found in {url} for {annotation}'
             assert text is not None, f'No text found in {url} for {annotation}'
 
             emoji_name = adapt_emoji_name(text, lang, emj)
 
             if emj in data and data[emj] != emoji_name:
-                if '\U0000200d\U000027a1' in emj:
-                    # TODO Skip right-facing emoji (i.e. 🧑🏻‍🦽 vs 🧑🏻‍🦽‍➡️) for now because they are not correctly translated yet
-                    # TODO They are currently missing the skin-colour information in the translations.
-                    print(
-                        f'# {lang}: {emj} SKIPPED CHANGE FROM {data[emj]} TO {emoji_name} \t\t(Source: {text})'
-                    )
-                    continue
                 print(
                     f'# {lang}: {emj} CHANGED {data[emj]} TO {emoji_name} \t\t(Source: {text})'
                 )
@@ -162,17 +158,18 @@ def extract_names(
     return data
 
 
-
 if __name__ == '__main__':
-    logging.warning('Please run generate_emoji.py before this script to update the list of emojis (emoji.json file).')
+    logging.warning(
+        'Please run generate_emoji.py before this script to update the list of emojis (emoji.json file).'
+    )
 
     logging.info('  Downloading...\n')
 
     emojis = emoji_pkg.EMOJI_DATA
 
     # Find latest release tag at https://cldr.unicode.org/index/downloads
     # or  https://github.com/unicode-org/cldr/releases
-    github_tag = 'release-46-beta2'
+    github_tag = 'release-46-1'
     languages = {
         # Update names in other languages:
         'de': extract_names(github_tag, 'de', 'de', get_emojiterra_from_url('https://emojiterra.com/de/tastatur/')),