Skip to content

Commit

Permalink
Merge branch 'devel'
Browse files Browse the repository at this point in the history
  • Loading branch information
garabik committed Dec 29, 2018
2 parents 0a5d750 + a1bae20 commit eb620cd
Show file tree
Hide file tree
Showing 6 changed files with 78 additions and 20 deletions.
9 changes: 7 additions & 2 deletions README
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@ Enter regular expression, hexadecimal number or some characters as an
argument. unicode will try to guess what you want to look up, see the
manpage if you want to force other behaviour (the manpage is also the
best documentation). In particular, -r forces searching for regular
expression in the names of character, -s forces unicode to display
expression in the names of characters, -s forces unicode to display
information about the characters given.

Here are just some examples:
Expand Down Expand Up @@ -130,5 +130,10 @@ recognized:
{opt_decomp}{decomp_desc} -- the string `Decomposition: ' and a hexadecimal sequence
of decomposition characters; empty if the character
has no decomposition
{opt_unicode_block}{opt_unicode_block_desc} -- the string `Unicode block:', range of the unicode block and description of said unicode block for the given character
{opt_unicode_block}{opt_unicode_block_desc} -- the string `Unicode block:',
range of the unicode block
and description of said unicode
block for the given character
{opt_eaw}{eaw_desc} -- the string `East Asian width:' and the human readable
value of East Asian width

10 changes: 10 additions & 0 deletions debian/changelog
Original file line number Diff line number Diff line change
@@ -1,3 +1,13 @@
unicode (2.7-1) unstable; urgency=low

* add East Asian width
* hack to consider regular expressions ending with '$' (closes: #830996)
* do not flush stdout (closes: #902018)
* better upper/lowercase from internal python db (closes: #848098)
* convert to quilt

-- Radovan Garabík <[email protected]> Thu, 27 Dec 2018 18:17:29 +0100

unicode (2.6) unstable; urgency=low

* fix crash when using Uxxxx (as opposed to U+xxxx) (closes: #836594)
Expand Down
2 changes: 1 addition & 1 deletion debian/control
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@ Section: utils
Priority: optional
Maintainer: Radovan Garabík <[email protected]>
Build-Depends: debhelper (>= 4), dh-python
Standards-Version: 3.9.6
Standards-Version: 4.3.0

Package: unicode
Architecture: all
Expand Down
2 changes: 2 additions & 0 deletions debian/source/format
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
3.0 (quilt)

11 changes: 10 additions & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,13 +5,22 @@

os.chdir(os.path.abspath(os.path.dirname(__file__)))



setup(name='unicode',
version='2.6',
version='2.7',
scripts=['unicode', 'paracode'],
# entry_points={'console_scripts': [
# 'unicode = unicode:main',
# 'paracode = paracode:main']},
description="Display unicode character properties",
long_description="""
Display unicode character properties:
Enter regular expression, hexadecimal number or some characters as an
argument. unicode will try to guess what you want to look up.
Use four-digit hexadecimal number followed by two dots to display
given unicode block in a nice tabular format.
""",
author="Radovan Garabik",
author_email='[email protected]',
url='http://kassiopeia.juls.savba.sk/~garabik/software/unicode.html',
Expand Down
64 changes: 48 additions & 16 deletions unicode
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ from __future__ import unicode_literals

import os, glob, sys, unicodedata, locale, gzip, re, traceback, encodings, io, codecs
import webbrowser, textwrap, struct
from pprint import pprint
#from pprint import pprint

# bz2 was introduced in 2.3, but we want this to work even if for some
# reason it is not available
Expand Down Expand Up @@ -35,7 +35,7 @@ if PY3:
def out(*args):
"pring args, converting them to output charset"
for i in args:
sys.stdout.flush()
#sys.stdout.flush()
sys.stdout.buffer.write(i.encode(options.iocharset, 'replace'))

# ord23 is used to convert elements of byte array in python3, which are already integers
Expand Down Expand Up @@ -66,7 +66,7 @@ else: # python2

from optparse import OptionParser

VERSION='2.6'
VERSION='2.7'


# list of terminals that support bidi
Expand Down Expand Up @@ -208,6 +208,15 @@ comb_classes = {
240: 'Below (iota subscript)',
}

# Human-readable descriptions of the East Asian Width property values.
# Keys are the short codes reported by unicodedata.east_asian_width();
# the lookup in print_characters falls back to the raw code when a key is missing.
eaw_description = {
'F': 'fullwidth',
'H': 'halfwidth',
'W': 'wide',
'Na':'narrow',
'A': 'ambiguous',
'N': 'neutral'
}

def get_unicode_blocks_descriptions():
"parses Blocks.txt"
unicodeblocks = {} # (low, high): 'desc'
Expand Down Expand Up @@ -248,7 +257,6 @@ def get_unicode_properties(ch):
for i, prop in enumerate(proplist):
if prop!='dummy':
properties[prop] = fields[i]

if properties['lowercase']:
properties['lowercase'] = chr(int(properties['lowercase'], 16))
if properties['uppercase']:
Expand All @@ -270,9 +278,17 @@ def get_unicode_properties(ch):
properties['mirrored'] = unicodedata.mirrored(ch)
properties['unicode1name'] = ''
properties['iso_comment'] = ''
properties['uppercase'] = ch.upper() # this is not correct
properties['lowercase'] = ch.lower()
properties['titlecase'] = ''
properties['lowercase'] = properties['uppercase'] = properties['titlecase'] = ''
ch_up = ch.upper()
ch_lo = ch.lower()
ch_title = ch.title()
if ch_up != ch:
properties['uppercase'] = ch_up
if ch_lo != ch:
properties['lowercase'] = ch_lo
if ch_title != ch_up:
properties['titlecase'] = ch_title
properties['east_asian_width'] = get_east_asian_width(ch)
return properties


Expand Down Expand Up @@ -397,12 +413,16 @@ def OpenGzip(fname):
return fo

def GrepInNames(pattern, prefill_cache=False):
pat = re.compile(pattern, re.I)
f = None
for name in UnicodeDataFileNames:
f = OpenGzip(name)
if f != None:
break
if f:
if pattern.endswith('$'):
pattern = pattern[:-1]+';'
pat = re.compile(pattern, re.I)

if not f:
out( """
Cannot find UnicodeData.txt, please place it into
Expand Down Expand Up @@ -597,6 +617,10 @@ def print_characters(clist, maxcount, format_string, query_wikipedia=0, query_wi
1 - spawn browser
"""
counter = 0

for colour_key in colours.keys():
locals()[colour_key] = maybe_colours(colour_key)

for c in clist:

if query_wikipedia or query_wiktionary:
Expand All @@ -613,16 +637,17 @@ def print_characters(clist, maxcount, format_string, query_wikipedia=0, query_wi
if counter > options.maxcount:
out("\nToo many characters to display, more than %s, use --max 0 (or other value) option to change it\n" % options.maxcount)
return
for colour_key in colours.keys():
locals()[colour_key] = maybe_colours(colour_key)
properties = get_unicode_properties(c)
ordc = ord(c)
if properties['name']:
name = properties['name']
else:
name = " - No such unicode character name in database"
utf8 = ' '.join([("%02x" % ord23(x)) for x in c.encode('utf-8')])
utf16be = ''.join([("%02x" % ord23(x)) for x in c.encode('utf-16be')])
if 0xd800 <= ordc <= 0xdfff: # surrogate
utf8 = utf16be = 'N/A'
else:
utf8 = ' '.join([("%02x" % ord23(x)) for x in c.encode('utf-8')])
utf16be = ''.join([("%02x" % ord23(x)) for x in c.encode('utf-16be')])
decimal = "&#%s;" % ordc
octal = "\\0%o" % ordc

Expand Down Expand Up @@ -673,7 +698,7 @@ def print_characters(clist, maxcount, format_string, query_wikipedia=0, query_wi
bidi_desc = bidi_category.get(bidi, bidi)
if bidi:
opt_bidi = 'Bidi: '
bidi_desc = ' ({0})'.format(bidi_desc)
bidi_desc = ' ({0})\n'.format(bidi_desc)
mirrored_desc = ''
mirrored = properties['mirrored']
if mirrored:
Expand All @@ -691,6 +716,10 @@ def print_characters(clist, maxcount, format_string, query_wikipedia=0, query_wi
if decomp:
opt_decomp = 'Decomposition: '
decomp_desc = decomp+'\n'
if properties['east_asian_width']:
opt_eaw = 'East Asian width: '
eaw = properties['east_asian_width']
eaw_desc = '{eaw} ({desc})'.format(eaw=eaw, desc=eaw_description.get(eaw, eaw))

opt_unicode_block = ''
opt_unicode_block_desc = ''
Expand All @@ -717,6 +746,9 @@ def print_characters(clist, maxcount, format_string, query_wikipedia=0, query_wi
for key in uhp:
printkv(key, uhp[key])

def get_east_asian_width(c):
    """Return the East Asian Width class of the character c.

    c -- a single unicode character

    Returns the short property value reported by
    unicodedata.east_asian_width() (for example 'Na', 'W' or 'F').
    If the unicodedata module of the running Python does not provide
    east_asian_width, an empty string is returned, so the result is
    always a string and stays falsy for callers that test it before use.
    Callers that index the result must guard against the empty string.
    """
    lookup = getattr(unicodedata, 'east_asian_width', None)
    if lookup is None:
        # "not available": keep the result falsy but of the same type
        # as the regular return value (previously a bare False leaked out)
        return ''
    return lookup(c)

def print_block(block):
#header
Expand All @@ -742,7 +774,7 @@ def print_block(block):
if unicodedata.combining(c):
c_out = " "+c
# fallback for python without east_asian_width (probably unnecessary, since this script does not work with <2.6 anyway)
fullwidth = 'east_asian_width' in unicodedata.__dict__ and unicodedata.east_asian_width(c)[0] in 'FW'
fullwidth = get_east_asian_width(c)[0] in 'FW'
if not fullwidth:
c_out = ' '+c_out
out(c_out)
Expand Down Expand Up @@ -780,8 +812,8 @@ def unescape(s):
format_string_default = '''{yellow}{bold}U+{ordc:04X} {name}{default}
{green}UTF-8:{default} {utf8} {green}UTF-16BE:{default} {utf16be} {green}Decimal:{default} {decimal} {green}Octal:{default} {octal}{opt_additional}
{pchar}{opt_flipcase}{opt_uppercase}{opt_lowercase}
{green}Category:{default} {category} ({category_desc})
{green}{opt_unicode_block}{default}{opt_unicode_block_desc}{opt_numeric}{default}{numeric_desc}{green}{opt_digit}{default}{digit_desc}{green}{opt_bidi}{default}{bidi}{bidi_desc}
{green}Category:{default} {category} ({category_desc}); {green}{opt_eaw}{default}{eaw_desc}
{green}{opt_unicode_block}{default}{opt_unicode_block_desc}{green}{opt_numeric}{default}{numeric_desc}{green}{opt_digit}{default}{digit_desc}{green}{opt_bidi}{default}{bidi}{bidi_desc}
{mirrored_desc}{green}{opt_combining}{default}{combining_desc}{green}{opt_decomp}{default}{decomp_desc}
'''

Expand Down

0 comments on commit eb620cd

Please sign in to comment.