diff --git a/.coveragerc b/.coveragerc
index 5e913bd..96752f1 100644
--- a/.coveragerc
+++ b/.coveragerc
@@ -2,8 +2,12 @@
 branch=True
 source=unasync
 
+[paths]
+source = src/unasync
+
 [report]
 precision = 1
 exclude_lines =
     pragma: no cover
     abc.abstractmethod
+    \# PY2
diff --git a/.coveragerc-py2 b/.coveragerc-py2
new file mode 100644
index 0000000..c69e158
--- /dev/null
+++ b/.coveragerc-py2
@@ -0,0 +1,13 @@
+[run]
+branch=True
+source=unasync
+
+[paths]
+source = src/unasync
+
+[report]
+precision = 1
+exclude_lines =
+    pragma: no cover
+    abc.abstractmethod
+    \# PY3
diff --git a/ci/travis.sh b/ci/travis.sh
index 7319260..7ad4fc0 100755
--- a/ci/travis.sh
+++ b/ci/travis.sh
@@ -51,6 +51,16 @@ if [ "$USE_PYPY_RELEASE_VERSION" != "" ]; then
     source testenv/bin/activate
 fi
 
+case "${MACPYTHON:-${TRAVIS_PYTHON_VERSION:-}}" in
+    2*)
+        COVERAGE_FILE=.coveragerc-py2
+        ;;
+
+    *)
+        COVERAGE_FILE=.coveragerc
+        ;;
+esac
+
 pip install -U pip setuptools wheel
 
 if [ "$CHECK_FORMATTING" = "1" ]; then
@@ -91,7 +101,7 @@ else
     mkdir empty
     cd empty
 
-    pytest -ra -v --cov=unasync --cov-config=../.coveragerc --verbose ../tests
+    pytest -ra -v --cov=unasync --cov-config="../${COVERAGE_FILE}" --verbose ../tests
 
     bash <(curl -s https://codecov.io/bash)
 fi
diff --git a/src/unasync/__init__.py b/src/unasync/__init__.py
index db4c0d9..24bd971 100644
--- a/src/unasync/__init__.py
+++ b/src/unasync/__init__.py
@@ -1,9 +1,11 @@
+# -*- encoding: utf8 -*-
 """Top-level package for unasync."""
 
 from __future__ import print_function
 
 import collections
 import errno
+import io
 import os
 import sys
 import tokenize as std_tokenize
@@ -34,13 +36,34 @@
     "StopAsyncIteration": "StopIteration",
 }
 
+_TYPE_COMMENT_PREFIX = "# type: "
+
+
+if sys.version_info[0] == 2:  # PY2
+
+    def isidentifier(s):
+        return all([c.isalnum() or c == "_" for c in s])
+
+    StringIO = io.BytesIO
+else:  # PY3
+
+    def isidentifier(s):
+        return s.isidentifier()
+
+    StringIO = io.StringIO
+
+if hasattr(os, "fspath"):  # PY3
+    fspath = os.fspath
+else:  # PY2
+    fspath = str
+
 
 class Rule:
     """A single set of rules for 'unasync'ing file(s)"""
 
     def __init__(self, fromdir, todir, additional_replacements=None):
-        self.fromdir = fromdir.replace("/", os.sep)
-        self.todir = todir.replace("/", os.sep)
+        self.fromdir = fspath(fromdir).replace("/", os.sep)
+        self.todir = fspath(todir).replace("/", os.sep)
 
         # Add any additional user-defined token replacements to our list.
         self.token_replacements = _ASYNC_TO_SYNC.copy()
@@ -51,6 +74,8 @@ def _match(self, filepath):
         """Determines if a Rule matches a given filepath and if so returns
         a higher comparable value if the match is more specific.
         """
+        filepath = fspath(filepath)
+
         file_segments = [x for x in filepath.split(os.sep) if x]
         from_segments = [x for x in self.fromdir.split(os.sep) if x]
         len_from_segments = len(from_segments)
@@ -65,9 +90,10 @@ def _match(self, filepath):
         return False
 
     def _unasync_file(self, filepath):
+        filepath = fspath(filepath)
         with open(filepath, "rb") as f:
             write_kwargs = {}
-            if sys.version_info[0] >= 3:
+            if sys.version_info[0] >= 3:  # PY3 # pragma: no branch
                 encoding, _ = std_tokenize.detect_encoding(f.readline)
                 write_kwargs["encoding"] = encoding
                 f.seek(0)
@@ -82,7 +108,57 @@ def _unasync_file(self, filepath):
     def _unasync_tokens(self, tokens):
         # TODO __await__, ...?
         used_space = None
+        context = None  # Can be `None`, `"func_decl"`, `"func_name"`, `"arg_list"`, `"arg_list_end"`, `"return_type"`
+        brace_depth = 0
+        typing_ctx = False
+
         for space, toknum, tokval in tokens:
+            # Update context state tracker
+            if context is None and toknum == std_tokenize.NAME and tokval == "def":
+                context = "func_decl"
+            elif context == "func_decl" and toknum == std_tokenize.NAME:
+                context = "func_name"
+            elif context == "func_name" and toknum == std_tokenize.OP and tokval == "(":
+                context = "arg_list"
+            elif context == "arg_list":
+                if toknum == std_tokenize.OP and tokval in ("(", "["):
+                    brace_depth += 1
+                elif (
+                    toknum == std_tokenize.OP
+                    and tokval in (")", "]")
+                    and brace_depth >= 1
+                ):
+                    brace_depth -= 1
+                elif toknum == std_tokenize.OP and tokval == ")":
+                    context = "arg_list_end"
+                elif toknum == std_tokenize.OP and tokval == ":" and brace_depth < 1:
+                    typing_ctx = True
+                elif toknum == std_tokenize.OP and tokval == "," and brace_depth < 1:
+                    typing_ctx = False
+            elif (
+                context == "arg_list_end"
+                and toknum == std_tokenize.OP
+                and tokval == "->"
+            ):
+                context = "return_type"
+                typing_ctx = True
+            elif context == "return_type":
+                if toknum == std_tokenize.OP and tokval in ("(", "["):
+                    brace_depth += 1
+                elif (
+                    toknum == std_tokenize.OP
+                    and tokval in (")", "]")
+                    and brace_depth >= 1
+                ):
+                    brace_depth -= 1
+                elif toknum == std_tokenize.OP and tokval == ":":
+                    context = None
+                    typing_ctx = False
+            else:  # Something unexpected happened - reset state
+                context = None
+                brace_depth = 0
+                typing_ctx = False
+
             if tokval in ["async", "await"]:
                 # When removing async or await, we want to use the whitespace that
                 # was before async/await before the next token so that
@@ -93,8 +169,59 @@ def _unasync_tokens(self, tokens):
             if toknum == std_tokenize.NAME:
                 tokval = self._unasync_name(tokval)
             elif toknum == std_tokenize.STRING:
-                left_quote, name, right_quote = tokval[0], tokval[1:-1], tokval[-1]
-                tokval = left_quote + self._unasync_name(name) + right_quote
+                # Strings in typing context are forward-references and should be unasyncified
+                quote = ""
+                prefix = ""
+                while ord(tokval[0]) in range(ord("a"), ord("z") + 1):
+                    prefix += tokval[0]
+                    tokval = tokval[1:]
+
+                if tokval.startswith('"""') and tokval.endswith('"""'):
+                    quote = '"""'  # Broken syntax highlighters workaround: """
+                elif tokval.startswith("'''") and tokval.endswith("'''"):
+                    quote = "'''"  # Broken syntax highlighters workaround: '''
+                elif tokval.startswith('"') and tokval.endswith('"'):
+                    quote = '"'
+                elif tokval.startswith(  # pragma: no branch
+                    "'"
+                ) and tokval.endswith("'"):
+                    quote = "'"
+                assert (
+                    len(quote) > 0
+                ), "Quoting style of string {0!r} unknown".format(tokval)
+                stringval = tokval[len(quote) : -len(quote)]
+                if typing_ctx:
+                    stringval = _untokenize(
+                        self._unasync_tokens(_tokenize(StringIO(stringval)))
+                    )
+                else:
+                    stringval = self._unasync_name(stringval)
+                tokval = prefix + quote + stringval + quote
+            elif toknum == std_tokenize.COMMENT and tokval.startswith(
+                _TYPE_COMMENT_PREFIX
+            ):
+                type_decl, suffix = tokval[len(_TYPE_COMMENT_PREFIX) :], ""
+                if "#" in type_decl:
+                    type_decl, suffix = type_decl.split("#", 1)
+                    suffix = "#" + suffix
+                type_decl_stripped = type_decl.strip()
+
+                # Do not process `type: ignore` or `type: ignore[…]` as these aren't actual identifiers
+                is_type_ignore = type_decl_stripped == "ignore"
+                is_type_ignore |= type_decl_stripped.startswith(
+                    "ignore"
+                ) and not isidentifier(type_decl_stripped[0:7])
+                if not is_type_ignore:
+                    # Preserve trailing whitespace since the tokenizer won't
+                    trailing_space_len = len(type_decl) - len(type_decl.rstrip())
+                    if trailing_space_len > 0:
+                        suffix = type_decl[-trailing_space_len:] + suffix
+                        type_decl = type_decl[:-trailing_space_len]
+                    type_decl = _untokenize(
+                        self._unasync_tokens(_tokenize(StringIO(type_decl)))
+                    )
+
+                tokval = _TYPE_COMMENT_PREFIX + type_decl + suffix
             if used_space is None:
                 used_space = space
             yield (used_space, tokval)
@@ -128,12 +255,16 @@ def unasync_files(fpath_list, rules):
 
 
 def _get_tokens(f):
-    if sys.version_info[0] == 2:
+    if sys.version_info[0] == 2:  # PY2
         for tok in std_tokenize.generate_tokens(f.readline):
             type_, string, start, end, line = tok
             yield Token(type_, string, start, end, line)
-    else:
-        for tok in std_tokenize.tokenize(f.readline):
+    else:  # PY3
+        if isinstance(f, io.TextIOBase):
+            gen = std_tokenize.generate_tokens(f.readline)
+        else:
+            gen = std_tokenize.tokenize(f.readline)
+        for tok in gen:
             if tok.type == std_tokenize.ENCODING:
                 continue
             yield tok
@@ -143,13 +274,16 @@ def _tokenize(f):
     last_end = (1, 0)
     for tok in _get_tokens(f):
         if last_end[0] < tok.start[0]:
-            yield ("", std_tokenize.STRING, " \\\n")
+            # Somehow Python 3.5 and below produce the ENDMARKER in a way that
+            # causes superfluous continuation lines to be generated
+            if tok.type != std_tokenize.ENDMARKER:
+                yield (" ", std_tokenize.NEWLINE, "\\\n")
             last_end = (tok.start[0], 0)
 
         space = ""
         if tok.start > last_end:
             assert tok.start[0] == last_end[0]
-            space = " " * (tok.start[1] - last_end[1])
+            space = tok.line[last_end[1] : tok.start[1]]
         yield (space, tok.type, tok.string)
 
         last_end = tok.end
diff --git a/test-requirements.txt b/test-requirements.txt
index 2c7cf23..55b2caa 100644
--- a/test-requirements.txt
+++ b/test-requirements.txt
@@ -1,2 +1,3 @@
 pytest>=4.3.0
-pytest-cov
\ No newline at end of file
+pytest-cov
+pathlib2 ; python_version < '3.5'
\ No newline at end of file
diff --git a/tests/data/async/tabs.py b/tests/data/async/tabs.py
new file mode 100644
index 0000000..27c8ca2
--- /dev/null
+++ b/tests/data/async/tabs.py
@@ -0,0 +1,8 @@
+# fmt: off
+async def dummy():
+	await dummy2()  # This line is indented with a tab that should be preserved
+# fmt: on
+
+
+async def dummy2():
+    await dummy()  # This one uses 4 spaces and these should also be preserved
diff --git a/tests/data/async/typing.py b/tests/data/async/typing.py
index 4f0cd41..64bcfb6 100644
--- a/tests/data/async/typing.py
+++ b/tests/data/async/typing.py
@@ -3,3 +3,26 @@
 typing.AsyncIterable[bytes]
 typing.AsyncIterator[bytes]
 typing.AsyncGenerator[bytes]
+
+# A typed function that takes the first item of an (a)sync iterator and returns it
+async def func1(a: typing.AsyncIterable[int]) -> str:
+    it: typing.AsyncIterator[int] = a.__aiter__()
+    b: int = await it.__anext__()
+    return str(b)
+
+
+# Same as the above but using old-style typings (mainly for Python 2.7 – 3.5 compatibility)
+async def func2(a):  # type: (typing.AsyncIterable[int]) -> str
+    it = a.__aiter__()  # type: typing.AsyncIterator[int]
+    b = await it.__anext__()  # type: int
+    return str(b)
+
+
+# And some funky edge cases to at least cover the relevant cases in this test
+a: int = 5
+b: str = a  # type: ignore  # This is the actual comment and the type declaration silences the warning that would otherwise happen
+c: str = a  # type: ignore2  # This is the actual comment and the declaration declares another type, both of which are wrong
+
+# fmt: off
+# And some genuine trailing whitespace (uww…)
+z = a  # type: int
diff --git a/tests/data/async/typing_py3.py b/tests/data/async/typing_py3.py
new file mode 100644
index 0000000..cd4e213
--- /dev/null
+++ b/tests/data/async/typing_py3.py
@@ -0,0 +1,13 @@
+# fmt: off
+# A forward-reference typed function that returns an iterator for an (a)sync iterable
+async def aiter1(a: "typing.AsyncIterable[int]") -> 'typing.AsyncIterable[int]':
+    return a.__aiter__()
+
+# Same as the above but using triple-quoted strings
+async def aiter2(a: """typing.AsyncIterable[int]""") -> r'''typing.AsyncIterable[int]''':
+    return a.__aiter__()
+
+# Same as the above but without forward-references
+async def aiter3(a: typing.AsyncIterable[int]) -> typing.AsyncIterable[int]:
+    return a.__aiter__()
+# fmt: on
diff --git a/tests/data/sync/tabs.py b/tests/data/sync/tabs.py
new file mode 100644
index 0000000..9e9f48b
--- /dev/null
+++ b/tests/data/sync/tabs.py
@@ -0,0 +1,8 @@
+# fmt: off
+def dummy():
+	dummy2()  # This line is indented with a tab that should be preserved
+# fmt: on
+
+
+def dummy2():
+    dummy()  # This one uses 4 spaces and these should also be preserved
diff --git a/tests/data/sync/typing.py b/tests/data/sync/typing.py
index 268e7c7..213b048 100644
--- a/tests/data/sync/typing.py
+++ b/tests/data/sync/typing.py
@@ -3,3 +3,26 @@
 typing.Iterable[bytes]
 typing.Iterator[bytes]
 typing.Generator[bytes]
+
+# A typed function that takes the first item of an (a)sync iterator and returns it
+def func1(a: typing.Iterable[int]) -> str:
+    it: typing.Iterator[int] = a.__iter__()
+    b: int = it.__next__()
+    return str(b)
+
+
+# Same as the above but using old-style typings (mainly for Python 2.7 – 3.5 compatibility)
+def func2(a):  # type: (typing.Iterable[int]) -> str
+    it = a.__iter__()  # type: typing.Iterator[int]
+    b = it.__next__()  # type: int
+    return str(b)
+
+
+# And some funky edge cases to at least cover the relevant cases in this test
+a: int = 5
+b: str = a  # type: ignore  # This is the actual comment and the type declaration silences the warning that would otherwise happen
+c: str = a  # type: ignore2  # This is the actual comment and the declaration declares another type, both of which are wrong
+
+# fmt: off
+# And some genuine trailing whitespace (uww…)
+z = a  # type: int
diff --git a/tests/data/sync/typing_py3.py b/tests/data/sync/typing_py3.py
new file mode 100644
index 0000000..cfad1f0
--- /dev/null
+++ b/tests/data/sync/typing_py3.py
@@ -0,0 +1,13 @@
+# fmt: off
+# A forward-reference typed function that returns an iterator for an (a)sync iterable
+def aiter1(a: "typing.Iterable[int]") -> 'typing.Iterable[int]':
+    return a.__iter__()
+
+# Same as the above but using triple-quoted strings
+def aiter2(a: """typing.Iterable[int]""") -> r'''typing.Iterable[int]''':
+    return a.__iter__()
+
+# Same as the above but without forward-references
+def aiter3(a: typing.Iterable[int]) -> typing.Iterable[int]:
+    return a.__iter__()
+# fmt: on
diff --git a/tests/test_unasync.py b/tests/test_unasync.py
index 6f198d0..511b956 100644
--- a/tests/test_unasync.py
+++ b/tests/test_unasync.py
@@ -2,8 +2,14 @@
 import errno
 import io
 import os
+
+try:
+    import pathlib
+except ImportError:
+    import pathlib2 as pathlib
 import shutil
 import subprocess
+import sys
 
 import pytest
 
@@ -14,10 +20,18 @@
 SYNC_DIR = os.path.join(TEST_DIR, "sync")
 TEST_FILES = sorted([f for f in os.listdir(ASYNC_DIR) if f.endswith(".py")])
 
+if sys.version_info[0] == 2:
+    TEST_FILES.remove("typing_py3.py")
+
 
 def list_files(startpath):
     output = ""
     for root, dirs, files in os.walk(startpath):
+        # Ensure that we do not capture the directory inode order on
+        # platforms that don't pre-sort `readdir` results
+        dirs.sort()
+        files.sort()
+
         level = root.replace(startpath, "").count(os.sep)
         indent = " " * 4 * (level)
         output += "{}{}/".format(indent, os.path.basename(root))
@@ -34,6 +48,12 @@ def test_rule_on_short_path():
     assert rule._match("/ahip/") is False
 
 
+def test_rule_with_pathlib_path():
+    path_async_base = pathlib.Path("/ahip")
+    path_sync_base = pathlib.Path("/hip")
+    unasync.Rule(path_async_base / "tests", path_sync_base / "tests")
+
+
 @pytest.mark.parametrize("source_file", TEST_FILES)
 def test_unasync(tmpdir, source_file):
 
@@ -72,9 +92,9 @@ def test_build_py_modules(tmpdir):
 
     env = copy.copy(os.environ)
     env["PYTHONPATH"] = os.path.realpath(os.path.join(TEST_DIR, ".."))
-    subprocess.check_call(["python", "setup.py", "build"], cwd=mod_dir, env=env)
+    subprocess.check_call([sys.executable, "setup.py", "build"], cwd=mod_dir, env=env)
     # Calling it twice to test the "if not copied" branch
-    subprocess.check_call(["python", "setup.py", "build"], cwd=mod_dir, env=env)
+    subprocess.check_call([sys.executable, "setup.py", "build"], cwd=mod_dir, env=env)
 
     unasynced = os.path.join(mod_dir, "build/lib/_sync/some_file.py")
     tree_build_dir = list_files(mod_dir)
@@ -92,7 +112,7 @@ def test_build_py_packages(tmpdir):
 
     env = copy.copy(os.environ)
     env["PYTHONPATH"] = os.path.realpath(os.path.join(TEST_DIR, ".."))
-    subprocess.check_call(["python", "setup.py", "build"], cwd=pkg_dir, env=env)
+    subprocess.check_call([sys.executable, "setup.py", "build"], cwd=pkg_dir, env=env)
 
     unasynced = os.path.join(pkg_dir, "build/lib/example_pkg/_sync/__init__.py")
 
@@ -109,7 +129,7 @@ def test_project_structure_after_build_py_packages(tmpdir):
     env = copy.copy(os.environ)
     env["PYTHONPATH"] = os.path.realpath(os.path.join(TEST_DIR, ".."))
-    subprocess.check_call(["python", "setup.py", "build"], cwd=pkg_dir, env=env)
+    subprocess.check_call([sys.executable, "setup.py", "build"], cwd=pkg_dir, env=env)
 
     _async_dir_tree = list_files(
         os.path.join(source_pkg_dir, "src/example_pkg/_async/.")
     )
@@ -129,7 +149,7 @@ def test_project_structure_after_customized_build_py_packages(tmpdir):
 
     env = copy.copy(os.environ)
     env["PYTHONPATH"] = os.path.realpath(os.path.join(TEST_DIR, ".."))
-    subprocess.check_call(["python", "setup.py", "build"], cwd=pkg_dir, env=env)
+    subprocess.check_call([sys.executable, "setup.py", "build"], cwd=pkg_dir, env=env)
 
     _async_dir_tree = list_files(os.path.join(source_pkg_dir, "src/ahip/."))
     unasynced_dir_path = os.path.join(pkg_dir, "build/lib/hip/.")
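
Reviewer note (not part of the patch): with the fspath() conversions added in Rule.__init__, Rule._match, and Rule._unasync_file, Rule and unasync_files should now accept pathlib.Path objects as well as plain strings, which is what test_rule_with_pathlib_path exercises. A minimal usage sketch, assuming a hypothetical src/mypkg layout with _async and _sync subdirectories:

import pathlib

import unasync

# Hypothetical project layout; any directory pair works the same way.
base = pathlib.Path("src/mypkg")
rule = unasync.Rule(base / "_async", base / "_sync")

# Path objects can be passed straight through; each file matching the rule
# is rewritten into the corresponding _sync directory.
unasync.unasync_files(sorted((base / "_async").glob("*.py")), [rule])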