Skip to content

Commit

Permalink
CLI: Improve error message of PathOrUrl and FileOrUrl (#6360)
Browse files Browse the repository at this point in the history
The `PathOrUrl` and `FileOrUrl` parameter types extend click's `Path`
and `File` parameter types, respectively, by adding support for URLs.
If the conversion the base type fails, the provided value is tried as a
URL.

The problem was that if the provided value was intended to be a local
file or path, but conversion failed, the conversion to URL would almost
certainly also fail and that would be the returned error message. This
would be confusing for users that tried to specify a local path, but it
contained a typo or was not readable for other reasons, such as file
permissions.

When the base type conversion fails, now the parameter types first check
whether the provided file or path exists on the local disk. If so, it is
assumed that the value was intended to be a local path and an error is
returned that the path could not be read. Only then is the path tried as
a URL. Here the `URLError` and `socket.timeout` exceptions are caught
separately in which case we are almost certainly dealing with a URL
which just couldn't be reached (in time). The `urlopen` method can also
throw a `ValueError` which is usually when no protocol is defined. In
this case an error is returned that explicitly states that the provided
value does not correspond to a readable path nor to a reachable URL.
  • Loading branch information
sphuber authored Apr 25, 2024
1 parent dba1174 commit ffc6e4f
Show file tree
Hide file tree
Showing 3 changed files with 102 additions and 76 deletions.
81 changes: 41 additions & 40 deletions src/aiida/cmdline/params/types/path.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
"""Click parameter types for paths."""

import os
import pathlib

import click

Expand Down Expand Up @@ -60,13 +61,42 @@ def __repr__(self):
return 'ABSOLUTEPATHEMPTY'


class PathOrUrl(click.Path):
"""Extension of click's Path-type to include URLs.
def convert_possible_url(value: str, timeout: int):
"""If ``value`` does not correspond to a path on disk, try to open it as a URL.
:param value: Potential path to file on disk or URL.
:param timeout: The timeout in seconds when opening the URL.
:param return_handle: Return the ``value`` as is. When set to ``True`` return an open file handle instead.
:returns: The URL if ``value`` could be opened as a URL
"""
import socket
import urllib.error
import urllib.request

filepath = pathlib.Path(value)

# Check whether the path actually corresponds to a file on disk, in which case the exception is reraised.
if filepath.exists():
raise click.BadParameter(f'The path `{value}` exists but could not be read.')

A PathOrUrl can either be a `click.Path`-type or a URL.
try:
return urllib.request.urlopen(value, timeout=timeout)
except urllib.error.URLError:
raise click.BadParameter(f'The URL `{value}` could not be reached.')
except socket.timeout:
raise click.BadParameter(f'The URL `{value}` could not be reached within {timeout} seconds.')
except ValueError as exception_url:
raise click.BadParameter(
f'The path `{value}` does not correspond to a file and also could not be reached as a URL.\n'
'Please check the spelling for typos and if it is a URL, make sure to include the protocol, e.g., http://'
) from exception_url


class PathOrUrl(click.Path):
"""Parameter type that accepts a path on the local file system or a URL.
:param int timeout_seconds: Maximum timeout accepted for URL response.
Must be an integer in the range [0;60].
:param timeout_seconds: Maximum timeout accepted for URL response. Must be an integer in the range [0;60].
:returns: The path or URL.
"""

name = 'PathOrUrl'
Expand All @@ -77,34 +107,18 @@ def __init__(self, timeout_seconds=URL_TIMEOUT_SECONDS, **kwargs):
self.timeout_seconds = check_timeout_seconds(timeout_seconds)

def convert(self, value, param, ctx):
"""Overwrite `convert` Check first if `click.Path`-type, then check if URL."""
try:
return super().convert(value, param, ctx)
except click.exceptions.BadParameter:
return self.checks_url(value, param, ctx)

def checks_url(self, url, param, ctx):
"""Check whether URL is reachable within timeout."""
import socket
import urllib.error
import urllib.request

try:
with urllib.request.urlopen(url, timeout=self.timeout_seconds):
pass
except (urllib.error.URLError, urllib.error.HTTPError, socket.timeout):
self.fail(f'{self.name} "{url}" could not be reached within {self.timeout_seconds} s.\n', param, ctx)

return url
convert_possible_url(value, self.timeout_seconds)
return value


class FileOrUrl(click.File):
"""Extension of click's File-type to include URLs.
"""Parameter type that accepts a path on the local file system or a URL.
Returns handle either to local file or to remote file fetched from URL.
:param int timeout_seconds: Maximum timeout accepted for URL response.
Must be an integer in the range [0;60].
:param timeout_seconds: Maximum timeout accepted for URL response. Must be an integer in the range [0;60].
:returns: The file or URL.
"""

name = 'FileOrUrl'
Expand All @@ -115,20 +129,7 @@ def __init__(self, timeout_seconds=URL_TIMEOUT_SECONDS, **kwargs):
self.timeout_seconds = check_timeout_seconds(timeout_seconds)

def convert(self, value, param, ctx):
"""Return file handle."""
try:
return super().convert(value, param, ctx)
except click.exceptions.BadParameter:
handle = self.get_url(value, param, ctx)
return handle

def get_url(self, url, param, ctx):
"""Retrieve file from URL."""
import socket
import urllib.error
import urllib.request

try:
return urllib.request.urlopen(url, timeout=self.timeout_seconds)
except (urllib.error.URLError, urllib.error.HTTPError, socket.timeout):
self.fail(f'{self.name} "{url}" could not be reached within {self.timeout_seconds} s.\n', param, ctx)
return convert_possible_url(value, self.timeout_seconds)
14 changes: 1 addition & 13 deletions tests/cmdline/commands/test_archive_import.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,6 @@
from aiida.orm import Group
from aiida.storage.sqlite_zip.migrator import list_versions
from aiida.tools.archive import ArchiveFormatSqlZip
from click.exceptions import BadParameter

from tests.utils.archives import get_archive_file

Expand Down Expand Up @@ -174,23 +173,12 @@ def test_import_url_and_local_archives(run_cli_command, newest_archive):
run_cli_command(cmd_archive.import_archive, options)


def test_import_url_timeout():
"""Test a timeout to valid URL is correctly errored"""
from aiida.cmdline.params.types import PathOrUrl

timeout_url = 'http://www.google.com:81'

test_timeout_path = PathOrUrl(exists=True, readable=True, timeout_seconds=0)
with pytest.raises(BadParameter, match=f'ath "{timeout_url}" could not be reached within 0 s.'):
test_timeout_path(timeout_url)


def test_raise_malformed_url(run_cli_command):
"""Test the correct error is raised when supplying a malformed URL"""
malformed_url = 'htp://www.aiida.net'

result = run_cli_command(cmd_archive.import_archive, [malformed_url], raises=True)
assert 'could not be reached within' in result.output, result.exception
assert 'could not be reached.' in result.output, result.exception


def test_migration(run_cli_command):
Expand Down
83 changes: 60 additions & 23 deletions tests/cmdline/params/types/test_path.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,38 +8,75 @@
###########################################################################
"""Tests for Path types"""

import click
import pytest
from aiida.cmdline.params.types.path import PathOrUrl, check_timeout_seconds


class TestPath:
"""Tests for `PathOrUrl` and `FileOrUrl`"""
def test_default_timeout():
"""Test the default timeout_seconds value is correct"""
from aiida.cmdline.params.types.path import URL_TIMEOUT_SECONDS

def test_default_timeout(self):
"""Test the default timeout_seconds value is correct"""
from aiida.cmdline.params.types.path import URL_TIMEOUT_SECONDS
import_path = PathOrUrl()

import_path = PathOrUrl()
assert import_path.timeout_seconds == URL_TIMEOUT_SECONDS

assert import_path.timeout_seconds == URL_TIMEOUT_SECONDS

def test_timeout_checks(self):
"""Test that timeout check handles different values.
def test_timeout_checks():
"""Test that timeout check handles different values.
* valid
* none
* wrong type
* outside range
"""
valid_values = [42, '42']
* valid
* none
* wrong type
* outside range
"""
valid_values = [42, '42']

for value in valid_values:
assert check_timeout_seconds(value) == int(value)
for value in valid_values:
assert check_timeout_seconds(value) == int(value)

for invalid in [None, 'test']:
with pytest.raises(TypeError):
check_timeout_seconds(invalid)
for invalid in [None, 'test']:
with pytest.raises(TypeError):
check_timeout_seconds(invalid)

for invalid in [-5, 65]:
with pytest.raises(ValueError):
check_timeout_seconds(invalid)
for invalid in [-5, 65]:
with pytest.raises(ValueError):
check_timeout_seconds(invalid)


def test_fail_non_existing_path():
"""Test the parameter for a non-existing path when ``exists=True``."""
with pytest.raises(
click.BadParameter, match=r'.*does not correspond to a file and also could not be reached as a URL.'
):
PathOrUrl(exists=True).convert('non-existent.txt', None, None)


def test_fail_non_readable_path(tmp_path):
"""Test that if the path exists but cannot be read, the parameter does not try to treat it as a URL."""
filepath = tmp_path / 'some_file'
filepath.touch()
filepath.chmod(0o333) # Make it writable and executable only, so it is not readable

with pytest.raises(click.BadParameter, match=r'.*exists but could not be read.'):
PathOrUrl(exists=True).convert(str(filepath), None, None)


def test_fail_unreachable_url():
"""Test the parameter in case of a valid URL that cannot be reached."""
with pytest.raises(click.BadParameter, match=r'.* could not be reached.'):
PathOrUrl(exists=True).convert('http://domain/some/path', None, None)


def test_fail_timeout(monkeypatch):
"""Test the parameter in case of a valid URL that times out."""
import socket
from urllib import request

def raise_timeout(*args, **kwargs):
raise socket.timeout

monkeypatch.setattr(request, 'urlopen', raise_timeout)

with pytest.raises(click.BadParameter, match=r'.* could not be reached within .* seconds.'):
PathOrUrl(exists=True).convert('http://domain/some/pat', None, None)

0 comments on commit ffc6e4f

Please sign in to comment.