From e0503f81e5d29d3db0dc17a6b67c5ddd279c7c51 Mon Sep 17 00:00:00 2001
From: ncclementi
Date: Mon, 19 Aug 2024 17:29:57 -0400
Subject: [PATCH] refactor(duckdb): con.ddl.list_ implementation and con.tables refactor

---
Review notes (placed after the "---" separator, so they are not part of the
commit message):

 * This copy of the patch was rebuilt from a whitespace-collapsed version, so
   indentation and blank lines are reconstructed; run `git apply --check`
   before relying on it.
 * Changes relative to the submitted patch (all made in place, so hunk sizes
   and the diffstat are unchanged):
   - DDLAccessor._raise_if_not_implemented and
     TablesAccessor._execute_if_exists now look methods up with
     getattr(..., None). A backend that lacks e.g. _list_temp_tables then
     produces NotImplementedError (which _execute_if_exists catches) instead
     of an unhandled AttributeError.
   - duckdb list_tables() calls self._list_*() instead of
     self._backend._list_*(); the method is defined on the backend itself.
     It also returns sorted(...) so the result order is deterministic.
   - _list_temp_views docstring now says "temporary views"; a comment typo
     ("a given backends") is fixed.
 * To verify: the duckdb hunk uses @deprecated but this patch adds no import
   for it in ibis/backends/duckdb/__init__.py; confirm it is already imported.
 * The blob ids on the "index" lines are those of the submitted patch.

 ibis/backends/__init__.py        | 104 +++++++++++----------
 ibis/backends/duckdb/__init__.py | 151 ++++++++++++++++++++-----------
 2 files changed, 154 insertions(+), 101 deletions(-)

diff --git a/ibis/backends/__init__.py b/ibis/backends/__init__.py
index ab2da79f91ebb..20973d265b278 100644
--- a/ibis/backends/__init__.py
+++ b/ibis/backends/__init__.py
@@ -17,7 +17,6 @@
 import ibis.expr.types as ir
 from ibis import util
 from ibis.common.caching import RefCountedCache
-from ibis.util import deprecated
 
 if TYPE_CHECKING:
     from collections.abc import Iterable, Iterator, Mapping, MutableMapping
@@ -48,6 +47,31 @@ class TablesAccessor(collections.abc.Mapping):
 
     def __init__(self, backend: BaseBackend):
         self._backend = backend
+        # list_* methods that might exist on a given backend
+        list_methods = [
+            "list_tables",
+            "list_temp_tables",
+            "list_views",
+            "list_temp_views",
+        ]
+
+        self._tables = []
+        for method_name in list_methods:
+            self._tables.extend(self._execute_if_exists(method_name))
+
+        # TODO: maybe there is a better way of removing duplicates, for now
+        self._tables = list(set(self._tables))
+
+    def _execute_if_exists(self, method_name: str) -> list[str]:
+        """Executes method if it exists and it doesn't raise a NotImplementedError, else returns an empty list."""
+        method = getattr(self._backend.ddl, method_name, None)
+        if callable(method):
+            try:
+                return method()
+            except NotImplementedError:
+                pass
+        return []
+
     def __getitem__(self, name) -> ir.Table:
         try:
             return self._backend.table(name)
@@ -63,29 +87,28 @@ def __getattr__(self, name) -> ir.Table:
             raise AttributeError(name) from exc
 
     def __iter__(self) -> Iterator[str]:
-        return iter(sorted(self._backend.list_tables()))
+        return iter(sorted(self._tables))
 
     def __len__(self) -> int:
-        return len(self._backend.list_tables())
+        return len(self._tables)
 
     def __dir__(self) -> list[str]:
         o = set()
         o.update(dir(type(self)))
         o.update(
             name
-            for name in self._backend.list_tables()
+            for name in self._tables
             if name.isidentifier() and not keyword.iskeyword(name)
         )
         return list(o)
 
     def __repr__(self) -> str:
-        tables = self._backend.list_tables()
         rows = ["Tables", "------"]
-        rows.extend(f"- {name}" for name in sorted(tables))
+        rows.extend(f"- {name}" for name in sorted(self._tables))
         return "\n".join(rows)
 
     def _ipython_key_completions_(self) -> list[str]:
-        return self._backend.list_tables()
+        return self._tables
 
 
 class DDLAccessor:
@@ -94,17 +117,40 @@ class DDLAccessor:
     def __init__(self, backend: BaseBackend):
         self._backend = backend
 
+    def _raise_if_not_implemented(self, method_name: str):
+        method = getattr(self._backend, method_name, None)
+        if not callable(method):
+            raise NotImplementedError(
+                f"The method {method_name} is not implemented for the {self._backend.name} backend"
+            )
+
     def list_tables(
         self, like: str | None = None, database: tuple[str, str] | str | None = None
     ) -> list[str]:
         """Return the list of table names via the backend's implementation."""
-        return self._backend.list_tables(like=like, database=database)
+        self._raise_if_not_implemented("_list_tables")
+        return self._backend._list_tables(like=like, database=database)
+
+    def list_temp_tables(
+        self, like: str | None = None, database: tuple[str, str] | str | None = None
+    ) -> list[str]:
+        """Return the list of temporary table names via the backend's implementation."""
+        self._raise_if_not_implemented("_list_temp_tables")
+        return self._backend._list_temp_tables(like=like, database=database)
 
     def list_views(
         self, like: str | None = None, database: tuple[str, str] | str | None = None
     ) -> list[str]:
         """Return the list of view names via the backend's implementation."""
-        return self._backend.list_views(like=like, database=database)
+        self._raise_if_not_implemented("_list_views")
+        return self._backend._list_views(like=like, database=database)
+
+    def list_temp_views(
+        self, like: str | None = None, database: tuple[str, str] | str | None = None
+    ) -> list[str]:
+        """Return the list of temp view names via the backend's implementation."""
+        self._raise_if_not_implemented("_list_temp_views")
+        return self._backend._list_temp_views(like=like, database=database)
 
 
 class _FileIOHandler:
@@ -952,46 +998,6 @@ def _filter_with_like(values: Iterable[str], like: str | None = None) -> list[st
         pattern = re.compile(like)
         return sorted(filter(pattern.findall, values))
 
-    @deprecated(as_of="10.0", instead="use the con.ddl.list_tables()")
-    @abc.abstractmethod
-    def list_tables(
-        self, like: str | None = None, database: tuple[str, str] | str | None = None
-    ) -> list[str]:
-        """Return the list of table names in the current database.
-
-        For some backends, the tables may be files in a directory,
-        or other equivalent entities in a SQL database.
-
-        ::: {.callout-note}
-        ## Ibis does not use the word `schema` to refer to database hierarchy.
-
-        A collection of tables is referred to as a `database`.
-        A collection of `database` is referred to as a `catalog`.
-
-        These terms are mapped onto the corresponding features in each
-        backend (where available), regardless of whether the backend itself
-        uses the same terminology.
-        :::
-
-        Parameters
-        ----------
-        like
-            A pattern in Python's regex format.
-        database
-            The database from which to list tables.
-            If not provided, the current database is used.
-            For backends that support multi-level table hierarchies, you can
-            pass in a dotted string path like `"catalog.database"` or a tuple of
-            strings like `("catalog", "database")`.
-
-        Returns
-        -------
-        list[str]
-            The list of the table names that match the pattern `like`.
-
-        """
-        return self.ddl.list_tables(like=like, database=database)
-
     @abc.abstractmethod
     def table(
         self, name: str, database: tuple[str, str] | str | None = None
diff --git a/ibis/backends/duckdb/__init__.py b/ibis/backends/duckdb/__init__.py
index f1e2f647b1569..f9cd846eea221 100644
--- a/ibis/backends/duckdb/__init__.py
+++ b/ibis/backends/duckdb/__init__.py
@@ -973,86 +973,133 @@ def read_delta(
         self.con.register(table_name, delta_table.to_pyarrow_dataset())
         return self.table(table_name)
 
-    def list_tables(
+    def _list_tables(
         self,
         like: str | None = None,
         database: tuple[str, str] | str | None = None,
-        schema: str | None = None,
     ) -> list[str]:
-        """List tables and views.
+        """List physical tables."""
 
-        ::: {.callout-note}
-        ## Ibis does not use the word `schema` to refer to database hierarchy.
+        table_loc = self._warn_and_create_table_loc(database)
 
-        A collection of tables is referred to as a `database`.
-        A collection of `database` is referred to as a `catalog`.
+        catalog = table_loc.catalog or self.current_catalog
+        database = table_loc.db or self.current_database
 
-        These terms are mapped onto the corresponding features in each
-        backend (where available), regardless of whether the backend itself
-        uses the same terminology.
-        :::
+        col = "table_name"
+        sql = (
+            sg.select(col)
+            .from_(sg.table("tables", db="information_schema"))
+            .where(
+                C.table_catalog.eq(sge.convert(catalog)),
+                C.table_schema.eq(sge.convert(database)),
+                C.table_type.eq("BASE TABLE"),
+            )
+            .sql(self.name, pretty=True)
+        )
 
-        Parameters
-        ----------
-        like
-            Regex to filter by table/view name.
-        database
-            Database location. If not passed, uses the current database.
+        out = self.con.execute(sql).fetch_arrow_table()
 
-            By default uses the current `database` (`self.current_database`) and
-            `catalog` (`self.current_catalog`).
+        return self._filter_with_like(out[col].to_pylist(), like)
 
-            To specify a table in a separate catalog, you can pass in the
-            catalog and database as a string `"catalog.database"`, or as a tuple of
-            strings `("catalog", "database")`.
-        schema
-            [deprecated] Schema name. If not passed, uses the current schema.
+    def _list_views(
+        self,
+        like: str | None = None,
+        database: tuple[str, str] | str | None = None,
+    ) -> list[str]:
+        """List views."""
 
-        Returns
-        -------
-        list[str]
-            List of table and view names.
+        table_loc = self._warn_and_create_table_loc(database)
 
-        Examples
-        --------
-        >>> import ibis
-        >>> con = ibis.duckdb.connect()
-        >>> foo = con.create_table("foo", schema=ibis.schema(dict(a="int")))
-        >>> con.list_tables()
-        ['foo']
-        >>> bar = con.create_view("bar", foo)
-        >>> con.list_tables()
-        ['bar', 'foo']
-        >>> con.create_database("my_database")
-        >>> con.list_tables(database="my_database")
-        []
-        >>> with con.begin() as c:
-        ...     c.exec_driver_sql("CREATE TABLE my_database.baz (a INTEGER)")  # doctest: +ELLIPSIS
-        <...>
-        >>> con.list_tables(database="my_database")
-        ['baz']
+        catalog = table_loc.catalog or self.current_catalog
+        database = table_loc.db or self.current_database
 
-        """
-        table_loc = self._warn_and_create_table_loc(database, schema)
+        col = "table_name"
+        sql = (
+            sg.select(col)
+            .from_(sg.table("tables", db="information_schema"))
+            .where(
+                C.table_catalog.eq(sge.convert(catalog)),
+                C.table_schema.eq(sge.convert(database)),
+                C.table_type.eq("VIEW"),
+            )
+            .sql(self.name, pretty=True)
+        )
 
-        catalog = table_loc.catalog or self.current_catalog
+        out = self.con.execute(sql).fetch_arrow_table()
+
+        return self._filter_with_like(out[col].to_pylist(), like)
+
+    def _list_temp_views(
+        self,
+        like: str | None = None,
+        database: tuple[str, str] | str | None = None,
+    ) -> list[str]:
+        """List temporary views."""
+
+        table_loc = self._warn_and_create_table_loc(database)
+
+        catalog = "temp"
         database = table_loc.db or self.current_database
 
         col = "table_name"
         sql = (
             sg.select(col)
             .from_(sg.table("tables", db="information_schema"))
-            .distinct()
             .where(
-                C.table_catalog.isin(sge.convert(catalog), sge.convert("temp")),
+                C.table_catalog.eq(sge.convert(catalog)),
                 C.table_schema.eq(sge.convert(database)),
+                C.table_type.eq("VIEW"),
             )
-            .sql(self.dialect)
+            .sql(self.name, pretty=True)
         )
+
         out = self.con.execute(sql).fetch_arrow_table()
 
         return self._filter_with_like(out[col].to_pylist(), like)
 
+    def _list_temp_tables(
+        self,
+        like: str | None = None,
+        database: tuple[str, str] | str | None = None,
+    ) -> list[str]:
+        """List temporary tables."""
+
+        col = "table_name"
+        sql = (
+            sg.select(col)
+            .from_(sg.table("tables", db="information_schema"))
+            .where(C.table_type.eq("LOCAL TEMPORARY"))
+            .sql(self.name, pretty=True)
+        )
+
+        out = self.con.execute(sql).fetch_arrow_table()
+
+        return self._filter_with_like(out[col].to_pylist(), like)
+
+    @deprecated(as_of="10.0", instead="use the con.tables")
+    def list_tables(
+        self,
+        like: str | None = None,
+        database: tuple[str, str] | str | None = None,
+        schema: str | None = None,
+    ) -> list[str]:
+        """List tables and views."""
+
+        table_loc = self._warn_and_create_table_loc(database, schema)
+
+        database = self.current_database
+        if table_loc is not None:
+            database = table_loc.db or database
+
+        tables_and_views = sorted(
+            set(self._list_tables(like=like, database=database))
+            | set(self._list_temp_tables(like=like, database=database))
+            | set(self._list_views(like=like, database=database))
+            | set(self._list_temp_views(like=like, database=database))
+        )
+
+        return tables_and_views
+
     def read_postgres(
         self, uri: str, *, table_name: str | None = None, database: str = "public"
     ) -> ir.Table: