This repository has been archived by the owner on Mar 29, 2023. It is now read-only.

bug: substr fails to compile (#94) #95

Merged
merged 5 commits into from Nov 23, 2021
Changes from all commits
2 changes: 1 addition & 1 deletion ibis_bigquery/compiler.py
@@ -291,7 +291,7 @@ def _string_substring(translator, expr):
raise ValueError("Length parameter should not be a negative value.")

base_substring = operation_registry[ops.Substring]
base_substring(translator, expr)
return base_substring(translator, expr)


def _array_literal_format(expr):
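Context on the compiler.py change: `_string_substring` validates the length argument and then delegates to the generic `Substring` rule, but previously discarded that rule's return value, so compiling any expression that used `substr` produced no SQL fragment (issue #94). Below is a minimal reproduction sketch, assuming the public ibis table-expression API and that the backend can render SQL without a live connection; the exact calls are illustrative and not taken from this PR.

```python
# Hypothetical reproduction sketch for #94 (not part of this PR's diff).
# Assumes ibis and ibis_bigquery are importable and that Backend.compile()
# renders SQL without connecting to BigQuery.
import ibis
import ibis_bigquery

t = ibis.table([("string_col", "string")], name="functional_alltypes")
expr = t.string_col.substr(0, 3)

# Before the fix, _string_substring returned None, so the BigQuery compiler
# had nothing to emit for the Substring node; with the added `return`, this
# prints a query containing a substr(...) call on string_col.
print(ibis_bigquery.Backend().compile(expr))
```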
2 changes: 1 addition & 1 deletion setup.py
@@ -47,7 +47,7 @@
"pyarrow >=1.0.0,<5.0.0dev",
"pydata-google-auth",
# Workaround for ibis 1.x incompatibility with SQLAlchemy 1.4
# AttributeError: module 'sqlalchemy' has no attribute 'Binary'
# AttributeError: module 'sqlalchemy' has no attribute 'Binary'
"sqlalchemy <1.4.0dev",
],
classifiers=[
225 changes: 110 additions & 115 deletions tests/system/conftest.py
@@ -4,39 +4,49 @@
import tempfile
import urllib.request

import google.auth
import google.auth.exceptions
import ibis # noqa: F401
import pytest
from google.api_core.exceptions import NotFound
from google.cloud import bigquery
import google.auth
import google.auth.exceptions

import ibis_bigquery

DEFAULT_PROJECT_ID = "ibis-gbq"
PROJECT_ID_ENV_VAR = "GOOGLE_BIGQUERY_PROJECT_ID"
DATASET_ID = "ibis_gbq_testing"
TESTING_DATA_URI = (
'https://raw.githubusercontent.com/ibis-project/testing-data/master')
TESTING_DATA_URI = "https://raw.githubusercontent.com/ibis-project/testing-data/master"

bq = ibis_bigquery.Backend()


def pytest_addoption(parser):
parser.addoption('--save-dataset', action='store_true', default=False,
help='saves all test data in the testing dataset')
parser.addoption('--no-refresh-dataset', action='store_true', default=False,
help='do not refresh the test data in the testing dataset')
parser.addoption(
"--save-dataset",
action="store_true",
default=False,
help="saves all test data in the testing dataset",
)
parser.addoption(
"--no-refresh-dataset",
action="store_true",
default=False,
help="do not refresh the test data in the testing dataset",
)


@pytest.fixture(scope="session")
def dataset_id() -> str:
return DATASET_ID
return DATASET_ID


@pytest.fixture(scope="session")
def default_credentials():
try:
credentials, project_id = google.auth.default(scopes=ibis_bigquery.EXTERNAL_DATA_SCOPES)
credentials, project_id = google.auth.default(
scopes=ibis_bigquery.EXTERNAL_DATA_SCOPES
)
except google.auth.exceptions.DefaultCredentialsError as exc:
pytest.skip(f"Could not get GCP credentials: {exc}")

@@ -115,13 +125,13 @@ def public(project_id, credentials):
# Native BigQuery client fixtures
# required to dynamically create the testing dataset,
# the tables, and to populate data into the tables.
@pytest.fixture(scope='session')
@pytest.fixture(scope="session")
def bqclient(client):
return client.client


# Create testing dataset.
@pytest.fixture(autouse=True, scope='session')
@pytest.fixture(autouse=True, scope="session")
def testing_dataset(bqclient, request, dataset_id):
dataset_ref = bigquery.DatasetReference(bqclient.project, dataset_id)
try:
@@ -130,88 +140,81 @@ def testing_dataset(bqclient, request, dataset_id):
pass
yield dataset_ref
if not request.config.getoption("--save-dataset"):
bqclient.delete_dataset(
dataset_ref, delete_contents=True, not_found_ok=True
)
bqclient.delete_dataset(dataset_ref, delete_contents=True, not_found_ok=True)


@pytest.fixture(scope='session')
@pytest.fixture(scope="session")
def functional_alltypes_table(testing_dataset):
return bigquery.TableReference(testing_dataset, 'functional_alltypes')
return bigquery.TableReference(testing_dataset, "functional_alltypes")


@pytest.fixture(autouse=True, scope='session')
@pytest.fixture(autouse=True, scope="session")
def create_functional_alltypes_table(bqclient, functional_alltypes_table):
table = bigquery.Table(functional_alltypes_table)
table.schema = [
bigquery.SchemaField('index', 'INTEGER'),
bigquery.SchemaField('Unnamed_0', 'INTEGER'),
bigquery.SchemaField('id', 'INTEGER'),
bigquery.SchemaField('bool_col', 'BOOLEAN'),
bigquery.SchemaField('tinyint_col', 'INTEGER'),
bigquery.SchemaField('smallint_col', 'INTEGER'),
bigquery.SchemaField('int_col', 'INTEGER'),
bigquery.SchemaField('bigint_col', 'INTEGER'),
bigquery.SchemaField('float_col', 'FLOAT'),
bigquery.SchemaField('double_col', 'FLOAT'),
bigquery.SchemaField('date_string_col', 'STRING'),
bigquery.SchemaField('string_col', 'STRING'),
bigquery.SchemaField('timestamp_col', 'TIMESTAMP'),
bigquery.SchemaField('year', 'INTEGER'),
bigquery.SchemaField('month', 'INTEGER'),
bigquery.SchemaField("index", "INTEGER"),
bigquery.SchemaField("Unnamed_0", "INTEGER"),
bigquery.SchemaField("id", "INTEGER"),
bigquery.SchemaField("bool_col", "BOOLEAN"),
bigquery.SchemaField("tinyint_col", "INTEGER"),
bigquery.SchemaField("smallint_col", "INTEGER"),
bigquery.SchemaField("int_col", "INTEGER"),
bigquery.SchemaField("bigint_col", "INTEGER"),
bigquery.SchemaField("float_col", "FLOAT"),
bigquery.SchemaField("double_col", "FLOAT"),
bigquery.SchemaField("date_string_col", "STRING"),
bigquery.SchemaField("string_col", "STRING"),
bigquery.SchemaField("timestamp_col", "TIMESTAMP"),
bigquery.SchemaField("year", "INTEGER"),
bigquery.SchemaField("month", "INTEGER"),
]
bqclient.create_table(table, exists_ok=True)
return table


@pytest.fixture(autouse=True, scope='session')
@pytest.fixture(autouse=True, scope="session")
def load_functional_alltypes_data(request, bqclient, create_functional_alltypes_table):
if request.config.getoption("--no-refresh-dataset"):
return

table = create_functional_alltypes_table
load_config = bigquery.LoadJobConfig()
load_config.skip_leading_rows = 1 # skip the header row.
load_config.write_disposition = 'WRITE_TRUNCATE'
filepath = download_file(
'{}/functional_alltypes.csv'.format(TESTING_DATA_URI))
with open(filepath.name, 'rb') as csvfile:
load_config.write_disposition = "WRITE_TRUNCATE"
filepath = download_file("{}/functional_alltypes.csv".format(TESTING_DATA_URI))
with open(filepath.name, "rb") as csvfile:
job = bqclient.load_table_from_file(
csvfile,
table,
job_config=load_config,
csvfile, table, job_config=load_config,
).result()
if job.error_result:
print('error')
print("error")


# Ingestion time partitioned table.
@pytest.fixture(scope='session')
@pytest.fixture(scope="session")
def functional_alltypes_parted_table(testing_dataset):
return bigquery.TableReference(
testing_dataset, 'functional_alltypes_parted')
return bigquery.TableReference(testing_dataset, "functional_alltypes_parted")


@pytest.fixture(scope='session')
def create_functional_alltypes_parted_table(
bqclient, functional_alltypes_parted_table):
@pytest.fixture(scope="session")
def create_functional_alltypes_parted_table(bqclient, functional_alltypes_parted_table):
table = bigquery.Table(functional_alltypes_parted_table)
table.schema = [
bigquery.SchemaField('index', 'INTEGER'),
bigquery.SchemaField('Unnamed_0', 'INTEGER'),
bigquery.SchemaField('id', 'INTEGER'),
bigquery.SchemaField('bool_col', 'BOOLEAN'),
bigquery.SchemaField('tinyint_col', 'INTEGER'),
bigquery.SchemaField('smallint_col', 'INTEGER'),
bigquery.SchemaField('int_col', 'INTEGER'),
bigquery.SchemaField('bigint_col', 'INTEGER'),
bigquery.SchemaField('float_col', 'FLOAT'),
bigquery.SchemaField('double_col', 'FLOAT'),
bigquery.SchemaField('date_string_col', 'STRING'),
bigquery.SchemaField('string_col', 'STRING'),
bigquery.SchemaField('timestamp_col', 'TIMESTAMP'),
bigquery.SchemaField('year', 'INTEGER'),
bigquery.SchemaField('month', 'INTEGER'),
bigquery.SchemaField("index", "INTEGER"),
bigquery.SchemaField("Unnamed_0", "INTEGER"),
bigquery.SchemaField("id", "INTEGER"),
bigquery.SchemaField("bool_col", "BOOLEAN"),
bigquery.SchemaField("tinyint_col", "INTEGER"),
bigquery.SchemaField("smallint_col", "INTEGER"),
bigquery.SchemaField("int_col", "INTEGER"),
bigquery.SchemaField("bigint_col", "INTEGER"),
bigquery.SchemaField("float_col", "FLOAT"),
bigquery.SchemaField("double_col", "FLOAT"),
bigquery.SchemaField("date_string_col", "STRING"),
bigquery.SchemaField("string_col", "STRING"),
bigquery.SchemaField("timestamp_col", "TIMESTAMP"),
bigquery.SchemaField("year", "INTEGER"),
bigquery.SchemaField("month", "INTEGER"),
]
table.time_partitioning = bigquery.TimePartitioning(
type_=bigquery.TimePartitioningType.DAY
@@ -221,128 +224,120 @@ def create_functional_alltypes_parted_table(
return table


@pytest.fixture(autouse=True, scope='session')
@pytest.fixture(autouse=True, scope="session")
def load_functional_alltypes_parted_data(
request, bqclient, create_functional_alltypes_parted_table):
request, bqclient, create_functional_alltypes_parted_table
):
if request.config.getoption("--no-refresh-dataset"):
return

table = create_functional_alltypes_parted_table
load_config = bigquery.LoadJobConfig()
load_config.write_disposition = 'WRITE_TRUNCATE'
load_config.write_disposition = "WRITE_TRUNCATE"
load_config.skip_leading_rows = 1 # skip the header row.
filepath = download_file(
'{}/functional_alltypes.csv'.format(TESTING_DATA_URI))
with open(filepath.name, 'rb') as csvfile:
filepath = download_file("{}/functional_alltypes.csv".format(TESTING_DATA_URI))
with open(filepath.name, "rb") as csvfile:
job = bqclient.load_table_from_file(
csvfile,
table,
job_config=load_config,
csvfile, table, job_config=load_config,
).result()
if job.error_result:
print('error')
print("error")


# Create a table with complex data types (nested and repeated).
@pytest.fixture(scope='session')
@pytest.fixture(scope="session")
def struct_bq_table(testing_dataset):
return bigquery.TableReference(testing_dataset, 'struct_table')
return bigquery.TableReference(testing_dataset, "struct_table")


@pytest.fixture(autouse=True, scope='session')
@pytest.fixture(autouse=True, scope="session")
def load_struct_table_data(request, bqclient, struct_bq_table):
if request.config.getoption("--no-refresh-dataset"):
return

load_config = bigquery.LoadJobConfig()
load_config.write_disposition = 'WRITE_TRUNCATE'
load_config.source_format = 'AVRO'
filepath = download_file(
'{}/struct_table.avro'.format(TESTING_DATA_URI))
with open(filepath.name, 'rb') as avrofile:
load_config.write_disposition = "WRITE_TRUNCATE"
load_config.source_format = "AVRO"
filepath = download_file("{}/struct_table.avro".format(TESTING_DATA_URI))
with open(filepath.name, "rb") as avrofile:
job = bqclient.load_table_from_file(
avrofile,
struct_bq_table,
job_config=load_config,
avrofile, struct_bq_table, job_config=load_config,
).result()
if job.error_result:
print('error')
print("error")


# Create empty date-partitioned table.
@pytest.fixture(scope='session')
@pytest.fixture(scope="session")
def date_table(testing_dataset):
return bigquery.TableReference(testing_dataset, 'date_column_parted')
return bigquery.TableReference(testing_dataset, "date_column_parted")


@pytest.fixture(autouse=True, scope='session')
@pytest.fixture(autouse=True, scope="session")
def create_date_table(bqclient, date_table):
table = bigquery.Table(date_table)
table.schema = [
bigquery.SchemaField('my_date_parted_col', 'DATE'),
bigquery.SchemaField('string_col', 'STRING'),
bigquery.SchemaField('int_col', 'INTEGER'),
bigquery.SchemaField("my_date_parted_col", "DATE"),
bigquery.SchemaField("string_col", "STRING"),
bigquery.SchemaField("int_col", "INTEGER"),
]
table.time_partitioning = bigquery.TimePartitioning(
field='my_date_parted_col'
)
table.time_partitioning = bigquery.TimePartitioning(field="my_date_parted_col")
bqclient.create_table(table, exists_ok=True)
return table


# Create empty timestamp-partitioned tables.
@pytest.fixture(scope='session')
@pytest.fixture(scope="session")
def timestamp_table(testing_dataset):
return bigquery.TableReference(testing_dataset, 'timestamp_column_parted')
return bigquery.TableReference(testing_dataset, "timestamp_column_parted")


@pytest.fixture(autouse=True, scope='session')
@pytest.fixture(autouse=True, scope="session")
def create_timestamp_table(bqclient, timestamp_table):
table = bigquery.Table(timestamp_table)
table.schema = [
bigquery.SchemaField('my_timestamp_parted_col', 'DATE'),
bigquery.SchemaField('string_col', 'STRING'),
bigquery.SchemaField('int_col', 'INTEGER'),
bigquery.SchemaField("my_timestamp_parted_col", "DATE"),
bigquery.SchemaField("string_col", "STRING"),
bigquery.SchemaField("int_col", "INTEGER"),
]
table.time_partitioning = bigquery.TimePartitioning(
field='my_timestamp_parted_col'
)
table.time_partitioning = bigquery.TimePartitioning(field="my_timestamp_parted_col")
bqclient.create_table(table, exists_ok=True)


# Create a table with a numeric column
@pytest.fixture(scope='session')
@pytest.fixture(scope="session")
def numeric_bq_table(testing_dataset):
return bigquery.TableReference(testing_dataset, 'numeric_table')
return bigquery.TableReference(testing_dataset, "numeric_table")


@pytest.fixture(scope='session')
@pytest.fixture(scope="session")
def create_numeric_table(bqclient, numeric_bq_table):
table = bigquery.Table(numeric_bq_table)
table.schema = [
bigquery.SchemaField('string_col', 'STRING'),
bigquery.SchemaField('numeric_col', 'NUMERIC'),
bigquery.SchemaField("string_col", "STRING"),
bigquery.SchemaField("numeric_col", "NUMERIC"),
]
bqclient.create_table(table, exists_ok=True)
return table


@pytest.fixture(autouse=True, scope='session')
@pytest.fixture(autouse=True, scope="session")
def load_numeric_data(request, bqclient, create_numeric_table):
if request.config.getoption("--no-refresh-dataset"):
return

load_config = bigquery.LoadJobConfig()
load_config.write_disposition = 'WRITE_TRUNCATE'
load_config.source_format = 'NEWLINE_DELIMITED_JSON'
data = u'''{"string_col": "1st value", "numeric_col": 0.999999999}\n\
{"string_col": "2nd value", "numeric_col": 0.000000002}'''
load_config.write_disposition = "WRITE_TRUNCATE"
load_config.source_format = "NEWLINE_DELIMITED_JSON"
data = """{"string_col": "1st value", "numeric_col": 0.999999999}\n\
{"string_col": "2nd value", "numeric_col": 0.000000002}"""
jsonfile = io.StringIO(data)
table = create_numeric_table
job = bqclient.load_table_from_file(
jsonfile, table, job_config=load_config).result()
jsonfile, table, job_config=load_config
).result()
if job.error_result:
print('error')
print("error")


def download_file(url):
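The conftest.py changes above are largely mechanical: double-quoted strings, black-style line wrapping, and reordered imports; the fixtures themselves behave as before. The `--save-dataset` and `--no-refresh-dataset` options are read via `request.config.getoption(...)` and would be passed on the pytest command line (for example, `pytest tests/system --save-dataset`). A hypothetical system test consuming these fixtures might look like the sketch below; the fixture names come from this conftest, but the test itself and the `compile()` call are assumptions, not code from this PR.

```python
# Hypothetical system test (not part of this PR) showing how the session-scoped
# fixtures above would be consumed. `client` is the connected BigQuery backend
# fixture and `dataset_id` names the dataset created by testing_dataset.
def test_substr_compiles(client, dataset_id):
    t = client.table("functional_alltypes", database=dataset_id)
    expr = t.string_col.substr(0, 3).name("prefix")
    sql = client.compile(expr)  # assumption: the client exposes compile()
    assert "substr" in sql.lower()
```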