Skip to content

Commit

Permalink
Merge branch 'main' of github.com:Altinity/clickhouse-regression
Browse files Browse the repository at this point in the history
  • Loading branch information
alsugiliazova committed Dec 6, 2024
2 parents 8a3c109 + 60ef329 commit 92eec1d
Show file tree
Hide file tree
Showing 10 changed files with 942 additions and 778 deletions.
86 changes: 66 additions & 20 deletions .github/workflows/run-regression-repeater.yml
Original file line number Diff line number Diff line change
Expand Up @@ -136,26 +136,66 @@ on:
type: number
default: 1

# Workflow-wide architecture switches, derived from the `package` input.
env:
# true unless the package string mentions arm64 or aarch64
run_x86: ${{ !contains(inputs.package, 'arm64') && !contains(inputs.package, 'aarch64') }}
# true unless the package string mentions x86 or amd64
run_arm: ${{ !contains(inputs.package, 'x86') && !contains(inputs.package, 'amd64') }}

jobs:
define-matrix:
define-matrix-x86:
runs-on: [self-hosted, x86, type-cpx41, image-x86-app-docker-ce]
outputs:
parallel_ids: ${{ steps.parallel_ids.outputs.parallel_ids }}
run_x86: ${{ steps.set_run_x86.outputs.run_x86 }}

steps:
- name: Define parallel_ids
- name: Set run_x86
id: set_run_x86
run: echo "run_x86=${{ env.run_x86 }}" >> $GITHUB_OUTPUT

- name: Define parallel_ids for x86
id: parallel_ids
run: |
echo parallel_ids=[\"$(seq -s '","' 1 ${{ inputs.repeat }})\"] >> "$GITHUB_OUTPUT"
if [ "${{ env.run_arm }}" = "true" ]; then
count=$(((${{ inputs.repeat }}+1)/2)) # +1 to imitate ceil division
else
count=${{ inputs.repeat }}
fi
echo parallel_ids=[\"$(seq -s '","' 1 $count)\"] >> $GITHUB_OUTPUT
define-matrix-arm:
  # Computes how many ARM repetitions to run and whether the ARM job should
  # run at all. When x86 also runs, ARM takes the floor half of `repeat`.
  runs-on: [self-hosted, x86, type-cpx41, image-x86-app-docker-ce]
  outputs:
    parallel_ids: ${{ steps.parallel_ids.outputs.parallel_ids }}
    run_arm: ${{ steps.set_run_arm.outputs.run_arm }}

  steps:
    - name: Define parallel_ids for ARM
      id: parallel_ids
      run: |
        if [ "${{ env.run_x86 }}" = "true" ]; then
          count=$((${{ inputs.repeat }}/2))
        else
          count=${{ inputs.repeat }}
        fi
        # Expose the count so the next step can decide whether ARM has any work.
        echo "count=$count" >> "$GITHUB_OUTPUT"
        if [ "$count" -eq 0 ]; then
          echo "parallel_ids=[]" >> "$GITHUB_OUTPUT"
        else
          # Quoted so the shell never treats the [ ] in the value as a glob.
          echo "parallel_ids=[\"$(seq -s '","' 1 "$count")\"]" >> "$GITHUB_OUTPUT"
        fi

    - name: Set run_arm
      id: set_run_arm
      # An empty matrix makes the dependent job fail, so report run_arm=false
      # when there are zero ARM iterations (e.g. repeat=1 with x86 enabled).
      # The job-level `if` is evaluated before the matrix is applied, so the
      # ARM job is then skipped cleanly.
      run: |
        if [ "${{ env.run_arm }}" = "true" ] && [ "${{ steps.parallel_ids.outputs.count }}" -gt 0 ]; then
          echo "run_arm=true" >> "$GITHUB_OUTPUT"
        else
          echo "run_arm=false" >> "$GITHUB_OUTPUT"
        fi
x86:
if: ${{ !contains(inputs.package, 'arm64') && !contains(inputs.package, 'aarch64') }}
if: ${{ needs.define-matrix-x86.outputs.run_x86 == 'true' }}
uses: ./.github/workflows/reusable-workflow-x86.yml
secrets: inherit
needs: define-matrix
needs: define-matrix-x86
strategy:
fail-fast: false
matrix:
parallel_id: ${{fromJson(needs.define-matrix.outputs.parallel_ids)}}
parallel_id: ${{fromJson(needs.define-matrix-x86.outputs.parallel_ids)}}

with:
package: ${{ inputs.package }}
Expand All @@ -167,16 +207,22 @@ jobs:
extra_args: ${{ inputs.flags != 'none' && inputs.flags || ''}} ${{ inputs.extra_args }}
artifact_name: ${{ matrix.parallel_id }}-x86

# ARM:
# if: ${{ !contains(inputs.package, 'x86') && !contains(inputs.package, 'amd64') }}
# uses: ./.github/workflows/reusable-workflow-arm.yml
# secrets: inherit
# with:
# package: ${{ inputs.package }}
# version: ${{ inputs.version }}
# suite: ${{ inputs.suite }}
# artifacts: ${{ inputs.artifacts }}
# output_format: ${{ inputs.output_format }}
# ref: ${{ inputs.ref }}
# extra_args: ${{ inputs.flags != 'none' && inputs.flags || ''}} ${{ inputs.extra_args }}
# artifact_name: arm
# Runs the reusable ARM regression workflow once per parallel_id produced by
# the define-matrix-arm job.
ARM:
# Skipped unless define-matrix-arm reported run_arm as the string 'true'.
if: ${{ needs.define-matrix-arm.outputs.run_arm == 'true' }}
uses: ./.github/workflows/reusable-workflow-arm.yml
secrets: inherit
needs: define-matrix-arm
strategy:
# Let the remaining repetitions finish even if one of them fails.
fail-fast: false
matrix:
# parallel_ids is a JSON array string; fromJson turns it into the matrix axis.
parallel_id: ${{fromJson(needs.define-matrix-arm.outputs.parallel_ids)}}

with:
package: ${{ inputs.package }}
version: ${{ inputs.version }}
suite: ${{ inputs.suite }}
artifacts: ${{ inputs.artifacts }}
output_format: ${{ inputs.output_format }}
ref: ${{ inputs.ref }}
# `flags` is passed through unless it is the literal 'none'; extra_args is appended.
extra_args: ${{ inputs.flags != 'none' && inputs.flags || ''}} ${{ inputs.extra_args }}
# Suffix keeps ARM artifacts distinct from the x86 ones for the same parallel_id.
artifact_name: ${{ matrix.parallel_id }}-arm
24 changes: 12 additions & 12 deletions helpers/queries.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,20 +36,20 @@ def optimize(
def get_column_names(self, node: ClickHouseNode, table_name: str, timeout=30) -> list:
"""Get a list of a table's column names."""
r = node.query(
f"SELECT groupArray(name) FROM system.columns WHERE table='{table_name}' FORMAT JSONCompactEachRow",
f"SELECT groupArray(name) FROM system.columns WHERE table='{table_name}' FORMAT TSV",
timeout=timeout,
)
return json.loads(r.output)[0]
return json.loads(r.output)


@TestStep
def get_active_parts(self, node: ClickHouseNode, table_name: str, timeout=30) -> list:
"""Get a list of active parts in a table."""
r = node.query(
f"SELECT groupArray(name) FROM system.parts WHERE table='{table_name}' and active=1 FORMAT JSONCompactEachRow",
f"SELECT groupArray(name) FROM system.parts WHERE table='{table_name}' and active=1 FORMAT TSV",
timeout=timeout,
)
return json.loads(r.output)[0]
return json.loads(r.output)


@TestStep
Expand All @@ -58,10 +58,10 @@ def get_active_partition_ids(
) -> list:
"""Get a list of active partitions in a table."""
r = node.query(
f"SELECT groupArray(partition_id) FROM system.parts WHERE table='{table_name}' and active=1 FORMAT JSONCompactEachRow",
f"SELECT groupArray(partition_id) FROM system.parts WHERE table='{table_name}' and active=1 FORMAT TSV",
timeout=timeout,
)
return json.loads(r.output)[0]
return json.loads(r.output)


@TestStep
Expand All @@ -76,11 +76,11 @@ def get_row_count(
column = ""

r = node.query(
f"SELECT count({column}) FROM {table_name} FORMAT JSONCompactEachRow",
f"SELECT count({column}) FROM {table_name} FORMAT TSV",
exitcode=0,
timeout=timeout,
)
return int(json.loads(r.output)[0])
return int(json.loads(r.output))


@TestStep
Expand All @@ -89,10 +89,10 @@ def get_projections(self, node: ClickHouseNode, table_name: str) -> list:
Get a list of active projections for a given table.
"""
r = node.query(
f"SELECT groupArray(distinct(name)) FROM system.projection_parts WHERE table='{table_name}' and active FORMAT JSONCompactEachRow",
f"SELECT groupArray(distinct(name)) FROM system.projection_parts WHERE table='{table_name}' and active FORMAT TSV",
exitcode=0,
)
return json.loads(r.output)[0]
return json.loads(r.output)


@TestStep
Expand All @@ -101,10 +101,10 @@ def get_indexes(self, node: ClickHouseNode, table_name: str) -> list:
Get a list of secondary indexes for a given table.
"""
r = node.query(
f"SELECT groupArray(name) FROM system.data_skipping_indices WHERE table='{table_name}' FORMAT JSONCompactEachRow",
f"SELECT groupArray(name) FROM system.data_skipping_indices WHERE table='{table_name}' FORMAT TSV",
exitcode=0,
)
return json.loads(r.output)[0]
return json.loads(r.output)


@TestStep
Expand Down
Binary file added parquet/data/datatypes/float16.parquet
Binary file not shown.
1 change: 1 addition & 0 deletions parquet/parquet_env/parquetify-service.yml
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@ services:
build:
context: .
dockerfile: ./Dockerfile
init: true
restart: always
ports:
- "8080:8080"
Expand Down
1 change: 1 addition & 0 deletions parquet/parquet_env_arm64/parquetify-service.yml
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@ services:
build:
context: .
dockerfile: ./Dockerfile
init: true
restart: always
ports:
- "8080:8080"
Expand Down
42 changes: 37 additions & 5 deletions parquet/performance/native_reader.py
Original file line number Diff line number Diff line change
@@ -1,12 +1,12 @@
#!/usr/bin/env python3
import subprocess
import sys

import os
from testflows.core import *

append_path(sys.path, "../..")

from helpers.cluster import get_binary_from_docker_container
from helpers.cluster import get_binary_from_docker_container, download_http_binary, unpack_deb, unpack_tgz


def argparser(parser):
Expand All @@ -22,6 +22,24 @@ def argparser(parser):
default="docker://clickhouse/clickhouse-server:head",
)

@TestStep(Given)
def get_binary_from_deb(self, source):
    """Unpack the `.deb` package at `source` via `unpack_deb`.

    Returns whatever `unpack_deb` returns for the `clickhouse` program.
    """
    extracted = unpack_deb(deb_binary_path=source, program_name="clickhouse")
    return extracted

@TestStep(Given)
def get_binary_from_package(self, source):
    """Extract the ClickHouse binary from a package file.

    Records `source` on `self.context.package_path`, then dispatches on the
    file extension.

    :param source: path to a `.deb`, `.rpm` or `.tgz` package
    :return: the value returned by `get_binary_from_deb` for `.deb`, the path
        `<unpacked>/usr/bin/clickhouse` for `.tgz`, or None for any other
        extension (including `.rpm`, which is not unpacked here)
    """
    self.context.package_path = source

    if source.endswith(".deb"):
        return get_binary_from_deb(source=source)

    if source.endswith(".tgz"):
        binary_path = os.path.join(unpack_tgz(source), "usr/bin", "clickhouse")
        # Still published on the context as before, but now also returned so
        # callers that use the return value do not receive None.
        self.context.binary_path = binary_path
        return binary_path

    # NOTE(review): `.rpm` is recognised by callers but no unpacking is
    # implemented; None is returned explicitly, matching previous behaviour.
    return None

@TestStep(Given)
def get_binary_clickhouse_binary(self, clickhouse_binary_path):
Expand All @@ -32,17 +50,31 @@ def get_binary_clickhouse_binary(self, clickhouse_binary_path):

return binary_path

@TestStep(Given)
def get_binary_from_http(self, url):
    """Download a ClickHouse binary or package from an HTTP(S) URL.

    :param url: location passed to `download_http_binary`
    :return: the downloaded file path when it is not a package; otherwise the
        binary path obtained from `get_binary_from_package` (may be None for
        package formats that are not unpacked)
    """
    package_formats = (".deb", ".rpm", ".tgz")
    downloaded_path = download_http_binary(url)

    if not downloaded_path.endswith(package_formats):
        # Not a package: use the downloaded file itself. Previously this
        # branch read `self.context.binary_path`, which nothing on this path
        # sets.
        return downloaded_path

    binary_path = get_binary_from_package(source=downloaded_path)
    if binary_path is None:
        # Some package branches may only record the extracted path on the
        # context instead of returning it; fall back to that when present.
        binary_path = getattr(self.context, "binary_path", None)

    return binary_path
@TestModule
@Name("native reader")
@ArgumentParser(argparser)
def module(self, clickhouse_path=None):
"""Running performance tests for Parquet native reader in ClickHouse."""

with Given("I get the ClickHouse binary from the docker container"):
self.context.clickhouse_binary = get_binary_clickhouse_binary(
clickhouse_binary_path=clickhouse_path
)
if clickhouse_path.startswith("docker://"):
self.context.clickhouse_binary = get_binary_clickhouse_binary(
clickhouse_binary_path=clickhouse_path
)
elif clickhouse_path.startswith(("http://", "https://")):
self.context.clickhouse_binary = get_binary_from_http(url=clickhouse_path)


Feature(run=load("parquet.performance.tests.native_reader.feature", "feature"))

Expand Down
8 changes: 8 additions & 0 deletions parquet/regression.py
Original file line number Diff line number Diff line change
Expand Up @@ -55,6 +55,9 @@ def parquet_argparser(parser):
"/parquet/compression/snappyplain/*": [
(Fail, "datetime different on export and import, needs to be investigated")
],
"/parquet/datatypes/float16": [
(Fail, "ClickHouse does not import FLOAT16 properly")
],
"/parquet/datatypes/manydatatypes/*": [
(Fail, "datetime different on export and import, needs to be investigated")
],
Expand Down Expand Up @@ -229,6 +232,11 @@ def parquet_argparser(parser):
"Different on 22.8",
check_clickhouse_version("<23.3"),
),
"/parquet/datatypes/float16": (
Skip,
"Requires ClickHouse 24.11 or higher",
check_clickhouse_version("<24.11"),
),
"/parquet/datatypes/columnwithnull*": (
Skip,
"Different on 22.8",
Expand Down
Loading

0 comments on commit 92eec1d

Please sign in to comment.