From f1ae5586b12f864995f04b5ff9453f71e694bd7f Mon Sep 17 00:00:00 2001 From: Minh Vu Date: Sat, 20 Jun 2026 20:34:46 +0200 Subject: [PATCH 1/2] Fix Tinybird benchmark runner auth --- tinybird/README.md | 1 + tinybird/run.sh | 13 ++++++------- 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/tinybird/README.md b/tinybird/README.md index f527b516dd..ab123497e1 100644 --- a/tinybird/README.md +++ b/tinybird/README.md @@ -32,3 +32,4 @@ use the auto mode to make sure all the files are read. # Querying the data Once the data is inserted you can create the endpoints needed to run the benchmark using pipes. `run.sh` will iterate through each endpoint. +Set `TINYBIRD_TOKEN` to a Tinybird token with read access before running the script. diff --git a/tinybird/run.sh b/tinybird/run.sh index 81733a08bb..991de41358 100755 --- a/tinybird/run.sh +++ b/tinybird/run.sh @@ -1,19 +1,18 @@ #!/bin/bash +set -euo pipefail -# Define the base URL and Authorization token BASE_URL="https://api.tinybird.co/v0/pipes/" -AUTH_HEADER= +: "${TINYBIRD_TOKEN:?Set TINYBIRD_TOKEN}" +AUTH_HEADER="Authorization: Bearer ${TINYBIRD_TOKEN}" results="[" for i in {1..43}; do times=() for j in {1..3}; do - response=$(curl -s --compressed -H "$AUTH_HEADER" "${BASE_URL}Q${i}.json") - - elapsed=$(echo "$response" | jq '.statistics.elapsed') - echo "$elapsed" - times+=($elapsed) + response=$(curl -fsS --compressed -H "$AUTH_HEADER" "${BASE_URL}Q${i}.json") + elapsed=$(jq -er '.statistics.elapsed | numbers' <<< "$response") + times+=("$elapsed") done results+=$(printf "[%s,%s,%s]," "${times[0]}" "${times[1]}" "${times[2]}") done From 60ec13d65cbb0d709322697bdb8d3f24f63e4425 Mon Sep 17 00:00:00 2001 From: Minh Vu Date: Wed, 1 Jul 2026 00:42:47 +0200 Subject: [PATCH 2/2] Document Tinybird benchmark setup --- tinybird/README.md | 55 +++++++++++++++++++++++++++++++++++++++++++--- 1 file changed, 52 insertions(+), 3 deletions(-) diff --git a/tinybird/README.md b/tinybird/README.md index 
ab123497e1..86bc4188a6 100644 --- a/tinybird/README.md +++ b/tinybird/README.md @@ -15,21 +15,70 @@ Load time and data size in the results are set to 0, as Tinybird did not indicat Head to https://www.tinybird.co and create an account. +Install the Tinybird CLI and authenticate it against the workspace you want to use for the benchmark: + +```bash +python3 -m venv .venv +source .venv/bin/activate +pip install tinybird-cli +tb auth -i +``` + +`tb auth -i` asks for the Tinybird region and an admin token. You can copy the admin token from the "Tokens" page in the Tinybird UI. +The command writes credentials to a local `.tinyb` file, so do not commit that file. + # Inserting data Tinybird supports data inserts from various sources. We are going to use S3 to load a Parquet file into Tinybird. Since Tinybird limits the file size to 1 GB, and the test data set is larger than that, we split it into smaller chunks using ClickHouse: -```sql -INSERT INTO FUNCTION s3('https://hitsparquet.s3.eu-west-3.amazonaws.com/data/hits_{_partition_id}.parquet', '', '', 'Parquet') +```bash +clickhouse-client --query " +INSERT INTO FUNCTION s3( + 'https://<bucket>.s3.<region>.amazonaws.com/<prefix>/hits_{_partition_id}.parquet', + '<aws-access-key-id>', + '<aws-secret-access-key>', + 'Parquet' +) PARTITION BY rand() % 50 SELECT * FROM hits +" ``` +Run this from a ClickHouse instance where the ClickBench `hits` table has already been loaded. Replace the S3 URL and credentials with a +bucket/prefix that Tinybird can read. After the `INSERT` finishes, create a Tinybird Data Source named `hits` from the generated +`hits_*.parquet` files, choose Parquet as the format, and use auto mode so all files in the prefix are imported. + Importing files with sizes a little bit less than 1 GB did not always work. We instead used 50 files of around 280 MB each. You will need to use the auto mode to make sure all the files are read. # Querying the data Once the data is inserted you can create the endpoints needed to run the benchmark using pipes. 
`run.sh` will iterate through each endpoint. -Set `TINYBIRD_TOKEN` to a Tinybird token with read access before running the script. + +Create one Tinybird Pipe endpoint for each query in `clickhouse/queries.sql`. The endpoint names must be `Q1`, `Q2`, ..., `Q43`, because +`run.sh` calls `/v0/pipes/Q${i}.json`. You can create them in the UI, or generate them with the CLI from this directory: + +```bash +i=1 +while IFS= read -r query; do + tb pipe generate "Q${i}" "$query" --force + i=$((i + 1)) +done < ../clickhouse/queries.sql + +tb push pipes/*.pipe +``` + +Create and copy a token for the benchmark runner. In a dedicated benchmark workspace, the simplest CLI command is: + +```bash +tb token create static clickbench_read --scope WORKSPACE:READ_ALL +tb token copy clickbench_read +``` + +Alternatively, create a more restrictive token in the Tinybird UI with `PIPES:READ` access to the `Q1` through `Q43` pipes. Then run the +benchmark from this directory: + +```bash +TINYBIRD_TOKEN='<your-read-token>' ./run.sh 2>&1 | tee log.txt +```