From f1ae5586b12f864995f04b5ff9453f71e694bd7f Mon Sep 17 00:00:00 2001
From: Minh Vu <vuhoangminh97@gmail.com>
Date: Sat, 20 Jun 2026 20:34:46 +0200
Subject: [PATCH 1/2] Fix Tinybird benchmark runner auth

---
 tinybird/README.md |  1 +
 tinybird/run.sh    | 13 ++++++-------
 2 files changed, 7 insertions(+), 7 deletions(-)
diff --git a/tinybird/README.md b/tinybird/README.md
index f527b516dd..ab123497e1 100644
--- a/tinybird/README.md
+++ b/tinybird/README.md
@@ -32,3 +32,4 @@ use the auto mode to make sure all the files are read.
 # Querying the data
 
 Once the data is inserted you can create the endpoints needed to run the benchmark using pipes. `run.sh` will iterate through each endpoint.
+Set `TINYBIRD_TOKEN` to a Tinybird token with read access before running the script.
diff --git a/tinybird/run.sh b/tinybird/run.sh
index 81733a08bb..991de41358 100755
--- a/tinybird/run.sh
+++ b/tinybird/run.sh
@@ -1,19 +1,18 @@
 #!/bin/bash
+set -euo pipefail  # stop on any failed command, unset variable, or failed pipeline stage
 
-# Define the base URL and Authorization token
 BASE_URL="https://api.tinybird.co/v0/pipes/"
-AUTH_HEADER=<TOKEN>
+: "${TINYBIRD_TOKEN:?Set TINYBIRD_TOKEN}"  # exit with this message if the token is unset or empty
+AUTH_HEADER="Authorization: Bearer ${TINYBIRD_TOKEN}"
 
 results="["
 
 for i in {1..43}; do
     times=()
     for j in {1..3}; do
-        response=$(curl -s --compressed -H "$AUTH_HEADER" "${BASE_URL}Q${i}.json")
-
-        elapsed=$(echo "$response" | jq '.statistics.elapsed')
-        echo "$elapsed"
-        times+=($elapsed)
+        response=$(curl -fsS --compressed -H @- "${BASE_URL}Q${i}.json" <<< "$AUTH_HEADER")  # header read from stdin keeps the token out of argv
+        elapsed=$(jq -er '.statistics.elapsed | numbers' <<< "$response")  # jq exits non-zero if elapsed is missing or not a number
+        times+=("$elapsed")
     done
     results+=$(printf "[%s,%s,%s]," "${times[0]}" "${times[1]}" "${times[2]}")
 done

From 60ec13d65cbb0d709322697bdb8d3f24f63e4425 Mon Sep 17 00:00:00 2001
From: Minh Vu <vuhoangminh97@gmail.com>
Date: Wed, 1 Jul 2026 00:42:47 +0200
Subject: [PATCH 2/2] Document Tinybird benchmark setup

---
 tinybird/README.md | 55 +++++++++++++++++++++++++++++++++++++++++++---
 1 file changed, 52 insertions(+), 3 deletions(-)

diff --git a/tinybird/README.md b/tinybird/README.md
index ab123497e1..86bc4188a6 100644
--- a/tinybird/README.md
+++ b/tinybird/README.md
@@ -15,21 +15,70 @@ Load time and data size in the results are set to 0, as Tinybird did not indicat
 
 Head to https://www.tinybird.co and create an account.
 
+Install the Tinybird CLI and authenticate it against the workspace you want to use for the benchmark:
+
+```bash
+python3 -m venv .venv
+source .venv/bin/activate
+pip install tinybird-cli
+tb auth -i
+```
+
+`tb auth -i` asks for the Tinybird region and an admin token. You can copy the admin token from the "Tokens" page in the Tinybird UI.
+The command writes credentials to a local `.tinyb` file, so do not commit that file.
+
 # Inserting data
 
 Tinybird supports data inserts from various sources. We are going to use S3 to load a Parquet file into Tinybird. Since Tinybird limits the
 file size to 1 GB, and the test data set is larger than that, we split it into smaller chunks using ClickHouse:
 
-```sql
-INSERT INTO FUNCTION s3('https://hitsparquet.s3.eu-west-3.amazonaws.com/data/hits_{_partition_id}.parquet', '', '', 'Parquet')
+```bash
+clickhouse-client --query "
+INSERT INTO FUNCTION s3(
+    'https://<bucket>.s3.<region>.amazonaws.com/<prefix>/hits_{_partition_id}.parquet',
+    '<aws_access_key_id>',
+    '<aws_secret_access_key>',
+    'Parquet'
+)
 PARTITION BY rand() % 50
 SELECT * FROM hits
+"
 ```
 
+Run this from a ClickHouse instance where the ClickBench `hits` table has already been loaded. Replace the S3 URL with a bucket/prefix that
+Tinybird can read, and the credentials with AWS keys that are allowed to write to it. After the `INSERT` finishes, create a Tinybird Data
+Source named `hits` from the generated `hits_*.parquet` files, choose Parquet as the format, and use auto mode so all files in the prefix are imported.
+
 Importing files with sizes a little bit less than 1 GB did not always work. We instead used 50 files of around 280 MB each. You will need to
 use the auto mode to make sure all the files are read.
 
 # Querying the data
 
 Once the data is inserted you can create the endpoints needed to run the benchmark using pipes. `run.sh` will iterate through each endpoint.
-Set `TINYBIRD_TOKEN` to a Tinybird token with read access before running the script.
+
+Create one Tinybird Pipe endpoint for each query in `../clickhouse/queries.sql`. The endpoint names must be `Q1`, `Q2`, ..., `Q43`, because
+`run.sh` calls `/v0/pipes/Q${i}.json`. You can create them in the UI, or generate them with the CLI from this directory (the loop treats each line of the file as one query):
+
+```bash
+i=1
+while IFS= read -r query; do
+    tb pipe generate "Q${i}" "$query" --force
+    i=$((i + 1))
+done < ../clickhouse/queries.sql
+
+tb push pipes/*.pipe
+```
+
+Create and copy a token for the benchmark runner. In a dedicated benchmark workspace, the simplest CLI command is:
+
+```bash
+tb token create static clickbench_read --scope WORKSPACE:READ_ALL
+tb token copy clickbench_read
+```
+
+Alternatively, create a more restrictive token in the Tinybird UI with `PIPES:READ` access to the `Q1` through `Q43` pipes. Then run the
+benchmark from this directory:
+
+```bash
+TINYBIRD_TOKEN='<copied token>' ./run.sh 2>&1 | tee log.txt
+```