From c0c2dadbd9114552727c2de7d063834f6c82a121 Mon Sep 17 00:00:00 2001
From: Julien Schueller
Date: Mon, 29 Jun 2026 17:23:43 +0200
Subject: [PATCH 1/2] lapack: fix missing thread caps

Run these LAPACK interface routines on a single thread when the size
they check (args.m, args.n or n) is below 64; larger problems keep
using the num_cpu_avail() result as before.

In laed3.c the size is passed by reference (blasint *n), so the
threshold is compared against *n rather than against the pointer.

---
 interface/lapack/getrs.c  | 6 +++++-
 interface/lapack/laed3.c  | 3 ++-
 interface/lapack/laswp.c  | 6 +++++-
 interface/lapack/lauum.c  | 6 +++++-
 interface/lapack/trtrs.c  | 6 +++++-
 interface/lapack/zgetrs.c | 6 +++++-
 interface/lapack/zlaswp.c | 6 +++++-
 interface/lapack/zlauum.c | 6 +++++-
 interface/lapack/ztrtri.c | 6 +++++-
 interface/lapack/ztrtrs.c | 6 +++++-
 10 files changed, 47 insertions(+), 10 deletions(-)

diff --git a/interface/lapack/getrs.c b/interface/lapack/getrs.c
index c2a9eb882f..0cc6df0371 100644
--- a/interface/lapack/getrs.c
+++ b/interface/lapack/getrs.c
@@ -126,7 +126,11 @@ int NAME(char *TRANS, blasint *N, blasint *NRHS, FLOAT *a, blasint *ldA,
 
 #ifdef SMP
   args.common = NULL;
-  args.nthreads = num_cpu_avail(4);
+  if (args.m < 64) {
+    args.nthreads = 1;
+  } else {
+    args.nthreads = num_cpu_avail(4);
+  }
   if (args.nthreads == 1) {
 #endif
 
diff --git a/interface/lapack/laed3.c b/interface/lapack/laed3.c
index 4e5215fcf7..bf3198bc03 100644
--- a/interface/lapack/laed3.c
+++ b/interface/lapack/laed3.c
@@ -72,7 +72,8 @@ int NAME(blasint *k, blasint *n, blasint *n1, FLOAT *d,
   if (kval == 0) return 0;
 
 #ifdef SMP
-  int nthreads = num_cpu_avail(4);
+  int nthreads = 1;
+  if (*n >= 64) nthreads = num_cpu_avail(4);
   if (nthreads == 1) {
 #endif
 
diff --git a/interface/lapack/laswp.c b/interface/lapack/laswp.c
index 6544dbc5b2..698e39a99f 100644
--- a/interface/lapack/laswp.c
+++ b/interface/lapack/laswp.c
@@ -77,7 +77,11 @@ int NAME(blasint *N, FLOAT *a, blasint *LDA, blasint *K1, blasint *K2, blasint *
   flag = (incx < 0);
 
 #ifdef SMP
-  nthreads = num_cpu_avail(1);
+  if (n < 64) {
+    nthreads = 1;
+  } else {
+    nthreads = num_cpu_avail(1);
+  }
   if (nthreads == 1) {
 #endif
 
diff --git a/interface/lapack/lauum.c b/interface/lapack/lauum.c
index 70f6a0ec54..81c4fc07aa 100644
--- a/interface/lapack/lauum.c
+++ b/interface/lapack/lauum.c
@@ -112,7 +112,11 @@ int NAME(char *UPLO, blasint *N, FLOAT *a, blasint *ldA, blasint *Info){
 
 #ifdef SMP
   args.common = NULL;
-  args.nthreads = num_cpu_avail(4);
+  if (args.n < 64) {
+    args.nthreads = 1;
+  } else {
+    args.nthreads = num_cpu_avail(4);
+  }
   if (args.nthreads == 1) {
 #endif
 
diff --git a/interface/lapack/trtrs.c b/interface/lapack/trtrs.c
index 3cc449318f..53d8fb0ec3 100644
--- a/interface/lapack/trtrs.c
+++ b/interface/lapack/trtrs.c
@@ -147,7 +147,11 @@ int NAME(char *UPLO, char* TRANS, char* DIAG, blasint *N, blasint *NRHS, FLOAT *
 
 #ifdef SMP
   args.common = NULL;
-  args.nthreads = num_cpu_avail(4);
+  if (args.m < 64) {
+    args.nthreads = 1;
+  } else {
+    args.nthreads = num_cpu_avail(4);
+  }
   if (args.nthreads == 1) {
 #endif
 
diff --git a/interface/lapack/zgetrs.c b/interface/lapack/zgetrs.c
index 0add909ca4..e76a8a1b24 100644
--- a/interface/lapack/zgetrs.c
+++ b/interface/lapack/zgetrs.c
@@ -125,7 +125,11 @@ int NAME(char *TRANS, blasint *N, blasint *NRHS, FLOAT *a, blasint *ldA,
 #endif
 
 #ifdef SMP
-  args.nthreads = num_cpu_avail(4);
+  if (args.m < 64) {
+    args.nthreads = 1;
+  } else {
+    args.nthreads = num_cpu_avail(4);
+  }
   if (args.nthreads == 1) {
 #endif
 
diff --git a/interface/lapack/zlaswp.c b/interface/lapack/zlaswp.c
index 7bb4a659ea..c5188cdf77 100644
--- a/interface/lapack/zlaswp.c
+++ b/interface/lapack/zlaswp.c
@@ -78,7 +78,11 @@ int NAME(blasint *N, FLOAT *a, blasint *LDA, blasint *K1, blasint *K2, blasint *
   flag = (incx < 0);
 
 #ifdef SMP
-  nthreads = num_cpu_avail(2);
+  if (n < 64) {
+    nthreads = 1;
+  } else {
+    nthreads = num_cpu_avail(2);
+  }
   if (nthreads == 1) {
 #endif
 
diff --git a/interface/lapack/zlauum.c b/interface/lapack/zlauum.c
index 4a36cc1733..9f0cd94001 100644
--- a/interface/lapack/zlauum.c
+++ b/interface/lapack/zlauum.c
@@ -112,7 +112,11 @@ int NAME(char *UPLO, blasint *N, FLOAT *a, blasint *ldA, blasint *Info){
 
 #ifdef SMP
   args.common = NULL;
-  args.nthreads = num_cpu_avail(4);
+  if (args.n < 64) {
+    args.nthreads = 1;
+  } else {
+    args.nthreads = num_cpu_avail(4);
+  }
   if (args.nthreads == 1) {
 #endif
 
diff --git a/interface/lapack/ztrtri.c b/interface/lapack/ztrtri.c
index dda4a9e4b6..bfd6e3ea9d 100644
--- a/interface/lapack/ztrtri.c
+++ b/interface/lapack/ztrtri.c
@@ -125,7 +125,11 @@ int NAME(char *UPLO, char *DIAG, blasint *N, FLOAT *a, blasint *ldA, blasint *In
 
 #ifdef SMP
   args.common = NULL;
-  args.nthreads = num_cpu_avail(4);
+  if (args.n < 64) {
+    args.nthreads = 1;
+  } else {
+    args.nthreads = num_cpu_avail(4);
+  }
   if (args.nthreads == 1) {
 #endif
 
diff --git a/interface/lapack/ztrtrs.c b/interface/lapack/ztrtrs.c
index ec3343393a..df0bac360a 100644
--- a/interface/lapack/ztrtrs.c
+++ b/interface/lapack/ztrtrs.c
@@ -147,7 +147,11 @@ int NAME(char *UPLO, char* TRANS, char* DIAG, blasint *N, blasint *NRHS, FLOAT *
 
 #ifdef SMP
   args.common = NULL;
-  args.nthreads = num_cpu_avail(4);
+  if (args.m < 64) {
+    args.nthreads = 1;
+  } else {
+    args.nthreads = num_cpu_avail(4);
+  }
   if (args.nthreads == 1) {
 #endif
 

From 5010c0abb8976e6c0fdba019efeb32713fdf2883 Mon Sep 17 00:00:00 2001
From: Julien Schueller
Date: Mon, 29 Jun 2026 17:23:55 +0200
Subject: [PATCH 2/2] blas: fix missing thread caps

Limit these BLAS interface routines to one thread when n < 50 and to
at most two threads when n < 500; larger sizes keep the
num_cpu_avail(2) result.

---
 interface/spr.c   | 3 +++
 interface/spr2.c  | 3 +++
 interface/syr.c   | 3 +++
 interface/syr2.c  | 3 +++
 interface/tbmv.c  | 3 +++
 interface/tpmv.c  | 3 +++
 interface/zhpmv.c | 3 +++
 interface/zspr.c  | 3 +++
 interface/zspr2.c | 3 +++
 interface/zsymv.c | 3 +++
 interface/zsyr.c  | 3 +++
 interface/zsyr2.c | 3 +++
 interface/ztbmv.c | 3 +++
 interface/ztpmv.c | 3 +++
 14 files changed, 42 insertions(+)

diff --git a/interface/spr.c b/interface/spr.c
index 8aafc9f857..1f5e0bd31a 100644
--- a/interface/spr.c
+++ b/interface/spr.c
@@ -194,6 +194,9 @@ void CNAME(enum CBLAS_ORDER order,
 
 #ifdef SMP
   nthreads = num_cpu_avail(2);
+  if (n < 50) nthreads = 1;
+  if (nthreads > 2 && n < 500) nthreads = 2;
+
   if (nthreads == 1) {
 #endif
 
diff --git a/interface/spr2.c b/interface/spr2.c
index b5aab1767d..fb1b338f2b 100644
--- a/interface/spr2.c
+++ b/interface/spr2.c
@@ -198,6 +198,9 @@ void CNAME(enum CBLAS_ORDER order,
 
 #ifdef SMP
   nthreads = num_cpu_avail(2);
+  if (n < 50) nthreads = 1;
+  if (nthreads > 2 && n < 500) nthreads = 2;
+
   if (nthreads == 1) {
 #endif
 
diff --git a/interface/syr.c b/interface/syr.c
index ad75264b1f..761df762af 100644
--- a/interface/syr.c
+++ b/interface/syr.c
@@ -197,6 +197,9 @@ void CNAME(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, blasint n, FLOAT alpha,
 
 #ifdef SMP
   nthreads = num_cpu_avail(2);
+  if (n < 50) nthreads = 1;
+  if (nthreads > 2 && n < 500) nthreads = 2;
+
   if (nthreads == 1) {
 #endif
 
diff --git a/interface/syr2.c b/interface/syr2.c
index 632906d288..b983fa0d6e 100644
--- a/interface/syr2.c
+++ b/interface/syr2.c
@@ -199,6 +199,9 @@ void CNAME(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, blasint n, FLOAT alpha,
 
 #ifdef SMP
   nthreads = num_cpu_avail(2);
+  if (n < 50) nthreads = 1;
+  if (nthreads > 2 && n < 500) nthreads = 2;
+
   if (nthreads == 1) {
 #endif
 
diff --git a/interface/tbmv.c b/interface/tbmv.c
index b5f3ab740d..c518c296a0 100644
--- a/interface/tbmv.c
+++ b/interface/tbmv.c
@@ -225,6 +225,9 @@ void CNAME(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo,
 
 #ifdef SMP
   nthreads = num_cpu_avail(2);
+  if (n < 50) nthreads = 1;
+  if (nthreads > 2 && n < 500) nthreads = 2;
+
   if (nthreads == 1) {
 #endif
 
diff --git a/interface/tpmv.c b/interface/tpmv.c
index 262af2285d..c9ad3748ad 100644
--- a/interface/tpmv.c
+++ b/interface/tpmv.c
@@ -223,6 +223,9 @@ void CNAME(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo,
 
 #ifdef SMP
   nthreads = num_cpu_avail(2);
+  if (n < 50) nthreads = 1;
+  if (nthreads > 2 && n < 500) nthreads = 2;
+
   if (nthreads == 1) {
 #endif
 
diff --git a/interface/zhpmv.c b/interface/zhpmv.c
index ff49716b54..013dd3e795 100644
--- a/interface/zhpmv.c
+++ b/interface/zhpmv.c
@@ -196,6 +196,9 @@ void CNAME(enum CBLAS_ORDER order,
 
 #ifdef SMP
   nthreads = num_cpu_avail(2);
+  if (n < 50) nthreads = 1;
+  if (nthreads > 2 && n < 500) nthreads = 2;
+
   if (nthreads == 1) {
 #endif
 
diff --git a/interface/zspr.c b/interface/zspr.c
index 574b59aa28..190e62d668 100644
--- a/interface/zspr.c
+++ b/interface/zspr.c
@@ -123,6 +123,9 @@ void NAME(char *UPLO, blasint *N, FLOAT *ALPHA,
 
 #ifdef SMP
   nthreads = num_cpu_avail(2);
+  if (n < 50) nthreads = 1;
+  if (nthreads > 2 && n < 500) nthreads = 2;
+
   if (nthreads == 1) {
 #endif
 
diff --git a/interface/zspr2.c b/interface/zspr2.c
index 44c36d5536..53b7b98676 100644
--- a/interface/zspr2.c
+++ b/interface/zspr2.c
@@ -126,6 +126,9 @@ void NAME(char *UPLO, blasint *N, FLOAT *ALPHA,
 
 #ifdef SMP
   nthreads = num_cpu_avail(2);
+  if (n < 50) nthreads = 1;
+  if (nthreads > 2 && n < 500) nthreads = 2;
+
   if (nthreads == 1) {
 #endif
 
diff --git a/interface/zsymv.c b/interface/zsymv.c
index 1d6ff1f348..0cd690f812 100644
--- a/interface/zsymv.c
+++ b/interface/zsymv.c
@@ -120,6 +120,9 @@ void NAME(char *UPLO, blasint *N, FLOAT *ALPHA, FLOAT *a, blasint *LDA,
 
 #ifdef SMP
   nthreads = num_cpu_avail(2);
+  if (n < 50) nthreads = 1;
+  if (nthreads > 2 && n < 500) nthreads = 2;
+
   if (nthreads == 1) {
 #endif
 
diff --git a/interface/zsyr.c b/interface/zsyr.c
index 51cca84ee6..0d3c35dc03 100644
--- a/interface/zsyr.c
+++ b/interface/zsyr.c
@@ -208,6 +208,9 @@ void CNAME(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, int n, void* valpha, FL
 
 #ifdef SMP
   nthreads = num_cpu_avail(2);
+  if (n < 50) nthreads = 1;
+  if (nthreads > 2 && n < 500) nthreads = 2;
+
   if (nthreads == 1) {
 #endif
 
diff --git a/interface/zsyr2.c b/interface/zsyr2.c
index 7c81c20930..e3bdf56e2b 100644
--- a/interface/zsyr2.c
+++ b/interface/zsyr2.c
@@ -128,6 +128,9 @@ void NAME(char *UPLO, blasint *N, FLOAT *ALPHA,
 
 #ifdef SMP
   nthreads = num_cpu_avail(2);
+  if (n < 50) nthreads = 1;
+  if (nthreads > 2 && n < 500) nthreads = 2;
+
   if (nthreads == 1) {
 #endif
 
diff --git a/interface/ztbmv.c b/interface/ztbmv.c
index d56620c5bc..98fce6606d 100644
--- a/interface/ztbmv.c
+++ b/interface/ztbmv.c
@@ -240,6 +240,9 @@ void CNAME(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo,
 
 #ifdef SMP
   nthreads = num_cpu_avail(2);
+  if (n < 50) nthreads = 1;
+  if (nthreads > 2 && n < 500) nthreads = 2;
+
   if (nthreads == 1) {
 #endif
 
diff --git a/interface/ztpmv.c b/interface/ztpmv.c
index 3791d16026..3d79e84e1f 100644
--- a/interface/ztpmv.c
+++ b/interface/ztpmv.c
@@ -231,6 +231,9 @@ void CNAME(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo,
 
 #ifdef SMP
   nthreads = num_cpu_avail(2);
+  if (n < 50) nthreads = 1;
+  if (nthreads > 2 && n < 500) nthreads = 2;
+
   if (nthreads == 1) {
 #endif
 