From 09c75e08c1f6bc53348e2fa46aa27e830413c29d Mon Sep 17 00:00:00 2001 From: Sander Muller Date: Sat, 4 Jul 2026 22:38:32 +0200 Subject: [PATCH 1/2] Single-flight cold directory scans in OptimizedDirectorySourceLocatorFactory On a cold cache every parallel worker builds the same directory source locators at once and scans the same directories redundantly (measured ~320 scans for 56 unique directories on a large project, mostly Composer package classmap dirs). A directory's scan is not published until it finishes and the cache save is atomic, so these races are wasteful, not unsafe. Take a per-directory exclusive file lock before scanning a cold entry: the first worker scans and saves, the others block briefly and then re-read the cache it wrote, turning N redundant scans into one. Best effort: if the lock cannot be taken (lock directory not writable, platform refuses) the worker scans as before, so behaviour is unchanged wherever locking is unavailable. Co-Authored-By: Claude Opus 4.8 (1M context) --- ...OptimizedDirectorySourceLocatorFactory.php | 116 ++++++++++++++---- 1 file changed, 94 insertions(+), 22 deletions(-) diff --git a/src/Reflection/BetterReflection/SourceLocator/OptimizedDirectorySourceLocatorFactory.php b/src/Reflection/BetterReflection/SourceLocator/OptimizedDirectorySourceLocatorFactory.php index 48b1404ee9e..9f148bb8085 100644 --- a/src/Reflection/BetterReflection/SourceLocator/OptimizedDirectorySourceLocatorFactory.php +++ b/src/Reflection/BetterReflection/SourceLocator/OptimizedDirectorySourceLocatorFactory.php @@ -6,11 +6,19 @@ use PHPStan\DependencyInjection\AutowiredParameter; use PHPStan\DependencyInjection\AutowiredService; use PHPStan\File\FileFinder; +use PHPStan\Internal\DirectoryCreator; +use PHPStan\Internal\DirectoryCreatorException; use PHPStan\Php\PhpVersion; use function array_key_exists; use function array_keys; +use function fclose; +use function flock; +use function fopen; use function hash_file; +use function sha1; use function sprintf; +use const LOCK_EX; +use const LOCK_UN; #[AutowiredService] final class OptimizedDirectorySourceLocatorFactory @@ -23,6 +31,8 @@ public function __construct( private PhpVersion $phpVersion, private SymbolFinderInFiles $symbolFinderInFiles, private Cache $cache, + #[AutowiredParameter] + private string $tmpDir, ) { } @@ -55,36 +65,58 @@ private function createCachedDirectorySourceLocator(array $fileHashes, string $c /** @var array|null $cached */ $cached = $this->cache->load($cacheKey, $variableCacheKey); - $findInFiles = []; - if ($cached !== null) { - foreach ($cached as $file => [$hash, $classes, $functions, $constants]) { - if (!array_key_exists($file, $fileHashes)) { - unset($cached[$file]); - continue; - } - $newHash = $fileHashes[$file]; - unset($fileHashes[$file]); - if ($hash === $newHash) { - continue; + + $scanLock = null; + if ($cached === null) { + // On a cold cache every parallel worker builds the same directory locator at once and + // scans the same directory redundantly. A scan is not published until it finishes and the + // save is atomic, so these races are wasteful rather than unsafe. Let the first worker take + // an exclusive lock and scan; the others block until it releases and then re-read the cache + // it wrote, turning N redundant scans of a directory into one. + $scanLock = $this->acquireDirectoryScanLock($cacheKey . $variableCacheKey); + if ($scanLock !== null) { + $cached = $this->cache->load($cacheKey, $variableCacheKey); + } + } + + try { + $findInFiles = []; + if ($cached !== null) { + foreach ($cached as $file => [$hash, $classes, $functions, $constants]) { + if (!array_key_exists($file, $fileHashes)) { + unset($cached[$file]); + continue; + } + $newHash = $fileHashes[$file]; + unset($fileHashes[$file]); + if ($hash === $newHash) { + continue; + } + + $findInFiles[] = $file; } + } else { + $cached = []; + } + foreach (array_keys($fileHashes) as $file) { $findInFiles[] = $file; } - } else { - $cached = []; - } - foreach (array_keys($fileHashes) as $file) { - $findInFiles[] = $file; - } + foreach ($this->symbolFinderInFiles->findSymbols($findInFiles, $this->phpVersion->supportsEnums()) as $file => [$newClasses, $newFunctions, $newConstants]) { + $newHash = $originalFileHashes[$file]; + $cached[$file] = [$newHash, $newClasses, $newFunctions, $newConstants]; + } - foreach ($this->symbolFinderInFiles->findSymbols($findInFiles, $this->phpVersion->supportsEnums()) as $file => [$newClasses, $newFunctions, $newConstants]) { - $newHash = $originalFileHashes[$file]; - $cached[$file] = [$newHash, $newClasses, $newFunctions, $newConstants]; + $this->cache->save($cacheKey, $variableCacheKey, $cached); + } finally { + // Release even if scanning or saving throws, so a failing worker cannot leave other + // workers blocked on the lock until it exits. + if ($scanLock !== null) { + $this->releaseDirectoryScanLock($scanLock); + } } - $this->cache->save($cacheKey, $variableCacheKey, $cached); - [$classToFile, $functionToFiles, $constantToFile] = $this->changeStructure($cached); return new OptimizedDirectorySourceLocator( @@ -97,6 +129,46 @@ private function createCachedDirectorySourceLocator(array $fileHashes, string $c ); } + /** + * Take an exclusive cross-process lock for a directory's symbol scan so that on a cold cache only + * the first worker scans it. Best effort: a null return means locking is unavailable (the lock + * directory could not be created or the platform refused the lock) and the caller scans as before. + * The returned handle is held until {@see releaseDirectoryScanLock()}; the OS releases the lock if + * the process dies while holding it. + * + * @return resource|null + */ + private function acquireDirectoryScanLock(string $lockKey) + { + $lockDirectory = sprintf('%s/cache/locks', $this->tmpDir); + try { + DirectoryCreator::ensureDirectoryExists($lockDirectory, 0777); + } catch (DirectoryCreatorException) { + return null; + } + + $lockHandle = @fopen(sprintf('%s/odsl-%s.lock', $lockDirectory, sha1($lockKey)), 'c'); + if ($lockHandle === false) { + return null; + } + + if (!@flock($lockHandle, LOCK_EX)) { + @fclose($lockHandle); + return null; + } + + return $lockHandle; + } + + /** + * @param resource $lockHandle + */ + private function releaseDirectoryScanLock($lockHandle): void + { + @flock($lockHandle, LOCK_UN); + @fclose($lockHandle); + } + /** * @param string[] $files * @param non-empty-string&literal-string $uniqueCacheIdentifier From 3059395bdd856a2f7fc5af9535eff5e1598da530 Mon Sep 17 00:00:00 2001 From: Sander Muller Date: Sun, 5 Jul 2026 16:28:29 +0200 Subject: [PATCH 2/2] Address review: skip redundant cache saves, bound the scan lock, release it after a hit - Only save the symbol cache when it actually changed. A lock loser re-reads exactly what the winner wrote and a warm run finds every hash unchanged, so both re-wrote the identical table before (the loser while still holding the lock). This also drops the redundant warm-path save that predates this PR. - Release the lock immediately after a successful re-read, shrinking each loser's serialized section to a single cache load. - Poll flock with LOCK_NB and a deadline instead of blocking forever, so a wedged winner or a foreign phpstan process holding the shared odsl-installed-files lock cannot block a worker until parallel.processTimeout; on timeout the worker scans as before. - Load the cache through a typed helper so the re-read keeps its array shape (was mixed), and document the lock-file lifecycle (zero-byte, reused, safe under a tmp reaper). Co-Authored-By: Claude Opus 4.8 (1M context) --- ...OptimizedDirectorySourceLocatorFactory.php | 92 +++++++++++++++---- 1 file changed, 73 insertions(+), 19 deletions(-) diff --git a/src/Reflection/BetterReflection/SourceLocator/OptimizedDirectorySourceLocatorFactory.php b/src/Reflection/BetterReflection/SourceLocator/OptimizedDirectorySourceLocatorFactory.php index 9f148bb8085..20302320faa 100644 --- a/src/Reflection/BetterReflection/SourceLocator/OptimizedDirectorySourceLocatorFactory.php +++ b/src/Reflection/BetterReflection/SourceLocator/OptimizedDirectorySourceLocatorFactory.php @@ -15,15 +15,28 @@ use function flock; use function fopen; use function hash_file; +use function hrtime; use function sha1; use function sprintf; +use function usleep; use const LOCK_EX; +use const LOCK_NB; use const LOCK_UN; #[AutowiredService] final class OptimizedDirectorySourceLocatorFactory { + /** + * Give up waiting for the scan lock after this long and scan the directory anyway, so a wedged + * winner or a foreign phpstan process holding the shared lock cannot block a worker until + * parallel.processTimeout. A directory scan finishes in well under a second, so this is far above + * any legitimate wait. + */ + private const SCAN_LOCK_WAIT_SECONDS_LIMIT = 10.0; + + private const SCAN_LOCK_POLL_INTERVAL_MICROSECONDS = 50_000; + public function __construct( private FileNodesFetcher $fileNodesFetcher, #[AutowiredParameter(ref: '@fileFinderScan')] @@ -63,28 +76,34 @@ private function createCachedDirectorySourceLocator(array $fileHashes, string $c $originalFileHashes = $fileHashes; - /** @var array|null $cached */ - $cached = $this->cache->load($cacheKey, $variableCacheKey); + $cached = $this->loadCachedSymbols($cacheKey, $variableCacheKey); $scanLock = null; if ($cached === null) { - // On a cold cache every parallel worker builds the same directory locator at once and - // scans the same directory redundantly. A scan is not published until it finishes and the - // save is atomic, so these races are wasteful rather than unsafe. Let the first worker take - // an exclusive lock and scan; the others block until it releases and then re-read the cache - // it wrote, turning N redundant scans of a directory into one. + // On a cold cache every parallel worker builds the same directory locator at once and would + // scan the same directory redundantly. A scan is not published until it finishes and the save + // is atomic, so these races are wasteful rather than unsafe. The first worker to take the lock + // scans and saves; the rest block until it releases, then re-read the cache it wrote. When the + // re-read hits, the lock has done its job, so release it right away and continue lock-free - + // the validation and any (re)scan below then run exactly as they did before this change. $scanLock = $this->acquireDirectoryScanLock($cacheKey . $variableCacheKey); if ($scanLock !== null) { - $cached = $this->cache->load($cacheKey, $variableCacheKey); + $cached = $this->loadCachedSymbols($cacheKey, $variableCacheKey); + if ($cached !== null) { + $this->releaseDirectoryScanLock($scanLock); + $scanLock = null; + } } } try { + $cacheModified = false; $findInFiles = []; if ($cached !== null) { foreach ($cached as $file => [$hash, $classes, $functions, $constants]) { if (!array_key_exists($file, $fileHashes)) { unset($cached[$file]); + $cacheModified = true; continue; } $newHash = $fileHashes[$file]; @@ -96,19 +115,30 @@ private function createCachedDirectorySourceLocator(array $fileHashes, string $c $findInFiles[] = $file; } } else { + // Cold miss: publish the result (even an empty one) so lock losers read it back instead + // of finding the cache still cold and re-scanning the directory themselves. $cached = []; + $cacheModified = true; } foreach (array_keys($fileHashes) as $file) { $findInFiles[] = $file; } - foreach ($this->symbolFinderInFiles->findSymbols($findInFiles, $this->phpVersion->supportsEnums()) as $file => [$newClasses, $newFunctions, $newConstants]) { - $newHash = $originalFileHashes[$file]; - $cached[$file] = [$newHash, $newClasses, $newFunctions, $newConstants]; + if ($findInFiles !== []) { + $cacheModified = true; + foreach ($this->symbolFinderInFiles->findSymbols($findInFiles, $this->phpVersion->supportsEnums()) as $file => [$newClasses, $newFunctions, $newConstants]) { + $newHash = $originalFileHashes[$file]; + $cached[$file] = [$newHash, $newClasses, $newFunctions, $newConstants]; + } } - $this->cache->save($cacheKey, $variableCacheKey, $cached); + // Only write when the cache actually changed. A lock loser re-reads exactly what the winner + // wrote, and a warm run finds every hash unchanged, so both would otherwise re-serialize and + // re-write the identical symbol table - the loser while still holding the lock. + if ($cacheModified) { + $this->cache->save($cacheKey, $variableCacheKey, $cached); + } } finally { // Release even if scanning or saving throws, so a failing worker cannot leave other // workers blocked on the lock until it exits. @@ -131,10 +161,9 @@ private function createCachedDirectorySourceLocator(array $fileHashes, string $c /** * Take an exclusive cross-process lock for a directory's symbol scan so that on a cold cache only - * the first worker scans it. Best effort: a null return means locking is unavailable (the lock - * directory could not be created or the platform refused the lock) and the caller scans as before. - * The returned handle is held until {@see releaseDirectoryScanLock()}; the OS releases the lock if - * the process dies while holding it. + * the first worker scans it. Best effort: a null return means locking is unavailable or the wait + * timed out, and the caller scans as before. The returned handle is held until + * {@see releaseDirectoryScanLock()}; the OS releases the lock if the process dies while holding it. * * @return resource|null */ @@ -147,19 +176,44 @@ private function acquireDirectoryScanLock(string $lockKey) return null; } + // The lock files are zero-byte markers, never written to and never removed - they are reused + // across runs. A tmp reaper (e.g. systemd-tmpfiles) unlinking one while it is held is harmless: + // the next fopen('c') creates a fresh inode and two workers may scan the same directory at once, + // which is exactly the pre-lock race and stays correct because the cache save is atomic. $lockHandle = @fopen(sprintf('%s/odsl-%s.lock', $lockDirectory, sha1($lockKey)), 'c'); if ($lockHandle === false) { return null; } - if (!@flock($lockHandle, LOCK_EX)) { - @fclose($lockHandle); - return null; + // Poll for the lock with a deadline instead of blocking forever: a live-but-wedged winner, or a + // foreign phpstan process holding the shared odsl-installed-files lock, would otherwise block + // this worker until parallel.processTimeout. On timeout we scan the directory ourselves, capping + // the damage at the pre-lock behaviour for that directory. + $deadline = hrtime(true) + (int) (self::SCAN_LOCK_WAIT_SECONDS_LIMIT * 1_000_000_000); + while (!@flock($lockHandle, LOCK_EX | LOCK_NB)) { + if (hrtime(true) >= $deadline) { + @fclose($lockHandle); + return null; + } + + usleep(self::SCAN_LOCK_POLL_INTERVAL_MICROSECONDS); } return $lockHandle; } + /** + * @param non-empty-string $cacheKey + * @return array|null + */ + private function loadCachedSymbols(string $cacheKey, string $variableCacheKey): ?array + { + /** @var array|null $cached */ + $cached = $this->cache->load($cacheKey, $variableCacheKey); + + return $cached; + } + /** * @param resource $lockHandle */