diff --git a/core/lib/Drupal/Core/DrupalKernel.php b/core/lib/Drupal/Core/DrupalKernel.php index d7bf8b8095..8d55ac870c 100644 --- a/core/lib/Drupal/Core/DrupalKernel.php +++ b/core/lib/Drupal/Core/DrupalKernel.php @@ -701,6 +701,8 @@ public function handle(Request $request, $type = self::MAIN_REQUEST, $catch = TR $this->initializeSettings($request); $this->boot(); } + // Wrap request handling in a Fiber, this allows us to call the cache + // prewarming service if any code tries to suspend a fiber. $fiber = new \Fiber(function () use ($request, $type, $catch) { return $this->getHttpKernel()->handle($request, $type, $catch); }); @@ -728,20 +730,6 @@ public function handle(Request $request, $type = self::MAIN_REQUEST, $catch = TR return $response; } - /** - * Prewarms caches for services that support it. - * - * When handling a request with cold caches, some services lock expensive - * operations to prevent two processes handling them at the same time. When - * this happens, further requests may go into a LockInterface::wait() pattern - * which will usually sleep until the lock is released, in the hope that the - * other request has cached in the meantime ready for it to use. This method - * allows us to do something else during that wait time instead of sleeping. - */ - protected function prewarmCaches(): void { - $this->container->get('cache_prewarmer')->prewarmCaches(); - } - /** * Converts an exception into a response. * diff --git a/core/lib/Drupal/Core/Lock/LockBackendAbstract.php b/core/lib/Drupal/Core/Lock/LockBackendAbstract.php index 41ee27cab4..08bf9b1beb 100644 --- a/core/lib/Drupal/Core/Lock/LockBackendAbstract.php +++ b/core/lib/Drupal/Core/Lock/LockBackendAbstract.php @@ -45,16 +45,27 @@ public function wait($name, $delay = 30) { // Begin sleeping at 25ms. 
$sleep = 25000; while ($delay > 0) { - // This function should only be called by a request that failed to get a - // lock, so we sleep first to give the parallel request a chance to finish - // and release the lock. + // Check if we're executing inside a Fiber. If so, before sleeping, + // suspend the fiber in case some other code can run in the meantime. By + // the time that code has finished running, the lock may already be + // available. + // @see Drupal\Core\PreWarm\CachePreWarmer + if (\Fiber::getCurrent() !== NULL) { + \Fiber::suspend(); + } + if ($this->lockMayBeAvailable($name)) { + // No longer need to wait. + return FALSE; + } + // If the lock is still not available, it's possible that the parent + // process immediately resumed the Fiber we're running in, so sleep + // to avoid a lock stampede. usleep($sleep); - // After each sleep, increase the value of $sleep until it reaches - // 500ms, to reduce the potential for a lock stampede. + // Also to avoid a lock stampede, slowly increase the value of $sleep + // the longer we wait, until it reaches 500ms. $delay = $delay - $sleep; $sleep = min(500000, $sleep + 25000, $delay); if ($this->lockMayBeAvailable($name)) { - // No longer need to wait. return FALSE; } } diff --git a/core/lib/Drupal/Core/PreWarm/CachePreWarmer.php b/core/lib/Drupal/Core/PreWarm/CachePreWarmer.php index c829a1ce12..bccbcc2aec 100644 --- a/core/lib/Drupal/Core/PreWarm/CachePreWarmer.php +++ b/core/lib/Drupal/Core/PreWarm/CachePreWarmer.php @@ -7,7 +7,17 @@ /** * Prewarms caches. * - * @todo long explanation + * Takes the list of prewarmable services, and picks one at random. By choosing + * the service at random, it increases the likelihood that when multiple + * requests all try to prewarm at the same time, that they'll try to prewarm + * different things. If we always chose the service to prewarm sequentially, we + * could end up reproducing the cache stampede situation. 
+ * + * @see Drupal\Core\PreWarm\PreWarmableInterface + * @see Drupal\Core\DrupalKernel::handle() + * @see Drupal\Core\Lock\LockBackendAbstract::wait() + * @see Drupal\Core\Routing\RouteProvider::preLoadRoutes() + * */ class CachePreWarmer implements CachePreWarmerInterface { @@ -17,7 +27,7 @@ class CachePreWarmer implements CachePreWarmerInterface { * A list of services we have already prewarmed, so they can be skipped on * subsequent calls. * - * @var array + * @var string[] */ protected array $calledServices = []; @@ -27,11 +37,13 @@ public function __construct(protected readonly ClassResolverInterface $classReso * {@inheritdoc} */ public function prewarmCaches() { - // Pick a prewarmable service to prewarm the cache for at random. This - // increases the likelihood that when multiple requests all attempt to - // prewarm caches, they'll do so for different services. $candidates = array_diff($this->serviceIds, $this->calledServices); + // If we've tried to prewarm all the available services, don't try to do it + // again. We're most likely to hit this case if a request comes in late + // during a stampede and everything was warmed up just before we reached + // here. if ($candidates) { + // Pick a prewarmable service to prewarm the cache for at random. $key = array_rand($candidates); $this->calledServices[] = $key; $service = $this->classResolver->getInstanceFromDefinition($this->serviceIds[$key]); diff --git a/core/lib/Drupal/Core/PreWarm/CachePreWarmerInterface.php b/core/lib/Drupal/Core/PreWarm/CachePreWarmerInterface.php index 6ef0ebd309..13e09d54c3 100644 --- a/core/lib/Drupal/Core/PreWarm/CachePreWarmerInterface.php +++ b/core/lib/Drupal/Core/PreWarm/CachePreWarmerInterface.php @@ -5,7 +5,57 @@ /** * Interface for cache prewarmers. * - * @todo long explanation + * Drupal has multiple registries that are fairly expensive to build: plugins, + * theme hooks etc. These registries are required to serve most requests, and + * therefore are in the critical path. 
When the cache for one of them is empty, + * it is likely that the rest are too, usually due to a deployment. + * + * After a full cache clear on a high traffic site, a cache stampede may occur, + * where multiple simultaneous requests all hit the site before caches have been + * built. This either results in the same expensive cache item being built + * multiple times, or in requests being caught in a lock wait pattern while + * others build them, if this has been implemented (e.g. router rebuilds). In + * the worst cases, it can take several seconds before any pages can be served + * at all, meanwhile more requests are coming in, affecting both server loads + * and concurrent request limits. + * + * The cache prewarm API attempts to mitigate this situation significantly. + * Except for via the lock system, Drupal can't detect that it's in a cache + * stampede situation itself, but there are particular caches we can assume that + * if they're empty, then we might be. On most sites, even in a stampede + * situation, these caches have to be built sequentially, i.e. the router has to + * exist before a controller can be rendered, Views plugins have to be available + * for a Views query to run, entity/field caches have to be built before + * entities can be rendered, theme and element info caches have to be built + * before templates can be rendered. Very few requests will try to render + * a template without first running routing, even if some minor details will be + * different between different routes and sites. + * + * To reduce duplicate work, and to enable those first pages after a cache clear + * to be served faster, we want to divide up different cache building between + * the different requests that are coming in. This is achieved by the + * cache_prewarmable service tag and Drupal\Core\PreWarm\PreWarmableInterface + * where any service can define itself as prewarmable with a common method to + * call to warm caches. 
+ * + * By default, prewarming is triggered when DrupalKernel::handle() reaches + * a Fiber::suspend() call. A service can call Fiber::suspend() either when it + * detects a cache miss in the critical path, for example + * Drupal\Core\Routing\RouteProvider::preLoadRoutes(), or because it is about to + * execute an async i/o operation. In either case, this allows the caller to + * execute some different code, either a different callback in a Fiber, or in + * the case of DrupalKernel::handle(), this prewarming service. + * + * This implementation takes the list of prewarmable services, and picks one at + * random. By choosing the service at random, it increases the likelihood that + * when multiple requests all try to prewarm at the same time, that they'll try + * to prewarm different things. If we always chose the service to prewarm + * sequentially, we could end up reproducing the cache stampede situation. + * + * @see Drupal\Core\PreWarm\PreWarmableInterface + * @see Drupal\Core\DrupalKernel::handle() + * @see Drupal\Core\Lock\LockBackendAbstract::wait() + * @see Drupal\Core\Routing\RouteProvider::preLoadRoutes() */ interface CachePreWarmerInterface { diff --git a/core/lib/Drupal/Core/PreWarm/PreWarmableInterface.php b/core/lib/Drupal/Core/PreWarm/PreWarmableInterface.php index 47c834f99b..4ea14d3d03 100644 --- a/core/lib/Drupal/Core/PreWarm/PreWarmableInterface.php +++ b/core/lib/Drupal/Core/PreWarm/PreWarmableInterface.php @@ -5,7 +5,22 @@ /** * Interface for cache prewarmers. * - * @todo long explanation + * This interface should be implemented alongside the cache_prewarmable + * service tag. + * + * You should consider carefully whether your service will benefit from + * implementing this interface. It should only be used when: + * 1. Your service has an expensive cache rebuild. + * 2. Your service is in the critical path of most requests to the site and is + * likely to be impacted by a cache stampede. 
If it's mainly used on cron or + * admin pages, then prewarming would be counter-productive. + * Additionally note that there is no guaranteed code path by which your service + * will be called, so it cannot (for example) assume that routing has been + * completed. You should either ensure that you can prewarm your cache without + * knowing the route or current theme, or return early if these aren't + * available. + * + * @see Drupal\Core\PreWarm\CachePreWarmerInterface */ interface PreWarmableInterface {