-
-
Notifications
You must be signed in to change notification settings - Fork 1.3k
fix(webapp): verify deployment image exists before finalizing #4049
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Merged
+422
−0
Merged
Changes from all commits
Commits
Show all changes
3 commits
Select commit
Hold shift + click to select a range
File filter
Filter by extension
Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,6 @@ | ||
| --- | ||
| area: webapp | ||
| type: fix | ||
| --- | ||
|
|
||
| Verify a deployment's image exists in the registry before marking it deployed, so a deploy whose image wasn't pushed fails instead of silently breaking runs (can be turned off via `DEPLOY_IMAGE_VERIFICATION_ENABLED=0` for setups that push images out of band) |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
187 changes: 187 additions & 0 deletions
187
apps/webapp/app/v3/services/verifyDeploymentImage.server.ts
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,187 @@ | ||
| import { | ||
| BatchGetImageCommand, | ||
| type BatchGetImageCommandOutput, | ||
| RepositoryNotFoundException, | ||
| } from "@aws-sdk/client-ecr"; | ||
| import { tryCatch } from "@trigger.dev/core"; | ||
| import pRetry, { AbortError } from "p-retry"; | ||
| import { logger } from "~/services/logger.server"; | ||
| import { | ||
| type AssumeRoleConfig, | ||
| createEcrClient, | ||
| isEcrRegistry, | ||
| parseEcrRegistryDomain, | ||
| } from "../getDeploymentImageRef.server"; | ||
| import { type RegistryConfig } from "../registryConfig.server"; | ||
|
|
||
| const SHA256_DIGEST = /^sha256:[a-f0-9]{64}$/; | ||
|
|
||
| export type ImageLookupResult = "found" | "missing" | "unknown"; | ||
|
|
||
| /** | ||
| * Split a stored ECR image reference into repository + tag. | ||
| * | ||
| * Trust boundary: the ref is platform-generated, but we still bind the lookup to | ||
| * our configured host (region/account come from the env host) and only parse refs | ||
| * that sit under it. Returns null otherwise. | ||
| */ | ||
| export function parseEcrImageReference( | ||
| imageReference: string, | ||
| registryHost: string | ||
| ): { repositoryName: string; tag: string } | null { | ||
| const prefix = `${registryHost}/`; | ||
| if (!imageReference.startsWith(prefix)) { | ||
| return null; | ||
| } | ||
|
|
||
| // namespace/projectRef:tag, optionally @sha256:... which we drop here | ||
| const remainder = imageReference.slice(prefix.length).split("@")[0]; | ||
| const lastColon = remainder.lastIndexOf(":"); | ||
|
|
||
| if (lastColon <= 0) { | ||
| return null; | ||
| } | ||
|
|
||
| const repositoryName = remainder.slice(0, lastColon); | ||
| const tag = remainder.slice(lastColon + 1); | ||
|
|
||
| if (!repositoryName || !tag || tag.includes("/")) { | ||
| return null; | ||
| } | ||
|
|
||
| return { repositoryName, tag }; | ||
| } | ||
|
|
||
| export function interpretBatchGetImageResponse( | ||
| response: BatchGetImageCommandOutput | ||
| ): ImageLookupResult { | ||
| if (response.images && response.images.length > 0) { | ||
| return "found"; | ||
| } | ||
|
|
||
| if (response.failures?.some((failure) => failure.failureCode === "ImageNotFound")) { | ||
| return "missing"; | ||
| } | ||
|
|
||
| // No image and no explicit not-found failure (some other failure code) - | ||
| // we can't say it's missing, so don't block the deploy on it. | ||
| return "unknown"; | ||
| } | ||
|
|
||
| type BatchGetImageInput = { | ||
| region: string; | ||
| assumeRole?: AssumeRoleConfig; | ||
| registryId?: string; | ||
| repositoryName: string; | ||
| imageIds: { imageTag?: string; imageDigest?: string }[]; | ||
| }; | ||
|
|
||
| type BatchGetImageSender = (input: BatchGetImageInput) => Promise<BatchGetImageCommandOutput>; | ||
|
|
||
| const sendBatchGetImage: BatchGetImageSender = async ({ | ||
| region, | ||
| assumeRole, | ||
| registryId, | ||
| repositoryName, | ||
| imageIds, | ||
| }) => { | ||
| const ecr = await createEcrClient({ region, assumeRole }); | ||
| // No acceptedMediaTypes: only the single-manifest types are valid enum values, and | ||
| // we only care whether the image exists, not its manifest format. | ||
| return ecr.send(new BatchGetImageCommand({ repositoryName, registryId, imageIds })); | ||
| }; | ||
|
|
||
| /** | ||
| * Pre-promotion backstop: check the deployment image actually exists in ECR. | ||
| * | ||
| * "found"/"missing" are definitive (a nonexistent repo counts as missing). | ||
| * "unknown" means we couldn't determine it - non-ECR registry, unparseable ref, or | ||
| * an API error; the caller decides what to do with each. `_send` is a test seam. | ||
| */ | ||
| export async function ecrImageExists( | ||
| { | ||
| imageReference, | ||
| imageDigest, | ||
| registryConfig, | ||
| }: { | ||
| imageReference: string; | ||
| imageDigest?: string; | ||
| registryConfig: RegistryConfig; | ||
| }, | ||
| _send: BatchGetImageSender = sendBatchGetImage | ||
| ): Promise<ImageLookupResult> { | ||
| if (!isEcrRegistry(registryConfig.host)) { | ||
| return "unknown"; | ||
| } | ||
|
|
||
| const parsed = parseEcrImageReference(imageReference, registryConfig.host); | ||
|
|
||
| if (!parsed) { | ||
| logger.warn("Could not parse deployment image reference for verification", { imageReference }); | ||
| return "unknown"; | ||
| } | ||
|
|
||
| const { accountId, region } = parseEcrRegistryDomain(registryConfig.host); | ||
|
|
||
| // imageDigest is supplied by the CLI request body - validate before trusting it. | ||
| // Prefer it when valid (catches a tag that resolves to a different image), else | ||
| // fall back to the platform-generated tag. | ||
| const validDigest = | ||
| imageDigest && SHA256_DIGEST.test(imageDigest.trim()) ? imageDigest.trim() : undefined; | ||
| const imageId = validDigest ? { imageDigest: validDigest } : { imageTag: parsed.tag }; | ||
|
|
||
| const assumeRole = registryConfig.ecrAssumeRoleArn | ||
| ? { | ||
| roleArn: registryConfig.ecrAssumeRoleArn, | ||
| externalId: registryConfig.ecrAssumeRoleExternalId, | ||
| } | ||
| : undefined; | ||
|
|
||
| // Retry transient ECR failures (throttling/network) before giving up, so a blip | ||
| // doesn't fail an otherwise-fine deploy. A missing repo is definitive - don't retry. | ||
| const [error, response] = await tryCatch( | ||
| pRetry( | ||
| () => | ||
| _send({ | ||
| region, | ||
| assumeRole, | ||
| registryId: accountId, | ||
| repositoryName: parsed.repositoryName, | ||
| imageIds: [imageId], | ||
| }).catch((err) => { | ||
| if (err instanceof RepositoryNotFoundException) { | ||
| throw new AbortError(err); | ||
| } | ||
| throw err; | ||
| }), | ||
| { | ||
| retries: 2, | ||
| minTimeout: 200, | ||
| maxTimeout: 1000, | ||
| onFailedAttempt: (e) => { | ||
| logger.warn("Retrying ECR image verification", { | ||
| imageReference, | ||
| attempt: e.attemptNumber, | ||
| error: e.message, | ||
| }); | ||
| }, | ||
| } | ||
| ) | ||
| ); | ||
|
|
||
| if (error) { | ||
| // A missing repo is a definitive miss, not an ambiguous error. | ||
| if (error instanceof RepositoryNotFoundException) { | ||
| return "missing"; | ||
| } | ||
|
|
||
| logger.error("Failed to verify deployment image in ECR", { | ||
| imageReference, | ||
| repositoryName: parsed.repositoryName, | ||
| error: error.message, | ||
| }); | ||
| return "unknown"; | ||
| } | ||
|
nicktrn marked this conversation as resolved.
|
||
|
|
||
| return interpretBatchGetImageResponse(response); | ||
| } | ||
Oops, something went wrong.
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.
Uh oh!
There was an error while loading. Please reload this page.