From 07c5c4b67fa8fef2040ed32d1cb3b42def8c1c89 Mon Sep 17 00:00:00 2001
From: Zeke Sikelianos
Date: Wed, 7 Feb 2024 00:29:09 -0800
Subject: [PATCH] use public Replicate API to get model metadata
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

and test it. Getting that mocking setup right was the hard part 😅
---
 index.mjs                             |  28 +-
 index.test.js                         |  13 +-
 lib/models.js                         |  21 +-
 package.json                          |   4 +-
 test/fixtures/stability-ai/sdxl.json  | 571 ++++++++++++++++++++++++++
 test/fixtures/yorickvp/llava-13b.json | 482 ++++++++++++++++++++++
 6 files changed, 1096 insertions(+), 23 deletions(-)
 create mode 100644 test/fixtures/stability-ai/sdxl.json
 create mode 100644 test/fixtures/yorickvp/llava-13b.json

diff --git a/index.mjs b/index.mjs
index 452d595..e38840a 100755
--- a/index.mjs
+++ b/index.mjs
@@ -38,7 +38,7 @@ const envFile = path.join(targetDir, '.env')
 
 if (process.env.REPLICATE_API_TOKEN) {
   fs.writeFileSync(envFile, `REPLICATE_API_TOKEN=${process.env.REPLICATE_API_TOKEN}`)
-  console.log('Adding API token to .env file')
+  console.log(`Adding API token ${process.env.REPLICATE_API_TOKEN.slice(0, 5)} to .env file...`)
 } else {
   console.log('API token not found in environment.')
   const rl = readline.createInterface({ input: process.stdin, output: process.stdout })
@@ -47,24 +47,31 @@ if (process.env.REPLICATE_API_TOKEN) {
   if (answer.toLowerCase() === 'y' || answer === '') {
     await open('https://replicate.com/account')
     const token = readlineSync.question('Paste your API token here: ', { hideEchoBack: true })
+
+    // Add the pasted token to the user's local .env file for when they run their project
     fs.writeFileSync(envFile, `REPLICATE_API_TOKEN=${token}`)
-    console.log('API token written to .env file')
+
+    // Also add the pasted token to THIS script's environment, so we can use it to make Replicate API calls
+    process.env.REPLICATE_API_TOKEN = token
+
+    console.log(`API token ${process.env.REPLICATE_API_TOKEN.slice(0, 5)} written to .env file`)
   }
 }
 
+// Check that the user-provided API token looks legit before proceeding
+if (!process.env.REPLICATE_API_TOKEN.startsWith('r8_')) {
+  console.log('Invalid API token:', process.env.REPLICATE_API_TOKEN)
+  // process.exit(1)
+}
+
 console.log('Setting package name...')
 execSync(`npm pkg set name=${args.packageName}`, { cwd: targetDir, stdio: 'ignore' })
 
 console.log('Installing dependencies...')
 execSync('npm install', { cwd: targetDir, stdio: 'ignore' })
 
-let model
-try {
-  model = await getModel(args.model)
-} catch (e) {
-  console.error('Model not found:', args.model)
-  process.exit()
-}
+console.log('Fetching model metadata using Replicate API...')
+const model = await getModel(args.model)
 
 // If user has provided a model version, use it. Otherwise, use the latest version
 const modelVersionRegexp = /.*:[a-fA-F0-9]{64}$/
@@ -72,6 +79,7 @@ const modelNameWithVersion = args.model.match(modelVersionRegexp) ?
args.model : const inputs = getModelInputs(model) +console.log('Adding model data and inputs to index.js...') const indexFile = path.join(targetDir, 'index.js') const indexFileContents = fs.readFileSync(indexFile, 'utf8') const newContents = indexFileContents @@ -82,7 +90,7 @@ fs.writeFileSync(indexFile, newContents) console.log('App created successfully!') if (args['run-after-setup']) { - console.log(`Running command: \`node ${args.packageName}/index.js\`\n\n`) + console.log(`Running command: \`node ${args.packageName}/index.js\``) execSync('node index.js', { cwd: targetDir, stdio: 'inherit' }) } else { console.log('To run your app, execute the following command:') diff --git a/index.test.js b/index.test.js index b5fcb22..165e5ac 100644 --- a/index.test.js +++ b/index.test.js @@ -23,8 +23,9 @@ describe('Node script test', () => { }) it('should create a directory with expected files', () => { - const command = `REPLICATE_API_TOKEN=test_token node index.mjs ${directoryName} --run-after-setup=false` + const command = `REPLICATE_API_TOKEN=r8_test_token node index.mjs ${directoryName} --run-after-setup=false` + // set stdio to 'inherit' to see script output in test output execSync(command, { stdio: 'ignore', env: process.env }) // Check if the directory exists @@ -40,7 +41,7 @@ describe('Node script test', () => { const envFile = path.join(directoryName, '.env') expect(fileExists(envFile)).toBe(true) const envFileContents = fs.readFileSync(envFile, 'utf8') - expect(envFileContents).toBe('REPLICATE_API_TOKEN=test_token') + expect(envFileContents).toBe('REPLICATE_API_TOKEN=r8_test_token') // Check if .gitignore exists in the directory const gitignoreFile = path.join(directoryName, '.gitignore') @@ -50,8 +51,9 @@ describe('Node script test', () => { }) it('handles basic `model` argument in the form {owner}/{model}', () => { - const command = `REPLICATE_API_TOKEN=test_token node index.mjs ${directoryName} --model=yorickvp/llava-13b --run-after-setup=false` + const command = `REPLICATE_API_TOKEN=r8_test_token node index.mjs ${directoryName} --model=yorickvp/llava-13b --run-after-setup=false` + // set stdio to 'inherit' to see script output in test output execSync(command, { stdio: 'ignore', env: process.env }) // Check if the directory exists @@ -63,12 +65,13 @@ describe('Node script test', () => { // Check if index.js contains the correct model name const indexFileContents = fs.readFileSync(indexFile, 'utf8') - expect(indexFileContents).toMatch(/yorickvp\/llava-13b:[a-zA-Z0-9]{40}/) + expect(indexFileContents).toMatch(/yorickvp\/llava-13b:[a-zA-Z0-9]{64}/) }) it('handles a `model` argument in the form {owner}/{model}:{version}', () => { - const command = `REPLICATE_API_TOKEN=test_token node index.mjs ${directoryName} --model=yorickvp/llava-13b:2cfef05a8e8e648f6e92ddb53fa21a81c04ab2c4f1390a6528cc4e331d608df8 --run-after-setup=false` + const command = `REPLICATE_API_TOKEN=r8_test_token node index.mjs ${directoryName} --model=yorickvp/llava-13b:2cfef05a8e8e648f6e92ddb53fa21a81c04ab2c4f1390a6528cc4e331d608df8 --run-after-setup=false` + // set stdio to 'inherit' to see script output in test output execSync(command, { stdio: 'ignore', env: process.env }) // Check if the directory exists diff --git a/lib/models.js b/lib/models.js index 7b22043..5483a44 100644 --- a/lib/models.js +++ b/lib/models.js @@ -1,4 +1,6 @@ -import models from 'all-the-public-replicate-models' +import Replicate from 'replicate' +import fs from 'fs' +import path from 'path' export function getModelInputs (model) { return 
model.default_example.input @@ -10,13 +12,20 @@ export function getModelNameWithVersion (model) { export async function getModel (fullModelName) { // Extract owner and model name, omitting the version if it's present - const [owner, modelName] = fullModelName.split(':')[0].split('/') + const [owner, name] = fullModelName.split(':')[0].split('/') - const model = models.find(model => model.owner === owner && model.name === modelName) - - if (!model) { - throw new Error(`Model "${fullModelName}" not found`) + if (process.env.REPLICATE_API_TOKEN === 'r8_test_token') { + const filePath = path.join(process.cwd(), 'test', 'fixtures', owner, `${name}.json`) + const fileContents = fs.readFileSync(filePath, 'utf8') + const loadedModel = JSON.parse(fileContents) + return loadedModel } + // Instantiate a Replicate client on the fly instead of at the top of this module, + // as the API token may have been user-provided and added to the process env AFTER this script's import time. + const replicate = new Replicate({ auth: process.env.REPLICATE_API_TOKEN }) + + const model = await replicate.models.get(owner, name) + return model } diff --git a/package.json b/package.json index 4018795..6d96bfa 100644 --- a/package.json +++ b/package.json @@ -19,11 +19,11 @@ "node": ">=18" }, "dependencies": { - "all-the-public-replicate-models": "^1.104.0", "json5": "^2.2.3", "minimist": "^1.2.8", "open": "^10.0.3", - "readline-sync": "^1.4.10" + "readline-sync": "^1.4.10", + "replicate": "^0.25.2" }, "devDependencies": { "standard": "^17.1.0", diff --git a/test/fixtures/stability-ai/sdxl.json b/test/fixtures/stability-ai/sdxl.json new file mode 100644 index 0000000..9c0638c --- /dev/null +++ b/test/fixtures/stability-ai/sdxl.json @@ -0,0 +1,571 @@ +{ + "cover_image_url": "https://tjzk.replicate.delivery/models_models_cover_image/61004930-fb88-4e09-9bd4-74fd8b4aa677/sdxl_cover.png", + "created_at": "2023-07-26T17:53:09.882651Z", + "default_example": { + "completed_at": "2023-10-12T17:10:12.909279Z", + "created_at": "2023-10-12T17:10:07.956869Z", + "error": null, + "id": "dzsqmb3bg4lqpjkz2iptjqgccm", + "input": { + "width": 768, + "height": 768, + "prompt": "An astronaut riding a rainbow unicorn, cinematic, dramatic", + "refine": "expert_ensemble_refiner", + "scheduler": "K_EULER", + "lora_scale": 0.6, + "num_outputs": 1, + "guidance_scale": 7.5, + "apply_watermark": false, + "high_noise_frac": 0.8, + "negative_prompt": "", + "prompt_strength": 0.8, + "num_inference_steps": 25 + }, + "metrics": { + "predict_time": 4.981337 + }, + "output": ["https://pbxt.replicate.delivery/YXbcLudoHBIYHV6L0HbcTx5iRzLFMwygLr3vhGpZI35caXbE/out-0.png"], + "started_at": "2023-10-12T17:10:07.927942Z", + "status": "succeeded", + "urls": { + "get": "https://api.replicate.com/v1/predictions/dzsqmb3bg4lqpjkz2iptjqgccm", + "cancel": "https://api.replicate.com/v1/predictions/dzsqmb3bg4lqpjkz2iptjqgccm/cancel" + }, + "model": "stability-ai/sdxl", + "version": "c221b2b8ef527988fb59bf24a8b97c4561f1c671f73bd389f866bfb27c061316", + "webhook_completed": null + }, + "description": "A text-to-image generative AI model that creates beautiful images", + "github_url": "https://github.com/replicate/cog-sdxl", + "latest_version": { + "id": "39ed52f2a78e934b3ba6e2a89f5b1c712de7dfea535525255b1aa35c5565e08b", + "created_at": "2023-11-06T23:13:07.906314Z", + "cog_version": "0.8.6", + "openapi_schema": { + "info": { + "title": "Cog", + "version": "0.1.0" + }, + "paths": { + "/": { + "get": { + "summary": "Root", + "responses": { + "200": { + "content": { + 
"application/json": { + "schema": { + "title": "Response Root Get" + } + } + }, + "description": "Successful Response" + } + }, + "operationId": "root__get" + } + }, + "/shutdown": { + "post": { + "summary": "Start Shutdown", + "responses": { + "200": { + "content": { + "application/json": { + "schema": { + "title": "Response Start Shutdown Shutdown Post" + } + } + }, + "description": "Successful Response" + } + }, + "operationId": "start_shutdown_shutdown_post" + } + }, + "/predictions": { + "post": { + "summary": "Predict", + "responses": { + "200": { + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/PredictionResponse" + } + } + }, + "description": "Successful Response" + }, + "422": { + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/HTTPValidationError" + } + } + }, + "description": "Validation Error" + } + }, + "parameters": [{ + "in": "header", + "name": "prefer", + "schema": { + "type": "string", + "title": "Prefer" + }, + "required": false + }], + "description": "Run a single prediction on the model", + "operationId": "predict_predictions_post", + "requestBody": { + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/PredictionRequest" + } + } + } + } + } + }, + "/health-check": { + "get": { + "summary": "Healthcheck", + "responses": { + "200": { + "content": { + "application/json": { + "schema": { + "title": "Response Healthcheck Health Check Get" + } + } + }, + "description": "Successful Response" + } + }, + "operationId": "healthcheck_health_check_get" + } + }, + "/predictions/{prediction_id}": { + "put": { + "summary": "Predict Idempotent", + "responses": { + "200": { + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/PredictionResponse" + } + } + }, + "description": "Successful Response" + }, + "422": { + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/HTTPValidationError" + } + } + }, + "description": "Validation Error" + } + }, + "parameters": [{ + "in": "path", + "name": "prediction_id", + "schema": { + "type": "string", + "title": "Prediction ID" + }, + "required": true + }, { + "in": "header", + "name": "prefer", + "schema": { + "type": "string", + "title": "Prefer" + }, + "required": false + }], + "description": "Run a single prediction on the model (idempotent creation).", + "operationId": "predict_idempotent_predictions__prediction_id__put", + "requestBody": { + "content": { + "application/json": { + "schema": { + "allOf": [{ + "$ref": "#/components/schemas/PredictionRequest" + }], + "title": "Prediction Request" + } + } + }, + "required": true + } + } + }, + "/predictions/{prediction_id}/cancel": { + "post": { + "summary": "Cancel", + "responses": { + "200": { + "content": { + "application/json": { + "schema": { + "title": "Response Cancel Predictions Prediction Id Cancel Post" + } + } + }, + "description": "Successful Response" + }, + "422": { + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/HTTPValidationError" + } + } + }, + "description": "Validation Error" + } + }, + "parameters": [{ + "in": "path", + "name": "prediction_id", + "schema": { + "type": "string", + "title": "Prediction ID" + }, + "required": true + }], + "description": "Cancel a running prediction", + "operationId": "cancel_predictions__prediction_id__cancel_post" + } + } + }, + "openapi": "3.0.2", + "components": { + "schemas": { + "Input": { + "type": "object", + "title": "Input", + 
"properties": { + "mask": { + "type": "string", + "title": "Mask", + "format": "uri", + "x-order": 3, + "description": "Input mask for inpaint mode. Black areas will be preserved, white areas will be inpainted." + }, + "seed": { + "type": "integer", + "title": "Seed", + "x-order": 11, + "description": "Random seed. Leave blank to randomize the seed" + }, + "image": { + "type": "string", + "title": "Image", + "format": "uri", + "x-order": 2, + "description": "Input image for img2img or inpaint mode" + }, + "width": { + "type": "integer", + "title": "Width", + "default": 1024, + "x-order": 4, + "description": "Width of output image" + }, + "height": { + "type": "integer", + "title": "Height", + "default": 1024, + "x-order": 5, + "description": "Height of output image" + }, + "prompt": { + "type": "string", + "title": "Prompt", + "default": "An astronaut riding a rainbow unicorn", + "x-order": 0, + "description": "Input prompt" + }, + "refine": { + "allOf": [{ + "$ref": "#/components/schemas/refine" + }], + "default": "no_refiner", + "x-order": 12, + "description": "Which refine style to use" + }, + "scheduler": { + "allOf": [{ + "$ref": "#/components/schemas/scheduler" + }], + "default": "K_EULER", + "x-order": 7, + "description": "scheduler" + }, + "lora_scale": { + "type": "number", + "title": "Lora Scale", + "default": 0.6, + "maximum": 1, + "minimum": 0, + "x-order": 16, + "description": "LoRA additive scale. Only applicable on trained models." + }, + "num_outputs": { + "type": "integer", + "title": "Num Outputs", + "default": 1, + "maximum": 4, + "minimum": 1, + "x-order": 6, + "description": "Number of images to output." + }, + "refine_steps": { + "type": "integer", + "title": "Refine Steps", + "x-order": 14, + "description": "For base_image_refiner, the number of steps to refine, defaults to num_inference_steps" + }, + "guidance_scale": { + "type": "number", + "title": "Guidance Scale", + "default": 7.5, + "maximum": 50, + "minimum": 1, + "x-order": 9, + "description": "Scale for classifier-free guidance" + }, + "apply_watermark": { + "type": "boolean", + "title": "Apply Watermark", + "default": true, + "x-order": 15, + "description": "Applies a watermark to enable determining if an image is generated in downstream applications. If you have other provisions for generating or deploying images safely, you can use this to disable watermarking." + }, + "high_noise_frac": { + "type": "number", + "title": "High Noise Frac", + "default": 0.8, + "maximum": 1, + "minimum": 0, + "x-order": 13, + "description": "For expert_ensemble_refiner, the fraction of noise to use" + }, + "negative_prompt": { + "type": "string", + "title": "Negative Prompt", + "default": "", + "x-order": 1, + "description": "Input Negative Prompt" + }, + "prompt_strength": { + "type": "number", + "title": "Prompt Strength", + "default": 0.8, + "maximum": 1, + "minimum": 0, + "x-order": 10, + "description": "Prompt strength when using img2img / inpaint. 1.0 corresponds to full destruction of information in image" + }, + "replicate_weights": { + "type": "string", + "title": "Replicate Weights", + "x-order": 17, + "description": "Replicate LoRA weights to use. Leave blank to use the default weights." 
+ }, + "num_inference_steps": { + "type": "integer", + "title": "Num Inference Steps", + "default": 50, + "maximum": 500, + "minimum": 1, + "x-order": 8, + "description": "Number of denoising steps" + }, + "disable_safety_checker": { + "type": "boolean", + "title": "Disable Safety Checker", + "default": false, + "x-order": 18, + "description": "Disable safety checker for generated images. This feature is only available through the API. See https://replicate.com/docs/how-does-replicate-work#safety" + } + } + }, + "Output": { + "type": "array", + "items": { + "type": "string", + "format": "uri" + }, + "title": "Output" + }, + "Status": { + "enum": ["starting", "processing", "succeeded", "canceled", "failed"], + "type": "string", + "title": "Status", + "description": "An enumeration." + }, + "refine": { + "enum": ["no_refiner", "expert_ensemble_refiner", "base_image_refiner"], + "type": "string", + "title": "refine", + "description": "An enumeration." + }, + "scheduler": { + "enum": ["DDIM", "DPMSolverMultistep", "HeunDiscrete", "KarrasDPM", "K_EULER_ANCESTRAL", "K_EULER", "PNDM"], + "type": "string", + "title": "scheduler", + "description": "An enumeration." + }, + "WebhookEvent": { + "enum": ["start", "output", "logs", "completed"], + "type": "string", + "title": "WebhookEvent", + "description": "An enumeration." + }, + "ValidationError": { + "type": "object", + "title": "ValidationError", + "required": ["loc", "msg", "type"], + "properties": { + "loc": { + "type": "array", + "items": { + "anyOf": [{ + "type": "string" + }, { + "type": "integer" + }] + }, + "title": "Location" + }, + "msg": { + "type": "string", + "title": "Message" + }, + "type": { + "type": "string", + "title": "Error Type" + } + } + }, + "PredictionRequest": { + "type": "object", + "title": "PredictionRequest", + "properties": { + "id": { + "type": "string", + "title": "Id" + }, + "input": { + "$ref": "#/components/schemas/Input" + }, + "webhook": { + "type": "string", + "title": "Webhook", + "format": "uri", + "maxLength": 65536, + "minLength": 1 + }, + "created_at": { + "type": "string", + "title": "Created At", + "format": "date-time" + }, + "output_file_prefix": { + "type": "string", + "title": "Output File Prefix" + }, + "webhook_events_filter": { + "type": "array", + "items": { + "$ref": "#/components/schemas/WebhookEvent" + }, + "default": ["start", "output", "logs", "completed"] + } + } + }, + "PredictionResponse": { + "type": "object", + "title": "PredictionResponse", + "properties": { + "id": { + "type": "string", + "title": "Id" + }, + "logs": { + "type": "string", + "title": "Logs", + "default": "" + }, + "error": { + "type": "string", + "title": "Error" + }, + "input": { + "$ref": "#/components/schemas/Input" + }, + "output": { + "$ref": "#/components/schemas/Output" + }, + "status": { + "$ref": "#/components/schemas/Status" + }, + "metrics": { + "type": "object", + "title": "Metrics" + }, + "version": { + "type": "string", + "title": "Version" + }, + "created_at": { + "type": "string", + "title": "Created At", + "format": "date-time" + }, + "started_at": { + "type": "string", + "title": "Started At", + "format": "date-time" + }, + "completed_at": { + "type": "string", + "title": "Completed At", + "format": "date-time" + } + } + }, + "HTTPValidationError": { + "type": "object", + "title": "HTTPValidationError", + "properties": { + "detail": { + "type": "array", + "items": { + "$ref": "#/components/schemas/ValidationError" + }, + "title": "Detail" + } + } + } + } + } + } + }, + "license_url": 
"https://github.com/Stability-AI/generative-models/blob/main/model_licenses/LICENSE-SDXL1.0", + "name": "sdxl", + "owner": "stability-ai", + "paper_url": "https://arxiv.org/abs/2307.01952", + "run_count": 35650209, + "url": "https://replicate.com/stability-ai/sdxl", + "visibility": "public" +} \ No newline at end of file diff --git a/test/fixtures/yorickvp/llava-13b.json b/test/fixtures/yorickvp/llava-13b.json new file mode 100644 index 0000000..1e7ad33 --- /dev/null +++ b/test/fixtures/yorickvp/llava-13b.json @@ -0,0 +1,482 @@ +{ + "cover_image_url": "https://tjzk.replicate.delivery/models_models_cover_image/2c5dbfff-209d-4ab5-a294-b3e5e56105c0/dalle3.jpg", + "created_at": "2023-10-09T16:27:51.777748Z", + "default_example": { + "completed_at": "2023-10-10T12:43:58.049663Z", + "created_at": "2023-10-10T12:39:54.406394Z", + "error": null, + "id": "gav7xf3bxlcjrql3iyidcu7qgu", + "input": { + "image": "https://replicate.delivery/pbxt/JfvBi04QfleIeJ3ASiBEMbJvhTQKWKLjKaajEbuhO1Y0wPHd/view.jpg", + "prompt": "Are you allowed to swim here?", + "max_tokens": 1024, + "temperature": 0.2 + }, + "logs": null, + "metrics": { + "predict_time": 2.31179 + }, + "output": "Yes, you are allowed to swim in the lake near the pier.", + "started_at": "2023-10-10T12:43:55.737873Z", + "status": "succeeded", + "urls": { + "get": "https://api.replicate.com/v1/predictions/gav7xf3bxlcjrql3iyidcu7qgu", + "cancel": "https://api.replicate.com/v1/predictions/gav7xf3bxlcjrql3iyidcu7qgu/cancel" + }, + "model": "yorickvp/llava-13b", + "version": "c293ca6d551ce5e74893ab153c61380f5bcbd80e02d49e08c582de184a8f6c83", + "webhook_completed": null + }, + "description": "Visual instruction tuning towards large language and vision models with GPT-4 level capabilities", + "github_url": "https://github.com/haotian-liu/LLaVA", + "latest_version": { + "id": "e272157381e2a3bf12df3a8edd1f38d1dbd736bbb7437277c8b34175f8fce358", + "created_at": "2023-11-21T10:19:14.945570Z", + "cog_version": "0.9.0-beta10", + "openapi_schema": { + "info": { + "title": "Cog", + "version": "0.1.0" + }, + "paths": { + "/": { + "get": { + "summary": "Root", + "responses": { + "200": { + "content": { + "application/json": { + "schema": { + "title": "Response Root Get" + } + } + }, + "description": "Successful Response" + } + }, + "operationId": "root__get" + } + }, + "/shutdown": { + "post": { + "summary": "Start Shutdown", + "responses": { + "200": { + "content": { + "application/json": { + "schema": { + "title": "Response Start Shutdown Shutdown Post" + } + } + }, + "description": "Successful Response" + } + }, + "operationId": "start_shutdown_shutdown_post" + } + }, + "/predictions": { + "post": { + "summary": "Predict", + "responses": { + "200": { + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/PredictionResponse" + } + } + }, + "description": "Successful Response" + }, + "422": { + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/HTTPValidationError" + } + } + }, + "description": "Validation Error" + } + }, + "parameters": [ + { + "in": "header", + "name": "prefer", + "schema": { + "type": "string", + "title": "Prefer" + }, + "required": false + } + ], + "description": "Run a single prediction on the model", + "operationId": "predict_predictions_post", + "requestBody": { + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/PredictionRequest" + } + } + } + } + } + }, + "/health-check": { + "get": { + "summary": "Healthcheck", + "responses": { + "200": { + 
"content": { + "application/json": { + "schema": { + "title": "Response Healthcheck Health Check Get" + } + } + }, + "description": "Successful Response" + } + }, + "operationId": "healthcheck_health_check_get" + } + }, + "/predictions/{prediction_id}": { + "put": { + "summary": "Predict Idempotent", + "responses": { + "200": { + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/PredictionResponse" + } + } + }, + "description": "Successful Response" + }, + "422": { + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/HTTPValidationError" + } + } + }, + "description": "Validation Error" + } + }, + "parameters": [ + { + "in": "path", + "name": "prediction_id", + "schema": { + "type": "string", + "title": "Prediction ID" + }, + "required": true + }, + { + "in": "header", + "name": "prefer", + "schema": { + "type": "string", + "title": "Prefer" + }, + "required": false + } + ], + "description": "Run a single prediction on the model (idempotent creation).", + "operationId": "predict_idempotent_predictions__prediction_id__put", + "requestBody": { + "content": { + "application/json": { + "schema": { + "allOf": [ + { + "$ref": "#/components/schemas/PredictionRequest" + } + ], + "title": "Prediction Request" + } + } + }, + "required": true + } + } + }, + "/predictions/{prediction_id}/cancel": { + "post": { + "summary": "Cancel", + "responses": { + "200": { + "content": { + "application/json": { + "schema": { + "title": "Response Cancel Predictions Prediction Id Cancel Post" + } + } + }, + "description": "Successful Response" + }, + "422": { + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/HTTPValidationError" + } + } + }, + "description": "Validation Error" + } + }, + "parameters": [ + { + "in": "path", + "name": "prediction_id", + "schema": { + "type": "string", + "title": "Prediction ID" + }, + "required": true + } + ], + "description": "Cancel a running prediction", + "operationId": "cancel_predictions__prediction_id__cancel_post" + } + } + }, + "openapi": "3.0.2", + "components": { + "schemas": { + "Input": { + "type": "object", + "title": "Input", + "required": [ + "image", + "prompt" + ], + "properties": { + "image": { + "type": "string", + "title": "Image", + "format": "uri", + "x-order": 0, + "description": "Input image" + }, + "top_p": { + "type": "number", + "title": "Top P", + "default": 1, + "maximum": 1, + "minimum": 0, + "x-order": 2, + "description": "When decoding text, samples from the top p percentage of most likely tokens; lower to ignore less likely tokens" + }, + "prompt": { + "type": "string", + "title": "Prompt", + "x-order": 1, + "description": "Prompt to use for text generation" + }, + "max_tokens": { + "type": "integer", + "title": "Max Tokens", + "default": 1024, + "minimum": 0, + "x-order": 4, + "description": "Maximum number of tokens to generate. A word is generally 2-3 tokens" + }, + "temperature": { + "type": "number", + "title": "Temperature", + "default": 0.2, + "minimum": 0, + "x-order": 3, + "description": "Adjusts randomness of outputs, greater than 1 is random and 0 is deterministic" + } + } + }, + "Output": { + "type": "array", + "items": { + "type": "string" + }, + "title": "Output", + "x-cog-array-type": "iterator", + "x-cog-array-display": "concatenate" + }, + "Status": { + "enum": [ + "starting", + "processing", + "succeeded", + "canceled", + "failed" + ], + "type": "string", + "title": "Status", + "description": "An enumeration." 
+ }, + "WebhookEvent": { + "enum": [ + "start", + "output", + "logs", + "completed" + ], + "type": "string", + "title": "WebhookEvent", + "description": "An enumeration." + }, + "ValidationError": { + "type": "object", + "title": "ValidationError", + "required": [ + "loc", + "msg", + "type" + ], + "properties": { + "loc": { + "type": "array", + "items": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "integer" + } + ] + }, + "title": "Location" + }, + "msg": { + "type": "string", + "title": "Message" + }, + "type": { + "type": "string", + "title": "Error Type" + } + } + }, + "PredictionRequest": { + "type": "object", + "title": "PredictionRequest", + "properties": { + "id": { + "type": "string", + "title": "Id" + }, + "input": { + "$ref": "#/components/schemas/Input" + }, + "webhook": { + "type": "string", + "title": "Webhook", + "format": "uri", + "maxLength": 65536, + "minLength": 1 + }, + "created_at": { + "type": "string", + "title": "Created At", + "format": "date-time" + }, + "output_file_prefix": { + "type": "string", + "title": "Output File Prefix" + }, + "webhook_events_filter": { + "type": "array", + "items": { + "$ref": "#/components/schemas/WebhookEvent" + }, + "default": [ + "start", + "output", + "logs", + "completed" + ] + } + } + }, + "PredictionResponse": { + "type": "object", + "title": "PredictionResponse", + "properties": { + "id": { + "type": "string", + "title": "Id" + }, + "logs": { + "type": "string", + "title": "Logs", + "default": "" + }, + "error": { + "type": "string", + "title": "Error" + }, + "input": { + "$ref": "#/components/schemas/Input" + }, + "output": { + "$ref": "#/components/schemas/Output" + }, + "status": { + "$ref": "#/components/schemas/Status" + }, + "metrics": { + "type": "object", + "title": "Metrics" + }, + "version": { + "type": "string", + "title": "Version" + }, + "created_at": { + "type": "string", + "title": "Created At", + "format": "date-time" + }, + "started_at": { + "type": "string", + "title": "Started At", + "format": "date-time" + }, + "completed_at": { + "type": "string", + "title": "Completed At", + "format": "date-time" + } + } + }, + "HTTPValidationError": { + "type": "object", + "title": "HTTPValidationError", + "properties": { + "detail": { + "type": "array", + "items": { + "$ref": "#/components/schemas/ValidationError" + }, + "title": "Detail" + } + } + } + } + } + } + }, + "license_url": "https://ai.meta.com/llama/license/", + "name": "llava-13b", + "owner": "yorickvp", + "paper_url": "https://arxiv.org/abs/2310.03744", + "run_count": 3596716, + "url": "https://replicate.com/yorickvp/llava-13b", + "visibility": "public" +}
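
Note (not part of the patch): a minimal sketch of how the fixture-backed getModel() path in lib/models.js above can be exercised without any network calls. It assumes the package's existing ESM setup and that the script runs from the repo root so test/fixtures/ resolves; the file name sketch.mjs is hypothetical, and the 'r8_test_token' sentinel comes from lib/models.js.

    // sketch.mjs (hypothetical) -- exercise the fixture branch added in lib/models.js
    import { getModel, getModelInputs } from './lib/models.js'

    // This exact sentinel value makes getModel() read test/fixtures/{owner}/{name}.json
    // instead of instantiating a Replicate client and calling the API.
    process.env.REPLICATE_API_TOKEN = 'r8_test_token'

    const model = await getModel('yorickvp/llava-13b')
    console.log(model.latest_version.id) // 64-character version id from the fixture
    console.log(getModelInputs(model))   // default_example.input from the fixture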