# Completions

## Creates a new chat completion response using the specified AI model.

> Sends a chat completion request to generate AI responses based on the provided messages. Supports both streaming and non-streaming modes with automatic token billing.

```json
{"openapi":"3.0.4","info":{"title":"Inference API","version":"v1"},"servers":[{"url":"https://api.{tenant}.{region}.jylo.ai","variables":{"tenant":{"default":"example","description":"Tenant name"},"region":{"default":"uk","description":"Region","enum":["uk","us","eu","au","jp"]}}}],"security":[{"Bearer":[]}],"components":{"securitySchemes":{"Bearer":{"type":"http","description":"Bearer authentication with JWT","scheme":"bearer","bearerFormat":"JWT"}},"schemas":{"Jylo.Inference.Core.Models.ChatCompletionRequest":{"required":["messages"],"type":"object","properties":{"messages":{"type":"array","items":{"$ref":"#/components/schemas/Jylo.Inference.Core.Models.Message"},"nullable":true},"stream":{"type":"boolean"},"response_format":{"$ref":"#/components/schemas/Jylo.Inference.Core.Models.ResponseFormat"},"max_tokens":{"type":"integer","format":"int32","nullable":true}},"additionalProperties":false},"Jylo.Inference.Core.Models.Message":{"required":["content","role"],"type":"object","properties":{"role":{"type":"string","nullable":true},"content":{"type":"string","nullable":true}},"additionalProperties":false},"Jylo.Inference.Core.Models.ResponseFormat":{"required":["type"],"type":"object","properties":{"type":{"type":"string","nullable":true},"schema":{"type":"string","nullable":true}},"additionalProperties":false},"Jylo.Inference.Core.Models.ChatCompletionResponse":{"required":["choices"],"type":"object","properties":{"id":{"type":"string","nullable":true},"model":{"type":"string","nullable":true},"choices":{"type":"array","items":{"$ref":"#/components/schemas/Jylo.Inference.Core.Models.Choice"},"nullable":true},"usage":{"$ref":"#/components/schemas/Jylo.Inference.Core.Models.Usage"}},"additionalProperties":false},"Jylo.Inference.Core.Models.Choice":{"required":["message"],"type":"object","properties":{"index":{"type":"integer","format":"int32"},"message":{"$ref":"#/components/schemas/Jylo.Inference.Core.Models.Message"},"finishReason":{"type":"string","nullable":true}},
"additionalProperties":false},"Jylo.Inference.Core.Models.Usage":{"type":"object","properties":{"promptTokens":{"type":"integer","format":"int32"},"completionTokens":{"type":"integer","format":"int32"},"totalTokens":{"type":"integer","format":"int32"}},"additionalProperties":false},"Microsoft.AspNetCore.Mvc.ProblemDetails":{"type":"object","properties":{"type":{"type":"string","nullable":true},"title":{"type":"string","nullable":true},"status":{"type":"integer","format":"int32","nullable":true},"detail":{"type":"string","nullable":true},"instance":{"type":"string","nullable":true}},"additionalProperties":{}}}},"paths":{"/inference/completions/{modelIdentifier}":{"post":{"tags":["Completions"],"summary":"Creates a new chat completion response using the specified AI model.","description":"Sends a chat completion request to generate AI responses based on the provided messages. Supports both streaming and non-streaming modes with automatic token billing.","parameters":[{"name":"modelIdentifier","in":"path","description":"The unique identifier of the AI model to use for completion.","required":true,"schema":{"type":"string","format":"uuid"}}],"requestBody":{"description":"The chat completion request containing messages and configuration.","content":{"application/json":{"schema":{"$ref":"#/components/schemas/Jylo.Inference.Core.Models.ChatCompletionRequest"}},"text/json":{"schema":{"$ref":"#/components/schemas/Jylo.Inference.Core.Models.ChatCompletionRequest"}},"application/*+json":{"schema":{"$ref":"#/components/schemas/Jylo.Inference.Core.Models.ChatCompletionRequest"}}}},"responses":{"200":{"description":"Successfully generated chat completion response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/Jylo.Inference.Core.Models.ChatCompletionResponse"}}}},"400":{"description":"Invalid request - token limit exceeded or malformed input",
"content":{"application/json":{"schema":{"$ref":"#/components/schemas/Microsoft.AspNetCore.Mvc.ProblemDetails"}}}},"401":{"description":"Unauthorized","content":{"application/json":{"schema":{"$ref":"#/components/schemas/Microsoft.AspNetCore.Mvc.ProblemDetails"}}}},"402":{"description":"Insufficient credits to process the request","content":{"application/json":{"schema":{"$ref":"#/components/schemas/Microsoft.AspNetCore.Mvc.ProblemDetails"}}}},"403":{"description":"Forbidden","content":{"application/json":{"schema":{"$ref":"#/components/schemas/Microsoft.AspNetCore.Mvc.ProblemDetails"}}}},"404":{"description":"Specified model not found","content":{"application/json":{"schema":{"$ref":"#/components/schemas/Microsoft.AspNetCore.Mvc.ProblemDetails"}}}},"500":{"description":"Internal server error during processing"}}}}}}
```
