<!-- loio2392d9a0504e4380bfa75a5efdb64b6e -->

# Consume Models with the Harmonized API

In this section, we provide a minimal inference call that uses no optional orchestration modules.

A minimal call to orchestration configures only the two required modules: templating and LLM. The curl command below shows how to make such a request.
```
curl --request POST $ORCH_DEPLOYMENT_URL/completion \
  --header 'content-type: application/json' \
  --header "Authorization: Bearer $TOKEN" \
  --header "ai-resource-group: $RESOURCE_GROUP" \
  --data-raw '{
  "orchestration_config": {
    "module_configurations": {
      "templating_module_config": {
        "template": [
          {
            "role": "user",
            "content": "Reply with `{{?text}}` in {{?language}}"
          }
        ],
        "defaults": {
          "language": "English"
        }
      },
      "llm_module_config": {
        "model_name": "gpt-35-turbo-16k",
        "model_params": {
          "max_tokens": 50,
          "temperature": 0.1,
          "frequency_penalty": 0,
          "presence_penalty": 0
        },
        "model_version": "latest"
      }
    }
  },
  "input_params": {
    "text": "Orchestration is Working!",
    "language": "German"
  }
}'
```

This request configures the templating module with a single user message that takes two parameters: `text` and `language`. The `language` parameter also has a default value of English. The LLM module is configured to use gpt-35-turbo-16k in the latest available version, together with a set of model parameters. The `input_params` field contains the values for `text` and `language`; during this request, these values are filled into the prompt sent to the model.

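If you are calling the endpoint from code rather than from a shell, the following Python sketch sends the same payload using the `requests` library. It assumes the same `ORCH_DEPLOYMENT_URL`, `TOKEN`, and `RESOURCE_GROUP` values from the curl example are available as environment variables.

```
# A sketch of the same minimal orchestration request in Python, assuming the
# ORCH_DEPLOYMENT_URL, TOKEN, and RESOURCE_GROUP environment variables are set
# as in the curl example above.
import os

import requests

payload = {
    "orchestration_config": {
        "module_configurations": {
            "templating_module_config": {
                "template": [
                    {
                        "role": "user",
                        "content": "Reply with `{{?text}}` in {{?language}}",
                    }
                ],
                "defaults": {"language": "English"},
            },
            "llm_module_config": {
                "model_name": "gpt-35-turbo-16k",
                "model_params": {
                    "max_tokens": 50,
                    "temperature": 0.1,
                    "frequency_penalty": 0,
                    "presence_penalty": 0,
                },
                "model_version": "latest",
            },
        }
    },
    "input_params": {"text": "Orchestration is Working!", "language": "German"},
}

response = requests.post(
    f"{os.environ['ORCH_DEPLOYMENT_URL']}/completion",
    headers={
        "Authorization": f"Bearer {os.environ['TOKEN']}",
        "ai-resource-group": os.environ["RESOURCE_GROUP"],
    },
    json=payload,  # requests sets the content-type: application/json header
)
response.raise_for_status()
print(response.json()["orchestration_result"]["choices"][0]["message"]["content"])
```
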
The response contains a `request_id`, the module results from each module that was executed, and the `orchestration_result`, which includes the response of the call to the model.

> ### Output Code:
> ```
> {
>   "request_id": "53fc2dcd-399d-4a2b-8bde-912b9f001fed",
>   "module_results": {
>     "templating": [
>       {
>         "role": "user",
>         "content": "Reply with `Orchestration is Working!` in German"
>       }
>     ],
>     "llm": {
>       "id": "chatcmpl-9k8M3djXphXPWh2QkQm1YVtXK4Eki",
>       "object": "chat.completion",
>       "created": 1720782231,
>       "model": "gpt-35-turbo-16k",
>       "choices": [
>         {
>           "index": 0,
>           "message": {
>             "role": "assistant",
>             "content": "Orchestrierungsdienst funktioniert!"
>           },
>           "finish_reason": "stop"
>         }
>       ],
>       "usage": {
>         "completion_tokens": 10,
>         "prompt_tokens": 20,
>         "total_tokens": 30
>       }
>     }
>   },
>   "orchestration_result": {
>     "id": "chatcmpl-9k8M3djXphXPWh2QkQm1YVtXK4Eki",
>     "object": "chat.completion",
>     "created": 1720782231,
>     "model": "gpt-35-turbo-16k",
>     "choices": [
>       {
>         "index": 0,
>         "message": {
>           "role": "assistant",
>           "content": "Orchestrierungsdienst funktioniert!"
>         },
>         "finish_reason": "stop"
>       }
>     ],
>     "usage": {
>       "completion_tokens": 10,
>       "prompt_tokens": 20,
>       "total_tokens": 30
>     }
>   }
> }
> ```

The templating module result contains the user message with the filled-in parameters. The LLM module result contains the response of the model execution. In this example, the LLM module result and the orchestration result are the same. However, they can differ, for example when the output filtering module filters the response.

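As a sketch of how a client might work with that structure, the snippet below reads the individual fields from the parsed JSON body; it assumes the `response` object from the Python example earlier in this section.

```
# Sketch: reading the parsed response body, assuming `response` is the
# requests.Response object from the example above.
result = response.json()

# The templating result: the user message with the parameters filled in.
print(result["module_results"]["templating"][0]["content"])
# -> Reply with `Orchestration is Working!` in German

# The raw LLM output and the final orchestration output. They match here, but
# can differ when optional modules (such as output filtering) alter the response.
llm_content = result["module_results"]["llm"]["choices"][0]["message"]["content"]
final_content = result["orchestration_result"]["choices"][0]["message"]["content"]
print(llm_content == final_content)  # True in this example
```
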
**Related Information**


[Consumption of GenAI Models Using Orchestration – A Beginner's Guide](https://developers.sap.com/tutorials/ai-core-orchestration-consumption.html)

[Libraries and SDKs](libraries-and-sdks-499309d.md "Explore additional SDKs and Libraries, for use with SAP AI Core.")