-
Notifications
You must be signed in to change notification settings - Fork 45
/
Copy pathconfig.yaml
52 lines (44 loc) · 1.67 KB
/
config.yaml
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
# Settings for the ASR-with-diarization batch pipeline (datasets, app, API
# endpoints, project, job, and output location).
# NOTE(review): the nesting below is reconstructed from the key grouping; in
# particular, `output` is assumed to belong under `asr` -- confirm against the
# code that reads this file.
asr:
  datasets:
    datasets_path: ./data/datasets
    dataset_name: PCA_dataset
    dataset_description: Dataset for Post Call Analysis AISK
    dataset_source_file: ./data/datasets/source.json
    dataset_source_type: localMachine
    dataset_language: english

  apps:
    asr_with_diarization_app_id: b6aefdf7-02a4-4384-9c3c-8a81d735a54e
    application_field: language

  urls:
    # Placeholder host; replace with the URL of your own environment.
    base_url: https://your.sambastrudio.environment.com
    datasets_url: /api/datasets
    projects_url: /api/projects
    # Presumably `{project_id}` is substituted by the consumer -- TODO confirm.
    jobs_url: /{project_id}/jobs
    download_results_url: /results/download

  projects:
    project_name: PCA_Project
    project_description: This project will process ASR with diarization jobs regarding Post Call Analysis AISK.

  jobs:
    job_name: PCA job pipeline
    job_description: PCA - ASR with diarization and batch prediction
    job_task: ASR With Diarization
    job_type: batch_predict
    model_checkpoint: Diarization_ASR_Pipeline_V2

  output:
    output_path: results/output.csv
# Language-model settings: backend selection and generation parameters.
llm:
  api: 'sncloud'  # set either sambastudio or sncloud
  temperature: 0.01
  do_sample: false
  max_tokens_to_generate: 1000
  bundle: true  # set to true if using a SambaStudio bundle endpoint
  select_expert: 'Meta-Llama-3.3-70B-Instruct'
# Embedding-model settings: where embeddings run and how requests are batched.
embedding_model:
  type: 'cpu'  # set either sambastudio or cpu
  # Set depending on your endpoint configuration
  # (1 if using a bundle embedding expert).
  batch_size: 1
  bundle: true  # set to true if using SambaStudio embeddings in a bundle endpoint
  # Set if using a SambaStudio bundle embedding expert.
  select_expert: 'e5-mistral-7b-instruct'
# Retrieval settings: chunking parameters and the vector-store backend.
# NOTE(review): units of chunk_size / chunk_overlap (characters vs. tokens)
# are not visible here -- confirm against the consumer.
retrieval:
  chunk_size: 1000
  chunk_overlap: 200
  db_type: 'faiss'