feat: backend model load balancing support (#4927)
This commit is contained in:
parent
52ec152dd3
commit
d1dbbc1e33
47 changed files with 2191 additions and 256 deletions
|
|
@ -73,10 +73,8 @@ class ModelInvocationUtils:
|
|||
if not model_instance:
|
||||
raise InvokeModelError('Model not found')
|
||||
|
||||
llm_model = cast(LargeLanguageModel, model_instance.model_type_instance)
|
||||
|
||||
# get tokens
|
||||
tokens = llm_model.get_num_tokens(model_instance.model, model_instance.credentials, prompt_messages)
|
||||
tokens = model_instance.get_llm_num_tokens(prompt_messages)
|
||||
|
||||
return tokens
|
||||
|
||||
|
|
@ -108,13 +106,8 @@ class ModelInvocationUtils:
|
|||
tenant_id=tenant_id, model_type=ModelType.LLM,
|
||||
)
|
||||
|
||||
llm_model = cast(LargeLanguageModel, model_instance.model_type_instance)
|
||||
|
||||
# get model credentials
|
||||
model_credentials = model_instance.credentials
|
||||
|
||||
# get prompt tokens
|
||||
prompt_tokens = llm_model.get_num_tokens(model_instance.model, model_credentials, prompt_messages)
|
||||
prompt_tokens = model_instance.get_llm_num_tokens(prompt_messages)
|
||||
|
||||
model_parameters = {
|
||||
'temperature': 0.8,
|
||||
|
|
@ -144,9 +137,7 @@ class ModelInvocationUtils:
|
|||
db.session.commit()
|
||||
|
||||
try:
|
||||
response: LLMResult = llm_model.invoke(
|
||||
model=model_instance.model,
|
||||
credentials=model_credentials,
|
||||
response: LLMResult = model_instance.invoke_llm(
|
||||
prompt_messages=prompt_messages,
|
||||
model_parameters=model_parameters,
|
||||
tools=[], stop=[], stream=False, user=user_id, callbacks=[]
|
||||
|
|
@ -176,4 +167,4 @@ class ModelInvocationUtils:
|
|||
|
||||
db.session.commit()
|
||||
|
||||
return response
|
||||
return response
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue