fix: Text embedder failure caused by deprecated validation checks (#6024)

* fix embedder

* [autofix.ci] apply automated fixes

---------

Co-authored-by: autofix-ci[bot] <114827586+autofix-ci[bot]@users.noreply.github.com>
This commit is contained in:
Edwin Jose 2025-01-31 06:24:57 -05:00 committed by GitHub
commit 2f9cd3e40b
No known key found for this signature in database
GPG key ID: B5690EEEBB952194

View file

@@ -38,46 +38,26 @@ class TextEmbedderComponent(Component):
embedding_model: Embeddings = self.embedding_model
message: Message = self.message
# Validate embedding model
if not embedding_model:
msg = "Embedding model not provided"
# Combine validation checks to reduce nesting
if not embedding_model or not hasattr(embedding_model, "embed_documents"):
msg = "Invalid or incompatible embedding model"
raise ValueError(msg)
# Extract the text content from the message
text_content = message.text if message and message.text else ""
if not text_content:
msg = "No text content found in message"
raise ValueError(msg)
# Check if the embedding model has the required attributes
if not hasattr(embedding_model, "client") or not embedding_model.client:
msg = "Embedding model client not properly initialized"
raise ValueError(msg)
# Ensure the base URL has proper protocol
if hasattr(embedding_model.client, "base_url"):
base_url = embedding_model.client.base_url
if not base_url.startswith(("http://", "https://")):
embedding_model.client.base_url = f"https://{base_url}"
# Generate embeddings using the provided embedding model
embeddings = embedding_model.embed_documents([text_content])
# Validate embeddings output
if not embeddings or not isinstance(embeddings, list):
msg = "Invalid embeddings generated"
raise ValueError(msg)
embedding_vector = embeddings[0]
self.status = {"text": text_content, "embeddings": embedding_vector}
return Data(data={"text": text_content, "embeddings": embedding_vector})
except Exception as e:
logging.exception("Error generating embeddings")
# Return empty data with error status
error_data = Data(data={"text": "", "embeddings": [], "error": str(e)})
self.status = {"error": str(e)}
return error_data
# Create a Data object to encapsulate the results
result_data = Data(data={"text": text_content, "embeddings": embedding_vector})
self.status = {"text": text_content, "embeddings": embedding_vector}
return result_data