fix: Dependency update for unstructured component (#3552)

* FIX: Dependency update for unstructured component

* [autofix.ci] apply automated fixes

* Add nltk download script

* chore(pyproject.toml): move nltk_setup script definition to [tool.poetry.scripts] section for better organization and readability

* [autofix.ci] apply automated fixes

* Make nltk resource downloading part of run

* [autofix.ci] apply automated fixes

* [autofix.ci] apply automated fixes

* Use logger for nltk messages

* Move the download resources function

* [autofix.ci] apply automated fixes

* Move nltk resource download

* [autofix.ci] apply automated fixes

* Update main.py

* Update poetry lock

* Update main.py

---------

Co-authored-by: autofix-ci[bot] <114827586+autofix-ci[bot]@users.noreply.github.com>
Co-authored-by: Gabriel Luiz Freitas Almeida <gabriel@langflow.org>
This commit is contained in:
Eric Hare 2024-08-30 06:48:36 -07:00 committed by GitHub
commit 1ed5ce8e26
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
4 changed files with 920 additions and 28 deletions

View file

@ -3,6 +3,7 @@ import json
import os
import shutil
import time
import nltk
from collections import defaultdict
from copy import deepcopy
from datetime import datetime, timezone
@ -615,3 +616,19 @@ def initialize_super_user_if_needed():
create_default_folder_if_it_doesnt_exist(session, super_user.id)
session.commit()
logger.info("Super user initialized")
def download_nltk_resources():
    """Download the required NLTK data packages that are not yet installed.

    Each package is looked up with ``nltk.data.find`` under
    ``<category>/<package>``; only when that lookup raises ``LookupError``
    is ``nltk.download`` invoked for the package.

    A download that reports failure is logged as a warning and otherwise
    ignored, so a missing network connection does not abort the caller.
    """
    nltk_resources = {
        "corpora": ["wordnet"],
        "taggers": ["averaged_perceptron_tagger"],
        "tokenizers": ["punkt", "punkt_tab"],
    }
    for category, packages in nltk_resources.items():
        for package in packages:
            resource_path = f"{category}/{package}"
            try:
                nltk.data.find(resource_path)
            except LookupError:
                # nltk.download signals failure through its boolean return
                # value rather than an exception; surface that in the log
                # instead of silently discarding it.
                if not nltk.download(package):
                    logger.warning(
                        f"Failed to download NLTK resource '{package}' "
                        f"(expected at '{resource_path}')"
                    )

View file

@ -25,6 +25,7 @@ from langflow.initial_setup.setup import (
create_or_update_starter_projects,
initialize_super_user_if_needed,
load_flows_from_directory,
download_nltk_resources,
)
from langflow.interface.types import get_and_cache_all_types_dict
from langflow.interface.utils import setup_llm_caching
@ -182,6 +183,9 @@ def create_app():
FastAPIInstrumentor.instrument_app(app)
# Get necessary NLTK packages
download_nltk_resources()
return app