# Python 3.10 slim base with Poetry-managed in-project virtualenv.
FROM python:3.10-slim

# python
ENV PYTHONUNBUFFERED=1 \
    # prevents python creating .pyc files
    PYTHONDONTWRITEBYTECODE=1 \
    \
    # pip
    PIP_NO_CACHE_DIR=off \
    PIP_DISABLE_PIP_VERSION_CHECK=on \
    PIP_DEFAULT_TIMEOUT=100 \
    \
    # poetry
    # https://python-poetry.org/docs/configuration/#using-environment-variables
    POETRY_VERSION=1.3.2 \
    # make poetry install to this location
    POETRY_HOME="/opt/poetry" \
    # make poetry create the virtual environment in the project's root
    # it gets named `.venv`
    POETRY_VIRTUALENVS_IN_PROJECT=true \
    # do not ask any interactive question
    POETRY_NO_INTERACTION=1 \
    \
    # paths
    # this is where our requirements + virtual environment will live
    PYSETUP_PATH="/opt/pysetup" \
    VENV_PATH="/opt/pysetup/.venv"

# prepend poetry and venv to path
ENV PATH="$POETRY_HOME/bin:$VENV_PATH/bin:$PATH"

# Single layer; clean the apt lists afterwards so they don't bloat the image.
RUN apt-get update \
    && apt-get install --no-install-recommends -y \
        # deps for installing poetry
        curl \
        # deps for building python deps
        build-essential libpq-dev \
    && apt-get clean \
    && rm -rf /var/lib/apt/lists/*

# install poetry - respects $POETRY_VERSION & $POETRY_HOME
RUN curl -sSL https://install.python-poetry.org | python3 -

# copy project requirement files here to ensure they will be cached.
WORKDIR $PYSETUP_PATH
COPY poetry.lock pyproject.toml ./

# NOTE(review): `--only-root` skips dependency resolution on purpose —
# pyproject.toml declares langchain as a local path dep ("../langchain")
# that does not exist in this build context. The `poetry add` below swaps
# in the git fork and pulls the full dependency set. Confirm before
# "simplifying" this to the usual `--no-root`, which would fail here.
RUN poetry install --only-root
RUN poetry add "git+https://github.com/ibiscp/langchain.git@ibis"

WORKDIR /app

ENV PATH="/opt/pysetup/.venv/bin:$PATH"

COPY src/ .
+ +EXPOSE 80 + +CMD [ "uvicorn", "--host", "0.0.0.0", "--port", "80", "app:app" ] diff --git a/poetry.lock b/poetry.lock index 60d778118..884e04f9a 100644 --- a/poetry.lock +++ b/poetry.lock @@ -69,6 +69,21 @@ docs = ["furo", "myst-parser", "sphinx", "sphinx-notfound-page", "sphinxcontrib- tests = ["attrs[tests-no-zope]", "zope.interface"] tests-no-zope = ["cloudpickle", "cloudpickle", "hypothesis", "hypothesis", "mypy (>=0.971,<0.990)", "mypy (>=0.971,<0.990)", "pympler", "pympler", "pytest (>=4.3.0)", "pytest (>=4.3.0)", "pytest-mypy-plugins", "pytest-mypy-plugins", "pytest-xdist[psutil]", "pytest-xdist[psutil]"] +[[package]] +name = "beautifulsoup4" +version = "4.11.2" +description = "Screen-scraping library" +category = "main" +optional = false +python-versions = ">=3.6.0" + +[package.dependencies] +soupsieve = ">1.2" + +[package.extras] +html5lib = ["html5lib"] +lxml = ["lxml"] + [[package]] name = "black" version = "23.1.0" @@ -201,7 +216,7 @@ python-versions = ">=3.5" [[package]] name = "langchain" -version = "0.0.82" +version = "0.0.84" description = "Building applications with LLMs through composability" category = "main" optional = false @@ -374,6 +389,14 @@ category = "main" optional = false python-versions = ">=3.7" +[[package]] +name = "soupsieve" +version = "2.3.2.post1" +description = "A modern CSS selector implementation for Beautiful Soup." 
+category = "main" +optional = false +python-versions = ">=3.6" + [[package]] name = "sqlalchemy" version = "1.4.46" @@ -519,7 +542,7 @@ multidict = ">=4.0" [metadata] lock-version = "1.1" python-versions = "^3.10" -content-hash = "3d1426b69c1a6b52d866add9395f7731b107a08315964dccb05534aec63a079d" +content-hash = "0f7ccdac03322e997f334a0a983fece4c7b8b1c90bed4f3a108ca4a3982ab4c7" [metadata.files] aiohttp = [ @@ -627,6 +650,10 @@ attrs = [ {file = "attrs-22.2.0-py3-none-any.whl", hash = "sha256:29e95c7f6778868dbd49170f98f8818f78f3dc5e0e37c0b1f474e3561b240836"}, {file = "attrs-22.2.0.tar.gz", hash = "sha256:c9227bfc2f01993c03f68db37d1d15c9690188323c067c641f1a35ca58185f99"}, ] +beautifulsoup4 = [ + {file = "beautifulsoup4-4.11.2-py3-none-any.whl", hash = "sha256:0e79446b10b3ecb499c1556f7e228a53e64a2bfcebd455f370d8927cb5b59e39"}, + {file = "beautifulsoup4-4.11.2.tar.gz", hash = "sha256:bc4bdda6717de5a2987436fb8d72f45dc90dd856bdfd512a1314ce90349a0106"}, +] black = [ {file = "black-23.1.0-cp310-cp310-macosx_10_16_arm64.whl", hash = "sha256:b6a92a41ee34b883b359998f0c8e6eb8e99803aa8bf3123bf2b2e6fec505a221"}, {file = "black-23.1.0-cp310-cp310-macosx_10_16_universal2.whl", hash = "sha256:57c18c5165c1dbe291d5306e53fb3988122890e57bd9b3dcb75f967f13411a26"}, @@ -1046,6 +1073,10 @@ sniffio = [ {file = "sniffio-1.3.0-py3-none-any.whl", hash = "sha256:eecefdce1e5bbfb7ad2eeaabf7c1eeb404d7757c379bd1f7e5cce9d8bf425384"}, {file = "sniffio-1.3.0.tar.gz", hash = "sha256:e60305c5e5d314f5389259b7f22aaa33d8f7dee49763119234af3755c55b9101"}, ] +soupsieve = [ + {file = "soupsieve-2.3.2.post1-py3-none-any.whl", hash = "sha256:3b2503d3c7084a42b1ebd08116e5f81aadfaea95863628c80a3b774a11b7c759"}, + {file = "soupsieve-2.3.2.post1.tar.gz", hash = "sha256:fc53893b3da2c33de295667a0e19f078c14bf86544af307354de5fcf12a3f30d"}, +] sqlalchemy = [ {file = "SQLAlchemy-1.4.46-cp27-cp27m-macosx_10_14_x86_64.whl", hash = "sha256:7001f16a9a8e06488c3c7154827c48455d1c1507d7228d43e781afbc8ceccf6d"}, {file = 
"SQLAlchemy-1.4.46-cp27-cp27m-manylinux_2_5_x86_64.manylinux1_x86_64.whl", hash = "sha256:c7a46639ba058d320c9f53a81db38119a74b8a7a1884df44d09fbe807d028aaf"}, diff --git a/pyproject.toml b/pyproject.toml index 80cf3c519..0404d9313 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,17 +1,17 @@ [tool.poetry] name = "expert-backend" -version = "0.1.0" +version = "0.0.3" description = "" authors = ["Ibis Prevedello "] readme = "README.md" -packages = [{include = "expert_backend"}] [tool.poetry.dependencies] python = "^3.10" openai = "^0.26.5" fastapi = "^0.91.0" -langchain = {path = "../langchain", develop = true} uvicorn = "^0.20.0" +beautifulsoup4 = "^4.11.2" +langchain = {path = "../langchain", develop = true} [tool.poetry.group.dev.dependencies] diff --git a/src/main.py b/src/app.py similarity index 86% rename from src/main.py rename to src/app.py index 40b0cf19c..76f4974b9 100644 --- a/src/main.py +++ b/src/app.py @@ -13,8 +13,9 @@ def create_app(): return app +app = create_app() + if __name__ == "__main__": import uvicorn - app = create_app() - uvicorn.run(app) + uvicorn.run(app, host="0.0.0.0", port=8000) diff --git a/src/list.py b/src/list.py index ccd3fe854..40d9452be 100644 --- a/src/list.py +++ b/src/list.py @@ -5,7 +5,17 @@ from langchain import agents from langchain import prompts from langchain import llms from langchain import utilities -from langchain.chains.conversation import memory +from langchain.chains.conversation import memory as memories +from langchain import document_loaders +from langchain import vectorstores +from langchain import docstore +from langchain.agents.load_tools import ( + _BASE_TOOLS, + _LLM_TOOLS, + _EXTRA_LLM_TOOLS, + _EXTRA_OPTIONAL_TOOLS, +) +import util # build router @@ -17,34 +27,83 @@ router = APIRouter( @router.get("/") def read_items(): - return ["chains", "agents", "prompts", "llms", "utilities", "memories"] + return [ + "chains", + "agents", + "prompts", + "llms", + "utilities", + "memories", + "document_loaders", 
+ "vectorstores", + "docstores", + "tools", + ] @router.get("/chains") -def chains(): - return chains.__all__ +def list_chains(): + """List all chain types""" + return list(chains.loading.type_to_loader_dict.keys()) @router.get("/agents") -def agents(): - return agents.__all__ +def list_agents(): + """List all agent types""" + return list(agents.loading.AGENT_TO_CLASS.keys()) @router.get("/prompts") -def prompts(): - return prompts.__all__ +def list_prompts(): + """List all prompt types""" + return list(prompts.loading.type_to_loader_dict.keys()) @router.get("/llms") -def llms(): - return llms.__all__ - - -@router.get("/utilities") -def utilities(): - return utilities.__all__ +def list_llms(): + """List all llm types""" + return list(llms.type_to_cls_dict.keys()) @router.get("/memories") -def memories(): - return memory.__all__ +def list_memories(): + """List all memory types""" + return list(memories.type_to_cls_dict.keys()) + + +@router.get("/utilities") +def list_utilities(): + """List all utility types""" + return list(utilities.__all__) + + +@router.get("/document_loaders") +def list_document_loaders(): + """List all document loader types""" + return list(document_loaders.__all__) + + +@router.get("/vectorstores") +def list_vectorstores(): + """List all vector store types""" + return list(vectorstores.__all__) + + +@router.get("/docstores") +def list_docstores(): + """List all document store types""" + return list(docstore.__all__) + + +@router.get("/tools") +def list_tools(): + """List all load tools""" + + merged_dict = { + **_BASE_TOOLS, + **_LLM_TOOLS, + **{k: v[0] for k, v in _EXTRA_LLM_TOOLS.items()}, + **{k: v[0] for k, v in _EXTRA_OPTIONAL_TOOLS.items()}, + } + + return {k: util.get_tool_params(v) for k, v in merged_dict.items()} diff --git a/src/templates.py b/src/templates.py index 1f32bdc16..8db140a6a 100644 --- a/src/templates.py +++ b/src/templates.py @@ -5,8 +5,16 @@ from langchain import agents from langchain import prompts from langchain import 
llms from langchain import utilities -from langchain.chains.conversation import memory - +from langchain.chains.conversation import memory as memories +from langchain import document_loaders +from langchain.agents.load_tools import ( + get_all_tool_names, + _BASE_TOOLS, + _LLM_TOOLS, + _EXTRA_LLM_TOOLS, + _EXTRA_OPTIONAL_TOOLS, +) +import util # build router router = APIRouter( @@ -18,48 +26,48 @@ router = APIRouter( @router.get("/chain") def chain(name: str): # Raise error if name is not in chains - if name not in chains.__all__: + if name not in chains.loading.type_to_loader_dict.keys(): raise Exception(f"Prompt {name} not found.") - _class = getattr(chains, name) + _class = chains.loading.type_to_loader_dict[name].__annotations__["return"] return { name: {name: value for (name, value) in value.__repr_args__() if name != "name"} - for name, value in _class.__dict__["__fields__"].items() + for name, value in _class.__fields__.items() } @router.get("/agent") def agent(name: str): # Raise error if name is not in agents - if name not in agents.__all__: + if name not in agents.loading.AGENT_TO_CLASS.keys(): raise Exception(f"Prompt {name} not found.") - _class = getattr(agents, name) + _class = agents.loading.AGENT_TO_CLASS[name] return { name: {name: value for (name, value) in value.__repr_args__() if name != "name"} - for name, value in _class.__dict__["__fields__"].items() + for name, value in _class.__fields__.items() } @router.get("/prompt") def prompt(name: str): # Raise error if name is not in prompts - if name not in prompts.__all__: + if name not in prompts.loading.type_to_loader_dict.keys(): raise Exception(f"Prompt {name} not found.") - _class = getattr(prompts, name) + _class = prompts.loading.type_to_loader_dict[name].__annotations__["return"] return { name: {name: value for (name, value) in value.__repr_args__() if name != "name"} - for name, value in _class.__dict__["__fields__"].items() + for name, value in _class.__fields__.items() } 
@router.get("/llm") def llm(name: str): # Raise error if name is not in llms - if name not in llms.__all__: + if name not in llms.type_to_cls_dict.keys(): raise Exception(f"Prompt {name} not found.") - _class = getattr(llms, name) + _class = llms.type_to_cls_dict[name] return { name: {name: value for (name, value) in value.__repr_args__() if name != "name"} - for name, value in _class.__dict__["__fields__"].items() + for name, value in _class.__fields__.items() } @@ -78,10 +86,40 @@ def utility(name: str): @router.get("/memory") def memory(name: str): # Raise error if name is not in memory - if name not in memory.__all__: + if name not in memories.type_to_cls_dict.keys(): raise Exception(f"Prompt {name} not found.") - _class = getattr(memory, name) + _class = memories.type_to_cls_dict[name] return { name: {name: value for (name, value) in value.__repr_args__() if name != "name"} for name, value in _class.__dict__["__fields__"].items() } + + +@router.get("/document_loader") +def document_loader(name: str): + # Raise error if name is not in document_loader + if name not in document_loaders.__all__: + raise Exception(f"Prompt {name} not found.") + _class = getattr(document_loaders, name) + return { + name: {name: value for (name, value) in value.__repr_args__() if name != "name"} + for name, value in _class.__fields__.items() + } + + +@router.get("/tool") +def tool(name: str): + # Raise error if name is not in tools + if name not in get_all_tool_names(): + raise Exception(f"Tool {name} not found.") + + if name in _BASE_TOOLS: + return {"parameters": []} + elif name in _LLM_TOOLS: + return {"parameters": ["llm"]} + elif name in _EXTRA_LLM_TOOLS: + _, extra_keys = _EXTRA_LLM_TOOLS[name] + return {"parameters": ["llm"] + extra_keys} + elif name in _EXTRA_OPTIONAL_TOOLS: + _, extra_keys = _EXTRA_OPTIONAL_TOOLS[name] + return {"parameters": extra_keys} diff --git a/src/util.py b/src/util.py new file mode 100644 index 000000000..bb5184f3e --- /dev/null +++ b/src/util.py @@ 
import ast
import inspect
import textwrap


def _called_name(call):
    # Best-effort name of the callee: handles both `Tool(...)` (ast.Name)
    # and `pkg.Tool(...)` (ast.Attribute); returns None for anything else.
    target = call.func
    if isinstance(target, ast.Name):
        return target.id
    if isinstance(target, ast.Attribute):
        return target.attr
    return None


def get_tool_params(func):
    """Statically extract the ``name`` and ``description`` passed to the
    ``Tool(...)`` constructor inside *func*'s source.

    The source is parsed with :mod:`ast` (never executed).  Both the keyword
    form ``Tool(name=..., description=...)`` and the positional form
    ``Tool(name, func, description)`` are recognised.

    Args:
        func: any object accepted by :func:`inspect.getsource`.

    Returns:
        dict with the literal ``name``/``description`` values found, or
        ``None`` when no ``return Tool(...)`` statement is present.
    """
    # dedent so nested functions / methods parse cleanly too
    tree = ast.parse(textwrap.dedent(inspect.getsource(func)))

    for node in ast.walk(tree):
        if not isinstance(node, ast.Return):
            continue
        call = node.value
        # Only inspect `return Tool(...)` statements.  (The previous version
        # assumed every return value was a Tool call and crashed with
        # AttributeError on e.g. `return x`.)
        if not isinstance(call, ast.Call) or _called_name(call) != "Tool":
            continue

        params = {}
        for keyword in call.keywords:
            if keyword.arg in ("name", "description"):
                try:
                    params[keyword.arg] = ast.literal_eval(keyword.value)
                except ValueError:
                    # value is a runtime expression, not a literal — skip it
                    pass
        if params:
            return params

        # Positional form: Tool(name, func, description)
        if len(call.args) >= 3:
            try:
                return {
                    "name": ast.literal_eval(call.args[0]),
                    "description": ast.literal_eval(call.args[2]),
                }
            except ValueError:
                return None
        return None

    # No `return Tool(...)` statement found
    return None