fix conflicts
This commit is contained in:
commit
b56d905042
17 changed files with 641 additions and 130 deletions
9
.gitignore
vendored
9
.gitignore
vendored
|
|
@ -9,6 +9,11 @@ lerna-debug.log*
|
|||
# Mac
|
||||
.DS_Store
|
||||
|
||||
# VSCode
|
||||
.vscode
|
||||
.chroma
|
||||
.ruff_cache
|
||||
|
||||
# Diagnostic reports (https://nodejs.org/api/report.html)
|
||||
report.[0-9]*.[0-9]*.[0-9]*.[0-9]*.json
|
||||
|
||||
|
|
@ -233,5 +238,5 @@ venv.bak/
|
|||
.dmypy.json
|
||||
dmypy.json
|
||||
|
||||
# Poetry
|
||||
.testenv/*
|
||||
# Poetry
|
||||
.testenv/*
|
||||
|
|
|
|||
392
poetry.lock
generated
392
poetry.lock
generated
|
|
@ -1,4 +1,4 @@
|
|||
# This file is automatically @generated by Poetry 1.4.0 and should not be changed by hand.
|
||||
# This file is automatically @generated by Poetry 1.4.2 and should not be changed by hand.
|
||||
|
||||
[[package]]
|
||||
name = "aiohttp"
|
||||
|
|
@ -157,6 +157,18 @@ files = [
|
|||
{file = "appnope-0.1.3.tar.gz", hash = "sha256:02bd91c4de869fbb1e1c50aafc4098827a7a54ab2f39d9dcba6c9547ed920e24"},
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "argilla"
|
||||
version = "0.0.1"
|
||||
description = ""
|
||||
category = "main"
|
||||
optional = false
|
||||
python-versions = "*"
|
||||
files = [
|
||||
{file = "argilla-0.0.1-py3-none-any.whl", hash = "sha256:8bdc3c505bcfb47ba4b91f5658034eae53bf7d4f9317980397605c0c55817396"},
|
||||
{file = "argilla-0.0.1.tar.gz", hash = "sha256:5017854754e89f573b31af25b25b803f51cea9ca1fa0bcf00505dee1f45cf7c9"},
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "asttokens"
|
||||
version = "2.2.1"
|
||||
|
|
@ -400,6 +412,18 @@ files = [
|
|||
[package.dependencies]
|
||||
pycparser = "*"
|
||||
|
||||
[[package]]
|
||||
name = "chardet"
|
||||
version = "5.1.0"
|
||||
description = "Universal encoding detector for Python 3"
|
||||
category = "main"
|
||||
optional = false
|
||||
python-versions = ">=3.7"
|
||||
files = [
|
||||
{file = "chardet-5.1.0-py3-none-any.whl", hash = "sha256:362777fb014af596ad31334fde1e8c327dfdb076e1960d1694662d46a6917ab9"},
|
||||
{file = "chardet-5.1.0.tar.gz", hash = "sha256:0d62712b956bc154f85fb0a266e2a3c5913c2967e00348701b32411d6def31e5"},
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "charset-normalizer"
|
||||
version = "3.1.0"
|
||||
|
|
@ -809,6 +833,18 @@ files = [
|
|||
{file = "duckdb-0.7.1.tar.gz", hash = "sha256:a7db6da0366b239ea1e4541fcc19556b286872f5015c9a54c2e347146e25a2ad"},
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "et-xmlfile"
|
||||
version = "1.1.0"
|
||||
description = "An implementation of lxml.xmlfile for the standard library"
|
||||
category = "main"
|
||||
optional = false
|
||||
python-versions = ">=3.6"
|
||||
files = [
|
||||
{file = "et_xmlfile-1.1.0-py3-none-any.whl", hash = "sha256:a2ba85d1d6a74ef63837eed693bcb89c3f752169b0e3e7ae5b16ca5e1b3deada"},
|
||||
{file = "et_xmlfile-1.1.0.tar.gz", hash = "sha256:8eb9e2bc2f8c97e37a2dc85a09ecdcdec9d8a396530a6d5a33b30b9a92da0c5c"},
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "exceptiongroup"
|
||||
version = "1.1.1"
|
||||
|
|
@ -839,6 +875,21 @@ files = [
|
|||
[package.extras]
|
||||
tests = ["asttokens", "littleutils", "pytest", "rich"]
|
||||
|
||||
[[package]]
|
||||
name = "fake-useragent"
|
||||
version = "1.1.3"
|
||||
description = "Up-to-date simple useragent faker with real world database"
|
||||
category = "main"
|
||||
optional = false
|
||||
python-versions = "*"
|
||||
files = [
|
||||
{file = "fake-useragent-1.1.3.tar.gz", hash = "sha256:1c06f0aa7d6e4894b919b30b9c7ebd72ff497325191057fbb5df3d5db06b93fc"},
|
||||
{file = "fake_useragent-1.1.3-py3-none-any.whl", hash = "sha256:695d3b1bf7d11d04ab0f971fb73b0ca8de98b78bbadfbc8bacbc9a48423f7531"},
|
||||
]
|
||||
|
||||
[package.dependencies]
|
||||
importlib-resources = {version = ">=5.0", markers = "python_version < \"3.10\""}
|
||||
|
||||
[[package]]
|
||||
name = "fastapi"
|
||||
version = "0.92.0"
|
||||
|
|
@ -986,14 +1037,14 @@ grpcio-gcp = ["grpcio-gcp (>=0.2.2,<1.0dev)"]
|
|||
|
||||
[[package]]
|
||||
name = "google-api-python-client"
|
||||
version = "2.84.0"
|
||||
version = "2.85.0"
|
||||
description = "Google API Client Library for Python"
|
||||
category = "main"
|
||||
optional = false
|
||||
python-versions = ">=3.7"
|
||||
files = [
|
||||
{file = "google-api-python-client-2.84.0.tar.gz", hash = "sha256:c398fd6f9ead0be23aade3b2704c72c5146df0e3352d8ff9101286077e1b010a"},
|
||||
{file = "google_api_python_client-2.84.0-py2.py3-none-any.whl", hash = "sha256:83041bb895863225ecdd9c59dd58565fa48c57c2f10fe06f7c08da7c42c53abc"},
|
||||
{file = "google-api-python-client-2.85.0.tar.gz", hash = "sha256:07b21ef21a542dd69cd7c09817a6079b2769cc2a791981402e8f0fcdb2d47f90"},
|
||||
{file = "google_api_python_client-2.85.0-py2.py3-none-any.whl", hash = "sha256:baf3c6f9b1679d89fcb88c29941a8b04b9a815d721880786baecc6a7f5bd376f"},
|
||||
]
|
||||
|
||||
[package.dependencies]
|
||||
|
|
@ -1360,7 +1411,7 @@ files = [
|
|||
name = "importlib-metadata"
|
||||
version = "6.3.0"
|
||||
description = "Read metadata from Python packages"
|
||||
category = "dev"
|
||||
category = "main"
|
||||
optional = false
|
||||
python-versions = ">=3.7"
|
||||
files = [
|
||||
|
|
@ -1376,6 +1427,25 @@ docs = ["furo", "jaraco.packaging (>=9)", "jaraco.tidelift (>=1.4)", "rst.linker
|
|||
perf = ["ipython"]
|
||||
testing = ["flake8 (<5)", "flufl.flake8", "importlib-resources (>=1.3)", "packaging", "pyfakefs", "pytest (>=6)", "pytest-black (>=0.3.7)", "pytest-checkdocs (>=2.4)", "pytest-cov", "pytest-enabler (>=1.3)", "pytest-flake8", "pytest-mypy (>=0.9.1)", "pytest-perf (>=0.9.2)"]
|
||||
|
||||
[[package]]
|
||||
name = "importlib-resources"
|
||||
version = "5.12.0"
|
||||
description = "Read resources from Python packages"
|
||||
category = "main"
|
||||
optional = false
|
||||
python-versions = ">=3.7"
|
||||
files = [
|
||||
{file = "importlib_resources-5.12.0-py3-none-any.whl", hash = "sha256:7b1deeebbf351c7578e09bf2f63fa2ce8b5ffec296e0d349139d43cca061a81a"},
|
||||
{file = "importlib_resources-5.12.0.tar.gz", hash = "sha256:4be82589bf5c1d7999aedf2a45159d10cb3ca4f19b2271f8792bc8e6da7b22f6"},
|
||||
]
|
||||
|
||||
[package.dependencies]
|
||||
zipp = {version = ">=3.1.0", markers = "python_version < \"3.10\""}
|
||||
|
||||
[package.extras]
|
||||
docs = ["furo", "jaraco.packaging (>=9)", "jaraco.tidelift (>=1.4)", "rst.linker (>=1.9)", "sphinx (>=3.5)", "sphinx-lint"]
|
||||
testing = ["flake8 (<5)", "pytest (>=6)", "pytest-black (>=0.3.7)", "pytest-checkdocs (>=2.4)", "pytest-cov", "pytest-enabler (>=1.3)", "pytest-flake8", "pytest-mypy (>=0.9.1)"]
|
||||
|
||||
[[package]]
|
||||
name = "iniconfig"
|
||||
version = "2.0.0"
|
||||
|
|
@ -1611,6 +1681,99 @@ files = [
|
|||
[package.dependencies]
|
||||
typing-extensions = ">=4.5.0,<5.0.0"
|
||||
|
||||
[[package]]
|
||||
name = "lxml"
|
||||
version = "4.9.2"
|
||||
description = "Powerful and Pythonic XML processing library combining libxml2/libxslt with the ElementTree API."
|
||||
category = "main"
|
||||
optional = false
|
||||
python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, != 3.4.*"
|
||||
files = [
|
||||
{file = "lxml-4.9.2-cp27-cp27m-macosx_10_15_x86_64.whl", hash = "sha256:76cf573e5a365e790396a5cc2b909812633409306c6531a6877c59061e42c4f2"},
|
||||
{file = "lxml-4.9.2-cp27-cp27m-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:b1f42b6921d0e81b1bcb5e395bc091a70f41c4d4e55ba99c6da2b31626c44892"},
|
||||
{file = "lxml-4.9.2-cp27-cp27m-manylinux_2_5_x86_64.manylinux1_x86_64.whl", hash = "sha256:9f102706d0ca011de571de32c3247c6476b55bb6bc65a20f682f000b07a4852a"},
|
||||
{file = "lxml-4.9.2-cp27-cp27m-win32.whl", hash = "sha256:8d0b4612b66ff5d62d03bcaa043bb018f74dfea51184e53f067e6fdcba4bd8de"},
|
||||
{file = "lxml-4.9.2-cp27-cp27m-win_amd64.whl", hash = "sha256:4c8f293f14abc8fd3e8e01c5bd86e6ed0b6ef71936ded5bf10fe7a5efefbaca3"},
|
||||
{file = "lxml-4.9.2-cp27-cp27mu-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:2899456259589aa38bfb018c364d6ae7b53c5c22d8e27d0ec7609c2a1ff78b50"},
|
||||
{file = "lxml-4.9.2-cp27-cp27mu-manylinux_2_5_x86_64.manylinux1_x86_64.whl", hash = "sha256:6749649eecd6a9871cae297bffa4ee76f90b4504a2a2ab528d9ebe912b101975"},
|
||||
{file = "lxml-4.9.2-cp310-cp310-macosx_10_15_x86_64.whl", hash = "sha256:a08cff61517ee26cb56f1e949cca38caabe9ea9fbb4b1e10a805dc39844b7d5c"},
|
||||
{file = "lxml-4.9.2-cp310-cp310-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_24_i686.whl", hash = "sha256:85cabf64adec449132e55616e7ca3e1000ab449d1d0f9d7f83146ed5bdcb6d8a"},
|
||||
{file = "lxml-4.9.2-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.manylinux_2_24_aarch64.whl", hash = "sha256:8340225bd5e7a701c0fa98284c849c9b9fc9238abf53a0ebd90900f25d39a4e4"},
|
||||
{file = "lxml-4.9.2-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.manylinux_2_24_x86_64.whl", hash = "sha256:1ab8f1f932e8f82355e75dda5413a57612c6ea448069d4fb2e217e9a4bed13d4"},
|
||||
{file = "lxml-4.9.2-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:699a9af7dffaf67deeae27b2112aa06b41c370d5e7633e0ee0aea2e0b6c211f7"},
|
||||
{file = "lxml-4.9.2-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:b9cc34af337a97d470040f99ba4282f6e6bac88407d021688a5d585e44a23184"},
|
||||
{file = "lxml-4.9.2-cp310-cp310-win32.whl", hash = "sha256:d02a5399126a53492415d4906ab0ad0375a5456cc05c3fc0fc4ca11771745cda"},
|
||||
{file = "lxml-4.9.2-cp310-cp310-win_amd64.whl", hash = "sha256:a38486985ca49cfa574a507e7a2215c0c780fd1778bb6290c21193b7211702ab"},
|
||||
{file = "lxml-4.9.2-cp311-cp311-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_24_i686.whl", hash = "sha256:c83203addf554215463b59f6399835201999b5e48019dc17f182ed5ad87205c9"},
|
||||
{file = "lxml-4.9.2-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.manylinux_2_24_aarch64.whl", hash = "sha256:2a87fa548561d2f4643c99cd13131acb607ddabb70682dcf1dff5f71f781a4bf"},
|
||||
{file = "lxml-4.9.2-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.manylinux_2_24_x86_64.whl", hash = "sha256:d6b430a9938a5a5d85fc107d852262ddcd48602c120e3dbb02137c83d212b380"},
|
||||
{file = "lxml-4.9.2-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:3efea981d956a6f7173b4659849f55081867cf897e719f57383698af6f618a92"},
|
||||
{file = "lxml-4.9.2-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:df0623dcf9668ad0445e0558a21211d4e9a149ea8f5666917c8eeec515f0a6d1"},
|
||||
{file = "lxml-4.9.2-cp311-cp311-win32.whl", hash = "sha256:da248f93f0418a9e9d94b0080d7ebc407a9a5e6d0b57bb30db9b5cc28de1ad33"},
|
||||
{file = "lxml-4.9.2-cp311-cp311-win_amd64.whl", hash = "sha256:3818b8e2c4b5148567e1b09ce739006acfaa44ce3156f8cbbc11062994b8e8dd"},
|
||||
{file = "lxml-4.9.2-cp35-cp35m-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:ca989b91cf3a3ba28930a9fc1e9aeafc2a395448641df1f387a2d394638943b0"},
|
||||
{file = "lxml-4.9.2-cp35-cp35m-manylinux_2_5_x86_64.manylinux1_x86_64.whl", hash = "sha256:822068f85e12a6e292803e112ab876bc03ed1f03dddb80154c395f891ca6b31e"},
|
||||
{file = "lxml-4.9.2-cp35-cp35m-win32.whl", hash = "sha256:be7292c55101e22f2a3d4d8913944cbea71eea90792bf914add27454a13905df"},
|
||||
{file = "lxml-4.9.2-cp35-cp35m-win_amd64.whl", hash = "sha256:998c7c41910666d2976928c38ea96a70d1aa43be6fe502f21a651e17483a43c5"},
|
||||
{file = "lxml-4.9.2-cp36-cp36m-macosx_10_15_x86_64.whl", hash = "sha256:b26a29f0b7fc6f0897f043ca366142d2b609dc60756ee6e4e90b5f762c6adc53"},
|
||||
{file = "lxml-4.9.2-cp36-cp36m-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_24_i686.whl", hash = "sha256:ab323679b8b3030000f2be63e22cdeea5b47ee0abd2d6a1dc0c8103ddaa56cd7"},
|
||||
{file = "lxml-4.9.2-cp36-cp36m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:689bb688a1db722485e4610a503e3e9210dcc20c520b45ac8f7533c837be76fe"},
|
||||
{file = "lxml-4.9.2-cp36-cp36m-manylinux_2_17_x86_64.manylinux2014_x86_64.manylinux_2_24_x86_64.whl", hash = "sha256:f49e52d174375a7def9915c9f06ec4e569d235ad428f70751765f48d5926678c"},
|
||||
{file = "lxml-4.9.2-cp36-cp36m-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:36c3c175d34652a35475a73762b545f4527aec044910a651d2bf50de9c3352b1"},
|
||||
{file = "lxml-4.9.2-cp36-cp36m-manylinux_2_5_x86_64.manylinux1_x86_64.whl", hash = "sha256:a35f8b7fa99f90dd2f5dc5a9fa12332642f087a7641289ca6c40d6e1a2637d8e"},
|
||||
{file = "lxml-4.9.2-cp36-cp36m-musllinux_1_1_aarch64.whl", hash = "sha256:58bfa3aa19ca4c0f28c5dde0ff56c520fbac6f0daf4fac66ed4c8d2fb7f22e74"},
|
||||
{file = "lxml-4.9.2-cp36-cp36m-musllinux_1_1_x86_64.whl", hash = "sha256:bc718cd47b765e790eecb74d044cc8d37d58562f6c314ee9484df26276d36a38"},
|
||||
{file = "lxml-4.9.2-cp36-cp36m-win32.whl", hash = "sha256:d5bf6545cd27aaa8a13033ce56354ed9e25ab0e4ac3b5392b763d8d04b08e0c5"},
|
||||
{file = "lxml-4.9.2-cp36-cp36m-win_amd64.whl", hash = "sha256:3ab9fa9d6dc2a7f29d7affdf3edebf6ece6fb28a6d80b14c3b2fb9d39b9322c3"},
|
||||
{file = "lxml-4.9.2-cp37-cp37m-macosx_10_15_x86_64.whl", hash = "sha256:05ca3f6abf5cf78fe053da9b1166e062ade3fa5d4f92b4ed688127ea7d7b1d03"},
|
||||
{file = "lxml-4.9.2-cp37-cp37m-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_24_i686.whl", hash = "sha256:a5da296eb617d18e497bcf0a5c528f5d3b18dadb3619fbdadf4ed2356ef8d941"},
|
||||
{file = "lxml-4.9.2-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.manylinux_2_24_aarch64.whl", hash = "sha256:04876580c050a8c5341d706dd464ff04fd597095cc8c023252566a8826505726"},
|
||||
{file = "lxml-4.9.2-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.manylinux_2_24_x86_64.whl", hash = "sha256:c9ec3eaf616d67db0764b3bb983962b4f385a1f08304fd30c7283954e6a7869b"},
|
||||
{file = "lxml-4.9.2-cp37-cp37m-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:2a29ba94d065945944016b6b74e538bdb1751a1db6ffb80c9d3c2e40d6fa9894"},
|
||||
{file = "lxml-4.9.2-cp37-cp37m-manylinux_2_5_x86_64.manylinux1_x86_64.whl", hash = "sha256:a82d05da00a58b8e4c0008edbc8a4b6ec5a4bc1e2ee0fb6ed157cf634ed7fa45"},
|
||||
{file = "lxml-4.9.2-cp37-cp37m-musllinux_1_1_aarch64.whl", hash = "sha256:223f4232855ade399bd409331e6ca70fb5578efef22cf4069a6090acc0f53c0e"},
|
||||
{file = "lxml-4.9.2-cp37-cp37m-musllinux_1_1_x86_64.whl", hash = "sha256:d17bc7c2ccf49c478c5bdd447594e82692c74222698cfc9b5daae7ae7e90743b"},
|
||||
{file = "lxml-4.9.2-cp37-cp37m-win32.whl", hash = "sha256:b64d891da92e232c36976c80ed7ebb383e3f148489796d8d31a5b6a677825efe"},
|
||||
{file = "lxml-4.9.2-cp37-cp37m-win_amd64.whl", hash = "sha256:a0a336d6d3e8b234a3aae3c674873d8f0e720b76bc1d9416866c41cd9500ffb9"},
|
||||
{file = "lxml-4.9.2-cp38-cp38-macosx_10_15_x86_64.whl", hash = "sha256:da4dd7c9c50c059aba52b3524f84d7de956f7fef88f0bafcf4ad7dde94a064e8"},
|
||||
{file = "lxml-4.9.2-cp38-cp38-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_24_i686.whl", hash = "sha256:821b7f59b99551c69c85a6039c65b75f5683bdc63270fec660f75da67469ca24"},
|
||||
{file = "lxml-4.9.2-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.manylinux_2_24_aarch64.whl", hash = "sha256:e5168986b90a8d1f2f9dc1b841467c74221bd752537b99761a93d2d981e04889"},
|
||||
{file = "lxml-4.9.2-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.manylinux_2_24_x86_64.whl", hash = "sha256:8e20cb5a47247e383cf4ff523205060991021233ebd6f924bca927fcf25cf86f"},
|
||||
{file = "lxml-4.9.2-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:13598ecfbd2e86ea7ae45ec28a2a54fb87ee9b9fdb0f6d343297d8e548392c03"},
|
||||
{file = "lxml-4.9.2-cp38-cp38-manylinux_2_5_x86_64.manylinux1_x86_64.whl", hash = "sha256:880bbbcbe2fca64e2f4d8e04db47bcdf504936fa2b33933efd945e1b429bea8c"},
|
||||
{file = "lxml-4.9.2-cp38-cp38-musllinux_1_1_aarch64.whl", hash = "sha256:7d2278d59425777cfcb19735018d897ca8303abe67cc735f9f97177ceff8027f"},
|
||||
{file = "lxml-4.9.2-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:5344a43228767f53a9df6e5b253f8cdca7dfc7b7aeae52551958192f56d98457"},
|
||||
{file = "lxml-4.9.2-cp38-cp38-win32.whl", hash = "sha256:925073b2fe14ab9b87e73f9a5fde6ce6392da430f3004d8b72cc86f746f5163b"},
|
||||
{file = "lxml-4.9.2-cp38-cp38-win_amd64.whl", hash = "sha256:9b22c5c66f67ae00c0199f6055705bc3eb3fcb08d03d2ec4059a2b1b25ed48d7"},
|
||||
{file = "lxml-4.9.2-cp39-cp39-macosx_10_15_x86_64.whl", hash = "sha256:5f50a1c177e2fa3ee0667a5ab79fdc6b23086bc8b589d90b93b4bd17eb0e64d1"},
|
||||
{file = "lxml-4.9.2-cp39-cp39-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_24_i686.whl", hash = "sha256:090c6543d3696cbe15b4ac6e175e576bcc3f1ccfbba970061b7300b0c15a2140"},
|
||||
{file = "lxml-4.9.2-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.manylinux_2_24_aarch64.whl", hash = "sha256:63da2ccc0857c311d764e7d3d90f429c252e83b52d1f8f1d1fe55be26827d1f4"},
|
||||
{file = "lxml-4.9.2-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.manylinux_2_24_x86_64.whl", hash = "sha256:5b4545b8a40478183ac06c073e81a5ce4cf01bf1734962577cf2bb569a5b3bbf"},
|
||||
{file = "lxml-4.9.2-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:2e430cd2824f05f2d4f687701144556646bae8f249fd60aa1e4c768ba7018947"},
|
||||
{file = "lxml-4.9.2-cp39-cp39-manylinux_2_5_x86_64.manylinux1_x86_64.whl", hash = "sha256:6804daeb7ef69e7b36f76caddb85cccd63d0c56dedb47555d2fc969e2af6a1a5"},
|
||||
{file = "lxml-4.9.2-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:a6e441a86553c310258aca15d1c05903aaf4965b23f3bc2d55f200804e005ee5"},
|
||||
{file = "lxml-4.9.2-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:ca34efc80a29351897e18888c71c6aca4a359247c87e0b1c7ada14f0ab0c0fb2"},
|
||||
{file = "lxml-4.9.2-cp39-cp39-win32.whl", hash = "sha256:6b418afe5df18233fc6b6093deb82a32895b6bb0b1155c2cdb05203f583053f1"},
|
||||
{file = "lxml-4.9.2-cp39-cp39-win_amd64.whl", hash = "sha256:f1496ea22ca2c830cbcbd473de8f114a320da308438ae65abad6bab7867fe38f"},
|
||||
{file = "lxml-4.9.2-pp37-pypy37_pp73-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_24_i686.whl", hash = "sha256:b264171e3143d842ded311b7dccd46ff9ef34247129ff5bf5066123c55c2431c"},
|
||||
{file = "lxml-4.9.2-pp37-pypy37_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.manylinux_2_24_x86_64.whl", hash = "sha256:0dc313ef231edf866912e9d8f5a042ddab56c752619e92dfd3a2c277e6a7299a"},
|
||||
{file = "lxml-4.9.2-pp38-pypy38_pp73-macosx_10_15_x86_64.whl", hash = "sha256:16efd54337136e8cd72fb9485c368d91d77a47ee2d42b057564aae201257d419"},
|
||||
{file = "lxml-4.9.2-pp38-pypy38_pp73-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_24_i686.whl", hash = "sha256:0f2b1e0d79180f344ff9f321327b005ca043a50ece8713de61d1cb383fb8ac05"},
|
||||
{file = "lxml-4.9.2-pp38-pypy38_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.manylinux_2_24_x86_64.whl", hash = "sha256:7b770ed79542ed52c519119473898198761d78beb24b107acf3ad65deae61f1f"},
|
||||
{file = "lxml-4.9.2-pp38-pypy38_pp73-win_amd64.whl", hash = "sha256:efa29c2fe6b4fdd32e8ef81c1528506895eca86e1d8c4657fda04c9b3786ddf9"},
|
||||
{file = "lxml-4.9.2-pp39-pypy39_pp73-macosx_10_15_x86_64.whl", hash = "sha256:7e91ee82f4199af8c43d8158024cbdff3d931df350252288f0d4ce656df7f3b5"},
|
||||
{file = "lxml-4.9.2-pp39-pypy39_pp73-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_24_i686.whl", hash = "sha256:b23e19989c355ca854276178a0463951a653309fb8e57ce674497f2d9f208746"},
|
||||
{file = "lxml-4.9.2-pp39-pypy39_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.manylinux_2_24_x86_64.whl", hash = "sha256:01d36c05f4afb8f7c20fd9ed5badca32a2029b93b1750f571ccc0b142531caf7"},
|
||||
{file = "lxml-4.9.2-pp39-pypy39_pp73-win_amd64.whl", hash = "sha256:7b515674acfdcadb0eb5d00d8a709868173acece5cb0be3dd165950cbfdf5409"},
|
||||
{file = "lxml-4.9.2.tar.gz", hash = "sha256:2455cfaeb7ac70338b3257f41e21f0724f4b5b0c0e7702da67ee6c3640835b67"},
|
||||
]
|
||||
|
||||
[package.extras]
|
||||
cssselect = ["cssselect (>=0.7)"]
|
||||
html5 = ["html5lib"]
|
||||
htmlsoup = ["BeautifulSoup4"]
|
||||
source = ["Cython (>=0.29.7)"]
|
||||
|
||||
[[package]]
|
||||
name = "lz4"
|
||||
version = "4.3.2"
|
||||
|
|
@ -1661,6 +1824,24 @@ docs = ["sphinx (>=1.6.0)", "sphinx-bootstrap-theme"]
|
|||
flake8 = ["flake8"]
|
||||
tests = ["psutil", "pytest (!=3.3.0)", "pytest-cov"]
|
||||
|
||||
[[package]]
|
||||
name = "markdown"
|
||||
version = "3.4.3"
|
||||
description = "Python implementation of John Gruber's Markdown."
|
||||
category = "main"
|
||||
optional = false
|
||||
python-versions = ">=3.7"
|
||||
files = [
|
||||
{file = "Markdown-3.4.3-py3-none-any.whl", hash = "sha256:065fd4df22da73a625f14890dd77eb8040edcbd68794bcd35943be14490608b2"},
|
||||
{file = "Markdown-3.4.3.tar.gz", hash = "sha256:8bf101198e004dc93e84a12a7395e31aac6a9c9942848ae1d99b9d72cf9b3520"},
|
||||
]
|
||||
|
||||
[package.dependencies]
|
||||
importlib-metadata = {version = ">=4.4", markers = "python_version < \"3.10\""}
|
||||
|
||||
[package.extras]
|
||||
testing = ["coverage", "pyyaml"]
|
||||
|
||||
[[package]]
|
||||
name = "markdown-it-py"
|
||||
version = "2.2.0"
|
||||
|
|
@ -1839,6 +2020,24 @@ docs = ["sphinx"]
|
|||
gmpy = ["gmpy2 (>=2.1.0a4)"]
|
||||
tests = ["pytest (>=4.6)"]
|
||||
|
||||
[[package]]
|
||||
name = "msg-parser"
|
||||
version = "1.2.0"
|
||||
description = "This module enables reading, parsing and converting Microsoft Outlook MSG E-Mail files."
|
||||
category = "main"
|
||||
optional = false
|
||||
python-versions = ">=3.4"
|
||||
files = [
|
||||
{file = "msg_parser-1.2.0-py2.py3-none-any.whl", hash = "sha256:d47a2f0b2a359cb189fad83cc991b63ea781ecc70d91410324273fbf93e95375"},
|
||||
{file = "msg_parser-1.2.0.tar.gz", hash = "sha256:0de858d4fcebb6c8f6f028da83a17a20fe01cdce67c490779cf43b3b0162aa66"},
|
||||
]
|
||||
|
||||
[package.dependencies]
|
||||
olefile = ">=0.46"
|
||||
|
||||
[package.extras]
|
||||
rtf = ["compressed-rtf (>=1.0.5)"]
|
||||
|
||||
[[package]]
|
||||
name = "multidict"
|
||||
version = "6.0.4"
|
||||
|
|
@ -2246,6 +2445,17 @@ files = [
|
|||
setuptools = "*"
|
||||
wheel = "*"
|
||||
|
||||
[[package]]
|
||||
name = "olefile"
|
||||
version = "0.46"
|
||||
description = "Python package to parse, read and write Microsoft OLE2 files (Structured Storage or Compound Document, Microsoft Office)"
|
||||
category = "main"
|
||||
optional = false
|
||||
python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*"
|
||||
files = [
|
||||
{file = "olefile-0.46.zip", hash = "sha256:133b031eaf8fd2c9399b78b8bc5b8fcbe4c31e85295749bb17a87cba8f3c3964"},
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "openai"
|
||||
version = "0.27.4"
|
||||
|
|
@ -2269,6 +2479,21 @@ dev = ["black (>=21.6b0,<22.0)", "pytest (>=6.0.0,<7.0.0)", "pytest-asyncio", "p
|
|||
embeddings = ["matplotlib", "numpy", "openpyxl (>=3.0.7)", "pandas (>=1.2.3)", "pandas-stubs (>=1.1.0.11)", "plotly", "scikit-learn (>=1.0.2)", "scipy", "tenacity (>=8.0.1)"]
|
||||
wandb = ["numpy", "openpyxl (>=3.0.7)", "pandas (>=1.2.3)", "pandas-stubs (>=1.1.0.11)", "wandb"]
|
||||
|
||||
[[package]]
|
||||
name = "openpyxl"
|
||||
version = "3.1.2"
|
||||
description = "A Python library to read/write Excel 2010 xlsx/xlsm files"
|
||||
category = "main"
|
||||
optional = false
|
||||
python-versions = ">=3.6"
|
||||
files = [
|
||||
{file = "openpyxl-3.1.2-py2.py3-none-any.whl", hash = "sha256:f91456ead12ab3c6c2e9491cf33ba6d08357d802192379bb482f1033ade496f5"},
|
||||
{file = "openpyxl-3.1.2.tar.gz", hash = "sha256:a6f5977418eff3b2d5500d54d9db50c8277a368436f4e4f8ddb1be3422870184"},
|
||||
]
|
||||
|
||||
[package.dependencies]
|
||||
et-xmlfile = "*"
|
||||
|
||||
[[package]]
|
||||
name = "packaging"
|
||||
version = "23.0"
|
||||
|
|
@ -2720,6 +2945,18 @@ files = [
|
|||
[package.extras]
|
||||
plugins = ["importlib-metadata"]
|
||||
|
||||
[[package]]
|
||||
name = "pypandoc"
|
||||
version = "1.11"
|
||||
description = "Thin wrapper for pandoc."
|
||||
category = "main"
|
||||
optional = false
|
||||
python-versions = ">=3.6"
|
||||
files = [
|
||||
{file = "pypandoc-1.11-py3-none-any.whl", hash = "sha256:b260596934e9cfc6513056110a7c8600171d414f90558bf4407e68b209be8007"},
|
||||
{file = "pypandoc-1.11.tar.gz", hash = "sha256:7f6d68db0e57e0f6961bec2190897118c4d305fc2d31c22cd16037f22ee084a5"},
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "pyparsing"
|
||||
version = "3.0.9"
|
||||
|
|
@ -2735,6 +2972,42 @@ files = [
|
|||
[package.extras]
|
||||
diagrams = ["jinja2", "railroad-diagrams"]
|
||||
|
||||
[[package]]
|
||||
name = "pypdf"
|
||||
version = "3.7.1"
|
||||
description = "A pure-python PDF library capable of splitting, merging, cropping, and transforming PDF files"
|
||||
category = "main"
|
||||
optional = false
|
||||
python-versions = ">=3.6"
|
||||
files = [
|
||||
{file = "pypdf-3.7.1-py3-none-any.whl", hash = "sha256:fa780c9464ec3b49fd16dabd110a40a291439bc6edd0f21f302add63c1f5ade5"},
|
||||
{file = "pypdf-3.7.1.tar.gz", hash = "sha256:dfb61fcccd4bc6d321aae612c01924b3c953aa5857e6e39d31e24dbb9b49da13"},
|
||||
]
|
||||
|
||||
[package.dependencies]
|
||||
typing_extensions = {version = ">=3.10.0.0", markers = "python_version < \"3.10\""}
|
||||
|
||||
[package.extras]
|
||||
crypto = ["PyCryptodome"]
|
||||
dev = ["black", "flit", "pip-tools", "pre-commit (<2.18.0)", "pytest-cov", "wheel"]
|
||||
docs = ["myst_parser", "sphinx", "sphinx_rtd_theme"]
|
||||
full = ["Pillow", "PyCryptodome"]
|
||||
image = ["Pillow"]
|
||||
|
||||
[[package]]
|
||||
name = "pysrt"
|
||||
version = "1.1.2"
|
||||
description = "SubRip (.srt) subtitle parser and writer"
|
||||
category = "main"
|
||||
optional = false
|
||||
python-versions = "*"
|
||||
files = [
|
||||
{file = "pysrt-1.1.2.tar.gz", hash = "sha256:b4f844ba33e4e7743e9db746492f3a193dc0bc112b153914698e7c1cdeb9b0b9"},
|
||||
]
|
||||
|
||||
[package.dependencies]
|
||||
chardet = "*"
|
||||
|
||||
[[package]]
|
||||
name = "pytest"
|
||||
version = "7.3.0"
|
||||
|
|
@ -2773,6 +3046,20 @@ files = [
|
|||
[package.dependencies]
|
||||
six = ">=1.5"
|
||||
|
||||
[[package]]
|
||||
name = "python-docx"
|
||||
version = "0.8.11"
|
||||
description = "Create and update Microsoft Word .docx files."
|
||||
category = "main"
|
||||
optional = false
|
||||
python-versions = "*"
|
||||
files = [
|
||||
{file = "python-docx-0.8.11.tar.gz", hash = "sha256:1105d233a0956dd8dd1e710d20b159e2d72ac3c301041b95f4d4ceb3e0ebebc4"},
|
||||
]
|
||||
|
||||
[package.dependencies]
|
||||
lxml = ">=2.3.2"
|
||||
|
||||
[[package]]
|
||||
name = "python-dotenv"
|
||||
version = "1.0.0"
|
||||
|
|
@ -2788,6 +3075,34 @@ files = [
|
|||
[package.extras]
|
||||
cli = ["click (>=5.0)"]
|
||||
|
||||
[[package]]
|
||||
name = "python-magic"
|
||||
version = "0.4.27"
|
||||
description = "File type identification using libmagic"
|
||||
category = "main"
|
||||
optional = false
|
||||
python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*"
|
||||
files = [
|
||||
{file = "python-magic-0.4.27.tar.gz", hash = "sha256:c1ba14b08e4a5f5c31a302b7721239695b2f0f058d125bd5ce1ee36b9d9d3c3b"},
|
||||
{file = "python_magic-0.4.27-py2.py3-none-any.whl", hash = "sha256:c212960ad306f700aa0d01e5d7a325d20548ff97eb9920dcd29513174f0294d3"},
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "python-pptx"
|
||||
version = "0.6.21"
|
||||
description = "Generate and manipulate Open XML PowerPoint (.pptx) files"
|
||||
category = "main"
|
||||
optional = false
|
||||
python-versions = "*"
|
||||
files = [
|
||||
{file = "python-pptx-0.6.21.tar.gz", hash = "sha256:7798a2aaf89563565b3c7120c0acfe9aff775db0db3580544e3bf4840c2e378f"},
|
||||
]
|
||||
|
||||
[package.dependencies]
|
||||
lxml = ">=3.1.0"
|
||||
Pillow = ">=3.3.2"
|
||||
XlsxWriter = ">=0.5.7"
|
||||
|
||||
[[package]]
|
||||
name = "pytz"
|
||||
version = "2023.3"
|
||||
|
|
@ -3571,6 +3886,10 @@ category = "main"
|
|||
optional = false
|
||||
python-versions = ">=3.8.0"
|
||||
files = [
|
||||
{file = "torch-2.0.0-1-cp310-cp310-manylinux2014_aarch64.whl", hash = "sha256:c9090bda7d2eeeecd74f51b721420dbeb44f838d4536cc1b284e879417e3064a"},
|
||||
{file = "torch-2.0.0-1-cp311-cp311-manylinux2014_aarch64.whl", hash = "sha256:bd42db2a48a20574d2c33489e120e9f32789c4dc13c514b0c44272972d14a2d7"},
|
||||
{file = "torch-2.0.0-1-cp38-cp38-manylinux2014_aarch64.whl", hash = "sha256:8969aa8375bcbc0c2993e7ede0a7f889df9515f18b9b548433f412affed478d9"},
|
||||
{file = "torch-2.0.0-1-cp39-cp39-manylinux2014_aarch64.whl", hash = "sha256:ab2da16567cb55b67ae39e32d520d68ec736191d88ac79526ca5874754c32203"},
|
||||
{file = "torch-2.0.0-cp310-cp310-manylinux1_x86_64.whl", hash = "sha256:7a9319a67294ef02459a19738bbfa8727bb5307b822dadd708bc2ccf6c901aca"},
|
||||
{file = "torch-2.0.0-cp310-cp310-manylinux2014_aarch64.whl", hash = "sha256:9f01fe1f6263f31bd04e1757946fd63ad531ae37f28bb2dbf66f5c826ee089f4"},
|
||||
{file = "torch-2.0.0-cp310-cp310-win_amd64.whl", hash = "sha256:527f4ae68df7b8301ee6b1158ca56350282ea633686537b30dbb5d7b4a52622a"},
|
||||
|
|
@ -3788,6 +4107,15 @@ category = "main"
|
|||
optional = false
|
||||
python-versions = "*"
|
||||
files = [
|
||||
{file = "triton-2.0.0-1-cp310-cp310-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:38806ee9663f4b0f7cd64790e96c579374089e58f49aac4a6608121aa55e2505"},
|
||||
{file = "triton-2.0.0-1-cp311-cp311-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:226941c7b8595219ddef59a1fdb821e8c744289a132415ddd584facedeb475b1"},
|
||||
{file = "triton-2.0.0-1-cp36-cp36m-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:4c9fc8c89874bc48eb7e7b2107a9b8d2c0bf139778637be5bfccb09191685cfd"},
|
||||
{file = "triton-2.0.0-1-cp37-cp37m-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:d2684b6a60b9f174f447f36f933e9a45f31db96cb723723ecd2dcfd1c57b778b"},
|
||||
{file = "triton-2.0.0-1-cp38-cp38-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:9d4978298b74fcf59a75fe71e535c092b023088933b2f1df933ec32615e4beef"},
|
||||
{file = "triton-2.0.0-1-cp39-cp39-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:74f118c12b437fb2ca25e1a04759173b517582fcf4c7be11913316c764213656"},
|
||||
{file = "triton-2.0.0-1-pp37-pypy37_pp73-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:9618815a8da1d9157514f08f855d9e9ff92e329cd81c0305003eb9ec25cc5add"},
|
||||
{file = "triton-2.0.0-1-pp38-pypy38_pp73-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:1aca3303629cd3136375b82cb9921727f804e47ebee27b2677fef23005c3851a"},
|
||||
{file = "triton-2.0.0-1-pp39-pypy39_pp73-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:e3e13aa8b527c9b642e3a9defcc0fbd8ffbe1c80d8ac8c15a01692478dc64d8a"},
|
||||
{file = "triton-2.0.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:8f05a7e64e4ca0565535e3d5d3405d7e49f9d308505bb7773d21fb26a4c008c2"},
|
||||
{file = "triton-2.0.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:bb4b99ca3c6844066e516658541d876c28a5f6e3a852286bbc97ad57134827fd"},
|
||||
{file = "triton-2.0.0-cp36-cp36m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:47b4d70dc92fb40af553b4460492c31dc7d3a114a979ffb7a5cdedb7eb546c08"},
|
||||
|
|
@ -3897,6 +4225,44 @@ files = [
|
|||
mypy-extensions = ">=0.3.0"
|
||||
typing-extensions = ">=3.7.4"
|
||||
|
||||
[[package]]
|
||||
name = "unstructured"
|
||||
version = "0.5.11"
|
||||
description = "A library that prepares raw documents for downstream ML tasks."
|
||||
category = "main"
|
||||
optional = false
|
||||
python-versions = ">=3.7.0"
|
||||
files = [
|
||||
{file = "unstructured-0.5.11.tar.gz", hash = "sha256:9b0272a1d52d9f1411a4ebb5c051abbf1239625b055397253ba4400d727ee4c1"},
|
||||
]
|
||||
|
||||
[package.dependencies]
|
||||
argilla = "*"
|
||||
certifi = ">=2022.12.07"
|
||||
lxml = "*"
|
||||
markdown = "*"
|
||||
msg_parser = "*"
|
||||
nltk = "*"
|
||||
openpyxl = "*"
|
||||
pandas = "*"
|
||||
pillow = "*"
|
||||
pypandoc = "*"
|
||||
python-docx = "*"
|
||||
python-magic = "*"
|
||||
python-pptx = "*"
|
||||
requests = "*"
|
||||
|
||||
[package.extras]
|
||||
azure = ["adlfs", "fsspec"]
|
||||
github = ["pygithub (==1.57.0)"]
|
||||
gitlab = ["python-gitlab"]
|
||||
google-drive = ["google-api-python-client", "protobuf (<3.21)"]
|
||||
huggingface = ["langdetect", "sacremoses", "sentencepiece", "torch", "transformers"]
|
||||
local-inference = ["unstructured-inference (==0.3.2)"]
|
||||
reddit = ["praw"]
|
||||
s3 = ["fsspec", "s3fs"]
|
||||
wikipedia = ["wikipedia"]
|
||||
|
||||
[[package]]
|
||||
name = "uritemplate"
|
||||
version = "4.1.1"
|
||||
|
|
@ -4139,6 +4505,18 @@ files = [
|
|||
[package.extras]
|
||||
test = ["pytest (>=6.0.0)"]
|
||||
|
||||
[[package]]
|
||||
name = "xlsxwriter"
|
||||
version = "3.0.9"
|
||||
description = "A Python module for creating Excel XLSX files."
|
||||
category = "main"
|
||||
optional = false
|
||||
python-versions = ">=3.6"
|
||||
files = [
|
||||
{file = "XlsxWriter-3.0.9-py3-none-any.whl", hash = "sha256:5eaaf3c6f791cba1dd1c3065147c35982180f693436093aabe5b7d6c16148e95"},
|
||||
{file = "XlsxWriter-3.0.9.tar.gz", hash = "sha256:7216d39a2075afac7a28cad81f6ac31b0b16d8976bf1b775577d157346f891dd"},
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "yarl"
|
||||
version = "1.8.2"
|
||||
|
|
@ -4231,7 +4609,7 @@ multidict = ">=4.0"
|
|||
name = "zipp"
|
||||
version = "3.15.0"
|
||||
description = "Backport of pathlib-compatible object wrapper for zip files"
|
||||
category = "dev"
|
||||
category = "main"
|
||||
optional = false
|
||||
python-versions = ">=3.7"
|
||||
files = [
|
||||
|
|
@ -4313,4 +4691,4 @@ cffi = ["cffi (>=1.11)"]
|
|||
[metadata]
|
||||
lock-version = "2.0"
|
||||
python-versions = "^3.9"
|
||||
content-hash = "e1d76e2c8056044cc816bda2fdc9d19027626b94d4dd8872b4a5cddf2b746ae9"
|
||||
content-hash = "4f27ad94f244998e9e79fe1ae733cc786d82025f022c1d337e5c84d0393947b6"
|
||||
|
|
|
|||
|
|
@ -39,6 +39,11 @@ huggingface-hub = "^0.13.3"
|
|||
rich = "^13.3.3"
|
||||
llama-cpp-python = "0.1.23"
|
||||
networkx = "^3.1"
|
||||
unstructured = "^0.5.11"
|
||||
pypdf = "^3.7.1"
|
||||
lxml = "^4.9.2"
|
||||
pysrt = "^1.1.2"
|
||||
fake-useragent = "^1.1.3"
|
||||
|
||||
[tool.poetry.group.dev.dependencies]
|
||||
black = "^23.1.0"
|
||||
|
|
|
|||
56
src/backend/langflow/cache/utils.py
vendored
56
src/backend/langflow/cache/utils.py
vendored
|
|
@ -1,3 +1,4 @@
|
|||
import base64
|
||||
import contextlib
|
||||
import functools
|
||||
import hashlib
|
||||
|
|
@ -10,6 +11,19 @@ from pathlib import Path
|
|||
import dill # type: ignore
|
||||
|
||||
|
||||
def create_cache_folder(func):
    """Decorator that makes sure the cache folder exists before ``func`` runs.

    The folder is ``<system temp dir>/<PREFIX>``. It is (re)checked on every
    call of the wrapped function, which is a no-op when it already exists.

    Args:
        func: The callable to wrap.

    Returns:
        A wrapper that creates the folder and then returns
        ``func(*args, **kwargs)`` unchanged.
    """

    # functools.wraps keeps the wrapped function's __name__/__doc__ so that
    # logs, tracebacks and introspection still show the real function.
    @functools.wraps(func)
    def wrapper(*args, **kwargs):
        # Get the destination folder
        cache_path = Path(tempfile.gettempdir()) / PREFIX

        # Create the destination folder if it doesn't exist
        os.makedirs(cache_path, exist_ok=True)

        return func(*args, **kwargs)

    return wrapper
|
||||
|
||||
|
||||
def memoize_dict(maxsize=128):
|
||||
cache = OrderedDict()
|
||||
|
||||
|
|
@ -39,9 +53,10 @@ def memoize_dict(maxsize=128):
|
|||
PREFIX = "langflow_cache"
|
||||
|
||||
|
||||
@create_cache_folder
|
||||
def clear_old_cache_files(max_cache_size: int = 3):
|
||||
cache_dir = Path(tempfile.gettempdir())
|
||||
cache_files = list(cache_dir.glob(f"{PREFIX}_*.dill"))
|
||||
cache_dir = Path(tempfile.gettempdir()) / PREFIX
|
||||
cache_files = list(cache_dir.glob("*.dill"))
|
||||
|
||||
if len(cache_files) > max_cache_size:
|
||||
cache_files_sorted_by_mtime = sorted(
|
||||
|
|
@ -84,8 +99,40 @@ def filter_json(json_data):
|
|||
return filtered_data
|
||||
|
||||
|
||||
@create_cache_folder
|
||||
def save_binary_file(content: str, file_name: str, accepted_types: list[str]) -> str:
    """
    Decode a base64 data URL and save it as a file in the cache folder.

    Args:
        content: The file as a data URL string, e.g.
            ``"data:text/plain;base64,aGVsbG8="``. The segment after the
            first comma is base64-decoded.
        file_name: The name of the file, including its extension.
        accepted_types: The suffixes (e.g. ``".txt"``) the file name is
            allowed to end with.

    Returns:
        The path to the saved file.

    Raises:
        ValueError: If the file name does not end with an accepted suffix,
            if it has no base name, or if the content is not a data URL
            with a valid base64 payload.
    """
    if not any(file_name.endswith(suffix) for suffix in accepted_types):
        raise ValueError(f"File {file_name} is not accepted")

    # content == 'data:<mime>;base64,<payload>'; without a comma there is no
    # payload to decode, so fail with a clear error instead of an IndexError.
    parts = content.split(",") if isinstance(content, str) else []
    if len(parts) < 2:
        raise ValueError(f"Content of file {file_name} is not a valid data URL")
    decoded_bytes = base64.b64decode(parts[1])

    # The file name comes from the client: keep only its final component so an
    # absolute path or "../" segments cannot write outside the cache folder.
    safe_name = os.path.basename(file_name)
    if not safe_name:
        raise ValueError(f"File {file_name} is not accepted")

    # Get the destination folder
    cache_path = Path(tempfile.gettempdir()) / PREFIX

    # Create the full file path
    file_path = os.path.join(cache_path, safe_name)

    # Save the binary content to the file
    with open(file_path, "wb") as file:
        file.write(decoded_bytes)

    return file_path
|
||||
|
||||
|
||||
@create_cache_folder
|
||||
def save_cache(hash_val: str, chat_data, clean_old_cache_files: bool):
|
||||
cache_path = Path(tempfile.gettempdir()) / f"{PREFIX}_{hash_val}.dill"
|
||||
cache_path = Path(tempfile.gettempdir()) / PREFIX / f"{hash_val}.dill"
|
||||
with cache_path.open("wb") as cache_file:
|
||||
dill.dump(chat_data, cache_file)
|
||||
|
||||
|
|
@ -93,8 +140,9 @@ def save_cache(hash_val: str, chat_data, clean_old_cache_files: bool):
|
|||
clear_old_cache_files()
|
||||
|
||||
|
||||
@create_cache_folder
|
||||
def load_cache(hash_val):
|
||||
cache_path = Path(tempfile.gettempdir()) / f"{PREFIX}_{hash_val}.dill"
|
||||
cache_path = Path(tempfile.gettempdir()) / PREFIX / f"{hash_val}.dill"
|
||||
if cache_path.exists():
|
||||
with cache_path.open("rb") as cache_file:
|
||||
return dill.load(cache_file)
|
||||
|
|
|
|||
|
|
@ -61,8 +61,31 @@ vectorstores:
|
|||
- Chroma
|
||||
|
||||
documentloaders:
|
||||
- AirbyteJSONLoader
|
||||
- CoNLLULoader
|
||||
- CSVLoader
|
||||
- UnstructuredEmailLoader
|
||||
- EverNoteLoader
|
||||
- FacebookChatLoader
|
||||
- GutenbergLoader
|
||||
- BSHTMLLoader
|
||||
- UnstructuredHTMLLoader
|
||||
# - UnstructuredImageLoader # Issue with Python 3.11 (https://github.com/Unstructured-IO/unstructured-inference/issues/83)
|
||||
- UnstructuredMarkdownLoader
|
||||
- PyPDFLoader
|
||||
- UnstructuredPowerPointLoader
|
||||
- SRTLoader
|
||||
- TelegramChatLoader
|
||||
- TextLoader
|
||||
- UnstructuredWordDocumentLoader
|
||||
- WebBaseLoader
|
||||
- AZLyricsLoader
|
||||
- CollegeConfidentialLoader
|
||||
- HNLoader
|
||||
- IFixitLoader
|
||||
- IMSDbLoader
|
||||
- GitbookLoader
|
||||
- ReadTheDocsLoader
|
||||
|
||||
textsplitters:
|
||||
- CharacterTextSplitter
|
||||
|
|
|
|||
|
|
@ -8,8 +8,8 @@ import warnings
|
|||
from copy import deepcopy
|
||||
from typing import Any, Dict, List, Optional
|
||||
|
||||
from langflow.cache import utils as cache_utils
|
||||
from langflow.graph.constants import DIRECT_TYPES
|
||||
from langflow.graph.utils import load_file
|
||||
from langflow.interface import loading
|
||||
from langflow.interface.listing import ALL_TYPES_DICT
|
||||
from langflow.utils.logger import logger
|
||||
|
|
@ -88,8 +88,11 @@ class Node:
|
|||
file_name = value.get("value")
|
||||
content = value.get("content")
|
||||
type_to_load = value.get("suffixes")
|
||||
loaded_dict = load_file(file_name, content, type_to_load)
|
||||
params[key] = loaded_dict
|
||||
file_path = cache_utils.save_binary_file(
|
||||
content=content, file_name=file_name, accepted_types=type_to_load
|
||||
)
|
||||
|
||||
params[key] = file_path
|
||||
|
||||
# We should check if the type is in something not
|
||||
# the opposite
|
||||
|
|
|
|||
|
|
@ -1,48 +1,4 @@
|
|||
import base64
|
||||
import csv
|
||||
import io
|
||||
import json
|
||||
import re
|
||||
from typing import Any
|
||||
|
||||
import yaml
|
||||
|
||||
|
||||
def load_file(file_name, file_content, accepted_types) -> Any:
|
||||
"""Load a file from a string."""
|
||||
# Check if the file is accepted
|
||||
if not any(file_name.endswith(suffix) for suffix in accepted_types):
|
||||
raise ValueError(f"File {file_name} is not accepted")
|
||||
# Get the suffix
|
||||
suffix = file_name.split(".")[-1]
|
||||
# file_content == 'data:application/x-yaml;base64,b3BlbmFwaTogIjMuMC4wIg...'
|
||||
data = file_content.split(",")[1]
|
||||
decoded_bytes = base64.b64decode(data)
|
||||
|
||||
# Convert the bytes object to a string
|
||||
decoded_string = decoded_bytes.decode("utf-8")
|
||||
if suffix == "json":
|
||||
# Return the json content
|
||||
return json.loads(decoded_string)
|
||||
elif suffix in ["yaml", "yml"]:
|
||||
# Return the yaml content
|
||||
loaded_yaml = yaml.load(decoded_string, Loader=yaml.FullLoader)
|
||||
try:
|
||||
from langchain.agents.agent_toolkits.openapi.spec import reduce_openapi_spec # type: ignore
|
||||
|
||||
return reduce_openapi_spec(loaded_yaml)
|
||||
except ImportError:
|
||||
return loaded_yaml
|
||||
|
||||
elif suffix == "csv":
|
||||
# Load the csv content
|
||||
csv_reader = csv.DictReader(io.StringIO(decoded_string))
|
||||
return list(csv_reader)
|
||||
elif suffix == "txt":
|
||||
# Return the text content
|
||||
return decoded_string
|
||||
else:
|
||||
raise ValueError(f"File {file_name} is not accepted")
|
||||
|
||||
|
||||
def validate_prompt(prompt: str):
|
||||
|
|
|
|||
|
|
@ -77,7 +77,7 @@ class CSVAgent(AgentExecutor):
|
|||
@classmethod
|
||||
def from_toolkit_and_llm(
|
||||
cls,
|
||||
path: dict,
|
||||
path: str,
|
||||
llm: BaseLanguageModel,
|
||||
pandas_kwargs: Optional[dict] = None,
|
||||
**kwargs: Any
|
||||
|
|
@ -85,7 +85,7 @@ class CSVAgent(AgentExecutor):
|
|||
import pandas as pd # type: ignore
|
||||
|
||||
_kwargs = pandas_kwargs or {}
|
||||
df = pd.DataFrame.from_dict(path, **_kwargs)
|
||||
df = pd.read_csv(path, **_kwargs)
|
||||
|
||||
tools = [PythonAstREPLTool(locals={"df": df})] # type: ignore
|
||||
prompt = ZeroShotAgent.create_prompt(
|
||||
|
|
|
|||
|
|
@ -2,26 +2,32 @@ from typing import Dict, List, Optional
|
|||
|
||||
from langflow.interface.base import LangChainTypeCreator
|
||||
from langflow.interface.custom_lists import documentloaders_type_to_cls_dict
|
||||
from langflow.interface.document_loaders.custom import CUSTOM_DOCUMENTLOADERS
|
||||
from langflow.settings import settings
|
||||
from langflow.utils.util import build_template_from_class
|
||||
from langflow.utils.logger import logger
|
||||
|
||||
|
||||
def build_file_path_template(
    suffixes: list, fileTypes: list, name: str = "file_path"
) -> Dict:
    """Build the template field for a document loader's file input.

    Args:
        suffixes: Accepted file suffixes, e.g. ``[".csv"]``.
        fileTypes: Accepted file types, e.g. ``["csv"]``.
        name: Value stored under the field's ``"name"`` key.

    Returns:
        A dict describing a required, visible field of type ``"file"``
        with an empty value.
    """
    # Fixed attributes shared by every file field.
    field: Dict = {"type": "file", "required": True, "show": True}
    # Caller-specific attributes, appended in the original key order.
    field.update(
        {
            "name": name,
            "value": "",
            "suffixes": suffixes,
            "fileTypes": fileTypes,
        }
    )
    return field
|
||||
|
||||
|
||||
class DocumentLoaderCreator(LangChainTypeCreator):
|
||||
type_name: str = "documentloaders"
|
||||
|
||||
@property
|
||||
def type_to_loader_dict(self) -> Dict:
|
||||
types = documentloaders_type_to_cls_dict
|
||||
|
||||
# Drop some types that are reimplemented with the same name
|
||||
types.pop("TextLoader")
|
||||
|
||||
for name, documentloader in CUSTOM_DOCUMENTLOADERS.items():
|
||||
types[name] = documentloader
|
||||
|
||||
return types
|
||||
return documentloaders_type_to_cls_dict
|
||||
|
||||
def get_signature(self, name: str) -> Optional[Dict]:
|
||||
"""Get the signature of a document loader."""
|
||||
|
|
@ -30,24 +36,96 @@ class DocumentLoaderCreator(LangChainTypeCreator):
|
|||
name, documentloaders_type_to_cls_dict
|
||||
)
|
||||
|
||||
if name == "TextLoader":
|
||||
signature["template"]["file"] = {
|
||||
"type": "file",
|
||||
"required": True,
|
||||
"show": True,
|
||||
"name": "path",
|
||||
"value": "",
|
||||
"suffixes": [".txt"],
|
||||
"fileTypes": ["txt"],
|
||||
}
|
||||
elif name == "WebBaseLoader":
|
||||
file_path_templates = {
|
||||
"AirbyteJSONLoader": build_file_path_template(
|
||||
suffixes=[".json"], fileTypes=["json"]
|
||||
),
|
||||
"CoNLLULoader": build_file_path_template(
|
||||
suffixes=[".csv"], fileTypes=["csv"]
|
||||
),
|
||||
"CSVLoader": build_file_path_template(
|
||||
suffixes=[".csv"], fileTypes=["csv"]
|
||||
),
|
||||
"UnstructuredEmailLoader": build_file_path_template(
|
||||
suffixes=[".eml"], fileTypes=["eml"]
|
||||
),
|
||||
"EverNoteLoader": build_file_path_template(
|
||||
suffixes=[".xml"], fileTypes=["xml"]
|
||||
),
|
||||
"FacebookChatLoader": build_file_path_template(
|
||||
suffixes=[".json"], fileTypes=["json"]
|
||||
),
|
||||
"GutenbergLoader": build_file_path_template(
|
||||
suffixes=[".txt"], fileTypes=["txt"]
|
||||
),
|
||||
"BSHTMLLoader": build_file_path_template(
|
||||
suffixes=[".html"], fileTypes=["html"]
|
||||
),
|
||||
"UnstructuredHTMLLoader": build_file_path_template(
|
||||
suffixes=[".html"], fileTypes=["html"]
|
||||
),
|
||||
"UnstructuredImageLoader": build_file_path_template(
|
||||
suffixes=[".jpg", ".jpeg", ".png", ".gif", ".bmp"],
|
||||
fileTypes=["jpg", "jpeg", "png", "gif", "bmp"],
|
||||
),
|
||||
"UnstructuredMarkdownLoader": build_file_path_template(
|
||||
suffixes=[".md"], fileTypes=["md"]
|
||||
),
|
||||
"PyPDFLoader": build_file_path_template(
|
||||
suffixes=[".pdf"], fileTypes=["pdf"]
|
||||
),
|
||||
"UnstructuredPowerPointLoader": build_file_path_template(
|
||||
suffixes=[".pptx", ".ppt"], fileTypes=["pptx", "ppt"]
|
||||
),
|
||||
"SRTLoader": build_file_path_template(
|
||||
suffixes=[".srt"], fileTypes=["srt"]
|
||||
),
|
||||
"TelegramChatLoader": build_file_path_template(
|
||||
suffixes=[".json"], fileTypes=["json"]
|
||||
),
|
||||
"TextLoader": build_file_path_template(
|
||||
suffixes=[".txt"], fileTypes=["txt"]
|
||||
),
|
||||
"UnstructuredWordDocumentLoader": build_file_path_template(
|
||||
suffixes=[".docx", ".doc"], fileTypes=["docx", "doc"]
|
||||
),
|
||||
}
|
||||
|
||||
if name in file_path_templates:
|
||||
signature["template"]["file_path"] = file_path_templates[name]
|
||||
elif name in {
|
||||
"WebBaseLoader",
|
||||
"AZLyricsLoader",
|
||||
"CollegeConfidentialLoader",
|
||||
"HNLoader",
|
||||
"IFixitLoader",
|
||||
"IMSDbLoader",
|
||||
}:
|
||||
signature["template"]["web_path"] = {
|
||||
"type": "str",
|
||||
"required": True,
|
||||
"show": True,
|
||||
"name": "web_path",
|
||||
"value": "",
|
||||
"display_name": "Web Path",
|
||||
"display_name": "Web Page",
|
||||
}
|
||||
elif name in {"GitbookLoader"}:
|
||||
signature["template"]["web_page"] = {
|
||||
"type": "str",
|
||||
"required": True,
|
||||
"show": True,
|
||||
"name": "web_page",
|
||||
"value": "",
|
||||
"display_name": "Web Page",
|
||||
}
|
||||
elif name in {"ReadTheDocsLoader"}:
|
||||
signature["template"]["path"] = {
|
||||
"type": "str",
|
||||
"required": True,
|
||||
"show": True,
|
||||
"name": "path",
|
||||
"value": "",
|
||||
"display_name": "Web Page",
|
||||
}
|
||||
|
||||
return signature
|
||||
|
|
|
|||
|
|
@ -1,22 +0,0 @@
|
|||
"""Load text files."""
|
||||
from typing import List
|
||||
|
||||
from langchain.docstore.document import Document
|
||||
from langchain.document_loaders.base import BaseLoader
|
||||
|
||||
|
||||
class TextLoader(BaseLoader):
|
||||
"""Load Text files."""
|
||||
|
||||
def __init__(self, file: str):
|
||||
"""Initialize with file path."""
|
||||
self.file = file
|
||||
|
||||
def load(self) -> List[Document]:
|
||||
"""Load from file path."""
|
||||
return [Document(page_content=self.file, metadata={"source": "loaded"})]
|
||||
|
||||
|
||||
CUSTOM_DOCUMENTLOADERS = {
|
||||
"TextLoader": TextLoader,
|
||||
}
|
||||
|
|
@ -10,7 +10,6 @@ from langchain.chat_models.base import BaseChatModel
|
|||
from langchain.llms.base import BaseLLM
|
||||
from langchain.tools import BaseTool
|
||||
|
||||
from langflow.interface.document_loaders.custom import CUSTOM_DOCUMENTLOADERS
|
||||
from langflow.interface.tools.util import get_tool_by_name
|
||||
|
||||
|
||||
|
|
@ -132,8 +131,6 @@ def import_vectorstore(vectorstore: str) -> Any:
|
|||
|
||||
def import_documentloader(documentloader: str) -> Any:
|
||||
"""Import documentloader from documentloader name"""
|
||||
if documentloader in CUSTOM_DOCUMENTLOADERS:
|
||||
return CUSTOM_DOCUMENTLOADERS[documentloader]
|
||||
|
||||
return import_class(f"langchain.document_loaders.{documentloader}")
|
||||
|
||||
|
|
|
|||
|
|
@ -22,6 +22,7 @@ from langflow.interface.agents.custom import CUSTOM_AGENTS
|
|||
from langflow.interface.importing.utils import import_by_type
|
||||
from langflow.interface.toolkits.base import toolkits_creator
|
||||
from langflow.interface.types import get_type_list
|
||||
from langflow.interface.utils import load_file_into_dict
|
||||
from langflow.utils import util, validate
|
||||
|
||||
|
||||
|
|
@ -36,21 +37,25 @@ def instantiate_class(node_type: str, base_type: str, params: Dict) -> Any:
|
|||
if base_type == "agents":
|
||||
# We need to initialize it differently
|
||||
return load_agent_executor(class_object, params)
|
||||
elif node_type == "ZeroShotPrompt":
|
||||
if "tools" not in params:
|
||||
params["tools"] = []
|
||||
return ZeroShotAgent.create_prompt(**params)
|
||||
|
||||
elif node_type == "PythonFunction":
|
||||
# If the node_type is "PythonFunction"
|
||||
# we need to get the function from the params
|
||||
# which will be a str containing a python function
|
||||
# and then we need to compile it and return the function
|
||||
# as the instance
|
||||
function_string = params["code"]
|
||||
if isinstance(function_string, str):
|
||||
return validate.eval_function(function_string)
|
||||
raise ValueError("Function should be a string")
|
||||
elif base_type == "prompts":
|
||||
if node_type == "ZeroShotPrompt":
|
||||
if "tools" not in params:
|
||||
params["tools"] = []
|
||||
return ZeroShotAgent.create_prompt(**params)
|
||||
elif base_type == "tools":
|
||||
if node_type == "JsonSpec":
|
||||
params["dict_"] = load_file_into_dict(params.pop("path"))
|
||||
return class_object(**params)
|
||||
elif node_type == "PythonFunction":
|
||||
# If the node_type is "PythonFunction"
|
||||
# we need to get the function from the params
|
||||
# which will be a str containing a python function
|
||||
# and then we need to compile it and return the function
|
||||
# as the instance
|
||||
function_string = params["code"]
|
||||
if isinstance(function_string, str):
|
||||
return validate.eval_function(function_string)
|
||||
raise ValueError("Function should be a string")
|
||||
elif base_type == "toolkits":
|
||||
loaded_toolkit = class_object(**params)
|
||||
# Check if node_type has a loader
|
||||
|
|
@ -68,8 +73,8 @@ def instantiate_class(node_type: str, base_type: str, params: Dict) -> Any:
|
|||
documents = params.pop("documents")
|
||||
text_splitter = class_object(**params)
|
||||
return text_splitter.split_documents(documents)
|
||||
else:
|
||||
return class_object(**params)
|
||||
|
||||
return class_object(**params)
|
||||
|
||||
|
||||
def load_flow_from_json(path: str):
|
||||
|
|
|
|||
|
|
@ -26,6 +26,15 @@ class TextSplitterCreator(LangChainTypeCreator):
|
|||
"name": "documents",
|
||||
}
|
||||
|
||||
signature["template"]["separator"] = {
|
||||
"type": "str",
|
||||
"required": True,
|
||||
"show": True,
|
||||
"value": ".",
|
||||
"name": "separator",
|
||||
"display_name": "Separator",
|
||||
}
|
||||
|
||||
return signature
|
||||
except ValueError as exc:
|
||||
raise ValueError(f"Text Splitter {name} not found") from exc
|
||||
|
|
|
|||
|
|
@ -47,12 +47,14 @@ TOOL_INPUTS = {
|
|||
value="",
|
||||
multiline=True,
|
||||
),
|
||||
"dict_": TemplateField(
|
||||
"path": TemplateField(
|
||||
field_type="file",
|
||||
required=True,
|
||||
is_list=False,
|
||||
show=True,
|
||||
value="",
|
||||
suffixes=[".json", ".yaml", ".yml"],
|
||||
fileTypes=["json", "yaml", "yml"],
|
||||
),
|
||||
}
|
||||
|
||||
|
|
@ -114,6 +116,8 @@ class ToolCreator(LangChainTypeCreator):
|
|||
return node
|
||||
elif tool_type in FILE_TOOLS:
|
||||
params = all_tools[name]["params"] # type: ignore
|
||||
if tool_type == "JsonSpec":
|
||||
params["path"] = params.pop("dict_") # type: ignore
|
||||
base_classes += [name]
|
||||
else:
|
||||
params = []
|
||||
|
|
|
|||
22
src/backend/langflow/interface/utils.py
Normal file
22
src/backend/langflow/interface/utils.py
Normal file
|
|
@ -0,0 +1,22 @@
|
|||
import json
|
||||
import os
|
||||
|
||||
import yaml
|
||||
|
||||
|
||||
def load_file_into_dict(file_path: str) -> dict:
    """Load a JSON or YAML file and return its parsed content.

    The format is chosen from the file extension (case-insensitive):
    ``.json`` is parsed with ``json.load``; ``.yaml`` and ``.yml`` are
    parsed with ``yaml.safe_load``.

    Args:
        file_path: Path to the file to load.

    Returns:
        The parsed content of the file.

    Raises:
        FileNotFoundError: If ``file_path`` does not exist.
        ValueError: If the extension is not ``.json``, ``.yaml`` or ``.yml``.
    """
    if not os.path.exists(file_path):
        raise FileNotFoundError(f"File not found: {file_path}")

    file_extension = os.path.splitext(file_path)[1].lower()

    # Read as UTF-8 explicitly: the platform default encoding (e.g. cp1252 on
    # Windows) would mis-decode non-ASCII JSON/YAML content.
    if file_extension == ".json":
        with open(file_path, "r", encoding="utf-8") as json_file:
            data = json.load(json_file)
    elif file_extension in [".yaml", ".yml"]:
        with open(file_path, "r", encoding="utf-8") as yaml_file:
            data = yaml.safe_load(yaml_file)
    else:
        raise ValueError("Unsupported file type. Please provide a JSON or YAML file.")

    return data
|
||||
|
|
@ -206,7 +206,7 @@
|
|||
"type": "JsonSpec",
|
||||
"node": {
|
||||
"template": {
|
||||
"dict_": {
|
||||
"path": {
|
||||
"required": true,
|
||||
"placeholder": "",
|
||||
"show": true,
|
||||
|
|
@ -218,7 +218,7 @@
|
|||
".yml"
|
||||
],
|
||||
"password": false,
|
||||
"name": "dict_",
|
||||
"name": "path",
|
||||
"type": "file",
|
||||
"list": false,
|
||||
"fileTypes": [
|
||||
|
|
|
|||
|
|
@ -49,7 +49,7 @@ def test_cache_creation(basic_data_graph):
|
|||
)
|
||||
save_cache(computed_hash, langchain_object, is_first_message)
|
||||
# Check if the cache file exists
|
||||
cache_file = Path(tempfile.gettempdir()) / f"{PREFIX}_{computed_hash}.dill"
|
||||
cache_file = Path(tempfile.gettempdir()) / f"{PREFIX}/{computed_hash}.dill"
|
||||
|
||||
assert cache_file.exists()
|
||||
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue