diff --git a/.gitignore b/.gitignore index 96512b650..c2ffb8276 100644 --- a/.gitignore +++ b/.gitignore @@ -9,6 +9,11 @@ lerna-debug.log* # Mac .DS_Store +# VSCode +.vscode +.chroma +.ruff_cache + # Diagnostic reports (https://nodejs.org/api/report.html) report.[0-9]*.[0-9]*.[0-9]*.[0-9]*.json @@ -233,5 +238,5 @@ venv.bak/ .dmypy.json dmypy.json -# Poetry -.testenv/* \ No newline at end of file +# Poetry +.testenv/* diff --git a/poetry.lock b/poetry.lock index c0999f18e..62fea8658 100644 --- a/poetry.lock +++ b/poetry.lock @@ -1,4 +1,4 @@ -# This file is automatically @generated by Poetry 1.4.0 and should not be changed by hand. +# This file is automatically @generated by Poetry 1.4.2 and should not be changed by hand. [[package]] name = "aiohttp" @@ -157,6 +157,18 @@ files = [ {file = "appnope-0.1.3.tar.gz", hash = "sha256:02bd91c4de869fbb1e1c50aafc4098827a7a54ab2f39d9dcba6c9547ed920e24"}, ] +[[package]] +name = "argilla" +version = "0.0.1" +description = "" +category = "main" +optional = false +python-versions = "*" +files = [ + {file = "argilla-0.0.1-py3-none-any.whl", hash = "sha256:8bdc3c505bcfb47ba4b91f5658034eae53bf7d4f9317980397605c0c55817396"}, + {file = "argilla-0.0.1.tar.gz", hash = "sha256:5017854754e89f573b31af25b25b803f51cea9ca1fa0bcf00505dee1f45cf7c9"}, +] + [[package]] name = "asttokens" version = "2.2.1" @@ -400,6 +412,18 @@ files = [ [package.dependencies] pycparser = "*" +[[package]] +name = "chardet" +version = "5.1.0" +description = "Universal encoding detector for Python 3" +category = "main" +optional = false +python-versions = ">=3.7" +files = [ + {file = "chardet-5.1.0-py3-none-any.whl", hash = "sha256:362777fb014af596ad31334fde1e8c327dfdb076e1960d1694662d46a6917ab9"}, + {file = "chardet-5.1.0.tar.gz", hash = "sha256:0d62712b956bc154f85fb0a266e2a3c5913c2967e00348701b32411d6def31e5"}, +] + [[package]] name = "charset-normalizer" version = "3.1.0" @@ -809,6 +833,18 @@ files = [ {file = "duckdb-0.7.1.tar.gz", hash = "sha256:a7db6da0366b239ea1e4541fcc19556b286872f5015c9a54c2e347146e25a2ad"}, ] +[[package]] +name = "et-xmlfile" +version = "1.1.0" +description = "An implementation of lxml.xmlfile for the standard library" +category = "main" +optional = false +python-versions = ">=3.6" +files = [ + {file = "et_xmlfile-1.1.0-py3-none-any.whl", hash = "sha256:a2ba85d1d6a74ef63837eed693bcb89c3f752169b0e3e7ae5b16ca5e1b3deada"}, + {file = "et_xmlfile-1.1.0.tar.gz", hash = "sha256:8eb9e2bc2f8c97e37a2dc85a09ecdcdec9d8a396530a6d5a33b30b9a92da0c5c"}, +] + [[package]] name = "exceptiongroup" version = "1.1.1" @@ -839,6 +875,21 @@ files = [ [package.extras] tests = ["asttokens", "littleutils", "pytest", "rich"] +[[package]] +name = "fake-useragent" +version = "1.1.3" +description = "Up-to-date simple useragent faker with real world database" +category = "main" +optional = false +python-versions = "*" +files = [ + {file = "fake-useragent-1.1.3.tar.gz", hash = "sha256:1c06f0aa7d6e4894b919b30b9c7ebd72ff497325191057fbb5df3d5db06b93fc"}, + {file = "fake_useragent-1.1.3-py3-none-any.whl", hash = "sha256:695d3b1bf7d11d04ab0f971fb73b0ca8de98b78bbadfbc8bacbc9a48423f7531"}, +] + +[package.dependencies] +importlib-resources = {version = ">=5.0", markers = "python_version < \"3.10\""} + [[package]] name = "fastapi" version = "0.92.0" @@ -986,14 +1037,14 @@ grpcio-gcp = ["grpcio-gcp (>=0.2.2,<1.0dev)"] [[package]] name = "google-api-python-client" -version = "2.84.0" +version = "2.85.0" description = "Google API Client Library for Python" category = "main" optional = false python-versions = ">=3.7" files = [ - {file = "google-api-python-client-2.84.0.tar.gz", hash = "sha256:c398fd6f9ead0be23aade3b2704c72c5146df0e3352d8ff9101286077e1b010a"}, - {file = "google_api_python_client-2.84.0-py2.py3-none-any.whl", hash = "sha256:83041bb895863225ecdd9c59dd58565fa48c57c2f10fe06f7c08da7c42c53abc"}, + {file = "google-api-python-client-2.85.0.tar.gz", hash = "sha256:07b21ef21a542dd69cd7c09817a6079b2769cc2a791981402e8f0fcdb2d47f90"}, + {file = "google_api_python_client-2.85.0-py2.py3-none-any.whl", hash = "sha256:baf3c6f9b1679d89fcb88c29941a8b04b9a815d721880786baecc6a7f5bd376f"}, ] [package.dependencies] @@ -1360,7 +1411,7 @@ files = [ name = "importlib-metadata" version = "6.3.0" description = "Read metadata from Python packages" -category = "dev" +category = "main" optional = false python-versions = ">=3.7" files = [ @@ -1376,6 +1427,25 @@ docs = ["furo", "jaraco.packaging (>=9)", "jaraco.tidelift (>=1.4)", "rst.linker perf = ["ipython"] testing = ["flake8 (<5)", "flufl.flake8", "importlib-resources (>=1.3)", "packaging", "pyfakefs", "pytest (>=6)", "pytest-black (>=0.3.7)", "pytest-checkdocs (>=2.4)", "pytest-cov", "pytest-enabler (>=1.3)", "pytest-flake8", "pytest-mypy (>=0.9.1)", "pytest-perf (>=0.9.2)"] +[[package]] +name = "importlib-resources" +version = "5.12.0" +description = "Read resources from Python packages" +category = "main" +optional = false +python-versions = ">=3.7" +files = [ + {file = "importlib_resources-5.12.0-py3-none-any.whl", hash = "sha256:7b1deeebbf351c7578e09bf2f63fa2ce8b5ffec296e0d349139d43cca061a81a"}, + {file = "importlib_resources-5.12.0.tar.gz", hash = "sha256:4be82589bf5c1d7999aedf2a45159d10cb3ca4f19b2271f8792bc8e6da7b22f6"}, +] + +[package.dependencies] +zipp = {version = ">=3.1.0", markers = "python_version < \"3.10\""} + +[package.extras] +docs = ["furo", "jaraco.packaging (>=9)", "jaraco.tidelift (>=1.4)", "rst.linker (>=1.9)", "sphinx (>=3.5)", "sphinx-lint"] +testing = ["flake8 (<5)", "pytest (>=6)", "pytest-black (>=0.3.7)", "pytest-checkdocs (>=2.4)", "pytest-cov", "pytest-enabler (>=1.3)", "pytest-flake8", "pytest-mypy (>=0.9.1)"] + [[package]] name = "iniconfig" version = "2.0.0" @@ -1611,6 +1681,99 @@ files = [ [package.dependencies] typing-extensions = ">=4.5.0,<5.0.0" +[[package]] +name = "lxml" +version = "4.9.2" +description = "Powerful and Pythonic XML processing library combining libxml2/libxslt with the ElementTree API." +category = "main" +optional = false +python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, != 3.4.*" +files = [ + {file = "lxml-4.9.2-cp27-cp27m-macosx_10_15_x86_64.whl", hash = "sha256:76cf573e5a365e790396a5cc2b909812633409306c6531a6877c59061e42c4f2"}, + {file = "lxml-4.9.2-cp27-cp27m-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:b1f42b6921d0e81b1bcb5e395bc091a70f41c4d4e55ba99c6da2b31626c44892"}, + {file = "lxml-4.9.2-cp27-cp27m-manylinux_2_5_x86_64.manylinux1_x86_64.whl", hash = "sha256:9f102706d0ca011de571de32c3247c6476b55bb6bc65a20f682f000b07a4852a"}, + {file = "lxml-4.9.2-cp27-cp27m-win32.whl", hash = "sha256:8d0b4612b66ff5d62d03bcaa043bb018f74dfea51184e53f067e6fdcba4bd8de"}, + {file = "lxml-4.9.2-cp27-cp27m-win_amd64.whl", hash = "sha256:4c8f293f14abc8fd3e8e01c5bd86e6ed0b6ef71936ded5bf10fe7a5efefbaca3"}, + {file = "lxml-4.9.2-cp27-cp27mu-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:2899456259589aa38bfb018c364d6ae7b53c5c22d8e27d0ec7609c2a1ff78b50"}, + {file = "lxml-4.9.2-cp27-cp27mu-manylinux_2_5_x86_64.manylinux1_x86_64.whl", hash = "sha256:6749649eecd6a9871cae297bffa4ee76f90b4504a2a2ab528d9ebe912b101975"}, + {file = "lxml-4.9.2-cp310-cp310-macosx_10_15_x86_64.whl", hash = "sha256:a08cff61517ee26cb56f1e949cca38caabe9ea9fbb4b1e10a805dc39844b7d5c"}, + {file = "lxml-4.9.2-cp310-cp310-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_24_i686.whl", hash = "sha256:85cabf64adec449132e55616e7ca3e1000ab449d1d0f9d7f83146ed5bdcb6d8a"}, + {file = "lxml-4.9.2-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.manylinux_2_24_aarch64.whl", hash = "sha256:8340225bd5e7a701c0fa98284c849c9b9fc9238abf53a0ebd90900f25d39a4e4"}, + {file = "lxml-4.9.2-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.manylinux_2_24_x86_64.whl", hash = "sha256:1ab8f1f932e8f82355e75dda5413a57612c6ea448069d4fb2e217e9a4bed13d4"}, + {file = "lxml-4.9.2-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:699a9af7dffaf67deeae27b2112aa06b41c370d5e7633e0ee0aea2e0b6c211f7"}, + {file = "lxml-4.9.2-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:b9cc34af337a97d470040f99ba4282f6e6bac88407d021688a5d585e44a23184"}, + {file = "lxml-4.9.2-cp310-cp310-win32.whl", hash = "sha256:d02a5399126a53492415d4906ab0ad0375a5456cc05c3fc0fc4ca11771745cda"}, + {file = "lxml-4.9.2-cp310-cp310-win_amd64.whl", hash = "sha256:a38486985ca49cfa574a507e7a2215c0c780fd1778bb6290c21193b7211702ab"}, + {file = "lxml-4.9.2-cp311-cp311-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_24_i686.whl", hash = "sha256:c83203addf554215463b59f6399835201999b5e48019dc17f182ed5ad87205c9"}, + {file = "lxml-4.9.2-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.manylinux_2_24_aarch64.whl", hash = "sha256:2a87fa548561d2f4643c99cd13131acb607ddabb70682dcf1dff5f71f781a4bf"}, + {file = "lxml-4.9.2-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.manylinux_2_24_x86_64.whl", hash = "sha256:d6b430a9938a5a5d85fc107d852262ddcd48602c120e3dbb02137c83d212b380"}, + {file = "lxml-4.9.2-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:3efea981d956a6f7173b4659849f55081867cf897e719f57383698af6f618a92"}, + {file = "lxml-4.9.2-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:df0623dcf9668ad0445e0558a21211d4e9a149ea8f5666917c8eeec515f0a6d1"}, + {file = "lxml-4.9.2-cp311-cp311-win32.whl", hash = "sha256:da248f93f0418a9e9d94b0080d7ebc407a9a5e6d0b57bb30db9b5cc28de1ad33"}, + {file = "lxml-4.9.2-cp311-cp311-win_amd64.whl", hash = "sha256:3818b8e2c4b5148567e1b09ce739006acfaa44ce3156f8cbbc11062994b8e8dd"}, + {file = "lxml-4.9.2-cp35-cp35m-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:ca989b91cf3a3ba28930a9fc1e9aeafc2a395448641df1f387a2d394638943b0"}, + {file = "lxml-4.9.2-cp35-cp35m-manylinux_2_5_x86_64.manylinux1_x86_64.whl", hash = "sha256:822068f85e12a6e292803e112ab876bc03ed1f03dddb80154c395f891ca6b31e"}, + {file = "lxml-4.9.2-cp35-cp35m-win32.whl", hash = "sha256:be7292c55101e22f2a3d4d8913944cbea71eea90792bf914add27454a13905df"}, + {file = "lxml-4.9.2-cp35-cp35m-win_amd64.whl", hash = "sha256:998c7c41910666d2976928c38ea96a70d1aa43be6fe502f21a651e17483a43c5"}, + {file = "lxml-4.9.2-cp36-cp36m-macosx_10_15_x86_64.whl", hash = "sha256:b26a29f0b7fc6f0897f043ca366142d2b609dc60756ee6e4e90b5f762c6adc53"}, + {file = "lxml-4.9.2-cp36-cp36m-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_24_i686.whl", hash = "sha256:ab323679b8b3030000f2be63e22cdeea5b47ee0abd2d6a1dc0c8103ddaa56cd7"}, + {file = "lxml-4.9.2-cp36-cp36m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:689bb688a1db722485e4610a503e3e9210dcc20c520b45ac8f7533c837be76fe"}, + {file = "lxml-4.9.2-cp36-cp36m-manylinux_2_17_x86_64.manylinux2014_x86_64.manylinux_2_24_x86_64.whl", hash = "sha256:f49e52d174375a7def9915c9f06ec4e569d235ad428f70751765f48d5926678c"}, + {file = "lxml-4.9.2-cp36-cp36m-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:36c3c175d34652a35475a73762b545f4527aec044910a651d2bf50de9c3352b1"}, + {file = "lxml-4.9.2-cp36-cp36m-manylinux_2_5_x86_64.manylinux1_x86_64.whl", hash = "sha256:a35f8b7fa99f90dd2f5dc5a9fa12332642f087a7641289ca6c40d6e1a2637d8e"}, + {file = "lxml-4.9.2-cp36-cp36m-musllinux_1_1_aarch64.whl", hash = "sha256:58bfa3aa19ca4c0f28c5dde0ff56c520fbac6f0daf4fac66ed4c8d2fb7f22e74"}, + {file = "lxml-4.9.2-cp36-cp36m-musllinux_1_1_x86_64.whl", hash = "sha256:bc718cd47b765e790eecb74d044cc8d37d58562f6c314ee9484df26276d36a38"}, + {file = "lxml-4.9.2-cp36-cp36m-win32.whl", hash = "sha256:d5bf6545cd27aaa8a13033ce56354ed9e25ab0e4ac3b5392b763d8d04b08e0c5"}, + {file = "lxml-4.9.2-cp36-cp36m-win_amd64.whl", hash = "sha256:3ab9fa9d6dc2a7f29d7affdf3edebf6ece6fb28a6d80b14c3b2fb9d39b9322c3"}, + {file = "lxml-4.9.2-cp37-cp37m-macosx_10_15_x86_64.whl", hash = "sha256:05ca3f6abf5cf78fe053da9b1166e062ade3fa5d4f92b4ed688127ea7d7b1d03"}, + {file = "lxml-4.9.2-cp37-cp37m-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_24_i686.whl", hash = "sha256:a5da296eb617d18e497bcf0a5c528f5d3b18dadb3619fbdadf4ed2356ef8d941"}, + {file = "lxml-4.9.2-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.manylinux_2_24_aarch64.whl", hash = "sha256:04876580c050a8c5341d706dd464ff04fd597095cc8c023252566a8826505726"}, + {file = "lxml-4.9.2-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.manylinux_2_24_x86_64.whl", hash = "sha256:c9ec3eaf616d67db0764b3bb983962b4f385a1f08304fd30c7283954e6a7869b"}, + {file = "lxml-4.9.2-cp37-cp37m-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:2a29ba94d065945944016b6b74e538bdb1751a1db6ffb80c9d3c2e40d6fa9894"}, + {file = "lxml-4.9.2-cp37-cp37m-manylinux_2_5_x86_64.manylinux1_x86_64.whl", hash = "sha256:a82d05da00a58b8e4c0008edbc8a4b6ec5a4bc1e2ee0fb6ed157cf634ed7fa45"}, + {file = "lxml-4.9.2-cp37-cp37m-musllinux_1_1_aarch64.whl", hash = "sha256:223f4232855ade399bd409331e6ca70fb5578efef22cf4069a6090acc0f53c0e"}, + {file = "lxml-4.9.2-cp37-cp37m-musllinux_1_1_x86_64.whl", hash = "sha256:d17bc7c2ccf49c478c5bdd447594e82692c74222698cfc9b5daae7ae7e90743b"}, + {file = "lxml-4.9.2-cp37-cp37m-win32.whl", hash = "sha256:b64d891da92e232c36976c80ed7ebb383e3f148489796d8d31a5b6a677825efe"}, + {file = "lxml-4.9.2-cp37-cp37m-win_amd64.whl", hash = "sha256:a0a336d6d3e8b234a3aae3c674873d8f0e720b76bc1d9416866c41cd9500ffb9"}, + {file = "lxml-4.9.2-cp38-cp38-macosx_10_15_x86_64.whl", hash = "sha256:da4dd7c9c50c059aba52b3524f84d7de956f7fef88f0bafcf4ad7dde94a064e8"}, + {file = "lxml-4.9.2-cp38-cp38-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_24_i686.whl", hash = "sha256:821b7f59b99551c69c85a6039c65b75f5683bdc63270fec660f75da67469ca24"}, + {file = "lxml-4.9.2-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.manylinux_2_24_aarch64.whl", hash = "sha256:e5168986b90a8d1f2f9dc1b841467c74221bd752537b99761a93d2d981e04889"}, + {file = "lxml-4.9.2-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.manylinux_2_24_x86_64.whl", hash = "sha256:8e20cb5a47247e383cf4ff523205060991021233ebd6f924bca927fcf25cf86f"}, + {file = "lxml-4.9.2-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:13598ecfbd2e86ea7ae45ec28a2a54fb87ee9b9fdb0f6d343297d8e548392c03"}, + {file = "lxml-4.9.2-cp38-cp38-manylinux_2_5_x86_64.manylinux1_x86_64.whl", hash = "sha256:880bbbcbe2fca64e2f4d8e04db47bcdf504936fa2b33933efd945e1b429bea8c"}, + {file = "lxml-4.9.2-cp38-cp38-musllinux_1_1_aarch64.whl", hash = "sha256:7d2278d59425777cfcb19735018d897ca8303abe67cc735f9f97177ceff8027f"}, + {file = "lxml-4.9.2-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:5344a43228767f53a9df6e5b253f8cdca7dfc7b7aeae52551958192f56d98457"}, + {file = "lxml-4.9.2-cp38-cp38-win32.whl", hash = "sha256:925073b2fe14ab9b87e73f9a5fde6ce6392da430f3004d8b72cc86f746f5163b"}, + {file = "lxml-4.9.2-cp38-cp38-win_amd64.whl", hash = "sha256:9b22c5c66f67ae00c0199f6055705bc3eb3fcb08d03d2ec4059a2b1b25ed48d7"}, + {file = "lxml-4.9.2-cp39-cp39-macosx_10_15_x86_64.whl", hash = "sha256:5f50a1c177e2fa3ee0667a5ab79fdc6b23086bc8b589d90b93b4bd17eb0e64d1"}, + {file = "lxml-4.9.2-cp39-cp39-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_24_i686.whl", hash = "sha256:090c6543d3696cbe15b4ac6e175e576bcc3f1ccfbba970061b7300b0c15a2140"}, + {file = "lxml-4.9.2-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.manylinux_2_24_aarch64.whl", hash = "sha256:63da2ccc0857c311d764e7d3d90f429c252e83b52d1f8f1d1fe55be26827d1f4"}, + {file = "lxml-4.9.2-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.manylinux_2_24_x86_64.whl", hash = "sha256:5b4545b8a40478183ac06c073e81a5ce4cf01bf1734962577cf2bb569a5b3bbf"}, + {file = "lxml-4.9.2-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:2e430cd2824f05f2d4f687701144556646bae8f249fd60aa1e4c768ba7018947"}, + {file = "lxml-4.9.2-cp39-cp39-manylinux_2_5_x86_64.manylinux1_x86_64.whl", hash = "sha256:6804daeb7ef69e7b36f76caddb85cccd63d0c56dedb47555d2fc969e2af6a1a5"}, + {file = "lxml-4.9.2-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:a6e441a86553c310258aca15d1c05903aaf4965b23f3bc2d55f200804e005ee5"}, + {file = "lxml-4.9.2-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:ca34efc80a29351897e18888c71c6aca4a359247c87e0b1c7ada14f0ab0c0fb2"}, + {file = "lxml-4.9.2-cp39-cp39-win32.whl", hash = "sha256:6b418afe5df18233fc6b6093deb82a32895b6bb0b1155c2cdb05203f583053f1"}, + {file = "lxml-4.9.2-cp39-cp39-win_amd64.whl", hash = "sha256:f1496ea22ca2c830cbcbd473de8f114a320da308438ae65abad6bab7867fe38f"}, + {file = "lxml-4.9.2-pp37-pypy37_pp73-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_24_i686.whl", hash = "sha256:b264171e3143d842ded311b7dccd46ff9ef34247129ff5bf5066123c55c2431c"}, + {file = "lxml-4.9.2-pp37-pypy37_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.manylinux_2_24_x86_64.whl", hash = "sha256:0dc313ef231edf866912e9d8f5a042ddab56c752619e92dfd3a2c277e6a7299a"}, + {file = "lxml-4.9.2-pp38-pypy38_pp73-macosx_10_15_x86_64.whl", hash = "sha256:16efd54337136e8cd72fb9485c368d91d77a47ee2d42b057564aae201257d419"}, + {file = "lxml-4.9.2-pp38-pypy38_pp73-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_24_i686.whl", hash = "sha256:0f2b1e0d79180f344ff9f321327b005ca043a50ece8713de61d1cb383fb8ac05"}, + {file = "lxml-4.9.2-pp38-pypy38_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.manylinux_2_24_x86_64.whl", hash = "sha256:7b770ed79542ed52c519119473898198761d78beb24b107acf3ad65deae61f1f"}, + {file = "lxml-4.9.2-pp38-pypy38_pp73-win_amd64.whl", hash = "sha256:efa29c2fe6b4fdd32e8ef81c1528506895eca86e1d8c4657fda04c9b3786ddf9"}, + {file = "lxml-4.9.2-pp39-pypy39_pp73-macosx_10_15_x86_64.whl", hash = "sha256:7e91ee82f4199af8c43d8158024cbdff3d931df350252288f0d4ce656df7f3b5"}, + {file = "lxml-4.9.2-pp39-pypy39_pp73-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_24_i686.whl", hash = "sha256:b23e19989c355ca854276178a0463951a653309fb8e57ce674497f2d9f208746"}, + {file = "lxml-4.9.2-pp39-pypy39_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.manylinux_2_24_x86_64.whl", hash = "sha256:01d36c05f4afb8f7c20fd9ed5badca32a2029b93b1750f571ccc0b142531caf7"}, + {file = "lxml-4.9.2-pp39-pypy39_pp73-win_amd64.whl", hash = "sha256:7b515674acfdcadb0eb5d00d8a709868173acece5cb0be3dd165950cbfdf5409"}, + {file = "lxml-4.9.2.tar.gz", hash = "sha256:2455cfaeb7ac70338b3257f41e21f0724f4b5b0c0e7702da67ee6c3640835b67"}, +] + +[package.extras] +cssselect = ["cssselect (>=0.7)"] +html5 = ["html5lib"] +htmlsoup = ["BeautifulSoup4"] +source = ["Cython (>=0.29.7)"] + [[package]] name = "lz4" version = "4.3.2" @@ -1661,6 +1824,24 @@ docs = ["sphinx (>=1.6.0)", "sphinx-bootstrap-theme"] flake8 = ["flake8"] tests = ["psutil", "pytest (!=3.3.0)", "pytest-cov"] +[[package]] +name = "markdown" +version = "3.4.3" +description = "Python implementation of John Gruber's Markdown." +category = "main" +optional = false +python-versions = ">=3.7" +files = [ + {file = "Markdown-3.4.3-py3-none-any.whl", hash = "sha256:065fd4df22da73a625f14890dd77eb8040edcbd68794bcd35943be14490608b2"}, + {file = "Markdown-3.4.3.tar.gz", hash = "sha256:8bf101198e004dc93e84a12a7395e31aac6a9c9942848ae1d99b9d72cf9b3520"}, +] + +[package.dependencies] +importlib-metadata = {version = ">=4.4", markers = "python_version < \"3.10\""} + +[package.extras] +testing = ["coverage", "pyyaml"] + [[package]] name = "markdown-it-py" version = "2.2.0" @@ -1839,6 +2020,24 @@ docs = ["sphinx"] gmpy = ["gmpy2 (>=2.1.0a4)"] tests = ["pytest (>=4.6)"] +[[package]] +name = "msg-parser" +version = "1.2.0" +description = "This module enables reading, parsing and converting Microsoft Outlook MSG E-Mail files." +category = "main" +optional = false +python-versions = ">=3.4" +files = [ + {file = "msg_parser-1.2.0-py2.py3-none-any.whl", hash = "sha256:d47a2f0b2a359cb189fad83cc991b63ea781ecc70d91410324273fbf93e95375"}, + {file = "msg_parser-1.2.0.tar.gz", hash = "sha256:0de858d4fcebb6c8f6f028da83a17a20fe01cdce67c490779cf43b3b0162aa66"}, +] + +[package.dependencies] +olefile = ">=0.46" + +[package.extras] +rtf = ["compressed-rtf (>=1.0.5)"] + [[package]] name = "multidict" version = "6.0.4" @@ -2246,6 +2445,17 @@ files = [ setuptools = "*" wheel = "*" +[[package]] +name = "olefile" +version = "0.46" +description = "Python package to parse, read and write Microsoft OLE2 files (Structured Storage or Compound Document, Microsoft Office)" +category = "main" +optional = false +python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*" +files = [ + {file = "olefile-0.46.zip", hash = "sha256:133b031eaf8fd2c9399b78b8bc5b8fcbe4c31e85295749bb17a87cba8f3c3964"}, +] + [[package]] name = "openai" version = "0.27.4" @@ -2269,6 +2479,21 @@ dev = ["black (>=21.6b0,<22.0)", "pytest (>=6.0.0,<7.0.0)", "pytest-asyncio", "p embeddings = ["matplotlib", "numpy", "openpyxl (>=3.0.7)", "pandas (>=1.2.3)", "pandas-stubs (>=1.1.0.11)", "plotly", "scikit-learn (>=1.0.2)", "scipy", "tenacity (>=8.0.1)"] wandb = ["numpy", "openpyxl (>=3.0.7)", "pandas (>=1.2.3)", "pandas-stubs (>=1.1.0.11)", "wandb"] +[[package]] +name = "openpyxl" +version = "3.1.2" +description = "A Python library to read/write Excel 2010 xlsx/xlsm files" +category = "main" +optional = false +python-versions = ">=3.6" +files = [ + {file = "openpyxl-3.1.2-py2.py3-none-any.whl", hash = "sha256:f91456ead12ab3c6c2e9491cf33ba6d08357d802192379bb482f1033ade496f5"}, + {file = "openpyxl-3.1.2.tar.gz", hash = "sha256:a6f5977418eff3b2d5500d54d9db50c8277a368436f4e4f8ddb1be3422870184"}, +] + +[package.dependencies] +et-xmlfile = "*" + [[package]] name = "packaging" version = "23.0" @@ -2720,6 +2945,18 @@ files = [ [package.extras] plugins = ["importlib-metadata"] +[[package]] +name = "pypandoc" +version = "1.11" +description = "Thin wrapper for pandoc." +category = "main" +optional = false +python-versions = ">=3.6" +files = [ + {file = "pypandoc-1.11-py3-none-any.whl", hash = "sha256:b260596934e9cfc6513056110a7c8600171d414f90558bf4407e68b209be8007"}, + {file = "pypandoc-1.11.tar.gz", hash = "sha256:7f6d68db0e57e0f6961bec2190897118c4d305fc2d31c22cd16037f22ee084a5"}, +] + [[package]] name = "pyparsing" version = "3.0.9" @@ -2735,6 +2972,42 @@ files = [ [package.extras] diagrams = ["jinja2", "railroad-diagrams"] +[[package]] +name = "pypdf" +version = "3.7.1" +description = "A pure-python PDF library capable of splitting, merging, cropping, and transforming PDF files" +category = "main" +optional = false +python-versions = ">=3.6" +files = [ + {file = "pypdf-3.7.1-py3-none-any.whl", hash = "sha256:fa780c9464ec3b49fd16dabd110a40a291439bc6edd0f21f302add63c1f5ade5"}, + {file = "pypdf-3.7.1.tar.gz", hash = "sha256:dfb61fcccd4bc6d321aae612c01924b3c953aa5857e6e39d31e24dbb9b49da13"}, +] + +[package.dependencies] +typing_extensions = {version = ">=3.10.0.0", markers = "python_version < \"3.10\""} + +[package.extras] +crypto = ["PyCryptodome"] +dev = ["black", "flit", "pip-tools", "pre-commit (<2.18.0)", "pytest-cov", "wheel"] +docs = ["myst_parser", "sphinx", "sphinx_rtd_theme"] +full = ["Pillow", "PyCryptodome"] +image = ["Pillow"] + +[[package]] +name = "pysrt" +version = "1.1.2" +description = "SubRip (.srt) subtitle parser and writer" +category = "main" +optional = false +python-versions = "*" +files = [ + {file = "pysrt-1.1.2.tar.gz", hash = "sha256:b4f844ba33e4e7743e9db746492f3a193dc0bc112b153914698e7c1cdeb9b0b9"}, +] + +[package.dependencies] +chardet = "*" + [[package]] name = "pytest" version = "7.3.0" @@ -2773,6 +3046,20 @@ files = [ [package.dependencies] six = ">=1.5" +[[package]] +name = "python-docx" +version = "0.8.11" +description = "Create and update Microsoft Word .docx files." +category = "main" +optional = false +python-versions = "*" +files = [ + {file = "python-docx-0.8.11.tar.gz", hash = "sha256:1105d233a0956dd8dd1e710d20b159e2d72ac3c301041b95f4d4ceb3e0ebebc4"}, +] + +[package.dependencies] +lxml = ">=2.3.2" + [[package]] name = "python-dotenv" version = "1.0.0" @@ -2788,6 +3075,34 @@ files = [ [package.extras] cli = ["click (>=5.0)"] +[[package]] +name = "python-magic" +version = "0.4.27" +description = "File type identification using libmagic" +category = "main" +optional = false +python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*" +files = [ + {file = "python-magic-0.4.27.tar.gz", hash = "sha256:c1ba14b08e4a5f5c31a302b7721239695b2f0f058d125bd5ce1ee36b9d9d3c3b"}, + {file = "python_magic-0.4.27-py2.py3-none-any.whl", hash = "sha256:c212960ad306f700aa0d01e5d7a325d20548ff97eb9920dcd29513174f0294d3"}, +] + +[[package]] +name = "python-pptx" +version = "0.6.21" +description = "Generate and manipulate Open XML PowerPoint (.pptx) files" +category = "main" +optional = false +python-versions = "*" +files = [ + {file = "python-pptx-0.6.21.tar.gz", hash = "sha256:7798a2aaf89563565b3c7120c0acfe9aff775db0db3580544e3bf4840c2e378f"}, +] + +[package.dependencies] +lxml = ">=3.1.0" +Pillow = ">=3.3.2" +XlsxWriter = ">=0.5.7" + [[package]] name = "pytz" version = "2023.3" @@ -3571,6 +3886,10 @@ category = "main" optional = false python-versions = ">=3.8.0" files = [ + {file = "torch-2.0.0-1-cp310-cp310-manylinux2014_aarch64.whl", hash = "sha256:c9090bda7d2eeeecd74f51b721420dbeb44f838d4536cc1b284e879417e3064a"}, + {file = "torch-2.0.0-1-cp311-cp311-manylinux2014_aarch64.whl", hash = "sha256:bd42db2a48a20574d2c33489e120e9f32789c4dc13c514b0c44272972d14a2d7"}, + {file = "torch-2.0.0-1-cp38-cp38-manylinux2014_aarch64.whl", hash = "sha256:8969aa8375bcbc0c2993e7ede0a7f889df9515f18b9b548433f412affed478d9"}, + {file = "torch-2.0.0-1-cp39-cp39-manylinux2014_aarch64.whl", hash = "sha256:ab2da16567cb55b67ae39e32d520d68ec736191d88ac79526ca5874754c32203"}, {file = "torch-2.0.0-cp310-cp310-manylinux1_x86_64.whl", hash = "sha256:7a9319a67294ef02459a19738bbfa8727bb5307b822dadd708bc2ccf6c901aca"}, {file = "torch-2.0.0-cp310-cp310-manylinux2014_aarch64.whl", hash = "sha256:9f01fe1f6263f31bd04e1757946fd63ad531ae37f28bb2dbf66f5c826ee089f4"}, {file = "torch-2.0.0-cp310-cp310-win_amd64.whl", hash = "sha256:527f4ae68df7b8301ee6b1158ca56350282ea633686537b30dbb5d7b4a52622a"}, @@ -3788,6 +4107,15 @@ category = "main" optional = false python-versions = "*" files = [ + {file = "triton-2.0.0-1-cp310-cp310-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:38806ee9663f4b0f7cd64790e96c579374089e58f49aac4a6608121aa55e2505"}, + {file = "triton-2.0.0-1-cp311-cp311-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:226941c7b8595219ddef59a1fdb821e8c744289a132415ddd584facedeb475b1"}, + {file = "triton-2.0.0-1-cp36-cp36m-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:4c9fc8c89874bc48eb7e7b2107a9b8d2c0bf139778637be5bfccb09191685cfd"}, + {file = "triton-2.0.0-1-cp37-cp37m-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:d2684b6a60b9f174f447f36f933e9a45f31db96cb723723ecd2dcfd1c57b778b"}, + {file = "triton-2.0.0-1-cp38-cp38-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:9d4978298b74fcf59a75fe71e535c092b023088933b2f1df933ec32615e4beef"}, + {file = "triton-2.0.0-1-cp39-cp39-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:74f118c12b437fb2ca25e1a04759173b517582fcf4c7be11913316c764213656"}, + {file = "triton-2.0.0-1-pp37-pypy37_pp73-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:9618815a8da1d9157514f08f855d9e9ff92e329cd81c0305003eb9ec25cc5add"}, + {file = "triton-2.0.0-1-pp38-pypy38_pp73-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:1aca3303629cd3136375b82cb9921727f804e47ebee27b2677fef23005c3851a"}, + {file = "triton-2.0.0-1-pp39-pypy39_pp73-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:e3e13aa8b527c9b642e3a9defcc0fbd8ffbe1c80d8ac8c15a01692478dc64d8a"}, {file = "triton-2.0.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:8f05a7e64e4ca0565535e3d5d3405d7e49f9d308505bb7773d21fb26a4c008c2"}, {file = "triton-2.0.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:bb4b99ca3c6844066e516658541d876c28a5f6e3a852286bbc97ad57134827fd"}, {file = "triton-2.0.0-cp36-cp36m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:47b4d70dc92fb40af553b4460492c31dc7d3a114a979ffb7a5cdedb7eb546c08"}, @@ -3897,6 +4225,44 @@ files = [ mypy-extensions = ">=0.3.0" typing-extensions = ">=3.7.4" +[[package]] +name = "unstructured" +version = "0.5.11" +description = "A library that prepares raw documents for downstream ML tasks." +category = "main" +optional = false +python-versions = ">=3.7.0" +files = [ + {file = "unstructured-0.5.11.tar.gz", hash = "sha256:9b0272a1d52d9f1411a4ebb5c051abbf1239625b055397253ba4400d727ee4c1"}, +] + +[package.dependencies] +argilla = "*" +certifi = ">=2022.12.07" +lxml = "*" +markdown = "*" +msg_parser = "*" +nltk = "*" +openpyxl = "*" +pandas = "*" +pillow = "*" +pypandoc = "*" +python-docx = "*" +python-magic = "*" +python-pptx = "*" +requests = "*" + +[package.extras] +azure = ["adlfs", "fsspec"] +github = ["pygithub (==1.57.0)"] +gitlab = ["python-gitlab"] +google-drive = ["google-api-python-client", "protobuf (<3.21)"] +huggingface = ["langdetect", "sacremoses", "sentencepiece", "torch", "transformers"] +local-inference = ["unstructured-inference (==0.3.2)"] +reddit = ["praw"] +s3 = ["fsspec", "s3fs"] +wikipedia = ["wikipedia"] + [[package]] name = "uritemplate" version = "4.1.1" @@ -4139,6 +4505,18 @@ files = [ [package.extras] test = ["pytest (>=6.0.0)"] +[[package]] +name = "xlsxwriter" +version = "3.0.9" +description = "A Python module for creating Excel XLSX files." +category = "main" +optional = false +python-versions = ">=3.6" +files = [ + {file = "XlsxWriter-3.0.9-py3-none-any.whl", hash = "sha256:5eaaf3c6f791cba1dd1c3065147c35982180f693436093aabe5b7d6c16148e95"}, + {file = "XlsxWriter-3.0.9.tar.gz", hash = "sha256:7216d39a2075afac7a28cad81f6ac31b0b16d8976bf1b775577d157346f891dd"}, +] + [[package]] name = "yarl" version = "1.8.2" @@ -4231,7 +4609,7 @@ multidict = ">=4.0" name = "zipp" version = "3.15.0" description = "Backport of pathlib-compatible object wrapper for zip files" -category = "dev" +category = "main" optional = false python-versions = ">=3.7" files = [ @@ -4313,4 +4691,4 @@ cffi = ["cffi (>=1.11)"] [metadata] lock-version = "2.0" python-versions = "^3.9" -content-hash = "e1d76e2c8056044cc816bda2fdc9d19027626b94d4dd8872b4a5cddf2b746ae9" +content-hash = "4f27ad94f244998e9e79fe1ae733cc786d82025f022c1d337e5c84d0393947b6" diff --git a/pyproject.toml b/pyproject.toml index 215619794..485ac8954 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -39,6 +39,11 @@ huggingface-hub = "^0.13.3" rich = "^13.3.3" llama-cpp-python = "0.1.23" networkx = "^3.1" +unstructured = "^0.5.11" +pypdf = "^3.7.1" +lxml = "^4.9.2" +pysrt = "^1.1.2" +fake-useragent = "^1.1.3" [tool.poetry.group.dev.dependencies] black = "^23.1.0" diff --git a/src/backend/langflow/cache/utils.py b/src/backend/langflow/cache/utils.py index 763a2af0c..310f3be80 100644 --- a/src/backend/langflow/cache/utils.py +++ b/src/backend/langflow/cache/utils.py @@ -1,3 +1,4 @@ +import base64 import contextlib import functools import hashlib @@ -10,6 +11,19 @@ from pathlib import Path import dill # type: ignore +def create_cache_folder(func): + def wrapper(*args, **kwargs): + # Get the destination folder + cache_path = Path(tempfile.gettempdir()) / PREFIX + + # Create the destination folder if it doesn't exist + os.makedirs(cache_path, exist_ok=True) + + return func(*args, **kwargs) + + return wrapper + + def memoize_dict(maxsize=128): cache = OrderedDict() @@ -39,9 +53,10 @@ def memoize_dict(maxsize=128): PREFIX = "langflow_cache" +@create_cache_folder def clear_old_cache_files(max_cache_size: int = 3): - cache_dir = Path(tempfile.gettempdir()) - cache_files = list(cache_dir.glob(f"{PREFIX}_*.dill")) + cache_dir = Path(tempfile.gettempdir()) / PREFIX + cache_files = list(cache_dir.glob("*.dill")) if len(cache_files) > max_cache_size: cache_files_sorted_by_mtime = sorted( @@ -84,8 +99,40 @@ def filter_json(json_data): return filtered_data +@create_cache_folder +def save_binary_file(content: str, file_name: str, accepted_types: list[str]) -> str: + """ + Save a binary file to the specified folder. + + Args: + content: The content of the file as a bytes object. + file_name: The name of the file, including its extension. + + Returns: + The path to the saved file. + """ + if not any(file_name.endswith(suffix) for suffix in accepted_types): + raise ValueError(f"File {file_name} is not accepted") + + # Get the destination folder + cache_path = Path(tempfile.gettempdir()) / PREFIX + + data = content.split(",")[1] + decoded_bytes = base64.b64decode(data) + + # Create the full file path + file_path = os.path.join(cache_path, file_name) + + # Save the binary content to the file + with open(file_path, "wb") as file: + file.write(decoded_bytes) + + return file_path + + +@create_cache_folder def save_cache(hash_val: str, chat_data, clean_old_cache_files: bool): - cache_path = Path(tempfile.gettempdir()) / f"{PREFIX}_{hash_val}.dill" + cache_path = Path(tempfile.gettempdir()) / PREFIX / f"{hash_val}.dill" with cache_path.open("wb") as cache_file: dill.dump(chat_data, cache_file) @@ -93,8 +140,9 @@ def save_cache(hash_val: str, chat_data, clean_old_cache_files: bool): clear_old_cache_files() +@create_cache_folder def load_cache(hash_val): - cache_path = Path(tempfile.gettempdir()) / f"{PREFIX}_{hash_val}.dill" + cache_path = Path(tempfile.gettempdir()) / PREFIX / f"{hash_val}.dill" if cache_path.exists(): with cache_path.open("rb") as cache_file: return dill.load(cache_file) diff --git a/src/backend/langflow/config.yaml b/src/backend/langflow/config.yaml index 94156d0eb..b3ee373e7 100644 --- a/src/backend/langflow/config.yaml +++ b/src/backend/langflow/config.yaml @@ -61,8 +61,31 @@ vectorstores: - Chroma documentloaders: + - AirbyteJSONLoader + - CoNLLULoader + - CSVLoader + - UnstructuredEmailLoader + - EverNoteLoader + - FacebookChatLoader + - GutenbergLoader + - BSHTMLLoader + - UnstructuredHTMLLoader + # - UnstructuredImageLoader # Issue with Python 3.11 (https://github.com/Unstructured-IO/unstructured-inference/issues/83) + - UnstructuredMarkdownLoader + - PyPDFLoader + - UnstructuredPowerPointLoader + - SRTLoader + - TelegramChatLoader - TextLoader + - UnstructuredWordDocumentLoader - WebBaseLoader + - AZLyricsLoader + - CollegeConfidentialLoader + - HNLoader + - IFixitLoader + - IMSDbLoader + - GitbookLoader + - ReadTheDocsLoader textsplitters: - CharacterTextSplitter diff --git a/src/backend/langflow/graph/base.py b/src/backend/langflow/graph/base.py index 490517362..6896de28b 100644 --- a/src/backend/langflow/graph/base.py +++ b/src/backend/langflow/graph/base.py @@ -8,8 +8,8 @@ import warnings from copy import deepcopy from typing import Any, Dict, List, Optional +from langflow.cache import utils as cache_utils from langflow.graph.constants import DIRECT_TYPES -from langflow.graph.utils import load_file from langflow.interface import loading from langflow.interface.listing import ALL_TYPES_DICT from langflow.utils.logger import logger @@ -88,8 +88,11 @@ class Node: file_name = value.get("value") content = value.get("content") type_to_load = value.get("suffixes") - loaded_dict = load_file(file_name, content, type_to_load) - params[key] = loaded_dict + file_path = cache_utils.save_binary_file( + content=content, file_name=file_name, accepted_types=type_to_load + ) + + params[key] = file_path # We should check if the type is in something not # the opposite diff --git a/src/backend/langflow/graph/utils.py b/src/backend/langflow/graph/utils.py index f428e9ba9..6d56e933e 100644 --- a/src/backend/langflow/graph/utils.py +++ b/src/backend/langflow/graph/utils.py @@ -1,48 +1,4 @@ -import base64 -import csv -import io -import json import re -from typing import Any - -import yaml - - -def load_file(file_name, file_content, accepted_types) -> Any: - """Load a file from a string.""" - # Check if the file is accepted - if not any(file_name.endswith(suffix) for suffix in accepted_types): - raise ValueError(f"File {file_name} is not accepted") - # Get the suffix - suffix = file_name.split(".")[-1] - # file_content == 'data:application/x-yaml;base64,b3BlbmFwaTogIjMuMC4wIg...' - data = file_content.split(",")[1] - decoded_bytes = base64.b64decode(data) - - # Convert the bytes object to a string - decoded_string = decoded_bytes.decode("utf-8") - if suffix == "json": - # Return the json content - return json.loads(decoded_string) - elif suffix in ["yaml", "yml"]: - # Return the yaml content - loaded_yaml = yaml.load(decoded_string, Loader=yaml.FullLoader) - try: - from langchain.agents.agent_toolkits.openapi.spec import reduce_openapi_spec # type: ignore - - return reduce_openapi_spec(loaded_yaml) - except ImportError: - return loaded_yaml - - elif suffix == "csv": - # Load the csv content - csv_reader = csv.DictReader(io.StringIO(decoded_string)) - return list(csv_reader) - elif suffix == "txt": - # Return the text content - return decoded_string - else: - raise ValueError(f"File {file_name} is not accepted") def validate_prompt(prompt: str): diff --git a/src/backend/langflow/interface/agents/custom.py b/src/backend/langflow/interface/agents/custom.py index f06c11562..9f6d15257 100644 --- a/src/backend/langflow/interface/agents/custom.py +++ b/src/backend/langflow/interface/agents/custom.py @@ -77,7 +77,7 @@ class CSVAgent(AgentExecutor): @classmethod def from_toolkit_and_llm( cls, - path: dict, + path: str, llm: BaseLanguageModel, pandas_kwargs: Optional[dict] = None, **kwargs: Any @@ -85,7 +85,7 @@ class CSVAgent(AgentExecutor): import pandas as pd # type: ignore _kwargs = pandas_kwargs or {} - df = pd.DataFrame.from_dict(path, **_kwargs) + df = pd.read_csv(path, **_kwargs) tools = [PythonAstREPLTool(locals={"df": df})] # type: ignore prompt = ZeroShotAgent.create_prompt( diff --git a/src/backend/langflow/interface/document_loaders/base.py b/src/backend/langflow/interface/document_loaders/base.py index 826543757..d69e87cf3 100644 --- a/src/backend/langflow/interface/document_loaders/base.py +++ b/src/backend/langflow/interface/document_loaders/base.py @@ -2,26 +2,32 @@ from typing import Dict, List, Optional from langflow.interface.base import LangChainTypeCreator from langflow.interface.custom_lists import documentloaders_type_to_cls_dict -from langflow.interface.document_loaders.custom import CUSTOM_DOCUMENTLOADERS from langflow.settings import settings from langflow.utils.util import build_template_from_class from langflow.utils.logger import logger +def build_file_path_template( + suffixes: list, fileTypes: list, name: str = "file_path" +) -> Dict: + """Build a file path template for a document loader.""" + return { + "type": "file", + "required": True, + "show": True, + "name": name, + "value": "", + "suffixes": suffixes, + "fileTypes": fileTypes, + } + + class DocumentLoaderCreator(LangChainTypeCreator): type_name: str = "documentloaders" @property def type_to_loader_dict(self) -> Dict: - types = documentloaders_type_to_cls_dict - - # Drop some types that are reimplemented with the same name - types.pop("TextLoader") - - for name, documentloader in CUSTOM_DOCUMENTLOADERS.items(): - types[name] = documentloader - - return types + return documentloaders_type_to_cls_dict def get_signature(self, name: str) -> Optional[Dict]: """Get the signature of a document loader.""" @@ -30,24 +36,96 @@ class DocumentLoaderCreator(LangChainTypeCreator): name, documentloaders_type_to_cls_dict ) - if name == "TextLoader": - signature["template"]["file"] = { - "type": "file", - "required": True, - "show": True, - "name": "path", - "value": "", - "suffixes": [".txt"], - "fileTypes": ["txt"], - } - elif name == "WebBaseLoader": + file_path_templates = { + "AirbyteJSONLoader": build_file_path_template( + suffixes=[".json"], fileTypes=["json"] + ), + "CoNLLULoader": build_file_path_template( + suffixes=[".csv"], fileTypes=["csv"] + ), + "CSVLoader": build_file_path_template( + suffixes=[".csv"], fileTypes=["csv"] + ), + "UnstructuredEmailLoader": build_file_path_template( + suffixes=[".eml"], fileTypes=["eml"] + ), + "EverNoteLoader": build_file_path_template( + suffixes=[".xml"], fileTypes=["xml"] + ), + "FacebookChatLoader": build_file_path_template( + suffixes=[".json"], fileTypes=["json"] + ), + "GutenbergLoader": build_file_path_template( + suffixes=[".txt"], fileTypes=["txt"] + ), + "BSHTMLLoader": build_file_path_template( + suffixes=[".html"], fileTypes=["html"] + ), + "UnstructuredHTMLLoader": build_file_path_template( + suffixes=[".html"], fileTypes=["html"] + ), + "UnstructuredImageLoader": build_file_path_template( + suffixes=[".jpg", ".jpeg", ".png", ".gif", ".bmp"], + fileTypes=["jpg", "jpeg", "png", "gif", "bmp"], + ), + "UnstructuredMarkdownLoader": build_file_path_template( + suffixes=[".md"], fileTypes=["md"] + ), + "PyPDFLoader": build_file_path_template( + suffixes=[".pdf"], fileTypes=["pdf"] + ), + "UnstructuredPowerPointLoader": build_file_path_template( + suffixes=[".pptx", ".ppt"], fileTypes=["pptx", "ppt"] + ), + "SRTLoader": build_file_path_template( + suffixes=[".srt"], fileTypes=["srt"] + ), + "TelegramChatLoader": build_file_path_template( + suffixes=[".json"], fileTypes=["json"] + ), + "TextLoader": build_file_path_template( + suffixes=[".txt"], fileTypes=["txt"] + ), + "UnstructuredWordDocumentLoader": build_file_path_template( + suffixes=[".docx", ".doc"], fileTypes=["docx", "doc"] + ), + } + + if name in file_path_templates: + signature["template"]["file_path"] = file_path_templates[name] + elif name in { + "WebBaseLoader", + "AZLyricsLoader", + "CollegeConfidentialLoader", + "HNLoader", + "IFixitLoader", + "IMSDbLoader", + }: signature["template"]["web_path"] = { "type": "str", "required": True, "show": True, "name": "web_path", "value": "", - "display_name": "Web Path", + "display_name": "Web Page", + } + elif name in {"GitbookLoader"}: + signature["template"]["web_page"] = { + "type": "str", + "required": True, + "show": True, + "name": "web_page", + "value": "", + "display_name": "Web Page", + } + elif name in {"ReadTheDocsLoader"}: + signature["template"]["path"] = { + "type": "str", + "required": True, + "show": True, + "name": "path", + "value": "", + "display_name": "Web Page", } return signature diff --git a/src/backend/langflow/interface/document_loaders/custom.py b/src/backend/langflow/interface/document_loaders/custom.py deleted file mode 100644 index 053d0d5f9..000000000 --- a/src/backend/langflow/interface/document_loaders/custom.py +++ /dev/null @@ -1,22 +0,0 @@ -"""Load text files.""" -from typing import List - -from langchain.docstore.document import Document -from langchain.document_loaders.base import BaseLoader - - -class TextLoader(BaseLoader): - """Load Text files.""" - - def __init__(self, file: str): - """Initialize with file path.""" - self.file = file - - def load(self) -> List[Document]: - """Load from file path.""" - return [Document(page_content=self.file, metadata={"source": "loaded"})] - - -CUSTOM_DOCUMENTLOADERS = { - "TextLoader": TextLoader, -} diff --git a/src/backend/langflow/interface/importing/utils.py b/src/backend/langflow/interface/importing/utils.py index 33b53ce02..a3480928e 100644 --- a/src/backend/langflow/interface/importing/utils.py +++ b/src/backend/langflow/interface/importing/utils.py @@ -10,7 +10,6 @@ from langchain.chat_models.base import BaseChatModel from langchain.llms.base import BaseLLM from langchain.tools import BaseTool -from langflow.interface.document_loaders.custom import CUSTOM_DOCUMENTLOADERS from langflow.interface.tools.util import get_tool_by_name @@ -132,8 +131,6 @@ def import_vectorstore(vectorstore: str) -> Any: def import_documentloader(documentloader: str) -> Any: """Import documentloader from documentloader name""" - if documentloader in CUSTOM_DOCUMENTLOADERS: - return CUSTOM_DOCUMENTLOADERS[documentloader] return import_class(f"langchain.document_loaders.{documentloader}") diff --git a/src/backend/langflow/interface/loading.py b/src/backend/langflow/interface/loading.py index 292f3d944..ca2017ff3 100644 --- a/src/backend/langflow/interface/loading.py +++ b/src/backend/langflow/interface/loading.py @@ -22,6 +22,7 @@ from langflow.interface.agents.custom import CUSTOM_AGENTS from langflow.interface.importing.utils import import_by_type from langflow.interface.toolkits.base import toolkits_creator from langflow.interface.types import get_type_list +from langflow.interface.utils import load_file_into_dict from langflow.utils import util, validate @@ -36,21 +37,25 @@ def instantiate_class(node_type: str, base_type: str, params: Dict) -> Any: if base_type == "agents": # We need to initialize it differently return load_agent_executor(class_object, params) - elif node_type == "ZeroShotPrompt": - if "tools" not in params: - params["tools"] = [] - return ZeroShotAgent.create_prompt(**params) - - elif node_type == "PythonFunction": - # If the node_type is "PythonFunction" - # we need to get the function from the params - # which will be a str containing a python function - # and then we need to compile it and return the function - # as the instance - function_string = params["code"] - if isinstance(function_string, str): - return validate.eval_function(function_string) - raise ValueError("Function should be a string") + elif base_type == "prompts": + if node_type == "ZeroShotPrompt": + if "tools" not in params: + params["tools"] = [] + return ZeroShotAgent.create_prompt(**params) + elif base_type == "tools": + if node_type == "JsonSpec": + params["dict_"] = load_file_into_dict(params.pop("path")) + return class_object(**params) + elif node_type == "PythonFunction": + # If the node_type is "PythonFunction" + # we need to get the function from the params + # which will be a str containing a python function + # and then we need to compile it and return the function + # as the instance + function_string = params["code"] + if isinstance(function_string, str): + return validate.eval_function(function_string) + raise ValueError("Function should be a string") elif base_type == "toolkits": loaded_toolkit = class_object(**params) # Check if node_type has a loader @@ -68,8 +73,8 @@ def instantiate_class(node_type: str, base_type: str, params: Dict) -> Any: documents = params.pop("documents") text_splitter = class_object(**params) return text_splitter.split_documents(documents) - else: - return class_object(**params) + + return class_object(**params) def load_flow_from_json(path: str): diff --git a/src/backend/langflow/interface/text_splitters/base.py b/src/backend/langflow/interface/text_splitters/base.py index f72f86ac5..68f340055 100644 --- a/src/backend/langflow/interface/text_splitters/base.py +++ b/src/backend/langflow/interface/text_splitters/base.py @@ -26,6 +26,15 @@ class TextSplitterCreator(LangChainTypeCreator): "name": "documents", } + signature["template"]["separator"] = { + "type": "str", + "required": True, + "show": True, + "value": ".", + "name": "separator", + "display_name": "Separator", + } + return signature except ValueError as exc: raise ValueError(f"Text Splitter {name} not found") from exc diff --git a/src/backend/langflow/interface/tools/base.py b/src/backend/langflow/interface/tools/base.py index 8a89c429d..f175a9902 100644 --- a/src/backend/langflow/interface/tools/base.py +++ b/src/backend/langflow/interface/tools/base.py @@ -47,12 +47,14 @@ TOOL_INPUTS = { value="", multiline=True, ), - "dict_": TemplateField( + "path": TemplateField( field_type="file", required=True, is_list=False, show=True, value="", + suffixes=[".json", ".yaml", ".yml"], + fileTypes=["json", "yaml", "yml"], ), } @@ -114,6 +116,8 @@ class ToolCreator(LangChainTypeCreator): return node elif tool_type in FILE_TOOLS: params = all_tools[name]["params"] # type: ignore + if tool_type == "JsonSpec": + params["path"] = params.pop("dict_") # type: ignore base_classes += [name] else: params = [] diff --git a/src/backend/langflow/interface/utils.py b/src/backend/langflow/interface/utils.py new file mode 100644 index 000000000..b3b154790 --- /dev/null +++ b/src/backend/langflow/interface/utils.py @@ -0,0 +1,22 @@ +import json +import os + +import yaml + + +def load_file_into_dict(file_path: str) -> dict: + if not os.path.exists(file_path): + raise FileNotFoundError(f"File not found: {file_path}") + + file_extension = os.path.splitext(file_path)[1].lower() + + if file_extension == ".json": + with open(file_path, "r") as json_file: + data = json.load(json_file) + elif file_extension in [".yaml", ".yml"]: + with open(file_path, "r") as yaml_file: + data = yaml.safe_load(yaml_file) + else: + raise ValueError("Unsupported file type. Please provide a JSON or YAML file.") + + return data diff --git a/tests/data/Openapi.json b/tests/data/Openapi.json index 143dd6ad1..863853672 100644 --- a/tests/data/Openapi.json +++ b/tests/data/Openapi.json @@ -206,7 +206,7 @@ "type": "JsonSpec", "node": { "template": { - "dict_": { + "path": { "required": true, "placeholder": "", "show": true, @@ -218,7 +218,7 @@ ".yml" ], "password": false, - "name": "dict_", + "name": "path", "type": "file", "list": false, "fileTypes": [ diff --git a/tests/test_cache.py b/tests/test_cache.py index 85559bc7b..9c6ad30e3 100644 --- a/tests/test_cache.py +++ b/tests/test_cache.py @@ -49,7 +49,7 @@ def test_cache_creation(basic_data_graph): ) save_cache(computed_hash, langchain_object, is_first_message) # Check if the cache file exists - cache_file = Path(tempfile.gettempdir()) / f"{PREFIX}_{computed_hash}.dill" + cache_file = Path(tempfile.gettempdir()) / f"{PREFIX}/{computed_hash}.dill" assert cache_file.exists()