GitLoader (#533)

Added GitLoader
This commit is contained in:
Gabriel Luiz Freitas Almeida 2023-06-23 20:24:24 +00:00 committed by GitHub
commit 3940c0959d
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
5 changed files with 73 additions and 9 deletions

View file

@ -159,6 +159,17 @@ def instantiate_vectorstore(class_object, params):
def instantiate_documentloader(class_object, params):
if "file_filter" in params:
# file_filter will be a string but we need a function
# that will be used to filter the files using file_filter
# like lambda x: x.endswith(".txt") but as we don't know
# anything besides the string, we will simply check if the string is
# in x and if it is, we will return True
file_filter = params.pop("file_filter", None)
extensions = file_filter.split(",")
params["file_filter"] = lambda x: any(
extension.strip() in x for extension in extensions
)
metadata = params.pop("metadata", None)
docs = class_object(**params).load()
if metadata:
@ -172,6 +183,7 @@ def instantiate_documentloader(class_object, params):
for doc in docs:
doc.metadata = metadata
return docs

View file

@ -72,7 +72,7 @@ class ChainFrontendNode(FrontendNode):
field.show = True
field.advanced = False
if field.name == "memory":
field.required = False
# field.required = False
field.show = True
field.advanced = False
if field.name == "verbose":

View file

@ -55,7 +55,54 @@ class DocumentLoaderFrontNode(FrontendNode):
def add_extra_fields(self) -> None:
name = None
display_name = "Web Page"
if self.template.type_name in self.file_path_templates:
if self.template.type_name in {"GitLoader"}:
# Add fields repo_path, clone_url, branch and file_filter
self.template.add_field(
TemplateField(
field_type="str",
required=True,
show=True,
name="repo_path",
value="",
display_name="Path to repository",
advanced=False,
)
)
self.template.add_field(
TemplateField(
field_type="str",
required=False,
show=True,
name="clone_url",
value="",
display_name="Clone URL",
advanced=False,
)
)
self.template.add_field(
TemplateField(
field_type="str",
required=True,
show=True,
name="branch",
value="",
display_name="Branch",
advanced=False,
)
)
self.template.add_field(
TemplateField(
field_type="str",
required=False,
show=True,
name="file_filter",
value="",
display_name="File extensions (comma-separated)",
advanced=False,
)
)
elif self.template.type_name in self.file_path_templates:
self.template.add_field(self.file_path_templates[self.template.type_name])
elif self.template.type_name in {
"WebBaseLoader",
@ -68,7 +115,10 @@ class DocumentLoaderFrontNode(FrontendNode):
name = "web_path"
elif self.template.type_name in {"GitbookLoader"}:
name = "web_page"
elif self.template.type_name in {"DirectoryLoader", "ReadTheDocsLoader"}:
elif self.template.type_name in {
"DirectoryLoader",
"ReadTheDocsLoader",
}:
name = "path"
display_name = "Local directory"
if name:
@ -112,3 +162,4 @@ class DocumentLoaderFrontNode(FrontendNode):
if field.name == "metadata":
field.show = True
field.advanced = False
field.show = True

View file

@ -80,7 +80,7 @@ export default function ParameterComponent({
useEffect(() => {
const groupedObj = groupByFamily(myData, tooltipTitle);
refHtml.current = groupedObj.map((item, i) => (
<span
key={getRandomKeyByssmm()}
@ -105,7 +105,9 @@ export default function ParameterComponent({
? item.type.split(", ").map((el, i) => (
<React.Fragment key={el + i}>
<span>
{i === item.type.split(", ").length - 1 ? el : (el += `, `)}
{i === item.type.split(", ").length - 1
? el
: (el += `, `)}
</span>
{i % 2 === 0 && i > 0 && <br />}
</React.Fragment>
@ -115,7 +117,6 @@ export default function ParameterComponent({
</span>
</span>
));
}, [tooltipTitle]);
return (

View file

@ -810,9 +810,9 @@ export function getRandomName(
return toTitleCase(final_name);
}
export function getRandomKeyByssmm(): string{
export function getRandomKeyByssmm(): string {
const now = new Date();
const seconds = String(now.getSeconds()).padStart(2, '0');
const milliseconds = String(now.getMilliseconds()).padStart(3, '0');
const seconds = String(now.getSeconds()).padStart(2, "0");
const milliseconds = String(now.getMilliseconds()).padStart(3, "0");
return seconds + milliseconds;
}