diff --git a/src/backend/base/langflow/components/documentloaders/GitLoader.py b/src/backend/base/langflow/components/documentloaders/GitLoader.py
new file mode 100644
index 000000000..ea39d76bd
--- /dev/null
+++ b/src/backend/base/langflow/components/documentloaders/GitLoader.py
@@ -0,0 +1,153 @@
+from pathlib import Path
+from typing import List
+import re
+
+from langchain_community.document_loaders.git import GitLoader
+from langflow.custom import Component
+from langflow.io import MessageTextInput, Output
+from langflow.schema import Data
+
+
+class GitLoaderComponent(Component):
+    """Load the text files of a Git repository as Langflow ``Data`` objects.
+
+    Wraps langchain's ``GitLoader`` and adds two optional filters: glob-style
+    path patterns (``file_filter``) and a regular expression that the file
+    content must match (``content_filter``). Binary files are always skipped.
+    """
+
+    display_name = "GitLoader"
+    description = "Load files from a Git repository"
+    documentation = "https://python.langchain.com/v0.2/docs/integrations/document_loaders/git/"
+    trace_type = "tool"
+    icon = "GitLoader"
+    name = "GitLoader"
+
+    inputs = [
+        MessageTextInput(
+            name="repo_path",
+            display_name="Repository Path",
+            required=True,
+            info="The local path to the Git repository.",
+        ),
+        MessageTextInput(
+            name="clone_url",
+            display_name="Clone URL",
+            required=False,
+            info="The URL to clone the Git repository from.",
+        ),
+        MessageTextInput(
+            name="branch",
+            display_name="Branch",
+            required=False,
+            value="main",
+            info="The branch to load files from. Defaults to 'main'.",
+        ),
+        MessageTextInput(
+            name="file_filter",
+            display_name="File Filter",
+            required=False,
+            advanced=True,
+            info="A list of patterns to filter files. Example to include only .py files: '*.py'. "
+            "Example to exclude .py files: '!*.py'. Multiple patterns can be separated by commas.",
+        ),
+        MessageTextInput(
+            name="content_filter",
+            display_name="Content Filter",
+            required=False,
+            advanced=True,
+            info="A regex pattern to filter files based on their content.",
+        ),
+    ]
+
+    outputs = [
+        Output(name="data", display_name="Data", method="load_documents"),
+    ]
+
+    @staticmethod
+    def is_binary(file_path: str) -> bool:
+        """Return True if the first 1024 bytes of the file contain a null byte.
+
+        Used to keep binary files out of the results, since the content
+        filter only makes sense for text.
+        """
+        with open(file_path, "rb") as file:
+            return b"\x00" in file.read(1024)
+
+    @staticmethod
+    def _split_patterns(raw_patterns: str) -> List[str]:
+        """Split a comma-separated pattern string, dropping empty entries.
+
+        Empty entries (e.g. from a trailing comma) must be removed because
+        ``Path.match("")`` raises ``ValueError``.
+        """
+        stripped = (pattern.strip() for pattern in raw_patterns.split(","))
+        return [pattern for pattern in stripped if pattern]
+
+    def build_gitloader(self) -> GitLoader:
+        """Create a ``GitLoader`` configured from this component's inputs.
+
+        Returns:
+            A loader whose ``file_filter`` accepts a file only if it passes
+            the path patterns (if any), is not binary, and matches the
+            content regex (if any).
+
+        Raises:
+            re.error: If ``content_filter`` is not a valid regular expression.
+        """
+        file_filter_patterns = getattr(self, "file_filter", None)
+        content_filter_pattern = getattr(self, "content_filter", None)
+
+        include_patterns: List[str] = []
+        exclude_patterns: List[str] = []
+        if file_filter_patterns:
+            for pattern in self._split_patterns(file_filter_patterns):
+                if not pattern.startswith("!"):
+                    include_patterns.append(pattern)
+                elif pattern[1:].strip():
+                    # Drop the "!" marker; a bare "!" has no pattern and is skipped.
+                    exclude_patterns.append(pattern[1:].strip())
+
+        content_regex = re.compile(content_filter_pattern) if content_filter_pattern else None
+
+        def path_allowed(path: Path) -> bool:
+            if any(path.match(pattern) for pattern in exclude_patterns):
+                return False
+            # With only exclusion patterns, everything not excluded is kept.
+            return not include_patterns or any(path.match(pattern) for pattern in include_patterns)
+
+        def content_allowed(path: Path) -> bool:
+            if content_regex is None:
+                return True
+            with path.open("r", encoding="utf-8", errors="ignore") as file:
+                return bool(content_regex.search(file.read()))
+
+        def combined_filter(file_path: str) -> bool:
+            path = Path(file_path)
+            # Name checks are cheap, so run them before touching the file.
+            if not path_allowed(path):
+                return False
+            try:
+                return not self.is_binary(file_path) and content_allowed(path)
+            except OSError:
+                # Unreadable entries (e.g. broken symlinks) cannot be loaded.
+                return False
+
+        # Blank text inputs may arrive as empty strings; normalise them so the
+        # loader sees "no clone URL" (None) and its default branch name.
+        return GitLoader(
+            repo_path=self.repo_path,
+            clone_url=self.clone_url or None,
+            branch=self.branch or "main",
+            file_filter=combined_filter,
+        )
+
+    def load_documents(self) -> List[Data]:
+        """Load the filtered repository files and convert them to ``Data``.
+
+        The result is also stored in ``self.status``.
+        """
+        gitloader = self.build_gitloader()
+        data = [Data.from_document(doc) for doc in gitloader.lazy_load()]
+        self.status = data
+        return data
diff --git a/src/backend/base/langflow/components/documentloaders/__init__.py b/src/backend/base/langflow/components/documentloaders/__init__.py
index d33437c75..8f18cd3db 100644
--- a/src/backend/base/langflow/components/documentloaders/__init__.py
+++ b/src/backend/base/langflow/components/documentloaders/__init__.py
@@ -1,3 +1,4 @@
from .Confluence import ConfluenceComponent
+from .GitLoader import GitLoaderComponent
-__all__ = ["ConfluenceComponent"]
+__all__ = ["ConfluenceComponent", "GitLoaderComponent"]
diff --git a/src/frontend/src/icons/GitLoader/Git.svg b/src/frontend/src/icons/GitLoader/Git.svg
new file mode 100644
index 000000000..5bf444b9b
--- /dev/null
+++ b/src/frontend/src/icons/GitLoader/Git.svg
@@ -0,0 +1 @@
+
\ No newline at end of file
diff --git a/src/frontend/src/icons/GitLoader/GitLoader.jsx b/src/frontend/src/icons/GitLoader/GitLoader.jsx
new file mode 100644
index 000000000..f1b291643
--- /dev/null
+++ b/src/frontend/src/icons/GitLoader/GitLoader.jsx
@@ -0,0 +1,22 @@
+const GitLoaderIcon = (props) => (
+
+);
+
+export default GitLoaderIcon;
diff --git a/src/frontend/src/icons/GitLoader/index.tsx b/src/frontend/src/icons/GitLoader/index.tsx
new file mode 100644
index 000000000..51e82f4f8
--- /dev/null
+++ b/src/frontend/src/icons/GitLoader/index.tsx
@@ -0,0 +1,13 @@
+import React, { forwardRef } from "react";
+import SvgGitLoader from "./GitLoader";
+
+/**
+ * Ref-forwarding wrapper around the GitLoader SVG so it can be registered
+ * in the node icon map like the other icon components.
+ */
+export const GitLoaderIcon = forwardRef<
+  SVGSVGElement,
+  React.PropsWithChildren<{}>
+>((props, ref) => {
+  return <SvgGitLoader ref={ref} {...props} />;
+});
diff --git a/src/frontend/src/utils/styleUtils.ts b/src/frontend/src/utils/styleUtils.ts
index 3bde56e9a..244e68fa6 100644
--- a/src/frontend/src/utils/styleUtils.ts
+++ b/src/frontend/src/utils/styleUtils.ts
@@ -179,6 +179,7 @@ import { EvernoteIcon } from "../icons/Evernote";
import { FBIcon } from "../icons/FacebookMessenger";
import { FirecrawlIcon } from "../icons/Firecrawl";
import { GitBookIcon } from "../icons/GitBook";
+import { GitLoaderIcon } from "../icons/GitLoader";
import { GoogleIcon } from "../icons/Google";
import { GoogleGenerativeAIIcon } from "../icons/GoogleGenerativeAI";
import {
@@ -588,4 +589,5 @@ export const nodeIconsLucide: iconsType = {
Table: Table,
AIML: AIMLIcon,
"AI/ML": AIMLIcon,
+ GitLoader: GitLoaderIcon,
};