diff --git a/src/backend/langflow/base/data/utils.py b/src/backend/langflow/base/data/utils.py
index b2c11a270..eaf266d97 100644
--- a/src/backend/langflow/base/data/utils.py
+++ b/src/backend/langflow/base/data/utils.py
@@ -21,6 +21,7 @@ TEXT_FILE_TYPES = [
     "xml",
     "html",
     "htm",
+    "pdf",
 ]
 
 
@@ -54,7 +55,9 @@ def retrieve_file_paths(
 
     glob = "**/*" if recursive else "*"
     paths = walk_level(path_obj, depth) if depth else path_obj.glob(glob)
-    file_paths = [Text(p) for p in paths if p.is_file() and match_types(p) and is_not_hidden(p)]
+    file_paths = [
+        Text(p) for p in paths if p.is_file() and match_types(p) and is_not_hidden(p)
+    ]
 
     return file_paths
 
@@ -83,9 +86,20 @@ def read_text_file(file_path: str) -> str:
         return f.read()
 
 
+def parse_pdf_to_text(file_path: str) -> str:
+    """Extract the text of each page of a PDF, separated by blank lines."""
+    from pypdf import PdfReader  # type: ignore
+
+    with open(file_path, "rb") as pdf_file:
+        return "\n\n".join(p.extract_text() for p in PdfReader(pdf_file).pages)
+
+
 def parse_text_file_to_record(file_path: str, silent_errors: bool) -> Optional[Record]:
     try:
-        text = read_text_file(file_path)
+        if file_path.endswith(".pdf"):
+            text = parse_pdf_to_text(file_path)
+        else:
+            text = read_text_file(file_path)
         # if file is json, yaml, or xml, we can parse it
         if file_path.endswith(".json"):
             text = json.loads(text)
@@ -111,7 +125,10 @@ def get_elements(
     if use_multithreading:
         records = parallel_load_records(file_paths, silent_errors, max_concurrency)
     else:
-        records = [partition_file_to_record(file_path, silent_errors) for file_path in file_paths]
+        records = [
+            partition_file_to_record(file_path, silent_errors)
+            for file_path in file_paths
+        ]
     records = list(filter(None, records))
     return records