🐛 fix(GetRequest.py): handle request timeout by returning a Document with status code 408 and "Request Timed Out" content
✨ feat(GetRequest.py): add timeout parameter to the build method to allow configuring the request timeout value
This commit is contained in:
parent
5fa8281e94
commit
696a139171
1 changed file with 24 additions and 26 deletions
|
|
@@ -25,48 +25,46 @@ class GetRequest(CustomComponent):
|
|||
}
|
||||
|
||||
def get_document(
|
||||
self,
|
||||
url: str,
|
||||
headers: Optional[dict] = None,
|
||||
self, session: requests.Session, url: str, headers: Optional[dict], timeout: int
|
||||
) -> Document:
|
||||
try:
|
||||
if headers is None:
|
||||
headers = {}
|
||||
with requests.Session() as session:
|
||||
response = session.get(url, headers=headers)
|
||||
try:
|
||||
response_json = response.json()
|
||||
result = orjson_dumps(response_json, indent_2=False)
|
||||
except Exception:
|
||||
result = response.text
|
||||
self.repr_value = result
|
||||
return Document(
|
||||
page_content=result,
|
||||
metadata={
|
||||
"source": url,
|
||||
"headers": headers,
|
||||
"status_code": response.status_code,
|
||||
},
|
||||
)
|
||||
except Exception as exc:
|
||||
response = session.get(url, headers=headers, timeout=timeout)
|
||||
try:
|
||||
response_json = response.json()
|
||||
result = orjson_dumps(response_json, indent=2)
|
||||
except Exception:
|
||||
result = response.text
|
||||
self.repr_value = result
|
||||
return Document(
|
||||
page_content=str(exc),
|
||||
page_content=result,
|
||||
metadata={
|
||||
"source": url,
|
||||
"headers": headers,
|
||||
"status_code": 500,
|
||||
"status_code": response.status_code,
|
||||
},
|
||||
)
|
||||
except requests.Timeout:
|
||||
return Document(
|
||||
page_content="Request Timed Out",
|
||||
metadata={"source": url, "headers": headers, "status_code": 408},
|
||||
)
|
||||
except Exception as exc:
|
||||
return Document(
|
||||
page_content=str(exc),
|
||||
metadata={"source": url, "headers": headers, "status_code": 500},
|
||||
)
|
||||
|
||||
def build(
|
||||
self,
|
||||
url: str,
|
||||
headers: Optional[dict] = None,
|
||||
timeout: int = 5,
|
||||
) -> Document:
|
||||
if headers is None:
|
||||
headers = {}
|
||||
if not isinstance(url, list):
|
||||
url = [url]
|
||||
documents = [self.get_document(u, headers) for u in url]
|
||||
self.repr_value = documents
|
||||
with requests.Session() as session:
|
||||
documents = [self.get_document(session, u, headers, timeout) for u in url]
|
||||
self.repr_value = documents
|
||||
return documents
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue