From 214829ab45df454ca0fdae9f828b56d3e508af20 Mon Sep 17 00:00:00 2001 From: Saurabh Misra Date: Wed, 18 Dec 2024 13:53:59 -0800 Subject: [PATCH] =?UTF-8?q?refactor:=20=20(codeflash)=E2=9A=A1=EF=B8=8F=20?= =?UTF-8?q?Speed=20up=20method=20`JSONCleaner.=5Fremove=5Fcontrol=5Fcharac?= =?UTF-8?q?ters`=20by=201,491%=20(#5322)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * ⚡️ Speed up method `JSONCleaner._remove_control_characters` by 1,491% To optimize the function `_remove_control_characters`, we replace the `re.sub` call with the `str.translate` method and a translation table that deletes the control characters (code points 0-31 and 127). `str.translate` is generally faster than a regular expression for single-character replacement/removal tasks. The optimized version builds the translation table once in the `__init__` method, which avoids the overhead of recreating the table every time `_remove_control_characters` is called. Using `str.translate` with this prebuilt table significantly improves the performance compared to using a regular expression substitution. 
* add super() --------- Co-authored-by: codeflash-ai[bot] <148906541+codeflash-ai[bot]@users.noreply.github.com> --- .../base/langflow/components/processing/json_cleaner.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/src/backend/base/langflow/components/processing/json_cleaner.py b/src/backend/base/langflow/components/processing/json_cleaner.py index d9d6051c8..87b11aa79 100644 --- a/src/backend/base/langflow/components/processing/json_cleaner.py +++ b/src/backend/base/langflow/components/processing/json_cleaner.py @@ -1,5 +1,4 @@ import json -import re import unicodedata from langflow.custom import Component @@ -83,7 +82,7 @@ class JSONCleaner(Component): def _remove_control_characters(self, s: str) -> str: """Remove control characters from the string.""" - return re.sub(r"[\x00-\x1F\x7F]", "", s) + return s.translate(self.translation_table) def _normalize_unicode(self, s: str) -> str: """Normalize Unicode characters in the string.""" @@ -97,3 +96,8 @@ class JSONCleaner(Component): msg = f"Invalid JSON string: {e}" raise ValueError(msg) from e return s + + def __init__(self): + # Create a translation table that maps control characters to None + super().__init__() + self.translation_table = str.maketrans("", "", "".join(chr(i) for i in range(32)) + chr(127))