refactor: improve handling of leading punctuation removal (#10761)
This commit is contained in:
parent
0ba17ec116
commit
14f3d44c37
5 changed files with 42 additions and 15 deletions
16
api/core/tools/utils/text_processing_utils.py
Normal file
16
api/core/tools/utils/text_processing_utils.py
Normal file
|
|
@ -0,0 +1,16 @@
|
|||
import re
|
||||
|
||||
|
||||
def remove_leading_symbols(text: str) -> str:
    """
    Strip the run of punctuation/symbol characters at the start of a string.

    Only the very beginning of ``text`` is examined; punctuation that appears
    after the first non-matching character is left untouched.

    Args:
        text (str): The input text to process.

    Returns:
        str: ``text`` without its leading punctuation or symbols. When the
        text does not start with such a character it is returned as-is.
    """
    # Character class covers the General Punctuation (U+2000-U+206F),
    # Supplemental Punctuation (U+2E00-U+2E7F) and CJK Symbols and
    # Punctuation (U+3000-U+303F) blocks plus the ASCII marks listed
    # literally. Whitespace and the backslash are not part of the class.
    leading_run = re.match(
        r"^[\u2000-\u206F\u2E00-\u2E7F\u3000-\u303F!\"#$%&'()*+,\-./:;<=>?@\[\]^_`{|}~]+",
        text,
    )
    if leading_run is None:
        # Nothing to strip at the start of the string.
        return text
    # Keep everything after the matched prefix.
    return text[leading_run.end():]
|
||||
Loading…
Add table
Add a link
Reference in a new issue