refactor: syntax cleanup (#172)
This commit is contained in:
@@ -33,4 +33,4 @@ class InitConfig(BaseConfig):
|
|||||||
|
|
||||||
def _set_embedding_function(self, ef):
|
def _set_embedding_function(self, ef):
|
||||||
self.ef = ef
|
self.ef = ef
|
||||||
return
|
return
|
||||||
|
|||||||
@@ -13,21 +13,20 @@ def clean_string(text):
|
|||||||
"""
|
"""
|
||||||
# Replacement of newline characters:
|
# Replacement of newline characters:
|
||||||
text = text.replace('\n', ' ')
|
text = text.replace('\n', ' ')
|
||||||
|
|
||||||
# Stripping and reducing multiple spaces to single:
|
# Stripping and reducing multiple spaces to single:
|
||||||
cleaned_text = re.sub(r'\s+', ' ', text.strip())
|
cleaned_text = re.sub(r'\s+', ' ', text.strip())
|
||||||
|
|
||||||
# Removing backslashes:
|
# Removing backslashes:
|
||||||
cleaned_text = cleaned_text.replace('\\', '')
|
cleaned_text = cleaned_text.replace('\\', '')
|
||||||
|
|
||||||
# Replacing hash characters:
|
# Replacing hash characters:
|
||||||
cleaned_text = cleaned_text.replace('#', ' ')
|
cleaned_text = cleaned_text.replace('#', ' ')
|
||||||
|
|
||||||
# Eliminating consecutive non-alphanumeric characters:
|
# Eliminating consecutive non-alphanumeric characters:
|
||||||
# This regex identifies consecutive non-alphanumeric characters (i.e., not a word character [a-zA-Z0-9_] and not a whitespace) in the string
|
# This regex identifies consecutive non-alphanumeric characters (i.e., not a word character [a-zA-Z0-9_] and not a whitespace) in the string
|
||||||
# and replaces each group of such characters with a single occurrence of that character.
|
# and replaces each group of such characters with a single occurrence of that character.
|
||||||
# For example, "!!! hello !!!" would become "! hello !".
|
# For example, "!!! hello !!!" would become "! hello !".
|
||||||
cleaned_text = re.sub(r'([^\w\s])\1*', r'\1', cleaned_text)
|
cleaned_text = re.sub(r'([^\w\s])\1*', r'\1', cleaned_text)
|
||||||
|
|
||||||
return cleaned_text
|
|
||||||
|
|
||||||
|
return cleaned_text
|
||||||
|
|||||||
Reference in New Issue
Block a user