Files
t6_mem0/embedchain-js/embedchain/utils.ts
2023-09-07 05:52:44 +05:30

27 lines
1.1 KiB
TypeScript

/**
 * Normalizes free-form text into a compact, single-line string.
 *
 * The following operations are applied, in order:
 * 1. Backslashes are removed.
 * 2. Hash characters (`#`) are replaced with a space.
 * 3. Every run of whitespace (spaces, tabs, newlines, ...) is collapsed into a
 *    single space, and leading/trailing whitespace is stripped.
 * 4. Every run of the same punctuation/symbol character is collapsed into a
 *    single occurrence, e.g. `"!!! hello !!!"` becomes `"! hello !"`.
 *
 * Letters and digits of any script are treated as word characters, so doubled
 * letters in non-ASCII text (e.g. `"русский"`) are left untouched.
 *
 * @param text - The text to be cleaned.
 * @returns The cleaned text after all the cleaning operations have been performed.
 */
export function cleanString(text: string): string {
  return (
    text
      // Removing backslashes:
      .replace(/\\/g, '')
      // Replacing hash characters:
      .replace(/#/g, ' ')
      // Reducing whitespace runs (newlines included) to a single space and stripping.
      // This must run AFTER the two substitutions above: both can leave behind
      // leading, trailing, or doubled spaces ("# Title" -> " Title", "a \ b" -> "a  b").
      .replace(/\s+/g, ' ')
      .trim()
      // Eliminating consecutive duplicate non-alphanumeric characters:
      // the character class matches anything that is not a Unicode letter, a Unicode
      // number, an underscore, or whitespace. A plain `\w` is ASCII-only in JavaScript
      // and would wrongly collapse doubled non-ASCII letters. The `u` flag also makes
      // the pattern operate on whole code points rather than UTF-16 code units.
      .replace(/([^\p{L}\p{N}_\s])\1*/gu, '$1')
  );
}