Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions package.json
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@
"lint": "oxlint",
"lint:fix": "oxlint --fix",
"format": "prettier . --write",
"check": "npm run format && npm run lint && npm test && npm run build",
"prepublishOnly": "npm run lint && npm test",
"translate": "docker compose -f ./src/tools/translate/docker-compose.yml up -d && ts-node ./src/tools/translate/translate.ts && docker compose -f ./src/tools/translate/docker-compose.yml down",
"benchmark": "docker compose -f ./src/tools/benchmark/docker-compose.yml up --build"
Expand Down
99 changes: 60 additions & 39 deletions src/profanity.ts
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,11 @@ import { List, CensorType } from "./models";
import { escapeRegExp } from "./utils";
import { profaneWords } from "./data";

const FIRST_VOWEL_RE = /[aeiou]/i;
const ALL_VOWELS_RE = /[aeiou]/gi;
const UNICODE_WORD_CHAR_RE = /[\p{L}\p{N}\p{M}_-]/u;
const ASCII_WORD_CHAR_RE = /[\w-_]/;

export class Profanity {
options: ProfanityOptions;
whitelist: List;
Expand Down Expand Up @@ -84,7 +89,7 @@ export class Profanity {
return this.options.grawlixChar + word.slice(1);
case CensorType.FirstVowel:
case CensorType.AllVowels: {
const vowelRegex = new RegExp("[aeiou]", censorType === CensorType.FirstVowel ? "i" : "ig");
const vowelRegex = censorType === CensorType.FirstVowel ? FIRST_VOWEL_RE : ALL_VOWELS_RE;
return word.replace(vowelRegex, this.options.grawlixChar);
}
default:
Expand Down Expand Up @@ -153,32 +158,31 @@ export class Profanity {
* @returns True if the match is whitelisted, false otherwise.
*/
private isWhitelisted(matchStart: number, matchEnd: number, text: string): boolean {
const wholeWord = this.options.wholeWord;
const wordCharRe = wholeWord ? (this.options.unicodeWordBoundaries ? UNICODE_WORD_CHAR_RE : ASCII_WORD_CHAR_RE) : null;

for (const whitelistedWord of this.whitelist.words) {
const whitelistedIndex = text.indexOf(whitelistedWord, Math.max(0, matchStart - whitelistedWord.length + 1));
if (whitelistedIndex !== -1) {
const whitelistedEnd = whitelistedIndex + whitelistedWord.length;

if (this.options.wholeWord) {
const isWordChar = (ch: string | undefined) => {
if (!ch) return false;
return this.options.unicodeWordBoundaries ? /[\p{L}\p{N}\p{M}_-]/u.test(ch) : /[\w-_]/.test(ch);
};
if (
matchStart === whitelistedIndex &&
matchEnd === whitelistedEnd &&
(matchStart === 0 || !isWordChar(text.charAt(matchStart - 1))) &&
(matchEnd === text.length || !isWordChar(text.charAt(matchEnd)))
) {
return true;
}
} else {
if (
(matchStart >= whitelistedIndex && matchStart < whitelistedEnd) ||
(matchEnd > whitelistedIndex && matchEnd <= whitelistedEnd) ||
(whitelistedIndex >= matchStart && whitelistedEnd <= matchEnd)
) {
return true;
}
if (whitelistedIndex === -1) continue;

const whitelistedEnd = whitelistedIndex + whitelistedWord.length;

if (wordCharRe) {
if (
matchStart === whitelistedIndex &&
matchEnd === whitelistedEnd &&
(matchStart === 0 || !wordCharRe.test(text.charAt(matchStart - 1))) &&
(matchEnd === text.length || !wordCharRe.test(text.charAt(matchEnd)))
) {
return true;
}
} else {
if (
(matchStart >= whitelistedIndex && matchStart < whitelistedEnd) ||
(matchEnd > whitelistedIndex && matchEnd <= whitelistedEnd) ||
(whitelistedIndex >= matchStart && whitelistedEnd <= matchEnd)
) {
return true;
}
}
}
Expand All @@ -199,20 +203,22 @@ export class Profanity {
replacer: (word: string, start: number, end: number) => string,
regex: RegExp,
): string {
let result = text;
let offset = 0;
const parts: string[] = [];
let lastIndex = 0;

let match: RegExpExecArray | null;
while ((match = regex.exec(lowercaseText)) !== null) {
const matchStart = match.index;
const matchEnd = matchStart + match[0].length;
const originalWord = text.slice(matchStart + offset, matchEnd + offset);
const originalWord = text.slice(matchStart, matchEnd);
const censoredWord = replacer(originalWord, matchStart, matchEnd);
result = result.slice(0, matchStart + offset) + censoredWord + result.slice(matchEnd + offset);
offset += censoredWord.length - originalWord.length;
parts.push(text.slice(lastIndex, matchStart), censoredWord);
lastIndex = matchEnd;
}

return result;
if (lastIndex === 0) return text;
parts.push(text.slice(lastIndex));
return parts.join("");
}

/**
Expand All @@ -237,20 +243,35 @@ export class Profanity {
throw new Error("At least one language must be provided");
}

const uniqueLanguages = [...new Set(languages.map((language) => language.trim().toLowerCase()))];

const regexKey = uniqueLanguages.toSorted().join(",");
if (this.regexes.has(regexKey)) {
return this.regexes.get(regexKey)!;
const seen = new Set<string>();
const uniqueLanguages: string[] = [];
for (const language of languages) {
const normalized = language.trim().toLowerCase();
if (!seen.has(normalized)) {
seen.add(normalized);
uniqueLanguages.push(normalized);
}
}

const allWords = uniqueLanguages.flatMap((language) => {
uniqueLanguages.sort();
const regexKey = uniqueLanguages.join(",");

const cached = this.regexes.get(regexKey);
if (cached) return cached;

const removedWords = this.removed.words;
const allWords: string[] = [];
for (const language of uniqueLanguages) {
const words = profaneWords.get(language);
if (!words) {
throw new Error(`Invalid language: "${language}"`);
}
return words.filter((word) => !this.removed.words.has(word));
});
for (const word of words) {
if (!removedWords.has(word)) {
allWords.push(word);
}
}
}

const regex = this.buildRegex(allWords);
this.regexes.set(regexKey, regex);
Expand Down
2 changes: 1 addition & 1 deletion src/tools/benchmark/benchmark.ts
Original file line number Diff line number Diff line change
Expand Up @@ -177,7 +177,7 @@ const startMainSuite = () =>
.on("cycle", (event: Event) => {
console.log(String(event.target));
})
.on("complete", function () {
.on("complete", function (this: any) {
console.log(`Fastest: ${this.filter("fastest").map("name")[0]}`);
})
.run({ async: true });
Expand Down
Loading