发布:2022/10/21 17:39:41作者:管理员 来源:本站 浏览次数:864
1、Trie Tree
Trie Tree(字典树),一种常见的树形结构,由于其利用字符串的公共前缀极大程度上减少的字符比较,所以非常适合用来实现敏感词过滤方案。
如,有A,ABC,AC,BCD,BD,CD六个敏感词,则构建Trie Tree结构如下所以。
2、代码实现
using System.Runtime.CompilerServices;
namespace JWordFilter
{
[Flags]
public enum FilterWordEnum
{
None = 0,
IgnoreCase = 1 << 0, // 忽略大小写
AllowSkipWord = 1 << 1, // 允许跳词
All = int.MaxValue,
}
public class WordFilter
{
private class WordFilterNode
{
// 是否是敏感词结尾节点
public bool IsEnd;
// 子节点
public Dictionary<char, WordFilterNode> Nodes = new Dictionary<char, WordFilterNode>(0);
/// <summary>
/// 获取子节点
/// </summary>
/// <param name="ch"></param>
/// <returns></returns>
public WordFilterNode GetNode(char ch)
{
if (Nodes.ContainsKey(ch))
{
return Nodes[ch];
}
return null;
}
/// <summary>
/// 添加子节点
/// </summary>
/// <param name="ch"></param>
/// <returns></returns>
public WordFilterNode AddNode(char ch)
{
var node = new WordFilterNode();
Nodes.Add(ch, node);
return node;
}
}
// 根节点
private WordFilterNode _root = new WordFilterNode();
// 跳词
private string _skipList = " \t\r\n~!@#$%^&*()_+-=【】、[]{}|;':\",./<>?αβγδεζηθικλμνξοπρστυφχψωΑΒΓΔΕΖΗΘΙΚΛΜΝΞΟΠΡΣΤΥΦΧΨΩ。,、;:?!…—·ˉ¨‘’“”々~‖∶"'`|〃〔〕〈〉《》「」『』.〖〗【】()[]{}ⅠⅡⅢⅣⅤⅥⅦⅧⅨⅩⅪⅫ⒈⒉⒊⒋⒌⒍⒎⒏⒐⒑⒒⒓⒔⒕⒖⒗⒘⒙⒚⒛㈠㈡㈢㈣㈤㈥㈦㈧㈨㈩①②③④⑤⑥⑦⑧⑨⑩⑴⑵⑶⑷⑸⑹⑺⑻⑼⑽⑾⑿⒀⒁⒂⒃⒄⒅⒆⒇≈≡≠=≤≥<>≮≯∷±+-×÷/∫∮∝∞∧∨∑∏∪∩∈∵∴⊥∥∠⌒⊙≌∽√§№☆★○●◎◇◆□℃‰€■△▲※→←↑↓〓¤°#&@\︿_ ̄―♂♀┌┍┎┐┑┒┓─┄┈├┝┞┟┠┡┢┣│┆┊┬┭┮┯┰┱┲┳┼┽┾┿╀╁╂╃└┕┖┗┘┙┚┛━┅┉┤┥┦┧┨┩┪┫┃┇┋┴┵┶┷┸┹┺┻╋╊╉╈╇╆╅╄丶";
// 跳词数组,方便快速查找
private bool[] _skipArr = new bool[char.MaxValue];
public WordFilter()
{
for (int i = 0; i < _skipList.Length; i++)
{
int code = (int)_skipList[i];
_skipArr[code] = true;
}
}
/// <summary>
/// 添加过滤词
/// </summary>
/// <param name="value"></param>
public void AddWord(string value)
{
WordFilterNode node = _root;
int index = 0;
int len = value.Length;
while (index < len)
{
char ch = value[index];
var childNode = node.GetNode(ch);
if (childNode == null)
{
childNode = node.AddNode(ch);
}
if (index == len - 1)
{
childNode.IsEnd = true;
}
index++;
node = childNode;
}
}
/// <summary>
/// 是否存在过滤字符串
/// </summary>
/// <returns></returns>
public bool HasFilterWord(string word, FilterWordEnum @enum = FilterWordEnum.None)
{
int len = word.Length;
for (int i = 0; i < len; i++)
{
int endIndex = GetFilterWordEndIndex(_root, word, i, @enum);
if (endIndex != -1)
{
return true;
}
}
return false;
}
/// <summary>
/// 替换屏蔽词
/// </summary>
/// <param name="word"></param>
/// <param name="enum"></param>
/// <param name="replace"></param>
/// <returns></returns>
public string ReplaceFilterWord(string word, FilterWordEnum @enum = FilterWordEnum.None, char replace = '*')
{
int len = word.Length;
char[] chars = null;
for (int i = 0; i < len;)
{
int endIndex = GetFilterWordEndIndex(_root, word, i, @enum);
if (endIndex == -1)
{
i++;
}
else
{
if (chars == null)
{
chars = word.ToCharArray();
}
for (int j = i; j <= endIndex; j++)
{
chars[j] = replace;
}
i = endIndex + 1;
}
}
if (chars == null)
{
return word;
}
return new string(chars);
}
/// <summary>
/// 获取index索引起始屏蔽词结尾索引
/// </summary>
/// <param name="node"></param>
/// <param name="value"></param>
/// <param name="index"></param>
/// <param name="enum"></param>
/// <returns></returns>
private int GetFilterWordEndIndex(WordFilterNode node, string value, int index, FilterWordEnum @enum)
{
int endIndex = -1;
int len = value.Length;
bool ignoreCase = (@enum & FilterWordEnum.IgnoreCase) == FilterWordEnum.IgnoreCase;
bool isSkip = (@enum & FilterWordEnum.AllowSkipWord) == FilterWordEnum.AllowSkipWord;
while (index < len)
{
char ch = value[index];
WordFilterNode childNode = null;
// 忽略大小写
if (ignoreCase)
{
// 先获取一次,childNode为null再转换
childNode = node.GetNode(ch);
// childNode为null 且 是字母
if (childNode == null && char.IsLetter(ch))
{
// 小写 转 大写
if (ch >= 'a')
{
ch = (char)(ch & 191U);
}
// 大写 转 小写
else
{
ch = (char) (ch | 32U);
}
childNode = node.GetNode(ch);
}
}
else
{
childNode = node.GetNode(ch);
}
// 没有子节点
if (childNode == null)
{
// 不允许跳词 或 不是跳词范围字符 直接返回
if (!isSkip || !_skipArr[ch])
{
return endIndex;
}
}
// 有子节点
else
{
// 最后一个节点
if(childNode.IsEnd)
{
endIndex = index;
}
node = childNode;
}
// 索引+1 继续向下检测
index++;
}
return endIndex;
}
}
}
3、测试
WordFilter filter = new WordFilter();
filter.AddWord("asd");
Console.WriteLine(filter.ReplaceFilterWord("asDf"));
Console.WriteLine(filter.ReplaceFilterWord("asDf", FilterWordEnum.All));
Console.WriteLine(filter.ReplaceFilterWord("as Df", FilterWordEnum.All));
© Copyright 2014 - 2025 柏港建站平台 ejk5.com. 渝ICP备16000791号-4