发布:2021/11/19 16:08:33作者:管理员 来源:本站 浏览次数:843
1、定义保存分词的list<string>
public static List<Participle> _participles = new List<Participle>();
public static List<Participle> _participles_1 = new List<Participle>();
public static List<string> _sql = new List<string>();
2、调用分词接口拆分查询文本
/// 利用SCWS进行中文分词
/// 1370676020@qq.com
/// </summary>
/// <param name="str">需要分词的字符串</param>
/// <returns>用空格分开的分词结果</returns>
public async Task<string> Segment(string str)
{
Task<string> items = Task.Run(()=> {
System.Text.StringBuilder sb = new System.Text.StringBuilder();
try
{
string s = string.Empty;
System.Net.CookieContainer cookieContainer = new System.Net.CookieContainer();
// 将提交的字符串数据转换成字节数组
byte[] postData = System.Text.Encoding.ASCII.GetBytes("data=" + System.Web.HttpUtility.UrlEncode(str) + "&respond=json&charset=utf8&ignore=yes&duality=no&traditional=no&multi=0");
// 设置提交的相关参数
System.Net.HttpWebRequest request = System.Net.WebRequest.Create("http://www.ftphp.com/scws/api.php") as System.Net.HttpWebRequest;
request.Method = "POST";
request.KeepAlive = false;
request.ContentType = "application/x-www-form-urlencoded";
request.CookieContainer = cookieContainer;
request.ContentLength = postData.Length;
// 提交请求数据
System.IO.Stream outputStream = request.GetRequestStream();
outputStream.Write(postData, 0, postData.Length);
outputStream.Close();
// 接收返回的页面
System.Net.HttpWebResponse response = request.GetResponse() as System.Net.HttpWebResponse;
System.IO.Stream responseStream = response.GetResponseStream();
System.IO.StreamReader reader = new System.IO.StreamReader(responseStream, System.Text.Encoding.GetEncoding("utf-8"));
string val = reader.ReadToEnd();
Newtonsoft.Json.Linq.JObject results = Newtonsoft.Json.Linq.JObject.Parse(val);
foreach (var item in results["words"].Children())
{
Newtonsoft.Json.Linq.JObject word = Newtonsoft.Json.Linq.JObject.Parse(item.ToString());
var _w = word["word"].ToString();
if (_w.Length > 1)
{
_participles.Add(new Participle { Title = _w });
}
sb.Append(" or title like '%" + word["word"].ToString() + "%'");
// sb.Append(word["word"].ToString() + " ");
//Response.Write(word["word"].ToString() + "<br/>");
}
}
catch
{
return "";
}
return sb.ToString();
});
return await items;
}
3、根据分词列表进行SQL语句组装
public async Task<List<string>> GetSearchWordList(int top=5)
{
int topcount = _participles.Count;
if (topcount < 2)
{
return null;
}
Task<List<string>> items = Task.Run(() =>
{
List<string> items1 = new List<string>();
_sql.Add(string.Format("select top {2} *,{1} as wcount from tbarticle where title like '%{0}%' UNION ALL ", KeyWord,100,top));
#region 1 职业病,诊断,鉴定,管理办法,中华人民共和国,国家,卫生健康,委员会,
/*
职业病,诊断,鉴定,管理办法,中华人民共和国,国家,卫生健康,委员会,
职业病,诊断,鉴定,管理办法,中华人民共和国,国家,卫生健康,
职业病,诊断,鉴定,管理办法,中华人民共和国,国家,
职业病,诊断,鉴定,管理办法,中华人民共和国,
职业病,诊断,鉴定,管理办法,
职业病,诊断,鉴定,
职业病,诊断,
*
* */
for (int i = 0; i < _participles.Count - 1; i++)
{
var str = string.Empty;
for (int ii = 0; ii < topcount - i; ii++)
{
str = str + " title like '%" + _participles[ii].Title + "%' and ";
}
if (topcount - i == 0) break;
items1.Add(str);
if (!string.IsNullOrEmpty(str))
{
str = str.Substring(0, str.Length - 4);
}
var sql = string.Format("select top {2} *,{1} as wcount from tb where {0} UNION ALL ", str, topcount - i,top);
//Response.Write(str + "<br/>");
_sql.Add(sql);
}
#endregion
#region 2 诊断,鉴定,管理办法,中华人民共和国,国家,卫生健康,委员会,
/*
诊断,鉴定,管理办法,中华人民共和国,国家,卫生健康,委员会,
鉴定,管理办法,中华人民共和国,国家,卫生健康,委员会,
管理办法,中华人民共和国,国家,卫生健康,委员会,
中华人民共和国,国家,卫生健康,委员会,
国家,卫生健康,委员会,
卫生健康,委员会,
*
*
* */
for (int i = 0; i < _participles.Count - 2; i++)
{
var str = string.Empty;
for (int ii = i; ii < topcount - 1; ii++)
{
str = str + " title like '%" + _participles[ii + 1].Title + "%' and ";
}
if (topcount - 1 - i == 0) break;
items1.Add(str);
if (!string.IsNullOrEmpty(str))
{
str = str.Substring(0, str.Length - 4);
}
var sql = string.Format("select top {2} *,{1} as wcount from tb where {0} UNION ALL ", str, topcount - 1 - i,top);
//Response.Write(str + "<br/>");
_sql.Add(sql);
}
#endregion
#region 3
for (int i = 1; i < _participles.Count - 1; i++)
{
var str = string.Empty;
for (int ii = i; ii < topcount - i; ii++)
{
str = str + " title like '%" + _participles[ii].Title + "%' and ";
}
if (topcount - i * 2 == 1 || topcount - i * 2 == 0) break;
items1.Add(str);
if (!string.IsNullOrEmpty(str))
{
str = str.Substring(0, str.Length - 4);
}
var sql = string.Format("select top {2} *,{1} as wcount from tb where {0} UNION ALL ", str, topcount - i*2,top);
//Response.Write(str + "<br/>" );
_sql.Add(sql);
}
#endregion
return items1;
});
return await items;
}
4、使用
protected void Page_Load(object sender, EventArgs e)
{
if (!IsPostBack)
{
_participles.Clear();
_participles_1.Clear();
_sql.Clear();
_word= await Segment(KeyWord);
await GetSearchWordList();
var str = "";
var _sql_1 = "";
foreach (var sql in _sql)
{
str = str + sql;
}
str = str.Substring(0, str.Length - 10);
_sql_1 = "select * ,wcount from tb1 tce right join (select top 100 * from (select id,MAX(wcount) wcount from (" + str + ") a group by id) b order by wcount desc) s on tce.id=s.id ";
}
}
© Copyright 2014 - 2024 柏港建站平台 ejk5.com. 渝ICP备16000791号-4