发布:2022/12/19 22:34:12作者:管理员 来源:本站 浏览次数:615
C# 中使用正则表达式提取标签的属性值以及标签内的内容的方法如下:
/// <summary>
/// Extracts the distinct values of one attribute from every occurrence of a tag in a string.
/// </summary>
/// <param name="str"> Text (typically HTML markup) to search. </param>
/// <param name="tagName"> Tag name. It is inserted into the regular expression as-is, so any regex metacharacters it contains are treated as pattern syntax. </param>
/// <param name="attrib"> Attribute name. It is inserted into the regular expression as-is. </param>
/// <returns> The non-empty attribute values, in order of first appearance, without duplicates. </returns>
public static List<string> GetTagAttr(string str, string tagName, string attrib)
{
    // (?=\s) after the tag name keeps "a" from matching an "abbr" tag, and the \s in
    // front of the attribute name keeps "src" from matching inside "data-src=".
    // Group 1 captures the optional opening quote; \1 demands the same closing quote.
    string pattern = string.Format(
        "<{0}(?=\\s)[^>]*?\\s{1}=(['\"]?)(?<url>[^'\"\\s>]+)\\1[^>]*>",
        tagName,
        attrib);

    // Tag and attribute names are matched case-insensitively.
    MatchCollection matches = Regex.Matches(str, pattern, RegexOptions.IgnoreCase);

    List<string> values = new List<string>();
    // The set gives constant-time duplicate checks while the list keeps first-seen order.
    HashSet<string> seen = new HashSet<string>();
    foreach (Match match in matches)
    {
        string value = match.Groups["url"].Value;
        if (string.IsNullOrEmpty(value))
        {
            continue;
        }

        if (seen.Add(value))
        {
            values.Add(value);
        }
    }

    return values;
}
/// <summary>
/// Extracts the distinct text contents found between the opening and closing form of a tag.
/// </summary>
/// <param name="str"> Text (typically HTML markup) to search. </param>
/// <param name="tagName"> Tag name. It is inserted into the regular expression as-is, so any regex metacharacters it contains are treated as pattern syntax. </param>
/// <returns> The non-empty contents, in order of first appearance, without duplicates. Only elements whose content contains no nested tag are matched. </returns>
public static List<string> GetTagContent(string str, string tagName)
{
    // The lookahead requires whitespace, "/" or ">" right after the tag name so that
    // "b" does not match a "br" tag (which used to yield the text following it).
    // The Text group stops at the first "<", so content with nested tags never matches.
    string pattern = string.Format(
        "<{0}(?=[\\s/>])[^>]*?>(?<Text>[^<]*)</{0}>",
        tagName);

    // Tag names are matched case-insensitively.
    MatchCollection matches = Regex.Matches(str, pattern, RegexOptions.IgnoreCase);

    List<string> contents = new List<string>();
    // The set gives constant-time duplicate checks while the list keeps first-seen order.
    HashSet<string> seen = new HashSet<string>();
    foreach (Match match in matches)
    {
        string text = match.Groups["Text"].Value;
        if (string.IsNullOrEmpty(text))
        {
            continue;
        }

        if (seen.Add(text))
        {
            contents.Add(text);
        }
    }

    return contents;
}
© Copyright 2014 - 2024 柏港建站平台 ejk5.com. 渝ICP备16000791号-4