隐藏

asp.net利用正则在HTML中提取图片路径

发布:2013/5/4 16:40:42作者:管理员 来源:本站 浏览次数:1746

public static string GetImgUrl(string HTMLStr)
{
string str = string.Empty;
//string sPattern = @"^<img\s+[^>]*>";
Regex r = new Regex(@"<img\s+[^>]*\s*src\s*=\s*([']?)(?<url>\S+)'?[^>]*>", //注意这里的(?<url>\S+)是按正则表达式中的组来处理的,下面的代码中用使用到,也可以更改成其它的HTML标签,以同样的方法获得内容!
RegexOptions.Compiled);
Match m = r.Match(HTMLStr.ToLower());
if (m.Success)
str = m.Result("${url}");
return str;
}
//返回多个路径的情况
public static StringBuilder MyGetImgUrl(string text)
{
StringBuilder str = new StringBuilder();
string pat = @"<img\s+[^>]*\s*src\s*=\s*([']?)(?<url>\S+)'?[^>]*>";

Regex r = new Regex(pat, RegexOptions.Compiled);

Match m = r.Match(text.ToLower());
//int matchCount = 0;
while (m.Success)
{
Group g = m.Groups[2];
str.Append(g).Append(",");
m = m.NextMatch();
}
return str;
}


using System;
using System.Text.RegularExpressions;

public class Test
{

    public static void Main ()
    {

        // Define a regular expression for repeated words.
        Regex rx = new Regex(@"\b(?<word>\w+)\s+(\k<word>)\b",
          RegexOptions.Compiled | RegexOptions.IgnoreCase);

        // Define a test string.        
        string text = "The the quick brown fox  fox jumped over the lazy dog dog.";

        // Find matches.
        MatchCollection matches = rx.Matches(text);

        // Report the number of matches found.
        Console.WriteLine("{0} matches found in:\n   {1}",
                          matches.Count,
                          text);

        // Report on each match.
        foreach (Match match in matches)
        {
            GroupCollection groups = match.Groups;
            Console.WriteLine("'{0}' repeated at positions {1} and {2}", 
                              groups["word"].Value,
                              groups[0].Index,
                              groups[1].Index);
        }

    }
 
}