/// <summary>
/// Strips HTML markup from a string. Every script element is removed together
/// with its content first, then all remaining tags are removed.
/// </summary>
/// <param name="htmlSource">HTML text to clean; may be null or empty.</param>
/// <returns>
/// The input with script elements and tags removed, or an empty string when
/// <paramref name="htmlSource"/> is null or empty.
/// </returns>
public static string RemoveHTMLTag(string htmlSource)
{
    if (string.IsNullOrEmpty(htmlSource))
    {
        return String.Empty;
    }

    // Remove script elements including their bodies. Tag names are
    // case-insensitive in HTML, and the closing tag may carry whitespace
    // before '>', so both forms must be matched or the JavaScript text
    // would survive the tag-stripping pass below.
    string withoutScripts = Regex.Replace(
        htmlSource,
        @"<script\b[\d\D]*?>[\d\D]*?</script\s*>",
        String.Empty,
        RegexOptions.IgnoreCase);

    // Remove every remaining tag, keeping only the text between tags.
    return Regex.Replace(withoutScripts, @"<[^>]*>", String.Empty);
}
/// <summary>
/// Downloads the resource at the given URL and returns it as a string,
/// decoded as UTF-8.
/// </summary>
/// <param name="url">Address of the resource to download.</param>
/// <returns>The downloaded content as text.</returns>
public static string GetSourceFromUrl(string url)
{
    // WebClient is IDisposable; dispose it so its resources are released
    // even when the download throws.
    using (WebClient client = new WebClient())
    {
        // Send browser-like request headers in case the server rejects
        // requests that do not look like they come from a browser.
        client.Headers.Add("User-Agent: Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/536.5 (KHTML, like Gecko) Chrome/19.0.1084.56 Safari/536.5");
        client.Headers.Add("Accept: text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8");
        client.Headers.Add("Accept-Encoding: identity");
        client.Headers.Add("Accept-Language: zh-TW,en;q=0.8");
        client.Headers.Add("Accept-Charset: utf-8;q=0.7,*;q=0.3");
        // NOTE(review): "ContentType" is not the standard "Content-Type" header
        // name; kept unchanged so the request sent stays the same - confirm intent.
        client.Headers.Add("ContentType", "application/x-www-form-urlencoded");
        client.Encoding = Encoding.UTF8;
        return client.DownloadString(url);
    }
}
使用:
// Click handler: downloads the page at the URL entered in txtLink, strips its
// markup, and shows the remaining text in ltlResult.
protected void btnTest_Click(object sender, EventArgs e)
{
    string address = txtLink.Text;
    string pageSource = GetSourceFromUrl(address);
    string plainText = RemoveHTMLTag(pageSource);
    ltlResult.Text = plainText;
}
原始文章出自於此
沒有留言:
張貼留言