关于网友提出的“内测网页游戏 采集某一个网页的某一个栏目的内容”问题疑问,本网通过在网上对“内测网页游戏 采集某一个网页的某一个栏目的内容”有关的相关答案进行了整理,供用户进行参考,详细问题解答如下:
问题:内测网页游戏 采集某一个网页的某一个栏目的内容
描述:
script runat="server">
void Page_Load(object sender, EventArgs e)
{
try
{
WebRequest request = WebRequest.Create("http://www.hexun.com/");
WebResponse response = request.GetResponse();
StreamReader reader = new StreamReader(response.GetResponseStream(), Encoding.GetEncoding("gb2312"));
tb.Text = reader.ReadToEnd();
reader.Close();
reader.Dispose();
response.Close();
}
catch (Exception ex)
{
tb.Text = ex.Message;
}
}
我用了label控件。我想采集某一个网页的某一个栏目的内容,需要怎么写呢?多谢呢
解决方案1: 抓取HTML
抓取html 写正则
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.Net;
using System.IO;
using System.IO.Compression;
using System.Text.RegularExpressions;
namespace WikiPageCreater.Common
{
public class PageHelper
{
///
/// 根据 url 获取网页编码
///
///
///
public static string GetEncoding(string url)
{
HttpWebRequest request = null;
HttpWebResponse response = null;
StreamReader reader = null;
try
{
request = (HttpWebRequest)WebRequest.Create(url);
request.Timeout = 20000;
request.AllowAutoRedirect = false;
response = (HttpWebResponse)request.GetResponse();
if (response.StatusCode == HttpStatusCode.OK && response.ContentLength < 1024 * 1024)
{
if (response.ContentEncoding != null && response.ContentEncoding.Equals("gzip", StringComparison.InvariantCultureIgnoreCase))
reader = new StreamReader(new GZipStream(response.GetResponseStream(), CompressionMode.Decompress));
else
reader = new StreamReader(response.GetResponseStream(), Encoding.ASCII);
string html = reader.ReadToEnd();
Regex reg_charset = new Regex(@"charset\b\s*=\s*(?[^""]*)");
if (reg_charset.IsMatch(html))
{
return reg_charset.Match(html).Groups["charset"].Value;
}
else if (response.CharacterSet != string.Empty)
{
return response.CharacterSet;
}
else
return Encoding.Default.BodyName;
}
}
catch
{
}
finally
{
if (response != null)
{
response.Close();
response = null;
}
if (reader != null)
reader.Close();
if (request != null)
request = null;
}
return Encoding.Default.BodyName;
}
///
/// 根据 url 和 encoding 获取当前url页面的 html 源代码
///
///
///
///
public static string GetHtml(string url, Encoding encoding)
{
HttpWebRequest request = null;
HttpWebResponse response = null;
StreamReader reader = null;
try
{
request = (HttpWebRequest)WebRequest.Create(url);
request.Timeout = 20000;
request.AllowAutoRedirect = false;
response = (HttpWebResponse)request.GetResponse();
if (response.StatusCode == HttpStatusCode.OK && response.ContentLength < 1024 * 1024)
{
if (response.ContentEncoding != null && response.ContentEncoding.Equals("gzip", StringComparison.InvariantCultureIgnoreCase))
reader = new StreamReader(new GZipStream(response.GetResponseStream(), CompressionMode.Decompress), encoding);
else
reader = new StreamReader(response.GetResponseStream(), encoding);
string html = reader.ReadToEnd();
return html;
}
}
catch
{
}
finally
{
if (response != null)
{
response.Close();
response = null;
}
if (reader != null)
reader.Close();
if (request != null)
request = null;
}
return string.Empty;
}
}
}
以上介绍了“内测网页游戏 采集某一个网页的某一个栏目的内容”的问题解答,希望对有需要的网友有所帮助。
本文网址链接:http://www.codes51.com/itwd/2273544.html