针对网友提出的“C#多线程处理百度关键词排名查询的问题”,本站整理了网上的相关解答,供大家参考,详细内容如下:
问题:C#多线程处理百度关键词排名查询的问题
描述:c#百度多线程域名
最近在做一个百度关键词排名查询的功能,要求是批量查询的,域名批量,关键词批量,就像爱站工具包关键词Url及排名那个功能,目前自己已实现关键词批量查询,卡在了域名批量这个功能点上了,不知道要用什么方法把多个域名和多个关键词联系起来,试了很多种办法都没达到预期的效果,谁能出来指导一下?万分感谢!!!
贴出部分代码:
(开启多线程目前最多5个线程)
/// <summary>
/// Starts the ranking query: updates the status label, allocates the
/// <c>thread</c> array with <c>intThreadCount</c> slots, and launches that many
/// background threads, each running <see cref="GetBaiduRankingData"/>.
/// </summary>
private void GetBaiduRanking()
{
    this.lbSearchInfo.Text = "开始查询......";
    thread = new Thread[intThreadCount];

    int workerIndex = 0;
    while (workerIndex < intThreadCount)
    {
        // Background threads do not keep the process alive once the UI exits.
        Thread worker = new Thread(GetBaiduRankingData) { IsBackground = true };
        thread[workerIndex] = worker;
        worker.Start();
        workerIndex++;
    }
}
/// <summary>
/// Worker loop executed by each thread started in <c>GetBaiduRanking</c>.
/// Repeatedly takes one keyword from <c>KeyWordQueue</c>, fetches the search result
/// pages for it (offsets 0 up to, but not including, 100, stepping by
/// <c>intPageNumber</c>), and reports every result whose site address contains
/// <c>strInPutSiteAddress</c> through <c>AddDataGridViewData</c>.
/// </summary>
/// <remarks>
/// Shared state touched by several workers (<c>KeyWordQueue</c>, <c>threadList</c>,
/// <c>intHadSearchCount</c>) is only accessed while holding the lock on
/// <c>KeyWordQueue</c>.
/// When <c>rbFirstRank</c> is checked, one row (the best rank, or "no rank") is
/// emitted per keyword; otherwise one row is emitted per hit, plus a "no rank" row
/// for each page without a hit.
/// NOTE(review): <c>rbFirstRank</c>/<c>rbAllRank</c> are read and <c>MessageBox</c> is
/// shown from the worker thread — confirm this is intended for the UI framework in use.
/// </remarks>
private void GetBaiduRankingData()
{
    // Pattern for the result HTML that carries the rank id and the domain.
    // Built once per worker instead of once per page (it never changes).
    // NOTE(review): the code below reads groups "Url", "RealUrl" and "ID", but the
    // pattern text here contains bare "(?" constructs with no group names —
    // presumably "(?<ID>...)" etc. were lost when this snippet was published.
    // Restore the pattern from the original source before relying on it.
    Regex regex = new Regex("<(div|table) class=\"result(?!-opxpath-log)(.*?)\" id=\"(?\\d+)\"(.*?)>(.*?)(.*?)href = \"(?\\S+)\"(.*?)>(?.*?)(.*?)(?.*?)(.*?)", RegexOptions.IgnoreCase);

    while (true)
    {
        string sCurrentKeyWord;

        // Check-and-dequeue must be one atomic step: checking Count outside the lock
        // lets a worker that lost the race carry on with an empty keyword.
        lock (KeyWordQueue)
        {
            if (KeyWordQueue.Count == 0)
            {
                break;
            }

            // Registered under the same lock so concurrent workers do not race on threadList.
            threadList.Add(Thread.CurrentThread);
            sCurrentKeyWord = KeyWordQueue.Dequeue();
        }

        var firstRankResultList = new List<KeyWordRankResult>();
        int count = 0;
        int intPage = 1;
        // True when this worker's most recent increment brought the shared page
        // counter exactly to intSearchCount.
        bool isFinalSearch = false;

        // Page through the results; currently limited to the first 100 positions.
        while (count < 100)
        {
            var allRankResultList = new List<KeyWordRankResult>();
            string sUrl = GetSearchUrl(count, sCurrentKeyWord);

            // Fetch the search result HTML and flatten it onto one line for the regex.
            string sHtmlText = this.GetHtmlText(sUrl, "utf-8", false, true).Replace("\n", "");

            // Check whether a verification-code (captcha) page came back.
            IsAppearVerifyCode(sUrl, sHtmlText);

            try
            {
                // Walk every result matched on the current page.
                foreach (Match item in regex.Matches(sHtmlText))
                {
                    string sSiteAddress = item.Groups["Url"].Value;

                    // NOTE(review): the two empty-string literals below look like markup
                    // tokens that were stripped from the published snippet (as written,
                    // string.Replace rejects an empty search string). Left untouched
                    // because the original tokens cannot be recovered from here.
                    if (sSiteAddress.Contains(""))
                    {
                        sSiteAddress = sSiteAddress.Replace("", "");
                    }
                    if (sSiteAddress.Contains(""))
                    {
                        sSiteAddress = sSiteAddress.Replace("", "");
                    }

                    // Drop everything from a "..." truncation marker onwards.
                    int ellipsisIndex = sSiteAddress.IndexOf("...", StringComparison.Ordinal);
                    if (ellipsisIndex >= 0)
                    {
                        sSiteAddress = sSiteAddress.Substring(0, ellipsisIndex);
                    }

                    // Keep only the part up to and including the first '/'.
                    int slashIndex = sSiteAddress.IndexOf("/", StringComparison.Ordinal);
                    string strSite = slashIndex >= 0
                        ? sSiteAddress.Substring(0, slashIndex + 1)
                        : sSiteAddress;

                    // strInPutSiteAddress is the single site address entered by the user.
                    if (!strSite.Contains(strInPutSiteAddress))
                    {
                        continue;
                    }

                    string sRealSiteAddress = GetRealSiteAddress(item.Groups["RealUrl"].Value);
                    string sRank = item.Groups["ID"].Value;
                    var rankResult = new KeyWordRankResult()
                    {
                        SiteAddress = strInPutSiteAddress,
                        Rank = Convert.ToInt32(sRank),
                        RealAddress = sRealSiteAddress
                    };

                    if (this.rbFirstRank.Checked)
                    {
                        // Collected now, reduced to the best rank after all pages are done.
                        firstRankResultList.Add(rankResult);
                    }
                    else
                    {
                        allRankResultList.Add(rankResult);
                        // Add the hit to the DataGridView list.
                        AddDataGridViewData(strInPutSiteAddress, sCurrentKeyWord, sRank, sRealSiteAddress, intPage);
                    }
                }

                // "All ranks" mode: report pages on which the site did not appear.
                if (this.rbAllRank.Checked && !allRankResultList.Any())
                {
                    string sRanking = string.Format("第{0}页无排名", intPage);
                    string sRealAddress = string.Format("第{0}页无数据", intPage);
                    AddDataGridViewData(strInPutSiteAddress, sCurrentKeyWord, sRanking, sRealAddress, intPage);
                }
            }
            catch (ThreadAbortException)
            {
                // Caught by type rather than by matching localized message text.
                // Cancels the pending abort; the loop then carries on with the next page.
                Thread.ResetAbort();
            }
            catch (Exception ex)
            {
                MessageBox.Show(ex.ToString());
            }

            intPage++;

            // Increment and compare in one critical section so that exactly one worker
            // observes the counter reaching intSearchCount.
            lock (KeyWordQueue)
            {
                isFinalSearch = ++intHadSearchCount == this.intSearchCount;
            }

            count += intPageNumber;
        }

        // "First rank" mode: emit the best (lowest) rank found for this keyword.
        if (this.rbFirstRank.Checked)
        {
            if (firstRankResultList.Any())
            {
                var best = firstRankResultList.OrderBy(p => p.Rank).First();
                AddDataGridViewData(best.SiteAddress, sCurrentKeyWord, best.Rank.ToString(), best.RealAddress, intPage);
            }
            else
            {
                const string sRanking = "无排名";
                const string sRealAddress = "无数据";
                AddDataGridViewData(strInPutSiteAddress, sCurrentKeyWord, sRanking, sRealAddress, intPage);
            }
        }

        // The worker whose last page completed the expected total stops the
        // stopwatch, shows the elapsed milliseconds and resets the UI.
        if (isFinalSearch)
        {
            sw.Stop();
            MessageBox.Show(sw.ElapsedMilliseconds.ToString());
            SetUI();
        }
    }
}
以上就是关于“C#多线程处理百度关键词排名查询的问题”的解答,希望对有需要的网友有所帮助。
本文网址链接:http://www.codes51.com/itwd/1021299.html