c# HttpWebRequest通过代理服务器抓取网页内容应用介绍
时间:2020-11-11 11:55:09|栏目:.NET代码|点击: 次
适用于内网用户或通过代理服务器上网的用户。
using System;
using System.IO;
using System.Net;
using System.Text;
using System.Windows.Forms;
/// <summary>
/// Downloads the page at a fixed URL through an authenticating HTTP proxy
/// and returns its text.
/// </summary>
/// <returns>
/// The response body decoded as UTF-8, with every line terminated by "\r\n";
/// or <c>null</c> when the request fails or the status is not 200 OK
/// (a message box is shown in both failure cases).
/// </returns>
public string get_html()
{
    string urlStr = "http://www.domain.com"; // address of the page to fetch
    HttpWebRequest hwr = (HttpWebRequest)HttpWebRequest.Create(urlStr);
    hwr.Timeout = 60000; // request timeout, in milliseconds

    // Route the request through the proxy and authenticate against it.
    // NOTE(review): sample credentials are hard-coded here; real code should
    // load them from configuration instead of keeping secrets in source.
    WebProxy proxy = new WebProxy();
    proxy.Address = new Uri("http://proxy.domain.com:3128"); // proxy host:port
    proxy.Credentials = new NetworkCredential("f3210316", "6978233"); // user name, password
    // NOTE(review): this flag concerns default credentials sent with the request
    // itself; proxy authentication comes from proxy.Credentials above - confirm
    // it is actually needed.
    hwr.UseDefaultCredentials = true;
    hwr.Proxy = proxy;

    // Declared outside the try block so the response is still in scope below.
    HttpWebResponse hwrs;
    try
    {
        hwrs = (HttpWebResponse)hwr.GetResponse();
    }
    catch (WebException)
    {
        MessageBox.Show("无法连接代理!");
        return null;
    }

    // The using blocks release the connection on every exit path.
    using (hwrs)
    {
        if (hwrs.StatusCode != HttpStatusCode.OK)
        {
            MessageBox.Show("访问失败!");
            return null;
        }

        // Response headers (including any Set-Cookie sent by the server) are
        // available here through hwrs.Headers if they need to be inspected.

        using (Stream s = hwrs.GetResponseStream())
        using (StreamReader sr = new StreamReader(s, Encoding.UTF8))
        {
            StringBuilder content = new StringBuilder();
            string line;
            // ReadLine returns null only at end of stream; Peek() can report -1
            // early on a network stream and silently truncate the page.
            while ((line = sr.ReadLine()) != null)
            {
                content.Append(line).Append("\r\n");
            }
            return content.ToString();
        }
    }
}
大家知道,用HttpWebRequest可以通过HTTP抓取网页;但对于处在内网、需要通过代理服务器上网的用户,直接发起请求是行不通的。
那有没有什么办法呢?
当然有,呵呵,见以下代码:
// Fetch a page through an authenticating HTTP proxy and return its text
// (UTF-8 decoded, each line terminated by "\r\n").
string urlStr = "http://www.domain.com"; // address of the page to fetch
HttpWebRequest hwr = (HttpWebRequest)HttpWebRequest.Create(urlStr);
hwr.Timeout = 60000; // request timeout, in milliseconds

// NOTE(review): sample credentials are hard-coded here; real code should
// load them from configuration instead of keeping secrets in source.
WebProxy proxy = new WebProxy();
proxy.Address = new Uri("http://proxy.domain.com:3128"); // proxy host:port
proxy.Credentials = new NetworkCredential("f3210316", "6978233"); // user name, password
// NOTE(review): this flag concerns default credentials sent with the request
// itself; proxy authentication comes from proxy.Credentials above.
hwr.UseDefaultCredentials = true;
hwr.Proxy = proxy;

// The using blocks release the response, stream and reader on every exit path.
using (HttpWebResponse hwrs = (HttpWebResponse)hwr.GetResponse())
using (Stream s = hwrs.GetResponseStream())
using (StreamReader sr = new StreamReader(s, Encoding.UTF8))
{
    StringBuilder content = new StringBuilder();
    string line;
    // ReadLine returns null only at end of stream; Peek() can report -1
    // early on a network stream and silently truncate the page.
    while ((line = sr.ReadLine()) != null)
    {
        content.Append(line).Append("\r\n");
    }
    return content.ToString(); // the downloaded page text
}
复制代码 代码如下:
using System.IO;
using System.Net;
/// <summary>
/// Downloads the page at a fixed URL through an authenticating HTTP proxy
/// and returns its text.
/// </summary>
/// <returns>
/// The response body decoded as UTF-8, with every line terminated by "\r\n";
/// or <c>null</c> when the request fails or the status is not 200 OK
/// (a message box is shown in both failure cases).
/// </returns>
public string get_html()
{
    string urlStr = "http://www.domain.com"; // address of the page to fetch
    HttpWebRequest hwr = (HttpWebRequest)HttpWebRequest.Create(urlStr);
    hwr.Timeout = 60000; // request timeout, in milliseconds

    // Route the request through the proxy and authenticate against it.
    // NOTE(review): sample credentials are hard-coded here; real code should
    // load them from configuration instead of keeping secrets in source.
    WebProxy proxy = new WebProxy();
    proxy.Address = new Uri("http://proxy.domain.com:3128"); // proxy host:port
    proxy.Credentials = new NetworkCredential("f3210316", "6978233"); // user name, password
    // NOTE(review): this flag concerns default credentials sent with the request
    // itself; proxy authentication comes from proxy.Credentials above - confirm
    // it is actually needed.
    hwr.UseDefaultCredentials = true;
    hwr.Proxy = proxy;

    // Declared outside the try block so the response is still in scope below.
    HttpWebResponse hwrs;
    try
    {
        hwrs = (HttpWebResponse)hwr.GetResponse();
    }
    catch (WebException)
    {
        MessageBox.Show("无法连接代理!");
        return null;
    }

    // The using blocks release the connection on every exit path.
    using (hwrs)
    {
        if (hwrs.StatusCode != HttpStatusCode.OK)
        {
            MessageBox.Show("访问失败!");
            return null;
        }

        // Response headers (including any Set-Cookie sent by the server) are
        // available here through hwrs.Headers if they need to be inspected.

        using (Stream s = hwrs.GetResponseStream())
        using (StreamReader sr = new StreamReader(s, Encoding.UTF8))
        {
            StringBuilder content = new StringBuilder();
            string line;
            // ReadLine returns null only at end of stream; Peek() can report -1
            // early on a network stream and silently truncate the page.
            while ((line = sr.ReadLine()) != null)
            {
                content.Append(line).Append("\r\n");
            }
            return content.ToString();
        }
    }
}
大家知道,用HttpWebRequest可以通过HTTP抓取网页;但对于处在内网、需要通过代理服务器上网的用户,直接发起请求是行不通的。
那有没有什么办法呢?
当然有,呵呵,见以下代码:
复制代码 代码如下:
// Fetch a page through an authenticating HTTP proxy and return its text
// (UTF-8 decoded, each line terminated by "\r\n").
string urlStr = "http://www.domain.com"; // address of the page to fetch
HttpWebRequest hwr = (HttpWebRequest)HttpWebRequest.Create(urlStr);
hwr.Timeout = 60000; // request timeout, in milliseconds

// NOTE(review): sample credentials are hard-coded here; real code should
// load them from configuration instead of keeping secrets in source.
WebProxy proxy = new WebProxy();
proxy.Address = new Uri("http://proxy.domain.com:3128"); // proxy host:port
proxy.Credentials = new NetworkCredential("f3210316", "6978233"); // user name, password
// NOTE(review): this flag concerns default credentials sent with the request
// itself; proxy authentication comes from proxy.Credentials above.
hwr.UseDefaultCredentials = true;
hwr.Proxy = proxy;

// The using blocks release the response, stream and reader on every exit path.
using (HttpWebResponse hwrs = (HttpWebResponse)hwr.GetResponse())
using (Stream s = hwrs.GetResponseStream())
using (StreamReader sr = new StreamReader(s, Encoding.UTF8))
{
    StringBuilder content = new StringBuilder();
    string line;
    // ReadLine returns null only at end of stream; Peek() can report -1
    // early on a network stream and silently truncate the page.
    while ((line = sr.ReadLine()) != null)
    {
        content.Append(line).Append("\r\n");
    }
    return content.ToString(); // the downloaded page text
}
上一篇:C#语音识别用法实例
栏 目:.NET代码
本文标题:c# HttpWebRequest通过代理服务器抓取网页内容应用介绍
本文地址:http://www.codeinn.net/misctech/21568.html