C#远程HTML数据抓取实例分享

所属分类: 软件编程 / C#教程 阅读数: 101
收藏 0 赞 0 分享

复制代码 代码如下:

/// <summary>
        /// 获取远程html
        /// </summary>
        /// <param name="url"></param>
        /// <param name="methed"></param>
        /// <param name="param"></param>
        /// <param name="html"></param>
        /// <returns></returns>
        public static bool GetHttp(string url, string methed, string param, out string html)
        {
            methed = methed.ToLower();

            if (param != null && methed == "get" && param.Length > 0)
            {
                url += "?" + param;
            }

            try
            {
                MSXML2.XMLHTTP mx = new MSXML2.XMLHTTPClass();

                mx.open(methed, url, false, null, null);

                if (param != null && methed == "post" && param.Length > 0)
                {
                    mx.setRequestHeader("Content-Length", param.Length.ToString());
                    mx.setRequestHeader("Content-Type", "application/x-www-form-urlencoded");
                }

                mx.send(param);

                if (mx.readyState != 4)
                {
                    html = "远程连接失败:-4";
                    return false;
                }
                html = mx.responseText;
                return true;
            }
            catch (Exception ex)
            {
                html = "远程连接失败:"+ex.Message;
                return false;
            }
        }

        public static bool GetHttp1(string url, string methed, string param, string referer, string encode, out string html)
        {
            //return GetHttp(url,methed,param,out html);

            //string encode = "utf-8";
            //string methed = sendType.ToString();

            if (param != null && methed == "get" && param.Length > 0)
            {
                if (url.IndexOf("?") >= 0)
                {
                    url += "&" + param;
                }
                else
                {
                    url += "?" + param;
                }
            }

            try
            {
                HttpWebRequest webreq = (HttpWebRequest)WebRequest.Create(url);

                webreq.Proxy=null;
                webreq.Timeout = 1000 * 6;
                webreq.ContentType = "application/x-www-form-urlencoded";
                webreq.UserAgent = "User-Agent:Mozilla/5.0 (Windows NT 6.1; WOW64; rv:24.0) Gecko/20100101 Firefox/24.0";

                //webreq.UserAgent = "Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 6.1; WOW64; Trident/6.0; SLCC2; .NET CLR 2.0.50727; .NET CLR 3.5.30729; .NET CLR 3.0.30729; Media Center PC 6.0; .NET4.0C; .NET4.0E)";

                //谷歌的:User-Agent:Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/28.0.1500.95 Safari/537.36
                //火狐的:User-Agent:Mozilla/5.0 (Windows NT 6.1; WOW64; rv:24.0) Gecko/20100101 Firefox/24.0
                //标准格式为: 浏览器标识 (操作系统标识; 加密等级标识; 浏览器语言) 渲染引擎标识 版本信息

                //webreq.AllowAutoRedirect = false;

                //频繁请求一个网址时,过段时间就会出现“基础连接已经关闭”
                //webreq.KeepAlive = false;
                //webreq.ProtocolVersion = HttpVersion.Version10;

                if (referer.Length > 0)
                {
                    webreq.Referer = referer;
                }

                CookieContainer mycookies = new CookieContainer();
                webreq.CookieContainer = mycookies;

                //if (this.cookieList != null)
                //{
                //    webreq.CookieContainer.Add(this.GetCookies(webreq.RequestUri, this.cookieList));
                //}

                webreq.Method = methed;

                //post 开始
                if (param != null && methed == "post")
                {
                    byte[] arrbyte = Encoding.GetEncoding(encode).GetBytes(param);
                    webreq.ContentLength = arrbyte.Length;

                    Stream newStream = webreq.GetRequestStream();
                    newStream.Write(arrbyte, 0, arrbyte.Length);
                    newStream.Close();
                }
                //post 结束

 
                WebResponse w = webreq.GetResponse();

                //返回HTML
                using (HttpWebResponse webres = (HttpWebResponse)webreq.GetResponse())
                {
                    using (Stream dataStream = webres.GetResponseStream())
                    {
                        using (StreamReader reader = new StreamReader(dataStream, Encoding.GetEncoding(encode)))
                        {
                            html = reader.ReadToEnd();
                            //this.cookieList = webreq.CookieContainer.GetCookies(webreq.RequestUri);
                            webreq.Abort();//可能会解决卡住或阻塞问题
                        }
                    }
                }
            }
            catch (Exception ex)
            {

                html = "出现异常(HttpHelper.GetHTML),远程连接失败:" + ex.Message + " url:" + url;
                //System.Windows.Forms.MessageBox.Show(html);
                return false;
            }

            return true;
        }

更多精彩内容其他人还在看

c#开发word批量转pdf源码分享

已经安装有Office环境,借助一些简单的代码即可实现批量Word转PDF,看下面的实例源码吧
收藏 0 赞 0 分享

c# xml API操作的小例子

这篇文章主要介绍了c# xml API操作的小例子,有需要的朋友可以参考一下
收藏 0 赞 0 分享

c#唯一值渲染实例代码

这篇文章主要介绍了c#唯一值渲染实例代码,有需要的朋友可以参考一下
收藏 0 赞 0 分享

淘宝IP地址库采集器c#代码

这篇文章主要介绍了淘宝IP地址库采集器c#代码,有需要的朋友可以参考一下
收藏 0 赞 0 分享

C#在后台运行操作(BackgroundWorker用法)示例分享

BackgroundWorker类允许在单独的专用线程上运行操作。如果需要能进行响应的用户界面,而且面临与这类操作相关的长时间延迟,则可以使用BackgroundWorker类方便地解决问题,下面看示例
收藏 0 赞 0 分享

c#文本加密程序代码示例

这是一个加密软件,但只限于文本加密,加了窗口控件的滑动效果,详细看下面的代码
收藏 0 赞 0 分享

c#生成站点地图(SiteMapPath)文件示例程序

这篇文章主要介绍了c#生成站点地图(SiteMapPath)文件的示例,大家参考使用
收藏 0 赞 0 分享

C# 键盘Enter键取代Tab键实现代码

这篇文章主要介绍了C# 键盘Enter键取代Tab键实现代码,有需要的朋友可以参考一下
收藏 0 赞 0 分享

C# WinForm导出Excel方法介绍

在.NET应用中,导出Excel是很常见的需求,导出Excel报表大致有以下三种方式:Office PIA,文件流和NPOI开源库,本文只介绍前两种方式
收藏 0 赞 0 分享

C#串口通信程序实例详解

在.NET平台下创建C#串口通信程序,.NET 2.0提供了串口通信的功能,其命名空间是System.IO.Ports,创建C#串口通信程序的具体实现是如何的呢?让我们开始吧
收藏 0 赞 0 分享
查看更多