samedi 25 juin 2016

A console application to get a web page's source using C# (JavaScript may cause the problem)


Aim: To download a web site's source using a console application. The class used is shown in the program below.

Question: I use the code below to download the data (source) of a web page. Imagine you are using Chrome: if you first enter this query string, the web page itself redirects you to an HTML view page, and you see the data.

1- When you enter this URL, the page redirects itself to the second page below to show the results. It does this using JavaScript.

www.xyz.com/aaa.html?search=aaa&id=1

2- it redirects here: www.xyz.com/ViewResult.html

In a browser it works fine. I see 4 HTML tables inside the page when I use Google Chrome's "View source" option. But in my application I see only two of the 4 tables. Two tables in the web page are missing (the missing two are the second and third).

How can I overcome this problem? I want to get the source of the page as I see it in Chrome.

Bonus information: There is no iframe.

The particular Code :

  // Step 1: request the search URL. The returned page is not used; pG is
  // reassigned below. NOTE(review): presumably this call is made for its
  // side effects (cookies / last URL) -- confirm against the 3-argument overload.
  string url = "www.xyz.com/aaa.html?search=aaa&id=1";
   WebPage  pG = ss.RequestPage(url, "", "GET");

    // Step 2: request the page the browser ends up on after the script-driven redirect.
    pG = ss.RequestPage("www.xyz.com/ViewResult.html");

    // Keep the HTML of the second response.
    string source= pG.Html;


 /// <summary>
 /// Requests <paramref name="url"/> and returns the response body wrapped in a
 /// <see cref="WebPage"/>. Cookies from the <c>Set-Cookie</c> header are stored in
 /// <c>Cookies</c> and sent with later requests. Automatic redirects are disabled;
 /// when the response carries a <c>Location</c> header it is followed manually.
 /// </summary>
 /// <remarks>
 /// Only the text returned by the server is read; scripts in the page are not
 /// executed, so content a browser generates with JavaScript is absent.
 /// </remarks>
 /// <param name="url">Absolute address to request.</param>
 /// <param name="content">Request body, sent only when <paramref name="method"/> is "POST". Null is treated as empty.</param>
 /// <param name="method">HTTP method, e.g. "GET" or "POST".</param>
 /// <param name="contentType">Content-Type header used for POST requests.</param>
 /// <returns>
 /// The downloaded page. After a redirect, the page built from the redirect target's
 /// HTML and a <see cref="WebPage"/> holding the redirecting response's body.
 /// </returns>
 /// <exception cref="WebException">The request still fails after the configured number of re-attempts.</exception>
 public WebPage RequestPage(Uri url, string content, string method, string contentType)
        {
            // The body is encoded as ASCII, exactly as before; null content becomes an empty body.
            byte[] contentData = new ASCIIEncoding().GetBytes(content ?? string.Empty);

            HttpWebResponse response = GetResponseWithRetries(url, contentData, method, contentType);

            string htmlResult;
            string newLocation;
            try
            {
                StoreCookiesFromHeader(response.Headers["Set-Cookie"]);
                htmlResult = ReadResponseStream(response);
                newLocation = response.Headers["Location"];
            }
            finally
            {
                // Release the connection even when reading the body throws.
                response.Close();
            }

            if (newLocation != null)
            {
                Thread.Sleep(1500);

                // A Location header may be relative; resolve it against the requested URL.
                // An already absolute value is returned unchanged by the resolution.
                Uri resolved;
                string target = Uri.TryCreate(url, newLocation, out resolved)
                    ? resolved.AbsoluteUri
                    : newLocation;

                WebPage result = RequestPage(target);
                return new WebPage(result.Html, new WebPage(htmlResult));
            }

            LastUrl = url.ToString();

            return new WebPage(htmlResult);
        }

        /// <summary>
        /// Sends the request, retrying on <see cref="WebException"/> up to PageReattempts times.
        /// A new request object is built for every attempt because calling GetResponse again
        /// on the same HttpWebRequest does not reissue the request.
        /// </summary>
        private HttpWebResponse GetResponseWithRetries(Uri url, byte[] contentData, string method, string contentType)
        {
            int attempts = 0;

            while (true)
            {
                try
                {
                    HttpWebRequest request = CreatePageRequest(url, method);

                    if (method == "POST")
                    {
                        request.ContentLength = contentData.Length;
                        request.ContentType = contentType;

                        using (Stream contentWriter = request.GetRequestStream())
                        {
                            contentWriter.Write(contentData, 0, contentData.Length);
                        }
                    }

                    return (HttpWebResponse)request.GetResponse();
                }
                catch (WebException ex)
                {
                    // ">=" rather than "==" so a negative PageReattempts cannot loop forever.
                    // On the final failure the exception's response is left open so the
                    // caller can still inspect it.
                    if (attempts >= PageReattempts)
                    {
                        throw;
                    }

                    if (ex.Response != null)
                    {
                        ex.Response.Close();
                    }

                    // Wait three seconds before trying again
                    Thread.Sleep(3000);
                }

                attempts += 1;
            }
        }

        /// <summary>
        /// Builds a request for <paramref name="url"/> with the shared proxy, user agent,
        /// referer (LastUrl), fixed headers and the cookies currently held in Cookies.
        /// </summary>
        private HttpWebRequest CreatePageRequest(Uri url, string method)
        {
            HttpWebRequest request = (HttpWebRequest)HttpWebRequest.Create(url);

            request.Proxy = Proxy;
            request.Timeout = 60000;
            request.Method = method;
            request.AllowAutoRedirect = false; // redirects are followed manually by RequestPage
            request.Accept = "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8";
            request.Referer = LastUrl;
            request.KeepAlive = true;
            request.UserAgent = UserAgent;

            request.Headers.Add("Accept-Language", "en-us,en;q=0.5");
            request.Headers.Add("Cache-Control", "no-cache");
            // NOTE(review): compressed responses are requested but AutomaticDecompression is
            // not set here -- confirm that ReadResponseStream decompresses gzip/deflate.
            request.Headers.Add("Accept-Encoding", "gzip,deflate");

            StringBuilder cookieBuilder = new StringBuilder();
            foreach (KeyValuePair<String, String> cookiePair in Cookies)
            {
                cookieBuilder.Append(cookiePair.Key).Append('=').Append(cookiePair.Value).Append(';');
            }

            if (cookieBuilder.Length > 2)
            {
                // Drop the trailing ';'.
                cookieBuilder.Length -= 1;
                request.Headers.Add("Cookie", cookieBuilder.ToString());
            }

            return request;
        }

        /// <summary>
        /// Splits a Set-Cookie header value into name/value pairs and stores them in Cookies,
        /// overwriting entries that already exist. Does nothing when the header is absent.
        /// </summary>
        private void StoreCookiesFromHeader(string setCookieHeader)
        {
            if (setCookieHeader == null)
            {
                return;
            }

            string headers = setCookieHeader.Replace("path=/,", ";").Replace("HttpOnly,", "");
            foreach (string cookie in headers.Split(';'))
            {
                // Split on the FIRST '=' only, so values that themselves contain '='
                // (e.g. base64 padding) are kept whole.
                int separator = cookie.IndexOf('=');
                if (separator >= 0)
                {
                    string cookieKey = cookie.Substring(0, separator).Trim();
                    string cookieValue = cookie.Substring(separator + 1).Trim();
                    Cookies[cookieKey] = cookieValue;
                }
                else
                {
                    // Value-less segment: store it with an empty value, but skip the
                    // blank segments that Split produces for ";;" or a trailing ';'.
                    string flag = cookie.Trim();
                    if (flag.Length > 0)
                    {
                        Cookies[flag] = "";
                    }
                }
            }
        }

Aucun commentaire:

Enregistrer un commentaire