/*
C#: The Complete Reference
by Herbert Schildt

Publisher: Osborne/McGraw-Hill (March 8, 2002)
ISBN: 0072134852
*/

// MiniCrawler: A skeletal Web crawler.

using System;
using System.Net;
using System.IO;

/// <summary>
/// Interactive console crawler: downloads the page named on the command
/// line, shows each absolute link it finds, and lets the user follow one.
/// </summary>
public class MiniCrawler
{
    /// <summary>
    /// Finds the next <c>href="http...</c> attribute in a page.
    /// </summary>
    /// <param name="htmlstr">The page content to search.</param>
    /// <param name="startloc">
    /// Index at which to begin searching. When a link is found it is
    /// advanced to the index of the link's closing quote, so repeated calls
    /// walk through the page; otherwise it is left unchanged.
    /// </param>
    /// <returns>
    /// The text between the quotes of the next matching attribute, or
    /// <c>null</c> when there is no further complete link.
    /// </returns>
    static string FindLink(string htmlstr, ref int startloc)
    {
        const string marker = "href=\"http";

        if (htmlstr == null || startloc < 0 || startloc > htmlstr.Length)
        {
            return null;
        }

        // Search the original string case-insensitively rather than a
        // lower-cased copy, so every index refers to the same string.
        int i = htmlstr.IndexOf(marker, startloc, StringComparison.OrdinalIgnoreCase);
        if (i == -1)
        {
            return null;
        }

        int start = htmlstr.IndexOf('"', i) + 1;
        int end = htmlstr.IndexOf('"', start);
        if (end == -1)
        {
            // Unterminated attribute value (truncated page): not a usable link.
            return null;
        }

        startloc = end;
        return htmlstr.Substring(start, end - start);
    }

    /// <summary>
    /// Entry point. Expects exactly one argument, the URI to start from.
    /// For each link found the user answers L (follow it), M (show the next
    /// link) or Q (stop); any other answer also moves on to the next link.
    /// </summary>
    /// <param name="args">Command-line arguments.</param>
    public static void Main(string[] args)
    {
        if (args.Length != 1)
        {
            Console.WriteLine("Usage: MiniCrawler <uri>");
            return;
        }

        string uristr = args[0]; // holds current URI

        try
        {
            do
            {
                Console.WriteLine("Linking to " + uristr);

                // Create a WebRequest to the specified URI.
                HttpWebRequest req = (HttpWebRequest) WebRequest.Create(uristr);

                uristr = null; // disallow further use of this URI

                // Read the entire page, releasing the response, stream and
                // reader even if reading fails part-way through.
                string str;
                using (HttpWebResponse resp = (HttpWebResponse) req.GetResponse())
                using (Stream istrm = resp.GetResponseStream())
                using (StreamReader rdr = new StreamReader(istrm))
                {
                    str = rdr.ReadToEnd();
                }

                int curloc = 0; // holds current location in response

                while (true)
                {
                    // Find the next URI to link to.
                    string link = FindLink(str, ref curloc);
                    if (link == null)
                    {
                        Console.WriteLine("No link found.");
                        break;
                    }

                    Console.WriteLine("Link found: " + link);
                    Console.Write("Link, More, Quit?");
                    string answer = Console.ReadLine();

                    if (string.Equals(answer, "L", StringComparison.OrdinalIgnoreCase))
                    {
                        // Strings are immutable, so no copy is needed.
                        uristr = link;
                        break;
                    }
                    else if (string.Equals(answer, "Q", StringComparison.OrdinalIgnoreCase))
                    {
                        break;
                    }
                    else if (string.Equals(answer, "M", StringComparison.OrdinalIgnoreCase))
                    {
                        Console.WriteLine("Searching for another link.");
                    }
                }
            } while (uristr != null);
        }
        catch (WebException exc)
        {
            Console.WriteLine("Network Error: " + exc.Message +
                              " Status code: " + exc.Status);
        }
        catch (ProtocolViolationException exc)
        {
            Console.WriteLine("Protocol Error: " + exc.Message);
        }
        catch (UriFormatException exc)
        {
            Console.WriteLine("URI Format Error: " + exc.Message);
        }
        catch (NotSupportedException exc)
        {
            Console.WriteLine("Unknown Protocol: " + exc.Message);
        }
        catch (IOException exc)
        {
            Console.WriteLine("I/O Error: " + exc.Message);
        }

        Console.WriteLine("Terminating MiniCrawler.");
    }
}
Author: coder
Check the ContentType
using System;
using System.IO;
using System.Net;

/// <summary>
/// Fetches a fixed URL and echoes the response body to the console,
/// provided the server reports a text content type.
/// </summary>
class HtmlDump
{
    /// <summary>
    /// Entry point.
    /// </summary>
    /// <param name="astrArgs">Command-line arguments (unused).</param>
    /// <returns>0 on success; 1 if the request fails or the content is not text.</returns>
    public static int Main(string[] astrArgs)
    {
        WebResponse webres;

        try
        {
            WebRequest webreq = WebRequest.Create("http://www.kutayzorlu.com/java2s/com/");
            webres = webreq.GetResponse();
        }
        catch (Exception exc)
        {
            Console.WriteLine("HtmlDump: {0}", exc.Message);
            return 1;
        }

        // Release the response on every path, including the early return below.
        using (webres)
        {
            // StartsWith copes with an empty or short content type, where
            // Substring(0, 4) would throw. Media type names are not
            // case-sensitive, so compare ignoring case.
            string contentType = webres.ContentType;
            if (contentType == null ||
                !contentType.StartsWith("text", StringComparison.OrdinalIgnoreCase))
            {
                Console.WriteLine("HtmlDump: URI must be a text type.");
                return 1;
            }

            using (Stream stream = webres.GetResponseStream())
            using (StreamReader strrdr = new StreamReader(stream))
            {
                string strLine;
                while ((strLine = strrdr.ReadLine()) != null)
                {
                    Console.WriteLine(strLine);
                }
            }
        }

        return 0;
    }
}
Get a WebResponse from a WebRequest
using System;
using System.Net;
using System.IO;
using System.Drawing;
using System.Windows.Forms;

/// <summary>
/// Requests an image and an HTML page, decodes the image from its response
/// stream, and writes the HTML to the console.
/// </summary>
public class MainClass
{
    /// <summary>
    /// Entry point. Issues both requests first, then consumes the image
    /// response followed by the HTML response.
    /// </summary>
    public static void Main()
    {
        string picUri = "http://international.us.server12.fileserver.kutayzorlu.com/files/download/2017/01/Hex_RGB4_uuid-fa046b6c-fa2c-4f3e-838c-80ed56faee5c_crc-0.jpg";
        string htmlUri = "http://www.apress.com";

        WebRequest requestPic = WebRequest.Create(picUri);
        WebRequest requestHtml = WebRequest.Create(htmlUri);

        // Responses, streams and the decoded image all hold resources that
        // must be released explicitly.
        using (WebResponse responsePic = requestPic.GetResponse())
        using (WebResponse responseHtml = requestHtml.GetResponse())
        {
            using (Stream picStream = responsePic.GetResponseStream())
            using (Image img = Image.FromStream(picStream))
            {
                // The image is only decoded here; nothing further is done with it.
            }

            using (StreamReader r = new StreamReader(responseHtml.GetResponseStream()))
            {
                Console.WriteLine(r.ReadToEnd());
            }
        }
    }
}
Output webpage content
using System.Net;
using System;
using System.IO;

/// <summary>
/// Downloads a fixed web page and prints its HEAD section.
/// </summary>
public class WebPagesApp
{
    /// <summary>
    /// Extracts the span from the first <c>&lt;HEAD&gt;</c> tag through its
    /// matching <c>&lt;/HEAD&gt;</c> tag, ignoring case.
    /// </summary>
    /// <param name="html">The page markup.</param>
    /// <returns>
    /// The head section including both tags, or <c>null</c> when
    /// <paramref name="html"/> is null or either tag is missing. An opening
    /// tag that carries attributes is not recognised.
    /// </returns>
    private static string ExtractHead(string html)
    {
        const string openTag = "<HEAD>";
        const string closeTag = "</HEAD>";

        if (html == null)
        {
            return null;
        }

        int start = html.IndexOf(openTag, StringComparison.OrdinalIgnoreCase);
        if (start < 0)
        {
            return null;
        }

        int end = html.IndexOf(closeTag, start + openTag.Length, StringComparison.OrdinalIgnoreCase);
        if (end < 0)
        {
            return null;
        }

        // Substring takes a length, not an end index.
        return html.Substring(start, end - start + closeTag.Length);
    }

    /// <summary>
    /// Entry point.
    /// </summary>
    /// <param name="args">Command-line arguments (unused).</param>
    [STAThread]
    public static void Main(string[] args)
    {
        string s = "http://www.microsoft.com";
        Uri uri = new Uri(s);
        WebRequest req = WebRequest.Create(uri);

        string t;
        using (WebResponse resp = req.GetResponse())
        using (Stream str = resp.GetResponseStream())
        using (StreamReader sr = new StreamReader(str))
        {
            t = sr.ReadToEnd();
        }

        string u = ExtractHead(t);
        if (u == null)
        {
            Console.Error.WriteLine("No <HEAD> section found.");
            return;
        }

        Console.WriteLine("{0}", u);
    }
}
Download a web page in a thread
using System;
using System.Net;
using System.Threading;

/// <summary>
/// Downloads a web page to a local file on a background thread while the
/// main thread stays responsive to console input.
/// </summary>
class ThreadTest
{
    /// <summary>
    /// Entry point. Starts the download thread, then waits for the user to
    /// press Enter.
    /// </summary>
    static void Main()
    {
        new Thread(Download).Start();
        Console.WriteLine("download's happening!");
        Console.ReadLine();
    }

    /// <summary>
    /// Saves the page to "index.html" and reports the outcome on the console.
    /// </summary>
    static void Download()
    {
        using (WebClient wc = new WebClient())
        {
            try
            {
                wc.Proxy = null;
                wc.DownloadFile("http://www.google.com", "index.html");
                Console.WriteLine("Finished!");
            }
            catch (Exception ex)
            {
                // Still best-effort, so the exception does not escape the
                // thread, but report the failure instead of hiding it.
                Console.WriteLine("Download failed: " + ex.Message);
            }
        }
    }
}
Download a page with WebClient.DownloadString and save the GIF images it references
using System;
using System.IO;
using System.Net;
using System.Text.RegularExpressions;

/// <summary>
/// Downloads a page, finds the absolute GIF URLs in its markup, and saves
/// each one to the current directory.
/// </summary>
class MainClass
{
    // "http", a run of non-whitespace characters, one character that is not
    // a listed punctuation mark, then a literal ".gif".
    private static readonly Regex GifUrlPattern = new Regex(@"http\S+[^-,;:?]\.gif");

    /// <summary>
    /// Returns every GIF URL found in <paramref name="html"/>, in document order.
    /// </summary>
    /// <param name="html">The page markup to scan.</param>
    /// <returns>The matched URLs; empty when there are none or the input is null.</returns>
    private static string[] FindGifUrls(string html)
    {
        if (html == null)
        {
            return new string[0];
        }

        MatchCollection matches = GifUrlPattern.Matches(html);
        string[] urls = new string[matches.Count];
        for (int i = 0; i < matches.Count; i++)
        {
            // The pattern has no capture groups, so the whole match is the URL.
            urls[i] = matches[i].Value;
        }
        return urls;
    }

    /// <summary>
    /// Entry point.
    /// </summary>
    private static void Main()
    {
        string remoteUri = "http://www.apress.com";

        using (WebClient client = new WebClient())
        {
            string str = client.DownloadString(remoteUri);

            foreach (string url in FindGifUrls(str))
            {
                // Local file name is whatever follows the last '/'.
                string file = url.Substring(url.LastIndexOf('/') + 1);
                try
                {
                    Console.WriteLine("Downloading {0} to file {1}", url, file);
                    client.DownloadFile(new Uri(url), file);
                }
                catch (Exception)
                {
                    // One bad URL must not stop the remaining downloads.
                    Console.WriteLine("Failed to download {0}", url);
                }
            }
        }
    }
}
Set the BaseAddress for WebClient
using System;
using System.Collections.Generic;
using System.Text;
using System.Net;

/// <summary>
/// Shows how WebClient.BaseAddress is combined with a relative address
/// passed to DownloadString.
/// </summary>
class Program
{
    /// <summary>
    /// Entry point. Downloads the "Office" resource relative to the base
    /// address and prints it.
    /// </summary>
    /// <param name="args">Command-line arguments (unused).</param>
    static void Main(string[] args)
    {
        // WebClient is disposable; release it once the download is done.
        using (WebClient client = new WebClient())
        {
            client.BaseAddress = "http://www.microsoft.com";
            string data = client.DownloadString("Office");
            Console.WriteLine(data);
        }
    }
}