/*
C#: The Complete Reference
by Herbert Schildt

Publisher: Osborne/McGraw-Hill (March 8, 2002)
ISBN: 0072134852
*/

// MiniCrawler: A skeletal Web crawler.

using System;
using System.Net;
using System.IO;

/// <summary>
/// Interactive console crawler: downloads the page at a URI, shows each
/// absolute http/https link found in it, and lets the user follow a link,
/// ask for the next one, or quit.
/// </summary>
public class MiniCrawler {

  // Opening of an href attribute, up to and including the opening quote.
  const string HrefPrefix = "href=\"";

  // Only absolute links (values starting with "http") are recognized.
  const string LinkMarker = HrefPrefix + "http";

  /// <summary>
  /// Find the next link in a content string.
  /// </summary>
  /// <param name="htmlstr">Page content to search.</param>
  /// <param name="startloc">
  /// Index at which the search starts. When a link is found it is advanced
  /// to the link's closing quote; otherwise it is left unchanged.
  /// </param>
  /// <returns>
  /// The link text between the quotes, or null when there is no further
  /// complete link (including when the closing quote is missing).
  /// </returns>
  static string FindLink(string htmlstr, ref int startloc) {
    if(htmlstr == null || startloc < 0 || startloc >= htmlstr.Length) {
      return null;
    }

    // Ordinal, case-insensitive search: matches HREF/href alike without
    // allocating a lower-cased copy of the page and without depending on
    // the current culture.
    int i = htmlstr.IndexOf(LinkMarker, startloc,
                            StringComparison.OrdinalIgnoreCase);
    if(i == -1) {
      return null;
    }

    int start = i + HrefPrefix.Length;       // first character of the link
    int end = htmlstr.IndexOf('"', start);   // closing quote of the link
    if(end == -1) {
      // Truncated or malformed attribute: there is no complete link left.
      return null;
    }

    startloc = end;
    return htmlstr.Substring(start, end - start);
  }

  /// <summary>
  /// Download the entire page at the given URI as text.
  /// </summary>
  /// <exception cref="NotSupportedException">
  /// The URI does not resolve to an HTTP request (for example file: or ftp:).
  /// </exception>
  static string ReadPage(string uri) {
    // Create a WebRequest to the specified URI.
    HttpWebRequest req = WebRequest.Create(uri) as HttpWebRequest;
    if(req == null) {
      // Report through the handler Main already has, rather than failing
      // with an uncaught InvalidCastException.
      throw new NotSupportedException("Only http and https URIs are supported.");
    }

    // The using statements release the response, its stream and the reader
    // even when reading fails part-way through.
    using(WebResponse resp = req.GetResponse())
    using(Stream istrm = resp.GetResponseStream())
    using(StreamReader rdr = new StreamReader(istrm)) {
      return rdr.ReadToEnd();
    }
  }

  /// <summary>
  /// Show the links in a page one at a time and ask what to do with each.
  /// </summary>
  /// <returns>
  /// The link the user chose to follow, or null when the user quits or the
  /// page has no more links.
  /// </returns>
  static string PromptForNextLink(string page) {
    int curloc = 0; // holds current location in response

    while(true) {
      // Find the next URI to link to.
      string link = FindLink(page, ref curloc);
      if(link == null) {
        Console.WriteLine("No link found.");
        return null;
      }

      Console.WriteLine("Link found: " + link);
      Console.Write("Link, More, Quit?");
      string answer = Console.ReadLine();

      if(string.Equals(answer, "L", StringComparison.OrdinalIgnoreCase)) {
        return link;
      }
      if(string.Equals(answer, "Q", StringComparison.OrdinalIgnoreCase)) {
        return null;
      }
      if(string.Equals(answer, "M", StringComparison.OrdinalIgnoreCase)) {
        Console.WriteLine("Searching for another link.");
      }
      // Any other answer moves on to the next link without a message.
    }
  }

  /// <summary>
  /// Entry point. Expects exactly one argument: the URI to start from.
  /// </summary>
  public static void Main(string[] args) {
    if(args.Length != 1) {
      Console.WriteLine("Usage: MiniCrawler <uri>");
      return ;
    }

    string uristr = args[0]; // holds current URI

    try {
      while(uristr != null) {
        Console.WriteLine("Linking to " + uristr);

        string page = ReadPage(uristr);

        // null means "stop": the user quit or the page ran out of links.
        uristr = PromptForNextLink(page);
      }
    } catch(WebException exc) {
      Console.WriteLine("Network Error: " + exc.Message +
                        " Status code: " + exc.Status);
    } catch(ProtocolViolationException exc) {
      Console.WriteLine("Protocol Error: " + exc.Message);
    } catch(UriFormatException exc) {
      Console.WriteLine("URI Format Error: " + exc.Message);
    } catch(NotSupportedException exc) {
      Console.WriteLine("Unknown Protocol: " + exc.Message);
    } catch(IOException exc) {
      Console.WriteLine("I/O Error: " + exc.Message);
    }

    Console.WriteLine("Terminating MiniCrawler.");
  }
}