Implements a multi-threaded Web proxy server

image_pdfimage_print

/*
C# Programming Tips & Techniques
by Charles Wright, Kris Jamsa

Publisher: Osborne/McGraw-Hill (December 28, 2001)
ISBN: 0072193794
*/

// Proxy.cs — Implements a multi-threaded Web proxy server
//
// Compile this program with the following command line:
// C:\>csc Proxy.cs
using System;
using System.Net;
using System.Net.Sockets;
using System.Text;
using System.IO;
using System.Threading;

namespace nsProxyServer
{
    // Entry point of the proxy: listens on a TCP port and hands every accepted
    // connection to a clsProxyConnection running on its own thread.
    public class ProxyServer
    {
        // Port used when none is supplied on the command line.
        const int DefaultPort = 3125;

        // args[0], when present, is the port number to listen on.
        public static void Main (string [] args)
        {
            int port = DefaultPort;
            if (args.Length > 0)
            {
                // Reject non-numeric and out-of-range values up front instead of
                // letting the listener constructor throw later.
                if (!int.TryParse (args[0], out port)
                    || port < IPEndPoint.MinPort
                    || port > IPEndPoint.MaxPort)
                {
                    Console.WriteLine ("Please enter a port number.");
                    return;
                }
            }

            TcpListener sockServer = null;
            try
            {
                // Create a listener for the proxy port on all local interfaces.
                sockServer = new TcpListener (IPAddress.Any, port);
                sockServer.Start ();
                while (true)
                {
                    // Accept connections on the proxy port.
                    Socket socket = sockServer.AcceptSocket ();

                    // When AcceptSocket returns, there is a connection. Create a
                    // connection handler and start a thread running it.
                    clsProxyConnection proxy = new clsProxyConnection (socket);
                    Thread thrd = new Thread (new ThreadStart (proxy.Run));
                    thrd.Start ();
                    // While the thread is running, the main program thread loops
                    // around and listens for the next connection request.
                }
            }
            catch (SocketException e)
            {
                // Bind and accept failures (for example, port already in use)
                // surface as SocketException, which is not an IOException.
                Console.WriteLine (e.Message);
            }
            catch (IOException e)
            {
                Console.WriteLine (e.Message);
            }
            finally
            {
                if (sockServer != null)
                {
                    sockServer.Stop ();
                }
            }
        }
    }

    // Serves one client connection: reads the request line, fetches the
    // requested URL with WebRequest and relays the bytes of the response
    // stream back to the client socket.
    class clsProxyConnection
    {
        // Size of the buffer used to read the client's request.
        const int RequestBufferSize = 1024;

        // Size of the buffer used to relay the response stream.
        const int RelayBufferSize = 4096;

        Socket m_sockClient;
        Byte [] readBuf = new Byte [RequestBufferSize];

        public clsProxyConnection (Socket sockClient)
        {
            m_sockClient = sockClient;
        }

        // Thread entry point. Handles a single request and then closes the
        // client socket; returning from this method terminates the thread.
        public void Run ()
        {
            try
            {
                // Read the incoming text on the socket.
                string strFromClient = "";
                int bytes = ReadMessage (m_sockClient, readBuf, ref strFromClient);

                // If it's empty, it's an error, so just return.
                // This will terminate the thread.
                if (bytes == 0)
                {
                    return;
                }

                // The client browser sends a command followed by a space, then
                // the URL, then an identifier for the HTTP version. Extract the
                // URL as the string between the first two spaces. Validate the
                // indexes BEFORE slicing so a malformed request is reported as
                // such instead of failing inside Substring.
                int index1 = strFromClient.IndexOf (' ');
                int index2 = (index1 < 0) ? -1 : strFromClient.IndexOf (' ', index1 + 1);
                if ((index1 < 0) || (index2 < 0))
                {
                    throw new IOException ("Malformed request line.");
                }
                string strClientConnection =
                    strFromClient.Substring (index1 + 1, index2 - index1 - 1);

                // Write a message that we are connecting.
                Console.WriteLine ("Connecting to Site " + strClientConnection);
                Console.WriteLine ("Connection from " + m_sockClient.RemoteEndPoint);

                // Create a WebRequest object and get the response from the Web site.
                WebRequest req = WebRequest.Create (strClientConnection);
                using (WebResponse response = req.GetResponse ())
                using (Stream responseStream = response.GetResponseStream ())
                {
                    // Pass the response back to the client chunk by chunk.
                    Byte [] relayBuf = new Byte [RelayBufferSize];
                    int bytesRead = responseStream.Read (relayBuf, 0, relayBuf.Length);
                    while (bytesRead > 0)
                    {
                        m_sockClient.Send (relayBuf, bytesRead, SocketFlags.None);
                        bytesRead = responseStream.Read (relayBuf, 0, relayBuf.Length);
                    }
                }
            }
            catch (FileNotFoundException e)
            {
                SendErrorPage (404, "File Not Found", e.Message);
            }
            catch (IOException e)
            {
                SendErrorPage (503, "Service not available", e.Message);
            }
            catch (Exception e)
            {
                // Last-chance handler for the worker thread: report to the
                // client and log, so one bad request cannot take the process down.
                SendErrorPage (404, "File Not Found", e.Message);
                Console.WriteLine (e.StackTrace);
                Console.WriteLine (e.Message);
            }
            finally
            {
                // Always release the socket, even when it is no longer
                // connected; otherwise the handle is never closed.
                if (m_sockClient != null)
                {
                    m_sockClient.Close ();
                }
            }
        }

        // Write an error response to the client. Header lines are terminated
        // with CRLF and a blank line separates headers from the body.
        // Sending is best effort: the client may already have gone away.
        void SendErrorPage (int status, string strReason, string strText)
        {
            try
            {
                SendMessage (m_sockClient, "HTTP/1.0" + " " + status + " " + strReason + "\r\n");
                SendMessage (m_sockClient, "Content-Type: text/plain" + "\r\n");
                SendMessage (m_sockClient, "Proxy-Connection: close" + "\r\n");
                SendMessage (m_sockClient, "\r\n");
                SendMessage (m_sockClient, status + " " + strReason);
                SendMessage (m_sockClient, strText);
            }
            catch (SocketException e)
            {
                Console.WriteLine (e.Message);
            }
            catch (ObjectDisposedException e)
            {
                Console.WriteLine (e.Message);
            }
        }

        // Send a string to a socket, encoded as ASCII.
        void SendMessage (Socket sock, string strMessage)
        {
            Byte [] payload = Encoding.ASCII.GetBytes (strMessage);
            sock.Send (payload, payload.Length, SocketFlags.None);
        }

        // Read a string from a socket. Performs a single Receive into buf,
        // stores the ASCII decoding of the received bytes in strMessage and
        // returns the number of bytes received.
        int ReadMessage (Socket sock, byte [] buf, ref string strMessage)
        {
            int iBytes = sock.Receive (buf, buf.Length, SocketFlags.None);
            // Decode only what was received, not the unused tail of the buffer.
            strMessage = Encoding.ASCII.GetString (buf, 0, iBytes);
            return iBytes;
        }
    }
}

MiniCrawler: A skeletal Web crawler

image_pdfimage_print
   

/*
C#: The Complete Reference 
by Herbert Schildt 

Publisher: Osborne/McGraw-Hill (March 8, 2002)
ISBN: 0072134852
*/


// MiniCrawler: A skeletal Web crawler. 
 
using System; 
using System.Net; 
using System.IO; 
 
public class MiniCrawler {

  // Find a link in a content string.
  //
  // htmlstr  - the page text to search.
  // startloc - index at which to begin searching; on success it is advanced
  //            to the closing quote of the link that was found.
  //
  // Returns the quoted value of the next href attribute that begins with
  // "http" (matched case-insensitively), or null when there is none or the
  // attribute value has no closing quote.
  static string FindLink(string htmlstr,
                         ref int startloc) {
    int i = htmlstr.IndexOf("href=\"http", startloc,
                            StringComparison.OrdinalIgnoreCase);
    if(i == -1) {
      return null;
    }

    int start = htmlstr.IndexOf('"', i) + 1;
    int end = htmlstr.IndexOf('"', start);
    if(end == -1) {
      // Unterminated attribute value: treat it as "no link" rather than
      // letting Substring throw on a negative length.
      return null;
    }

    startloc = end;
    return htmlstr.Substring(start, end - start);
  }

  // Fetches the URI given in args[0], lists the links found in the page one
  // at a time, and follows a link when the user answers "L".
  public static void Main(string[] args) {
    if(args.Length != 1) {
      Console.WriteLine("Usage: MiniCrawler <uri>");
      return;
    }

    string uristr = args[0]; // holds current URI

    try {

      do {
        Console.WriteLine("Linking to " + uristr);

        /* Create a WebRequest to the specified URI. */
        HttpWebRequest req = (HttpWebRequest)
               WebRequest.Create(uristr);

        uristr = null; // disallow further use of this URI

        // Send the request and read in the entire page. The using blocks
        // release the response and reader even if reading fails.
        string str;
        using(HttpWebResponse resp = (HttpWebResponse) req.GetResponse())
        using(StreamReader rdr = new StreamReader(resp.GetResponseStream())) {
          str = rdr.ReadToEnd();
        }

        int curloc = 0; // holds current location in response
        string link;

        do {
          // Find the next URI to link to.
          link = FindLink(str, ref curloc);

          if(link == null) {
            Console.WriteLine("No link found.");
            break;
          }

          Console.WriteLine("Link found: " + link);

          Console.Write("Link, More, Quit?");
          string answer = Console.ReadLine();

          if(string.Equals(answer, "L", StringComparison.OrdinalIgnoreCase)) {
            uristr = link;
            break;
          } else if(string.Equals(answer, "Q", StringComparison.OrdinalIgnoreCase)) {
            break;
          } else if(string.Equals(answer, "M", StringComparison.OrdinalIgnoreCase)) {
            Console.WriteLine("Searching for another link.");
          }

        } while(link.Length > 0);

      } while(uristr != null);

    } catch(WebException exc) {
      Console.WriteLine("Network Error: " + exc.Message +
                        "\nStatus code: " + exc.Status);
    } catch(ProtocolViolationException exc) {
      Console.WriteLine("Protocol Error: " + exc.Message);
    } catch(UriFormatException exc) {
      Console.WriteLine("URI Format Error: " + exc.Message);
    } catch(NotSupportedException exc) {
      Console.WriteLine("Unknown Protocol: " + exc.Message);
    } catch(IOException exc) {
      Console.WriteLine("I/O Error: " + exc.Message);
    }

    Console.WriteLine("Terminating MiniCrawler.");
  }
}


           
          


Check the ContentType

image_pdfimage_print
   



using System;
using System.IO;
using System.Net;
   
// Downloads a fixed URL and writes it to the console line by line, but only
// when the server reports a text content type.
class HtmlDump
{
     // Returns 0 on success and 1 when the request fails or the content is
     // not a text type.
     public static int Main(string[] astrArgs)
     {
          WebResponse webres;

          try
          {
               WebRequest webreq = WebRequest.Create("http://www.kutayzorlu.com/java2s/com/");
               webres = webreq.GetResponse();
          }
          catch (Exception exc)
          {
               Console.WriteLine("HtmlDump: {0}", exc.Message);
               return 1;
          }

          // Dispose the response on every path, including the early return.
          using (webres)
          {
               // StartsWith copes with an empty or very short content type,
               // where Substring(0, 4) would throw; media types are compared
               // without regard to case.
               string strType = webres.ContentType;
               if (strType == null ||
                   !strType.StartsWith("text", StringComparison.OrdinalIgnoreCase))
               {
                    Console.WriteLine("HtmlDump: URI must be a text type.");
                    return 1;
               }

               using (StreamReader strrdr = new StreamReader(webres.GetResponseStream()))
               {
                    string strLine;
                    while ((strLine = strrdr.ReadLine()) != null)
                    {
                         Console.WriteLine(strLine);
                    }
               }
          }
          return 0;
     }
}


           
          


Get a WebResponse from a WebRequest with GetResponse

image_pdfimage_print
   


using System;
using System.Net;
using System.IO;
using System.Drawing;
using System.Windows.Forms;

// Requests an image and an HTML page with WebRequest, decodes the image from
// its response stream and prints the HTML to the console.
public class MainClass {
    public static void Main() {
        string picUri = "http://international.us.server12.fileserver.kutayzorlu.com/files/download/2017/01/Hex_RGB4_uuid-fa046b6c-fa2c-4f3e-838c-80ed56faee5c_crc-0.jpg";
        string htmlUri = "http://www.apress.com";

        WebRequest requestPic = WebRequest.Create(picUri);
        WebRequest requestHtml = WebRequest.Create(htmlUri);

        // Both responses hold network resources, so release them
        // deterministically instead of waiting for finalization.
        using (WebResponse responsePic = requestPic.GetResponse())
        using (WebResponse responseHtml = requestHtml.GetResponse()) {
            // The stream is disposed after the image (usings unwind in reverse
            // order), so it stays open for as long as the Image exists.
            using (Stream picStream = responsePic.GetResponseStream())
            using (Image img = Image.FromStream(picStream)) {
                // The image is only decoded here; nothing else is done with it.
            }

            using (StreamReader r = new StreamReader(responseHtml.GetResponseStream())) {
                Console.WriteLine(r.ReadToEnd());
            }
        }
    }
}
           
          


Output webpage content

image_pdfimage_print
   



using System.Net;
using System;
using System.IO;
// Downloads a fixed page and prints its <HEAD> section to the console.
public class WebPagesApp {
    const string HeadOpen = "<HEAD>";
    const string HeadClose = "</HEAD>";

    [STAThread]
    public static void Main(string[] args) {
        string s = "http://www.microsoft.com";
        Uri uri = new Uri(s);
        WebRequest req = WebRequest.Create(uri);

        // Read the whole page, releasing the response and reader afterwards.
        string t;
        using (WebResponse resp = req.GetResponse())
        using (StreamReader sr = new StreamReader(resp.GetResponseStream())) {
            t = sr.ReadToEnd();
        }

        string u = ExtractHead(t);
        if (u == null) {
            Console.WriteLine("No <HEAD> section found.");
            return;
        }
        Console.WriteLine("{0}", u);
    }

    // Returns the text from the opening <HEAD> tag through the closing
    // </HEAD> tag (tags matched case-insensitively), or null when either
    // tag is missing.
    static string ExtractHead(string html) {
        int i = html.IndexOf(HeadOpen, StringComparison.OrdinalIgnoreCase);
        if (i < 0) {
            return null;
        }

        int j = html.IndexOf(HeadClose, i, StringComparison.OrdinalIgnoreCase);
        if (j < 0) {
            return null;
        }

        // Substring takes a LENGTH as its second argument, not an end index.
        return html.Substring(i, j - i + HeadClose.Length);
    }
}

           
          


Download a web page in a thread

image_pdfimage_print
   


using System;
using System.Net;
using System.Threading;

// Starts a file download on a second thread while the main thread keeps
// interacting with the console.
class ThreadTest {
    static void Main() {
        new Thread(Download).Start();
        Console.WriteLine("download's happening!");
        Console.ReadLine();
    }

    // Thread body: saves http://www.google.com to index.html and reports
    // whether the download finished or failed.
    static void Download() {
        using (WebClient wc = new WebClient()) {
            try {
                wc.Proxy = null;
                wc.DownloadFile("http://www.google.com", "index.html");
                Console.WriteLine("Finished!");
            } catch (Exception ex) {
                // Report the failure instead of swallowing it silently; the
                // exception is still contained so the worker thread ends cleanly.
                Console.WriteLine("Download failed: " + ex.Message);
            }
        }
    }
}
           
          


Download a page with WebClient.DownloadString and fetch the GIF images it references

image_pdfimage_print
   





using System;
using System.IO;
using System.Net;
using System.Text.RegularExpressions;

// Downloads a page as a string, finds the GIF URLs it mentions and saves each
// one to a file in the current directory.
class MainClass {
    // Matches text starting with "http", followed by non-whitespace, one
    // character that is not in the set -,;:? and then a literal ".gif".
    private static readonly Regex GifUrlPattern =
        new Regex(@"http\S+[^-,;:?]\.gif");

    private static void Main() {
        string remoteUri = "http://www.apress.com";

        using (WebClient client = new WebClient()) {
            string str = client.DownloadString(remoteUri);

            foreach (Match match in GifUrlPattern.Matches(str)) {
                // The pattern has no capture groups, so the whole match is the URL.
                string url = match.Value;

                // Use the text after the last '/' as the local file name.
                string file = url.Substring(url.LastIndexOf('/') + 1);
                try {
                    Console.WriteLine("Downloading {0} to file {1}", url, file);
                    client.DownloadFile(new Uri(url), file);
                } catch (Exception ex) {
                    // Keep going with the remaining images, but say why this one failed.
                    Console.WriteLine("Failed to download {0}: {1}", url, ex.Message);
                }
            }
        }
    }
}