// HTML Parser
   
 
using System;
using System.Text.RegularExpressions;
using System.IO;
using System.Text;

/// <summary>
/// Command-line tool that scans a text file for simple anchor tags and
/// prints each one it finds to standard output.
/// </summary>
public class HTMLParser {
    // Matches "<a", exactly one whitespace character, "href", then a run of
    // non-whitespace characters that ends in "/a>". Because \S* cannot cross
    // whitespace, anchors whose attributes or link text contain spaces are
    // not matched; this is a deliberately simple pattern, not a full HTML parser.
    private const string AnchorPattern = @"<a\shref\S*/a>";

    /// <summary>
    /// Reads the file named by the first command-line argument and writes every
    /// case-insensitive match of the anchor pattern to standard output, one per line.
    /// </summary>
    /// <param name="args">
    /// Command-line arguments; <c>args[0]</c> is the path of the file to scan.
    /// </param>
    public static void Main(String[] args) {
        // Guard against a missing or empty path instead of failing with an
        // IndexOutOfRangeException / ArgumentException.
        if (args == null || args.Length == 0 || string.IsNullOrEmpty(args[0])) {
            Console.Error.WriteLine("Usage: HTMLParser <input-file>");
            return;
        }

        FileInfo inputFile = new FileInfo(args[0]);
        if (!inputFile.Exists) {
            Console.WriteLine("The input file does not exist");
            return;
        }

        // The using statement disposes the reader even if ReadToEnd throws.
        string text;
        using (StreamReader reader = inputFile.OpenText()) {
            text = reader.ReadToEnd();
        }

        MatchCollection anchors = Regex.Matches(text, AnchorPattern,
          RegexOptions.IgnoreCase);

        foreach (Match anchor in anchors) {
            Console.WriteLine(anchor.Value);
        }
    }
}