// Implements a StringTokenizer class for splitting a string into substrings using a set of delimiters.

// (web page residue: image_pdfimage_print)
   
 

#region Using Directives
using System;
using System.Collections.Generic;
#endregion

/*
 * $author: QmQ
 * $source: http://www.codeproject.com/useritems/SimpleStringTokenizer.asp
 * $date:   10-June-2006
 */
namespace CBRSystem.Data
{

    #region [Summary and remarks]
    /// <summary>
    /// Splits a string into substrings (tokens) using a set of single-character delimiters.
    /// </summary>
    /// <remarks>
    /// <para>
    /// C# counterpart of the <c>java.util.StringTokenizer</c> class. The whole input is tokenized
    /// eagerly in the constructor; the tokens are then read through a forward-only cursor
    /// (<see cref="HasMoreTokens"/>, <see cref="NextToken"/>, <see cref="CountTokens"/>),
    /// by index, or by enumeration.
    /// </para>
    /// <para>
    /// The Java methods are exposed as properties and follow C# naming: Java's camelCase
    /// <c>nextToken()</c> corresponds to the PascalCase <see cref="NextToken"/> property.
    /// Members needed only by Java's <c>Enumeration</c> interface are not provided.
    /// </para>
    /// <para>
    /// An empty delimiter set makes every white-space character (as reported by
    /// <see cref="char.IsWhiteSpace(char)"/>) a delimiter, which is what <c>String.Split</c>
    /// does when it is given an empty separator array.
    /// </para>
    /// </remarks>
    #endregion
    public class StringTokenizer : IEnumerable<string>
    {
        /// <summary>
        /// String containing the default set of delimiters, which is <c>" \t\n\r\f"</c>:
        /// the space character, the tab character, the newline character,
        /// the carriage-return character, and the form-feed character.
        /// </summary>
        public const string DefaultDelimiters = " \t\n\r\f";

        private readonly string delims = DefaultDelimiters;
        private readonly string empty = String.Empty;
        private string[] tokens;
        private int index;

        #region [Constructors]
        /// <summary>
        /// Constructs a string tokenizer for the specified string using the
        /// <see cref="DefaultDelimiters">default delimiters</see>.
        /// </summary>
        /// <param name="str">The string to be tokenized.</param>
        /// <exception cref="System.NullReferenceException">Thrown when the passed string is <c>null</c>.</exception>
        public StringTokenizer(string str)
        {
            Tokenize(str, false, false);
        }

        /// <summary>
        /// Constructs a string tokenizer for the specified string using the given delimiters.
        /// </summary>
        /// <param name="str">The string to be tokenized.</param>
        /// <param name="delims">The delimiters used to tokenize the string (each <see cref="char"/> is a delimiter).
        /// When <c>null</c>, the <see cref="DefaultDelimiters"/> are used.</param>
        /// <exception cref="System.NullReferenceException">Thrown when the passed string is <c>null</c>.</exception>
        public StringTokenizer(string str, string delims)
        {
            if (delims != null)
            {
                this.delims = delims;
            }

            Tokenize(str, false, false);
        }

        /// <summary>
        /// Constructs a string tokenizer for the specified string using the given delimiters.
        /// </summary>
        /// <param name="str">The string to be tokenized.</param>
        /// <param name="delims">The delimiters used to tokenize the string.
        /// When <c>null</c>, the <see cref="DefaultDelimiters"/> are used.</param>
        /// <exception cref="System.NullReferenceException">Thrown when the passed string is <c>null</c>.</exception>
        public StringTokenizer(string str, params char[] delims)
        {
            if (delims != null)
            {
                this.delims = new string(delims);
            }

            Tokenize(str, false, false);
        }

        /// <summary>
        /// Constructs a string tokenizer for the specified string using the given delimiters
        /// and optionally returning them as tokens.
        /// </summary>
        /// <param name="str">The string to be tokenized.</param>
        /// <param name="delims">The delimiters used to tokenize the string (each <see cref="char"/> is a delimiter).
        /// When <c>null</c>, the <see cref="DefaultDelimiters"/> are used.</param>
        /// <param name="returnDelims">If set to <c>true</c> the encountered delimiters will also be returned as tokens.</param>
        /// <exception cref="System.NullReferenceException">Thrown when the passed string is <c>null</c>.</exception>
        public StringTokenizer(string str, string delims, bool returnDelims)
        {
            if (delims != null)
            {
                this.delims = delims;
            }

            Tokenize(str, returnDelims, false);
        }

        /// <summary>
        /// Constructs a string tokenizer for the specified string using the given delimiters,
        /// optionally returning them as tokens. Empty tokens may also be returned, represented
        /// by <see cref="string.Empty"/>.
        /// </summary>
        /// <param name="str">The string to be tokenized.</param>
        /// <param name="delims">The delimiters used to tokenize the string (each <see cref="char"/> is a delimiter).
        /// When <c>null</c>, the <see cref="DefaultDelimiters"/> are used.</param>
        /// <param name="returnDelims">If set to <c>true</c> the encountered delimiters will also be returned as tokens.</param>
        /// <param name="returnEmpty">If set to <c>true</c> empty tokens will also be returned.</param>
        /// <exception cref="System.NullReferenceException">Thrown when the passed string is <c>null</c>.</exception>
        public StringTokenizer(string str, string delims, bool returnDelims, bool returnEmpty)
        {
            if (delims != null)
            {
                this.delims = delims;
            }

            Tokenize(str, returnDelims, returnEmpty);
        }

        /// <summary>
        /// Constructs a string tokenizer for the specified string using the given delimiters,
        /// optionally returning them as tokens. Empty tokens may also be returned, represented
        /// by the <paramref name="empty"/> string.
        /// </summary>
        /// <param name="str">The string to be tokenized.</param>
        /// <param name="delims">The delimiters used to tokenize the string (each <see cref="char"/> is a delimiter).
        /// When <c>null</c>, the <see cref="DefaultDelimiters"/> are used.</param>
        /// <param name="returnDelims">If set to <c>true</c> the encountered delimiters will also be returned as tokens.</param>
        /// <param name="returnEmpty">If set to <c>true</c> empty tokens will also be returned.</param>
        /// <param name="empty">The string to be returned in place of an empty token.</param>
        /// <exception cref="System.NullReferenceException">Thrown when the passed string is <c>null</c>.</exception>
        public StringTokenizer(string str, string delims, bool returnDelims, bool returnEmpty, string empty)
        {
            if (delims != null)
            {
                this.delims = delims;
            }

            this.empty = empty;
            Tokenize(str, returnDelims, returnEmpty);
        }
        #endregion

        #region [The big tokenization method]
        /// <summary>
        /// Splits <paramref name="str"/> and stores the result in the token array.
        /// </summary>
        /// <param name="str">The string to be tokenized.</param>
        /// <param name="returnDelims">Whether each delimiter character is emitted as its own token.</param>
        /// <param name="returnEmpty">Whether empty tokens are emitted (as the configured empty string).</param>
        private void Tokenize(string str, bool returnDelims, bool returnEmpty)
        {
            if (returnDelims)
            {
                this.tokens = TokenizeWithDelimiters(str, returnEmpty);
                return;
            }

            StringSplitOptions options = returnEmpty
                ? StringSplitOptions.None
                : StringSplitOptions.RemoveEmptyEntries;
            this.tokens = str.Split(this.delims.ToCharArray(), options);

            // Split reports empty tokens as String.Empty; substitute the caller's placeholder if it differs.
            if (returnEmpty && this.empty != String.Empty)
            {
                for (int i = 0; i < this.tokens.Length; i++)
                {
                    if (this.tokens[i] == String.Empty)
                    {
                        this.tokens[i] = this.empty;
                    }
                }
            }
        }

        /// <summary>
        /// Scans <paramref name="str"/> once, emitting every non-empty run of non-delimiter
        /// characters and every delimiter character as separate tokens, in input order.
        /// </summary>
        /// <param name="str">The string to be tokenized.</param>
        /// <param name="returnEmpty">If <c>true</c>, the empty-token string is emitted between
        /// two directly adjacent delimiters. No empty token is emitted before a leading
        /// delimiter or after a trailing one.</param>
        /// <returns>The tokens and delimiters in the order they occur.</returns>
        private string[] TokenizeWithDelimiters(string str, bool returnEmpty)
        {
            List<string> result = new List<string>();
            int tokenStart = 0;

            for (int i = 0; i < str.Length; i++)
            {
                if (!IsDelimiter(str[i]))
                {
                    continue;
                }

                if (i > tokenStart)
                {
                    result.Add(str.Substring(tokenStart, i - tokenStart));
                }
                else if (returnEmpty && i > 0)
                {
                    // i == tokenStart with i > 0 means the previous character was a delimiter too.
                    result.Add(this.empty);
                }

                result.Add(new string(str[i], 1));
                tokenStart = i + 1;
            }

            if (tokenStart < str.Length)
            {
                result.Add(str.Substring(tokenStart));
            }

            return result.ToArray();
        }

        /// <summary>
        /// Tells whether <paramref name="c"/> is a delimiter. With an empty delimiter set any
        /// white-space character counts, so this path agrees with the <c>String.Split</c> path.
        /// </summary>
        /// <param name="c">The character to test.</param>
        /// <returns><c>true</c> if <paramref name="c"/> separates tokens; otherwise <c>false</c>.</returns>
        private bool IsDelimiter(char c)
        {
            return this.delims.Length == 0
                ? char.IsWhiteSpace(c)
                : this.delims.IndexOf(c) >= 0;
        }
        #endregion

        #region [Properties covering Java methods]
        /// <summary>
        /// Tests if there are more tokens available from this tokenizer's string.
        /// If this property returns <c>true</c>, then a subsequent
        /// use of the <see cref="NextToken"/> property will successfully return a token.
        /// </summary>
        /// <value>
        ///   <c>true</c> if more tokens are available; otherwise <c>false</c>.
        /// </value>
        public bool HasMoreTokens
        {
            get { return this.index < this.tokens.Length; }
        }

        /// <summary>
        /// Gets the next token and advances the current position.
        /// </summary>
        /// <value>The next token.</value>
        /// <exception cref="System.IndexOutOfRangeException">Thrown when trying to get a token which doesn't exist.
        /// Usually caused by not checking if the <see cref="HasMoreTokens"/> property returns <c>true</c>
        /// before trying to get the next token. The current position is left unchanged in that case.</exception>
        public string NextToken
        {
            get
            {
                // Read first so that a failed access does not move the cursor past the end.
                string token = this.tokens[this.index];
                this.index++;
                return token;
            }
        }

        /// <summary>
        /// Counts the remaining tokens - the number of times the
        /// <see cref="NextToken"/> property can be used before it throws an exception.
        /// </summary>
        /// <value>The number of remaining tokens.</value>
        /// <seealso cref="Count"/>
        public int CountTokens
        {
            get { return this.tokens.Length - this.index; }
        }
        #endregion

        #region [New methods/properties]
        /// <summary>
        /// Gets the total number of tokens extracted.
        /// </summary>
        /// <remarks>
        /// Equivalent not available in Java.
        /// This property returns the total number of extracted tokens,
        /// contrary to <see cref="CountTokens"/>, which returns only the remaining ones.
        /// </remarks>
        /// <value>The number of tokens extracted.</value>
        /// <seealso cref="CountTokens"/>
        public int Count
        {
            get { return this.tokens.Length; }
        }

        /// <summary>
        /// Gets the token with the specified index from the tokenizer without moving the current position index.
        /// </summary>
        /// <remarks>Equivalent not available in Java.</remarks>
        /// <param name="index">The index of the token to get.</param>
        /// <value>The token with the given index.</value>
        /// <exception cref="System.IndexOutOfRangeException">Thrown when trying to get a token which doesn't exist,
        /// that is when <paramref name="index"/> is equal to or greater than <see cref="Count"/>
        /// or <paramref name="index"/> is negative.</exception>
        public string this[int index]
        {
            get { return this.tokens[index]; }
        }

        /// <summary>
        /// Resets the current position index so that the tokens can be extracted again.
        /// </summary>
        /// <remarks>Equivalent not available in Java.</remarks>
        public void Reset()
        {
            this.index = 0;
        }

        /// <summary>
        /// Gets the currently set string for empty tokens.
        /// </summary>
        /// <remarks>Default is <c>System.String.Empty</c>.</remarks>
        /// <value>The empty token string.</value>
        public string EmptyString
        {
            get { return this.empty; }
        }
        #endregion

        #region [Java-compliant methods]
        /*
        /// <summary>
        /// Tests if there are more tokens available from this tokenizer's string.
        /// If this method returns <c>true</c>, then a subsequent call to <see cref="nextToken"/> will successfully return a token.
        /// </summary>
        /// <returns>
        ///   <c>true</c> if and only if there is at least one token in the string after the current position; otherwise <c>false</c>.
        /// </returns>
        /// <seealso cref="nextToken"/>
        public bool hasMoreTokens()
        {
            return HasMoreTokens;
        }

        /// <summary>
        /// Returns the next token from this string tokenizer.
        /// </summary>
        /// <returns>The next token from this string tokenizer.</returns>
        public string nextToken()
        {
            return NextToken;
        }

        /// <summary>
        /// Calculates the number of times that this tokenizer's <see cref="nextToken"/> method can be called before it generates an exception. The current position is not advanced.
        /// </summary>
        /// <returns>The number of tokens remaining in the string using the current delimiter set.</returns>
        public int countTokens()
        {
            return CountTokens;
        }
        */
        #endregion

        #region [IEnumerable implementation]
        /// <summary>
        /// Returns an enumerator that iterates through the remaining tokens.
        /// </summary>
        /// <remarks>
        /// The enumerator reads through <see cref="NextToken"/>, so it starts at the current
        /// position and advances it. Call <see cref="Reset"/> to enumerate the tokens again.
        /// </remarks>
        /// <returns>
        /// An <see cref="IEnumerator{T}"/> that can be used to iterate through the remaining tokens.
        /// </returns>
        public IEnumerator<string> GetEnumerator()
        {
            while (this.HasMoreTokens)
            {
                yield return this.NextToken;
            }
        }

        /// <summary>
        /// Returns a non-generic enumerator over the remaining tokens.
        /// </summary>
        /// <returns>The same enumerator as the generic <see cref="GetEnumerator"/>.</returns>
        System.Collections.IEnumerator System.Collections.IEnumerable.GetEnumerator()
        {
            return GetEnumerator();
        }
        #endregion

    }
}

   
     


// This entry was posted in Data Types. Bookmark the permalink.