Gets an array of sentences from a string.

Posted on February 11, 2011 / June 27, 2015 by coder

   
 

#region Copyright (c) 2004, Ryan Whitaker
/*********************************************************************************
'
' Copyright (c) 2004 Ryan Whitaker
'
' This software is provided 'as-is', without any express or implied warranty. In no
' event will the authors be held liable for any damages arising from the use of this
' software.
'
' Permission is granted to anyone to use this software for any purpose, including
' commercial applications, and to alter it and redistribute it freely, subject to the
' following restrictions:
'
' 1. The origin of this software must not be misrepresented; you must not claim that
' you wrote the original software. If you use this software in a product, an
' acknowledgment (see the following) in the product documentation is required.
'
' This product uses software written by the developers of NClassifier
' (http://nclassifier.sourceforge.net).  NClassifier is a .NET port of the Nick
' Lothian's Java text classification engine, Classifier4J
' (http://classifier4j.sourceforge.net).
'
' 2. Altered source versions must be plainly marked as such, and must not be
' misrepresented as being the original software.
'
' 3. This notice may not be removed or altered from any source distribution.
'
'********************************************************************************/
#endregion

using System;
using System.Collections;
using System.Text.RegularExpressions;

namespace NClassifier
{
  public class Utilities
  {
    // One or more sentence terminators (".", "!" or "?") followed by either
    // a run of whitespace or the end of the input. The metacharacters must be
    // escaped: an unescaped "?" after "|" is a quantifier with nothing to
    // quantify and makes the Regex constructor throw.
    private const string SentenceBoundaryPattern = @"(?:\.|!|\?)+(?:\s+|\z)";

    /// <summary>
    /// Gets an array of sentences.
    /// </summary>
    /// <param name="input">A string that contains sentences. May be null.</param>
    /// <returns>
    /// An array of strings, each element containing a sentence with its
    /// terminating punctuation and trailing whitespace removed. Empty pieces
    /// are dropped. A null <paramref name="input"/> yields an empty array.
    /// </returns>
    public static string[] GetSentences(string input)
    {
      if (input == null)
        return new string[0];

      // The original Java regex was (\.|!|\?)+(\s|\z); non-capturing groups
      // are used here so Regex.Split does not return the delimiters.
      string[] pieces = Regex.Split(input, SentenceBoundaryPattern);

      // A terminator at the very end of the input makes Regex.Split emit a
      // trailing empty string; filter those out.
      ArrayList sentences = new ArrayList(pieces.Length);
      foreach (string piece in pieces)
      {
        if (piece.Length > 0)
          sentences.Add(piece);
      }

      return (string[])sentences.ToArray(typeof(string));
    }
  }
}

Find all unique words in an array of words.

Posted on February 11, 2011 / June 27, 2015 by coder

#region Copyright (c) 2004, Ryan Whitaker
/*********************************************************************************
'
' Copyright (c) 2004 Ryan Whitaker
'
' This software is provided 'as-is', without any express or implied warranty. In no
' event will the authors be held liable for any damages arising from the use of this
' software.
'
' Permission is granted to anyone to use this software for any purpose, including
' commercial applications, and to alter it and redistribute it freely, subject to the
' following restrictions:
'
' 1. The origin of this software must not be misrepresented; you must not claim that
' you wrote the original software. If you use this software in a product, an
' acknowledgment (see the following) in the product documentation is required.
'
' This product uses software written by the developers of NClassifier
' (http://nclassifier.sourceforge.net). NClassifier is a .NET port of the Nick
' Lothian's Java text classification engine, Classifier4J
' (http://classifier4j.sourceforge.net).
'
' 2. Altered source versions must be plainly marked as such, and must not be
' misrepresented as being the original software.
'
' 3. This notice may not be removed or altered from any source distribution.
'
'********************************************************************************/
#endregion

using System;
using System.Collections;
using System.Text.RegularExpressions;

namespace NClassifier
{
    public class Utilities
    {
        /// <summary>
        /// Find all unique words in an array of words.
        /// </summary>
        /// <param name="input">An array of strings. May be null.</param>
        /// <returns>
        /// An array of all unique strings. Order is not guaranteed by the
        /// contract (this implementation keeps first-occurrence order).
        /// A null <paramref name="input"/> yields an empty array.
        /// </returns>
        public static string[] GetUniqueWords(string[] input)
        {
            if (input == null)
                return new string[0];

            ArrayList unique = new ArrayList();

            // Hashtable gives constant-time membership checks instead of a
            // linear ArrayList.Contains scan for every element.
            Hashtable seen = new Hashtable();

            // Hashtable rejects null keys, so a null element is tracked separately.
            bool seenNull = false;

            foreach (string word in input)
            {
                if (word == null)
                {
                    if (!seenNull)
                    {
                        seenNull = true;
                        unique.Add(null);
                    }
                    continue;
                }

                if (!seen.ContainsKey(word))
                {
                    seen.Add(word, null);
                    unique.Add(word);
                }
            }

            return (string[])unique.ToArray(typeof(string));
        }
    }
}

Count how many times a word appears in an array of words.

Posted on February 11, 2011 / June 27, 2015 by coder

using System;
using System.Collections;
using System.Text.RegularExpressions;

namespace NClassifier
{
    public class Utilities
    {
        /// <summary>
        /// Count how many times a word appears in an array of words.
        /// </summary>
        /// <param name="word">The word to count.</param>
        /// <param name="words">
        /// A non-null array of words. The array is searched with
        /// <see cref="Array.BinarySearch(Array, object)"/>, so it must already be
        /// sorted (for example with <c>Array.Sort(words)</c>); on unsorted input
        /// the result is unreliable.
        /// </param>
        /// <returns>
        /// The length of the contiguous run of elements equal to
        /// <paramref name="word"/>, or 0 when the word is not found.
        /// </returns>
        public static int CountWords(string word, string[] words)
        {
            // Locate any one occurrence; a negative result means "not present".
            int found = Array.BinarySearch(words, word);
            if (found < 0)
                return 0;

            // In a sorted array all occurrences are adjacent, so widen the
            // match to the left and to the right of the hit.
            int first = found;
            while (first > 0 && words[first - 1] == word)
                first--;

            int last = found;
            while (last < words.Length - 1 && words[last + 1] == word)
                last++;

            return last - first + 1;
        }
    }
}

Helper class to split a long word into a single one.

Posted on February 11, 2011 / June 27, 2015 by coder

/*
* Author: Kishore Reddy
* Url: http://commonlibrarynet.codeplex.com/
* Title: CommonLibrary.NET
* Copyright: © 2009 Kishore Reddy
* License: LGPL License
* LicenseUrl: http://commonlibrarynet.codeplex.com/license
* Description: A C# based .NET 3.5 Open-Source collection of reusable components.
* Usage: Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an “AS IS” BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
using System;
using System.Collections.Generic;
using System.Text;

namespace GenericCode
{
    /// <summary>
    /// Helper class to split an overly long word into several shorter pieces.
    /// Alternative to possibly using a regular expression.
    /// </summary>
    public class TextSplitter
    {
        /// <summary>
        /// Determine how many pieces a word has to be split into.
        /// </summary>
        /// <param name="wordLength">Length of the word in characters.</param>
        /// <param name="maxCharsInWord">
        /// Maximum number of characters allowed per piece. Used as a divisor,
        /// so it must be greater than zero.
        /// </param>
        /// <returns>
        /// 0 when the word already fits; otherwise
        /// <paramref name="wordLength"/> / <paramref name="maxCharsInWord"/>
        /// rounded up.
        /// </returns>
        internal static int GetNumberOfTimesToSplit(int wordLength, int maxCharsInWord)
        {
            // Validate.
            if (wordLength <= maxCharsInWord) return 0;

            // Now calc.
            int splitCount = wordLength / maxCharsInWord;
            int leftOver = wordLength % maxCharsInWord;
            if (leftOver > 0) splitCount++;

            return splitCount;
        }

        /// <summary>
        /// Split the word into pieces of at most <paramref name="charsPerWord"/>
        /// characters, joined by <paramref name="spacer"/>.
        /// </summary>
        /// <param name="text">The text to split.</param>
        /// <param name="charsPerWord">Number of characters in each piece, e.g. 40.</param>
        /// <param name="spacer">The separator inserted between pieces, e.g. " ".</param>
        /// <returns>
        /// The original text when it is null, empty or already short enough;
        /// otherwise the pieces joined by the spacer, with no trailing spacer.
        /// </returns>
        internal static string SplitWord(string text, int charsPerWord, string spacer)
        {
            // Validate.
            if (string.IsNullOrEmpty(text)) { return text; }

            // Determine how many pieces are needed.
            int splitCount = GetNumberOfTimesToSplit(text.Length, charsPerWord);

            // Nothing to split.
            if (splitCount == 0) return text;

            // Use buffer instead of string concatenation.
            StringBuilder buffer = new StringBuilder();
            int currentPosition = 0;

            for (int count = 1; count <= splitCount; count++)
            {
                // Every piece but the last is exactly charsPerWord long;
                // the last one takes whatever remains.
                string word = (count < splitCount)
                    ? text.Substring(currentPosition, charsPerWord)
                    : text.Substring(currentPosition);
                buffer.Append(word);

                // Do not add a spacer after the final piece, so the supplied
                // text stays the same except for the splitting.
                if (count < splitCount) buffer.Append(spacer);

                // Move to next split start position.
                currentPosition += charsPerWord;
            }
            return buffer.ToString();
        }

        /// <summary>
        /// Check the text for words that exceed the maximum allowed length.
        /// If found, splits each such word with single spaces.
        /// </summary>
        /// <param name="text">The text to check. Words are delimited by a space or by <see cref="Environment.NewLine"/>.</param>
        /// <param name="maxCharsInWord">Maximum number of characters allowed in a word.</param>
        /// <returns>
        /// The text with over-long words split; the original spacers between
        /// words are preserved. Null or empty input is returned unchanged.
        /// </returns>
        public static string CheckAndSplitText(string text, int maxCharsInWord)
        {
            // Validate.
            if (string.IsNullOrEmpty(text)) return text;

            bool isSpacerNewLine = false;
            int currentPosition = 0;
            int ndxSpace = GetIndexOfSpacer(text, currentPosition, ref isSpacerNewLine);

            // Case 1: Single long word.
            if (ndxSpace < 0 && text.Length > maxCharsInWord) return SplitWord(text, maxCharsInWord, " ");

            StringBuilder buffer = new StringBuilder();

            // Walk the text word by word. A spacer at index 0 is a valid hit
            // (empty first word), so the test is ">= 0"; with "> 0" text that
            // starts with a spacer would be dropped entirely.
            while (currentPosition < text.Length && ndxSpace >= 0)
            {
                int wordLength = ndxSpace - currentPosition;
                string currentWord = text.Substring(currentPosition, wordLength);
                string spacer = isSpacerNewLine ? Environment.NewLine : " ";

                AppendWord(buffer, currentWord, maxCharsInWord);
                buffer.Append(spacer);

                // Skip past the spacer just consumed. Its real length is used
                // because Environment.NewLine is not two characters everywhere.
                currentPosition = ndxSpace + spacer.Length;
                ndxSpace = GetIndexOfSpacer(text, currentPosition, ref isSpacerNewLine);
            }

            // Final check: no further spacer, but a last word may remain.
            if (currentPosition < text.Length && ndxSpace < 0)
            {
                AppendWord(buffer, text.Substring(currentPosition), maxCharsInWord);
            }
            return buffer.ToString();
        }

        // Appends a word to the buffer, splitting it with single spaces first
        // when it is longer than maxCharsInWord.
        private static void AppendWord(StringBuilder buffer, string word, int maxCharsInWord)
        {
            if (word.Length > maxCharsInWord)
            {
                buffer.Append(SplitWord(word, maxCharsInWord, " "));
            }
            else
            {
                buffer.Append(word);
            }
        }

        /// <summary>
        /// Get the index of the next spacer (a space " " or a newline).
        /// </summary>
        /// <param name="txt">The text to search.</param>
        /// <param name="currentPosition">The index at which the search starts.</param>
        /// <param name="isNewLine">Set to true when the spacer found is <see cref="Environment.NewLine"/>, false otherwise.</param>
        /// <returns>The index of the first spacer at or after <paramref name="currentPosition"/>, or -1 when there is none.</returns>
        public static int GetIndexOfSpacer(string txt, int currentPosition, ref bool isNewLine)
        {
            // Take the first spacer found. It could be either a space or a
            // newline; whichever comes first wins. Ordinal search is used so
            // the result does not depend on the current culture.
            int ndxSpace = txt.IndexOf(" ", currentPosition, StringComparison.Ordinal);
            int ndxNewLine = txt.IndexOf(Environment.NewLine, currentPosition, StringComparison.Ordinal);
            bool hasSpace = ndxSpace > -1;
            bool hasNewLine = ndxNewLine > -1;
            isNewLine = false;

            // Found both space and newline.
            if (hasSpace && hasNewLine)
            {
                if (ndxSpace < ndxNewLine) { return ndxSpace; }
                isNewLine = true;
                return ndxNewLine;
            }

            // Found space only.
            if (hasSpace) { return ndxSpace; }

            // Found newline only.
            if (hasNewLine)
            {
                isNewLine = true;
                return ndxNewLine;
            }

            // No space or newline.
            return -1;
        }
    }
}

Get the index of a spacer (space " " or newline).

Posted on February 11, 2011 / June 27, 2015 by coder

namespace GenericCode
{

public class StringHelpers
{
///

/// Get the index of a spacer ( space” ” or newline )
///

Convert the word(s) in the sentence to sentence case.

Posted on February 11, 2011 / June 27, 2015 by coder

namespace GenericCode
{
    public class StringHelpers
    {
        /// <summary>
        /// Convert the word(s) in the sentence to sentence case.
        /// UPPER = Upper
        /// lower = Lower
        /// MiXEd = Mixed
        /// </summary>
        /// <param name="s">The text to convert. Leading and trailing whitespace is trimmed first.</param>
        /// <param name="delimiter">The character that separates the words in <paramref name="s"/>.</param>
        /// <returns>
        /// The text with every delimiter-separated word lower-cased and its
        /// first character upper-cased. Null or empty input is returned
        /// unchanged; whitespace-only input yields an empty string.
        /// </returns>
        public static string ConvertToSentanceCase(string s, char delimiter)
        {
            // Check null/empty
            if (string.IsNullOrEmpty(s))
                return s;

            s = s.Trim();
            if (string.IsNullOrEmpty(s))
                return s;

            // A string without the delimiter splits into a single token, so
            // one loop covers both the one-word and the many-word case.
            string[] tokens = s.Split(delimiter);

            // Fully qualified because this snippet has no using directives.
            System.Text.StringBuilder buffer = new System.Text.StringBuilder(s.Length);

            for (int i = 0; i < tokens.Length; i++)
            {
                // Delimiters go between tokens only, so none has to be
                // trimmed off the end afterwards.
                if (i > 0)
                    buffer.Append(delimiter);

                // Consecutive, leading or trailing delimiters produce empty
                // tokens; indexing [0] on those would throw.
                string token = tokens[i];
                if (token.Length == 0)
                    continue;

                string lowered = token.ToLower();
                buffer.Append(lowered[0].ToString().ToUpper());
                buffer.Append(lowered.Substring(1));
            }

            return buffer.ToString();
        }
    }
}

Returns the defaultval if the val string is null or empty.

Posted on February 11, 2011 / June 27, 2015 by coder

   
 

/*
 * Author: Kishore Reddy
 * Url: http://commonlibrarynet.codeplex.com/
 * Title: CommonLibrary.NET
 * Copyright: © 2009 Kishore Reddy
 * License: LGPL License
 * LicenseUrl: http://commonlibrarynet.codeplex.com/license
 * Description: A C# based .NET 3.5 Open-Source collection of reusable components.
 * Usage: Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
using System;
using System.Collections.Generic;
using System.Text;



namespace GenericCode
{
   
    public class StringHelpers
    {
        /// <summary>
        /// Chooses between a string and a fallback: the fallback is used when
        /// the string is null or has zero length, the string itself otherwise.
        /// </summary>
        /// <param name="val">The candidate string.</param>
        /// <param name="defaultVal">The fallback returned when <paramref name="val"/> is null or empty.</param>
        /// <returns><paramref name="val"/> when it has at least one character; otherwise <paramref name="defaultVal"/>.</returns>
        public static string GetDefaultStringIfEmpty(string val, string defaultVal)
        {
            bool hasContent = !string.IsNullOrEmpty(val);
            return hasContent ? val : defaultVal;
        }
    }
}

Page 73 of 858

« 1 … 71 72 73 74 75 … 858 »