|
|||||||||||
| PREV CLASS NEXT CLASS | FRAMES NO FRAMES | ||||||||||
| SUMMARY: NESTED | FIELD | CONSTR | METHOD | DETAIL: FIELD | CONSTR | METHOD | ||||||||||
java.lang.Object
  de.unidu.is.util.StringUtilities
This class provides some convenient static methods for handling strings.
Some of the methods simply use filters from de.unidu.is.text.
| Constructor Summary | |
StringUtilities()
|
|
| Method Summary | |
static java.lang.String |
extractFromHTML(java.lang.String content)
Extracts the text from HTML, removes all tags, replaces well-known entities and removes the rest of them. |
static java.lang.String |
fromXML(java.lang.String text)
Resolves some entities in an XML string. |
static java.lang.String |
getSoundex(java.lang.String text)
Returns the soundex representation of a string. |
static java.lang.String |
implode(java.util.Collection collection,
java.lang.String separator)
Implodes the collection by concatenating all elements, separated by the specified string. |
static java.lang.String |
implode(java.lang.Object[] array,
java.lang.String separator)
Implodes the array by concatenating all elements, separated by the specified string. |
static boolean |
isStopword(java.lang.String term)
Tests whether the specified (unstemmed) term is a stopword. |
static boolean |
isStopwordStemmed(java.lang.String term)
Tests whether the specified (already stemmed) term is a stopword. |
static java.util.Iterator |
parseText(java.lang.String text)
Returns an iterator over all (stemmed) terms embedded in the specified string, after removing stopwords. |
static java.lang.String |
remove(java.lang.String str,
java.lang.String matchStr)
Removes all occurrences of a string from another string. |
static java.lang.String |
removeTags(java.lang.String str)
Removes all tags from a string. |
static java.lang.String |
replace(java.lang.String str,
java.lang.String matchStr,
java.lang.String replaceStr)
Replaces all occurrences of a string by another string. |
static java.lang.String |
stem(java.lang.String term)
Returns the stemmed term. |
static java.lang.String |
toString(int num,
int length)
Formats the specified number. |
static java.lang.String |
toXML(java.lang.String text)
Converts some characters in a string into entities. These characters are converted: ß " < > & |
| Methods inherited from class java.lang.Object |
clone, equals, finalize, getClass, hashCode, notify, notifyAll, toString, wait, wait, wait |
| Constructor Detail |
public StringUtilities()
| Method Detail |
public static java.lang.String implode(java.lang.Object[] array,
java.lang.String separator)
array - array whose elements have to be concatenated
separator - string used for separating the array elements
public static java.lang.String implode(java.util.Collection collection,
java.lang.String separator)
collection - collection whose elements have to be concatenated
separator - string used for separating the collection elements
public static java.lang.String toXML(java.lang.String text)
text - text
public static java.lang.String fromXML(java.lang.String text)
text - XML text
public static java.lang.String remove(java.lang.String str,
java.lang.String matchStr)
str - string to modify
matchStr - string to remove
public static java.lang.String replace(java.lang.String str,
java.lang.String matchStr,
java.lang.String replaceStr)
str - string to modify
matchStr - string to replace
replaceStr - replacement string
public static java.lang.String getSoundex(java.lang.String text)
text - string to convert
public static java.lang.String removeTags(java.lang.String str)
str - string to modify
public static java.lang.String extractFromHTML(java.lang.String content)
content - HTML string
public static java.lang.String stem(java.lang.String term)
term - string to be stemmed
public static boolean isStopword(java.lang.String term)
term - term to be tested
public static boolean isStopwordStemmed(java.lang.String term)
term - term to be tested
public static java.util.Iterator parseText(java.lang.String text)
text - text to be parsed
public static java.lang.String toString(int num,
int length)
num - number to format
length - exact length of the resulting string
|
|||||||||||
| PREV CLASS NEXT CLASS | FRAMES NO FRAMES | ||||||||||
| SUMMARY: NESTED | FIELD | CONSTR | METHOD | DETAIL: FIELD | CONSTR | METHOD | ||||||||||