cc.factorie.app

strings

package strings

Linear Supertypes
AnyRef, Any
Ordering
  1. Alphabetic
  2. By inheritance
Inherited
  1. strings
  2. AnyRef
  3. Any
  1. Hide All
  2. Show all
Learn more about member selection
Visibility
  1. Public
  2. All

Type Members

  1. class BreakIteratorSegmenter extends StringSegmenter

  2. class RegexSegmenter extends StringSegmenter

  3. class SetBasedStopwords extends StringSet

  4. class Stopwords extends StringSet

  5. trait StringSegmentIterator extends Iterator[String]

  6. trait StringSegmenter extends AnyRef

  7. trait StringSet extends AnyRef

    A collection of standard English "stop words"—common words often left out of processing.

Value Members

  1. object EmptyStringSet extends StringSet

  2. object PorterStemmer

    Rewritten from http://tartarus.org/martin/PorterStemmer/ (the reference implementations of the Porter stemming algorithm).

  3. object Stopwords extends Stopwords

  4. object alphaSegmenter extends RegexSegmenter

  5. def charNGrams(word: String, min: Int, max: Int): Seq[String]

    Return Strings representing all possible character sub-sequences of length between "min" and "max", with prepended "<" and appended ">" to indicate start and end of the input string.

  6. def collapseDigits(word: String): String

  7. val containsDigitRegex: Regex

  8. object csvSegmenter extends RegexSegmenter

    For segmenting fields of a comma-separated-value file.

  9. val digitsRegex: Regex

  10. def editDistance(s: String, s2: String, substCost: Int = 1, deleteCost: Int = 1, insertCost: Int = 1): Int

    Implements Levenshtein distance, with configurable costs for the substitution, deletion, and insertion operations needed to go from String s to String s2.

  11. object foreignWordSegmenter extends RegexSegmenter

  12. def inputStreamToString(is: InputStream, encoding: String = "UTF-8"): String

    Read the entire contents of the InputStream with the given encoding, and return them as a String.

  13. object nonWhitespaceClassesSegmenter extends RegexSegmenter

  14. object nonWhitespaceSegmenter extends RegexSegmenter

  15. def porterStem(s: String): String

  16. def prefix(word: String, length: Int): String

  17. def readerToString(reader: Reader): String

    Read the entire contents of the Reader and return them as a String.

  18. val recentYearRegex: Regex

  19. def replaceDigits(word: String): String

  20. def simplifyDigits(word: String): String

    Return the input string with digits replaced: either the whole string is replaced with "<YEAR>" or "<NUM>", or just the digits are replaced with "#".

  21. def stringShape(word: String, maxRepetitions: Int): String

    Return a string that captures the generic "shape" of the original word, mapping lowercase alphabetics to 'a', uppercase to 'A', digits to '1', whitespace to ' '. Within a run of consecutive characters of the same class, characters beyond the first 'maxRepetitions' are skipped.

  22. def suffix(word: String, length: Int): String

  23. object urlSegmenter extends RegexSegmenter

  24. object wordClassesSegmenter extends RegexSegmenter

  25. object wordSegmenter extends RegexSegmenter

Inherited from AnyRef

Inherited from Any

Ungrouped