Token

Instance Constructors

new Token(s: Sentence, tokenString: String)
new Token(doc: Document, tokenString: String)
new Token(sentence: Sentence, s: Int, e: Int)
new Token(doc: Document, s: Int, e: Int)

Token constructor that defaults to placing the Token in the special Section that encompasses the whole Document.
new Token(sec: Section, s: Int, e: Int)

Create a Token and also append it to the list of Tokens in the Section.
Create a Token and also append it to the list of Tokens in the Section. There must not already be Tokens in the document with higher stringStart indices. Note that the start and end indices are character offsets into the Document string, not the Section string.
new Token(stringStart: Int, stringEnd: Int)

Type Members

type ThisType = Token

Definition Classes
ThisType

Value Members

final def !=(arg0: AnyRef): Boolean

Definition Classes
AnyRef
final def !=(arg0: Any): Boolean

Definition Classes
Any
final def ##(): Int

Definition Classes
AnyRef → Any
final def ==(arg0: AnyRef): Boolean

Definition Classes
AnyRef
final def ==(arg0: Any): Boolean

Definition Classes
Any
def _setChainPosition(c: Section, p: Int): Unit

This method should never be called outside Chain.+= or Chain.insert or Chain.remove
This method should never be called outside Chain.+= or Chain.insert or Chain.remove

Definition Classes
ChainLink
final def asInstanceOf[T0]: T0

Definition Classes
Any
object attr

A collection of attributes, keyed by the attribute class.
def between(other: Token): Seq[Token]

Definition Classes
ChainLink
def chain: Section

Definition Classes
ChainLink
def chainAfter: IndexedSeq[Token]

Definition Classes
ChainLink
def chainBefore: IndexedSeq[Token]

Definition Classes
ChainLink
def chainHead: Token

Definition Classes
AbstractChainLink
def chainLast: Token

Definition Classes
AbstractChainLink
def charNGrams(min: Int, max: Int): Seq[String]

Return all the word's character subsequences of lengths between min and max.
def clone(): AnyRef

Attributes
protected[java.lang]
Definition Classes
AnyRef
Annotations
@throws( ... )
def containsDigit: Boolean

Return true if the word contains at least one digit.
def containsLowerCase: Boolean

Return true if any character of the word is lower case.
def containsUpperCase: Boolean

Return true if any character of the word is upper case.
def docSubstring: String

Return the substring of the original Document string covered by the character indices stringStart to stringEnd.
Return the substring of the original Document string covered by the character indices stringStart to stringEnd. This may be different from the String returned by this.string if the TokenString attribute has been set. (Such substitutions are useful for de-hyphenation, downcasing, and other such modifications.)
def document: Document

The Document containing this Token's Section.
The Document containing this Token's Section.

Definition Classes
Token → DocumentSubstring
final def eq(arg0: AnyRef): Boolean

Definition Classes
AnyRef
def equals(arg0: Any): Boolean

Definition Classes
AnyRef → Any
def finalize(): Unit

Attributes
protected[java.lang]
Definition Classes
AnyRef
Annotations
@throws( classOf[java.lang.Throwable] )
def firstInSeq: Token

Definition Classes
ChainLink
def followsNewline: Boolean

Return true if the character immediately preceding the start of this token is a newline.
Return true if the character immediately preceding the start of this token is a newline. The beginning of the document counts as a newline.
final def getClass(): Class[_]

Definition Classes
AnyRef → Any
def getNext: Option[Token]

Definition Classes
ChainLink
def getPrev: Option[Token]

Definition Classes
ChainLink
def hasFollowingWhitespace: Boolean

Return true if the character immediately following the end of this token is a whitespace character (such as space, newline, tab, etc.).
def hasNext(n: Int): Boolean

Definition Classes
ChainLink
def hasNext: Boolean

Definition Classes
ChainLink → AbstractChainLink
def hasPrecedingWhitespace: Boolean

Return true if the character immediately preceding the start of this token is a whitespace character (such as space, newline, tab, etc.).
def hasPrev(n: Int): Boolean

Definition Classes
ChainLink
def hasPrev: Boolean

Definition Classes
ChainLink → AbstractChainLink
def hashCode(): Int

Definition Classes
AnyRef → Any
def isCapitalized: Boolean

Return true if the first character of the word is upper case.
def isDigits: Boolean
def isInSentence: Boolean
final def isInstanceOf[T0]: Boolean

Definition Classes
Any
def isPunctuation: Boolean
def isSentenceEnd: Boolean
def isSentenceStart: Boolean
def lemma: TokenLemma
def lemmaString: String

Return the lemma of the string contents of the Token, either from its attr[TokenLemma] variable or, if unset, from token.string.
Return the lemma of the string contents of the Token, either from its attr[TokenLemma] variable or, if unset, from token.string.
def matches(t2: Token): Boolean
final def ne(arg0: AnyRef): Boolean

Definition Classes
AnyRef
def nerTag: NerTag
def next(n: Int): Token

Return the ChainLink "n" positions ahead.
Return the ChainLink "n" positions ahead. If this goes past the end of the Chain, return null.

Definition Classes
ChainLink → AbstractChainLink
def next: Token

Definition Classes
ChainLink → AbstractChainLink
def nextWindow(n: Int): Seq[Token]

Definition Classes
ChainLink
def normalizedString[C <: TokenString](attrClass: Class[C]): String

Return the string contents of this Token, either from its specified attr[C], or if unset, directly as a substring of the Document.
final def notify(): Unit

Definition Classes
AnyRef
final def notifyAll(): Unit

Definition Classes
AnyRef
def parse: ParseTree
def parseChildren: Seq[Token]
def parseChildrenLabeled(label: CategoricalValue[String]): Seq[Token]
def parseLabel: ParseTreeLabel
def parseLeftChildren: Seq[Token]
def parseLeftChildrenLabeled(label: CategoricalValue[String]): Seq[Token]
def parseParent: Token
def parseParentIndex: Int
def parseRightChildren: Seq[Token]
def parseRightChildrenLabeled(label: CategoricalValue[String]): Seq[Token]
def posTag: PennPosTag
def position: Int

Definition Classes
ChainLink → AbstractChainLink
def positionInSection: Int

Return the 0-start index of this token in its sentence.
Return the 0-start index of this token in its sentence. If not part of a sentence, return -1.
def positionInSentence: Int
def precedesNewline: Boolean

Return true if the character immediately following the end of this token is a newline.
Return true if the character immediately following the end of this token is a newline. The end of the document counts as a newline.
def prev(n: Int): Token

Return the ChainLink "n" positions behind.
Return the ChainLink "n" positions behind. If this goes past the beginning of the Chain, return null.

Definition Classes
ChainLink → AbstractChainLink
def prev: Token

Definition Classes
ChainLink → AbstractChainLink
def prevWindow(n: Int): Seq[Token]

Definition Classes
ChainLink
def section: Section

Just an alias for the "chain" method.
def sentence: Sentence
def sentenceHasNext: Boolean
def sentenceHasPrev: Boolean
def sentenceNext: Token
def sentencePrev: Token
def string: String

Return the string contents of this Token, either from its attr[TokenString] variable or, if unset, directly as a substring of the Document.
Return the string contents of this Token, either from its attr[TokenString] variable or, if unset, directly as a substring of the Document.

Definition Classes
Token → DocumentSubstring → Observation
val stringEnd: Int

The character offset into the Document.string at which this DocumentSubstring ends.
The character offset into the Document.string at which this DocumentSubstring ends (exclusive). In other words, the last character of the DocumentSubstring is Document.string(this.stringEnd-1).

Definition Classes
Token → DocumentSubstring
val stringStart: Int

The character offset into the Document.string at which this DocumentSubstring begins.
The character offset into the Document.string at which this DocumentSubstring begins.

Definition Classes
Token → DocumentSubstring
def stringVar: TokenString

Return the Token's string contents as a StringVariable.
Return the Token's string contents as a StringVariable. Repeated calls will return the same Variable (assuming that the attr[TokenString] is not changed).
final def synchronized[T0](arg0: ⇒ T0): T0

Definition Classes
AnyRef
def toString(): String

Returns a string representation of this Token object, including the prefix "Token(" and its starting character offset.
Returns a string representation of this Token object, including the prefix "Token(" and its starting character offset. If instead you want the string contents of the token use the method "string".

Definition Classes
Token → AnyRef → Any
final def wait(): Unit

Definition Classes
AnyRef
Annotations
@throws( ... )
final def wait(arg0: Long, arg1: Int): Unit

Definition Classes
AnyRef
Annotations
@throws( ... )
final def wait(arg0: Long): Unit

Definition Classes
AnyRef
Annotations
@throws( ... )
def window(n: Int): Seq[Token]

Definition Classes
ChainLink
def windowWithoutSelf(n: Int): Seq[Token]

Definition Classes
ChainLink
def wordShape(maxRepetitions: Int = 2): String

Return a string that captures the generic "shape" of the original word, mapping lowercase alphabetics to 'a', uppercase to 'A', digits to '1', whitespace to ' '.
Return a string that captures the generic "shape" of the original word, mapping lowercase alphabetics to 'a', uppercase to 'A', digits to '1', whitespace to ' '. Skip more than 'maxRepetitions' of the same character class.

class Token extends Observation[Token] with ChainLink[Token, Section] with DocumentSubstring with Attr

Instance Constructors

new Token(s: Sentence, tokenString: String)

new Token(doc: Document, tokenString: String)

new Token(sentence: Sentence, s: Int, e: Int)

new Token(doc: Document, s: Int, e: Int)

new Token(sec: Section, s: Int, e: Int)

new Token(stringStart: Int, stringEnd: Int)

Type Members

type ThisType = Token

Value Members

final def !=(arg0: AnyRef): Boolean

final def !=(arg0: Any): Boolean

final def ##(): Int

final def ==(arg0: AnyRef): Boolean

final def ==(arg0: Any): Boolean

def _setChainPosition(c: Section, p: Int): Unit

final def asInstanceOf[T0]: T0

object attr

def between(other: Token): Seq[Token]

def chain: Section

def chainAfter: IndexedSeq[Token]

def chainBefore: IndexedSeq[Token]

def chainHead: Token

def chainLast: Token

def charNGrams(min: Int, max: Int): Seq[String]

def clone(): AnyRef

def containsDigit: Boolean

def containsLowerCase: Boolean

def containsUpperCase: Boolean

def docSubstring: String

def document: Document

final def eq(arg0: AnyRef): Boolean

def equals(arg0: Any): Boolean

def finalize(): Unit

def firstInSeq: Token

def followsNewline: Boolean

final def getClass(): Class[_]

def getNext: Option[Token]

def getPrev: Option[Token]

def hasFollowingWhitespace: Boolean

def hasNext(n: Int): Boolean

def hasNext: Boolean

def hasPrecedingWhitespace: Boolean

def hasPrev(n: Int): Boolean

def hasPrev: Boolean

def hashCode(): Int

def isCapitalized: Boolean

def isDigits: Boolean

def isInSentence: Boolean

final def isInstanceOf[T0]: Boolean

def isPunctuation: Boolean

def isSentenceEnd: Boolean

def isSentenceStart: Boolean

def lemma: TokenLemma

def lemmaString: String

def matches(t2: Token): Boolean

final def ne(arg0: AnyRef): Boolean

def nerTag: NerTag

def next(n: Int): Token

def next: Token

def nextWindow(n: Int): Seq[Token]

def normalizedString[C <: TokenString](attrClass: Class[C]): String

final def notify(): Unit

final def notifyAll(): Unit

def parse: ParseTree

def parseChildren: Seq[Token]

def parseChildrenLabeled(label: CategoricalValue[String]): Seq[Token]

def parseLabel: ParseTreeLabel

def parseLeftChildren: Seq[Token]

def parseLeftChildrenLabeled(label: CategoricalValue[String]): Seq[Token]

def parseParent: Token

def parseParentIndex: Int

def parseRightChildren: Seq[Token]

def parseRightChildrenLabeled(label: CategoricalValue[String]): Seq[Token]

def posTag: PennPosTag

def position: Int

def positionInSection: Int

def positionInSentence: Int