DehyphenatingTokenizer

Instance Constructors

new DehyphenatingTokenizer(tokenizer: T = ..., dictionary: Set[String] = scala.this.Predef.Set.empty[String], useTokens: Boolean)

tokenizer
tokenizer used to tokenize the document. Defaults to DeterministicTokenizer
dictionary
dictionary to look up when checking for merge eligibility
useTokens
if true, other tokens in the document are used to check for merge eligibility

Value Members

final def !=(arg0: AnyRef): Boolean

Definition Classes
AnyRef
final def !=(arg0: Any): Boolean

Definition Classes
Any
final def ##(): Int

Definition Classes
AnyRef → Any
final def ==(arg0: AnyRef): Boolean

Definition Classes
AnyRef
final def ==(arg0: Any): Boolean

Definition Classes
Any
final def asInstanceOf[T0]: T0

Definition Classes
Any
def buildDictionaryFromDocWords(tokens: Iterable[Token]): Set[String]
def clone(): AnyRef

Attributes
protected[java.lang]
Definition Classes
AnyRef
Annotations
@throws( ... )
def documentAnnotationString(document: Document): String

How the annotation of this DocumentAnnotator should be printed as extra information after a one-word-per-line (OWPL) format.
How the annotation of this DocumentAnnotator should be printed as extra information after a one-word-per-line (OWPL) format. If there is no document annotation, return the empty string. Used in Document.owplString.

Definition Classes
DocumentAnnotator
final def eq(arg0: AnyRef): Boolean

Definition Classes
AnyRef
def equals(arg0: Any): Boolean

Definition Classes
AnyRef → Any
def finalize(): Unit

Attributes
protected[java.lang]
Definition Classes
AnyRef
Annotations
@throws( classOf[java.lang.Throwable] )
final def getClass(): Class[_]

Definition Classes
AnyRef → Any
def hashCode(): Int

Definition Classes
AnyRef → Any
final def isInstanceOf[T0]: Boolean

Definition Classes
Any
def mentionAnnotationString(mention: Mention): String

Definition Classes
DocumentAnnotator
final def ne(arg0: AnyRef): Boolean

Definition Classes
AnyRef
final def notify(): Unit

Definition Classes
AnyRef
final def notifyAll(): Unit

Definition Classes
AnyRef
def phraseAnnotationString(phrase: Phrase): String

Definition Classes
DocumentAnnotator
def postAttrs: Iterable[Class[_]]

Definition Classes
DehyphenatingTokenizer → DocumentAnnotator
def prereqAttrs: Iterable[Class[_]]

Definition Classes
DehyphenatingTokenizer → DocumentAnnotator
def process(document: Document): Document

Definition Classes
DehyphenatingTokenizer → DocumentAnnotator
def processParallel(documents: Iterable[Document], nThreads: Int = ...): Iterable[Document]

Definition Classes
DocumentAnnotator
def processSequential(documents: Iterable[Document]): Iterable[Document]

Definition Classes
DocumentAnnotator
final def synchronized[T0](arg0: ⇒ T0): T0

Definition Classes
AnyRef
def toString(): String

Definition Classes
AnyRef → Any
def tokenAnnotationString(token: Token): String

How the annotation of this DocumentAnnotator should be printed in one-word-per-line (OWPL) format.
How the annotation of this DocumentAnnotator should be printed in one-word-per-line (OWPL) format. If there is no per-token annotation, return null. Used in Document.owplString.

Definition Classes
DehyphenatingTokenizer → DocumentAnnotator
def tokenize(document: Document): Document
final def wait(): Unit

Definition Classes
AnyRef
Annotations
@throws( ... )
final def wait(arg0: Long, arg1: Int): Unit

Definition Classes
AnyRef
Annotations
@throws( ... )
final def wait(arg0: Long): Unit

Definition Classes
AnyRef
Annotations
@throws( ... )

class DehyphenatingTokenizer[T <: DocumentAnnotator] extends DocumentAnnotator

Instance Constructors

new DehyphenatingTokenizer(tokenizer: T = ..., dictionary: Set[String] = scala.this.Predef.Set.empty[String], useTokens: Boolean)

Value Members

final def !=(arg0: AnyRef): Boolean

final def !=(arg0: Any): Boolean

final def ##(): Int

final def ==(arg0: AnyRef): Boolean

final def ==(arg0: Any): Boolean

final def asInstanceOf[T0]: T0

def buildDictionaryFromDocWords(tokens: Iterable[Token]): Set[String]

def clone(): AnyRef

def documentAnnotationString(document: Document): String

final def eq(arg0: AnyRef): Boolean

def equals(arg0: Any): Boolean

def finalize(): Unit

final def getClass(): Class[_]

def hashCode(): Int

final def isInstanceOf[T0]: Boolean

def mentionAnnotationString(mention: Mention): String

final def ne(arg0: AnyRef): Boolean

final def notify(): Unit

final def notifyAll(): Unit

def phraseAnnotationString(phrase: Phrase): String

def postAttrs: Iterable[Class[_]]

def prereqAttrs: Iterable[Class[_]]

def process(document: Document): Document

def processParallel(documents: Iterable[Document], nThreads: Int = ...): Iterable[Document]

def processSequential(documents: Iterable[Document]): Iterable[Document]

final def synchronized[T0](arg0: ⇒ T0): T0

def toString(): String

def tokenAnnotationString(token: Token): String

def tokenize(document: Document): Document

final def wait(): Unit

final def wait(arg0: Long, arg1: Int): Unit

final def wait(arg0: Long): Unit

Inherited from DocumentAnnotator

Inherited from AnyRef

Inherited from Any

Ungrouped