Type Members
-
type
ElementType = Token
-
type
Self = Iterable[Token]
-
type
ThisType = Section
-
type
Value = IndexedSeq[Token]
-
class
WithFilter extends FilterMonadic[A, Repr]
Abstract Value Members
-
abstract
def
document: Document
-
abstract
def
stringEnd: Int
-
abstract
def
stringStart: Int
Concrete Value Members
-
final
def
!=(arg0: AnyRef): Boolean
-
final
def
!=(arg0: Any): Boolean
-
final
def
##(): Int
-
def
++[B >: Token, That](that: GenTraversableOnce[B])(implicit bf: CanBuildFrom[Iterable[Token], B, That]): That
-
def
++:[B >: Token, That](that: Traversable[B])(implicit bf: CanBuildFrom[Iterable[Token], B, That]): That
-
def
++:[B >: Token, That](that: TraversableOnce[B])(implicit bf: CanBuildFrom[Iterable[Token], B, That]): That
-
-
-
def
/:[B](z: B)(op: (B, Token) ⇒ B): B
-
def
:\[B](z: B)(op: (Token, B) ⇒ B): B
-
final
def
==(arg0: AnyRef): Boolean
-
final
def
==(arg0: Any): Boolean
-
-
-
-
def
addString(b: StringBuilder, start: String, sep: String, end: String): StringBuilder
-
def
aggregate[B](z: B)(seqop: (B, Token) ⇒ B, combop: (B, B) ⇒ B): B
-
def
apply(i: Int): Token
-
final
def
asInstanceOf[T0]: T0
-
-
-
def
canEqual(that: Any): Boolean
-
def
chainFreeze(): Unit
-
def
chainFrozen: Boolean
-
def
clone(): AnyRef
-
def
collect[B, That](pf: PartialFunction[Token, B])(implicit bf: CanBuildFrom[Iterable[Token], B, That]): That
-
def
collectFirst[B](pf: PartialFunction[Token, B]): Option[B]
-
def
companion: GenericCompanion[Iterable]
-
def
contains(elem: Any): Boolean
-
def
copyToArray[B >: Token](xs: Array[B], start: Int, len: Int): Unit
-
def
copyToArray[B >: Token](xs: Array[B]): Unit
-
def
copyToArray[B >: Token](xs: Array[B], start: Int): Unit
-
def
copyToBuffer[B >: Token](dest: Buffer[B]): Unit
-
def
count(p: (Token) ⇒ Boolean): Int
-
def
drop(n: Int): Iterable[Token]
-
def
dropRight(n: Int): Iterable[Token]
-
def
dropWhile(p: (Token) ⇒ Boolean): Iterable[Token]
-
final
def
eq(arg0: AnyRef): Boolean
-
def
equals(arg0: Any): Boolean
-
def
exists(f: (Token) ⇒ Boolean): Boolean
-
def
filter(p: (Token) ⇒ Boolean): Iterable[Token]
-
def
filterNot(p: (Token) ⇒ Boolean): Iterable[Token]
-
def
finalize(): Unit
-
def
find(p: (Token) ⇒ Boolean): Option[Token]
-
def
flatMap[B, That](f: (Token) ⇒ GenTraversableOnce[B])(implicit bf: CanBuildFrom[Iterable[Token], B, That]): That
-
def
flatten[B](implicit asTraversable: (Token) ⇒ GenTraversableOnce[B]): Iterable[B]
-
def
fold[A1 >: Token](z: A1)(op: (A1, A1) ⇒ A1): A1
-
def
foldLeft[B](z: B)(op: (B, Token) ⇒ B): B
-
def
foldRight[B](z: B)(op: (Token, B) ⇒ B): B
-
def
forall(p: (Token) ⇒ Boolean): Boolean
-
def
foreach[U](f: (Token) ⇒ U): Unit
-
def
genericBuilder[B]: Builder[B, Iterable[B]]
-
final
def
getClass(): Class[_]
-
def
groupBy[K](f: (Token) ⇒ K): Map[K, Iterable[Token]]
-
def
grouped(size: Int): Iterator[Iterable[Token]]
-
def
hasDefiniteSize: Boolean
-
def
hasSentences: Boolean
-
def
hashCode(): Int
-
-
def
headOption: Option[Token]
-
def
indexOf[B >: Token](elem: B, from: Int): Int
-
def
indexOf[B >: Token](elem: B): Int
-
def
indexWhere(p: (Token) ⇒ Boolean): Int
-
def
indexWhere(p: (Token) ⇒ Boolean, from: Int): Int
-
def
init: Iterable[Token]
-
def
inits: Iterator[Iterable[Token]]
-
def
insert(i: Int, e: Token): Section.this.type
-
def
isEmpty: Boolean
-
final
def
isInstanceOf[T0]: Boolean
-
final
def
isTraversableAgain: Boolean
-
-
-
def
lastOption: Option[Token]
-
def
length: Int
-
-
-
def
map[B, That](f: (Token) ⇒ B)(implicit bf: CanBuildFrom[Iterable[Token], B, That]): That
-
-
def
maxBy[B](f: (Token) ⇒ B)(implicit cmp: Ordering[B]): Token
-
-
def
minBy[B](f: (Token) ⇒ B)(implicit cmp: Ordering[B]): Token
-
def
mkString: String
-
def
mkString(sep: String): String
-
def
mkString(start: String, sep: String, end: String): String
-
final
def
ne(arg0: AnyRef): Boolean
-
def
newBuilder: Builder[Token, Iterable[Token]]
-
def
nonEmpty: Boolean
-
final
def
notify(): Unit
-
final
def
notifyAll(): Unit
-
def
par: ParIterable[Token]
-
def
parCombiner: Combiner[Token, ParIterable[Token]]
-
def
partition(p: (Token) ⇒ Boolean): (Iterable[Token], Iterable[Token])
-
def
product[B >: Token](implicit num: Numeric[B]): B
-
def
reduce[A1 >: Token](op: (A1, A1) ⇒ A1): A1
-
def
reduceLeft[B >: Token](op: (B, Token) ⇒ B): B
-
def
reduceLeftOption[B >: Token](op: (B, Token) ⇒ B): Option[B]
-
def
reduceOption[A1 >: Token](op: (A1, A1) ⇒ A1): Option[A1]
-
def
reduceRight[B >: Token](op: (Token, B) ⇒ B): B
-
def
reduceRightOption[B >: Token](op: (Token, B) ⇒ B): Option[B]
-
def
remove(i: Int): Section.this.type
-
def
repr: Iterable[Token]
-
-
def
sameElements[B >: Token](that: GenIterable[B]): Boolean
-
def
scan[B >: Token, That](z: B)(op: (B, B) ⇒ B)(implicit cbf: CanBuildFrom[Iterable[Token], B, That]): That
-
def
scanLeft[B, That](z: B)(op: (B, Token) ⇒ B)(implicit bf: CanBuildFrom[Iterable[Token], B, That]): That
-
def
scanRight[B, That](z: B)(op: (Token, B) ⇒ B)(implicit bf: CanBuildFrom[Iterable[Token], B, That]): That
-
-
def
seq: Iterable[Token]
-
def
size: Int
-
def
slice(from: Int, until: Int): Iterable[Token]
-
def
sliding(size: Int, step: Int): Iterator[Iterable[Token]]
-
def
sliding(size: Int): Iterator[Iterable[Token]]
-
def
span(p: (Token) ⇒ Boolean): (Iterable[Token], Iterable[Token])
-
def
splitAt(n: Int): (Iterable[Token], Iterable[Token])
-
def
string: String
-
def
stringPrefix: String
-
def
sum[B >: Token](implicit num: Numeric[B]): B
-
final
def
synchronized[T0](arg0: ⇒ T0): T0
-
def
tail: Iterable[Token]
-
def
tails: Iterator[Iterable[Token]]
-
def
take(n: Int): Iterable[Token]
-
def
takeRight(n: Int): Iterable[Token]
-
def
takeWhile(p: (Token) ⇒ Boolean): Iterable[Token]
-
def
thisCollection: Iterable[Token]
-
def
to[Col[_]](implicit cbf: CanBuildFrom[Nothing, Token, Col[Token]]): Col[Token]
-
def
toArray[B >: Token](implicit arg0: ClassTag[B]): Array[B]
-
def
toBuffer[B >: Token]: Buffer[B]
-
def
toCollection(repr: Iterable[Token]): Iterable[Token]
-
def
toIndexedSeq: IndexedSeq[Token]
-
def
toIterable: Iterable[Token]
-
def
toIterator: Iterator[Token]
-
-
def
toMap[T, U](implicit ev: <:<[Token, (T, U)]): Map[T, U]
-
-
def
toSet[B >: Token]: Set[B]
-
def
toStream: Stream[Token]
-
def
toString(): String
-
def
toTraversable: Traversable[Token]
-
-
def
tokenAtCharOffset(charOffset: Int): Option[Token]
-
-
def
transpose[B](implicit asTraversable: (Token) ⇒ GenTraversableOnce[B]): Iterable[Iterable[B]]
-
def
unzip[A1, A2](implicit asPair: (Token) ⇒ (A1, A2)): (Iterable[A1], Iterable[A2])
-
def
unzip3[A1, A2, A3](implicit asTriple: (Token) ⇒ (A1, A2, A3)): (Iterable[A1], Iterable[A2], Iterable[A3])
-
def
value: Value
-
def
view(from: Int, until: Int): IterableView[Token, Iterable[Token]]
-
def
view: IterableView[Token, Iterable[Token]]
-
final
def
wait(): Unit
-
final
def
wait(arg0: Long, arg1: Int): Unit
-
final
def
wait(arg0: Long): Unit
-
def
withFilter(p: (Token) ⇒ Boolean): FilterMonadic[Token, Iterable[Token]]
-
def
zip[A1 >: Token, B, That](that: GenIterable[B])(implicit bf: CanBuildFrom[Iterable[Token], (A1, B), That]): That
-
def
zipAll[B, A1 >: Token, That](that: GenIterable[B], thisElem: A1, thatElem: B)(implicit bf: CanBuildFrom[Iterable[Token], (A1, B), That]): That
-
def
zipWithIndex[A1 >: Token, That](implicit bf: CanBuildFrom[Iterable[Token], (A1, Int), That]): That
Deprecated Value Members
-
def
/:\[A1 >: Token](z: A1)(op: (A1, A1) ⇒ A1): A1
Inherited from Iterable[Token]
Inherited from IterableLike[Token, Iterable[Token]]
Inherited from Equals
Inherited from GenIterable[Token]
Inherited from GenIterableLike[Token, Iterable[Token]]
Inherited from Traversable[Token]
Inherited from GenTraversable[Token]
Inherited from GenericTraversableTemplate[Token, Iterable]
Inherited from TraversableLike[Token, Iterable[Token]]
Inherited from GenTraversableLike[Token, Iterable[Token]]
Inherited from Parallelizable[Token, ParIterable[Token]]
Inherited from TraversableOnce[Token]
Inherited from GenTraversableOnce[Token]
Inherited from FilterMonadic[Token, Iterable[Token]]
Inherited from HasNewBuilder[Token, Iterable[Token]]
Inherited from Any
A part of a Document, delineated by character offsets into the Document's string, and which can hold a sequence of Tokens and a sequence of Sentences.
By defining Section in terms of character offsets instead of by Token positions we gain the ability to (a) split the Document into Sections before tokenization, (b) run different tokenizers in different sections, (c) even have overlapping Sections with alternative tokenization and annotation for the same text.
The canonical sequence of Sections in a Document is available as Document.sections, but a Document may have multiple overlapping Sections (for example to store alternative tokenizations or wholly distinct sets of annotations in other "non-canonical" Sections, which may be stored by some customized scheme in the Document attributes, Document.attr).
In addition to their canonical sequence of Sections, all Documents also have a Section that encompasses the entire Document (even if the Document grows in length). This is accessed via Document.asSection. This is the sole member of the initialized default Document.sections, but be cautious about always using Document.asSection to get the Document's Tokens, Sentences and their annotations, because some other processing may reset the canonical sequence of Sections to some other collection.
If you want to tokenize first and then split a Document into Sections, you can tokenize into Document.asSection, and then create new canonical Sections at your desired boundaries, and then re-tokenize each Section. (In the future we may provide a way to avoid the computation of re-tokenizing.)