Skip to main content

Class: SentenceSplitter

SentenceSplitter is our default text splitter. It supports splitting text into sentences, paragraphs, or fixed-length chunks with overlap.

Constructors

constructor

new SentenceSplitter(chunkSize?, chunkOverlap?, tokenizer?, tokenizerDecoder?, paragraphSeparator?, chunkingTokenizerFn?)

Parameters

| Name | Type | Default value |
| --- | --- | --- |
| chunkSize | number | DEFAULT_CHUNK_SIZE |
| chunkOverlap | number | DEFAULT_CHUNK_OVERLAP |
| tokenizer | any | null |
| tokenizerDecoder | any | null |
| paragraphSeparator | string | "\n\n\n" |
| chunkingTokenizerFn | any | undefined |

Defined in

TextSplitter.ts:33

Properties

chunkOverlap

Private chunkOverlap: number

Defined in

TextSplitter.ts:26


chunkSize

Private chunkSize: number

Defined in

TextSplitter.ts:25


chunkingTokenizerFn

Private chunkingTokenizerFn: any

Defined in

TextSplitter.ts:30


paragraphSeparator

Private paragraphSeparator: string

Defined in

TextSplitter.ts:29


tokenizer

Private tokenizer: any

Defined in

TextSplitter.ts:27


tokenizerDecoder

Private tokenizerDecoder: any

Defined in

TextSplitter.ts:28

Methods

combineTextSplits

combineTextSplits(newSentenceSplits, effectiveChunkSize): TextSplit[]

Parameters

| Name | Type |
| --- | --- |
| newSentenceSplits | SplitRep[] |
| effectiveChunkSize | number |

Returns

TextSplit[]

Defined in

TextSplitter.ts:153


getEffectiveChunkSize

Private getEffectiveChunkSize(extraInfoStr?): number

Parameters

| Name | Type |
| --- | --- |
| extraInfoStr? | string |

Returns

number

Defined in

TextSplitter.ts:72


getParagraphSplits

getParagraphSplits(text, effectiveChunkSize?): string[]

Parameters

| Name | Type |
| --- | --- |
| text | string |
| effectiveChunkSize? | number |

Returns

string[]

Defined in

TextSplitter.ts:89


getSentenceSplits

getSentenceSplits(text, effectiveChunkSize?): string[]

Parameters

| Name | Type |
| --- | --- |
| text | string |
| effectiveChunkSize? | number |

Returns

string[]

Defined in

TextSplitter.ts:115


processSentenceSplits

Private processSentenceSplits(sentenceSplits, effectiveChunkSize): SplitRep[]

Parameters

| Name | Type |
| --- | --- |
| sentenceSplits | string[] |
| effectiveChunkSize | number |

Returns

SplitRep[]

Defined in

TextSplitter.ts:128


splitText

splitText(text, extraInfoStr?): string[]

Parameters

| Name | Type |
| --- | --- |
| text | string |
| extraInfoStr? | string |

Returns

string[]

Defined in

TextSplitter.ts:233


splitTextWithOverlaps

splitTextWithOverlaps(text, extraInfoStr?): TextSplit[]

Parameters

| Name | Type |
| --- | --- |
| text | string |
| extraInfoStr? | string |

Returns

TextSplit[]

Defined in

TextSplitter.ts:205