Files
deep-research-web-ui/lib/ai/text-splitter.test.ts
2025-02-11 22:57:49 +08:00

81 lines
2.5 KiB
TypeScript

import assert from 'node:assert'
import { describe, it, beforeEach } from 'node:test'
import { RecursiveCharacterTextSplitter } from './text-splitter'
/**
 * Test suite for `RecursiveCharacterTextSplitter.splitText`.
 *
 * Every test starts from a fresh splitter built with `chunkSize: 50` and
 * `chunkOverlap: 10`; individual tests then reassign those fields directly to
 * exercise other configurations.
 *
 * Comparisons use `assert.deepStrictEqual` rather than the legacy, loosely
 * comparing `assert.deepEqual`, so a chunk of the wrong type cannot pass by
 * coercion.
 */
describe('RecursiveCharacterTextSplitter', () => {
  let splitter: RecursiveCharacterTextSplitter

  // Rebuild the splitter before each test so that field mutations made by one
  // test do not carry over into the next.
  beforeEach(() => {
    splitter = new RecursiveCharacterTextSplitter({
      chunkSize: 50,
      chunkOverlap: 10,
    })
  })

  it('Should correctly split text by separators', () => {
    const text = 'Hello world, this is a test of the recursive text splitter.'

    // Initial configuration (chunkSize 50): two chunks are expected.
    assert.deepStrictEqual(splitter.splitText(text), [
      'Hello world',
      'this is a test of the recursive text splitter',
    ])

    // chunkSize raised to 100: the first sentence is expected to stay whole
    // and the text to break between the two sentences.
    splitter.chunkSize = 100
    assert.deepStrictEqual(
      splitter.splitText(
        'Hello world, this is a test of the recursive text splitter. If I have a period, it should split along the period.',
      ),
      [
        'Hello world, this is a test of the recursive text splitter',
        'If I have a period, it should split along the period.',
      ],
    )

    // chunkSize raised to 110 with an embedded newline: the line after the
    // newline is expected to come back as its own chunk.
    splitter.chunkSize = 110
    assert.deepStrictEqual(
      splitter.splitText(
        'Hello world, this is a test of the recursive text splitter. If I have a period, it should split along the period.\nOr, if there is a new line, it should prioritize splitting on new lines instead.',
      ),
      [
        'Hello world, this is a test of the recursive text splitter',
        'If I have a period, it should split along the period.',
        'Or, if there is a new line, it should prioritize splitting on new lines instead.',
      ],
    )
  })

  it('Should handle empty string', () => {
    // Empty input is expected to yield no chunks at all.
    assert.deepStrictEqual(splitter.splitText(''), [])
  })

  it('Should handle special characters and large texts', () => {
    // 1000 identical characters contain no separator to split on; with
    // chunkSize 200 the expectation is five chunks of exactly 200 characters.
    const largeText = 'A'.repeat(1000)
    splitter.chunkSize = 200
    assert.deepStrictEqual(
      splitter.splitText(largeText),
      Array(5).fill('A'.repeat(200)),
    )

    // chunkSize is still 200 at this point (set above).
    const specialCharText = 'Hello!@# world$%^ &*( this) is+ a-test'
    assert.deepStrictEqual(splitter.splitText(specialCharText), [
      'Hello!@#',
      'world$%^',
      '&*( this)',
      'is+',
      'a-test',
    ])
  })

  it('Should handle chunkSize equal to chunkOverlap', () => {
    // Make the overlap equal to the chunk size after construction; splitText
    // is expected to reject this configuration.
    splitter.chunkSize = 50
    splitter.chunkOverlap = 50

    // An Error instance used as the validator makes assert.throws compare the
    // thrown error's properties (including message and name) against it.
    assert.throws(
      () => splitter.splitText('Invalid configuration'),
      new Error('Cannot have chunkOverlap >= chunkSize'),
    )
  })
})