Files
deep-research-web-ui/lib/ai/text-splitter.test.ts
AnotiaWang 2cbd20a1da init
2025-02-11 09:05:47 +08:00

78 lines
2.5 KiB
TypeScript

import assert from 'node:assert';
import { describe, it, beforeEach } from 'node:test';
import { RecursiveCharacterTextSplitter } from './text-splitter';
describe('RecursiveCharacterTextSplitter', () => {
let splitter: RecursiveCharacterTextSplitter;
beforeEach(() => {
splitter = new RecursiveCharacterTextSplitter({
chunkSize: 50,
chunkOverlap: 10,
});
});
it('Should correctly split text by separators', () => {
const text = 'Hello world, this is a test of the recursive text splitter.';
// Test with initial chunkSize
assert.deepEqual(
splitter.splitText(text),
['Hello world', 'this is a test of the recursive text splitter']
);
// Test with updated chunkSize
splitter.chunkSize = 100;
assert.deepEqual(
splitter.splitText(
'Hello world, this is a test of the recursive text splitter. If I have a period, it should split along the period.'
),
[
'Hello world, this is a test of the recursive text splitter',
'If I have a period, it should split along the period.',
]
);
// Test with another updated chunkSize
splitter.chunkSize = 110;
assert.deepEqual(
splitter.splitText(
'Hello world, this is a test of the recursive text splitter. If I have a period, it should split along the period.\nOr, if there is a new line, it should prioritize splitting on new lines instead.'
),
[
'Hello world, this is a test of the recursive text splitter',
'If I have a period, it should split along the period.',
'Or, if there is a new line, it should prioritize splitting on new lines instead.',
]
);
});
it('Should handle empty string', () => {
assert.deepEqual(splitter.splitText(''), []);
});
it('Should handle special characters and large texts', () => {
const largeText = 'A'.repeat(1000);
splitter.chunkSize = 200;
assert.deepEqual(
splitter.splitText(largeText),
Array(5).fill('A'.repeat(200))
);
const specialCharText = 'Hello!@# world$%^ &*( this) is+ a-test';
assert.deepEqual(
splitter.splitText(specialCharText),
['Hello!@#', 'world$%^', '&*( this)', 'is+', 'a-test']
);
});
it('Should handle chunkSize equal to chunkOverlap', () => {
splitter.chunkSize = 50;
splitter.chunkOverlap = 50;
assert.throws(
() => splitter.splitText('Invalid configuration'),
new Error('Cannot have chunkOverlap >= chunkSize')
);
});
});