78 lines
2.5 KiB
TypeScript
78 lines
2.5 KiB
TypeScript
import assert from 'node:assert';
|
|
import { describe, it, beforeEach } from 'node:test';
|
|
import { RecursiveCharacterTextSplitter } from './text-splitter';
|
|
|
|
// Test suite for `RecursiveCharacterTextSplitter.splitText`.
//
// A fresh splitter (chunkSize 50, chunkOverlap 10) is built before every test,
// so individual tests may reassign `chunkSize` / `chunkOverlap` on the instance
// without leaking configuration into the next test.
//
// All comparisons use `assert.deepStrictEqual` rather than the legacy, loosely
// comparing `assert.deepEqual`, so a chunk of the wrong type (e.g. a number or
// a String wrapper) cannot pass by coercion.
describe('RecursiveCharacterTextSplitter', () => {
  let splitter: RecursiveCharacterTextSplitter;

  beforeEach(() => {
    // Baseline configuration shared by every test below.
    splitter = new RecursiveCharacterTextSplitter({
      chunkSize: 50,
      chunkOverlap: 10,
    });
  });

  it('Should correctly split text by separators', () => {
    const text = 'Hello world, this is a test of the recursive text splitter.';

    // Test with initial chunkSize (50): the expected chunks omit the ", "
    // between them and the trailing period.
    assert.deepStrictEqual(
      splitter.splitText(text),
      ['Hello world', 'this is a test of the recursive text splitter']
    );

    // Test with updated chunkSize: the expected split now falls between the
    // two sentences instead of at the comma.
    splitter.chunkSize = 100;
    assert.deepStrictEqual(
      splitter.splitText(
        'Hello world, this is a test of the recursive text splitter. If I have a period, it should split along the period.'
      ),
      [
        'Hello world, this is a test of the recursive text splitter',
        'If I have a period, it should split along the period.',
      ]
    );

    // Test with another updated chunkSize: the input adds a "\n"-separated
    // third sentence, which is expected to come back as its own chunk.
    splitter.chunkSize = 110;
    assert.deepStrictEqual(
      splitter.splitText(
        'Hello world, this is a test of the recursive text splitter. If I have a period, it should split along the period.\nOr, if there is a new line, it should prioritize splitting on new lines instead.'
      ),
      [
        'Hello world, this is a test of the recursive text splitter',
        'If I have a period, it should split along the period.',
        'Or, if there is a new line, it should prioritize splitting on new lines instead.',
      ]
    );
  });

  it('Should handle empty string', () => {
    // An empty input is expected to yield no chunks at all (not ['']).
    assert.deepStrictEqual(splitter.splitText(''), []);
  });

  it('Should handle special characters and large texts', () => {
    // 1000 identical characters contain no separator to split on; the test
    // expects five chunks of exactly chunkSize (200) characters each.
    const largeText = 'A'.repeat(1000);
    splitter.chunkSize = 200;
    assert.deepStrictEqual(
      splitter.splitText(largeText),
      Array(5).fill('A'.repeat(200))
    );

    // NOTE(review): chunkSize is still 200 at this point (set above), yet this
    // short input is expected to be split into five pieces, and '&*( this)'
    // keeps an inner space while the other pieces are split on spaces.
    // Confirm these expectations against the splitter implementation.
    const specialCharText = 'Hello!@# world$%^ &*( this) is+ a-test';
    assert.deepStrictEqual(
      splitter.splitText(specialCharText),
      ['Hello!@#', 'world$%^', '&*( this)', 'is+', 'a-test']
    );
  });

  it('Should handle chunkSize equal to chunkOverlap', () => {
    // The invalid configuration is applied after construction, so the error is
    // expected to surface from splitText() rather than from the constructor.
    splitter.chunkSize = 50;
    splitter.chunkOverlap = 50;

    // Passing an Error instance as the expectation makes assert.throws compare
    // the thrown error's properties, including `message` and `name`.
    assert.throws(
      () => splitter.splitText('Invalid configuration'),
      new Error('Cannot have chunkOverlap >= chunkSize')
    );
  });
});
|