import assert from 'node:assert' import { describe, it, beforeEach } from 'node:test' import { RecursiveCharacterTextSplitter } from './text-splitter' describe('RecursiveCharacterTextSplitter', () => { let splitter: RecursiveCharacterTextSplitter beforeEach(() => { splitter = new RecursiveCharacterTextSplitter({ chunkSize: 50, chunkOverlap: 10, }) }) it('Should correctly split text by separators', () => { const text = 'Hello world, this is a test of the recursive text splitter.' // Test with initial chunkSize assert.deepEqual(splitter.splitText(text), [ 'Hello world', 'this is a test of the recursive text splitter', ]) // Test with updated chunkSize splitter.chunkSize = 100 assert.deepEqual( splitter.splitText( 'Hello world, this is a test of the recursive text splitter. If I have a period, it should split along the period.', ), [ 'Hello world, this is a test of the recursive text splitter', 'If I have a period, it should split along the period.', ], ) // Test with another updated chunkSize splitter.chunkSize = 110 assert.deepEqual( splitter.splitText( 'Hello world, this is a test of the recursive text splitter. If I have a period, it should split along the period.\nOr, if there is a new line, it should prioritize splitting on new lines instead.', ), [ 'Hello world, this is a test of the recursive text splitter', 'If I have a period, it should split along the period.', 'Or, if there is a new line, it should prioritize splitting on new lines instead.', ], ) }) it('Should handle empty string', () => { assert.deepEqual(splitter.splitText(''), []) }) it('Should handle special characters and large texts', () => { const largeText = 'A'.repeat(1000) splitter.chunkSize = 200 assert.deepEqual( splitter.splitText(largeText), Array(5).fill('A'.repeat(200)), ) const specialCharText = 'Hello!@# world$%^ &*( this) is+ a-test' assert.deepEqual(splitter.splitText(specialCharText), [ 'Hello!@#', 'world$%^', '&*( this)', 'is+', 'a-test', ]) }) it('Should handle chunkSize equal to chunkOverlap', () => { splitter.chunkSize = 50 splitter.chunkOverlap = 50 assert.throws( () => splitter.splitText('Invalid configuration'), new Error('Cannot have chunkOverlap >= chunkSize'), ) }) })