style: prettier format

AnotiaWang
2025-02-11 22:57:49 +08:00
parent 84f63abb3d
commit d027965013
23 changed files with 485 additions and 331 deletions
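The reformatting below is consistent with a Prettier setup that drops semicolons, keeps single quotes, adds trailing commas, always parenthesizes single arrow-function parameters, and wraps near 80 characters. The commit does not include the repository's actual config, so the file name and options in this sketch are assumptions inferred from the diff:

// prettier.config.mjs (hypothetical; inferred from the diff, not part of this commit)
export default {
  semi: false, // semicolons removed throughout
  singleQuote: true, // single quotes kept
  trailingComma: 'all', // trailing commas added to multi-line calls and objects
  arrowParens: 'always', // `item =>` rewritten as `(item) =>`
  printWidth: 80, // long lines such as the console.log calls get wrapped
}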


@@ -1,43 +1,57 @@
import { generateObject, streamText } from 'ai';
import { compact } from 'lodash-es';
import pLimit from 'p-limit';
import { z } from 'zod';
import { parseStreamingJson, type DeepPartial } from '~/utils/json';
import { generateObject, streamText } from 'ai'
import { compact } from 'lodash-es'
import pLimit from 'p-limit'
import { z } from 'zod'
import { parseStreamingJson, type DeepPartial } from '~/utils/json'
import { o3MiniModel, trimPrompt } from './ai/providers';
import { systemPrompt } from './prompt';
import zodToJsonSchema from 'zod-to-json-schema';
import { tavily, type TavilySearchResponse } from '@tavily/core';
import { o3MiniModel, trimPrompt } from './ai/providers'
import { systemPrompt } from './prompt'
import zodToJsonSchema from 'zod-to-json-schema'
import { tavily, type TavilySearchResponse } from '@tavily/core'
export type ResearchResult = {
learnings: string[];
visitedUrls: string[];
};
learnings: string[]
visitedUrls: string[]
}
export interface WriteFinalReportParams {
prompt: string;
learnings: string[];
prompt: string
learnings: string[]
}
// useRuntimeConfig()
// Used for streaming response
export type SearchQuery = z.infer<typeof searchQueriesTypeSchema>['queries'][0];
export type PartialSearchQuery = DeepPartial<SearchQuery>;
export type SearchResult = z.infer<typeof searchResultTypeSchema>;
export type PartialSearchResult = DeepPartial<SearchResult>;
export type SearchQuery = z.infer<typeof searchQueriesTypeSchema>['queries'][0]
export type PartialSearchQuery = DeepPartial<SearchQuery>
export type SearchResult = z.infer<typeof searchResultTypeSchema>
export type PartialSearchResult = DeepPartial<SearchResult>
export type ResearchStep =
| { type: 'generating_query'; result: PartialSearchQuery; nodeId: string }
| { type: 'generated_query'; query: string; result: PartialSearchQuery; nodeId: string }
| {
type: 'generated_query'
query: string
result: PartialSearchQuery
nodeId: string
}
| { type: 'searching'; query: string; nodeId: string }
| { type: 'search_complete'; urls: string[]; nodeId: string }
| { type: 'processing_serach_result'; query: string; result: PartialSearchResult; nodeId: string }
| { type: 'processed_search_result'; query: string; result: SearchResult; nodeId: string }
| {
type: 'processing_serach_result'
query: string
result: PartialSearchResult
nodeId: string
}
| {
type: 'processed_search_result'
query: string
result: SearchResult
nodeId: string
}
| { type: 'error'; message: string; nodeId: string }
| { type: 'complete'; learnings: string[], visitedUrls: string[] };
| { type: 'complete'; learnings: string[]; visitedUrls: string[] }
// increase this if you have higher API rate limits
const ConcurrencyLimit = 2;
const ConcurrencyLimit = 2
// Initialize Firecrawl with optional API key and optional base url
@@ -59,7 +73,7 @@ export const searchQueriesTypeSchema = z.object({
researchGoal: z.string(),
}),
),
});
})
// take a user query, return a list of SERP queries
export function generateSearchQueries({
@@ -67,10 +81,10 @@ export function generateSearchQueries({
numQueries = 3,
learnings,
}: {
query: string;
numQueries?: number;
query: string
numQueries?: number
// optional, if provided, the research will continue from the last learning
learnings?: string[];
learnings?: string[]
}) {
const schema = z.object({
queries: z
@@ -84,40 +98,38 @@ export function generateSearchQueries({
),
}),
)
.describe(`List of SERP queries, max of ${numQueries}`)
.describe(`List of SERP queries, max of ${numQueries}`),
})
const jsonSchema = JSON.stringify(zodToJsonSchema(schema));
const jsonSchema = JSON.stringify(zodToJsonSchema(schema))
const prompt = [
`Given the following prompt from the user, generate a list of SERP queries to research the topic. Return a maximum of ${numQueries} queries, but feel free to return less if the original prompt is clear. Make sure each query is unique and not similar to each other: <prompt>${query}</prompt>\n\n`,
learnings
? `Here are some learnings from previous research, use them to generate more specific queries: ${learnings.join(
'\n',
)}`
? `Here are some learnings from previous research, use them to generate more specific queries: ${learnings.join('\n')}`
: '',
`You MUST respond in JSON with the following schema: ${jsonSchema}`,
].join('\n\n');
].join('\n\n')
return streamText({
model: o3MiniModel,
system: systemPrompt(),
prompt,
});
})
}
export const searchResultTypeSchema = z.object({
learnings: z.array(z.string()),
followUpQuestions: z.array(z.string()),
});
})
function processSearchResult({
query,
result,
numLearnings = 3,
numFollowUpQuestions = 3,
}: {
query: string;
query: string
result: TavilySearchResponse
numLearnings?: number;
numFollowUpQuestions?: number;
numLearnings?: number
numFollowUpQuestions?: number
}) {
const schema = z.object({
learnings: z
@@ -128,25 +140,23 @@ function processSearchResult({
.describe(
`List of follow-up questions to research the topic further, max of ${numFollowUpQuestions}`,
),
});
const jsonSchema = JSON.stringify(zodToJsonSchema(schema));
const contents = compact(result.results.map(item => item.content)).map(
content => trimPrompt(content, 25_000),
);
})
const jsonSchema = JSON.stringify(zodToJsonSchema(schema))
const contents = compact(result.results.map((item) => item.content)).map(
(content) => trimPrompt(content, 25_000),
)
const prompt = [
`Given the following contents from a SERP search for the query <query>${query}</query>, generate a list of learnings from the contents. Return a maximum of ${numLearnings} learnings, but feel free to return less if the contents are clear. Make sure each learning is unique and not similar to each other. The learnings should be concise and to the point, as detailed and information dense as possible. Make sure to include any entities like people, places, companies, products, things, etc in the learnings, as well as any exact metrics, numbers, or dates. The learnings will be used to research the topic further.`,
`<contents>${contents
.map(content => `<content>\n${content}\n</content>`)
.join('\n')}</contents>`,
`<contents>${contents.map((content) => `<content>\n${content}\n</content>`).join('\n')}</contents>`,
`You MUST respond in JSON with the following schema: ${jsonSchema}`,
].join('\n\n');
].join('\n\n')
return streamText({
model: o3MiniModel,
abortSignal: AbortSignal.timeout(60_000),
system: systemPrompt(),
prompt,
});
})
}
export function writeFinalReport({
@@ -155,28 +165,28 @@ export function writeFinalReport({
}: WriteFinalReportParams) {
const learningsString = trimPrompt(
learnings
.map(learning => `<learning>\n${learning}\n</learning>`)
.map((learning) => `<learning>\n${learning}\n</learning>`)
.join('\n'),
150_000,
);
)
const _prompt = [
`Given the following prompt from the user, write a final report on the topic using the learnings from research. Make it as detailed as possible, aim for 3 or more pages, include ALL the learnings from research:`,
`<prompt>${prompt}</prompt>`,
`Here are all the learnings from previous research:`,
`<learnings>\n${learningsString}\n</learnings>`,
`Write the report in Markdown.`,
`## Deep Research Report`
].join('\n\n');
`## Deep Research Report`,
].join('\n\n')
return streamText({
model: o3MiniModel,
system: systemPrompt(),
prompt: _prompt,
});
})
}
function childNodeId(parentNodeId: string, currentIndex: number) {
return `${parentNodeId}-${currentIndex}`;
return `${parentNodeId}-${currentIndex}`
}
export async function deepResearch({
@@ -187,15 +197,15 @@ export async function deepResearch({
visitedUrls = [],
onProgress,
currentDepth = 1,
nodeId = '0'
nodeId = '0',
}: {
query: string;
breadth: number;
maxDepth: number;
learnings?: string[];
visitedUrls?: string[];
onProgress: (step: ResearchStep) => void;
currentDepth?: number;
query: string
breadth: number
maxDepth: number
learnings?: string[]
visitedUrls?: string[]
onProgress: (step: ResearchStep) => void
currentDepth?: number
nodeId?: string
}): Promise<ResearchResult> {
try {
@@ -203,25 +213,25 @@ export async function deepResearch({
query,
learnings,
numQueries: breadth,
});
const limit = pLimit(ConcurrencyLimit);
})
const limit = pLimit(ConcurrencyLimit)
let searchQueries: PartialSearchQuery[] = [];
let searchQueries: PartialSearchQuery[] = []
for await (const parsedQueries of parseStreamingJson(
searchQueriesResult.textStream,
searchQueriesTypeSchema,
(value) => !!value.queries?.length && !!value.queries[0]?.query
(value) => !!value.queries?.length && !!value.queries[0]?.query,
)) {
if (parsedQueries.queries) {
for (let i = 0; i < searchQueries.length; i++) {
onProgress({
type: 'generating_query',
result: searchQueries[i],
nodeId: childNodeId(nodeId, i)
});
nodeId: childNodeId(nodeId, i),
})
}
searchQueries = parsedQueries.queries;
searchQueries = parsedQueries.queries
}
}
@@ -230,21 +240,22 @@ export async function deepResearch({
type: 'generated_query',
query,
result: searchQueries[i],
nodeId: childNodeId(nodeId, i)
});
nodeId: childNodeId(nodeId, i),
})
}
const results = await Promise.all(
searchQueries.map((searchQuery, i) =>
limit(async () => {
if (!searchQuery?.query) return {
learnings: [],
visitedUrls: [],
}
if (!searchQuery?.query)
return {
learnings: [],
visitedUrls: [],
}
onProgress({
type: 'searching',
query: searchQuery.query,
nodeId: childNodeId(nodeId, i)
nodeId: childNodeId(nodeId, i),
})
try {
// const result = await firecrawl.search(searchQuery.query, {
@@ -255,42 +266,50 @@ export async function deepResearch({
const result = await tvly.search(searchQuery.query, {
maxResults: 5,
})
console.log(`Ran ${searchQuery.query}, found ${result.results.length} contents`);
console.log(
`Ran ${searchQuery.query}, found ${result.results.length} contents`,
)
// Collect URLs from this search
const newUrls = compact(result.results.map(item => item.url));
const newUrls = compact(result.results.map((item) => item.url))
onProgress({
type: 'search_complete',
urls: newUrls,
nodeId: childNodeId(nodeId, i),
})
// Breadth for the next search is half of the current breadth
const nextBreadth = Math.ceil(breadth / 2);
const nextBreadth = Math.ceil(breadth / 2)
const searchResultGenerator = processSearchResult({
query: searchQuery.query,
result,
numFollowUpQuestions: nextBreadth,
});
let searchResult: PartialSearchResult = {};
})
let searchResult: PartialSearchResult = {}
for await (const parsedLearnings of parseStreamingJson(
searchResultGenerator.textStream,
searchResultTypeSchema,
(value) => !!value.learnings?.length
(value) => !!value.learnings?.length,
)) {
searchResult = parsedLearnings;
searchResult = parsedLearnings
onProgress({
type: 'processing_serach_result',
result: parsedLearnings,
query: searchQuery.query,
nodeId: childNodeId(nodeId, i)
});
nodeId: childNodeId(nodeId, i),
})
}
console.log(`Processed search result for ${searchQuery.query}`, searchResult);
const allLearnings = [...learnings, ...(searchResult.learnings ?? [])];
const allUrls = [...visitedUrls, ...newUrls];
const nextDepth = currentDepth + 1;
console.log(
`Processed search result for ${searchQuery.query}`,
searchResult,
)
const allLearnings = [
...learnings,
...(searchResult.learnings ?? []),
]
const allUrls = [...visitedUrls, ...newUrls]
const nextDepth = currentDepth + 1
onProgress({
type: 'processed_search_result',
@@ -299,18 +318,21 @@ export async function deepResearch({
followUpQuestions: searchResult.followUpQuestions ?? [],
},
query: searchQuery.query,
nodeId: childNodeId(nodeId, i)
nodeId: childNodeId(nodeId, i),
})
if (nextDepth < maxDepth && searchResult.followUpQuestions?.length) {
if (
nextDepth < maxDepth &&
searchResult.followUpQuestions?.length
) {
console.warn(
`Researching deeper, breadth: ${nextBreadth}, depth: ${nextDepth}`,
);
)
const nextQuery = `
Previous research goal: ${searchQuery.researchGoal}
Follow-up research directions: ${searchResult.followUpQuestions.map(q => `\n${q}`).join('')}
`.trim();
Follow-up research directions: ${searchResult.followUpQuestions.map((q) => `\n${q}`).join('')}
`.trim()
return deepResearch({
query: nextQuery,
@@ -321,36 +343,38 @@ export async function deepResearch({
onProgress,
currentDepth: nextDepth,
nodeId: childNodeId(nodeId, i),
});
})
} else {
return {
learnings: allLearnings,
visitedUrls: allUrls,
};
}
}
} catch (e: any) {
throw new Error(`Error searching for ${searchQuery.query}, depth ${currentDepth}\nMessage: ${e.message}`)
throw new Error(
`Error searching for ${searchQuery.query}, depth ${currentDepth}\nMessage: ${e.message}`,
)
}
}),
),
);
)
// Conclude results
const _learnings = [...new Set(results.flatMap(r => r.learnings))]
const _visitedUrls = [...new Set(results.flatMap(r => r.visitedUrls))]
const _learnings = [...new Set(results.flatMap((r) => r.learnings))]
const _visitedUrls = [...new Set(results.flatMap((r) => r.visitedUrls))]
// Complete should only be called once
if (nodeId === '0') {
onProgress({
type: 'complete',
learnings: _learnings,
visitedUrls: _visitedUrls,
});
})
}
return {
learnings: _learnings,
visitedUrls: _visitedUrls,
}
} catch (error: any) {
console.error(error);
console.error(error)
onProgress({
type: 'error',
message: error?.message ?? 'Something went wrong',
@@ -361,4 +385,4 @@ export async function deepResearch({
visitedUrls: [],
}
}
}
}
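The file touched by this commit exports deepResearch; its options and the ResearchStep progress events are visible in the hunks above. A minimal caller in the same post-Prettier style might look like the sketch below. The import path, query text, and option values are assumptions for illustration, not taken from the repository:

// Hypothetical usage sketch; only the parameter names and step types come from the diff above.
import { deepResearch } from '~/lib/deep-research'

async function main() {
  const { learnings, visitedUrls } = await deepResearch({
    query: 'How do solid-state batteries compare to lithium-ion batteries?',
    breadth: 3, // number of SERP queries generated per level
    maxDepth: 2, // how many levels of follow-up research to run
    onProgress: (step) => {
      if (step.type === 'complete') {
        console.log('Done:', step.learnings.length, 'learnings')
      } else if (step.type === 'error') {
        console.error('Node', step.nodeId, 'failed:', step.message)
      } else {
        console.log(step.type, 'at node', step.nodeId)
      }
    },
  })
  console.log(`Collected ${learnings.length} learnings from ${visitedUrls.length} URLs`)
}

main()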