feat: citations in research report

This commit is contained in:
AnotiaWang
2025-02-27 13:03:31 +08:00
parent e583b92cbf
commit ad1b3a239c
4 changed files with 194 additions and 53 deletions

View File

@ -2,6 +2,7 @@
import {
deepResearch,
type PartialProcessedSearchResult,
type ProcessedSearchResult,
type ResearchStep,
} from '~/lib/deep-research'
import {
@ -29,7 +30,7 @@
generateLearningsReasoning?: string
searchResults?: WebSearchResult[]
/** Learnings from search results */
learnings?: string[]
learnings?: ProcessedSearchResult['learnings']
status?: DeepResearchNodeStatus
error?: string
}
@ -197,7 +198,6 @@
console.log(`[DeepResearch] complete:`, step)
completeResult.value = {
learnings: step.learnings,
visitedUrls: step.visitedUrls,
}
emit('complete')
isLoading.value = false
@ -246,8 +246,7 @@
try {
let query = getCombinedQuery(form.value, feedback.value)
let existingLearnings: string[] = []
let existingVisitedUrls: string[] = []
let existingLearnings: ProcessedSearchResult['learnings'] = []
let currentDepth = 1
let breadth = form.value.breadth
@ -266,10 +265,6 @@
existingLearnings = parentNodes
.flatMap((n) => n.learnings || [])
.filter(Boolean)
existingVisitedUrls = parentNodes
.flatMap((n) => n.searchResults || [])
.map((r) => r.url)
.filter(Boolean)
}
await deepResearch({
@ -281,7 +276,6 @@
languageCode: locale.value,
searchLanguageCode: config.webSearch.searchLanguage,
learnings: existingLearnings,
visitedUrls: existingVisitedUrls,
onProgress: handleResearchProgress,
})
} catch (error) {

View File

@ -88,7 +88,7 @@
v-for="(learning, index) in node.learnings"
class="prose max-w-none dark:prose-invert break-words"
:key="index"
v-html="marked(`- ${learning}`, { gfm: true })"
v-html="marked(`- ${learning.learning}`, { gfm: true })"
/>
<span v-if="!node.learnings?.length"> - </span>

View File

@ -16,15 +16,13 @@
const loadingExportMarkdown = ref(false)
const reasoningContent = ref('')
const reportContent = ref('')
const reportContainerRef = ref<HTMLElement>()
// Inject global data from index.vue
const form = inject(formInjectionKey)!
const feedback = inject(feedbackInjectionKey)!
const researchResult = inject(researchResultInjectionKey)!
const reportHtml = computed(() =>
marked(reportContent.value, { silent: true, gfm: true, breaks: true }),
)
const isExportButtonDisabled = computed(
() =>
!reportContent.value ||
@ -33,6 +31,116 @@
loadingExportMarkdown.value,
)
/**
 * HTML for the report, rendered from the Markdown in `reportContent`.
 *
 * Citation markers of the form `[n]` are turned into superscript links that
 * point at the n-th entry (1-based) of `researchResult.value.learnings`.
 * Markers with no matching learning, or whose learning has a non-http(s) URL,
 * are left untouched. A small `<style>` block is prepended so the citation
 * styling travels with the HTML that is injected via `v-html`.
 */
const reportHtml = computed(() => {
  // Escape a value for safe embedding inside a double-quoted HTML attribute.
  // URLs and titles come from search results / model output and are untrusted.
  const escapeAttr = (value: string) =>
    value
      .replace(/&/g, '&amp;')
      .replace(/"/g, '&quot;')
      .replace(/'/g, '&#39;')
      .replace(/</g, '&lt;')
      .replace(/>/g, '&gt;')

  let html = marked(reportContent.value, {
    silent: true,
    gfm: true,
    breaks: true,
    async: false,
  })
  const learnings = researchResult.value?.learnings ?? []

  // Replace citation markers "[number]" with a span carrying tooltip metadata
  html = html.replace(/\[(\d+)\]/g, (match, number) => {
    // Citations are 1-based; the learnings array is 0-based
    const index = parseInt(number, 10) - 1
    const learning = learnings[index]
    if (!learning) return match

    // Only link to web URLs: the result is rendered with v-html, so a
    // `javascript:` (or otherwise malformed) URL must never reach `href`.
    if (!/^https?:\/\//i.test(learning.url)) return match

    const safeUrl = escapeAttr(learning.url)
    // Each tooltip is identified by the index of the learning it refers to
    const tooltipId = `tooltip-${index}`
    // The tooltip text is URI-encoded here and decoded again on hover
    const tooltipContent = encodeURIComponent(learning.title || learning.url)

    return `<span class="citation-ref" data-tooltip-id="${tooltipId}" data-tooltip-url="${safeUrl}" data-tooltip-content="${tooltipContent}">
      <a href="${safeUrl}" target="_blank" rel="noopener noreferrer">${match}</a>
    </span>`
  })

  return `<style>
    .citation-ref {
      display: inline-block;
      vertical-align: super;
      font-size: 0.75rem;
      font-weight: 500;
      color: #3b82f6;
    }
    .citation-ref a {
      text-decoration: none;
      color: inherit;
    }
  </style>
  ${html}`
})
// Attach the citation tooltip listeners once the initial render has been
// flushed to the DOM
onMounted(() => {
  void nextTick(setupTooltips)
})
// Whenever the report content changes, re-attach the tooltip listeners after
// the DOM has been updated
watch(reportContent, () => {
  void nextTick(setupTooltips)
})
/**
 * Set up hover tooltips for every `.citation-ref` element inside the report
 * container.
 *
 * Any `.citation-tooltip` elements already in the document are removed, then a
 * single tooltip element is appended to `document.body` and shared by all
 * citations. Does nothing when the report container is not available.
 */
function setupTooltips() {
  const container = reportContainerRef.value
  if (!container) return

  // Remove tooltip elements left over from earlier calls
  Array.from(document.querySelectorAll('.citation-tooltip')).forEach((stale) =>
    stale.remove(),
  )

  // One shared tooltip element serves every citation
  const bubble = document.createElement('div')
  bubble.className =
    'citation-tooltip fixed px-2 py-1 bg-gray-800 text-white text-xs rounded z-50 opacity-0 transition-opacity duration-200 max-w-[calc(100vw-2rem)] overflow-hidden text-ellipsis pointer-events-none'
  document.body.appendChild(bubble)

  // Show and position the tooltip for the hovered citation
  const showFor = (event: Event) => {
    const citation = event.currentTarget as HTMLElement

    // Fill in the text and make the tooltip visible before measuring it
    bubble.textContent = decodeURIComponent(
      citation.dataset.tooltipContent || '',
    )
    bubble.style.opacity = '1'

    const anchorBox = citation.getBoundingClientRect()
    const bubbleBox = bubble.getBoundingClientRect()

    // Prefer placing the tooltip above the citation; fall back to below it
    // when it would get too close to the top of the viewport
    const above = anchorBox.top - bubbleBox.height - 8
    const top = above < 10 ? anchorBox.bottom + 8 : above

    // Clamp the horizontal position to the viewport's left/right edges
    const leftmost = 10
    const rightmost = window.innerWidth - bubbleBox.width - 10
    const wanted = anchorBox.left + anchorBox.width / 2
    const left = Math.min(Math.max(wanted, leftmost), rightmost)

    bubble.style.top = `${top}px`
    bubble.style.left = `${left}px`
  }

  // Hide the tooltip again once the pointer leaves the citation
  const hide = () => {
    bubble.style.opacity = '0'
  }

  // Register the hover handlers on every citation in the report
  container.querySelectorAll('.citation-ref').forEach((citation) => {
    citation.addEventListener('mouseenter', showFor)
    citation.addEventListener('mouseleave', hide)
  })
}
let printJS: typeof import('print-js') | undefined
async function generateReport() {
@ -42,8 +150,8 @@
reasoningContent.value = ''
try {
// Store a copy of the data
const visitedUrls = researchResult.value.visitedUrls ?? []
const learnings = researchResult.value.learnings ?? []
const learnings = [...researchResult.value.learnings]
console.log(`[generateReport] Generating report. Learnings:`, learnings)
const { fullStream } = writeFinalReport({
prompt: getCombinedQuery(form.value, feedback.value),
language: t('language', {}, { locale: locale.value }),
@ -64,7 +172,12 @@
}
reportContent.value += `\n\n## ${t(
'researchReport.sources',
)}\n\n${visitedUrls.map((url) => `- ${url}`).join('\n')}`
)}\n\n${learnings
.map(
(item, index) =>
`${index + 1}. [${item.title || item.url}](${item.url})`,
)
.join('\n')}`
} catch (e: any) {
console.error(`Generate report failed`, e)
error.value = t('researchReport.generateFailed', [e.message])
@ -126,11 +239,14 @@
loadingExportMarkdown.value = true
try {
// 使用原始的 Markdown 内容,它已经包含了 [1], [2] 等引用角标
const blob = new Blob([reportContent.value], { type: 'text/markdown' })
const url = URL.createObjectURL(blob)
const a = document.createElement('a')
a.href = url
a.download = 'research-report.md'
a.download = `research-report-${
new Date().toISOString().split('T')[0]
}.md`
document.body.appendChild(a)
a.click()
document.body.removeChild(a)
@ -157,7 +273,6 @@
<UButton
icon="i-lucide-refresh-cw"
:loading
:disabled="!reasoningContent && !reportContent && !error"
variant="ghost"
@click="generateReport"
>
@ -207,6 +322,7 @@
/>
<div
ref="reportContainerRef"
v-if="reportContent"
class="prose prose-sm max-w-none break-words p-6 bg-gray-50 dark:bg-gray-800 dark:prose-invert dark:text-white rounded-lg shadow"
v-html="reportHtml"

View File

@ -10,13 +10,12 @@ import type { Locale } from '~/components/LangSwitcher.vue'
import type { DeepResearchNode } from '~/components/DeepResearch/DeepResearch.vue'
export type ResearchResult = {
learnings: string[]
visitedUrls: string[]
learnings: ProcessedSearchResult['learnings']
}
export interface WriteFinalReportParams {
prompt: string
learnings: string[]
learnings: ProcessedSearchResult['learnings']
language: string
}
@ -59,7 +58,7 @@ export type ResearchStep =
nodeId: string
}
| { type: 'error'; message: string; nodeId: string }
| { type: 'complete'; learnings: string[]; visitedUrls: string[] }
| { type: 'complete'; learnings: ProcessedSearchResult['learnings'] }
/**
* Schema for {@link generateSearchQueries} without dynamic descriptions
@ -73,7 +72,7 @@ export const searchQueriesTypeSchema = z.object({
),
})
// take en user query, return a list of SERP queries
// take a user query, return a list of SERP queries
export function generateSearchQueries({
query,
numQueries = 3,
@ -132,13 +131,21 @@ export function generateSearchQueries({
}
export const searchResultTypeSchema = z.object({
learnings: z.array(z.string()),
learnings: z.array(
z.object({
url: z.string(),
learning: z.string(),
/** This is added in {@link deepResearch} */
title: z.string().optional(),
}),
),
followUpQuestions: z.array(z.string()),
})
function processSearchResult({
query,
results,
numLearnings = 3,
numLearnings = 5,
numFollowUpQuestions = 3,
language,
}: {
@ -150,20 +157,36 @@ function processSearchResult({
}) {
const schema = z.object({
learnings: z
.array(z.string())
.describe(`List of learnings, max of ${numLearnings}`),
.array(
z.object({
url: z
.string()
.describe('The source URL from which this learning was extracted'),
learning: z
.string()
.describe(
'A detailed, information-dense insight extracted from the search results. Include specific entities, metrics, numbers, and dates when available',
),
}),
)
.describe(
`Collection of key learnings extracted from search results, each with its source URL. Maximum of ${numLearnings} learnings.`,
),
followUpQuestions: z
.array(z.string())
.describe(
`List of follow-up questions to research the topic further, max of ${numFollowUpQuestions}`,
`List of relevant follow-up questions to explore the topic further, designed to uncover additional insights. Maximum of ${numFollowUpQuestions} questions.`,
),
})
const jsonSchema = JSON.stringify(zodToJsonSchema(schema))
const contents = results.map((item) => trimPrompt(item.content))
const prompt = [
`Given the following contents from a SERP search for the query <query>${query}</query>, generate a list of learnings from the contents. Return a maximum of ${numLearnings} learnings, but feel free to return less if the contents are clear. Make sure each learning is unique and not similar to each other. The learnings should be concise and to the point, as detailed and information dense as possible. Make sure to include any entities like people, places, companies, products, things, etc in the learnings, as well as any exact metrics, numbers, or dates. The learnings will be used to research the topic further.`,
`Given the following contents from a SERP search for the query <query>${query}</query>, extract key learnings from the contents. For each learning, include the source URL. Return a maximum of ${numLearnings} learnings, but feel free to return less if the contents are clear. Make sure each learning is unique and not similar to each other. The learnings should be as detailed and information dense as possible. Include any entities like people, places, companies, products, things, etc in the learnings, as well as any exact metrics, numbers, or dates. Also generate up to ${numFollowUpQuestions} follow-up questions that could help explore this topic further.`,
`<contents>${contents
.map((content) => `<content>\n${content}\n</content>`)
.map(
(content, index) =>
`<content url="${results[index].url}">\n${content}\n</content>`,
)
.join('\n')}</contents>`,
`You MUST respond in JSON matching this JSON schema: ${jsonSchema}`,
languagePrompt(language),
@ -186,15 +209,18 @@ export function writeFinalReport({
}: WriteFinalReportParams) {
const learningsString = trimPrompt(
learnings
.map((learning) => `<learning>\n${learning}\n</learning>`)
.map(
(learning) =>
`<learning url="${learning.url}">\n${learning.learning}\n</learning>`,
)
.join('\n'),
)
const _prompt = [
`Given the following prompt from the user, write a final report on the topic using the learnings from research. Make it as as detailed as possible, aim for 3 or more pages, include ALL the learnings from research:`,
`Given the following prompt from the user, write a final report on the topic using the learnings from research. Make it as detailed as possible, aim for 3 or more pages, include ALL the key insights from research.`,
`<prompt>${prompt}</prompt>`,
`Here are all the learnings from previous research:`,
`<learnings>\n${learningsString}\n</learnings>`,
`Write the report using Markdown.`,
`Write the report using Markdown. When citing information, use numbered citations with superscript numbers in square brackets (e.g., [1], [2], [3]). Each citation should correspond to the index of the source in your learnings list. DO NOT include the actual URLs in the report text - only use the citation numbers.`,
languagePrompt(language),
`## Deep Research Report`,
].join('\n\n')
@ -219,8 +245,7 @@ export async function deepResearch({
maxDepth,
languageCode,
searchLanguageCode,
learnings = [],
visitedUrls = [],
learnings,
onProgress,
currentDepth,
nodeId = '0',
@ -234,9 +259,7 @@ export async function deepResearch({
/** The language of SERP query */
searchLanguageCode?: Locale
/** Accumulated learnings from all nodes visited so far */
learnings?: string[]
/** Accumulated visited URLs from all nodes visited so far */
visitedUrls?: string[]
learnings?: Array<{ url: string; learning: string }>
currentDepth: number
/** Current node ID. Used for recursive calls */
nodeId?: string
@ -269,7 +292,7 @@ export async function deepResearch({
else {
const searchQueriesResult = generateSearchQueries({
query,
learnings,
learnings: learnings?.map((item) => item.learning),
numQueries: breadth,
language,
searchLanguage,
@ -362,8 +385,6 @@ export async function deepResearch({
`[DeepResearch] Searched "${searchQuery.query}", found ${results.length} contents`,
)
// Collect URLs from this search
const newUrls = results.map((item) => item.url).filter(Boolean)
onProgress({
type: 'search_complete',
results,
@ -419,11 +440,17 @@ export async function deepResearch({
`Processed search result for ${searchQuery.query}`,
searchResult,
)
// Assign URL titles to learnings
searchResult.learnings = searchResult.learnings?.map((learning) => {
return {
...learning,
title: results.find((r) => r.url === learning.url)?.title,
}
})
const allLearnings = [
...learnings,
...(learnings ?? []),
...(searchResult.learnings ?? []),
]
const allUrls = [...visitedUrls, ...newUrls]
const nextDepth = currentDepth + 1
onProgress({
@ -458,7 +485,6 @@ export async function deepResearch({
breadth: nextBreadth,
maxDepth,
learnings: allLearnings,
visitedUrls: allUrls,
onProgress,
currentDepth: nextDepth,
nodeId: searchQuery.nodeId,
@ -473,7 +499,6 @@ export async function deepResearch({
} else {
return {
learnings: allLearnings,
visitedUrls: allUrls,
}
}
} catch (e: any) {
@ -488,26 +513,33 @@ export async function deepResearch({
})
return {
learnings: [],
visitedUrls: [],
}
}
}),
),
)
// Conclude results
const _learnings = [...new Set(results.flatMap((r) => r.learnings))]
const _visitedUrls = [...new Set(results.flatMap((r) => r.visitedUrls))]
// Deduplicate
const urlMap = new Map<string, true>()
const finalLearnings: ProcessedSearchResult['learnings'] = []
for (const result of results) {
for (const learning of result.learnings) {
if (!urlMap.has(learning.url)) {
urlMap.set(learning.url, true)
finalLearnings.push(learning)
}
}
}
// Complete should only be called once
if (nodeId === '0') {
onProgress({
type: 'complete',
learnings: _learnings,
visitedUrls: _visitedUrls,
learnings: finalLearnings,
})
}
return {
learnings: _learnings,
visitedUrls: _visitedUrls,
learnings: finalLearnings,
}
} catch (error: any) {
console.error(error)
@ -518,7 +550,6 @@ export async function deepResearch({
})
return {
learnings: [],
visitedUrls: [],
}
}
}