feat(nx-dev): improve link text for ai docs (#18943)

This commit is contained in:
Katerina Skroumpelou 2023-09-01 09:50:50 +03:00 committed by GitHub
parent ad2d1e8a55
commit 1931390bd0
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
3 changed files with 90 additions and 11 deletions

View File

@@ -54,7 +54,7 @@ export function getMessageFromResponse(
export function getListOfSources( export function getListOfSources(
pageSections: PageSection[] pageSections: PageSection[]
): { heading: string; url: string }[] { ): { heading: string; url: string; longer_heading: string }[] {
const uniqueUrlPartials = new Set<string | null>(); const uniqueUrlPartials = new Set<string | null>();
const result = pageSections const result = pageSections
.filter((section) => { .filter((section) => {
@@ -72,6 +72,7 @@ export function getListOfSources(
} }
return { return {
heading: section.heading, heading: section.heading,
longer_heading: section.longer_heading,
url: url.toString(), url: url.toString(),
}; };
}); });
@@ -90,11 +91,40 @@ ${sourcesMarkdown}
} }
/**
 * Renders source sections as a Markdown bullet list of links.
 *
 * The first section seen for each `heading` is linked using that heading.
 * Sections whose `heading` repeats an earlier one are listed after all of the
 * first-seen sections and are linked using their `longer_heading`, so that
 * links with the same short heading can be told apart. When a repeated
 * section has no usable `longer_heading` (null, undefined or empty), its link
 * text falls back to `"<heading> <n>"`, where `n` is its 1-based position
 * among the repeated sections.
 *
 * @param sections Sections to list, in the order they should appear.
 * @returns The list joined with `\n` and without a trailing newline; an empty
 *   string when `sections` is empty.
 */
export function toMarkdownList(
  sections: { heading: string; url: string; longer_heading: string }[]
): string {
  const seenHeadings = new Set<string>();
  const firstSeenLines: string[] = [];
  const repeatedLines: string[] = [];

  for (const section of sections) {
    if (!seenHeadings.has(section.heading)) {
      seenHeadings.add(section.heading);
      firstSeenLines.push(`- [${section.heading}](${section.url})`);
      continue;
    }

    // An empty longer heading would render a link with no visible text, so it
    // is treated the same as a missing one.
    const linkText = section.longer_heading?.length
      ? section.longer_heading
      : `${section.heading} ${repeatedLines.length + 1}`;
    repeatedLines.push(`- [${linkText}](${section.url})`);
  }

  // Joining both groups at once avoids a dangling newline when there are no
  // repeated headings.
  return [...firstSeenLines, ...repeatedLines].join('\n');
}
export function extractLinksFromSourcesSection(markdown: string): string[] { export function extractLinksFromSourcesSection(markdown: string): string[] {
@@ -123,16 +153,35 @@ export function removeSourcesSection(markdown: string): string {
/**
 * Wraps a UTF-8 text stream so that `appendContent` is emitted after the
 * original content, cutting the original content off at `stopString` if it
 * appears.
 *
 * Text before the first occurrence of `stopString` is forwarded exactly once;
 * the marker and everything after it are dropped, `appendContent` is emitted
 * and the stream ends. If the marker never appears, the whole input is
 * forwarded and `appendContent` is emitted when the input closes.
 *
 * The input is decoded and re-encoded as UTF-8, so output chunk boundaries
 * may differ from input chunk boundaries.
 *
 * @param originalStream Stream of UTF-8 encoded text.
 * @param appendContent Text emitted at the end of the returned stream.
 * @param stopString Marker at which the original content is cut off. An empty
 *   string disables cutting.
 * @returns The transformed stream.
 */
export async function appendToStream(
  originalStream: ReadableStream<Uint8Array>,
  appendContent: string,
  stopString: string = '### Sources'
): Promise<ReadableStream<Uint8Array>> {
  const encoder = new TextEncoder();
  // One decoder for the whole stream, used in streaming mode, so multi-byte
  // characters split across chunks are decoded correctly.
  const decoder = new TextDecoder();
  // Decoded text that has not been forwarded yet. It only ever holds a tail
  // that could still turn out to be the beginning of `stopString`.
  let pending = '';

  const transformer = new TransformStream<Uint8Array, Uint8Array>({
    transform(chunk, controller) {
      pending += decoder.decode(chunk, { stream: true });

      const stopIndex = stopString.length ? pending.indexOf(stopString) : -1;
      if (stopIndex !== -1) {
        // `flush` is not invoked after `terminate()`, so the appended content
        // has to be emitted here.
        const finalText = pending.slice(0, stopIndex) + appendContent;
        pending = '';
        if (finalText.length) {
          controller.enqueue(encoder.encode(finalText));
        }
        controller.terminate();
        return;
      }

      // Hold back a trailing partial match so a marker split across chunks is
      // still detected and none of it leaks downstream.
      const heldLength = trailingPrefixLength(pending, stopString);
      const ready = pending.slice(0, pending.length - heldLength);
      pending = pending.slice(pending.length - heldLength);
      if (ready.length) {
        controller.enqueue(encoder.encode(ready));
      }
    },
    flush(controller) {
      // Input ended without the marker: release held-back text, then append.
      const remaining = pending + decoder.decode();
      pending = '';
      const finalText = remaining + appendContent;
      if (finalText.length) {
        controller.enqueue(encoder.encode(finalText));
      }
    },
  });

  return originalStream.pipeThrough(transformer);
}

/**
 * Returns the length of the longest suffix of `text` that is a proper prefix
 * of `marker` (0 when there is none or `marker` is empty).
 */
function trailingPrefixLength(text: string, marker: string): number {
  const longest = Math.min(text.length, marker.length - 1);
  for (let length = longest; length > 0; length--) {
    if (text.endsWith(marker.slice(0, length))) {
      return length;
    }
  }
  return 0;
}
export function getLastAssistantIndex(messages: ChatItem[]): number { export function getLastAssistantIndex(messages: ChatItem[]): number {

View File

@@ -65,6 +65,7 @@ export interface PageSection {
page_id: number; page_id: number;
content: string; content: string;
heading: string; heading: string;
longer_heading: string;
similarity: number; similarity: number;
slug: string; slug: string;
url_partial: string | null; url_partial: string | null;

View File

@@ -307,13 +307,19 @@ async function generateEmbeddings() {
const [responseData] = embeddingResponse.data; const [responseData] = embeddingResponse.data;
const longer_heading = createLongerHeading(heading, url_partial);
const { error: insertPageSectionError, data: pageSection } = const { error: insertPageSectionError, data: pageSection } =
await supabaseClient await supabaseClient
.from('nods_page_section') .from('nods_page_section')
.insert({ .insert({
page_id: page.id, page_id: page.id,
slug, slug,
heading, heading:
heading?.length && heading !== null && heading !== 'null'
? heading
: longer_heading,
longer_heading,
content, content,
url_partial, url_partial,
token_count: embeddingResponse.usage.total_tokens, token_count: embeddingResponse.usage.total_tokens,
@@ -433,6 +439,29 @@ function getAllFilesWithItemList(data): WalkEntry[] {
return files; return files;
} }
/**
 * Builds a more descriptive heading for a page section from its heading and
 * URL partial.
 *
 * - With a usable heading (non-empty and not the literal string `'null'`),
 *   the result is `"<heading> - <Segment>"`, where `<Segment>` is the
 *   capitalized path segment after the first `/` of `url_partial`. If that
 *   segment is missing or empty, the heading is returned on its own.
 * - Without a usable heading, the result is every non-empty path segment of
 *   `url_partial`, capitalized and joined with `" - "`.
 *
 * Any `#fragment` in `url_partial` is ignored in both cases.
 *
 * @param heading The section heading, if any.
 * @param url_partial The section's URL partial, e.g. `/concepts/some-page`.
 * @returns The longer heading, or `undefined` when `url_partial` is missing
 *   or empty.
 */
function createLongerHeading(
  heading?: string | null,
  url_partial?: string
): string | undefined {
  if (!url_partial?.length) {
    return undefined;
  }

  const capitalize = (text: string): string =>
    text.charAt(0).toUpperCase() + text.slice(1);

  const [path = ''] = url_partial.split('#');
  const segments = path.split('/');

  if (heading?.length && heading !== 'null') {
    // Index 1 is the first segment of a partial that starts with `/`.
    const firstSegment = segments[1];
    // Guard against a missing or empty segment, which previously produced
    // "<heading> - NaN" or threw a TypeError.
    return firstSegment?.length
      ? `${heading} - ${capitalize(firstSegment)}`
      : heading;
  }

  return segments
    .filter((part) => part.length > 0)
    .map((part) => capitalize(part))
    .join(' - ');
}
/**
 * Awaits a single run of `generateEmbeddings`. Nothing is caught here, so a
 * rejection from `generateEmbeddings` propagates to the caller.
 */
async function main() {
  await generateEmbeddings();
}