Skip to content

Commit

Permalink
Merge pull request #301 from cofacts/ai-reply-prompts
Browse files Browse the repository at this point in the history
Handle URLs in article when creating AI replies
  • Loading branch information
MrOrz authored Apr 5, 2023
2 parents 7e8d7af + 02204d6 commit 9603680
Show file tree
Hide file tree
Showing 8 changed files with 410 additions and 127 deletions.
8 changes: 8 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -221,6 +221,14 @@ $ node -- build/scripts/genBERTInputArticles.js -s <Google spreadsheet ID> -o <O

The ground truth files in JSON will be written to output directory

### Generate a new AI reply for the specified article

This command generates a new AI reply even if the article already has one.
It is suitable when the existing AI reply is not appropriate.
```
$ node build/scripts/genAIReply.js -a <articleId> --temperature=1
```

## One-off migration scripts

### Fill in `urls` index and `hyperlinks` field for all articles & replies
Expand Down
252 changes: 153 additions & 99 deletions src/graphql/mutations/CreateAIReply.js
Original file line number Diff line number Diff line change
Expand Up @@ -6,16 +6,164 @@ import client from 'util/client';
import delayForMs from 'util/delayForMs';
import { AIReply } from 'graphql/models/AIResponse';

const formatter = Intl.DateTimeFormat('zh-TW', {
const monthFormatter = Intl.DateTimeFormat('zh-TW', {
year: 'numeric',
month: 'long',
day: 'numeric',
});

/**
* Create an new AIReply, initially in LOADING state, then becomes ERROR or SUCCESS,
* and returns the AI reply.
* If there is no enough content for AI, it resolves to null.
*/
export async function createNewAIReply({
article,
user,
completionOptions = {},
}) {
// article.hyperlinks deduped by URL.
const dedupedHyperlinks = Object.values(
(article.hyperlinks ?? []).reduce((map, hyperlink) => {
if (
!map[hyperlink.url] ||
/* hyperlink exists, but fetch failed */ !map[hyperlink.url].title
) {
map[hyperlink.url] = hyperlink;
}
return map;
}, {})
);

/**
* Determine if article has no content by replacing all URLs with its scrapped content.
* This will become empty string if and only if:
* - The article only contains URLs, no other text, and
* - All URL scrapping results fail (no title, no summary)
*
* Abort AI reply generation in this case.
*/
const replacedArticleText = dedupedHyperlinks
.reduce(
(text, { url, title, summary }) =>
text.replaceAll(url, `${title} ${summary}`),
article.text
)
.trim();

if (replacedArticleText.length === 0) return null;

// Argumenting hyperlinks with summary and titles
const argumentedArticleText = dedupedHyperlinks.reduce(
(text, { url, title, summary }) =>
title
? text.replaceAll(url, `[${title} ${summary}](${url})`)
: /* Fetch failed, don't replace */ text,
article.text
);

const thisMonth = monthFormatter.format(new Date());
const createdMonth = monthFormatter.format(new Date(article.createdAt));

const completionRequest = {
model: 'gpt-3.5-turbo',
messages: [
{
role: 'system',
content: `現在是${thisMonth}。你是協助讀者進行媒體識讀的小幫手。你說話時總是使用台灣繁體中文。有讀者傳了一則網路訊息給你。這則訊息${createdMonth}就在網路上流傳。`,
},
{
role: 'user',
content: argumentedArticleText,
},
{
role: 'user',
content:
'請問作為閱聽人,我應該注意這則訊息的哪些地方呢?\n請節錄訊息中需要特別留意的地方,說明為何閱聽人需要注意它,謝謝。',
},
],
user: user.id,
temperature: 0,
...completionOptions,
};

const newResponse = {
userId: user.id,
appId: user.appId,
docId: article.id,
type: 'AI_REPLY',
status: 'LOADING',
request: JSON.stringify(completionRequest),
createdAt: new Date(),
};

// Resolves to loading AI Response.
const newResponseIdPromise = client
.index({
index: 'airesponses',
type: 'doc',
body: newResponse,
})
.then(({ body: { result, _id } }) => {
/* istanbul ignore if */
if (result !== 'created') {
throw new Error(`Cannot create AI reply: ${result}`);
}
return _id;
});

const openAIResponsePromise = openai
.createChatCompletion(completionRequest)
.then(({ data }) => data)
.catch(error => {
console.error(error);

/* Resolve with Error instance, which will be used to update AI response below */
/* istanbul ignore else */
if (error instanceof Error) return error;
return new Error(error);
});

// Resolves to completed or errored AI response.
return Promise.all([openAIResponsePromise, newResponseIdPromise])
.then(([apiResult, aiResponseId]) =>
// Update using aiResponse._id according to apiResult
client.update({
index: 'airesponses',
type: 'doc',
id: aiResponseId,
_source: true,
body: {
doc:
apiResult instanceof Error
? {
status: 'ERROR',
text: apiResult.toString(),
updatedAt: new Date(),
}
: {
status: 'SUCCESS',
text: apiResult.choices[0].message.content,
...(apiResult.usage
? {
usage: {
promptTokens: apiResult.usage.prompt_tokens,
completionTokens: apiResult.usage.completion_tokens,
totalTokens: apiResult.usage.total_tokens,
},
}
: undefined),
updatedAt: new Date(),
},
},
})
)
.then(({ body: { _id, get: { _source } } }) => ({ id: _id, ..._source }));
}

export default {
type: new GraphQLNonNull(AIReply),
type: AIReply,
description:
'Create an AI reply for a specific article. If existed, returns an existing one.',
'Create an AI reply for a specific article. If existed, returns an existing one. If information in the article is not sufficient for AI, return null.',
args: {
articleId: { type: new GraphQLNonNull(GraphQLString) },
},
Expand Down Expand Up @@ -108,100 +256,6 @@ export default {
await delayForMs(1000);
}

// Creating new AI response
//
const today = formatter.format(new Date());

const completionRequest = {
model: 'gpt-3.5-turbo',
messages: [
{
role: 'system',
content: `今天是${today}。你是協助讀者進行媒體識讀的小幫手。你說話時總是使用台灣繁體中文。有讀者傳了一則網路訊息給你。`,
},
{
role: 'user',
content: article.text,
},
{
role: 'user',
content:
'請問作為閱聽人,我應該注意這則訊息的哪些地方呢?\n請節錄訊息中需要特別留意的地方,說明為何閱聽人需要注意它,謝謝。',
},
],
};

const newResponse = {
userId: user.id,
appId: user.appId,
docId: articleId,
type: 'AI_REPLY',
status: 'LOADING',
request: JSON.stringify(completionRequest),
createdAt: new Date(),
};

// Resolves to loading AI Response.
const newResponseIdPromise = client
.index({
index: 'airesponses',
type: 'doc',
body: newResponse,
})
.then(({ body: { result, _id } }) => {
/* istanbul ignore if */
if (result !== 'created') {
throw new Error(`Cannot create AI reply: ${result}`);
}
return _id;
});

const openAIResponsePromise = openai
.createChatCompletion(completionRequest)
.then(({ data }) => data)
.catch(error => {
console.error(error);

/* Resolve with Error instance, which will be used to update AI response below */
/* istanbul ignore else */
if (error instanceof Error) return error;
return new Error(error);
});

// Resolves to completed or errored AI response.
return Promise.all([openAIResponsePromise, newResponseIdPromise])
.then(([apiResult, aiResponseId]) =>
// Update using aiResponse._id according to apiResult
client.update({
index: 'airesponses',
type: 'doc',
id: aiResponseId,
_source: true,
body: {
doc:
apiResult instanceof Error
? {
status: 'ERROR',
text: apiResult.toString(),
updatedAt: new Date(),
}
: {
status: 'SUCCESS',
text: apiResult.choices[0].message.content,
...(apiResult.usage
? {
usage: {
promptTokens: apiResult.usage.prompt_tokens,
completionTokens: apiResult.usage.completion_tokens,
totalTokens: apiResult.usage.total_tokens,
},
}
: undefined),
updatedAt: new Date(),
},
},
})
)
.then(({ body: { _id, get: { _source } } }) => ({ id: _id, ..._source }));
return createNewAIReply({ article, user });
},
};
50 changes: 50 additions & 0 deletions src/graphql/mutations/__fixtures__/CreateAIReply.js
Original file line number Diff line number Diff line change
Expand Up @@ -2,12 +2,35 @@ export default {
'/articles/doc/reported-article': {
text:
'我優秀的斐陶斐大姐是中央銀行退休,她剛看了一下,上網登記除要身份証號碼,還要健保卡號,健保卡號很少會要求提供,被洩漏機會相對少,但這次登記要一次完整的登入雙證件的號碼有點讓人擔憂,連同銀行帳號一併洩漏後果可怕! ',
createdAt: '2020-01-01T00:00:00.000Z',
},
'/articles/doc/ai-replied-article': {
text: 'foo',
createdAt: '2020-01-01T00:00:00.000Z',
},
'/articles/doc/some-article': {
text: 'Some article',
createdAt: '2020-01-01T00:00:00.000Z',
},
'/articles/doc/with-resolved-urls': {
text: 'https://foo.com https://foo.com https://bar.com https://bar.com',
createdAt: '2020-01-01T00:00:00.000Z',
hyperlinks: [
{ url: 'https://foo.com', title: 'Foo-title!', summary: 'Foo summary' },
// Simulate the edge case when there are multiple different entries for 1 URL (should not happen, though...)
{ url: 'https://foo.com', title: '', summary: '' },
// Simulate the case when URL resolution is failed
{ url: 'https://bar.com', title: '', summary: '' },
{ url: 'https://bar.com', title: '', summary: '' },
],
},
'/articles/doc/with-no-resolved-urls': {
text: 'https://foo.com\nhttps://bar.com',
createdAt: '2020-01-01T00:00:00.000Z',
hyperlinks: [
{ url: 'https://foo.com', title: '', summary: '' },
{ url: 'https://bar.com', title: '', summary: '' },
],
},
'/airesponses/doc/ai-reply-old': {
docId: 'ai-replied-article',
Expand All @@ -34,3 +57,30 @@ export default {
createdAt: '2020-01-01T00:00:00.000Z', // Will be filled during test setup
},
};

/**
 * Fixture shaped like a successful (HTTP 200) chat completion response:
 * the payload lives under `data`, with token counts in `data.usage` and
 * the assistant message in `data.choices[0].message`.
 * NOTE(review): presumably used as the mocked resolved value of
 * `openai.createChatCompletion` in tests — confirm against the test file.
 */
export const SUCCESS_OPENAI_RESP = {
data: {
id: 'chatcmpl-some-id',
object: 'chat.completion',
created: 1679847676,
model: 'gpt-3.5-turbo-0301',
// Snake-case token counters, as read by createNewAIReply via `apiResult.usage`.
usage: {
prompt_tokens: 343,
completion_tokens: 64,
total_tokens: 407,
},
choices: [
{
message: {
role: 'assistant',
content:
'閱聽人應該確保登記網站的正確性和安全性,並記得定期更改密碼和密鑰,以保護自己的資訊安全。',
},
finish_reason: 'stop',
index: 0,
},
],
},
status: 200,
statusText: 'OK',
};
Loading

0 comments on commit 9603680

Please sign in to comment.