From c3953c31cbf9abbc65473f957ec32a7d4453e783 Mon Sep 17 00:00:00 2001 From: MrOrz Date: Tue, 4 Apr 2023 13:13:24 +0800 Subject: [PATCH 1/5] [CreateAIReply] make prompt date less specific - So that the chatbot doesn't give replies that are too specific to a certain date, such as April Fools' Day. --- src/graphql/mutations/CreateAIReply.js | 7 +++---- src/graphql/mutations/__tests__/CreateAIReply.js | 2 +- 2 files changed, 4 insertions(+), 5 deletions(-) diff --git a/src/graphql/mutations/CreateAIReply.js b/src/graphql/mutations/CreateAIReply.js index 07f8ba3b..a93a7337 100644 --- a/src/graphql/mutations/CreateAIReply.js +++ b/src/graphql/mutations/CreateAIReply.js @@ -6,10 +6,9 @@ import client from 'util/client'; import delayForMs from 'util/delayForMs'; import { AIReply } from 'graphql/models/AIResponse'; -const formatter = Intl.DateTimeFormat('zh-TW', { +const monthFormatter = Intl.DateTimeFormat('zh-TW', { year: 'numeric', month: 'long', - day: 'numeric', }); export default { @@ -110,14 +109,14 @@ export default { // Creating new AI response // - const today = formatter.format(new Date()); + const thisMonth = monthFormatter.format(new Date()); const completionRequest = { model: 'gpt-3.5-turbo', messages: [ { role: 'system', - content: `今天是${today}。你是協助讀者進行媒體識讀的小幫手。你說話時總是使用台灣繁體中文。有讀者傳了一則網路訊息給你。`, + content: `現在是${thisMonth}。你是協助讀者進行媒體識讀的小幫手。你說話時總是使用台灣繁體中文。有讀者傳了一則網路訊息給你。`, }, { role: 'user', diff --git a/src/graphql/mutations/__tests__/CreateAIReply.js b/src/graphql/mutations/__tests__/CreateAIReply.js index 6f7aa55f..6feb358e 100644 --- a/src/graphql/mutations/__tests__/CreateAIReply.js +++ b/src/graphql/mutations/__tests__/CreateAIReply.js @@ -129,7 +129,7 @@ describe('CreateAIReply', () => { "appId": "test", "createdAt": "2020-10-10T00:00:00.000Z", "docId": "reported-article", - "request": 
"{\\"model\\":\\"gpt-3.5-turbo\\",\\"messages\\":[{\\"role\\":\\"system\\",\\"content\\":\\"今天是2020年10月10日。你是協助讀者進行媒體識讀的小幫手。你說話時總是使用台灣繁體中文。有讀者傳了一則網路訊息給你。\\"},{\\"role\\":\\"user\\",\\"content\\":\\"我優秀的斐陶斐大姐是中央銀行退休,她剛看了一下,上網登記除要身份証號碼,還要健保卡號,健保卡號很少會要求提供,被洩漏機會相對少,但這次登記要一次完整的登入雙證件的號碼有點讓人擔憂,連同銀行帳號一併洩漏後果可怕! \\"},{\\"role\\":\\"user\\",\\"content\\":\\"請問作為閱聽人,我應該注意這則訊息的哪些地方呢?\\\\n請節錄訊息中需要特別留意的地方,說明為何閱聽人需要注意它,謝謝。\\"}]}", + "request": "{\\"model\\":\\"gpt-3.5-turbo\\",\\"messages\\":[{\\"role\\":\\"system\\",\\"content\\":\\"現在是2020年10月。你是協助讀者進行媒體識讀的小幫手。你說話時總是使用台灣繁體中文。有讀者傳了一則網路訊息給你。\\"},{\\"role\\":\\"user\\",\\"content\\":\\"我優秀的斐陶斐大姐是中央銀行退休,她剛看了一下,上網登記除要身份証號碼,還要健保卡號,健保卡號很少會要求提供,被洩漏機會相對少,但這次登記要一次完整的登入雙證件的號碼有點讓人擔憂,連同銀行帳號一併洩漏後果可怕! \\"},{\\"role\\":\\"user\\",\\"content\\":\\"請問作為閱聽人,我應該注意這則訊息的哪些地方呢?\\\\n請節錄訊息中需要特別留意的地方,說明為何閱聽人需要注意它,謝謝。\\"}]}", "status": "LOADING", "type": "AI_REPLY", "userId": "test", From 51fdb96b51a532b8d23bacbf464547d3dc673a6d Mon Sep 17 00:00:00 2001 From: MrOrz Date: Tue, 4 Apr 2023 16:36:26 +0800 Subject: [PATCH 2/5] [CreateAIReply] add genAIReply script that calls createAIReply --- README.md | 8 + src/graphql/mutations/CreateAIReply.js | 197 +++++++++--------- .../mutations/__tests__/CreateAIReply.js | 2 +- src/scripts/__fixtures__/genAIReply.js | 5 + src/scripts/__tests__/genAIReply.js | 41 ++++ src/scripts/genAIReply.js | 57 +++++ src/scripts/removeArticleReply.js | 1 + 7 files changed, 215 insertions(+), 96 deletions(-) create mode 100644 src/scripts/__fixtures__/genAIReply.js create mode 100644 src/scripts/__tests__/genAIReply.js create mode 100644 src/scripts/genAIReply.js diff --git a/README.md b/README.md index b87335df..2fb1cb78 100644 --- a/README.md +++ b/README.md @@ -221,6 +221,14 @@ $ node -- build/scripts/genBERTInputArticles.js -s -o +``` + ## One-off migration scripts ### Fill in `urls` index and `hyperlinks` field for all articles & replies diff --git a/src/graphql/mutations/CreateAIReply.js b/src/graphql/mutations/CreateAIReply.js 
index a93a7337..a92a3472 100644 --- a/src/graphql/mutations/CreateAIReply.js +++ b/src/graphql/mutations/CreateAIReply.js @@ -11,6 +11,107 @@ const monthFormatter = Intl.DateTimeFormat('zh-TW', { month: 'long', }); +/** + * Create an new AIReply, initially in LOADING state, then becomes ERROR or SUCCESS, + * and returns the AI reply. + */ +export async function createNewAIReply({ article, user }) { + const thisMonth = monthFormatter.format(new Date()); + + const completionRequest = { + model: 'gpt-3.5-turbo', + messages: [ + { + role: 'system', + content: `現在是${thisMonth}。你是協助讀者進行媒體識讀的小幫手。你說話時總是使用台灣繁體中文。有讀者傳了一則網路訊息給你。`, + }, + { + role: 'user', + content: article.text, + }, + { + role: 'user', + content: + '請問作為閱聽人,我應該注意這則訊息的哪些地方呢?\n請節錄訊息中需要特別留意的地方,說明為何閱聽人需要注意它,謝謝。', + }, + ], + user: user.id, + }; + + const newResponse = { + userId: user.id, + appId: user.appId, + docId: article.id, + type: 'AI_REPLY', + status: 'LOADING', + request: JSON.stringify(completionRequest), + createdAt: new Date(), + }; + + // Resolves to loading AI Response. + const newResponseIdPromise = client + .index({ + index: 'airesponses', + type: 'doc', + body: newResponse, + }) + .then(({ body: { result, _id } }) => { + /* istanbul ignore if */ + if (result !== 'created') { + throw new Error(`Cannot create AI reply: ${result}`); + } + return _id; + }); + + const openAIResponsePromise = openai + .createChatCompletion(completionRequest) + .then(({ data }) => data) + .catch(error => { + console.error(error); + + /* Resolve with Error instance, which will be used to update AI response below */ + /* istanbul ignore else */ + if (error instanceof Error) return error; + return new Error(error); + }); + + // Resolves to completed or errored AI response. 
+ return Promise.all([openAIResponsePromise, newResponseIdPromise]) + .then(([apiResult, aiResponseId]) => + // Update using aiResponse._id according to apiResult + client.update({ + index: 'airesponses', + type: 'doc', + id: aiResponseId, + _source: true, + body: { + doc: + apiResult instanceof Error + ? { + status: 'ERROR', + text: apiResult.toString(), + updatedAt: new Date(), + } + : { + status: 'SUCCESS', + text: apiResult.choices[0].message.content, + ...(apiResult.usage + ? { + usage: { + promptTokens: apiResult.usage.prompt_tokens, + completionTokens: apiResult.usage.completion_tokens, + totalTokens: apiResult.usage.total_tokens, + }, + } + : undefined), + updatedAt: new Date(), + }, + }, + }) + ) + .then(({ body: { _id, get: { _source } } }) => ({ id: _id, ..._source })); +} + export default { type: new GraphQLNonNull(AIReply), description: @@ -107,100 +208,6 @@ export default { await delayForMs(1000); } - // Creating new AI response - // - const thisMonth = monthFormatter.format(new Date()); - - const completionRequest = { - model: 'gpt-3.5-turbo', - messages: [ - { - role: 'system', - content: `現在是${thisMonth}。你是協助讀者進行媒體識讀的小幫手。你說話時總是使用台灣繁體中文。有讀者傳了一則網路訊息給你。`, - }, - { - role: 'user', - content: article.text, - }, - { - role: 'user', - content: - '請問作為閱聽人,我應該注意這則訊息的哪些地方呢?\n請節錄訊息中需要特別留意的地方,說明為何閱聽人需要注意它,謝謝。', - }, - ], - }; - - const newResponse = { - userId: user.id, - appId: user.appId, - docId: articleId, - type: 'AI_REPLY', - status: 'LOADING', - request: JSON.stringify(completionRequest), - createdAt: new Date(), - }; - - // Resolves to loading AI Response. 
- const newResponseIdPromise = client - .index({ - index: 'airesponses', - type: 'doc', - body: newResponse, - }) - .then(({ body: { result, _id } }) => { - /* istanbul ignore if */ - if (result !== 'created') { - throw new Error(`Cannot create AI reply: ${result}`); - } - return _id; - }); - - const openAIResponsePromise = openai - .createChatCompletion(completionRequest) - .then(({ data }) => data) - .catch(error => { - console.error(error); - - /* Resolve with Error instance, which will be used to update AI response below */ - /* istanbul ignore else */ - if (error instanceof Error) return error; - return new Error(error); - }); - - // Resolves to completed or errored AI response. - return Promise.all([openAIResponsePromise, newResponseIdPromise]) - .then(([apiResult, aiResponseId]) => - // Update using aiResponse._id according to apiResult - client.update({ - index: 'airesponses', - type: 'doc', - id: aiResponseId, - _source: true, - body: { - doc: - apiResult instanceof Error - ? { - status: 'ERROR', - text: apiResult.toString(), - updatedAt: new Date(), - } - : { - status: 'SUCCESS', - text: apiResult.choices[0].message.content, - ...(apiResult.usage - ? 
{ - usage: { - promptTokens: apiResult.usage.prompt_tokens, - completionTokens: apiResult.usage.completion_tokens, - totalTokens: apiResult.usage.total_tokens, - }, - } - : undefined), - updatedAt: new Date(), - }, - }, - }) - ) - .then(({ body: { _id, get: { _source } } }) => ({ id: _id, ..._source })); + return createNewAIReply({ article, user }); }, }; diff --git a/src/graphql/mutations/__tests__/CreateAIReply.js b/src/graphql/mutations/__tests__/CreateAIReply.js index 6feb358e..10d91926 100644 --- a/src/graphql/mutations/__tests__/CreateAIReply.js +++ b/src/graphql/mutations/__tests__/CreateAIReply.js @@ -129,7 +129,7 @@ describe('CreateAIReply', () => { "appId": "test", "createdAt": "2020-10-10T00:00:00.000Z", "docId": "reported-article", - "request": "{\\"model\\":\\"gpt-3.5-turbo\\",\\"messages\\":[{\\"role\\":\\"system\\",\\"content\\":\\"現在是2020年10月。你是協助讀者進行媒體識讀的小幫手。你說話時總是使用台灣繁體中文。有讀者傳了一則網路訊息給你。\\"},{\\"role\\":\\"user\\",\\"content\\":\\"我優秀的斐陶斐大姐是中央銀行退休,她剛看了一下,上網登記除要身份証號碼,還要健保卡號,健保卡號很少會要求提供,被洩漏機會相對少,但這次登記要一次完整的登入雙證件的號碼有點讓人擔憂,連同銀行帳號一併洩漏後果可怕! \\"},{\\"role\\":\\"user\\",\\"content\\":\\"請問作為閱聽人,我應該注意這則訊息的哪些地方呢?\\\\n請節錄訊息中需要特別留意的地方,說明為何閱聽人需要注意它,謝謝。\\"}]}", + "request": "{\\"model\\":\\"gpt-3.5-turbo\\",\\"messages\\":[{\\"role\\":\\"system\\",\\"content\\":\\"現在是2020年10月。你是協助讀者進行媒體識讀的小幫手。你說話時總是使用台灣繁體中文。有讀者傳了一則網路訊息給你。\\"},{\\"role\\":\\"user\\",\\"content\\":\\"我優秀的斐陶斐大姐是中央銀行退休,她剛看了一下,上網登記除要身份証號碼,還要健保卡號,健保卡號很少會要求提供,被洩漏機會相對少,但這次登記要一次完整的登入雙證件的號碼有點讓人擔憂,連同銀行帳號一併洩漏後果可怕! 
\\"},{\\"role\\":\\"user\\",\\"content\\":\\"請問作為閱聽人,我應該注意這則訊息的哪些地方呢?\\\\n請節錄訊息中需要特別留意的地方,說明為何閱聽人需要注意它,謝謝。\\"}],\\"user\\":\\"test\\"}", "status": "LOADING", "type": "AI_REPLY", "userId": "test", diff --git a/src/scripts/__fixtures__/genAIReply.js b/src/scripts/__fixtures__/genAIReply.js new file mode 100644 index 00000000..a5d7fc78 --- /dev/null +++ b/src/scripts/__fixtures__/genAIReply.js @@ -0,0 +1,5 @@ +export default { + '/articles/doc/some-article': { + text: 'Some article', + }, +}; diff --git a/src/scripts/__tests__/genAIReply.js b/src/scripts/__tests__/genAIReply.js new file mode 100644 index 00000000..72b68d39 --- /dev/null +++ b/src/scripts/__tests__/genAIReply.js @@ -0,0 +1,41 @@ +jest.mock('graphql/mutations/CreateAIReply'); + +import client from 'util/client'; +import { loadFixtures, unloadFixtures } from 'util/fixtures'; +import { createNewAIReply } from 'graphql/mutations/CreateAIReply'; +import fixtures from '../__fixtures__/genAIReply'; +import genAIReply, { GENERATOR_USER_ID } from '../genAIReply'; + +beforeEach(() => loadFixtures(fixtures)); + +it('rejects when articleId is not provided', async () => { + await expect( + genAIReply({ articleId: undefined }) + ).rejects.toThrowErrorMatchingInlineSnapshot(`"Please specify articleId"`); +}); + +it('calls AI reply generation as expected', async () => { + createNewAIReply.mockImplementationOnce(async () => undefined); + + await genAIReply({ articleId: 'some-article' }); + + expect(createNewAIReply).toHaveBeenCalledTimes(1); + expect(createNewAIReply.mock.calls[0][0].article).toMatchInlineSnapshot(` + Object { + "id": "some-article", + "text": "Some article", + } + `); + expect(createNewAIReply.mock.calls[0][0].user.appUserId).toBe( + GENERATOR_USER_ID + ); + + // Cleanup generated reviewer user before invoking the mocked createNewAIReply + await client.delete({ + index: 'users', + type: 'doc', + id: createNewAIReply.mock.calls[0][0].user.id, + }); +}); + +afterEach(() => unloadFixtures(fixtures)); 
diff --git a/src/scripts/genAIReply.js b/src/scripts/genAIReply.js new file mode 100644 index 00000000..7d5e1d40 --- /dev/null +++ b/src/scripts/genAIReply.js @@ -0,0 +1,57 @@ +// eslint-disable no-console +/* + A script that generates AI reply for an article +*/ + +import yargs from 'yargs'; + +import client from 'util/client'; +import { createNewAIReply } from 'graphql/mutations/CreateAIReply'; +import { createOrUpdateUser } from 'util/user'; + +// The identity we use to generate AI reply +const GENERATOR_APP_ID = 'RUMORS_AI'; +export const GENERATOR_USER_ID = 'ai-reply-reviewer'; + +async function main({ articleId } = {}) { + if (!articleId) throw new Error('Please specify articleId'); + + const { + body: { _source: article }, + } = await client.get({ + index: 'articles', + type: 'doc', + id: articleId, + }); + + const { user } = await createOrUpdateUser({ + userId: GENERATOR_USER_ID, + appId: GENERATOR_APP_ID, + }); + + return createNewAIReply({ + article: { + ...article, + id: articleId, + }, + user, + }); +} + +export default main; + +/* istanbul ignore if */ +if (require.main === module) { + const argv = yargs + .options({ + articleId: { + alias: 'a', + description: 'Article ID to generate AI reply', + type: 'string', + demandOption: true, + }, + }) + .help('help').argv; + + main(argv).catch(console.error); +} diff --git a/src/scripts/removeArticleReply.js b/src/scripts/removeArticleReply.js index 55e688fa..e5a06efc 100644 --- a/src/scripts/removeArticleReply.js +++ b/src/scripts/removeArticleReply.js @@ -21,6 +21,7 @@ async function main({ articleId, replyId, userId } = {}) { export default main; +/* istanbul ignore if */ if (require.main === module) { const argv = yargs .options({ From 63957a2ba5d9e5e3670295316040de711e58bf81 Mon Sep 17 00:00:00 2001 From: MrOrz Date: Tue, 4 Apr 2023 23:24:41 +0800 Subject: [PATCH 3/5] [CreateAIReply] add article creation month and completionOption override --- src/graphql/mutations/CreateAIReply.js | 16 ++++++++++++---- 
.../mutations/__fixtures__/CreateAIReply.js | 3 +++ src/graphql/mutations/__tests__/CreateAIReply.js | 2 +- src/scripts/genAIReply.js | 8 +++++++- 4 files changed, 23 insertions(+), 6 deletions(-) diff --git a/src/graphql/mutations/CreateAIReply.js b/src/graphql/mutations/CreateAIReply.js index a92a3472..ee46fb05 100644 --- a/src/graphql/mutations/CreateAIReply.js +++ b/src/graphql/mutations/CreateAIReply.js @@ -14,16 +14,22 @@ const monthFormatter = Intl.DateTimeFormat('zh-TW', { /** * Create an new AIReply, initially in LOADING state, then becomes ERROR or SUCCESS, * and returns the AI reply. + * If there is not enough content for AI, it resolves to null. */ -export async function createNewAIReply({ article, user }) { +export async function createNewAIReply({ + article, + user, + completionOptions = {}, +}) { const thisMonth = monthFormatter.format(new Date()); + const createdMonth = monthFormatter.format(new Date(article.createdAt)); const completionRequest = { model: 'gpt-3.5-turbo', messages: [ { role: 'system', - content: `現在是${thisMonth}。你是協助讀者進行媒體識讀的小幫手。你說話時總是使用台灣繁體中文。有讀者傳了一則網路訊息給你。`, + content: `現在是${thisMonth}。你是協助讀者進行媒體識讀的小幫手。你說話時總是使用台灣繁體中文。有讀者傳了一則網路訊息給你。這則訊息${createdMonth}就在網路上流傳。`, }, { role: 'user', @@ -36,6 +42,8 @@ export async function createNewAIReply({ article, user }) { }, ], user: user.id, + temperature: 0, + ...completionOptions, }; const newResponse = { @@ -113,9 +121,9 @@ export async function createNewAIReply({ article, user }) { } export default { - type: new GraphQLNonNull(AIReply), + type: AIReply, description: - 'Create an AI reply for a specific article. If existed, returns an existing one.', + 'Create an AI reply for a specific article. If existed, returns an existing one. 
If information in the article is not sufficient for AI, return null.', args: { articleId: { type: new GraphQLNonNull(GraphQLString) }, }, diff --git a/src/graphql/mutations/__fixtures__/CreateAIReply.js b/src/graphql/mutations/__fixtures__/CreateAIReply.js index bd70dd02..ac0d0b72 100644 --- a/src/graphql/mutations/__fixtures__/CreateAIReply.js +++ b/src/graphql/mutations/__fixtures__/CreateAIReply.js @@ -2,12 +2,15 @@ export default { '/articles/doc/reported-article': { text: '我優秀的斐陶斐大姐是中央銀行退休,她剛看了一下,上網登記除要身份証號碼,還要健保卡號,健保卡號很少會要求提供,被洩漏機會相對少,但這次登記要一次完整的登入雙證件的號碼有點讓人擔憂,連同銀行帳號一併洩漏後果可怕! ', + createdAt: '2020-01-01T00:00:00.000Z', }, '/articles/doc/ai-replied-article': { text: 'foo', + createdAt: '2020-01-01T00:00:00.000Z', }, '/articles/doc/some-article': { text: 'Some article', + createdAt: '2020-01-01T00:00:00.000Z', }, '/airesponses/doc/ai-reply-old': { docId: 'ai-replied-article', diff --git a/src/graphql/mutations/__tests__/CreateAIReply.js b/src/graphql/mutations/__tests__/CreateAIReply.js index 10d91926..153ae0f0 100644 --- a/src/graphql/mutations/__tests__/CreateAIReply.js +++ b/src/graphql/mutations/__tests__/CreateAIReply.js @@ -129,7 +129,7 @@ describe('CreateAIReply', () => { "appId": "test", "createdAt": "2020-10-10T00:00:00.000Z", "docId": "reported-article", - "request": "{\\"model\\":\\"gpt-3.5-turbo\\",\\"messages\\":[{\\"role\\":\\"system\\",\\"content\\":\\"現在是2020年10月。你是協助讀者進行媒體識讀的小幫手。你說話時總是使用台灣繁體中文。有讀者傳了一則網路訊息給你。\\"},{\\"role\\":\\"user\\",\\"content\\":\\"我優秀的斐陶斐大姐是中央銀行退休,她剛看了一下,上網登記除要身份証號碼,還要健保卡號,健保卡號很少會要求提供,被洩漏機會相對少,但這次登記要一次完整的登入雙證件的號碼有點讓人擔憂,連同銀行帳號一併洩漏後果可怕! 
\\"},{\\"role\\":\\"user\\",\\"content\\":\\"請問作為閱聽人,我應該注意這則訊息的哪些地方呢?\\\\n請節錄訊息中需要特別留意的地方,說明為何閱聽人需要注意它,謝謝。\\"}],\\"user\\":\\"test\\"}", + "request": "{\\"model\\":\\"gpt-3.5-turbo\\",\\"messages\\":[{\\"role\\":\\"system\\",\\"content\\":\\"現在是2020年10月。你是協助讀者進行媒體識讀的小幫手。你說話時總是使用台灣繁體中文。有讀者傳了一則網路訊息給你。這則訊息2020年1月就在網路上流傳。\\"},{\\"role\\":\\"user\\",\\"content\\":\\"我優秀的斐陶斐大姐是中央銀行退休,她剛看了一下,上網登記除要身份証號碼,還要健保卡號,健保卡號很少會要求提供,被洩漏機會相對少,但這次登記要一次完整的登入雙證件的號碼有點讓人擔憂,連同銀行帳號一併洩漏後果可怕! \\"},{\\"role\\":\\"user\\",\\"content\\":\\"請問作為閱聽人,我應該注意這則訊息的哪些地方呢?\\\\n請節錄訊息中需要特別留意的地方,說明為何閱聽人需要注意它,謝謝。\\"}],\\"user\\":\\"test\\",\\"temperature\\":0}", "status": "LOADING", "type": "AI_REPLY", "userId": "test", diff --git a/src/scripts/genAIReply.js b/src/scripts/genAIReply.js index 7d5e1d40..1714c967 100644 --- a/src/scripts/genAIReply.js +++ b/src/scripts/genAIReply.js @@ -13,7 +13,7 @@ import { createOrUpdateUser } from 'util/user'; const GENERATOR_APP_ID = 'RUMORS_AI'; export const GENERATOR_USER_ID = 'ai-reply-reviewer'; -async function main({ articleId } = {}) { +async function main({ articleId, ...completionOptions } = {}) { if (!articleId) throw new Error('Please specify articleId'); const { @@ -35,6 +35,7 @@ async function main({ articleId } = {}) { id: articleId, }, user, + completionOptions, }); } @@ -50,6 +51,11 @@ if (require.main === module) { type: 'string', demandOption: true, }, + temperature: { + description: 'Open AI chat completion param', + type: 'number', + demandOption: false, + }, }) .help('help').argv; From 6e971fa6aaa323e4c8e923596c39b44291df0177 Mon Sep 17 00:00:00 2001 From: MrOrz Date: Wed, 5 Apr 2023 01:51:42 +0800 Subject: [PATCH 4/5] [CreateAIReply] implement and test URL resolution --- src/graphql/mutations/CreateAIReply.js | 42 ++++++- .../mutations/__fixtures__/CreateAIReply.js | 47 +++++++ .../mutations/__tests__/CreateAIReply.js | 115 ++++++++++++++---- 3 files changed, 176 insertions(+), 28 deletions(-) diff --git a/src/graphql/mutations/CreateAIReply.js 
b/src/graphql/mutations/CreateAIReply.js index ee46fb05..50c9d1c4 100644 --- a/src/graphql/mutations/CreateAIReply.js +++ b/src/graphql/mutations/CreateAIReply.js @@ -21,6 +21,46 @@ export async function createNewAIReply({ user, completionOptions = {}, }) { + // article.hyperlinks deduped by URL. + const dedupedHyperlinks = Object.values( + (article.hyperlinks ?? []).reduce((map, hyperlink) => { + if ( + !map[hyperlink.url] || + /* hyperlink exists, but fetch failed */ !map[hyperlink.url].title + ) { + map[hyperlink.url] = hyperlink; + } + return map; + }, {}) + ); + + /** + * Determine if article has no content by replacing all URLs with their scraped content. + * This will become empty string if and only if: + * - The article only contains URLs, no other text, and + * - All URL scraping results fail (no title, no summary) + * + * Abort AI reply generation in this case. + */ + const replacedArticleText = dedupedHyperlinks + .reduce( + (text, { url, title, summary }) => + text.replaceAll(url, `${title} ${summary}`), + article.text + ) + .trim(); + + if (replacedArticleText.length === 0) return null; + + // Augmenting hyperlinks with summary and titles + const argumentedArticleText = dedupedHyperlinks.reduce( + (text, { url, title, summary }) => + title + ? 
text.replaceAll(url, `[${title} ${summary}](${url})`) + : /* Fetch failed, don't replace */ text, + article.text + ); + const thisMonth = monthFormatter.format(new Date()); const createdMonth = monthFormatter.format(new Date(article.createdAt)); @@ -33,7 +73,7 @@ export async function createNewAIReply({ }, { role: 'user', - content: article.text, + content: argumentedArticleText, }, { role: 'user', diff --git a/src/graphql/mutations/__fixtures__/CreateAIReply.js b/src/graphql/mutations/__fixtures__/CreateAIReply.js index ac0d0b72..81322bf7 100644 --- a/src/graphql/mutations/__fixtures__/CreateAIReply.js +++ b/src/graphql/mutations/__fixtures__/CreateAIReply.js @@ -12,6 +12,26 @@ export default { text: 'Some article', createdAt: '2020-01-01T00:00:00.000Z', }, + '/articles/doc/with-resolved-urls': { + text: 'https://foo.com https://foo.com https://bar.com https://bar.com', + createdAt: '2020-01-01T00:00:00.000Z', + hyperlinks: [ + { url: 'https://foo.com', title: 'Foo-title!', summary: 'Foo summary' }, + // Simulate the edge case when there are multiple different entries for 1 URL (should not happen, though...) 
+ { url: 'https://foo.com', title: '', summary: '' }, + // Simulate the case when URL resolution is failed + { url: 'https://bar.com', title: '', summary: '' }, + { url: 'https://bar.com', title: '', summary: '' }, + ], + }, + '/articles/doc/with-no-resolved-urls': { + text: 'https://foo.com\nhttps://bar.com', + createdAt: '2020-01-01T00:00:00.000Z', + hyperlinks: [ + { url: 'https://foo.com', title: '', summary: '' }, + { url: 'https://bar.com', title: '', summary: '' }, + ], + }, '/airesponses/doc/ai-reply-old': { docId: 'ai-replied-article', type: 'AI_REPLY', @@ -37,3 +57,30 @@ export default { createdAt: '2020-01-01T00:00:00.000Z', // Will be filled during test setup }, }; + +export const SUCCESS_OPENAI_RESP = { + data: { + id: 'chatcmpl-some-id', + object: 'chat.completion', + created: 1679847676, + model: 'gpt-3.5-turbo-0301', + usage: { + prompt_tokens: 343, + completion_tokens: 64, + total_tokens: 407, + }, + choices: [ + { + message: { + role: 'assistant', + content: + '閱聽人應該確保登記網站的正確性和安全性,並記得定期更改密碼和密鑰,以保護自己的資訊安全。', + }, + finish_reason: 'stop', + index: 0, + }, + ], + }, + status: 200, + statusText: 'OK', +}; diff --git a/src/graphql/mutations/__tests__/CreateAIReply.js b/src/graphql/mutations/__tests__/CreateAIReply.js index 153ae0f0..3d431cd6 100644 --- a/src/graphql/mutations/__tests__/CreateAIReply.js +++ b/src/graphql/mutations/__tests__/CreateAIReply.js @@ -7,7 +7,7 @@ import delayForMs from 'util/delayForMs'; import gql from 'util/GraphQL'; import { loadFixtures, unloadFixtures, resetFrom } from 'util/fixtures'; -import fixtures from '../__fixtures__/CreateAIReply'; +import fixtures, { SUCCESS_OPENAI_RESP } from '../__fixtures__/CreateAIReply'; import client from 'util/client'; describe('CreateAIReply', () => { @@ -17,6 +17,9 @@ describe('CreateAIReply', () => { afterAll(async () => { await unloadFixtures(fixtures); }); + afterEach(() => { + openai.createChatCompletion.mockReset(); + }); it('throws when specified article does not exist', async () 
=> { const { errors } = await gql` @@ -138,32 +141,7 @@ describe('CreateAIReply', () => { // Simulates API resolves // - resolveAPI({ - data: { - id: 'chatcmpl-some-id', - object: 'chat.completion', - created: 1679847676, - model: 'gpt-3.5-turbo-0301', - usage: { - prompt_tokens: 343, - completion_tokens: 64, - total_tokens: 407, - }, - choices: [ - { - message: { - role: 'assistant', - content: - '閱聽人應該確保登記網站的正確性和安全性,並記得定期更改密碼和密鑰,以保護自己的資訊安全。', - }, - finish_reason: 'stop', - index: 0, - }, - ], - }, - status: 200, - statusText: 'OK', - }); + resolveAPI(SUCCESS_OPENAI_RESP); const { data, errors } = await resp; MockDate.reset(); @@ -312,4 +290,87 @@ describe('CreateAIReply', () => { id, }); }); + + it('replaces URL with hyperlink info', async () => { + const mockFn = openai.createChatCompletion.mockImplementationOnce( + async () => SUCCESS_OPENAI_RESP + ); + + MockDate.set(1602288000000); + + const { + data: { + CreateAIReply: { id }, + }, + } = await gql` + mutation($articleId: String!) { + CreateAIReply(articleId: $articleId) { + id + } + } + `( + { + articleId: 'with-resolved-urls', + }, + { user: { id: 'test', appId: 'test' } } + ); + + MockDate.reset(); + + // Note the URLs being replaced in the message content + // + expect(mockFn.mock.calls).toMatchInlineSnapshot(` + Array [ + Array [ + Object { + "messages": Array [ + Object { + "content": "現在是2020年10月。你是協助讀者進行媒體識讀的小幫手。你說話時總是使用台灣繁體中文。有讀者傳了一則網路訊息給你。這則訊息2020年1月就在網路上流傳。", + "role": "system", + }, + Object { + "content": "[Foo-title! Foo summary](https://foo.com) [Foo-title! Foo summary](https://foo.com) https://bar.com https://bar.com", + "role": "user", + }, + Object { + "content": "請問作為閱聽人,我應該注意這則訊息的哪些地方呢? 
+ 請節錄訊息中需要特別留意的地方,說明為何閱聽人需要注意它,謝謝。", + "role": "user", + }, + ], + "model": "gpt-3.5-turbo", + "temperature": 0, + "user": "test", + }, + ], + ] + `); + + // Cleanup + await client.delete({ + index: 'airesponses', + type: 'doc', + id, + }); + }); + + it('returns null if all URL scrapping are failed', async () => { + const { + data: { CreateAIReply }, + } = await gql` + mutation($articleId: String!) { + CreateAIReply(articleId: $articleId) { + id + } + } + `( + { + articleId: 'with-no-resolved-urls', + }, + { user: { id: 'test', appId: 'test' } } + ); + + expect(CreateAIReply).toBe(null); + expect(openai.createChatCompletion).toBeCalledTimes(0); + }); }); From 02204d660b40efeedf3c65d663c986b458a3eda6 Mon Sep 17 00:00:00 2001 From: MrOrz Date: Wed, 5 Apr 2023 02:24:20 +0800 Subject: [PATCH 5/5] [README] add temperature param to genAIReply doc --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 2fb1cb78..6e9ce689 100644 --- a/README.md +++ b/README.md @@ -226,7 +226,7 @@ The ground truth files in JSON will be written to output directory This command generates a new AI reply even if the article already has an AI reply before. Suitable for the scenario when the existing AI reply is not appropriate. ``` -$ node build/scripts/genAIReply.js -a +$ node build/scripts/genAIReply.js -a --temperature=1 ``` ## One-off migration scripts