Skip to content

Commit

Permalink
Crawl urls in prompt and add markdown to context
Browse files Browse the repository at this point in the history
  • Loading branch information
IanPhilips committed Nov 21, 2024
1 parent bb78336 commit d079759
Show file tree
Hide file tree
Showing 8 changed files with 160 additions and 51 deletions.
1 change: 1 addition & 0 deletions backend/api/package.json
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@
"@giphy/js-fetch-api": "5.0.0",
"@google-cloud/monitoring": "4.0.0",
"@google-cloud/secret-manager": "4.2.1",
"@mendable/firecrawl-js": "1.8.5",
"@supabase/supabase-js": "2.38.5",
"@tiptap/core": "2.0.0-beta.204",
"@tiptap/extension-image": "2.0.0-beta.204",
Expand Down
40 changes: 15 additions & 25 deletions backend/api/src/generate-ai-market-suggestions-2.ts
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@ import { AIGeneratedMarket } from 'common/contract'
import { log } from 'shared/utils'
import {
claudeSystemPrompt,
formattingPrompt,
guidelinesPrompt,
} from 'common/ai-creation-prompts'
import { anythingToRichText } from 'shared/tiptap'
Expand All @@ -12,20 +13,26 @@ import {
largePerplexityModel,
smallPerplexityModel,
} from 'shared/helpers/perplexity'
import { getContentFromPrompt } from './generate-ai-market-suggestions'

// In this version, we use Perplexity to generate context for the prompt, and then Claude to generate market suggestions
export const generateAIMarketSuggestions2: APIHandler<
'generate-ai-market-suggestions-2'
> = async (props, auth) => {
const { prompt, existingTitles } = props
log('Prompt:', prompt)

// Add existing titles to the prompt if provided
const fullPrompt = existingTitles?.length
const promptIncludingTitlesToIgnore = existingTitles?.length
? `${prompt}\n\nPlease suggest new market ideas that are different from these ones:\n${existingTitles
.map((t) => `- ${t}`)
.join('\n')}`
: prompt

const perplexityResponse = await perplexity(prompt, {
const promptIncludingUrlContent = await getContentFromPrompt(
promptIncludingTitlesToIgnore
)

const perplexityResponse = await perplexity(promptIncludingUrlContent, {
model: largePerplexityModel,
})

Expand All @@ -51,28 +58,9 @@ export const generateAIMarketSuggestions2: APIHandler<
}
Here is the user's prompt:
${fullPrompt}
Now, suggest approximately 6 ideas for prediction markets as valid JSON objects that abide by the following Manifold Market schema. Each object should include:
- question (string with 120 characters or less, required)
- Question should be worded as a statement, i.e. Stock price of Tesla above $420 by x date, not Will the stock price of Tesla be above $420 by x date?
- descriptionMarkdown (markdown string, required)
- The description should be a concise summary of the market's context, possible outcomes, sources, and resolution criteria.
- Use any references provided in the References section in the description. Format them with a references header and a bulleted list
- closeDate (string, date in YYYY-MM-DD format, required)
- The close date is when trading stops for the market, and resolution can be made. E.g. if the title includes 'by january 1st 2025', the close date should be 2025-12-31
- outcomeType ("BINARY", "INDEPENDENT_MULTIPLE_CHOICE", "DEPENDENT_MULTIPLE_CHOICE", "POLL", required)
- "BINARY" means there are only two answers, true (yes) or false (no)
- "INDEPENDENT_MULTIPLE_CHOICE" means there are multiple answers, but they are independent of each other (e.g. What will happen during the next presidential debate?)
- "DEPENDENT_MULTIPLE_CHOICE" means there are multiple answers, but they are dependent on each other (e.g. Who will win the presidential election?)
- "POLL" means the question is about a personal matter, i.e. "Should I move to a new city?", "Should I get a new job?", etc.
- answers (array of strings, recommended only if outcomeType is one of the "MULTIPLE_CHOICE" types)
- addAnswersMode ("DISABLED", "ONLY_CREATOR", or "ANYONE", required if one of the "MULTIPLE_CHOICE" types is provided)
- "DISABLED" means that the answers list covers all possible outcomes and no more answers can be added after the market is created
- "ONLY_CREATOR" means that only the creator can add answers after the market is created
- "ANYONE" means that anyone can add answers after the market is created
- reasoning (string, required - extract the reasoning section from each market suggestion)
${promptIncludingUrlContent}
${formattingPrompt}
ONLY return a valid JSON array of market objects and do NOT include any other text.
`
Expand Down Expand Up @@ -105,6 +93,8 @@ export const generateAIMarketSuggestions2: APIHandler<
marketTitles: parsedMarkets.map((m) => m.question),
prompt,
regenerate: !!existingTitles,
hasScrapedContent:
promptIncludingUrlContent !== promptIncludingTitlesToIgnore,
})

return parsedMarkets
Expand Down
81 changes: 59 additions & 22 deletions backend/api/src/generate-ai-market-suggestions.ts
Original file line number Diff line number Diff line change
Expand Up @@ -5,25 +5,30 @@ import { AIGeneratedMarket } from 'common/contract'
import { log } from 'shared/utils'
import {
claudeSystemPrompt,
formattingPrompt,
guidelinesPrompt,
} from 'common/ai-creation-prompts'
import { anythingToRichText } from 'shared/tiptap'
import { track } from 'shared/analytics'
import { scrapeUrl } from './helpers/crawl'

// In this version, we use Perplexity to generate market suggestions, and then refine them with Claude
export const generateAIMarketSuggestions: APIHandler<
'generate-ai-market-suggestions'
> = async (props, auth) => {
const { prompt, existingTitles } = props
log('Prompt:', prompt)

// Add existing titles to the prompt if provided
const fullPrompt = existingTitles?.length
const promptIncludingTitlesToIgnore = existingTitles?.length
? `${prompt}\n\nPlease suggest new market ideas that are different from these ones:\n${existingTitles
.map((t) => `- ${t}`)
.join('\n')}`
: prompt

const perplexityResponse = await perplexity(fullPrompt, {
const promptIncludingUrlContent = await getContentFromPrompt(
promptIncludingTitlesToIgnore
)
const perplexityResponse = await perplexity(promptIncludingUrlContent, {
model: largePerplexityModel,
})

Expand All @@ -33,30 +38,13 @@ export const generateAIMarketSuggestions: APIHandler<

// Format the perplexity suggestions for Claude
const claudePrompt = `
Convert these prediction market ideas into valid JSON objects that abide by the following Manifold Market schema. Each object should include:
- question (string with 120 characters or less, required)
- Question should be worded as a statement, i.e. Stock price of Tesla above $420 by x date, not Will the stock price of Tesla be above $420 by x date?
- descriptionMarkdown (markdown string, required)
- The description should be a concise summary of the market's context, possible outcomes, sources, and resolution criteria.
- closeDate (string, date in YYYY-MM-DD format, required)
- The close date is when trading stops for the market, and resolution can be made. E.g. if the title includes 'by january 1st 2025', the close date should be 2025-12-31
- outcomeType ("BINARY", "INDEPENDENT_MULTIPLE_CHOICE", "DEPENDENT_MULTIPLE_CHOICE", "POLL", required)
- "BINARY" means there are only two answers, true (yes) or false (no)
- "INDEPENDENT_MULTIPLE_CHOICE" means there are multiple answers, but they are independent of each other (e.g. What will happen during the next presidential debate?)
- "DEPENDENT_MULTIPLE_CHOICE" means there are multiple answers, but they are dependent on each other (e.g. Who will win the presidential election?)
- "POLL" means the question is about a personal matter, i.e. "Should I move to a new city?", "Should I get a new job?", etc.
- answers (array of strings, recommended only if outcomeType is one of the "MULTIPLE_CHOICE" types)
- addAnswersMode ("DISABLED", "ONLY_CREATOR", or "ANYONE", required if one of the "MULTIPLE_CHOICE" types is provided)
- "DISABLED" means that the answers list covers all possible outcomes and no more answers can be added after the market is created
- "ONLY_CREATOR" means that only the creator can add answers after the market is created
- "ANYONE" means that anyone can add answers after the market is created
- reasoning (string, required - extract the reasoning section from each market suggestion)
${formattingPrompt}
Please review the market suggestions and refine them according to the following guidelines:
${guidelinesPrompt}
Here is the original user's prompt:
${prompt}
${promptIncludingUrlContent}
Here are the market suggestions to refine and convert into valid JSON objects:
${messages.join('\n')}
Expand Down Expand Up @@ -100,7 +88,56 @@ export const generateAIMarketSuggestions: APIHandler<
marketTitles: parsedMarkets.map((m) => m.question),
prompt,
regenerate: !!existingTitles,
hasScrapedContent:
promptIncludingUrlContent !== promptIncludingTitlesToIgnore,
})

return parsedMarkets
}

// Matches http(s) URLs and bare www. URLs, each running up to the next whitespace
const URL_REGEX = /(https?:\/\/[^\s]+|www\.[^\s]+)/g

// Punctuation that normally belongs to the surrounding sentence, not to the URL
const TRAILING_SENTENCE_PUNCTUATION = /[.,;:!?'"]+$/

// Closing bracket -> its opener; a trailing closer is dropped only when the URL has no opener
const CLOSING_TO_OPENING = new Map<string, string>([
  [')', '('],
  [']', '['],
  ['}', '{'],
  ['>', '<'],
])

// A candidate is only usable if something follows the scheme / "www." prefix
const HAS_HOST = /^(?:https?:\/\/|www\.)\S/

// Removes sentence punctuation and unmatched closing brackets from the end of a matched URL
const stripTrailingPunctuation = (rawUrl: string) => {
  let url = rawUrl
  let previous: string
  do {
    previous = url
    url = url.replace(TRAILING_SENTENCE_PUNCTUATION, '')
    const opener = CLOSING_TO_OPENING.get(url.slice(-1))
    // Keep balanced brackets, e.g. https://en.wikipedia.org/wiki/Foo_(bar)
    if (opener !== undefined && !url.includes(opener)) {
      url = url.slice(0, -1)
    }
  } while (url !== previous)
  return url
}

/**
 * Extracts the distinct URLs mentioned in free-form text.
 *
 * Because the regex matches up to the next whitespace, a URL written inside a
 * sentence ("see https://a.com/x, then ...") or in parentheses would otherwise
 * carry the trailing punctuation with it. That punctuation is stripped here,
 * and repeated URLs are returned once, in order of first appearance.
 *
 * @param text Text that may contain http(s):// or www. URLs.
 * @returns The cleaned, de-duplicated URLs; an empty array when none are found.
 */
const extractUrls = (text: string) => {
  const matches: string[] = text.match(URL_REGEX) ?? []
  const cleaned = matches
    .map(stripTrailingPunctuation)
    .filter((url) => HAS_HOST.test(url))
  return Array.from(new Set(cleaned))
}
/**
 * Augments a user prompt with the markdown content of any URLs it mentions.
 *
 * Every distinct URL found in the prompt is scraped in parallel. Scrapes that
 * fail or return no markdown are logged/omitted rather than aborting the
 * request.
 *
 * @param prompt The user's prompt, possibly containing URLs.
 * @returns The prompt followed by a section listing each successfully scraped
 *   URL and its markdown. When the prompt has no URLs, or nothing could be
 *   scraped, the prompt is returned unchanged, so callers can detect scraped
 *   content by comparing the result with their input.
 */
export const getContentFromPrompt = async (prompt: string) => {
  // De-duplicate so a URL mentioned twice is only scraped once
  const urls = Array.from(new Set<string>(extractUrls(prompt)))
  if (urls.length === 0) return prompt

  // allSettled never rejects, so one failing URL cannot sink the others
  const scrapeResults = await Promise.allSettled(
    urls.map((url) => scrapeUrl(url))
  )

  // Only URLs that actually produced markdown end up in this map
  const urlToContent: Record<string, string> = {}
  urls.forEach((url, i) => {
    const result = scrapeResults[i]
    if (result.status === 'fulfilled') {
      const markdown = result.value.markdown
      if (markdown) urlToContent[url] = markdown
    } else {
      log.error('Failed to scrape URL:', {
        error: result.reason,
        url,
      })
    }
  })

  log('Scrape results:', urlToContent)

  const scrapedEntries = Object.entries(urlToContent)
  // Nothing usable was scraped: leave the prompt untouched instead of
  // appending an empty (or "undefined") content section
  if (scrapedEntries.length === 0) return prompt

  return `${prompt}\n\nWe found the following content from the provided URL(s):\n\n${scrapedEntries
    .map(([url, content]) => `${url}:\n${content}`)
    .join('\n\n')}`
}
15 changes: 15 additions & 0 deletions backend/api/src/helpers/crawl.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
import FirecrawlApp from '@mendable/firecrawl-js'
import { APIError } from 'common/api/utils'

/**
 * Fetches a single page through Firecrawl, requesting its markdown rendering.
 *
 * The API key is read from the FIRECRAWL_API_KEY environment variable.
 *
 * @param url The page to scrape.
 * @returns The Firecrawl response for a successful scrape.
 * @throws APIError (500) when Firecrawl reports the scrape as unsuccessful.
 */
export const scrapeUrl = async (url: string) => {
  const firecrawl = new FirecrawlApp({ apiKey: process.env.FIRECRAWL_API_KEY })
  const result = await firecrawl.scrapeUrl(url, { formats: ['markdown'] })

  if (result.success) {
    return result
  }

  throw new APIError(500, `Failed to scrape: ${result.error}`)
}
21 changes: 21 additions & 0 deletions common/src/ai-creation-prompts.ts
Original file line number Diff line number Diff line change
Expand Up @@ -62,6 +62,27 @@ Following each market suggestion, add a "Reasoning:" section that addresses the
2. Why it's a good prediction market (e.g., has clear resolution criteria, neither a yes nor no outcome is overwhelmingly likely, etc. from above)
`

/**
 * Shared instructions describing the JSON schema that generated market
 * suggestions must follow (question, descriptionMarkdown, closeDate,
 * outcomeType, answers, addAnswersMode, reasoning). Interpolated into the
 * Claude prompts of both AI market suggestion endpoints.
 */
export const formattingPrompt = `
Convert these prediction market ideas into valid JSON objects that abide by the following Manifold Market schema. Each object should include:
- question (string with 120 characters or less, required)
- Question should be worded as a statement, i.e. Stock price of Tesla above $420 by x date, not Will the stock price of Tesla be above $420 by x date?
- descriptionMarkdown (markdown string, required)
- The description should be a concise summary of the market's context, possible outcomes, sources, and resolution criteria.
- closeDate (string, date in YYYY-MM-DD format, required)
- The close date is when trading stops for the market, and resolution can be made. E.g. if the title includes 'by january 1st 2025', the close date should be 2024-12-31
- outcomeType ("BINARY", "INDEPENDENT_MULTIPLE_CHOICE", "DEPENDENT_MULTIPLE_CHOICE", "POLL", required)
- "BINARY" means there are only two answers, true (yes) or false (no)
- "INDEPENDENT_MULTIPLE_CHOICE" means there are multiple answers, and ANY of them can resolve yes, e.g. What will happen during the next presidential debate? Which companies will express interest in buying twitter?
- "DEPENDENT_MULTIPLE_CHOICE" means there are multiple answers, but ONLY one can resolve yes, (while the rest resolve no) e.g. Who will win the presidential election?, Who will be the first to express interest in buying twitter?
- "POLL" means the question is about a personal matter, i.e. "Should I move to a new city?", "Should I get a new job?", etc.
- answers (array of strings, recommended only if outcomeType is one of the "DEPENDENT_MULTIPLE_CHOICE" or "INDEPENDENT_MULTIPLE_CHOICE" types)
- addAnswersMode ("DISABLED", "ONLY_CREATOR", or "ANYONE", required if one of the "DEPENDENT_MULTIPLE_CHOICE" or "INDEPENDENT_MULTIPLE_CHOICE" types is provided)
- "DISABLED" means that the answers list covers all possible outcomes and no more answers can be added after the market is created
- "ONLY_CREATOR" means that only the creator can add answers after the market is created
- "ANYONE" means that anyone can add answers after the market is created
- If the addAnswersMode is "ONLY_CREATOR" or "ANYONE", while the outcomeType is "DEPENDENT_MULTIPLE_CHOICE", then Manifold will automatically add the 'Other' option to the answers list, so you do not need to include it in the array.
- reasoning (string, required - extract the reasoning section from each market suggestion)`

export const perplexitySystemPrompt = `You are a helpful assistant that creates engaging prediction markets on Manifold Markets.
Your role is to transform a user's prompt into at least approximately 6 well-structured prediction markets that encourage participation and meaningful forecasting.
`
Expand Down
1 change: 1 addition & 0 deletions common/src/secrets.ts
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,7 @@ export const secrets = (
'GIDX_ACTIVITY_TYPE_ID',
'ANTHROPIC_API_KEY',
'PERPLEXITY_API_KEY',
'FIRECRAWL_API_KEY',
// Some typescript voodoo to keep the string literal types while being not readonly.
] as const
).concat()
Expand Down
12 changes: 8 additions & 4 deletions web/components/new-contract/ai-market-suggestions-panel.tsx
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
import { useState, useCallback } from 'react'
import { api } from 'web/lib/api/api'
import { api, APIError } from 'web/lib/api/api'
import { Button } from '../buttons/button'
import { Col } from '../layout/col'
import { ExpandingInput } from '../widgets/expanding-input'
Expand All @@ -12,6 +12,7 @@ import { usePersistentInMemoryState } from 'web/hooks/use-persistent-in-memory-s
import { usePersistentLocalState } from 'web/hooks/use-persistent-local-state'
import { ALL_CONTRACT_TYPES } from './create-contract-types'
import { track } from 'web/lib/service/analytics'
import { toast } from 'react-hot-toast'

export function AIMarketSuggestionsPanel(props: {
onSelectSuggestion: (suggestion: AIGeneratedMarket) => void
Expand Down Expand Up @@ -48,7 +49,6 @@ export function AIMarketSuggestionsPanel(props: {
}),
])

// Randomly sort results by promptVersion
const combinedResults = [...result1, ...result2].sort(
() => Math.random() - 0.5
)
Expand All @@ -57,7 +57,11 @@ export function AIMarketSuggestionsPanel(props: {
regenerate ? [...combinedResults, ...markets] : combinedResults
)
} catch (e) {
console.error(e)
if (e instanceof APIError) {
toast.error(e.message)
} else {
console.error(e)
}
}
if (regenerate) {
setLoadingMore(false)
Expand Down Expand Up @@ -119,7 +123,7 @@ export function AIMarketSuggestionsPanel(props: {
{loadingSuggestions || loadingMore ? (
<Row className="items-center gap-2">
<LoadingIndicator />
<span>Hold tight, this can take 30 seconds!</span>
<span>Hang on, this can take up to a minute!</span>
</Row>
) : prompt === lastGeneratedPrompt ? (
'Generate more'
Expand Down
40 changes: 40 additions & 0 deletions yarn.lock
Original file line number Diff line number Diff line change
Expand Up @@ -2161,6 +2161,17 @@
dependencies:
lodash "^4.17.21"

"@mendable/firecrawl-js@1.8.5":
version "1.8.5"
resolved "https://registry.yarnpkg.com/@mendable/firecrawl-js/-/firecrawl-js-1.8.5.tgz#359888e4c44884b3ee3f58d52f035756111f6f73"
integrity sha512-gwXBbekZerL226HEDrNN577+oE1XqrJUeewZ/axMB6OFqSQGGRHGmknDnRa5Ote5ukxmWCIUey/UsnHnfGmf8w==
dependencies:
axios "^1.6.8"
isows "^1.0.4"
typescript-event-target "^1.1.1"
zod "^3.23.8"
zod-to-json-schema "^3.23.0"

"@next/env@14.1.0":
version "14.1.0"
resolved "https://registry.yarnpkg.com/@next/env/-/env-14.1.0.tgz#43d92ebb53bc0ae43dcc64fb4d418f8f17d7a341"
Expand Down Expand Up @@ -5714,6 +5725,15 @@ axios@^1.6.0:
form-data "^4.0.0"
proxy-from-env "^1.1.0"

axios@^1.6.8:
version "1.7.7"
resolved "https://registry.yarnpkg.com/axios/-/axios-1.7.7.tgz#2f554296f9892a72ac8d8e4c5b79c14a91d0a47f"
integrity sha512-S4kL7XrjgBmvdGut0sN3yJxqYzrDOnivkBiN0OFs6hLiUam3UPvswUo0kqGyhqUZGEOytHyumEdXsAkgCOUf3Q==
dependencies:
follow-redirects "^1.15.6"
form-data "^4.0.0"
proxy-from-env "^1.1.0"

axobject-query@^3.1.1:
version "3.2.1"
resolved "https://registry.yarnpkg.com/axobject-query/-/axobject-query-3.2.1.tgz#39c378a6e3b06ca679f29138151e45b2b32da62a"
Expand Down Expand Up @@ -8703,6 +8723,11 @@ isexe@^2.0.0:
resolved "https://registry.yarnpkg.com/isexe/-/isexe-2.0.0.tgz#e8fbf374dc556ff8947a10dcb0572d633f2cfa10"
integrity sha512-RHxMLp9lnKHGHRng9QFhRCMbYAcVpn69smSGcq3f36xjgVVWThj4qqLbTLlq7Ssj8B+fIQ1EuCEGI2lKsyQeIw==

isows@^1.0.4:
version "1.0.6"
resolved "https://registry.yarnpkg.com/isows/-/isows-1.0.6.tgz#0da29d706fa51551c663c627ace42769850f86e7"
integrity sha512-lPHCayd40oW98/I0uvgaHKWCSvkzY27LjWLbtzOm64yQ+G3Q5npjjbdppU65iZXkK1Zt+kH9pfegli0AYfwYYw==

isstream@~0.1.2:
version "0.1.2"
resolved "https://registry.yarnpkg.com/isstream/-/isstream-0.1.2.tgz#47e63f7af55afa6f92e1500e690eb8b8529c099a"
Expand Down Expand Up @@ -12483,6 +12508,11 @@ typed-array-length@^1.0.4:
for-each "^0.3.3"
is-typed-array "^1.1.9"

typescript-event-target@^1.1.1:
version "1.1.1"
resolved "https://registry.yarnpkg.com/typescript-event-target/-/typescript-event-target-1.1.1.tgz#20a6d491b77d2e37dc432c5394ab74c0d7065539"
integrity sha512-dFSOFBKV6uwaloBCCUhxlD3Pr/P1a/tJdcmPrTXCHlEFD3faj0mztjcGn6VBAhQ0/Bdy8K3VWrrqwbt/ffsYsg==

typescript@5.3.2, typescript@^5.3.2:
version "5.3.2"
resolved "https://registry.yarnpkg.com/typescript/-/typescript-5.3.2.tgz#00d1c7c1c46928c5845c1ee8d0cc2791031d4c43"
Expand Down Expand Up @@ -12977,7 +13007,17 @@ zeed-dom@^0.9.19:
dependencies:
css-what "^6.1.0"

zod-to-json-schema@^3.23.0:
version "3.23.5"
resolved "https://registry.yarnpkg.com/zod-to-json-schema/-/zod-to-json-schema-3.23.5.tgz#ec23def47dcafe3a4d640eba6a346b34f9a693a5"
integrity sha512-5wlSS0bXfF/BrL4jPAbz9da5hDlDptdEppYfe+x4eIJ7jioqKG9uUxOwPzqof09u/XeVdrgFu29lZi+8XNDJtA==

zod@3.21.4:
version "3.21.4"
resolved "https://registry.yarnpkg.com/zod/-/zod-3.21.4.tgz#10882231d992519f0a10b5dd58a38c9dabbb64db"
integrity sha512-m46AKbrzKVzOzs/DZgVnG5H55N1sv1M8qZU3A8RIKbs3mrACDNeIOeilDymVb2HdmP8uwshOCF4uJ8uM9rCqJw==

zod@^3.23.8:
version "3.23.8"
resolved "https://registry.yarnpkg.com/zod/-/zod-3.23.8.tgz#e37b957b5d52079769fb8097099b592f0ef4067d"
integrity sha512-XBx9AXhXktjUqnepgTiE5flcKIYWi/rme0Eaj+5Y0lftuGBq+jyRu/md4WnuxqgP1ubdpNCsYEYPxrzVHD8d6g==

0 comments on commit d079759

Please sign in to comment.