diff --git a/scripts/check-metadata.js b/scripts/check-metadata.js
index d9b81e69..18ad38a6 100644
--- a/scripts/check-metadata.js
+++ b/scripts/check-metadata.js
@@ -2,8 +2,7 @@
 const fs = require('fs');
 const path = require('path');
 const { parse } = require('@babel/parser');
 const traverse = require('@babel/traverse').default;
 const readline = require('readline');
-const { resolveMetadata } = require('next/dist/lib/metadata/resolve-metadata');
 const { generateCombinations, slugify } = require('./create-ai-assisted-dev-tools-comparison-pages');
 const { tools } = require('../schema/data/ai-assisted-developer-tools.json');
@@ -11,9 +10,6 @@ const { tools } = require('../schema/data/ai-assisted-developer-tools.json');
 const appDir = path.join(process.cwd(), 'src', 'app');
 const expectedMetadataFields = ['title', 'description', 'openGraph', 'twitter', 'author', 'date', 'image'];
 
-// Add a debug flag
-const debug = process.argv.includes('--debug');
-
 function analyzeFile(filePath) {
   try {
     const content = fs.readFileSync(filePath, 'utf8');
@@ -27,9 +23,7 @@ function analyzeFile(filePath) {
       const metadataString = directMetadataMatch ? directMetadataMatch[1] : createMetadataMatch[1];
       const definedFields = metadataString.match(/(\w+):/g).map(field => field.replace(':', ''));
 
-      // Check if createMetadata is used
       if (createMetadataMatch) {
-        // Add fields that are always included in createMetadata
        definedFields.push('openGraph', 'twitter');
       }
 
@@ -60,7 +54,7 @@ function analyzeFile(filePath) {
     ExportDeclaration(path) {
       if (path.node.declaration && path.node.declaration.id && path.node.declaration.id.name === 'generateMetadata') {
         hasMetadata = true;
-        definedFields = ['dynamic']; // Assume dynamic metadata covers all fields
+        definedFields = ['dynamic'];
       }
     },
   });
@@ -76,11 +70,9 @@ function analyzeFile(filePath) {
 function handleDynamicPages() {
   const dynamicPages = [];
 
-  // Handle vector database comparison pages
   const vectorDbDir = path.join(process.cwd(), 'src', 'app', 'comparisons', 'vector-databases');
   dynamicPages.push(path.join(vectorDbDir, 'page.mdx'));
 
-  // Handle AI-assisted dev tools comparison pages
   const combinations = generateCombinations(tools);
   combinations.forEach(([tool1, tool2]) => {
     const slug = `${slugify(tool1.name)}-vs-${slugify(tool2.name)}`;
@@ -88,7 +80,6 @@ function handleDynamicPages() {
     dynamicPages.push(path.join(comparisonDir, 'page.mdx'));
   });
 
-  // Handle the main AI-assisted dev tools comparison post
   const mainComparisonDir = path.join(process.cwd(), 'src', 'app', 'blog', 'ai-assisted-dev-tools-compared');
   dynamicPages.push(path.join(mainComparisonDir, 'page.mdx'));
@@ -120,7 +111,6 @@ function generateReport() {
 
   traverseDir(appDir);
 
-  // Handle dynamic pages
   const dynamicPages = handleDynamicPages();
   dynamicPages.forEach(pagePath => {
     analyzeAndAddToReport(pagePath, report);
@@ -187,7 +177,7 @@ function generatePRComment(report) {
     comment += "No metadata issues found in this pull request. Great job!\n";
   }
 
-  comment += "For full details, please check the [metadata-report.md](../artifacts/metadata-reports/metadata-report.md) artifact.\n\n";
+  comment += "For full details, please check the metadata-report.md artifact.\n\n";
 
   comment += "## Artifacts\n\n";
   comment += "* metadata-report.md\n";
@@ -196,6 +186,16 @@
   return comment;
 }
 
+function writeReportAndLog(report) {
+  const markdownReport = generatePRComment(report);
+  fs.writeFileSync('metadata-report.md', markdownReport);
+
+  const jsonReport = JSON.stringify(report, null, 2);
+  fs.writeFileSync('metadata-report.json', jsonReport);
+
+  console.log(markdownReport);
+}
+
 async function debugMetadata(report) {
   const rl = readline.createInterface({
     input: process.stdin,
@@ -267,16 +267,6 @@ function parseMetadata(fileContent) {
   return null;
 }
 
-function writeReportAndLog(report) {
-  const markdownReport = generatePRComment(report);
-  fs.writeFileSync('metadata-report.md', markdownReport);
-
-  const jsonReport = JSON.stringify(report, null, 2);
-  fs.writeFileSync('metadata-report.json', jsonReport);
-
-  console.log(markdownReport);
-}
-
 if (process.argv.includes('--debug')) {
   const report = generateReport();
   debugMetadata(report);
diff --git a/src/app/chat/ChatPageClient.tsx b/src/app/chat/ChatPageClient.tsx
new file mode 100644
index 00000000..5b2da2e5
--- /dev/null
+++ b/src/app/chat/ChatPageClient.tsx
@@ -0,0 +1,147 @@
+'use client'
+
+import Link from 'next/link';
+import { Container } from '@/components/Container';
+import { useState, Suspense } from 'react';
+import { useChat } from 'ai/react';
+import { track } from '@vercel/analytics';
+import { clsx } from 'clsx';
+import RandomImage from '@/components/RandomImage';
+import SearchForm from '@/components/SearchForm';
+import { LoadingAnimation } from '@/components/LoadingAnimation';
+import { BlogPostCard } from '@/components/BlogPostCard';
+import { ArticleWithSlug } from '@/lib/shared-types';
+
+const prepopulatedQuestions = [
+  "What is the programming bug?",
+  "Why do you love Next.js so much?",
+  "What do you do at Pinecone?",
+  "How can I become a better developer?",
+  "What is ggshield and why is it important?",
+  "How can I use AI to complete side projects more quickly?"
+];
+
+export default function ChatPageClient() {
+  const [isLoading, setIsLoading] = useState(false);
+  const [articles, setArticles] = useState<ArticleWithSlug[]>([]);
+
+  const { messages, input, setInput, handleSubmit } = useChat({
+    onResponse(response) {
+      const sourcesHeader = response.headers.get('x-sources');
+      const parsedArticles: ArticleWithSlug[] = sourcesHeader
+        ? (JSON.parse(atob(sourcesHeader as string)) as ArticleWithSlug[])
+        : [];
+      console.log(`parsedArticles: %o`, parsedArticles);
+      setArticles(parsedArticles);
+      setIsLoading(false);
+    },
+    headers: {},
+    onFinish() {
+      gtag("event", "chat_question", {
+        event_category: "chat",
+        event_label: input,
+      });
+      track("chat", { question: input });
+    },
+    onError() {
+      setIsLoading(false);
+    }
+  });
+
+  const handleSearch = async (query: string) => {
+    setInput(query);
+
+    gtag("event", "chat_use_precanned_question", {
+      event_category: "chat",
+      event_label: query,
+    });
+
+    track("chat-precanned", { question: query });
+
+    const customSubmitEvent = {
+      preventDefault: () => { },
+    } as unknown as React.FormEvent;
+
+    await handleSubmit(customSubmitEvent);
+  };
+
+  return (
+
+
+
+

+ Chat with me +

+

+ This experience uses Pinecone, OpenAI and LangChain... +

+

+ Learn how to build this with my tutorial +

+
+
+
}> + + +
+
+ + {/* Chat interface */} +
+ +
+ + {isLoading && messages?.length > 0 && } + + {/* Chat messages and related posts */} +
+
+ {messages.map((m) => ( +
+ + {m.role === 'user' + ? 'You: ' + : "The Ghost of Zachary Proser's Writing: "} + + {m.content} +
+ ))} +
+
+ {Array.isArray(articles) && (articles.length > 0) && ( +
+

Related Posts

+
+ {(articles as ArticleWithSlug[]).map((article) => ( + + ))} +
+
+ )} +
+
+
+ +
+ +
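For context on the onResponse handler above: the client expects related articles as base64-encoded JSON in an x-sources response header. The producing side is not part of this diff; a sketch of how a route handler could set that header, with hypothetical helper names standing in for the real RAG pipeline:

```ts
// app/api/chat/route.ts (hypothetical sketch of how x-sources gets set)
import type { ArticleWithSlug } from '@/lib/shared-types';

// Stand-ins for the real RAG pipeline, which this diff does not include.
declare function retrieveRelatedArticles(query: string): Promise<ArticleWithSlug[]>;
declare function streamChatCompletion(query: string): Promise<ReadableStream>;

export async function POST(req: Request) {
  const { messages } = await req.json();
  const query = messages[messages.length - 1].content;

  const articles = await retrieveRelatedArticles(query);

  return new Response(await streamChatCompletion(query), {
    headers: {
      // The client's onResponse reverses this with atob() + JSON.parse().
      'x-sources': Buffer.from(JSON.stringify(articles)).toString('base64'),
    },
  });
}
```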
+ ); +} diff --git a/src/app/chat/page.tsx b/src/app/chat/page.tsx index f5843241..aecf02d0 100644 --- a/src/app/chat/page.tsx +++ b/src/app/chat/page.tsx @@ -1,144 +1,12 @@ -'use client' +import { Metadata } from 'next' +import ChatPageClient from './ChatPageClient' +import { createMetadata } from '@/utils/createMetadata' -import Link from 'next/link'; -import { Container } from '@/components/Container'; -import { useState, useEffect, Suspense } from 'react'; -import { useChat } from 'ai/react'; -import { track } from '@vercel/analytics'; -import { clsx } from 'clsx'; -import RandomImage from '@/components/RandomImage'; -import SearchForm from '@/components/SearchForm'; -import { LoadingAnimation } from '@/components/LoadingAnimation'; -import { BlogPostCard } from '@/components/BlogPostCard'; -import { ArticleWithSlug } from '@/lib/shared-types'; - -const prepopulatedQuestions = [ - "What is the programming bug?", - "Why do you love Next.js so much?", - "What do you do at Pinecone?", - "How can I become a better developer?", - "What is ggshield and why is it important?", - "How can I use AI to complete side projects more quickly?" -]; +export const metadata: Metadata = createMetadata({ + title: 'Chat with my writing', + description: 'Chat with a custom RAG pipeline of my blog content using Pinecone, OpenAI, and LangChain.', +}) export default function ChatPage() { - const [isLoading, setIsLoading] = useState(false); - const [articles, setArticles] = useState([]); - - const { messages, input, setInput, handleSubmit } = useChat({ - onResponse(response) { - const sourcesHeader = response.headers.get('x-sources'); - const parsedArticles: ArticleWithSlug[] = sourcesHeader - ? (JSON.parse(atob(sourcesHeader as string)) as ArticleWithSlug[]) - : []; - console.log(`parsedArticle %o`, parsedArticles); - setArticles(parsedArticles); - setIsLoading(false); - }, - headers: {}, - onFinish() { - gtag("event", "chat_question", { - event_category: "chat", - event_label: input, - }); - track("chat", { question: input }); - }, - onError() { - setIsLoading(false); - } - }); - - const handleSearch = async (query: string) => { - setInput(query); - - gtag("event", "chat_use_precanned_question", { - event_category: "chat", - event_label: query, - }); - - track("chat-precanned", { question: query }); - - const customSubmitEvent = { - preventDefault: () => { }, - } as unknown as React.FormEvent; - - await handleSubmit(customSubmitEvent); - }; - - return ( - -
-
-
-

- Chat with me -

-

- This experience uses Pinecone, OpenAI and LangChain... -

-

- Learn how to build this with my tutorial -

-
-
-
}> - - -
-
- - {/* Chat interface */} -
- -
- - {isLoading && messages?.length > 0 && } - - {/* Chat messages and related posts */} -
-
- {messages.map((m) => ( -
- - {m.role === 'user' - ? 'You: ' - : "The Ghost of Zachary Proser's Writing: "} - - {m.content} -
- ))} -
-
- {Array.isArray(articles) && (articles.length > 0) && ( -
-

Related Posts

- {(articles as ArticleWithSlug[]).map((article) => ( - - ))} -
- )} -
-
-
- -
- -
); -} + return +} \ No newline at end of file diff --git a/src/app/demos/embeddings/EmbeddingsDemoClient.tsx b/src/app/demos/embeddings/EmbeddingsDemoClient.tsx new file mode 100644 index 00000000..c61142de --- /dev/null +++ b/src/app/demos/embeddings/EmbeddingsDemoClient.tsx @@ -0,0 +1,123 @@ +'use client' + +import { Container } from '@/components/Container' +import { useState } from 'react'; + +const getColorForToken = (token: string) => { + const tokenId = token.charCodeAt(0); + const hue = (tokenId * 137.508) % 360; + return `hsl(${hue}, 50%, 80%)`; +}; + +export default function EmbeddingsDemoClient() { + const [inputText, setInputText] = useState(''); + const [embeddings, setEmbeddings] = useState([]); + + const generateTokens = async () => { + try { + const response = await fetch('/api/embeddings', { + method: 'POST', + headers: { + 'Content-Type': 'application/json', + }, + body: JSON.stringify({ inputText }), + }); + + const data = await response.json(); + console.log(`data: %o`, data.embeddings); + setEmbeddings(data.embeddings); + } catch (error) { + console.error('Error generating tokens:', error); + // Handle error state + } + } + + const handleInputChange = (e: React.ChangeEvent) => { + setInputText(e.target.value); + }; + + return ( + +
+

Embeddings Demo

+

This interactive demo showcases the process of converting natural language into vectors or embeddings, a fundamental technique used in natural language processing (NLP) and generative AI.

+

This interactive demo lets you type in any text you like and instantly convert it to embeddings. Click the button below to see how your text gets mapped to a numerical representation that captures its semantic meaning.

+
+ +
+ +
+

As you type, your sentence is split into words, the way we humans tend to see and read them:

+
+ {inputText.split(' ').map((word, index) => ( + + {word} + + ))} +
+
+

But how does a machine understand the defining features of your text? Click the button below to convert your text to embeddings.

+ +
+ {embeddings} +
+
+
+

What are embeddings or vectors?

+

+ Embeddings are a powerful machine learning technique that allows computers to understand and represent the meaning and relationships between words and phrases. With embeddings, each word or chunk of text is mapped to a vector of numbers in a high-dimensional space, such that words with similar meanings are located close together.

+
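Not from the diff: a small standalone sketch of the "close together" idea. Cosine similarity scores how similar in direction two vectors are; the numbers below are toy values, not real model output.

```ts
// Cosine similarity: values near 1 mean similar direction,
// which for embeddings means similar meaning.
function cosineSimilarity(a: number[], b: number[]): number {
  let dot = 0, normA = 0, normB = 0;
  for (let i = 0; i < a.length; i++) {
    dot += a[i] * b[i];
    normA += a[i] * a[i];
    normB += b[i] * b[i];
  }
  return dot / (Math.sqrt(normA) * Math.sqrt(normB));
}

// Hypothetical 4-dimensional "embeddings"; real models use hundreds or thousands of dimensions.
const king = [0.8, 0.65, 0.1, 0.2];
const queen = [0.78, 0.7, 0.12, 0.18];
const banana = [0.05, 0.1, 0.9, 0.7];

console.log(cosineSimilarity(king, queen));  // ~0.998: nearly identical direction
console.log(cosineSimilarity(king, banana)); // ~0.28: unrelated meaning
```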
+ +
+

How embeddings models work

+

+ Under the hood, embeddings models like word2vec or GloVe are trained on massive amounts of text data, like all the articles on Wikipedia. The model learns the patterns of which words tend to appear in similar contexts. +

+ +

+ For example, the model might frequently see phrases like &ldquo;the king sits on his throne&rdquo; and &ldquo;the queen sits on her throne&rdquo;. From many examples like this, it learns that king and queen have similar meanings and usage patterns. The model represents this similarity by assigning king and queen vectors that are close together in the embedding space.

+ +

+ By doing this for all words across a huge corpus, the model builds up a rich understanding of the relationships between words based on the contexts they appear in. The resulting embedding vectors miraculously seem to capture analogies and hierarchical relationships. +

+
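A toy sketch (not part of the diff) of the classic word2vec-style analogy the paragraph above describes: with vectors laid out this way, king - man + woman lands on queen. All values are invented for illustration.

```ts
// Element-wise vector arithmetic over toy 3-dimensional "embeddings".
const sub = (a: number[], b: number[]) => a.map((v, i) => v - b[i]);
const add = (a: number[], b: number[]) => a.map((v, i) => v + b[i]);

// Invented axes: [royalty, masculine, feminine]
const king  = [0.9, 0.8, 0.1];
const man   = [0.1, 0.8, 0.1];
const woman = [0.1, 0.1, 0.8];
const queen = [0.9, 0.1, 0.8];

const result = add(sub(king, man), woman);
console.log(result); // ~[0.9, 0.1, 0.8], i.e. the toy "queen" (up to float noise)
```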
+ +
+

Why embeddings are powerful and having a moment

+

+ Embeddings are incredibly powerful because they allow machine learning models to understand language in a more flexible, nuanced way than just memorizing specific words and phrases. By capturing the semantic relationships between words, embeddings enable all sorts of natural language tasks like analogical reasoning, sentiment analysis, named entity recognition, and more. +

+

We're seeing a boom in embeddings and their applications right now due to several factors:

+ +

1. The rise of transformers and attention-based language models like BERT that generate even richer, more contextual embeddings

+

2. Ever-increasing amounts of text data to train huge embeddings models

+

3. More powerful hardware and techniques for training massive models

+

4. Creative new applications for embeddings, like using them for semantic search, knowledge retrieval, multi-modal learning, and more

+ +

Embeddings are quickly becoming an essential tool that will power the next wave of natural language AI systems. They're a core reason behind the rapid progress in natural language processing and the explosion of generative AI tools we are seeing today.

+
+
+
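The /api/embeddings route this component calls is not part of the diff. A minimal sketch of what such a Next.js route handler could look like, assuming the openai npm client and an OPENAI_API_KEY environment variable; the model name is an assumption, not something this codebase confirms.

```ts
// app/api/embeddings/route.ts (hypothetical; not included in this diff)
import OpenAI from 'openai';

const openai = new OpenAI(); // reads OPENAI_API_KEY from the environment

export async function POST(req: Request) {
  const { inputText } = await req.json();

  // One embedding per input string; the model name is an assumption.
  const result = await openai.embeddings.create({
    model: 'text-embedding-3-small',
    input: inputText,
  });

  // result.data[0].embedding is an array of floats (1536 for this model).
  return Response.json({ embeddings: result.data[0].embedding });
}
```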
+ ); +} \ No newline at end of file diff --git a/src/app/demos/embeddings/page.tsx b/src/app/demos/embeddings/page.tsx index ddb4cbc9..a0ee5671 100644 --- a/src/app/demos/embeddings/page.tsx +++ b/src/app/demos/embeddings/page.tsx @@ -1,133 +1,17 @@ -'use client' +import { Metadata } from 'next' +import { Suspense } from 'react' +import { createMetadata } from '@/utils/createMetadata' +import EmbeddingsDemoClient from './EmbeddingsDemoClient' -import { Container } from '@/components/Container' -import { Suspense, useState } from 'react'; +export const metadata: Metadata = createMetadata({ + title: 'Embeddings Demo', + description: 'Interactive demo of converting natural language into vectors or embeddings, a fundamental technique used in natural language processing (NLP) and generative AI.', +}) -const getColorForToken = (token: string) => { - const tokenId = token.charCodeAt(0); - const hue = (tokenId * 137.508) % 360; - return `hsl(${hue}, 50%, 80%)`; -}; - -function EmbeddingsDemo() { - const [inputText, setInputText] = useState(''); - const [embeddings, setEmbeddings] = useState([]); - - const generateTokens = async () => { - try { - const response = await fetch('/api/embeddings', { - method: 'POST', - headers: { - 'Content-Type': 'application/json', - }, - body: JSON.stringify({ inputText }), - }); - - const data = await response.json(); - console.log(`data: %o`, data.embeddings); - setEmbeddings(data.embeddings); - } catch (error) { - console.error('Error generating tokens:', error); - // Handle error state - } - } - - const handleInputChange = (e: React.ChangeEvent) => { - setInputText(e.target.value); - }; - - return ( - -
-

Embeddings Demo

-

This interactive demo showcases the process of converting natural language into vectors or embeddings, a fundamental technique used in natural language processing (NLP) and generative AI.

-

This interactive demo lets you type in any text you like and instantly convert it to embeddings. Click the button below to see how your text gets mapped to a numerical representation that captures its semantic meaning.

-
- -
- -
-

As you type, your sentence is split into words, the way us humans tend to see and read them:

-
- {inputText.split(' ').map((word, index) => ( - - {word} - - ))} -
-
-

But how does a machine understand the defining features of your text? Click the button below to convert your text to embeddings.

- -
- {embeddings} -
-
-
-

What are embeddings or vectors?

-

- Embeddings are a powerful machine learning technique that allow computers to understand and represent the meaning and relationships between words and phrases. With embeddings, each word or chunk of text is mapped to a vector of numbers in a high-dimensional space, such that words with similar meanings are located close together.

-
- -
-

How embeddings models work

-

- Under the hood, embeddings models like word2vec or GloVe are trained on massive amounts of text data, like all the articles on Wikipedia. The model learns the patterns of which words tend to appear in similar contexts. -

- -

- For example, the model might frequently see phrases like &lquot;the king sits on his throne&rquot; and &lquot;the queen sits on her throne&rquot. From many examples like this, it learns that king and queen have similar meanings and usage patterns. The model represents this similarity by assigning king and queen vectors that are close together in the embedding space. -

- -

- By doing this for all words across a huge corpus, the model builds up a rich understanding of the relationships between words based on the contexts they appear in. The resulting embedding vectors miraculously seem to capture analogies and hierarchical relationships. -

-
- - - -
-

Why embeddings are powerful and having a moment

-

- Embeddings are incredibly powerful because they allow machine learning models to understand language in a more flexible, nuanced way than just memorizing specific words and phrases. By capturing the semantic relationships between words, embeddings enable all sorts of natural language tasks like analogical reasoning, sentiment analysis, named entity recognition, and more. -

-

We're seeing a boom in embeddings and their applications right now due to several factors:

- -

1. The rise of transformers and attention-based language models like BERT that generate even richer, more contextual embeddings

-

2. Ever-increasing amounts of text data to train huge embeddings models

-

3. More powerful hardware and techniques for training massive models

-

4. Creative new applications for embeddings, like using them for semantic search, knowledge retrieval, multi-modal learning, and more

- -

Embeddings are quickly becoming an essential tool that will power the next wave of natural language AI systems. They're a core reason behind the rapid progress in natural language processing and the explosion of generative AI tools we are seeing today.

-
-
-
- ); -} - -export default function TokenizationDemoWrapper() { +export default function EmbeddingsDemoPage() { return ( Loading...}> - + - ); -} + ) +} \ No newline at end of file diff --git a/src/app/demos/tokenize/TokenizationDemoClient.tsx b/src/app/demos/tokenize/TokenizationDemoClient.tsx new file mode 100644 index 00000000..b27ae3bb --- /dev/null +++ b/src/app/demos/tokenize/TokenizationDemoClient.tsx @@ -0,0 +1,146 @@ +'use client' + +import { Container } from '@/components/Container' +import { useState } from 'react'; + +const getColorForToken = (token: string) => { + const tokenId = token.charCodeAt(0); + const hue = (tokenId * 137.508) % 360; + return `hsl(${hue}, 50%, 80%)`; +}; + +export default function TokenizationDemoClient() { + const [inputText, setInputText] = useState(''); + const [tokens, setTokens] = useState([]); + + const generateTokens = async () => { + try { + const response = await fetch('/api/tokens', { + method: 'POST', + headers: { + 'Content-Type': 'application/json', + }, + body: JSON.stringify({ inputText }), + }); + + const data = await response.json(); + console.log(`data: %o`, data.tokens); + setTokens(data.tokens); + } catch (error) { + console.error('Error generating tokens:', error); + // Handle error state + } + } + + const handleInputChange = (e: React.ChangeEvent) => { + setInputText(e.target.value); + }; + + return ( + +
+

Tokenization Demo

+

This interactive demo showcases the process of tokenization, a fundamental technique used in natural language processing (NLP) and generative AI.

+

Enter any text into the input field below...

+
+ +
+ +
+

As you type, your sentence is split into words, the way we humans tend to see and read them:

+
+ {inputText.split(' ').map((word, index) => ( + + {word} + + ))} +
+
+

But how does a machine see them? Click the button below to tokenize your text, which will convert your words into token IDs for a given vocabulary.

+ +

These are the token IDs that the tiktoken library assigned to your words. This is closer to how ChatGPT and other LLMs see your text when you write a prompt in natural language:

+
+ {Object.entries(tokens).map(([key, value]) => ( + {value} + ))} +
+
+
+

What is Tokenization?

+

+ Tokenization is the process of breaking down a piece of text into smaller units called tokens. These tokens can be individual words, subwords, or even characters, depending on the tokenization algorithm used.

+ +

The purpose of tokenization is to convert text into a format that can be easily processed and understood by machine learning models, particularly in the field of NLP.

+ +

+ In the context of the current generative AI boom, tokenization has become increasingly important. Language models like GPT (Generative Pre-trained Transformer) rely heavily on tokenization to process and generate human-like text.

+ +

By breaking down text into tokens, these models can learn patterns, relationships, and meanings within the language, enabling them to generate coherent and contextually relevant responses. +

+ +

+ Each token is assigned a unique token ID, which is an integer value representing that specific token. These token IDs serve as a numerical representation of the text, allowing the AI models to perform mathematical operations and learn from the input data efficiently. +

+
+ +
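To make the token-ID idea concrete, here is a standalone sketch using the js-tiktoken package; which tiktoken binding the /api/tokens route actually uses is an assumption.

```ts
import { getEncoding } from 'js-tiktoken';

// cl100k_base: the ~100,000-token vocabulary described above.
const enc = getEncoding('cl100k_base');

const tokenIds = enc.encode('The quick brown fox');
console.log(tokenIds); // an array of integer IDs; exact values depend on the vocabulary

// decode() maps the IDs back to the original text.
console.log(enc.decode(tokenIds)); // "The quick brown fox"
```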
+

The Tiktoken library

+

+ In this demo, we are using the Tiktoken library for tokenization. Tiktoken is a popular tokenization library developed by OpenAI, one of the leading organizations in the field of AI research and development. It is designed to work seamlessly with OpenAI language models, such as GPT-3 and its variants. +

+ +

+ Tiktoken provides a fast and efficient way to tokenize text using the same algorithm and vocabulary as OpenAI's models. It offers support for various encoding schemes, including the commonly used cl100k_base encoding, which has a vocabulary of approximately 100,000 tokens. This is the exact vocabulary used in this demo. +

+ +

+ By using Tiktoken, we ensure that the tokenization process in this demo is consistent with the tokenization used by state-of-the-art language models. +

+
+ +
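The /api/tokens route itself is not in this diff either; a minimal sketch of how it might wrap the same encoder (hypothetical path and implementation):

```ts
// app/api/tokens/route.ts (hypothetical; not included in this diff)
import { getEncoding } from 'js-tiktoken';

export async function POST(req: Request) {
  const { inputText } = await req.json();

  // Same cl100k_base vocabulary the prose above describes.
  const enc = getEncoding('cl100k_base');
  const tokens = enc.encode(inputText);

  // The client renders these integer IDs as colored spans.
  return Response.json({ tokens });
}
```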
+

Use cases and importance

+

+ Tokenization is a critical step in various NLP tasks and applications. Here are a few examples where tokenization plays a crucial role: +

+

Language translation

+

Tokenization is used to break down sentences into individual words or subwords, which are then mapped to their corresponding translations in the target language. This enables accurate and efficient language translation systems.

+

Sentiment analysis

+

By tokenizing text, sentiment analysis models can identify and extract sentiment-bearing words or phrases, allowing them to determine the overall sentiment expressed in a piece of text.

+

Text classification

+

Tokenization helps in converting text into a numerical representation that can be fed into machine learning models for text classification tasks, such as spam detection, topic categorization, or genre identification.

+

Text generation

+

Generative language models like GPT rely heavily on tokenization to generate human-like text. By learning patterns and relationships between tokens, these models can produce coherent and contextually relevant responses, enabling applications like chatbots, content creation, and creative writing assistance.

+
+
+
+ ); +} \ No newline at end of file diff --git a/src/app/demos/tokenize/page.tsx b/src/app/demos/tokenize/page.tsx index 42e002f6..af0a6107 100644 --- a/src/app/demos/tokenize/page.tsx +++ b/src/app/demos/tokenize/page.tsx @@ -1,156 +1,17 @@ -'use client' +import { Metadata } from 'next' +import { Suspense } from 'react' +import { createMetadata } from '@/utils/createMetadata' +import TokenizationDemoClient from './TokenizationDemoClient' -import { Container } from '@/components/Container' -import { Suspense, useState } from 'react'; +export const metadata: Metadata = createMetadata({ + title: 'Tokenization Demo', + description: 'Interactive demo showcasing the process of tokenization, a fundamental technique used in natural language processing (NLP) and generative AI.', +}) -const getColorForToken = (token: string) => { - const tokenId = token.charCodeAt(0); - const hue = (tokenId * 137.508) % 360; - return `hsl(${hue}, 50%, 80%)`; -}; - -function TokenizationDemo() { - const [inputText, setInputText] = useState(''); - const [tokens, setTokens] = useState([]); - - const generateTokens = async () => { - try { - const response = await fetch('/api/tokens', { - method: 'POST', - headers: { - 'Content-Type': 'application/json', - }, - body: JSON.stringify({ inputText }), - }); - - const data = await response.json(); - console.log(`data: %o`, data.tokens); - setTokens(data.tokens); - } catch (error) { - console.error('Error generating tokens:', error); - // Handle error state - } - } - - const handleInputChange = (e: React.ChangeEvent) => { - setInputText(e.target.value); - }; - - return ( - -
-

Tokenization Demo

-

This interactive demo showcases the process of tokenization, a fundamental technique used in natural language processing (NLP) and generative AI.

-

Enter any text into the input field below...

-
- -
- -
-

As you type, your sentence is split into words, the way us humans tend to see and read them:

-
- {inputText.split(' ').map((word, index) => ( - - {word} - - ))} -
-
-

But how does a machine see them? Click the button below to tokenize your text, which will convert your words into token IDs for a given vocabulary.

- -

These are the token IDs that the tiktoken library assigned to your words. This is closer to how ChatGPT and other LLMs see your text when you write a prompt in natural language:

-
- {Object.entries(tokens).map(([key, value]) => ( - {value} - ))} -
-
-
-

What is Tokenization?

-

- Tokenization is the process of breaking down a piece of text into smaller units called tokens. These tokens can be individual words, subwords, or even characters, depending on the tokenization algorithm used.

- -

The purpose of tokenization is to convert text into a format that can be easily processed and understood by machine learning models, particularly in the field of NLP.

- -

- In the context of the current generative AI boom, tokenization has become increasingly important. Language models like GPT (Generative Pre-trained Transformer) rely heavily on tokenization to process and generate human-like text.

- -

By breaking down text into tokens, these models can learn patterns, relationships, and meanings within the language, enabling them to generate coherent and contextually relevant responses. -

- -

- Each token is assigned a unique token ID, which is an integer value representing that specific token. These token IDs serve as a numerical representation of the text, allowing the AI models to perform mathematical operations and learn from the input data efficiently. -

-
- -
-

The Tiktoken library

-

- In this demo, we are using the Tiktoken library for tokenization. Tiktoken is a popular tokenization library developed by OpenAI, one of the leading organizations in the field of AI research and development. It is designed to work seamlessly with OpenAI language models, such as GPT-3 and its variants. -

- -

- Tiktoken provides a fast and efficient way to tokenize text using the same algorithm and vocabulary as OpenAI's models. It offers support for various encoding schemes, including the commonly used cl100k_base encoding, which has a vocabulary of approximately 100,000 tokens. This is the exact vocabulary used in this demo. -

- -

- By using Tiktoken, we ensure that the tokenization process in this demo is consistent with the tokenization used by state-of-the-art language models. -

-
- - - -
-

Use cases and importance

-

- Tokenization is a critical step in various NLP tasks and applications. Here are a few examples where tokenization plays a crucial role: -

-

Language translation

-

Tokenization is used to break down sentences into individual words or subwords, which are then mapped to their corresponding translations in the target language. This enables accurate and efficient language translation systems.

-

Sentiment analysis

-

By tokenizing text, sentiment analysis models can identify and extract sentiment-bearing words or phrases, allowing them to determine the overall sentiment expressed in a piece of text.

-

Text classification

-

Tokenization helps in converting text into a numerical representation that can be fed into machine learning models for text classification tasks, such as spam detection, topic categorization, or genre identification.

-

Text generation

-

Generation: Generative language models like GPT heavily rely on tokenization to generate human-like text. By learning patterns and relationships between tokens, these models can produce coherent and contextually relevant responses, enabling applications like chatbots, content creation, and creative writing assistance.

-
-
-
- ); -} - -export default function TokenizationDemoWrapper() { +export default function TokenizationDemoPage() { return ( Loading...}> - + - ); -} + ) +} \ No newline at end of file diff --git a/src/app/learn/page.tsx b/src/app/learn/page.tsx index 917d99e4..90d0ca5b 100644 --- a/src/app/learn/page.tsx +++ b/src/app/learn/page.tsx @@ -1,10 +1,11 @@ import Image from 'next/image'; +import { Metadata } from 'next'; +import { createMetadata } from '@/utils/createMetadata'; import { Container } from "@/components/Container"; import CourseIndex from '@/components/CourseIndex'; import { Button } from "@/components/Button"; -import projectBasedLearning from '@/images/project-based-learning.webp' import advice from '@/images/advice.webp' import projects from '@/images/projects.webp' import portfolio from '@/images/portfolio.webp' @@ -13,6 +14,11 @@ import skills from '@/images/skills.webp' import community from '@/images/community.webp' import mentorship from '@/images/mentorship.webp' +export const metadata: Metadata = createMetadata({ + title: "Learn AI Development - School for Hackers", + description: "Project-based learning program to build real-world AI applications. Develop marketable skills, get mentorship, and build your portfolio to advance your career in AI development.", +}); + type Feature = { name: string description: string @@ -197,4 +203,4 @@ export default async function LearnPage() { ) -} +} \ No newline at end of file diff --git a/src/app/learn/waitinglist/page.tsx b/src/app/learn/waitinglist/page.tsx index e74d11bc..383a795b 100644 --- a/src/app/learn/waitinglist/page.tsx +++ b/src/app/learn/waitinglist/page.tsx @@ -1,4 +1,6 @@ import { auth } from '../../../../auth' +import { Metadata } from 'next'; +import { createMetadata } from '@/utils/createMetadata'; import { SimpleLayout } from '@/components/SimpleLayout'; import GithubSignin from '@/components/github-signin'; @@ -10,6 +12,11 @@ import CourseIndex from '@/components/CourseIndex'; import zpSchoolForHackers from '@/images/zp-school-for-hackers.webp' +export const metadata: Metadata = createMetadata({ + title: "Join the Waiting List - School for Hackers", + description: "Sign up for our waiting list and be the first to know when our AI development courses launch. Learn to build cutting-edge AI applications through project-based learning.", +}); + export default async function LearningHome() { const session = await auth(); @@ -56,4 +63,4 @@ export default async function LearningHome() { ) -} +} \ No newline at end of file diff --git a/src/app/waitinglist/page.jsx b/src/app/waitinglist/WaitingListClient.jsx similarity index 97% rename from src/app/waitinglist/page.jsx rename to src/app/waitinglist/WaitingListClient.jsx index a35e5421..fe9cce27 100644 --- a/src/app/waitinglist/page.jsx +++ b/src/app/waitinglist/WaitingListClient.jsx @@ -2,12 +2,10 @@ import { useState, useEffect } from "react"; import { useSearchParams } from "next/navigation"; - import { Container } from "@/components/Container"; - import ProductWaitinglistForm from "@/components/ProductWaitinglistForm"; -export default function Page() { +export default function WaitingListClient() { const [userEmail, setUserEmail] = useState(""); const [productSlug, setProductSlug] = useState(""); const [productName, setProductName] = useState(""); @@ -29,12 +27,10 @@ export default function Page() { Your thirst for knowledge is commendable! -

But hand-crafted project-based learning takes time.

-

Enter your email below to secure your spot on the waiting list, and we'll let you know as soon as{" "} {productName} is ready. @@ -45,7 +41,6 @@ export default function Page() { productSlug={productSlug} productName={productName} /> - ); -} +} \ No newline at end of file diff --git a/src/app/waitinglist/page.tsx b/src/app/waitinglist/page.tsx new file mode 100644 index 00000000..640e7c8c --- /dev/null +++ b/src/app/waitinglist/page.tsx @@ -0,0 +1,12 @@ +import { Metadata } from 'next' +import { createMetadata } from '@/utils/createMetadata' +import WaitingListClient from './WaitingListClient' + +export const metadata: Metadata = createMetadata({ + title: "Join the Waiting List - School for Hackers", + description: "Sign up for our waiting list and be the first to know when our AI development courses launch. Learn to build cutting-edge AI applications through project-based learning.", +}); + +export default function Page() { + return +} \ No newline at end of file diff --git a/src/components/RandomImage.jsx b/src/components/RandomImage.jsx index 15237915..ce711caa 100644 --- a/src/components/RandomImage.jsx +++ b/src/components/RandomImage.jsx @@ -64,7 +64,7 @@ const RandomImage = () => { style={{ cursor: 'pointer' }}> Zachary Motherfucking Proser diff --git a/src/components/SearchForm.tsx b/src/components/SearchForm.tsx index 81ed60c5..86640b7f 100644 --- a/src/components/SearchForm.tsx +++ b/src/components/SearchForm.tsx @@ -8,7 +8,6 @@ interface SearchFormProps { const SearchForm: React.FC = ({ suggestedSearches, onSearch, setIsLoading }) => { const [query, setQuery] = useState(""); - const [suggestions] = useState(suggestedSearches); const [showSuggestions, setShowSuggestions] = useState(true); const handleInputChange = (e: ChangeEvent) => { @@ -60,10 +59,10 @@ const SearchForm: React.FC = ({ suggestedSearches, onSearch, se > ➔ - {showSuggestions && suggestions.length > 0 && ( + {showSuggestions && suggestedSearches.length > 0 && (

Popular searches
- {suggestions.map((suggestion, index) => ( + {suggestedSearches.map((suggestion, index) => (
= ({ suggestedSearches, onSearch, se setQuery(suggestion); onSearch(suggestion); setShowSuggestions(false); - setIsLoading(true); // Ensure loading state is set when suggestion is clicked + setIsLoading(true); }} > {suggestion} @@ -84,5 +83,4 @@ const SearchForm: React.FC = ({ suggestedSearches, onSearch, se ); }; -export default SearchForm; - +export default SearchForm; \ No newline at end of file
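One pattern note on this diff as a whole: every converted page calls createMetadata, and scripts/check-metadata.js auto-credits openGraph and twitter to any page that uses it. The helper itself is not shown anywhere in the diff; a minimal sketch of the shape those two facts imply (field details are assumptions):

```ts
// src/utils/createMetadata.ts (hypothetical sketch; the real helper is not in this diff)
import type { Metadata } from 'next';

interface CreateMetadataInput {
  title: string;
  description: string;
}

export function createMetadata({ title, description }: CreateMetadataInput): Metadata {
  return {
    title,
    description,
    // These two blocks are why check-metadata.js credits 'openGraph' and
    // 'twitter' to every page that calls createMetadata.
    openGraph: { title, description },
    twitter: { card: 'summary_large_image', title, description },
  };
}
```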