Skip to content

Commit

Permalink
improve for [locale]
Browse files Browse the repository at this point in the history
  • Loading branch information
KishiTheMechanic committed Oct 29, 2024
1 parent e4f6462 commit 56b68c0
Show file tree
Hide file tree
Showing 2 changed files with 135 additions and 123 deletions.
2 changes: 1 addition & 1 deletion deno.json
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
{
"name": "@elsoul/fresh-sitemap",
"version": "1.0.2",
"version": "1.0.3",
"description": "Lightweight global state management library for Fresh framework using Preact signals.",
"runtimes": ["deno", "browser"],
"exports": "./mod.ts",
Expand Down
256 changes: 134 additions & 122 deletions mod.ts
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
import { join, SEPARATOR } from 'jsr:@std/[email protected]'
import { join, normalize, SEPARATOR } from 'jsr:@std/[email protected]'
import { ensureFile, exists } from 'jsr:@std/[email protected]'

export interface SiteMapEntry {
Expand All @@ -9,24 +9,28 @@ export interface SiteMapEntry {
export type Sitemap = SiteMapEntry[]

export interface SiteMapOptions {
languages: string[]
defaultLanguage: string
languages?: string[]
defaultLanguage?: string
}

/**
* Generates a sitemap XML for given directories and base URL.
* @param basename - The base URL of the website (e.g., 'https://example.com')
* @param distDirectory - The directory containing route files
* @param articlesDirectory - The directory containing articles in markdown format
* @param options - Options for sitemap generation
* @returns Generated sitemap as an XML string
*/
export async function generateSitemapXML(
basename: string,
distDirectory: string,
articlesDirectory: string,
options: SiteMapOptions,
options: SiteMapOptions = {},
): Promise<string> {
const routesSitemap = await generateSitemap(basename, distDirectory, options)
const articlesSitemap = await generateArticlesSitemap(
basename,
articlesDirectory,
distDirectory,
options,
)
const sitemap = [...routesSitemap, ...articlesSitemap]
Expand All @@ -35,6 +39,8 @@ export async function generateSitemapXML(

/**
* Generates the robots.txt file content.
* @param domain - The domain of the website (e.g., 'example.com')
* @returns Generated robots.txt content
*/
function generateRobotsTxt(domain: string): string {
return `# *
Expand All @@ -44,21 +50,27 @@ Allow: /
# Host
Host: https://${domain}
/* Sitemaps */
# Sitemaps
Sitemap: https://${domain}/sitemap.xml
`
}

/**
* Saves the generated sitemap XML and robots.txt to the specified file paths.
* @param basename - The base URL of the website
* @param distDirectory - Directory containing route files
* @param articlesDirectory - Directory containing articles
* @param sitemapPath - Path where sitemap.xml will be saved
* @param robotsPath - Path where robots.txt will be saved
* @param options - Options for sitemap generation
*/
export async function saveSitemapAndRobots(
basename: string,
distDirectory: string,
articlesDirectory: string,
sitemapPath: string,
robotsPath: string,
options: SiteMapOptions,
options: SiteMapOptions = {},
): Promise<void> {
const domain = new URL(basename).hostname
const sitemapXML = await generateSitemapXML(
Expand All @@ -77,150 +89,169 @@ export async function saveSitemapAndRobots(
}

/**
* Generates sitemap entries for static routes, including the mandatory [locale] directory.
* Generates sitemap entries for static routes, excluding dynamic and grouping directories.
* @param basename - The base URL of the website (e.g., 'https://example.com')
* @param distDirectory - Directory containing route files
* @param options - Options for sitemap generation, including languages and default language
* @returns Array of sitemap entries
*/
async function generateSitemap(
basename: string,
distDirectory: string,
options: SiteMapOptions,
): Promise<Sitemap> {
const sitemapSet = new Set<string>() // Unique paths for the final sitemap
const pathMap: Record<string, number> = {} // Store paths with a flag (1 for include, 0 for exclude)

// Recursively collect all paths in the directory
async function collectPaths(directory: string): Promise<void> {
for await (const entry of Deno.readDir(directory)) {
const entryPath = join(directory, entry.name)
if (entry.isDirectory) {
if (entry.name === '[locale]') {
// Process each language directory within [locale]
await processLocaleDirectory(entryPath)
} else {
await collectPaths(entryPath)
}
}
}
function removeLocaleFromPath(path: string): string {
return path.replace('/[locale]/', '/')
}

// Process each language directory inside [locale]
async function processLocaleDirectory(directory: string): Promise<void> {
for await (const entry of Deno.readDir(directory)) {
if (entry.isDirectory) {
const lang = entry.name
if (options.languages.includes(lang)) {
await processLanguageRoutes(join(directory, lang), lang)
}
}
// Process each path segment without modifying it
function processPathSegments(path: string): void {
// Skip non-.tsx files
if (!path.endsWith('.tsx')) return

// Initialize path in the map with an inclusion flag
pathMap[path] = 1

// Exclude paths containing '_'
if (path.includes('_')) {
pathMap[path] = 0 // Set to 0 if the path contains '_'
return // Exit early if excluded
}
if (path.includes('[')) {
pathMap[path] = 0 // Set to 0 if the path contains '_'
return // Exit early if excluded
}
if (path.includes(']')) {
pathMap[path] = 0 // Set to 0 if the path contains '_'
return // Exit early if excluded
}
}

// Process routes within a specific language directory
async function processLanguageRoutes(
directory: string,
lang: string,
): Promise<void> {
for await (const entry of Deno.readDir(directory)) {
const entryPath = join(directory, entry.name)
if (entry.isFile && entry.name.endsWith('.tsx')) {
await processFile(entryPath, lang)
} else if (entry.isDirectory) {
await processLanguageRoutes(entryPath, lang)
}
// Recursively collect all paths in the directory
async function addDirectory(directory: string) {
for await (const path of stableRecurseFiles(directory)) {
const removedLocalePath = removeLocaleFromPath(path)
processPathSegments(removedLocalePath)
}
}

// Process each .tsx file
async function processFile(filePath: string, lang: string): Promise<void> {
const relativePath = filePath.substring(distDirectory.length)
const pathSegments = relativePath.split(SEPARATOR).filter(Boolean)
function arrayToObject(arr: string[]): Record<string, number> {
const result: Record<string, number> = {}

// Exclude files starting with '_'
if (pathSegments.some((segment) => segment.startsWith('_'))) {
return
for (const segment of arr) {
result[segment] = 1 // Set each segment as a key with value 1
}

// Exclude dynamic routes (those with square brackets)
if (
pathSegments.some((segment) =>
segment.includes('[') || segment.includes(']')
)
) {
return
return result
}

function checkSegments(
pathMap: Record<string, number>,
): Record<string, number> {
for (const key in pathMap) {
if (key.startsWith('(') && key.endsWith(')')) {
pathMap[key] = 0
}
if (key === 'routes') {
pathMap[key] = 0
}
}
return pathMap
}

await addDirectory(distDirectory)

const mtime = (await Deno.stat(filePath)).mtime ?? new Date()
// Populate sitemap entries based on pathMap
for (const path in pathMap) {
if (pathMap[path] === 1) {
const filePath = join(path) // Use original path for checking
if (!(await exists(filePath))) {
continue // Skip if file does not exist
}
const { mtime } = await Deno.stat(filePath)

// Remove [locale] and language from path segments
const urlSegments = pathSegments.slice(2)
// Clean the path for the sitemap
const pathSegments = path.split(SEPARATOR)

let urlPath = urlSegments.join('/')
const segCheckObj = arrayToObject(pathSegments)

// Remove 'index' from the path
urlPath = urlPath.replace(/index\.tsx$/, '')
urlPath = urlPath.replace(/\.tsx$/, '')
const checkedSegments = checkSegments(segCheckObj)

// Ensure the URL starts with '/'
urlPath = '/' + urlPath
const neededSegmentsPath = pathSegments
.filter((segment) => checkedSegments[segment] === 1)
.join('/')

// Remove any trailing slashes
urlPath = urlPath.replace(/\/$/, '')
const cleanedPath = neededSegmentsPath.replace(/\.tsx$/, '')
.replace(/\index$/, '')

// Build the full URL with language prefix
const loc = basename.replace(/\/+$/, '') + `/${lang}` + urlPath
sitemapSet.add(
JSON.stringify({
loc: basename + '/' + cleanedPath,
lastmod: (mtime ?? new Date()).toISOString(),
}),
)

// Add to the sitemap set
sitemapSet.add(
JSON.stringify({
loc: loc,
lastmod: mtime.toISOString(),
}),
)
options.languages?.forEach((lang) => {
if (lang !== options.defaultLanguage) {
sitemapSet.add(
JSON.stringify({
loc: `${basename}/${lang}${cleanedPath}`,
lastmod: (mtime ?? new Date()).toISOString(),
}),
)
}
})
}
}

await collectPaths(distDirectory)
console.log('Final Sitemap Set:', sitemapSet)

return Array.from(sitemapSet).map((entry) => JSON.parse(entry)) as Sitemap
}

/**
* Generates sitemap entries for markdown articles, mapping to /[locale]/[...slug] routes.
* Generates sitemap entries for markdown articles, respecting language settings.
* @param basename - The base URL
* @param articlesDirectory - Directory containing article markdown files
* @param options - Options for sitemap generation, including languages
* @returns Array of sitemap entries for articles
*/
async function generateArticlesSitemap(
basename: string,
articlesDirectory: string,
distDirectory: string,
options: SiteMapOptions,
): Promise<Sitemap> {
const sitemap: Sitemap = []
const languages = options.languages || []

if (!(await exists(articlesDirectory))) return sitemap

// Check if there is a dynamic route that can handle the articles
const dynamicRoutePath = findDynamicRoute(distDirectory)

if (!dynamicRoutePath) {
console.warn('Dynamic route for articles not found.')
return sitemap
}

// Function to process each markdown file
async function addMarkdownFile(path: string) {
const relPath = path.substring(articlesDirectory.length).replace(
/\.md$/,
'',
)
const segments = relPath.split(SEPARATOR).filter(Boolean)
const slug = segments.join('/')

const mtime = (await Deno.stat(path)).mtime ?? new Date()

// For each language, generate the URL
for (const lang of options.languages) {
// Construct the URL path as /[locale]/[...slug]
const urlPath = `/${lang}/${slug}`
const segments = relPath.split(SEPARATOR).map((segment) =>
segment.replace(/^en\//, '')
)
const pathname = normalize(`/${segments.join('/')}`).replace(/\/index$/, '')

const urlPaths = languages.length > 0
? languages.map((
lang,
) => (lang === options.defaultLanguage
? pathname
: `/${lang}${pathname}`)
)
: [pathname]

for (const urlPath of urlPaths) {
const { mtime } = await Deno.stat(path)
sitemap.push({
loc: basename.replace(/\/+$/, '') + urlPath,
lastmod: mtime.toISOString(),
lastmod: (mtime ?? new Date()).toISOString(),
})
}
}
Expand All @@ -234,31 +265,10 @@ async function generateArticlesSitemap(
return sitemap
}

/**
* Finds the dynamic route file that can handle articles (e.g., [...slug].tsx)
*/
function findDynamicRoute(distDirectory: string): string | null {
const dynamicRoutePattern = /\[\.\.\..*\]\.tsx$/

// Use a stack for directories to process
const directories = [distDirectory]
while (directories.length > 0) {
const currentDir = directories.pop()!
for (const entry of Deno.readDirSync(currentDir)) {
const entryPath = join(currentDir, entry.name)
if (entry.isFile && dynamicRoutePattern.test(entry.name)) {
// Found the dynamic route
return entryPath
} else if (entry.isDirectory) {
directories.push(entryPath)
}
}
}
return null
}

/**
* Recursively iterates through a directory to retrieve all file paths in a stable, sorted order.
* @param directory - Directory path to recurse
* @returns Generator of file paths
*/
async function* stableRecurseFiles(directory: string): AsyncGenerator<string> {
const itr = Deno.readDir(directory)
Expand All @@ -281,6 +291,8 @@ async function* stableRecurseFiles(directory: string): AsyncGenerator<string> {

/**
* Converts a Sitemap array to an XML string in the required format.
* @param sitemap - Array of sitemap entries
* @returns Generated XML string
*/
function sitemapToXML(sitemap: Sitemap): string {
return `<?xml version="1.0" encoding="UTF-8"?>
Expand Down

0 comments on commit 56b68c0

Please sign in to comment.