diff --git a/tests/sitemap-index.test.ts b/tests/sitemap-index.test.ts
index 8a207c9..525fb28 100644
--- a/tests/sitemap-index.test.ts
+++ b/tests/sitemap-index.test.ts
@@ -1,6 +1,6 @@
 import { SitemapStream } from '../index';
 import { tmpdir } from 'os';
-import { resolve, join } from 'path';
+import { join, resolve } from 'path';
 import {
   existsSync,
   unlinkSync,
@@ -11,9 +11,9 @@ import {
   SitemapIndexStream,
   SitemapAndIndexStream,
 } from '../lib/sitemap-index-stream';
-import { streamToPromise } from '../dist';
+import { streamToPromise } from '../lib/sitemap-stream';
 import { finished as finishedCallback } from 'stream';
-import { WriteStream } from 'node:fs';
+import { readFileSync, WriteStream } from 'node:fs';
 import { promisify } from 'util';
 
 const finished = promisify(finishedCallback);
@@ -134,6 +134,8 @@ describe('sitemapAndIndex', () => {
       resolve(targetFolder, `./sitemap-1.xml`),
       resolve(targetFolder, `./sitemap-2.xml`),
       resolve(targetFolder, `./sitemap-3.xml`),
+      resolve(targetFolder, `./sitemap-4.xml`),
+      resolve(targetFolder, `./sitemap-index.xml`),
     ]);
   });
 
@@ -143,6 +145,8 @@ describe('sitemapAndIndex', () => {
       resolve(targetFolder, `./sitemap-1.xml`),
       resolve(targetFolder, `./sitemap-2.xml`),
       resolve(targetFolder, `./sitemap-3.xml`),
+      resolve(targetFolder, `./sitemap-4.xml`),
+      resolve(targetFolder, `./sitemap-index.xml`),
     ]);
   });
 
@@ -155,7 +159,15 @@ describe('sitemapAndIndex', () => {
         const sm = new SitemapStream();
         const path = `./sitemap-${i}.xml`;
 
-        const ws = sm.pipe(createWriteStream(resolve(targetFolder, path)));
+        const outputStream = createWriteStream(resolve(targetFolder, path));
+
+        // Streams do not automatically propagate errors
+        // We must propagate this up to the SitemapStream
+        outputStream.on('error', (err) => {
+          sm.emit('error', err);
+        });
+
+        const ws = sm.pipe(outputStream);
         return [new URL(path, baseURL).toString(), sm, ws];
       },
     });
@@ -222,4 +234,300 @@ describe('sitemapAndIndex', () => {
       )
     ).toBe(false);
   });
+
+  it('writes to index file', async () => {
+    const baseURL = 'https://example.com/sub/';
+
+    const sms = new SitemapAndIndexStream({
+      limit: 2,
+      getSitemapStream: (i: number): [string, SitemapStream, WriteStream] => {
+        const sm = new SitemapStream();
+        const path = `./sitemap-${i}.xml`;
+
+        // This will not throw even though it will fail
+        // `outputStream.writable === true`
+        // `outputStream.closed === false`
+        const outputStream = createWriteStream(resolve(targetFolder, path));
+
+        // Streams do not automatically propagate errors
+        // We must propagate this up to the SitemapStream
+        outputStream.on('error', (err) => {
+          sm.emit('error', err);
+        });
+
+        const ws = sm.pipe(outputStream);
+        return [new URL(path, baseURL).toString(), sm, ws];
+      },
+    });
+
+    // Pipe the index stream to a file
+    const indexStream = createWriteStream(
+      resolve(targetFolder, `./sitemap-index.xml`)
+    );
+    sms.pipe(indexStream);
+    await writeData(sms, 'https://1.example.com/a');
+    await writeData(sms, 'https://2.example.com/a');
+    await writeData(sms, 'https://3.example.com/a');
+    sms.end();
+    await expect(finished(sms)).resolves.toBeUndefined();
+
+    await finished(indexStream);
+
+    expect(existsSync(resolve(targetFolder, `./sitemap-index.xml`))).toBe(true);
+    expect(existsSync(resolve(targetFolder, `./sitemap-0.xml`))).toBe(true);
+    expect(existsSync(resolve(targetFolder, `./sitemap-1.xml`))).toBe(true);
+    expect(existsSync(resolve(targetFolder, `./sitemap-2.xml`))).toBe(false);
+
+    // Read the first sitemap to make sure it was written
+    const sitemap0 = await streamToPromise(
+      createReadStream(resolve(targetFolder, `./sitemap-0.xml`))
+    );
+    expect(sitemap0.toString()).toContain('https://1.example.com/a');
+
+    // Read the last sitemap to make sure it was written
+    const sitemap1 = await streamToPromise(
+      createReadStream(resolve(targetFolder, `./sitemap-1.xml`))
+    );
+    expect(sitemap1.toString()).toContain('https://3.example.com/a');
+
+    // Read the index to make sure it was written
+    const indexText = readFileSync(
+      resolve(targetFolder, `./sitemap-index.xml`),
+      'utf-8'
+    );
+    expect(indexText).toContain(`${baseURL}sitemap-0`);
+    expect(indexText).toContain(`${baseURL}sitemap-1`);
+    expect(indexText).not.toContain(`${baseURL}sitemap-2`);
+  });
+
+  it('does not hang if last sitemap is filled', async () => {
+    const baseURL = 'https://example.com/sub/';
+
+    const sms = new SitemapAndIndexStream({
+      limit: 2,
+      getSitemapStream: (i: number): [string, SitemapStream, WriteStream] => {
+        const sm = new SitemapStream();
+        const path = `./sitemap-${i}.xml`;
+
+        // This will not throw even though it will fail
+        // `outputStream.writable === true`
+        // `outputStream.closed === false`
+        const outputStream = createWriteStream(resolve(targetFolder, path));
+
+        // Streams do not automatically propagate errors
+        // We must propagate this up to the SitemapStream
+        outputStream.on('error', (err) => {
+          sm.emit('error', err);
+        });
+
+        const ws = sm.pipe(outputStream);
+        return [new URL(path, baseURL).toString(), sm, ws];
+      },
+    });
+
+    // Pipe the index stream to a file
+    const indexStream = createWriteStream(
+      resolve(targetFolder, `./sitemap-index.xml`)
+    );
+    sms.pipe(indexStream);
+    await writeData(sms, 'https://1.example.com/a');
+    await writeData(sms, 'https://2.example.com/a');
+    sms.end();
+    await expect(finished(sms)).resolves.toBeUndefined();
+
+    await finished(indexStream);
+
+    expect(existsSync(resolve(targetFolder, `./sitemap-index.xml`))).toBe(true);
+    expect(existsSync(resolve(targetFolder, `./sitemap-0.xml`))).toBe(true);
+    expect(existsSync(resolve(targetFolder, `./sitemap-1.xml`))).toBe(false);
+
+    const sitemap0Raw = readFileSync(
+      resolve(targetFolder, `./sitemap-0.xml`),
+      'utf-8'
+    );
+    expect(sitemap0Raw).toContain('https://1.example.com/a');
+    expect(sitemap0Raw).toContain('https://2.example.com/a');
+    expect(sitemap0Raw).not.toContain('https://3.example.com/a');
+
+    // Read the first sitemap to make sure it was written
+    const sitemap0 = await streamToPromise(
+      createReadStream(resolve(targetFolder, `./sitemap-0.xml`))
+    );
+    expect(sitemap0.toString()).toContain('https://1.example.com/a');
+
+    // Read the index to make sure it was written
+    const indexText = readFileSync(
+      resolve(targetFolder, `./sitemap-index.xml`),
+      'utf-8'
+    );
+    expect(indexText).toContain(`${baseURL}sitemap-0`);
+    expect(indexText).not.toContain(`${baseURL}sitemap-1`);
+  });
+
+  it('deterministically finishes writing each sitemap file before creating a new one', async () => {
+    const baseURL = 'https://example.com/sub/';
+
+    const sms = new SitemapAndIndexStream({
+      limit: 5000,
+      getSitemapStream: (i: number): [string, SitemapStream, WriteStream] => {
+        const sm = new SitemapStream();
+        const path = `./sitemap-${i}.xml`;
+
+        // This will not throw even though it will fail
+        // `outputStream.writable === true`
+        // `outputStream.closed === false`
+        const outputStream = createWriteStream(resolve(targetFolder, path));
+
+        // Streams do not automatically propagate errors
+        // We must propagate this up to the SitemapStream
+        outputStream.on('error', (err) => {
+          sm.emit('error', err);
+        });
+
+        const ws = sm.pipe(outputStream);
+        return [new URL(path, baseURL).toString(), sm, ws];
+      },
+    });
+
+    // Pipe the index stream to a file
+    const indexStream = createWriteStream(
+      resolve(targetFolder, `./sitemap-index.xml`)
+    );
+    sms.pipe(indexStream);
+    for (let i = 0; i < 5000; i++) {
+      // Intentionally write while ignoring back pressure to stress test
+      // the rolling to new files
+      sms.write(`https://1.example.com/a${i}`);
+    }
+    for (let i = 0; i < 5000; i++) {
+      sms.write(`https://2.example.com/a${i}`);
+    }
+    for (let i = 0; i < 1; i++) {
+      sms.write(`https://3.example.com/a${i}`);
+    }
+    sms.end();
+    await expect(finished(sms)).resolves.toBeUndefined();
+
+    await finished(indexStream);
+
+    expect(existsSync(resolve(targetFolder, `./sitemap-index.xml`))).toBe(true);
+    expect(existsSync(resolve(targetFolder, `./sitemap-0.xml`))).toBe(true);
+    expect(existsSync(resolve(targetFolder, `./sitemap-1.xml`))).toBe(true);
+    expect(existsSync(resolve(targetFolder, `./sitemap-2.xml`))).toBe(true);
+    expect(existsSync(resolve(targetFolder, `./sitemap-3.xml`))).toBe(false);
+
+    // Make sure the very first file is completed
+    const sitemap0Raw = readFileSync(
+      resolve(targetFolder, `./sitemap-0.xml`),
+      'utf-8'
+    );
+    expect(sitemap0Raw).toContain('<urlset');
+    expect(sitemap0Raw).toContain('https://1.example.com/a0');
+    expect(sitemap0Raw).toContain('https://1.example.com/a4999');
+    expect(sitemap0Raw).toContain('</urlset>');
+
+    // Make sure the first rolled file is completed
+    const sitemap1Raw = readFileSync(
+      resolve(targetFolder, `./sitemap-1.xml`),
+      'utf-8'
+    );
+    expect(sitemap1Raw).toContain('<urlset');
+    expect(sitemap1Raw).toContain('https://2.example.com/a0');
+    expect(sitemap1Raw).toContain('https://2.example.com/a4999');
+    expect(sitemap1Raw).toContain('</urlset>');
+
+    // Make sure the last file is completed
+    const sitemap2Raw = readFileSync(
+      resolve(targetFolder, `./sitemap-2.xml`),
+      'utf-8'
+    );
+    expect(sitemap2Raw).toContain('<urlset');
+    expect(sitemap2Raw).toContain('https://3.example.com/a0');
+    expect(sitemap2Raw).toContain('</urlset>');
+    expect(sitemap2Raw).not.toContain('https://3.example.com/a1');
+
+    // Read the index to make sure it was written
+    const indexText = readFileSync(
+      resolve(targetFolder, `./sitemap-index.xml`),
+      'utf-8'
+    );
+    expect(indexText).toContain('<sitemapindex');
+    expect(indexText).toContain(`${baseURL}sitemap-0`);
+    expect(indexText).toContain(`${baseURL}sitemap-1`);
+    expect(indexText).toContain(`${baseURL}sitemap-2`);
+    expect(indexText).toContain('</sitemapindex>');
+    expect(indexText).not.toContain(`${baseURL}sitemap-3`);
+  });
+
+  it('works if no items written at all', async () => {
+    const baseURL = 'https://example.com/sub/';
+
+    const sms = new SitemapAndIndexStream({
+      limit: 2,
+      getSitemapStream: (i: number): [string, SitemapStream, WriteStream] => {
+        const sm = new SitemapStream();
+        const path = `./sitemap-${i}.xml`;
+
+        // This will not throw even though it will fail
+        // `outputStream.writable === true`
+        // `outputStream.closed === false`
+        const outputStream = createWriteStream(resolve(targetFolder, path));
+
+        // Streams do not automatically propagate errors
+        // We must propagate this up to the SitemapStream
+        outputStream.on('error', (err) => {
+          sm.emit('error', err);
+        });
+
+        const ws = sm.pipe(outputStream);
+        return [new URL(path, baseURL).toString(), sm, ws];
+      },
+    });
+
+    // Pipe the index stream to a file
+    const indexStream = createWriteStream(
+      resolve(targetFolder, `./sitemap-index.xml`)
+    );
+    sms.pipe(indexStream);
+    sms.end();
+    await expect(finished(sms)).resolves.toBeUndefined();
+
+    await finished(indexStream);
+
+    expect(existsSync(resolve(targetFolder, `./sitemap-index.xml`))).toBe(true);
+    expect(existsSync(resolve(targetFolder, `./sitemap-0.xml`))).toBe(false);
+
+    // Read the first sitemap to make sure it was NOT written
+    await expect(
+      streamToPromise(
+        createReadStream(resolve(targetFolder, `./sitemap-0.xml`))
+      )
+    ).rejects.toThrow('ENOENT');
+
+    // Read the index to make sure it was written without any sitemap entries
+    const indexText = readFileSync(
+      resolve(targetFolder, `./sitemap-index.xml`),
+      'utf-8'
+    );
+    expect(indexText).not.toContain(`${baseURL}sitemap-0`);
+    expect(indexText).not.toContain(`${baseURL}sitemap-1`);
+    expect(indexText).not.toContain(`${baseURL}sitemap-2`);
+  });
 });
+
+/**
+ * Writes one item to the stream and honours back pressure.
+ *
+ * @param sms - stream to write to
+ * @param data - item handed to `sms.write`
+ * @returns a promise that resolves immediately when `write` returns `true`,
+ *   otherwise once the stream emits `drain`
+ */
+function writeData(sms: SitemapStream, data: unknown): Promise<void> {
+  if (!sms.write(data)) {
+    return new Promise<void>((done) => {
+      sms.once('drain', done);
+    });
+  }
+  return Promise.resolve();
+}