From 19a7b01f0e4caaada0d0b24b07f2ba4194e7d3a0 Mon Sep 17 00:00:00 2001 From: jjrv Date: Fri, 30 Jun 2017 08:59:10 +0300 Subject: [PATCH] Rewrite and add tests. --- package.json | 4 +- src/Address.ts | 54 +- src/Cache.ts | 612 ++++++++++-------- src/cget.ts | 2 - src/index.ts | 9 + src/mkdirp.ts | 22 +- src/tsconfig.json | 4 +- test/cache/example.invalid/index.html | 5 - test/cache/localhost/index.html | 1 + test/cache/localhost/missing.html.header.json | 4 + .../redirected-index.html.header.json | 5 + .../redirected-missing.html.header.json | 5 + test/ds9k.ts | 149 +++++ test/test.ts | 200 ++++++ test/tsconfig.json | 13 +- 15 files changed, 758 insertions(+), 331 deletions(-) delete mode 100644 src/cget.ts create mode 100644 src/index.ts delete mode 100644 test/cache/example.invalid/index.html create mode 100644 test/cache/localhost/index.html create mode 100644 test/cache/localhost/missing.html.header.json create mode 100644 test/cache/localhost/redirected-index.html.header.json create mode 100644 test/cache/localhost/redirected-missing.html.header.json create mode 100644 test/ds9k.ts create mode 100644 test/test.ts diff --git a/package.json b/package.json index f10b13f..4125ece 100644 --- a/package.json +++ b/package.json @@ -2,8 +2,8 @@ "name": "cget", "version": "0.1.0", "description": "Robust streaming parallel download manager with filesystem cache", - "main": "dist/cget.js", - "typings": "dist/cget.d.ts", + "main": "dist/index.js", + "typings": "dist/index.d.ts", "scripts": { "tsc": "tsc", "prepublish": "tsc -p src", diff --git a/src/Address.ts b/src/Address.ts index 00ea4b6..90b4487 100644 --- a/src/Address.ts +++ b/src/Address.ts @@ -4,21 +4,6 @@ import * as path from 'path'; import * as url from 'url'; -export function sanitizeUrl(urlRemote: string) { - var urlParts = url.parse(urlRemote, false, true); - var origin = urlParts.host || ''; - - if((urlParts.pathname || '').charAt(0) != '/') origin += '/'; - - origin += urlParts.pathname; - return([ - 
urlParts.protocol || 'http:', - '//', - url.resolve('', origin), - urlParts.search || '' - ].join('')); -} - /** Last line of defence to filter malicious paths. */ export function sanitizePath(path: string) { @@ -36,32 +21,39 @@ export function sanitizePath(path: string) { export class Address { constructor(uri: string, cwd?: string) { - var urn: string | null = null; - var url: string | null = null; - var cachePath: string; - if(uri.match(/^\.?\.?\//)) { // The URI looks more like a local path. - cachePath = path.resolve(cwd || '.', uri); - url = 'file://' + cachePath; + this.path = path.resolve(cwd || '.', uri); + this.url = 'file://' + this.path; this.isLocal = true; } else if(uri.substr(0, 5) == 'file:') { - cachePath = path.resolve(uri.substr(5)); - url = 'file://' + cachePath; + this.path = path.resolve(uri.substr(5)); + this.url = 'file://' + this.path; this.isLocal = true; } else if(uri.substr(0, 4) == 'urn:') { - urn = uri; - cachePath = urn.substr(4).replace(/:/g, '/'); + this.urn = uri; + this.path = sanitizePath(this.urn.substr(4).replace(/:/g, '/')); } else { // If the URI is not a URN address, interpret it as a URL address and clean it up. - url = sanitizeUrl(uri); - cachePath = uri.substr(uri.indexOf(':') + 1); + + const parts = url.parse(uri, false, true); + const origin = parts.host || ''; + + const slash = ((parts.pathname || '').charAt(0) == '/') ? '' : '/'; + + this.url = ( + (parts.protocol || 'http:') + '//' + + url.resolve('', origin + slash + parts.pathname) + + (parts.search || '') + ); + + this.path = sanitizePath( + url.resolve('', origin.replace(/:.*/, '') + slash + parts.pathname) + + (parts.search || '') + ); } - this.uri = (urn || url)!; - this.urn = urn; - this.url = url; - this.path = this.isLocal ? 
cachePath : sanitizePath(cachePath); + this.uri = (this.urn || this.url)!; } uri: string; diff --git a/src/Cache.ts b/src/Cache.ts index 67d17ca..343bafd 100644 --- a/src/Cache.ts +++ b/src/Cache.ts @@ -21,6 +21,8 @@ import { Address } from './Address'; export interface FetchOptions { allowLocal?: boolean; allowRemote?: boolean; + allowCacheRead?: boolean; + allowCacheWrite?: boolean; forceHost?: string; forcePort?: number; username?: string; @@ -34,8 +36,22 @@ export interface CacheOptions extends FetchOptions { concurrency?: number; } -type InternalHeaders = { [key: string]: number | string | string[] }; -export type Headers = { [key: string]: string | string[] }; +export interface InternalHeaders { + [key: string]: number | string | string[] | undefined + + 'cget-message'?: string; +}; + +export interface Headers { + [key: string]: string | string[] | undefined +}; + +export interface RedirectResult { + address: Address; + cachePath: string; + headers: InternalHeaders; + oldHeaders?: InternalHeaders[]; +} interface RedirectSpec { address: Address; @@ -44,42 +60,143 @@ interface RedirectSpec { headers: Headers; } -export class CacheResult { - constructor(streamOut: stream.Readable, address: Address, status: number, message: string, headers: Headers) { - this.stream = streamOut; - this.address = address; +const defaultHeaders = { + 'cget-status': 200, + 'cget-message': 'OK' +}; + +const internalHeaderTbl: { [key: string]: boolean } = { + 'cget-status': true, + 'cget-message': true, + 'cget-target': true +}; + +const retryCodeTbl: { [key: string]: boolean } = {}; + +for( + let code of ( + 'EAI_AGAIN ECONNREFUSED ECONNRESET EHOSTUNREACH' + + ' ENOTFOUND EPIPE ESOCKETTIMEDOUT ETIMEDOUT' + ).split(' ') +) { + retryCodeTbl[code] = true; +} - this.status = status; - this.message = message; +function removeInternalHeaders(headers: Headers | InternalHeaders) { + const output: Headers = {}; - this.headers = headers; + for(let key of Object.keys(headers)) { + 
if(!internalHeaderTbl[key]) output[key] = headers[key] as (string | string[]); } - stream: stream.Readable; - address: Address; + return(output); +} - status: number; - message: string; +/** Get path to headers for a locally cached file. */ - headers: Headers; +export function getHeaderPath(cachePath: string) { + return(cachePath + '.header.json'); } -export class CacheError extends Error { - status: number; +function storeHeaders(cachePath: string, headers: Headers, extra: InternalHeaders ) { + const output: InternalHeaders = {}; + + for(let key of Object.keys(headers)) output[key] = headers[key]; + for(let key of Object.keys(extra)) output[key] = extra[key]; + + return(fsa.writeFile( + getHeaderPath(cachePath), + JSON.stringify(extra), + { encoding: 'utf8' } + )); +} + +export function getHeaders(cachePath: string) { + return( + fsa.readFile( + getHeaderPath(cachePath), + { encoding: 'utf8' } + ).then(JSON.parse).catch( + /** If headers are not found, invent some. */ + (err: NodeJS.ErrnoException) => defaultHeaders + ) + ); +} + +function openLocal( + { address, cachePath, headers }: RedirectResult, + opened: (result: CacheResult) => void +) { + const streamIn = fs.createReadStream(cachePath); + + // Resolve promise with headers if stream opens successfully. + streamIn.on('open', () => { + opened(new CacheResult( + streamIn, + address, + +(headers['cget-status'] || 200), + '' + (headers['cget-message'] || 'OK'), + removeInternalHeaders(headers) + )); + }); + + return(new Promise((resolve, reject) => { + // Cached file doesn't exist or IO error. 
+ streamIn.on('error', reject); + streamIn.on('end', resolve); + })); +} + +export class CacheResult { + constructor( + public stream: stream.Readable, + public address: Address, + public status: number, + public message: string, + public headers: Headers + ) {} +} + +export class CachedError extends Error { + constructor( + public status: number, + message?: string, + headers: Headers | InternalHeaders = {} + ) { + super(status + (message ? ' ' + message : '')); + + this.headers = removeInternalHeaders(headers); + } + headers: Headers; + /** Workaround for instanceof (prototype chain is messed up after inheriting Error in ES5). */ + isCachedError = true; } -export class Cache { +export class Deferred { + constructor() { + this.promise = new Promise((resolve, reject) => { + this.resolve = resolve; + this.reject = reject; + }); + } + + promise: Promise; + resolve: (result?: Type | Promise) => void; + reject: (err?: any) => void; +} - constructor(basePath: string, options?: CacheOptions) { - if(!options) options = {}; +export class Cache { + constructor(basePath?: string, options: CacheOptions = {}) { this.basePath = path.resolve('.', basePath || 'cache'); this.indexName = options.indexName || 'index.html'; this.fetchQueue = new TaskQueue(Promise, options.concurrency || 2); this.allowLocal = options.allowLocal || false; - this.allowRemote = options.allowRemote || !('allowRemote' in options); + this.allowRemote = options.allowRemote || options.allowRemote === void 0; + this.allowCacheRead = options.allowCacheRead || options.allowCacheRead === void 0; + this.allowCacheWrite = options.allowCacheWrite || options.allowCacheWrite === void 0; this.forceHost = options.forceHost; this.forcePort = options.forcePort; @@ -96,7 +213,7 @@ export class Cache { message: message, headers: headers }) => this.createCachePath(address).then((cachePath: string) => - this.storeHeaders(cachePath, headers, { + storeHeaders(cachePath, headers, { 'cget-status': status, 'cget-message': message, 
'cget-target': target.uri @@ -135,19 +252,49 @@ export class Cache { return(isDir(cachePath).then(makeValidPath)); } - /** Get path to headers for a locally cached file. */ + /** Check if there are cached headers with errors or redirecting the URL. */ + + getRedirect(address: Address, oldHeaders: InternalHeaders[] = []): Promise { + const cachePath = this.getCachePath(address); + + return(cachePath.then(getHeaders).then((headers: InternalHeaders) => { + const status = +(headers['cget-status'] || 0); + + if(status && status >= 300 && status <= 308 && headers['location']) { + oldHeaders.push(headers); + + return(this.getRedirect( + new Address(url.resolve( + address.url!, + '' + (headers['cget-target'] || headers['location']) + )), + oldHeaders + )); + } + + if(status && status != 200 && (status < 500 || status >= 600)) { + throw(new CachedError(status, headers['cget-message'], headers)); + } - static getHeaderPath(cachePath: string) { - return(cachePath + '.header.json'); + const result: RedirectResult = { address, cachePath: cachePath.value(), headers }; + + if(oldHeaders.length) result.oldHeaders = oldHeaders; + + return(result); + })); } /** Test if an address is cached. */ isCached(uri: string) { return(this.getCachePath(new Address(uri)).then((cachePath: string) => - fsa.stat(cachePath) - .then((stats: fs.Stats) => !stats.isDirectory()) - .catch((err: NodeJS.ErrnoException) => false) + fsa.stat( + cachePath + ).then( + (stats: fs.Stats) => !stats.isDirectory() + ).catch( + (err: NodeJS.ErrnoException) => false + ) )); } @@ -155,42 +302,15 @@ export class Cache { private createCachePath(address: Address) { return(this.getCachePath(address).then((cachePath: string) => - mkdirp(path.dirname(cachePath), this.indexName).then(() => cachePath) + mkdirp( + path.dirname(cachePath), + this.indexName + ).then( + () => cachePath + ) )); } - /** Check if there are cached headers with errors or redirecting the URL. 
*/ - - private static getRedirect(cachePath: string) { - return( - fsa.readFile( - Cache.getHeaderPath(cachePath), - { encoding: 'utf8' } - ).then(JSON.parse).catch( - (err: any) => ({}) - ).then((headers: InternalHeaders) => { - const status = headers['cget-status'] as number; - - if(!status) return(null); - - if(status >= 300 && status <= 308 && headers['location']) { - return((headers['cget-target'] || headers['location']) as string); - } - - if(status != 200 && (status < 500 || status >= 600)) { - var err = new CacheError(status + ' ' + headers['cget-message']); - - err.headers = Cache.removeInternalHeaders(headers); - err.status = status; - - throw(err); - } - - return(null); - }) - ); - } - /** Store custom data related to a URL-like address, * for example an XML namespace. * @return Promise resolving to true after all data is written. */ @@ -209,165 +329,115 @@ export class Cache { * Returns the file's URL after redirections * and a readable stream of its contents. */ - fetch(uri: string, options?: FetchOptions) { - if(!options) options = {}; - + fetch(uri: string, options: FetchOptions = {}) { const address = new Address(uri, this.cwd || options.cwd); + const allowLocal = (options.allowLocal !== void 0) ? options.allowLocal : this.allowLocal; + const allowRemote = (options.allowRemote !== void 0) ? options.allowRemote : this.allowRemote; + const allowCacheRead = (options.allowCacheRead !== void 0) ? options.allowCacheRead : this.allowCacheRead; + const allowCacheWrite = (options.allowCacheWrite !== void 0) ? 
options.allowCacheWrite : this.allowCacheWrite; + let isOpened = false; + let isErrored = false; + let handler: ( + opened: (result: CacheResult) => void, + ) => Promise; + + if(address.isLocal && allowLocal) { + handler = (opened) => this.fetchLocal( + address, + options, + opened + ); + } else if(!address.isLocal && allowCacheRead) { + handler = (opened) => this.fetchCached( + address, + options, + opened + ).catch((err: CachedError | NodeJS.ErrnoException) => { + // Re-throw HTTP and unexpected errors. + if((err as CachedError).isCachedError || (err as NodeJS.ErrnoException).code != 'ENOENT' || !allowRemote) { + throw(err); + } - if(address.isLocal) { - if(!(options.allowLocal || (options.allowLocal !== false && this.allowLocal))) { - return(Promise.reject(new Error('Access denied to url ' + address.url))); - } - - return(new Promise((resolve, reject) => - this.fetchQueue.add(() => new Promise((resolveTask, rejectTask) => - this.fetchLocal( - address, - options!, - resolveTask, - rejectTask - ).then(resolve, reject) - )) - )); - } - - return(new Promise((resolve, reject) => - this.fetchQueue.add(() => new Promise((resolveTask, rejectTask) => - this.fetchCached( + return(this.fetchRemote( address, - options!, - resolveTask - ).catch((err: CacheError | NodeJS.ErrnoException) => { - // Re-throw HTTP and unexpected errors. 
- if(err instanceof CacheError || err.code != 'ENOENT') { - rejectTask(err); - throw(err); - } + options, + opened + )); + }); + } else if(!address.isLocal && allowRemote) { + handler = (opened) => this.fetchRemote( + address, + options, + opened + ); + } else { + return(Promise.reject(new CachedError(403, 'Access denied to url ' + address.url))); + } - if(address.url && !address.isLocal) { - if(!(options!.allowRemote || (options!.allowRemote !== false && this.allowRemote))) { - return(Promise.reject(new Error('Access denied to url ' + address.url))); - } - return(this.fetchRemote(address, options!, resolveTask, rejectTask)); - } else { - rejectTask(err); - throw(err); + return(new Promise((opened, errored) => + this.fetchQueue.add( + () => handler((result: CacheResult) => { + if(!isErrored) { + isOpened = true; + opened(result); } - }).then(resolve, reject) - )) + }).catch((err: CachedError | NodeJS.ErrnoException) => { + if(!isOpened) errored(err); + isErrored = true; + }) + ) )); } private fetchLocal( address: Address, options: FetchOptions, - resolveTask: () => void, - rejectTask: (err?: NodeJS.ErrnoException) => void + opened: (result: CacheResult) => void ) { - var streamIn = fs.createReadStream(address.path); - - return( - new Promise((resolve, reject) => { - // Resolve promise with headers if stream opens successfully. - streamIn.on('open', () => resolve(Cache.defaultHeaders)); - - // Cached file doesn't exist or IO error. 
- streamIn.on('error', (err: NodeJS.ErrnoException) => { - reject(err); - rejectTask(err); - throw(err); - }); - - streamIn.on('end', resolveTask); - }).then((headers: InternalHeaders) => new CacheResult( - streamIn, - address, - headers['cget-status'] as number, - headers['cget-message'] as string, - Cache.removeInternalHeaders(headers) - )) - ); - } - - private fetchCached(address: Address, options: FetchOptions, resolveTask: () => void) { - var streamIn: fs.ReadStream; - - // Any errors shouldn't be handled here, but instead in the caller. - - return( - this.getCachePath(address).then((cachePath: string) => - Cache.getRedirect(cachePath).then((urlRemote: string) => - urlRemote ? this.getCachePath(new Address(urlRemote)) : cachePath - ) - ).then((cachePath: string) => new Promise((resolve, reject) => { - streamIn = fs.createReadStream(cachePath); - - // Resolve promise with headers if stream opens successfully. - streamIn.on('open', () => resolve( - fsa.readFile( - Cache.getHeaderPath(cachePath), - { encoding: 'utf8' } - ).then( - /** Parse headers stored as JSON. */ - (data: string) => JSON.parse(data) - ).catch( - /** If headers are not found, invent some. */ - (err: NodeJS.ErrnoException) => Cache.defaultHeaders - ) - )); - - // Cached file doesn't exist. 
- streamIn.on('error', reject); + const result = { + address, + cachePath: address.path, + headers: defaultHeaders + }; - streamIn.on('end', resolveTask); - })).then((headers: InternalHeaders) => new CacheResult( - streamIn, - address, - headers['cget-status'] as number, - headers['cget-message'] as string, - Cache.removeInternalHeaders(headers) - )) - ); + return(openLocal(result, opened)); } - private storeHeaders(cachePath: string, headers: Headers, extra: InternalHeaders ) { - for(let key of Object.keys(headers)) { - if(!extra.hasOwnProperty(key)) extra[key] = headers[key] - } - - return(fsa.writeFile( - Cache.getHeaderPath(cachePath), - JSON.stringify(extra), - { encoding: 'utf8' } + private fetchCached( + address: Address, + options: FetchOptions, + opened: (result: CacheResult) => void + ) { + return(this.getRedirect(address).then( + (result: RedirectResult) => openLocal(result, opened) )); } private fetchRemote( address: Address, options: FetchOptions, - resolveTask: () => void, - rejectTask: (err?: NodeJS.ErrnoException) => void + opened: (result: CacheResult | Promise) => void ) { + const allowCacheRead = (options.allowCacheRead !== void 0) ? options.allowCacheRead : this.allowCacheRead; + const allowCacheWrite = (options.allowCacheWrite !== void 0) ? 
options.allowCacheWrite : this.allowCacheWrite; var urlRemote = address.url!; - var redirectList: RedirectSpec[] = []; - var found = false; - var resolve: (result: any) => void; - var reject: (err: any) => void; - var promise = new Promise((res, rej) => { - resolve = res; - reject = rej; - }); + let resolved = false; + let found = false; + let streamRequest: request.Request; + const streamBuffer = new stream.PassThrough(); + const redirectList: RedirectSpec[] = []; + const deferred = new Deferred(); - var streamBuffer = new stream.PassThrough(); + function die(err: NodeJS.ErrnoException | CachedError) { + if(resolved) return; + resolved = true; - function die(err: NodeJS.ErrnoException) { // Abort and report. - if(streamRequest) streamRequest.abort(); - - reject(err); - rejectTask(err); + streamRequest.abort(); streamBuffer.emit('error', err); + + deferred.reject(err); } const requestConfig: request.CoreOptions = { @@ -381,24 +451,25 @@ export class Cache { headers: res.headers }); - urlRemote = url.resolve(urlRemote, res.headers.location as string); + urlRemote = url.resolve(urlRemote, '' + res.headers.location); address = new Address(urlRemote); - this.fetchCached(address, options, resolveTask).then((result: CacheResult) => { - // File was already found in cache so stop downloading. - - streamRequest.abort(); + if(!allowCacheRead) return(true); - if(found) return; + this.fetchCached(address, options, opened).then((result: CacheResult) => { + if(found || resolved) return; found = true; + resolved = true; + + // File was already found in cache so stop downloading. + streamRequest.abort(); this.addLinks(redirectList, address).finally(() => { - resolve(result); + deferred.resolve(result); }); }).catch((err: NodeJS.ErrnoException) => { if(err.code != 'ENOENT' && err.code != 'ENOTDIR') { - // Weird! - die(err); + // Weird error! Let's try to download the remote file anyway. 
} }); @@ -416,23 +487,19 @@ export class Cache { }; } - var streamRequest = request.get( + streamRequest = request.get( Cache.forceRedirect(urlRemote, options), requestConfig ); streamRequest.on('error', (err: NodeJS.ErrnoException) => { // Check if retrying makes sense for this error. - if(( - 'EAI_AGAIN ECONNREFUSED ECONNRESET EHOSTUNREACH ' + - 'ENOTFOUND EPIPE ESOCKETTIMEDOUT ETIMEDOUT ' - ).indexOf(err.code || '') < 0) { + if(retryCodeTbl[err.code || '']) { + console.error('SHOULD RETRY'); + die(err); + } else { die(err); } - - console.error('SHOULD RETRY'); - - die(err); }); streamRequest.on('response', (res: http.IncomingMessage) => { @@ -441,67 +508,83 @@ export class Cache { const status = res.statusCode!; - if(status != 200) { - if(status < 500 || status >= 600) { - var err = new CacheError(status + ' ' + res.statusMessage); + if(status >= 500 && status < 600) { + // TODO + console.error('SHOULD RETRY'); + die(new Error('RETRY')); + } else if(status != 200) { + var err = new CachedError(status, res.statusMessage, res.headers); + if(allowCacheWrite) { this.createCachePath(address).then((cachePath: string) => - this.storeHeaders(cachePath, res.headers, { + storeHeaders(cachePath, res.headers, { 'cget-status': status, - 'cget-message': res.statusMessage! + 'cget-message': res.statusMessage }) ); - - err.headers = res.headers; - err.status = status; - - reject(err); - rejectTask(err); - return; } - // TODO - console.error('SHOULD RETRY'); - - die(new Error('RETRY')); + die(err); } streamRequest.pause(); - this.createCachePath(address).then((cachePath: string) => { - var streamOut = fs.createWriteStream(cachePath); + const cacheReady = ( !allowCacheWrite ? Promise.resolve(null) : + this.createCachePath(address).then((cachePath: string) => { + var streamOut = fs.createWriteStream(cachePath); - streamOut.on('finish', () => { - // Output stream file handle stays open after piping unless manually closed. 
+ streamOut.on('finish', () => { + // Output stream file handle stays open after piping unless manually closed. + streamOut.close(); + }); - streamOut.close(); - }); + streamRequest.pipe(streamOut, { end: true }); - streamRequest.pipe(streamOut, {end: true}); - streamRequest.pipe(streamBuffer, {end: true}); + return(cachePath); + }).catch( + // Can't write to cache for some reason. Carry on... + (err: NodeJS.ErrnoException) => null + ) + ); + + const pipeReady = cacheReady.then((cachePath: string | null) => { + const tasks: Promise[] = []; + + streamRequest.pipe(streamBuffer, { end: true }); streamRequest.resume(); - return( - Promise.join( - this.addLinks(redirectList, address), - this.storeHeaders(cachePath, res.headers, { - 'cget-status': res.statusCode!, - 'cget-message': res.statusMessage! - }) - ).finally( - () => resolve(new CacheResult( - streamBuffer as any as stream.Readable, - address, - res.statusCode!, - res.statusMessage!, - res.headers - )) - ) - ); - }).catch(die); + if(allowCacheWrite) { + tasks.push(this.addLinks(redirectList, address)); + if(cachePath) { + tasks.push( + storeHeaders(cachePath, res.headers, { + 'cget-status': res.statusCode, + 'cget-message': res.statusMessage + }) + ); + } + } + + return(Promise.all(tasks)); + }).catch((err: NodeJS.ErrnoException) => { + // Unable to save metadata in the cache. Carry on... + }); + + pipeReady.then(() => opened(new CacheResult( + streamBuffer as any as stream.Readable, + address, + res.statusCode!, + res.statusMessage!, + res.headers + ))); }); - streamRequest.on('end', resolveTask); + streamRequest.on('end', () => { + if(resolved) return; + resolved = true; + + deferred.resolve(); + }); if(options.forceHost || options.forcePort || this.forceHost || this.forcePort) { // Monkey-patch request to support forceHost when running tests. 
@@ -512,28 +595,7 @@ export class Cache { }; } - return(promise); - } - - private static defaultHeaders = { - 'cget-status': 200, - 'cget-message': 'OK' - }; - - private static internalHeaderTbl: { [key: string]: boolean } = { - 'cget-status': true, - 'cget-message': true, - 'cget-target': true - }; - - private static removeInternalHeaders(headers: InternalHeaders) { - const output: Headers = {}; - - for(let key of Object.keys(headers)) { - if(!Cache.internalHeaderTbl[key]) output[key] = headers[key] as string; - } - - return(output); + return(deferred.promise); } private static forceRedirect(urlRemote: string, options: FetchOptions) { @@ -570,6 +632,8 @@ export class Cache { private allowLocal: boolean; private allowRemote: boolean; + private allowCacheRead: boolean; + private allowCacheWrite: boolean; private forceHost?: string; private forcePort?: number; private cwd: string; diff --git a/src/cget.ts b/src/cget.ts deleted file mode 100644 index 6dc6608..0000000 --- a/src/cget.ts +++ /dev/null @@ -1,2 +0,0 @@ -export { Address } from './Address'; -export { FetchOptions, Cache, CacheResult } from './Cache'; diff --git a/src/index.ts b/src/index.ts new file mode 100644 index 0000000..4b9ce09 --- /dev/null +++ b/src/index.ts @@ -0,0 +1,9 @@ +export { Address } from './Address'; +export { + FetchOptions, + Cache, + CacheResult, + RedirectResult, + Headers, + getHeaderPath +} from './Cache'; diff --git a/src/mkdirp.ts b/src/mkdirp.ts index 02a89d7..454dadf 100644 --- a/src/mkdirp.ts +++ b/src/mkdirp.ts @@ -14,7 +14,6 @@ export const fsa = { open: Promise.promisify(fs.open), rename: Promise.promisify(fs.rename) as any as (src: string, dst: string) => Promise<{}>, mkdir: Promise.promisify(fs.mkdir) as (name: string) => Promise<{}>, - read: Promise.promisify(fs.read), readFile: Promise.promisify(fs.readFile) as any as (name: string, options: {encoding: string; flag?: string;}) => Promise, writeFile: Promise.promisify(fs.writeFile) as (name: string, content: string, 
options: {encoding: string; flag?: string;}) => Promise<{}> }; @@ -48,25 +47,26 @@ export function mkdirp(pathName: string, indexName: string) { pathPrefix = prefixList.join(path.sep); - return(Promise.try(() => fsa.stat(pathPrefix)).then((stats: fs.Stats) => { + return(fsa.stat(pathPrefix).then((stats: fs.Stats): {} | undefined => { if(stats.isFile()) { // Trying to convert a file into a directory. // Rename the file to indexName and move it into the new directory. var tempPath = pathPrefix + '.' + makeTempSuffix(6); - return(Promise.try(() => - fsa.rename(pathPrefix, tempPath) - ).then(() => - fsa.mkdir(pathPrefix) - ).then(() => - fsa.rename(tempPath, path.join(pathPrefix, indexName)) - )); + return( + fsa.rename( + pathPrefix, + tempPath + ).then(() => + fsa.mkdir(pathPrefix) + ).then(() => + fsa.rename(tempPath, path.join(pathPrefix, indexName)) + ) + ); } else if(!stats.isDirectory()) { throw(new Error('Tried to create a directory inside something weird: ' + pathPrefix)); } - - return(null as any as {}); }).catch((err: NodeJS.ErrnoException) => { // Re-throw unexpected errors. if(err.code != 'ENOENT' && err.code != 'ENOTDIR') throw(err); diff --git a/src/tsconfig.json b/src/tsconfig.json index f2150af..98f9740 100644 --- a/src/tsconfig.json +++ b/src/tsconfig.json @@ -2,7 +2,7 @@ "compileOnSave": true, "compilerOptions": { "declaration": true, - "lib": ["dom", "es5", "es2015.collection"], + "lib": ["es5", "es2015.collection"], "module": "commonjs", "moduleResolution": "node", "noImplicitAny": true, @@ -14,6 +14,6 @@ "target": "es5" }, "files": [ - "cget.ts" + "index.ts" ] } diff --git a/test/cache/example.invalid/index.html b/test/cache/example.invalid/index.html deleted file mode 100644 index 124f157..0000000 --- a/test/cache/example.invalid/index.html +++ /dev/null @@ -1,5 +0,0 @@ - - Test - -

Test

- diff --git a/test/cache/localhost/index.html b/test/cache/localhost/index.html new file mode 100644 index 0000000..18ecdcb --- /dev/null +++ b/test/cache/localhost/index.html @@ -0,0 +1 @@ + diff --git a/test/cache/localhost/missing.html.header.json b/test/cache/localhost/missing.html.header.json new file mode 100644 index 0000000..8995859 --- /dev/null +++ b/test/cache/localhost/missing.html.header.json @@ -0,0 +1,4 @@ +{ + "cget-status": 404, + "cget-message": "Not Found" +} diff --git a/test/cache/localhost/redirected-index.html.header.json b/test/cache/localhost/redirected-index.html.header.json new file mode 100644 index 0000000..8bd1ffa --- /dev/null +++ b/test/cache/localhost/redirected-index.html.header.json @@ -0,0 +1,5 @@ +{ + "cget-status": 302, + "cget-message": "Found", + "location": "/index.html" +} diff --git a/test/cache/localhost/redirected-missing.html.header.json b/test/cache/localhost/redirected-missing.html.header.json new file mode 100644 index 0000000..68dfe77 --- /dev/null +++ b/test/cache/localhost/redirected-missing.html.header.json @@ -0,0 +1,5 @@ +{ + "cget-status": 302, + "cget-message": "Moved Temporarily", + "location": "/missing.html" +} diff --git a/test/ds9k.ts b/test/ds9k.ts new file mode 100644 index 0000000..93bec4f --- /dev/null +++ b/test/ds9k.ts @@ -0,0 +1,149 @@ +// This file is part of cget, copyright (c) 2015-2017 BusFaster Ltd. +// Released under the MIT license, see LICENSE. + +/** @file This is the DeathServer 9000. + * It's meant to be the flakiest, most unreliable and overall worst + * HTTP server imaginable. Built for testing cget, which should still + * successfully download files from it. + * Also guaranteed to come with glaring SECURITY FLAWS and REMOTE EXPLOITS. + * Please avoid running on public-facing or production systems. 
*/
+
+import * as fs from 'fs';
+import * as url from 'url';
+import * as http from 'http';
+
+import * as Promise from 'bluebird';
+
+import { Address, Cache, CacheResult, RedirectResult, getHeaderPath } from '..';
+
+/** Lowest bit of each flag group: close and timeout start at bit 0, length at bit 3, status at bit 7. */
+export const enum ProblemBase {
+	close = 1,
+	length = close * 8,
+	status = length * 16
+}
+
+/** Flags for things that will go wrong, read from the `problem` query parameter.
+  * So far requestHandler acts only on closeBeforeHeader, closeAfterHeader and timeout.
+  * Closing mid-data without timeout or content length cannot be told apart from success. */
+export const enum Problem {
+	none = 0,
+
+	closeMask = ProblemBase.close * 3,
+	closeBeforeHeader = ProblemBase.close * 1,
+	closeAfterHeader = ProblemBase.close * 2,
+	closeDuringData = ProblemBase.close * 3,
+
+	timeout = ProblemBase.close * 4,
+
+	lengthMask = ProblemBase.length * 15,
+	contentLengthMissing = ProblemBase.length * 1,
+	contentLengthIncorrect = ProblemBase.length * 2,
+	rangeUnsupported = ProblemBase.length * 4,
+	rangeIncorrect = ProblemBase.length * 8,
+
+	statusMask = ProblemBase.status * 3,
+	statusCode = ProblemBase.status * 1,
+	redirectLoop = ProblemBase.status * 2
+}
+
+/** Cache acting as the document root. Its path is the first command line argument. */
+const cache = new Cache(process.argv[2], {
+	// Deny access to file:// URLs.
+	allowLocal: false
+});
+
+/** Error describing the HTTP response to send; thrown without a status code when nothing should be sent. */
+class Error9k extends Error {
+	/** @param code HTTP status code.
+	  * @param headers Optional extra headers (clobbered by send method). */
+	constructor(public code?: number, public headers: http.ServerResponseHeaders = {}) {
+		super(code ? http.STATUS_CODES[code] : 'Unknown error');
+	}
+
+	/** Send the status line, headers and a plain text body. Does nothing without a status code. */
+	send = (res: http.ServerResponse) => {
+		if(!this.code) return;
+
+		const headers = this.headers;
+		const body = Buffer.from(this.code + ' ' + this.message + '\n', 'utf-8');
+
+		headers['Content-Type'] = 'text/plain';
+		headers['Content-Length'] = body.length;
+
+		res.writeHead(this.code, headers);
+		res.end(body);
+	}
+
+	/** Workaround for instanceof (prototype chain is messed up after inheriting Error in ES5). */
+	isError9k = true;
+}
+
+/** Serve a cached file over HTTP, misbehaving as requested by the `problem` query parameter.
+  * If the cache reports old headers for the address, reply with their status and target instead.
+  * Any failure other than an Error9k is answered with a 404. */
+export function requestHandler(req: http.IncomingMessage, res: http.ServerResponse) {
+	const parts = url.parse(req.url!);
+	const match = (parts.query || '').match(/problem=([0-9]+)/);
+	// Parse the flags into a number instead of passing the matched string off as an enum.
+	const problem: Problem = match ? parseInt(match[1], 10) : Problem.none;
+	const problemClose = problem & Problem.closeMask;
+
+	const host = req.headers.host! as string;
+	// Drop any port number from the host before looking up the address.
+	const address = new Address('http://' + host.replace(/:.*/, '') + parts.pathname);
+	const headers: http.ServerResponseHeaders = {};
+	let cachePath: string;
+
+	cache.getRedirect(address).then((result: RedirectResult) => {
+		const oldHeaders = result.oldHeaders && result.oldHeaders[0];
+
+		if(oldHeaders) {
+			throw(new Error9k(
+				+(oldHeaders['cget-status'] || 0),
+				{ Location: '' + (oldHeaders['cget-target'] || oldHeaders['location']) }
+			));
+		}
+
+		cachePath = result.cachePath;
+
+		for(const key of Object.keys(result.headers)) {
+			const value = result.headers[key];
+			if(typeof(value) != 'undefined') headers[key] = value;
+		}
+
+		return(Promise.promisify(fs.stat)(cachePath));
+	}).then((stats: fs.Stats) => {
+		headers['Content-Length'] = stats.size;
+
+		if(problemClose == Problem.closeBeforeHeader) throw(new Error9k());
+		res.writeHead(200, headers);
+		if(problemClose == Problem.closeAfterHeader) throw(new Error9k());
+
+		fs.createReadStream(cachePath, { encoding: null as any, start: 0 }).pipe(res);
+	}).catch((err: NodeJS.ErrnoException | Error9k) => {
+		const err9k = ('isError9k' in err) ? err as Error9k : new Error9k(404);
+
+		if(err9k.code) err9k.send(res);
+		// Unless the timeout flag is set, finish the response and drop the connection.
+		if(!(problem & Problem.timeout)) {
+			res.end();
+			(res as any).connection.destroy();
+		}
+	});
+}
+
+/** Start the server. The returned promise resolves once it listens and rejects on a server error. */
+export function startServer(port = 8080) {
+	const server = http.createServer(requestHandler);
+
+	return(new Promise((resolve, reject) => {
+		server.listen(port, () => {
+			// Always print an annoying message to discourage users.
+			console.error('DeathServer 9000 active. Run for your life.');
+			resolve();
+		});
+
+		server.on('error', reject);
+	}));
+}
diff --git a/test/test.ts b/test/test.ts
new file mode 100644
index 0000000..3546825
--- /dev/null
+++ b/test/test.ts
@@ -0,0 +1,223 @@
+import * as path from 'path';
+import * as stream from 'stream';
+import * as cget from '..';
+
+import { startServer } from './ds9k';
+
+/** Port the test server listens on. */
+const serverPort = 8080;
+
+/** Content every successful fetch is expected to produce. */
+const content = '\n';
+
+/** Check a fetch that should succeed: both its status and its streamed content must match.
+  * Success is reported only if neither check failed. */
+function expectedResult(name: string, result: cget.CacheResult, status: number) {
+	const chunkList: Buffer[] = [];
+	const statusOk = (result.status == status);
+
+	if(!statusOk) {
+		console.error('Error in test: ' + name);
+		console.error('Expected status: ' + status);
+		console.error('Got status: ' + result.status);
+	}
+
+	result.stream.on('data', (chunk: Buffer) => chunkList.push(chunk));
+
+	result.stream.on('error', (err: Error) => {
+		console.error('Error in test: ' + name);
+		console.error('Stream reported error:');
+		console.error(err);
+	});
+
+	result.stream.on('end', () => {
+		const data = Buffer.concat(chunkList).toString('utf-8');
+
+		if(data != content) {
+			console.error('Error in test: ' + name);
+			console.error('Incorrect data:');
+			console.error(data);
+		} else if(statusOk) {
+			// A wrong status was already reported above, so do not also claim success.
+			console.log('Success in test: ' + name);
+		}
+	});
+}
+
+/** Report a fetch that succeeded when an error was expected. */
+function unexpectedResult(name: string, result: any) {
+	console.error('Error in test: ' + name);
+	console.error('Expected error...');
+	console.error('Got result:');
+	console.error(result);
+}
+
+/** Report a fetch that failed when a result was expected. */
+function unexpectedError(name: string, err: Error) {
+	console.error('Error in test: ' + name);
+	console.error('Unexpected error:');
+	console.error(err);
+}
+
+/** Check that a failed fetch carries the expected value in its code or status property. */
+function expectedError(name: string, err: Error, code: number | string) {
+	const result = (err as any).code as number || (err as any).status as string;
+
+	if(result == code) {
+		console.log('Success in test: ' + name + ' (' + result + ')');
+	} else {
+		console.error('Error in test: ' + name);
+		console.error('Expected status: ' + code);
+		console.error('Got status: ' + result);
+	}
+}
+
+/** Start every test fetch without waiting for earlier ones; results are logged as they arrive.
+  * @param port Port of the test server on localhost.
+  * @param concurrency Concurrency option passed to each cache. */
+function runTests(port: number, concurrency: number) {
+	const cwd = __dirname;
+	const cachePath = path.resolve(cwd, 'cache');
+
+	const storedPath = 'cache/localhost/index.html';
+	const missingName = 'missing-' + Math.random();
+	const origin = 'http://localhost:' + port;
+
+	const validLocal = [
+		'file://' + path.resolve(__dirname, storedPath),
+		'./' + storedPath,
+		'../test/' + storedPath
+	];
+
+	const invalidLocal = [
+		'file://' + missingName + '/index.html',
+		'file://' + missingName + '/',
+		'file://' + missingName,
+		'./' + missingName,
+		'../test/' + missingName,
+		'../test/' + missingName + '/',
+		'../test/' + missingName + '/index.html'
+	];
+
+	const validCached = [
+		origin + '/index.html',
+		origin + '/',
+		origin
+		// origin + '/redirected-index.html'
+	];
+
+	// Pairs of an expected error code or status, followed by the address to fetch.
+	const invalidCached = [
+		'ENOENT', origin + '/' + missingName,
+		'ENOENT', 'http://example.invalid/',
+		404, origin + '/missing.html',
+		404, origin + '/redirected-missing.html'
+	];
+
+	// Only local access is allowed.
+	const localLive = new cget.Cache(cachePath, {
+		allowLocal: true,
+		allowRemote: false,
+		allowCacheRead: false,
+		concurrency,
+		cwd
+	});
+
+	// Only cache reads are allowed.
+	const remoteCache = new cget.Cache(cachePath, {
+		allowLocal: false,
+		allowRemote: false,
+		allowCacheRead: true,
+		concurrency,
+		cwd
+	});
+
+	// Only remote access is allowed.
+	const remoteLive = new cget.Cache(cachePath, {
+		allowLocal: false,
+		allowRemote: true,
+		allowCacheRead: false,
+		concurrency,
+		cwd
+	});
+
+	for(let num = 0; num < validLocal.length; ++num) {
+		const name = 'Valid local fetch ' + num;
+
+		localLive.fetch(
+			validLocal[num]
+		).then((result: cget.CacheResult) =>
+			expectedResult(name, result, 200)
+		).catch((err: Error) => unexpectedError(name, err));
+	}
+
+	for(let num = 0; num < validLocal.length; ++num) {
+		const name = 'Forbidden local fetch ' + num;
+
+		remoteCache.fetch(
+			validLocal[num]
+		).then((result: cget.CacheResult) =>
+			unexpectedResult(name, result)
+		).catch((err: Error) => expectedError(name, err, 403));
+	}
+
+	for(let num = 0; num < invalidLocal.length; ++num) {
+		const name = 'Invalid local fetch ' + num;
+
+		localLive.fetch(
+			invalidLocal[num]
+		).then((result: cget.CacheResult) =>
+			unexpectedResult(name, result)
+		).catch((err: Error) => expectedError(name, err, 'ENOENT'));
+	}
+
+	for(let num = 0; num < validCached.length; ++num) {
+		const name = 'Valid cached fetch ' + num;
+
+		remoteCache.fetch(
+			validCached[num]
+		).then((result: cget.CacheResult) =>
+			expectedResult(name, result, 200)
+		).catch((err: Error) => unexpectedError(name, err));
+	}
+
+	for(let num = 0; num < invalidCached.length; num += 2) {
+		const name = 'Invalid cached fetch ' + (num / 2);
+
+		remoteCache.fetch(
+			invalidCached[num + 1] as string
+		).then((result: cget.CacheResult) =>
+			unexpectedResult(name, result)
+		).catch((err: Error) => expectedError(name, err, invalidCached[num]));
+	}
+
+	for(let num = 0; num < validCached.length; ++num) {
+		const name = 'Forbidden cached fetch ' + num;
+
+		localLive.fetch(
+			validCached[num]
+		).then((result: cget.CacheResult) =>
+			unexpectedResult(name, result)
+		).catch((err: Error) => expectedError(name, err, 403));
+	}
+
+	for(let num = 0; num < validCached.length; ++num) {
+		const name = 'Valid remote fetch ' + num;
+
+		remoteLive.fetch(
+			validCached[num]
+		).then((result: cget.CacheResult) =>
+			expectedResult(name, result, 200)
+		).catch((err: Error) => unexpectedError(name, err));
+	}
+}
+
+startServer(serverPort).then(() => {
+	runTests(serverPort, Infinity);
+	runTests(serverPort, 1);
+}).catch((err: Error) => {
+	// Report a server that could not start and reflect it in the exit code.
+	console.error('Unable to run tests:');
+	console.error(err);
+	process.exitCode = 1;
+});
diff --git a/test/tsconfig.json b/test/tsconfig.json
index 4d9d2b4..b814e52 100644
--- a/test/tsconfig.json
+++ b/test/tsconfig.json
@@ -1,14 +1,19 @@
 {
+	"compileOnSave": true,
 	"compilerOptions": {
 		"declaration": false,
+		"lib": ["es5", "es2015.collection"],
 		"module": "commonjs",
+		"moduleResolution": "node",
 		"noImplicitAny": true,
+		"noImplicitThis": true,
+		"removeComments": false,
+		"sourceMap": false,
+		"strictNullChecks": true,
 		"target": "es5"
 	},
 	"files": [
-		"../typings/tsd.d.ts",
-		"../dist/cget.d.ts",
-
-		"serve.ts"
+		"ds9k.ts",
+		"test.ts"
 	]
 }