From d6f9fc297f167407f9bf764f9a06d3f45df25e27 Mon Sep 17 00:00:00 2001 From: Shivam singh <140813296+this-is-shivamsingh@users.noreply.github.com> Date: Tue, 27 Aug 2024 12:54:26 +0530 Subject: [PATCH] Invalid URL Fix: Decoding manually for some reserved characters (#1682) * fix: decoding manually for reserved characters * chore: typo fix * test: test fix on percy.test.js and snapshot.test.js * chore: revert file * test: test fix * revert: Removing modifySnapshotUrl from snaphshot config * test: added 1 more test for multiple reserved character * feat: preserving reserved character from encoding * test: test fix + only show warning for network logs if snapshot.url != request.url * chore: null check for passing tests * test: test coverage * test: test fix * chore: remvoving some excessive comments --- packages/core/src/discovery.js | 2 +- packages/core/src/network.js | 10 +- packages/core/src/snapshot.js | 20 +++- packages/core/src/utils.js | 70 +++++++++++++ packages/core/test/discovery.test.js | 43 ++++++++ packages/core/test/snapshot.test.js | 143 +++++++++++++++++++++++++++ packages/core/test/utils.test.js | 48 +++++++++ 7 files changed, 333 insertions(+), 3 deletions(-) create mode 100644 packages/core/test/utils.test.js diff --git a/packages/core/src/discovery.js b/packages/core/src/discovery.js index fdc0d247c..dc57ffad0 100644 --- a/packages/core/src/discovery.js +++ b/packages/core/src/discovery.js @@ -342,7 +342,7 @@ export function createDiscoveryQueue(percy) { authorization: snapshot.discovery.authorization, userAgent: snapshot.discovery.userAgent, captureMockedServiceWorker: snapshot.discovery.captureMockedServiceWorker, - meta: snapshot.meta, + meta: { ...snapshot.meta, snapshotURL: snapshot.url }, // enable network inteception intercept: { diff --git a/packages/core/src/network.js b/packages/core/src/network.js index 4598e024e..66841152c 100644 --- a/packages/core/src/network.js +++ b/packages/core/src/network.js @@ -1,7 +1,7 @@ import { request as makeRequest } from '@percy/client/utils'; import logger from '@percy/logger'; import mime from 'mime-types'; -import { DefaultMap, createResource, hostnameMatches, normalizeURL, waitFor } from './utils.js'; +import { DefaultMap, createResource, hostnameMatches, normalizeURL, waitFor, decodeAndEncodeURLWithLogging } from './utils.js'; const MAX_RESOURCE_SIZE = 25 * (1024 ** 2); // 25MB const ALLOWED_STATUSES = [200, 201, 301, 302, 304, 307, 308]; @@ -207,6 +207,14 @@ export class Network { // do not handle data urls if (request.url.startsWith('data:')) return; + // Browsers handle URL encoding leniently. + // This code checks for issues such as `%` and leading spaces and warns the user accordingly. + decodeAndEncodeURLWithLogging(request.url, this.log, { + meta: { ...this.meta, url: request.url }, + shouldLogWarning: request.url !== this.meta?.snapshotURL, + warningMessage: `An invalid URL was detected for url: ${request.url} - the snapshot may fail on Percy. Please verify that your asset URL is valid.` + }); + if (this.intercept) { this.#pending.set(requestId, event); if (this.captureMockedServiceWorker) { diff --git a/packages/core/src/snapshot.js b/packages/core/src/snapshot.js index bcf58ffd4..341a42b33 100644 --- a/packages/core/src/snapshot.js +++ b/packages/core/src/snapshot.js @@ -7,7 +7,8 @@ import { request, hostnameMatches, yieldTo, - snapshotLogName + snapshotLogName, + decodeAndEncodeURLWithLogging } from './utils.js'; import { JobData } from './wait-for-job.js'; @@ -24,6 +25,21 @@ function validURL(url, base) { } } +function validateAndFixSnapshotUrl(snapshot) { + let log = logger('core:snapshot'); + // encoding snapshot url, if contians invalid URI characters/syntax + let modifiedURL = decodeAndEncodeURLWithLogging(snapshot.url, log, { + meta: { snapshot: { name: snapshot.name || snapshot.url } }, + shouldLogWarning: true, + warningMessage: `Invalid URL detected for url: ${snapshot.url} - the snapshot may fail on Percy. Please confirm that your website URL is valid.` + }); + + if (modifiedURL !== snapshot.url) { + log.debug(`Snapshot URL modified to: ${modifiedURL}`); + snapshot.url = modifiedURL; + } +} + // used to deserialize regular expression strings const RE_REGEXP = /^\/(.+)\/(\w+)?$/; @@ -86,6 +102,8 @@ function mapSnapshotOptions(snapshots, context) { // transform snapshot URL shorthand into an object if (typeof snapshot === 'string') snapshot = { url: snapshot }; + if (process.env.PERCY_MODIFY_SNAPSHOT_URL !== 'false') validateAndFixSnapshotUrl(snapshot); + // normalize the snapshot url and use it for the default name let url = validURL(snapshot.url, context?.baseUrl); snapshot.name ||= `${url.pathname}${url.search}${url.hash}`; diff --git a/packages/core/src/utils.js b/packages/core/src/utils.js index 02b79de09..81fc06e38 100644 --- a/packages/core/src/utils.js +++ b/packages/core/src/utils.js @@ -375,6 +375,76 @@ export function base64encode(content) { .toString('base64'); } +const RESERVED_CHARACTERS = { + '%3A': ':', + '%23': '#', + '%24': '$', + '%26': '&', + '%2B': '+', + '%2C': ',', + '%2F': '/', + '%3B': ';', + '%3D': '=', + '%3F': '?', + '%40': '@' +}; + +function _replaceReservedCharactersWithPlaceholder(url) { + let result = url; + let matchedPattern = {}; + let placeHolderCount = 0; + for (let key of Object.keys(RESERVED_CHARACTERS)) { + let regex = new RegExp(key, 'g'); + if (regex.test(result)) { + let placeholder = `__PERCY_PLACEHOLDER_${placeHolderCount}__`; + result = result.replace(regex, placeholder); + matchedPattern[placeholder] = key; + placeHolderCount++; + } + } + return { url: result, matchedPattern }; +} + +function _replacePlaceholdersWithReservedCharacters(matchedPattern, url) { + let result = url; + for (let [key, value] of Object.entries(matchedPattern)) { + let regex = new RegExp(key, 'g'); + result = result.replace(regex, value); + } + return result; +} + +// This function replaces invalid character that are not the +// part of valid URI syntax with there correct encoded value. +// Also, if a character is a part of valid URI syntax, those characters +// are not encoded +// Eg: [abc] -> gets encoded to %5Babc%5D +// ab c -> ab%20c +export function decodeAndEncodeURLWithLogging(url, logger, options = {}) { + // In case the url is partially encoded, then directly using encodeURI() + // will encode those characters again. Therefore decodeURI once helps is decoding + // partially encoded URL and then after encoding it again, full URL get encoded + // correctly. + const { + meta, + shouldLogWarning, + warningMessage + } = options; + try { + let { url: placeholderURL, matchedPattern } = _replaceReservedCharactersWithPlaceholder(url); + let decodedURL = decodeURI(placeholderURL); + let encodedURL = encodeURI(decodedURL); + encodedURL = _replacePlaceholdersWithReservedCharacters(matchedPattern, encodedURL); + return encodedURL; + } catch (error) { + logger.debug(error, meta); + if (error.name === 'URIError' && shouldLogWarning) { + logger.warn(warningMessage); + } + return url; + } +} + export function snapshotLogName(name, meta) { if (meta?.snapshot?.testCase) { return `testCase: ${meta.snapshot.testCase}, ${name}`; diff --git a/packages/core/test/discovery.test.js b/packages/core/test/discovery.test.js index b181add91..318a590f8 100644 --- a/packages/core/test/discovery.test.js +++ b/packages/core/test/discovery.test.js @@ -647,6 +647,49 @@ describe('Discovery', () => { ])); }); + it('debug error log only for invalid network url', async () => { + let err = new Error('Some Invalid Error'); + spyOn(global, 'decodeURI').and.callFake((url) => { + if (url === 'http://localhost:8000/style.css') { + throw err; + } + return url; + }); + + percy.loglevel('debug'); + await percy.snapshot({ + name: 'test snapshot', + url: 'http://localhost:8000', + domSnapshot: testDOM + }); + + expect(logger.stderr).toEqual(jasmine.arrayContaining([ + '[percy:core:page] Navigate to: http://localhost:8000/', + '[percy:core:discovery] Handling request: http://localhost:8000/', + '[percy:core:discovery] - Serving root resource', + `[percy:core:discovery] ${err.stack}`, + '[percy:core:discovery] Handling request: http://localhost:8000/style.css', + '[percy:core:discovery] Handling request: http://localhost:8000/img.gif' + ])); + }); + + it('detect invalid network url', async () => { + percy.loglevel('debug'); + await percy.snapshot({ + name: 'test snapshot', + url: 'http://localhost:8000', + domSnapshot: testDOM.replace('style.css', 'http://localhost:404/%style.css') + }); + + expect(logger.stderr).toEqual(jasmine.arrayContaining([ + '[percy:core:discovery] Handling request: http://localhost:8000/', + '[percy:core:discovery] - Serving root resource', + '[percy:core:discovery] An invalid URL was detected for url: http://localhost:404/%style.css - the snapshot may fail on Percy. Please verify that your asset URL is valid.', + '[percy:core:discovery] Handling request: http://localhost:404/%style.css', + '[percy:core:discovery] Handling request: http://localhost:8000/img.gif' + ])); + }); + it('allows setting a custom discovery user-agent', async () => { let userAgent; diff --git a/packages/core/test/snapshot.test.js b/packages/core/test/snapshot.test.js index 9ca321e78..456b9afa3 100644 --- a/packages/core/test/snapshot.test.js +++ b/packages/core/test/snapshot.test.js @@ -132,6 +132,149 @@ describe('Snapshot', () => { ]); }); + describe('snapshot urls', () => { + beforeEach(() => { + percy.loglevel('debug'); + }); + + describe('with invalid snapshot URL', () => { + const sharedExpectModifiedSnapshotURL = (expectedURL) => { + expect(logger.stderr).toEqual(jasmine.arrayContaining([ + `[percy:core:snapshot] Snapshot URL modified to: ${expectedURL}` + ])); + }; + + it('modifies if url contains space', async () => { + await percy.snapshot([ + { url: 'http://localhost:8000/ a' } + ]); + + sharedExpectModifiedSnapshotURL('http://localhost:8000/%20a'); + }); + + it('modifies if url contains [ ] to %5B and %5D', async () => { + let givenURL = 'http://localhost:8000/advanced-search/cf1a5848-f658-4939-be11-dcb7fd081043/results/[%229db14794-47f0-406a-a3b4-3f89280122b7%22]/structures/by-ccdc-number/123457'; + await percy.snapshot([ + { url: givenURL } + ]); + + sharedExpectModifiedSnapshotURL('http://localhost:8000/advanced-search/cf1a5848-f658-4939-be11-dcb7fd081043/results/%5B%229db14794-47f0-406a-a3b4-3f89280122b7%22%5D/structures/by-ccdc-number/123457'); + }); + + it('modifies if url contains {} to %7B and %7D', async () => { + let givenURL = 'http://localhost:8000/advanced-search/cf1a5848-f658-4939-be11-dctwo-{ abc [dbd] }/2253'; + await percy.snapshot([ + { url: givenURL } + ]); + + sharedExpectModifiedSnapshotURL('http://localhost:8000/advanced-search/cf1a5848-f658-4939-be11-dctwo-%7B%20abc%20%5Bdbd%5D%20%7D/2253'); + }); + + it('modifies if url is partially encoded', async () => { + // Here some character are encoded properly, this test ensure those encoed character should not gets encoded again + let partiallyEncodedURL = 'http://localhost:8000/advanced-search/cf1a5848-f658-4939-be11-dctwo-%7B%20abc%20[dbd]%20%7D/2253'; + await percy.snapshot([ + { url: partiallyEncodedURL } + ]); + + sharedExpectModifiedSnapshotURL('http://localhost:8000/advanced-search/cf1a5848-f658-4939-be11-dctwo-%7B%20abc%20%5Bdbd%5D%20%7D/2253'); + }); + + it('do not encode reserved URI character(%2F) in snapshot url', async () => { + let partiallyEncodedURL = 'http://localhost:8000/https%2F/abc[123].html'; + await percy.snapshot([ + { url: partiallyEncodedURL } + ]); + + sharedExpectModifiedSnapshotURL('http://localhost:8000/https%2F/abc%5B123%5D.html'); + }); + + it('do not double encode if multiple reserved URI characters in snapshot url', async () => { + let partiallyEncodedURL = 'http://localhost:8000/https%2F/abc[123]%3A%3F_abc.html'; + await percy.snapshot([ + { url: partiallyEncodedURL } + ]); + + sharedExpectModifiedSnapshotURL('http://localhost:8000/https%2F/abc%5B123%5D%3A%3F_abc.html'); + }); + + describe('does not modifies snapshot url', () => { + let partiallyEncodedURL = 'http://localhost:8000/https%2F/abc[123].html'; + const sharedExpect = () => { + expect(logger.stderr).toEqual(jasmine.arrayContaining([ + '[percy:core:snapshot] Received snapshot: /https%2F/abc[123].html', + '[percy:core:snapshot] - url: http://localhost:8000/https%2F/abc[123].html' + ])); + + expect(logger.stderr).not.toEqual(jasmine.arrayContaining([ + '[percy:core:snapshot] Snapshot URL modified to: http://localhost:8000/https//abc%5B123%5D.html' + ])); + }; + + describe('with PERCY_MODIFY_SNAPSHOT_URL=false', () => { + beforeEach(() => { + process.env.PERCY_MODIFY_SNAPSHOT_URL = 'false'; + }); + + afterEach(() => { + process.env.PERCY_MODIFY_SNAPSHOT_URL = 'true'; + }); + + it('uses original snapshot url', async () => { + await percy.snapshot([ + { url: partiallyEncodedURL } + ]); + + sharedExpect(); + }); + }); + }); + + it('warns for invalid characters that can not get encoded properly', async () => { + let givenURL = 'http://localhost:8000/advanced-search/cf1a5848%-f658-4939-be11-dct'; + await percy.snapshot([ + { url: givenURL } + ]); + + expect(logger.stderr).toEqual(jasmine.arrayContaining([ + `[percy:core:snapshot] Invalid URL detected for url: ${givenURL} - the snapshot may fail on Percy. Please confirm that your website URL is valid.` + ])); + }); + + it('debug logs and does not warn if error other than URIError', async () => { + spyOn(global, 'decodeURI').and.callFake(() => { + throw new Error('Some Invalid Error'); + }); + + let givenURL = 'http://localhost:8000/'; + await percy.snapshot([ + { url: givenURL } + ]); + + expect(logger.stderr).not.toEqual(jasmine.arrayContaining([ + `[percy] Invalid URL detected for url: ${givenURL}` + ])); + }); + }); + + describe('with valid snpashot url', () => { + const sharedExpectNonModifiedSnapshotURL = (expectedURL) => { + expect(logger.stdout).not.toEqual(jasmine.arrayContaining([ + `[percy] Snapshot URL modified to: ${expectedURL}` + ])); + }; + + it('not modifies if the url does not invalid characters', async () => { + let givenURL = 'http://localhost:8000/advanced-search/cf1a5848-f658-4939-be11-dx3'; + await percy.snapshot([ + { url: givenURL } + ]); + + sharedExpectNonModifiedSnapshotURL(givenURL); + }); + }); + }); + it('errors if the url is invalid', async () => { await percy.snapshot({ name: 'test snapshot', diff --git a/packages/core/test/utils.test.js b/packages/core/test/utils.test.js new file mode 100644 index 000000000..2a43d7c8f --- /dev/null +++ b/packages/core/test/utils.test.js @@ -0,0 +1,48 @@ +import { decodeAndEncodeURLWithLogging } from '../src/utils.js'; +import { logger } from './helpers/index.js'; +import percyLogger from '@percy/logger'; + +describe('utils', () => { + let log; + beforeEach(async () => { + log = percyLogger(); + logger.reset(true); + await logger.mock({ level: 'debug' }); + }); + + describe('decodeAndEncodeURLWithLogging', () => { + it('does not warn invalid url when options params is null', async () => { + decodeAndEncodeURLWithLogging('https://abc.com/test%abc', log); + expect(logger.stderr).toEqual([]); + }); + + it('does not warn invalid url when shouldLogWarning = false', async () => { + decodeAndEncodeURLWithLogging('https://abc.com/test%abc', log, + { + shouldLogWarning: false + }); + + expect(logger.stderr).toEqual([]); + }); + + it('returns modified url', async () => { + const url = decodeAndEncodeURLWithLogging('https://abc.com/test[ab]c', log, + { + shouldLogWarning: false + }); + expect(logger.stderr).toEqual([]); + expect(url).toEqual('https://abc.com/test%5Bab%5Dc'); + }); + + it('warns if invalid url when shouldLogWarning = true', async () => { + decodeAndEncodeURLWithLogging( + 'https://abc.com/test%abc', + log, + { + shouldLogWarning: true, + warningMessage: 'some Warning Message' + }); + expect(logger.stderr).toEqual(['[percy] some Warning Message']); + }); + }); +});