From 39f17cd25648420c52ceac2eb05f04da165c077a Mon Sep 17 00:00:00 2001 From: Kevin Day Date: Mon, 30 Jan 2023 10:29:25 -0600 Subject: [PATCH] Issue 497: Sanitize ID Field. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Provide a simple character sanitizer that strips out all ASCII and non-ASCII characters that are not in the specified range or list of simple characters. The added unit test includes one or more Unicode whitespace characters that may or may not be visibly printed. I generated these using my utf8 program, like this: ``` echo "h$(utf8 -cB U+200A)i$(utf8 -cB U+200D)j" ``` The code points can be verified using a program like my byte_dump program: ``` echo -en "a b c\fd e f g h i‍j" | byte_dump -wUt 4 ``` --- .../app/components/facetWidgetComponent.js | 4 +- .../webapp/app/filters/simpleAsciiFilter.js | 48 +++++++++ .../app/views/components/facetWidget.html | 4 +- .../unit/filters/simpleAsciiFilterTest.js | 98 +++++++++++++++++++ 4 files changed, 150 insertions(+), 4 deletions(-) create mode 100644 src/main/webapp/app/filters/simpleAsciiFilter.js create mode 100644 src/main/webapp/tests/unit/filters/simpleAsciiFilterTest.js diff --git a/src/main/webapp/app/components/facetWidgetComponent.js b/src/main/webapp/app/components/facetWidgetComponent.js index 68a8eb92..2d7e993d 100644 --- a/src/main/webapp/app/components/facetWidgetComponent.js +++ b/src/main/webapp/app/components/facetWidgetComponent.js @@ -32,7 +32,7 @@ sage.component("facetWidget", { $scope.addFacetFilter = function(facetName) { if (!$scope.findFilterByFacet($scope.$ctrl.facet.label, facetName)) { $scope.closeMoreFacets(); - angular.element("#moreFacetsModal-" + $scope.$ctrl.facet.label.split(' ').join('-')).on('hidden.bs.modal', function (e) { + angular.element("#moreFacetsModal-" + $filter('simpleAscii')($scope.$ctrl.facet.label)).on('hidden.bs.modal', function (e) { $scope.$ctrl.discoveryContext.addFilter($scope.$ctrl.facet.label, $scope.$ctrl.facet.key, 
facetName).then(function() { $scope.$ctrl.resetSearch(); }); @@ -87,7 +87,7 @@ sage.component("facetWidget", { $scope.moreFacets.push(...facets); $scope.moreFacetsLabel = $scope.$ctrl.facet.label; - ModalService.openModal("#moreFacetsModal-" + $scope.$ctrl.facet.label.split(' ').join('-')); + ModalService.openModal("#moreFacetsModal-" + $filter('simpleAscii')($scope.$ctrl.facet.label)); }; $scope.closeMoreFacets = function() {
diff --git a/src/main/webapp/app/filters/simpleAsciiFilter.js b/src/main/webapp/app/filters/simpleAsciiFilter.js
new file mode 100644
index 00000000..f44005b1
--- /dev/null
+++ b/src/main/webapp/app/filters/simpleAsciiFilter.js
@@ -0,0 +1,48 @@
+/**
+ * Filter "simpleAscii": reduce a string to a small set of plain ASCII
+ * characters.
+ *
+ * Only the ASCII digits (0-9), the ASCII letters (A-Z, a-z) and the symbols
+ * "+", "-", ".", "_" and "~" are kept. Everything else is removed: whitespace,
+ * all other ASCII symbols and control characters, and every non-ASCII
+ * character.
+ *
+ * The facet widget uses the result to build the "#moreFacetsModal-..."
+ * selector of its modal.
+ *
+ * JavaScript strings are sequences of UTF-16 code units, not UTF-8 bytes, so
+ * there are no UTF-8 lead bytes to look for. A non-ASCII character is either
+ * one code unit or a surrogate pair, and each of those units is outside the
+ * allowed set, so it is removed without affecting the characters around it.
+ *
+ * @example
+ *   $filter('simpleAscii')("Subject Area");  // "SubjectArea"
+ *   $filter('simpleAscii')("A+b_c.d~e-1");   // "A+b_c.d~e-1"
+ *   $filter('simpleAscii')("café-bar");      // "caf-bar"
+ *   $filter('simpleAscii')("a\u200Ab");      // "ab"
+ *   $filter('simpleAscii')(undefined);       // ""
+ *
+ * @param {*} text
+ *   The value to sanitize. Anything that is not a string produces "".
+ * @param {*} [usedOps]
+ *   Not used. Kept so the filter signature stays the same for callers.
+ *
+ * @returns {string}
+ *   The characters of text that are in the allowed set, in their original
+ *   order. May be the empty string.
+ */
+sage.filter('simpleAscii', function() {
+  // Matches every UTF-16 code unit outside the allowed set. Sharing one
+  // global regular expression between calls is safe here because
+  // String.prototype.replace() always starts matching at index 0.
+  var disallowed = /[^0-9A-Za-z+\-._~]/g;
+
+  return function(text, usedOps) {
+    // Non-string input (undefined, null, numbers, ...) sanitizes to "".
+    if (!angular.isString(text)) {
+      return "";
+    }
+
+    return text.replace(disallowed, "");
+  };
+});
diff --git a/src/main/webapp/app/views/components/facetWidget.html b/src/main/webapp/app/views/components/facetWidget.html
index 4ab3af58..afc9f241 100644
--- a/src/main/webapp/app/views/components/facetWidget.html
+++ b/src/main/webapp/app/views/components/facetWidget.html
@@ -1,9 +1,9 @@
-
\ No newline at end of file +
diff --git a/src/main/webapp/tests/unit/filters/simpleAsciiFilterTest.js b/src/main/webapp/tests/unit/filters/simpleAsciiFilterTest.js
new file mode 100644
index 00000000..d72cf686
--- /dev/null
+++ b/src/main/webapp/tests/unit/filters/simpleAsciiFilterTest.js
@@ -0,0 +1,98 @@
+/**
+ * Unit tests for the "simpleAscii" filter, which is expected to keep ASCII
+ * digits, ASCII letters and "+-._~" and to remove every other character.
+ */
+describe("filter: simpleAscii", function () {
+  var $q, $scope, MockedUser, filter;
+
+  // Create the mocked user that the "User" provider below hands out.
+  var initializeVariables = function () {
+    inject(function (_$q_) {
+      $q = _$q_;
+
+      MockedUser = new mockUser($q);
+    });
+  };
+
+  // Create a fresh scope and look up the filter under test.
+  var initializeFilter = function () {
+    inject(function (_$filter_, _$rootScope_) {
+      $scope = _$rootScope_.$new();
+
+      filter = _$filter_("simpleAscii");
+    });
+  };
+
+  beforeEach(function () {
+    module("core");
+    module("sage");
+    module("templates");
+    module("mock.user", function ($provide) {
+      var User = function () {
+        return MockedUser;
+      };
+      $provide.value("User", User);
+    });
+    module("mock.userService");
+
+    installPromiseMatchers();
+    initializeVariables();
+    initializeFilter();
+  });
+
+  afterEach(function () {
+    $scope.$destroy();
+  });
+
+  describe("Is the filter", function () {
+    it("defined", function () {
+      expect(filter).toBeDefined();
+    });
+  });
+
+  describe("Does the filter", function () {
+    it("return nothing on empty input", function () {
+      var result = filter("");
+
+      expect(result).toBe("");
+    });
+
+    it("all valid characters", function () {
+      var all = "abcdefghijklmnopqrstuvwxyz";
+      all += "ABCDEFGHIJKLMNOPQRSTUVWXYZ";
+      all += "1234567890";
+      all += "+-._~";
+
+      var result = filter(all);
+
+      expect(result).toBe(all);
+    });
+
+    it("without whitespace", function () {
+      // U+200A (hair space) is between the 'h' and 'i' while U+200D (zero width
+      // joiner) is between the 'i' and 'j'. They are written as escapes because
+      // the literal characters may not display, or survive, in a text editor.
+      var all = "a b c\fd e f g h\u200Ai\u200Dj";
+
+      var result = filter(all);
+
+      expect(result).toBe("abcdefghij");
+    });
+
+    it("without most symbols", function () {
+      var all = "a`!@#$%^&*()=b{}[];:'\"\\|,<>/?";
+
+      var result = filter(all);
+
+      expect(result).toBe("ab");
+    });
+
+    it("skips non-ascii unicode characters", function () {
+      var all = "a↡b𒆷c𔙃d𔑳↡𒆷𔙃e";
+
+      var result = filter(all);
+
+      expect(result).toBe("abcde");
+    });
+  });
+});