diff --git a/README.md b/README.md
index 8d84e81..8b27433 100644
--- a/README.md
+++ b/README.md
@@ -59,6 +59,28 @@ var geojson = geobuf.decode(new Pbf(data));
 
 Given a [Pbf](https://github.com/mapbox/pbf) object with Geobuf data, return a GeoJSON object. When loading Geobuf data over `XMLHttpRequest`, you need to set `responseType` to [`arraybuffer`](https://developer.mozilla.org/en-US/docs/Web/API/XMLHttpRequest/responseType).
 
+### compress
+
+```js
+var geojson = geobuf.compress(geobuf.decode(new Pbf(data)));
+```
+
+Given a GeoJSON object (or array of GeoJSON objects), returns an equivalent object with lower memory usage (avoids wasting memory on excess array capacity).
+This may be useful if GeoJSON objects are kept around for a long time after creating them.
+
+```js
+// To additionally deduplicate identical arrays
+// (may be unsafe if the geodata points are modified by callers)
+var geojson = geobuf.compress(geobuf.decode(new Pbf(data)), new Map(), new Map());
+// To reuse caches when deduplicating multiple geobuf objects:
+// (may be unsafe if the geodata points are modified by callers)
+var cache = new Map();
+var numericArrayCache = new Map();
+var geojson = geobuf.compress(geobuf.decode(new Pbf(data)), cache, numericArrayCache);
+```
+
+When `Map` is unavailable, this returns the original object without attempting to compress.
+
 ## Install
 
 Node and Browserify:
diff --git a/compress.js b/compress.js
new file mode 100644
index 0000000..c70b2e8
--- /dev/null
+++ b/compress.js
@@ -0,0 +1,96 @@
+'use strict';
+
+if (typeof Map == 'undefined' || !Object.entries) {
+    module.exports = function compress(value) {
+        return value;
+    };
+    return;
+}
+
+/**
+ * @param {array} value
+ * @returns {Boolean} is this an array where all fields are numbers (including the empty array).
+ */
+function isNumericArray(value) {
+    for (var i = 0; i < value.length; i++) {
+        if (typeof(value[i]) !== 'number') {
+            return false;
+        }
+    }
+    return true;
+}
+
+/**
+ * Compress data returned by geobuf's decode function.
+ * Objects are modified in place.
+ *
+ * This is useful in cases where the polygons will be used for a long time.
+ * By default, arrays are reserved with extra capacity that won't be used.
+ * (The empty array currently starts with a capacity of 16 elements,
+ * which is inefficient for decoded points of length 2)
+ *
+ * This has an optional parameter to deduplicate identical points,
+ * which may be useful for collections of polygons sharing points as well
+ * as for calling compress multiple times with different objects.
+ *
+ * @param {any} value the value to compress.
+ * @param {Map} [cache] by default, a new cache is created each time for external calls to compress.
+ * Must support get/has/set.
+ * @param {null|Map} [numericArrayCache] if non-null, this will be used to deduplicate
+ * numeric arrays of any length, including empty arrays.
+ *
+ * This deduplication may be unsafe if callers would modify arrays.
+ * @return {any} value with all fields compressed.
+ */
+function compress(value, cache = new Map(), numericArrayCache = null) {
+    if (cache.has(value)) {
+        return cache.get(value);
+    }
+    if (Array.isArray(value)) {
+        // By default, v8 allocates an array with a capacity of 16 elements.
+        // This wastes memory for small arrays such as Points of length 2.
+        //
+        // The function slice is used because it was available in older JS versions
+        // and experimentally appears to reduce capacity used.
+        var result = value.slice();
+        if (numericArrayCache && isNumericArray(result)) {
+            var cacheKey = JSON.stringify(result);
+            var cachedEntry = numericArrayCache.get(cacheKey);
+            if (cachedEntry) {
+                cache.set(value, cachedEntry);
+                return cachedEntry;
+            }
+            // Reuse array instances such as [], [1.5, 1.5]
+            numericArrayCache.set(cacheKey, result);
+            cache.set(value, result);
+            // Nothing left to compress.
+            return result;
+        }
+        // Store this in the cache immediately to guard against infinite recursion on
+        // invalid inputs.
+        cache.set(value, result);
+        for (var i = 0; i < result.length; i++) {
+            result[i] = compress(result[i], cache, numericArrayCache);
+        }
+        return result;
+    } else if (value && typeof value === 'object') {
+        // Compress fields of the object in place.
+        // Set this to the cache immediately to prevent infinite recursion on invalid data.
+        cache.set(value, value);
+        var entries = Object.entries(value);
+        for (var j = 0; j < entries.length; j++) {
+            var entry = entries[j];
+            var field = entry[1];
+            var compressedValue = compress(field, cache, numericArrayCache);
+            if (field !== compressedValue) {
+                // Replace object field for this key with the compressed version
+                value[entry[0]] = compressedValue;
+            }
+        }
+    } else if (typeof value === 'string') {
+        // Deduplicate strings.
+        cache.set(value, value);
+    }
+    return value;
+}
+module.exports = compress;
diff --git a/index.js b/index.js
index 8a66e8b..3d92561 100644
--- a/index.js
+++ b/index.js
@@ -2,3 +2,4 @@
 
 exports.encode = require('./encode');
 exports.decode = require('./decode');
+exports.compress = require('./compress');
diff --git a/test/validate.test.js b/test/validate.test.js
index c31d697..0dd0732 100644
--- a/test/validate.test.js
+++ b/test/validate.test.js
@@ -82,6 +82,63 @@ test('roundtrip a circle with potential accumulating error', function (t) {
     t.end();
 });
 
+test('can compress memory', function (t) {
+    if (typeof Map === 'undefined') {
+        t.end();
+        return;
+    }
+    // Generate an invalid shape with duplicate points.
+    var feature = {
+        'type': 'MultiPolygon',
+        'coordinates': [[[]]]
+    };
+    var points = 16;
+    for (var i = 0; i <= points; i++) {
+        feature.coordinates[0][0].push([
+            Math.cos(Math.PI * 2.0 * (i % 4) / points),
+            Math.sin(Math.PI * 2.0 * (i % 4) / points)
+        ]);
+    }
+    var roundTripped = geobuf.decode(new Pbf(geobuf.encode(feature, new Pbf())));
+    var originalJSON = JSON.stringify(roundTripped);
+    var compressedFeature = geobuf.compress(roundTripped);
+    var compressedJSON = JSON.stringify(compressedFeature);
+    var c = compressedFeature.coordinates;
+    t.same(compressedJSON, originalJSON);
+    t.same(c[0][0][0], c[0][0][4], 'should be points with equivalent data');
+    t.notStrictEqual(c[0][0][0], c[0][0][4], 'should not deduplicate different array instances by default');
+    t.same(c[0][0][0], [1, 0], 'should preserve value');
+    t.end();
+});
+test('can compress memory and deduplicate points', function (t) {
+    if (typeof Map === 'undefined') {
+        t.end();
+        return;
+    }
+    // Generate an invalid shape with duplicate points.
+    var feature = {
+        'type': 'MultiPolygon',
+        'coordinates': [[[]]]
+    };
+    var points = 12;
+    for (var i = 0; i <= points; i++) {
+        feature.coordinates[0][0].push([
+            Math.cos(Math.PI * 2.0 * (i % 4) / points),
+            Math.sin(Math.PI * 2.0 * (i % 4) / points)
+        ]);
+    }
+    var roundTripped = geobuf.decode(new Pbf(geobuf.encode(feature, new Pbf())));
+    var originalJSON = JSON.stringify(roundTripped);
+    var compressedFeature = geobuf.compress(roundTripped, new Map(), new Map());
+    var compressedJSON = JSON.stringify(compressedFeature);
+    var pts = compressedFeature.coordinates[0][0];
+    t.same(compressedJSON, originalJSON);
+    t.same(pts[0], pts[4], 'should be points with equivalent data');
+    t.strictEqual(pts[0], pts[4], 'should deduplicate different array instances when cache passed in');
+    t.strictEqual(pts[0], pts[8], 'should deduplicate different array instances when cache passed in');
+    t.same(pts[0], [1, 0], 'should preserve value');
+    t.end();
+});
 function roundtripTest(geojson) {
     return function (t) {
         var buf = geobuf.encode(geojson, new Pbf());