Skip to content

Commit

Permalink
fix(web): prevent writing large auspice json to local storage
Browse files Browse the repository at this point in the history
After `dataset-json-url` param is used, the entire auspice json was stored in local storage. For large files this would be bigger that what browsers allow and the store operation would fail. Subsequent reads would retrieve either nothing or some fragment of data.

Here I add some logic to make sure we store auspice json in memory only. Sadly, this means that page reload wipes all the data and we currently don't have a good mechanism to store the previous dataset URL in order to re-fetch it. This will need to be implemented. Currently I opted for discarding the stored dataset if it was auspice dataset. This should be alright, as the feature was primarily designed to be used when navigating to Nextclade with url params set.

This PR only implements a workaround for something that's fundamentally poorly designed and broken. The full dataset description object is still saved to local storage. The definitive fix requires serious restructuring and altering many components. It will be implemented later (#1464)

But this workaround should allow most use-cases of `dataset-json-url` to work and makes this auspice dataset feature releasable.

Can be tested with a large tree like this:

```
?dataset-json-url=https://nextstrain.org/ncov/gisaid/global/all-time
```
  • Loading branch information
ivan-aksamentov committed Jun 4, 2024
1 parent 29c49e5 commit 8326ecb
Show file tree
Hide file tree
Showing 6 changed files with 46 additions and 20 deletions.
3 changes: 2 additions & 1 deletion packages/nextclade-web/.eslintrc.js
Original file line number Diff line number Diff line change
Expand Up @@ -130,6 +130,7 @@ module.exports = {
'no-shadow': 'off',
'no-unused-vars': 'off',
'only-ascii/only-ascii': 'warn',
'prefer-const': ['warn', { destructuring: 'all' }],
'prefer-for-of': 'off',
'prettier/prettier': 'warn',
'react-hooks/exhaustive-deps': [
Expand All @@ -148,7 +149,7 @@ module.exports = {
'react/state-in-constructor': 'off',
'security/detect-non-literal-fs-filename': 'off',
'security/detect-object-injection': 'off',
'sonarjs/cognitive-complexity': ['warn', 20],
'sonarjs/cognitive-complexity': 'off',
'unicorn/consistent-function-scoping': 'off',
'unicorn/escape-case': 'off',
'unicorn/filename-case': 'off',
Expand Down
6 changes: 1 addition & 5 deletions packages/nextclade-web/src/io/fetchDatasets.ts
Original file line number Diff line number Diff line change
Expand Up @@ -128,11 +128,7 @@ export async function initializeDatasets(datasetServerUrl: string, urlQuery: Par
const minimizerIndexVersion = await getCompatibleMinimizerIndexVersion(datasetServerUrl, datasetsIndexJson)

// Check if URL params specify dataset params and try to find the corresponding dataset
const currentDataset:
| (Dataset & {
auspiceJson?: AuspiceTree
})
| undefined = await getDatasetFromUrlParams(urlQuery, datasets)
const currentDataset = await getDatasetFromUrlParams(urlQuery, datasets)

return { datasets, currentDataset, minimizerIndexVersion }
}
Expand Down
6 changes: 3 additions & 3 deletions packages/nextclade-web/src/io/fetchSingleDatasetAuspice.ts
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,7 @@ export async function fetchSingleDatasetAuspice(datasetJsonUrl_: string) {
}
}

const currentDataset: Dataset & { auspiceJson?: AuspiceTree } = {
const currentDataset: Dataset = {
path: datasetJsonUrl,
capabilities: {
primers: false,
Expand All @@ -50,8 +50,8 @@ export async function fetchSingleDatasetAuspice(datasetJsonUrl_: string) {
name,
...pathogen?.attributes,
},
type: 'auspiceJson',
version,
auspiceJson,
}

const datasets = [currentDataset]
Expand All @@ -60,5 +60,5 @@ export async function fetchSingleDatasetAuspice(datasetJsonUrl_: string) {
const defaultDatasetName = currentDatasetName
const defaultDatasetNameFriendly = attrStrMaybe(currentDataset.attributes, 'name') ?? currentDatasetName

return { datasets, defaultDataset, defaultDatasetName, defaultDatasetNameFriendly, currentDataset }
return { datasets, defaultDataset, defaultDatasetName, defaultDatasetNameFriendly, currentDataset, auspiceJson }
}
14 changes: 11 additions & 3 deletions packages/nextclade-web/src/io/fetchSingleDatasetDirectory.ts
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@ import axios from 'axios'
import urljoin from 'url-join'
import { mapValues } from 'lodash'
import { concurrent } from 'fasy'
import { attrStrMaybe, AuspiceTree, Dataset, VirusProperties } from 'src/types'
import { attrStrMaybe, Dataset, VirusProperties } from 'src/types'
import { removeTrailingSlash } from 'src/io/url'
import { axiosFetch, axiosHead, axiosHeadOrUndefined } from 'src/io/axiosFetch'
import { sanitizeError } from 'src/helpers/sanitizeError'
Expand All @@ -17,14 +17,15 @@ export async function fetchSingleDatasetDirectory(

const files = mapValues(pathogen.files, (file) => (file ? urljoin(datasetRootUrl, file) : file))

const currentDataset: Dataset & { auspiceJson?: AuspiceTree } = {
const currentDataset: Dataset = {
path: datasetRootUrl,
capabilities: {
primers: false,
qc: [],
},
...pathogen,
files,
type: 'directory',
}

const datasets = [currentDataset]
Expand All @@ -51,7 +52,14 @@ export async function fetchSingleDatasetDirectory(
Object.entries(files).filter(([_, key]) => !['examples', 'readme'].includes(key)),
)

return { datasets, defaultDataset, defaultDatasetName, defaultDatasetNameFriendly, currentDataset }
return {
datasets,
defaultDataset,
defaultDatasetName,
defaultDatasetNameFriendly,
currentDataset,
auspiceJson: undefined,
}
}

async function fetchPathogenJson(datasetRootUrl: string) {
Expand Down
26 changes: 18 additions & 8 deletions packages/nextclade-web/src/pages/_app.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,6 @@ import { RecoilEnv, RecoilRoot, useRecoilCallback, useRecoilState, useRecoilValu
import { AppProps } from 'next/app'
import { useRouter } from 'next/router'
import dynamic from 'next/dynamic'
import type { Dataset, AuspiceTree } from 'src/types'
import { sanitizeError } from 'src/helpers/sanitizeError'
import { useRunAnalysis } from 'src/hooks/useRunAnalysis'
import i18nAuspice, { changeAuspiceLocale } from 'src/i18n/i18n.auspice'
Expand Down Expand Up @@ -97,24 +96,35 @@ function RecoilStateInitializer() {

const datasetInfo = await fetchSingleDataset(urlQuery)
if (!isNil(datasetInfo)) {
const { datasets, currentDataset } = datasetInfo
return { datasets, currentDataset, minimizerIndexVersion: undefined }
const { datasets, currentDataset, auspiceJson } = datasetInfo
return { datasets, currentDataset, minimizerIndexVersion: undefined, auspiceJson }
}
return { datasets, currentDataset, minimizerIndexVersion }
return { datasets, currentDataset, minimizerIndexVersion, auspiceJson: undefined }
})
.catch((error) => {
// Dataset error is fatal and we want error to be handled in the ErrorBoundary
setInitialized(false)
set(globalErrorAtom, sanitizeError(error))
throw error
})
.then(async ({ datasets, currentDataset, minimizerIndexVersion }) => {
.then(async ({ datasets, currentDataset, minimizerIndexVersion, auspiceJson }) => {
set(datasetsAtom, { datasets })
const previousDataset = await getPromise(datasetCurrentAtom)
const dataset: (Dataset & { auspiceJson?: AuspiceTree }) | undefined = currentDataset ?? previousDataset
let previousDataset = await getPromise(datasetCurrentAtom)
if (previousDataset?.type === 'auspiceJson') {
// Disregard previously saved dataset if it's Auspice dataset, because the data is no longer available.
// We might re-fetch instead, but need to persist URL for that somehow.
previousDataset = undefined
}

const dataset = currentDataset ?? previousDataset
set(datasetCurrentAtom, dataset)
set(minimizerIndexVersionAtom, minimizerIndexVersion)
set(datasetJsonAtom, dataset?.auspiceJson)

if (dataset?.type === 'auspiceJson' && !isNil(auspiceJson)) {
set(datasetJsonAtom, auspiceJson)
} else {
set(datasetJsonAtom, undefined)
}
return dataset
})
.then(async (dataset) => {
Expand Down
11 changes: 11 additions & 0 deletions packages/nextclade/src/io/dataset.rs
Original file line number Diff line number Diff line change
Expand Up @@ -78,6 +78,9 @@ pub struct Dataset {
#[serde(default, skip_serializing_if = "DatasetVersion::is_empty")]
pub version: DatasetVersion,

#[serde(default, skip_serializing_if = "Option::is_none")]
pub r#type: Option<DatasetType>,

#[serde(flatten)]
pub other: serde_json::Value,
}
Expand Down Expand Up @@ -386,3 +389,11 @@ pub struct MinimizerIndexVersion {
#[serde(flatten)]
pub other: serde_json::Value,
}

#[derive(Clone, Debug, Serialize, Deserialize, JsonSchema)]
#[serde(rename_all = "camelCase")]
pub enum DatasetType {
Directory,
AuspiceJson,
Other,
}

0 comments on commit 8326ecb

Please sign in to comment.