Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add indexing for scan results in Azure Table Storage #744

Merged
merged 1 commit into from
Nov 1, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
29 changes: 26 additions & 3 deletions api/functions/commands.js
Original file line number Diff line number Diff line change
Expand Up @@ -16,15 +16,26 @@ const {
const azure = require('azure-storage');
const slug = require('slug');

exports.insertScanResult = (api, buildId, runId, data, buildDate) => {
exports.insertScanResult = async (api, buildId, runId, data, buildDate, url) => {
const entGen = azure.TableUtilities.entityGenerator;
let entity = {
PartitionKey: entGen.String(api),
RowKey: entGen.String(`${api}-${newGuid()}`),
buildId: entGen.String(buildId),
runId: entGen.String(runId),
buildDate: entGen.DateTime(buildDate),
apiKey: entGen.String(api)
};
let entityRunIndexed = {
...entity,
PartitionKey: entGen.String(`${api}-${runId}`),
};
let entityUrlIndexed = {
...entity,
PartitionKey: entGen.String(`${api}-${slug(url)}`),
};
await insertEntity(TABLE.ScanResults, replaceProp(data, entityRunIndexed));
await insertEntity(TABLE.ScanResults, replaceProp(data, entityUrlIndexed));
return insertEntity(TABLE.ScanResults, replaceProp(data, entity));
};

Expand Down Expand Up @@ -105,16 +116,28 @@ exports.addHTMLHintRulesForEachRun = (api, data) => {
);
};

exports.insertScanSummary = (api, buildId, runId, buildDate, data) => {
exports.insertScanSummary = async (api, buildId, runId, buildDate, data) => {
var entGen = azure.TableUtilities.entityGenerator;
// use Log tail pattern to get native sort from Table Storage
var entity = {
const entity = {
PartitionKey: entGen.String(api),
RowKey: entGen.String(getReversedTick()),
buildId: entGen.String(buildId),
runId: entGen.String(runId),
buildDate: entGen.DateTime(buildDate),
scanResultVersion: entGen.Int32(2),
apiKey: entGen.String(api)
};
const entityRunIndexed = {
...entity,
PartitionKey: entGen.String(`${api}-${runId}`),
};
let entityUrlIndexed = {
...entity,
PartitionKey: entGen.String(`${api}-${slug(data.url)}`),
};
await insertEntity(TABLE.Scans, replaceProp(data, entityRunIndexed));
await insertEntity(TABLE.Scans, replaceProp(data, entityUrlIndexed));
return insertEntity(TABLE.Scans, replaceProp(data, entity));
};

Expand Down
4 changes: 2 additions & 2 deletions api/functions/index.js
Original file line number Diff line number Diff line change
Expand Up @@ -230,7 +230,6 @@ app.post('/scanresult/:api/:buildId', async (req, res) => {
const buildId = req.params.buildId;
const runId = newGuid();
const buildDate = new Date();
const unscannableLinks = await getUnscannableLinks();

const uid = await getUserIdFromApiKey(apikey);
if (!uid) {
Expand Down Expand Up @@ -314,7 +313,8 @@ app.post('/scanresult/:api/:buildId', async (req, res) => {
buildId,
runId,
brokenLinkData,
buildDate
buildDate,
url
);
cb(data);
}, {
Expand Down
78 changes: 57 additions & 21 deletions api/functions/queries.js
Original file line number Diff line number Diff line change
Expand Up @@ -72,7 +72,14 @@ exports.getConfig = (api) =>

exports.getScanDetails = async (runId) => {
const scan = await exports.getSummaryById(runId);
const filter = odata`PartitionKey eq ${scan.partitionKey} and src ge ${scan.url} and src le ${incrementString(scan.url)}`;
let filter;

if (scan.scanResultVersion === 2) {
filter = `PartitionKey eq '${scan.partitionKey}-${slug(scan.url)}'`;
} else {
filter = odata`PartitionKey eq ${scan.partitionKey} and src ge ${scan.url} and src le ${incrementString(scan.url)}`;
}

const entity = new TableClient(azureUrl, TABLE.ScanResults, credential).listEntities({
queryOptions: { filter }
});
Expand Down Expand Up @@ -208,7 +215,10 @@ exports.getAllPublicSummary = (showAll) =>
for await (const item of entity) {
result.push(item);
}
resolve(result)

resolve(result.filter((value, index, self) => {
return self.findIndex(v => v.runId === value.runId) === index;
}))
} else {
// Top 500 scans in last 24 months
var date = new Date();
Expand All @@ -218,24 +228,39 @@ exports.getAllPublicSummary = (showAll) =>
queryOptions: { filter: odata`isPrivate eq ${false} and buildDate gt datetime'${date.toISOString()}'` }
});
const iterator = entity.byPage({ maxPageSize: parseInt(process.env.MAX_SCAN_SIZE) });
let result = [];
for await (const item of iterator) {
resolve(item)
result = item;
break;
}

resolve(result.filter((value, index, self) => {
return self.findIndex(v => v.runId === value.runId) === index;
}))
}
});

exports.getSummaryById = (runId) =>
getRun(runId).then((doc) =>
new Promise(async (resolve) => {
const entity = new TableClient(azureUrl, TABLE.Scans, credential).listEntities({
queryOptions: { filter: odata`PartitionKey eq ${doc.apikey} and runId eq ${doc.runId}` }
});
let result = []
for await (const item of entity) {
result.push(item);
const getSummary = async (filter) => {
const entity = new TableClient(azureUrl, TABLE.Scans, credential).listEntities({
queryOptions: { filter }
});
let result = []
for await (const item of entity) {
result.push(item);
}
return result[0];
};

let summary = await getSummary(`PartitionKey eq '${doc.apikey}-${doc.runId}'`);

if (!summary) {
summary = await getSummary(odata`PartitionKey eq ${doc.apikey} and runId eq ${doc.runId}`);
}
resolve(result[0] || {})

resolve(summary || {});
}));

exports.getLatestSummaryFromUrlAndApi = (url, api) =>
Expand Down Expand Up @@ -272,19 +297,30 @@ exports.getAlertEmailAddressesFromTokenAndUrl = (api, url) =>

exports.getAllScanSummaryFromUrl = (url, api) =>
new Promise(async (resolve) => {
const entity = new TableClient(azureUrl, TABLE.Scans, credential).listEntities({
queryOptions: { filter: odata`url eq ${url} and PartitionKey eq ${api}` }
});
const iterator = entity.byPage({ maxPageSize: 10 });
for await (const item of iterator) {
if (item[0]) {
const existing = await getExistingBrokenLinkCount(item[0].runId);
item[0].totalUniqueBrokenLinksExisting = existing;
const getSummary = async (filter) => {
const entity = new TableClient(azureUrl, TABLE.Scans, credential).listEntities({
queryOptions: { filter }
});
const iterator = entity.byPage({ maxPageSize: 10 });
let result;
for await (const item of iterator) {
if (item[0]) {
const existing = await getExistingBrokenLinkCount(item[0].runId);
item[0].totalUniqueBrokenLinksExisting = existing;
}
result = item;
break;
}

resolve(item);
break;
return result;
};

let summary = await getSummary(`PartitionKey eq '${api}-${slug(url)}'`);

if (!summary) {
summary = await getSummary(odata`url eq ${url} and PartitionKey eq ${api}`);
}

resolve(summary);
});

exports.getUnscannableLinks = () =>
Expand Down
4 changes: 2 additions & 2 deletions ui/src/components/summaryitemcomponents/CardSummary.svelte
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@
export let value;
export let isHtmlHintComp = false;
export let isLighthouseAudit = false;

let showShareAlert;
let previousScans = [];
let sharedEmailAddresses = [];
Expand Down Expand Up @@ -86,7 +86,7 @@
<button
type="button"
class="bg-white hover:bg-gray-800 hover:text-white font-semibold py-2 px-4 border rounded"
on:click={navigateTo(`/scanCompare/${value.partitionKey}/${convertSpecialCharUrl(value.url)}/${value.buildDate}`)}
on:click={navigateTo(`/scanCompare/${value.apiKey || value.partitionKey}/${convertSpecialCharUrl(value.url)}/${value.buildDate}`)}
>
<i class="fas fa-code-compare"></i>
{previousScans[0].runId !== value.runId ? "Compare to latest scan" : "Compare to previous scan"}
Expand Down
Loading