Skip to content

Commit

Permalink
DOP-4830: Smartling can ingest search manifests from Netlify (#7)
Browse files Browse the repository at this point in the history
* DOP-5018 add logic for deleting stale properties, documents

* DOP-5018 tests for deleteStale WIP

* DOP-5018 improvements made while writing tests

* DOP-5018 more mock data, finished tests

* DOP-5018 add last test

* DOP-5018 cleaning up

* DOP-4830 initial logic for uploading manifests to s3

* DOP-4830 try pushing up without credentials

* DOP-4830 add credentials

* DOP-4830 add region

* DOP-4830 add try catch to connection

* DOP-4830 put credentials in consts

* DOP-4830 fix typo

* DOP-4830 change comment

* DOP-4830 refactor and add s3 upload test

* DOP-4830 cleaning

* DOP-4830 cleaning

* DOP-4830 cleaning

* DOP-4830 cleaning tests

* DOP-4830 addressing comments

* DOP-5023 addressing nits
  • Loading branch information
anabellabuckvar authored Sep 27, 2024
1 parent a962c38 commit d2bb40b
Show file tree
Hide file tree
Showing 17 changed files with 3,777 additions and 1,884 deletions.
4,906 changes: 3,333 additions & 1,573 deletions search-manifest/package-lock.json

Large diffs are not rendered by default.

63 changes: 33 additions & 30 deletions search-manifest/package.json
Original file line number Diff line number Diff line change
@@ -1,32 +1,35 @@
{
"name": "netlify-search-integration",
"version": "0.0.1",
"main": "src/index.ts",
"type": "module",
"scripts": {
"build": "netlify-integration build -a",
"dev": "netlify-integration dev -a",
"preview": "netlify-integration preview",
"test": "vitest"
},
"dependencies": {
"@netlify/sdk": "^1.60.2-pr-1468.3",
"bson": "^6.8.0",
"crypto": "^1.0.1",
"deps": "^1.0.0",
"fs": "^0.0.1-security",
"jsonpath-plus": "^9.0.0",
"legacy": "^0.0.3",
"lts": "^1.2.0",
"mongodb": "^6.8.0",
"mongodb-memory-server": "^10.0.0",
"node": "^20.5.0",
"path": "^0.12.7",
"peer": "^1.0.2",
"typescript": "^5.5.2"
},
"devDependencies": {
"@types/node": "^20.14.9",
"vitest": "^2.0.5"
}
"name": "netlify-search-integration",
"version": "0.0.1",
"main": "src/index.ts",
"type": "module",
"scripts": {
"build": "netlify-integration build -a",
"dev": "netlify-integration dev -a",
"preview": "netlify-integration preview",
"test": "vitest"
},
"dependencies": {
"@aws-sdk/client-s3": "^3.654.0",
"@aws-sdk/lib-storage": "^3.657.0",
"@netlify/sdk": "^1.60.2-pr-1468.3",
"bson": "^6.8.0",
"crypto": "^1.0.1",
"deps": "^1.0.0",
"fs": "^0.0.1-security",
"jsonpath-plus": "^9.0.0",
"legacy": "^0.0.3",
"lts": "^1.2.0",
"mongodb": "^6.8.0",
"mongodb-memory-server": "^10.0.0",
"node": "^20.5.0",
"path": "^0.12.7",
"peer": "^1.0.2",
"typescript": "^5.5.2"
},
"devDependencies": {
"@types/node": "^20.14.9",
"aws-sdk-client-mock": "^4.0.2",
"vitest": "^2.0.5"
}
}
25 changes: 13 additions & 12 deletions search-manifest/src/generateManifest/manifest.ts
Original file line number Diff line number Diff line change
@@ -1,9 +1,9 @@
import type { ManifestEntry } from './manifestEntry';

export class Manifest {
url: string;
global?: boolean;
documents: ManifestEntry[];
url: string;
global: boolean;
documents: ManifestEntry[];

constructor(url = '', includeInGlobalSearch = false) {
this.url = url;
Expand All @@ -16,14 +16,15 @@ export class Manifest {
this.documents.push(document);
}

export() {
//return the manifest as json
const manifest = {
url: this.url,
includeInGlobalSearch: this.global,
documents: this.documents,
};
export() {
//return the manifest as JSON formatted string
const manifest = {
url: this.url,
includeInGlobalSearch: this.global,
documents: this.documents,
};

return manifest;
}
//TODO: verify that JSON.stringify produces exactly the same output as Python's json.dumps, which was used in Mut
return JSON.stringify(manifest);
}
}
98 changes: 57 additions & 41 deletions search-manifest/src/index.ts
Original file line number Diff line number Diff line change
@@ -1,35 +1,37 @@
// Documentation: https://sdk.netlify.com
import { NetlifyIntegration } from '@netlify/sdk';
import { Manifest } from './generateManifest/manifest';
import { promisify } from 'util';
import { BSON } from 'bson';
import { Document } from './generateManifest/document';
import { uploadManifest } from './uploadToAtlas/uploadManifest';
import { NetlifyIntegration } from "@netlify/sdk";
import { Manifest } from "./generateManifest/manifest";
import { promisify } from "util";
import { BSON } from "bson";
import { Document } from "./generateManifest/document";
import { uploadManifest } from "./uploadToAtlas/uploadManifest";

import { readdir, readFileSync } from "fs";
import getProperties from "./uploadToAtlas/getProperties";
import { uploadManifestToS3 } from "./uploadToS3/uploadManifest";
import { teardown } from "./uploadToAtlas/searchConnector";
import { s3UploadParams } from "./types";

const readdirAsync = promisify(readdir);

const integration = new NetlifyIntegration();

export const generateManifest = async () => {
// create Manifest object
const manifest = new Manifest();
console.log('=========== generating manifests ================');

//go into documents directory and get list of file entries
const entries = await readdirAsync('documents', { recursive: true });

const mappedEntries = entries.filter((fileName) => {
return (
fileName.includes('.bson') &&
!fileName.includes('images') &&
!fileName.includes('includes') &&
!fileName.includes('sharedinclude')
);
});
// create Manifest object
const manifest = new Manifest();
console.log("=========== generating manifests ================");

//go into documents directory and get list of file entries
const entries = await readdirAsync("documents", { recursive: true });

const mappedEntries = entries.filter((fileName) => {
return (
fileName.includes(".bson") &&
!fileName.includes("images") &&
!fileName.includes("includes") &&
!fileName.includes("sharedinclude")
);
});

process.chdir("documents");
for (const entry of mappedEntries) {
Expand All @@ -46,35 +48,49 @@ export const generateManifest = async () => {

//Return indexing data from a page's AST for search purposes.
integration.addBuildEventHandler(
'onSuccess',
async ({ utils: { run }, netlifyConfig }) => {
// Get content repo zipfile in AST representation.
"onSuccess",
async ({ utils: { run }, netlifyConfig }) => {
// Get content repo zipfile in AST representation.

await run.command('unzip -o bundle.zip');
const branch = netlifyConfig.build?.environment['BRANCH'];
await run.command("unzip -o bundle.zip");
const branch = netlifyConfig.build?.environment["BRANCH"];

//use export function for uploading to S3
const manifest = await generateManifest();
//use export function for uploading to S3
const manifest = await generateManifest();

console.log("=========== finished generating manifests ================");
//TODO: create an interface for this return type
const {
searchProperty,
projectName,
url,
includeInGlobalSearch,
}: {
searchProperty: string;
projectName: string;
url: string;
includeInGlobalSearch: boolean;
} = await getProperties(branch);

//uploads manifests to S3
console.log("=========== Uploading Manifests to S3=================");
//upload manifests to S3
const uploadParams: s3UploadParams = {
bucket: "docs-search-indexes-test",
//TODO: change these values based on the environment
prefix: "search-indexes/ab-testing",
fileName: `${projectName}-${branch}.json`,
manifest: manifest.export(),
};

const s3Status = await uploadManifestToS3(uploadParams);

console.log(`S3 upload status: ${JSON.stringify(s3Status)}`);
console.log("=========== Finished Uploading to S3 ================");

try {
const {
searchProperty,
url,
includeInGlobalSearch,
}: {
searchProperty: string;
url: string;
includeInGlobalSearch: boolean;
} = await getProperties(branch);

manifest.url = url;
manifest.global = includeInGlobalSearch;

//TODO: upload manifests to S3

//uploads manifests to atlas
console.log("=========== Uploading Manifests =================");
await uploadManifest(manifest, searchProperty);
Expand Down
Original file line number Diff line number Diff line change
@@ -1,33 +1,40 @@
import type { WithId } from 'mongodb';
import type { ManifestEntry } from '../generateManifest/manifestEntry';
import type { WithId } from "mongodb";
import type { ManifestEntry } from "./generateManifest/manifestEntry";

export interface RefreshInfo {
export type RefreshInfo = {
deleted: number;
upserted: number;
modified: number;
dateStarted: Date;
elapsedMS: number;
}
};

/**
 * Arguments passed to `uploadManifestToS3` when uploading a search manifest.
 *
 * NOTE(review): how `bucket`, `prefix` and `fileName` are combined into the
 * final object key is decided by the uploader, which is not visible here —
 * confirm against `uploadToS3/uploadManifest`.
 */
export type s3UploadParams = {
// Destination bucket, e.g. "docs-search-indexes-test".
bucket: string;
// Path prefix for the upload, e.g. "search-indexes/ab-testing".
prefix: string;
// Name of the uploaded file; the build handler uses `${projectName}-${branch}.json`.
fileName: string;
// Manifest contents already serialized to a string (the result of `Manifest.export()`).
manifest: string;
};

interface EnvironmentConfig {
type EnvironmentConfig = {
dev: string;
stg: string;
dotcomstg: string;
dotcomprd: string;
prd: string;
}
};

/**
 * A docsets record as read from the database (`WithId` adds the `_id` field).
 *
 * Both fields hold one string per deployment environment
 * (dev, stg, dotcomstg, dotcomprd, prd), as described by `EnvironmentConfig`.
 */
export interface DocsetsDocument extends WithId<Document> {
// Per-environment URL for the docset.
url: EnvironmentConfig;
// Per-environment prefix for the docset.
prefix: EnvironmentConfig;
}

export interface DatabaseDocument extends ManifestEntry {
url: string;
lastModified: Date;
manifestRevisionId: string;
searchProperty: string[];
includeInGlobalSearch: boolean;
url: string;
lastModified: Date;
manifestRevisionId: string;
searchProperty: string[];
includeInGlobalSearch: boolean;
}

export interface ReposBranchesDocument extends WithId<Document> {
Expand Down
2 changes: 1 addition & 1 deletion search-manifest/src/uploadToAtlas/deleteStaleProperties.ts
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
import { db, teardown } from "./searchConnector";
import { DatabaseDocument } from "./types";
import { DatabaseDocument } from "../types";

const ATLAS_SEARCH_URI = `mongodb+srv://${process.env.MONGO_ATLAS_USERNAME}:${process.env.MONGO_ATLAS_PASSWORD}@${process.env.MONGO_ATLAS_SEARCH_HOST}/?retryWrites=true&w=majority`;

Expand Down
77 changes: 38 additions & 39 deletions search-manifest/src/uploadToAtlas/getProperties.ts
Original file line number Diff line number Diff line change
Expand Up @@ -5,8 +5,8 @@ import {
DatabaseDocument,
DocsetsDocument,
ReposBranchesDocument,
} from "./types";
import { assertTrailingSlash } from "./utils";
} from "../types";
import { assertTrailingSlash } from "../utils";
import { deleteStaleProperties } from "./deleteStaleProperties";

// helper function to find the associated branch
Expand All @@ -20,16 +20,16 @@ export const getBranch = (branches: Array<BranchEntry>, branchName: string) => {
};

const getProperties = async (branchName: string) => {
const ATLAS_CLUSTER0_URI = `mongodb+srv://${process.env.MONGO_ATLAS_USERNAME}:${process.env.MONGO_ATLAS_PASSWORD}@${process.env.MONGO_ATLAS_CLUSTER0_HOST}/?retryWrites=true&w=majority`;
const SNOOTY_DB_NAME = `${process.env.MONGO_ATLAS_POOL_DB_NAME}`;
const REPO_NAME = process.env.REPO_NAME;
const ATLAS_CLUSTER0_URI = `mongodb+srv://${process.env.MONGO_ATLAS_USERNAME}:${process.env.MONGO_ATLAS_PASSWORD}@${process.env.MONGO_ATLAS_CLUSTER0_HOST}/?retryWrites=true&w=majority`;
const SNOOTY_DB_NAME = `${process.env.MONGO_ATLAS_POOL_DB_NAME}`;
const REPO_NAME = process.env.REPO_NAME;

//check that an environment variable for repo name was set
if (!REPO_NAME) {
throw new Error(
'No repo name supplied as environment variable, manifest cannot be uploaded to Atlas Search.Documents collection ',
);
}
//check that an environment variable for repo name was set
if (!REPO_NAME) {
throw new Error(
"No repo name supplied as environment variable, manifest cannot be uploaded to Atlas Search.Documents collection "
);
}

let dbSession: Db;
let repos_branches: Collection<DatabaseDocument>;
Expand All @@ -50,34 +50,34 @@ const getProperties = async (branchName: string) => {
throw new Error(`issue starting session for Snooty Pool Database ${e}`);
}

const query = {
repoName: REPO_NAME,
};
const query = {
repoName: REPO_NAME,
};

try {
repo = await repos_branches.findOne<ReposBranchesDocument>(query, {
projection: {
_id: 0,
project: 1,
search: 1,
branches: 1,
prodDeployable: 1,
internalOnly: 1,
},
});
if (!repo) {
throw new Error(
`Could not get repos_branches entry for repo ${REPO_NAME}, ${repo}, ${JSON.stringify(
query,
)}`,
);
}
} catch (e) {
console.error(`Error while getting repos_branches entry in Atlas: ${e}`);
throw e;
}
try {
repo = await repos_branches.findOne<ReposBranchesDocument>(query, {
projection: {
_id: 0,
project: 1,
search: 1,
branches: 1,
prodDeployable: 1,
internalOnly: 1,
},
});
if (!repo) {
throw new Error(
`Could not get repos_branches entry for repo ${REPO_NAME}, ${repo}, ${JSON.stringify(
query
)}`
);
}
} catch (e) {
console.error(`Error while getting repos_branches entry in Atlas: ${e}`);
throw e;
}

const { project } = repo;
const { project } = repo;

try {
const docsetsQuery = { project: { $eq: project } };
Expand Down Expand Up @@ -123,8 +123,7 @@ const getProperties = async (branchName: string) => {
console.error(`Error`, e);
throw e;
}

return { searchProperty, url, includeInGlobalSearch };
return { searchProperty, projectName: project, url, includeInGlobalSearch };
};

export default getProperties;
Loading

0 comments on commit d2bb40b

Please sign in to comment.