Skip to content

Commit

Permalink
packages/db normalizeUpload now builds CAR CIDs from s3 urls not just…
Browse files Browse the repository at this point in the history
… carpark r2 urls
  • Loading branch information
gobengo committed Jan 9, 2024
1 parent d35bf20 commit 5221545
Show file tree
Hide file tree
Showing 4 changed files with 56 additions and 13 deletions.
10 changes: 8 additions & 2 deletions package-lock.json

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 1 addition & 0 deletions packages/db/package.json
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@
"license": "(Apache-2.0 OR MIT)",
"dependencies": {
"@supabase/postgrest-js": "^0.37.0",
"multiformats": "^13.0.0",
"p-retry": "^4.6.1"
},
"devDependencies": {
Expand Down
3 changes: 2 additions & 1 deletion packages/db/test/upload.spec.js
Original file line number Diff line number Diff line change
Expand Up @@ -282,6 +282,7 @@ describe('upload', () => {
const contentCid = 'bafybeigdyrzt5sfp7udm7hu76uh7y26nf3efuylqabf3oclgtqy55fbzdi'
const sourceCid = 'QmbWqxBEKC3P8tqsKc98xmWNzrzDtRLMiMPL8wBuTGsMnR'
const exampleCarParkUrl = 'https://carpark-dev.web3.storage/bagbaiera6xcx7hiicm7sc523axbjf2otuu5nptt6brdzt4a5ulgn6qcfdwea/bagbaiera6xcx7hiicm7sc523axbjf2otuu5nptt6brdzt4a5ulgn6qcfdwea.car'
const exampleS3Url = 'https://dotstorage-dev-0.s3.us-east-1.amazonaws.com/raw/bafybeiao32xtnrlibcekpw3vyfi5txgrmvvrua4pccx3xik33ll3qhko2q/2/ciqplrl7tuebgpzbo5nqlqus5hj2kowxzz7ayr4z6ao2ftg7ibcr3ca.car'
const created = new Date().toISOString()
const name = `rand-${Math.random().toString().slice(2)}`
await client.createUpload({
Expand All @@ -293,7 +294,7 @@ describe('upload', () => {
dagSize,
name,
pins: [initialPinData],
backupUrls: [`https://backup.cid/${created}`, exampleCarParkUrl],
backupUrls: [`https://backup.cid/${created}`, exampleCarParkUrl, exampleS3Url],
created
})

Expand Down
55 changes: 45 additions & 10 deletions packages/db/utils.js
Original file line number Diff line number Diff line change
@@ -1,3 +1,7 @@
import * as Link from 'multiformats/link'
import * as Digest from 'multiformats/hashes/digest'
import { fromString } from 'uint8arrays'

/**
* Normalize upload item.
*
Expand All @@ -12,7 +16,7 @@ export function normalizeUpload (upload) {
delete nUpload.sourceCid

/** @type {import('./db-client-types').UploadItemOutput['parts']} */
const parts = [...carUrlsFromBackupUrls(backupUrls)]
const parts = [...carCidV1Base32sFromBackupUrls(backupUrls)]

return {
...nUpload,
Expand All @@ -26,20 +30,24 @@ export function normalizeUpload (upload) {
}

/**
* given array of backup_urls from uploads table, return a set of ipfs:// URIs for any CAR files in the backup_urls
* given array of backup_urls from uploads table, return the corresponding set of CAR CIDs
* (as CID strings, one per distinct CAR) for any CAR files in the backup_urls.
* @param {string[]} backupUrls
* @returns {Iterable<string>}
*/
function carUrlsFromBackupUrls (backupUrls) {
const carCIDUrls = new Set()
function carCidV1Base32sFromBackupUrls (backupUrls) {
const carCidStrings = new Set()
for (const backupUrl of backupUrls) {
// match cid v1 starting with 'ba'.
// there are also backupUrls from s3 with .car suffix and path stem is base32(multihash) (not a CID). exclude those.
const carCidFileSuffixMatch = String(backupUrl).match(/\/(ba[^/]+).car$/)
if (!carCidFileSuffixMatch) continue
carCIDUrls.add(carCidFileSuffixMatch[1])
let carCid
try {
carCid = bucketKeyToPartCID(backupUrl)
} catch (error) {
console.warn('error extracting car CID from bucket URL', error)
}
if ( ! carCid) continue

Check failure on line 47 in packages/db/utils.js

View workflow job for this annotation

GitHub Actions / Lint

There should be no space after this paren

Check failure on line 47 in packages/db/utils.js

View workflow job for this annotation

GitHub Actions / Lint

Unexpected space after unary operator '!'
carCidStrings.add(carCid.toString())
}
return carCIDUrls
return carCidStrings
}

/**
Expand Down Expand Up @@ -155,3 +163,30 @@ export function safeNumber (num) {
}
return num
}

// Codec code that CAR CIDs carry: compared against `cid.code` and passed to
// `Link.create` below. 0x0202 is the `car` entry in the multicodec table.
const CAR_CODE = 0x0202

/**
 * Derive the CAR CID that a bucket key (or URL) refers to, looking only at
 * the final path segment up to its first '.'.
 *
 * Two filename layouts are recognised:
 * - `<car-cid>.car` (recent buckets): the stem is parsed as a CID and
 *   accepted only when its codec is CAR.
 * - `<base32(car-multihash)>.car` (older buckets): the stem is decoded as a
 *   multihash digest and a CAR CID is created from it.
 *
 * @param {string} key
 * @returns the CAR CID for the key, or undefined when neither layout applies
 */
const bucketKeyToPartCID = key => {
  const lastSegment = String(key.split('/').at(-1))
  const stem = lastSegment.split('.')[0]

  // recent buckets encode CAR CID in filename
  let parsed
  try {
    parsed = Link.parse(stem).toV1()
  } catch (err) {
    // stem is not a CID string; fall through to the multihash layout
  }
  if (parsed && parsed.code === CAR_CODE) return parsed

  // older buckets base32 encode a CAR multihash <base32(car-multihash)>.car
  try {
    const multihash = Digest.decode(fromString(stem, 'base32'))
    return Link.create(CAR_CODE, multihash)
  } catch (error) {
    // deliberately silent: callers treat undefined as "no CAR CID in this key"
    return undefined
  }
}

0 comments on commit 5221545

Please sign in to comment.