-
Notifications
You must be signed in to change notification settings - Fork 0
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Add validation for exported ActivityPub tarballs #7
Changes from 1 commit
e91222a
ac6fb51
6b5266f
e7ef58f
c869227
6afa20d
7bfa145
3a13b9e
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,89 @@ | ||
import * as tar from 'tar-stream' | ||
import { Readable } from 'stream' | ||
import YAML from 'yaml' | ||
|
||
/** | ||
* Validates the structure and content of an exported ActivityPub tarball. | ||
* @param tarBuffer - A Buffer containing the .tar archive. | ||
* @returns A promise that resolves to an object with `valid` (boolean) and `errors` (string[]). | ||
*/ | ||
export async function validateExportStream( | ||
tarBuffer: Buffer | ||
): Promise<{ valid: boolean; errors: string[] }> { | ||
const extract = tar.extract() | ||
const errors: string[] = [] | ||
const requiredFiles = [ | ||
'manifest.yaml', | ||
'activitypub/actor.json', | ||
'activitypub/outbox.json' | ||
] | ||
const foundFiles = new Set() | ||
|
||
return await new Promise((resolve) => { | ||
extract.on('entry', (header, stream, next) => { | ||
const fileName = header.name | ||
foundFiles.add(fileName) | ||
|
||
let content = '' | ||
stream.on('data', (chunk) => { | ||
content += chunk.toString() | ||
}) | ||
|
||
stream.on('end', () => { | ||
try { | ||
// Validate JSON files | ||
if (fileName.endsWith('.json')) { | ||
JSON.parse(content) // Throws an error if content is not valid JSON | ||
} | ||
|
||
// Validate manifest file | ||
if (fileName === 'manifest.yaml') { | ||
const manifest = YAML.parse(content) | ||
if (!manifest['ubc-version']) { | ||
errors.push('Manifest is missing required field: ubc-version') | ||
} | ||
if (!manifest.contents?.activitypub) { | ||
errors.push( | ||
'Manifest is missing required field: contents.activitypub' | ||
) | ||
} | ||
} | ||
} catch (error: any) { | ||
errors.push(`Error processing file ${fileName}: ${error.message}`) | ||
} | ||
next() | ||
}) | ||
|
||
stream.on('error', (error) => { | ||
errors.push(`Stream error on file ${fileName}: ${error.message}`) | ||
next() | ||
}) | ||
}) | ||
|
||
extract.on('finish', () => { | ||
// Check if all required files are present | ||
for (const file of requiredFiles) { | ||
if (!foundFiles.has(file)) { | ||
errors.push(`Missing required file: ${file}`) | ||
} | ||
} | ||
|
||
resolve({ | ||
valid: errors.length === 0, | ||
errors | ||
}) | ||
}) | ||
|
||
extract.on('error', (error) => { | ||
errors.push(`Error during extraction: ${error.message}`) | ||
resolve({ | ||
valid: false, | ||
errors | ||
}) | ||
}) | ||
|
||
// Convert Buffer to a Readable stream and pipe it to the extractor | ||
const stream = Readable.from(tarBuffer) | ||
stream.pipe(extract) | ||
}) | ||
} |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,76 @@ | ||
import { expect } from 'chai' | ||
import { readFileSync } from 'fs' | ||
import { validateExportStream } from '../src/verify' | ||
|
||
describe('validateExportStream', () => { | ||
it('should validate a valid tarball', async () => { | ||
// Load a valid tarball (e.g., exported-profile-valid.tar) | ||
const tarBuffer = readFileSync( | ||
'test/fixtures/tarball-samples/valid-export.tar' | ||
) | ||
const result = await validateExportStream(tarBuffer) | ||
|
||
expect(result.valid).to.be.true | ||
expect(result.errors).to.be.an('array').that.is.empty | ||
}) | ||
|
||
it('should fail if manifest.yaml is missing', async () => { | ||
// Load a tarball with missing manifest.yaml | ||
const tarBuffer = readFileSync( | ||
'test/fixtures/tarball-samples/missing-manifest.tar' | ||
) | ||
const result = await validateExportStream(tarBuffer) | ||
|
||
expect(result.valid).to.be.false | ||
}) | ||
|
||
it('should fail if actor.json is missing', async () => { | ||
// Load a tarball with missing actor.json | ||
const tarBuffer = readFileSync( | ||
'test/fixtures/tarball-samples/missing-actor.tar' | ||
) | ||
const result = await validateExportStream(tarBuffer) | ||
|
||
expect(result.valid).to.be.false | ||
console.log(JSON.stringify(result.errors)) | ||
}) | ||
|
||
// it('should fail if outbox.json is missing', async () => { | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. These tests look useful. Can we include them uncommented? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. yeah i will but it will add more tar files to the src, do you have a better way instead having them in the source code? |
||
// // Load a tarball with missing outbox.json | ||
// const tarBuffer = readFileSync( | ||
// 'test/fixtures/exported-profile-missing-outbox.tar' | ||
// ) | ||
// const result = await validateExportStream(tarBuffer) | ||
|
||
// expect(result.valid).to.be.false | ||
// expect(result.errors).to.include( | ||
// 'Missing required file: activitypub/outbox.json' | ||
// ) | ||
// }) | ||
|
||
// it('should fail if actor.json contains invalid JSON', async () => { | ||
// // Load a tarball with invalid JSON in actor.json | ||
// const tarBuffer = readFileSync( | ||
// 'test/fixtures/exported-profile-invalid-actor-json.tar' | ||
// ) | ||
// const result = await validateExportStream(tarBuffer) | ||
|
||
// expect(result.valid).to.be.false | ||
// expect(result.errors).to.include( | ||
// 'Error processing file activitypub/actor.json: Unexpected token } in JSON at position 42' | ||
// ) | ||
// }) | ||
|
||
// it('should fail if manifest.yaml is invalid', async () => { | ||
// // Load a tarball with invalid manifest.yaml | ||
// const tarBuffer = readFileSync( | ||
// 'test/fixtures/exported-profile-invalid-manifest.tar' | ||
// ) | ||
// const result = await validateExportStream(tarBuffer) | ||
|
||
// expect(result.valid).to.be.false | ||
// expect(result.errors).to.include( | ||
// 'Manifest is missing required field: ubc-version' | ||
// ) | ||
// }) | ||
}) |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Will you please make it so
tarBuffer
can be a ReadableStream? That way, if the export is really big and the tar is really big, it doesn't have to be buffered in memory all at once.I think you should be able have tar-stream parse the stream, async iterate through the tar entries, and ensure each entry is valid, all without every buffering all the entries in memory
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
you r abs right, i should consider that, thanks