It's pretty straightforward. Using this code, any image file that is uploaded is corrupt and cannot be opened. PDFs seem fine, but I noticed it's injecting values into text-based files. The file size in S3 is correct, not zero like something went wrong. I'm not sure if it's a problem with Express, the SDK, or a combination of both. Is it Postman? I built something similar in a work project in March of this year, and it worked flawlessly. I no longer have access to that code to compare.
No errors, no indication of any problems.
const aws = require("aws-sdk");
const stream = require("stream");
const express = require("express");
const router = express.Router();

const AWS_ACCESS_KEY_ID = "XXXXXXXXXXXXXXXXXXXX";
const AWS_SECRET_ACCESS_KEY = "superSecretAccessKey";
const BUCKET_NAME = "my-bucket";
const BUCKET_REGION = "us-east-1";

const s3 = new aws.S3({
  region: BUCKET_REGION,
  accessKeyId: AWS_ACCESS_KEY_ID,
  secretAccessKey: AWS_SECRET_ACCESS_KEY
});

const uploadStream = key => {
  let streamPass = new stream.PassThrough();
  let params = {
    Bucket: BUCKET_NAME,
    Key: key,
    Body: streamPass
  };
  let streamPromise = s3.upload(params, (err, data) => {
    if (err) {
      console.error("ERROR: uploadStream:", err);
    } else {
      console.log("INFO: uploadStream:", data);
    }
  }).promise();
  return {
    streamPass: streamPass,
    streamPromise: streamPromise
  };
};

router.post("/upload", async (req, res) => {
  try {
    let key = req.query.file_name;
    let { streamPass, streamPromise } = uploadStream(key);
    req.pipe(streamPass);
    await streamPromise;
    res.status(200).send({ result: "Success!" });
  } catch (e) {
    res.status(500).send({ result: "Fail!" });
  }
});

module.exports = router;
Here's my package.json:
{
  "name": "expresss3streampass",
  "version": "0.0.0",
  "private": true,
  "scripts": {
    "start": "node ./bin/www"
  },
  "dependencies": {
    "aws-sdk": "^2.812.0",
    "cookie-parser": "~1.4.4",
    "debug": "~2.6.9",
    "express": "~4.16.1",
    "morgan": "~1.9.1"
  }
}
UPDATE:
After further testing, I noticed plain-text files are being changed by Postman. For example, this source file:
{
  "question_id": null,
  "position_type_id": 1,
  "question_category_id": 1,
  "position_level_id": 1,
  "question": "Do you test your code before calling it \"done\"?",
  "answer": "Candidate should respond that they at least happy path test every feature and bug fix they write.",
  "active": 1
}
...looks like this after it lands in the bucket:
----------------------------472518836063077482836177
Content-Disposition: form-data; name="file"; filename="question.json"
Content-Type: application/json

{
  "question_id": null,
  "position_type_id": 1,
  "question_category_id": 1,
  "position_level_id": 1,
  "question": "Do you test your code before calling it \"done\"?",
  "answer": "Candidate should respond that they at least happy path test every feature and bug fix they write.",
  "active": 1
}
----------------------------472518836063077482836177--
I have to think this is the problem. Postman is the only thing that changed in this equation from when this code first worked for me. My request headers looked like this (screenshot of the Postman headers not reproduced here). I was the one who had originally added the "application/x-www-form-urlencoded" header. If I use that now, I end up with a file that has 0 bytes in the bucket.
Comments:
- Is there a specific reason you don't want to use multer? – relief.melone, Dec 21, 2020
- @relief.melone Yes - this is streaming files rather than blowing out the container with a file that might be too large, or locking the thread on a large upload. – Tsar Bomba, Dec 22, 2020
- You can utilize streams with multer as well, instead of uploading to the container first. I'm not sure, but I'd be pretty confident that multer-s3-storage does just that (a sketch of this approach appears below these comments), and I do it as well in the storage engine I wrote for multer: gitlab.com/relief-melone/multer-s3-sharp-resizer. However, I'm still not sure what's causing the problems with your code, as my approach is pretty much the same as yours (and I'm using Postman to test too). I will still take a closer look at this as soon as I get the time. I understand that you not only want to make it work but to understand what's going wrong. – relief.melone, Dec 22, 2020
- Is the goal of your server only to pick a file from a user and put it on S3? – Benjamin Filiatrault, Dec 25, 2020
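For context, the streaming approach that comment alludes to looks roughly like this with the multer-s3 package. This is a minimal sketch, not code from anyone in the thread; it assumes the same s3 client, router, and BUCKET_NAME constant defined in the question, and the route path and field name are illustrative only:

const multer = require("multer");
const multerS3 = require("multer-s3");

// multer-s3 streams each part to S3 as it is parsed, so nothing is
// buffered to disk or held fully in memory.
const upload = multer({
  storage: multerS3({
    s3: s3,                // the aws-sdk v2 S3 client from the question
    bucket: BUCKET_NAME,
    key: (req, file, cb) => cb(null, file.originalname)
  })
});

// the field name "file" is an assumption; match it to your form field
router.post("/upload-s3", upload.single("file"), (req, res) => {
  // multer-s3 reports the result on req.file (key, location, etc.)
  res.status(200).send({ result: "Success!", location: req.file.location });
});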
2 Answers
Multer is the way to go. I updated the answer; scroll down to "A Better Solution".
It provides a few different modes, but as far as I could tell, you have to write a custom storage handler in order to access the underlying stream; otherwise it buffers all the data in memory and only calls back once it's done.
If you check req.file in your route handler, Multer would normally provide a Buffer under the buffer field, but it's no longer present since I don't pass anything along in the callback, so I'm reasonably confident this is streaming as expected.
Below is a working solution.
Note: parse.single('image') is passed into the route handler. This refers to the multipart field name I used.
const aws = require('aws-sdk');
const stream = require('stream');
const express = require('express');
const router = express.Router();
const multer = require('multer');

const AWS_ACCESS_KEY_ID = "XXXXXXXXXXXXXXXXXXXX";
const AWS_SECRET_ACCESS_KEY = "superSecretAccessKey";
const BUCKET_NAME = "my-bucket";
const BUCKET_REGION = "us-east-1";

const s3 = new aws.S3({
  region: BUCKET_REGION,
  accessKeyId: AWS_ACCESS_KEY_ID,
  secretAccessKey: AWS_SECRET_ACCESS_KEY
});

const uploadStream = key => {
  let streamPass = new stream.PassThrough();
  let params = {
    Bucket: BUCKET_NAME,
    Key: key,
    Body: streamPass
  };
  let streamPromise = s3.upload(params, (err, data) => {
    if (err) {
      console.error('ERROR: uploadStream:', err);
    } else {
      console.log('INFO: uploadStream:', data);
    }
  }).promise();
  return {
    streamPass: streamPass,
    streamPromise: streamPromise
  };
};

// Custom Multer storage engine: pipe the incoming file stream straight
// into the S3 upload instead of buffering it.
class CustomStorage {
  _handleFile(req, file, cb) {
    let key = req.query.file_name;
    let { streamPass, streamPromise } = uploadStream(key);
    file.stream.pipe(streamPass);
    streamPromise
      .then(() => cb(null, {}))
      .catch(err => cb(err)); // surface S3 errors to Multer
  }

  // Multer calls this to clean up if a later error aborts the request
  _removeFile(req, file, cb) {
    cb(null);
  }
}

const storage = new CustomStorage();
const parse = multer({ storage });

router.post('/upload', parse.single('image'), async (req, res) => {
  try {
    res.status(200).send({ result: 'Success!' });
  } catch (e) {
    console.log(e);
    res.status(500).send({ result: 'Fail!' });
  }
});

module.exports = router;
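To exercise this route, here's a quick client sketch. It's my addition, not part of the answer: the host, port, and file path are assumptions, and it needs Node 18+ for the built-in fetch, FormData, and Blob globals.

const fs = require("node:fs");

async function testUpload() {
  const form = new FormData();
  // "image" must match parse.single('image') on the server
  form.append("image", new Blob([fs.readFileSync("./photo.png")]), "photo.png");
  const res = await fetch("http://localhost:3000/upload?file_name=photo.png", {
    method: "POST",
    body: form
  });
  console.log(res.status, await res.json());
}

testUpload().catch(console.error);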
Update: A Better Solution
The Multer-based solution I provided above is a bit hacky, so I took a look under the hood to see how it worked. This solution just uses Busboy to parse and stream the file; Multer is really just a wrapper around it with some disk I/O convenience functions.
const aws = require('aws-sdk');
const express = require('express');
const Busboy = require('busboy');
const router = express.Router();

const AWS_ACCESS_KEY_ID = "XXXXXXXXXXXXXXXXXXXX";
const AWS_SECRET_ACCESS_KEY = "superSecretAccessKey";
const BUCKET_NAME = "my-bucket";
const BUCKET_REGION = "us-east-1";

const s3 = new aws.S3({
  region: BUCKET_REGION,
  accessKeyId: AWS_ACCESS_KEY_ID,
  secretAccessKey: AWS_SECRET_ACCESS_KEY
});

function multipart(request) {
  return new Promise((resolve, reject) => {
    const headers = request.headers;
    const busboy = new Busboy({ headers });
    // you may need to add cleanup logic using 'busboy.on' events
    busboy.on('error', err => reject(err));
    busboy.on('file', function (fieldName, fileStream, fileName, encoding, mimeType) {
      // Busboy hands us the decoded file stream; pipe it straight to S3
      const params = {
        Bucket: BUCKET_NAME,
        Key: fileName,
        Body: fileStream
      };
      s3.upload(params).promise().then(resolve).catch(reject);
    });
    request.pipe(busboy);
  });
}

router.post('/upload', async (req, res) => {
  try {
    await multipart(req);
    res.status(200).send({ result: 'Success!' });
  } catch (e) {
    console.log(e);
    res.status(500).send({ result: 'Fail!' });
  }
});

module.exports = router;
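For completeness, wiring either version of this router into an app looks like this. A minimal sketch; the ./routes/upload path and the port are assumptions:

const express = require("express");
const app = express();

// No body-parsing middleware is needed on this route; Busboy (or Multer)
// reads the raw multipart stream directly off the request.
app.use("/", require("./routes/upload"));

app.listen(3000, () => console.log("Listening on port 3000"));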
As far as I can tell, Postman is behaving as it should: the "text injection" is actually standard multipart/form-data encoding, used to identify and demarcate files on upload. See the MDN documentation on the multipart/form-data content type and the Content-Disposition header for why.
It's actually injecting that envelope regardless of the file type, which you can see by tapping the PassThrough stream:

let streamPass = new stream.PassThrough();

// adding this to inspect what actually flows through the stream
const chunks = [];
streamPass.on('data', chunk => chunks.push(chunk));
streamPass.on('end', () => {
  const body = Buffer.concat(chunks).toString();
  console.log(chunks, chunks.length);
  console.log('finished', body); // <-- see it here
});
I tried several methods to control/change this, with no luck on a simple one. From the Postman end, I don't think this is a setting that can be changed, and from the Node.js end it's possible, but the solution would most likely be clunky/complicated, which I suspect you don't want. (I could be wrong, though...)
Given the above, I'll join @relief.melone in recommending multer as a simple solution.
If you'd like to use multer with streams, try this (I've indicated where I made changes to your code):
// const uploadStream = (key) => {
const uploadStream = (key, mime_type) => { // <- adding the mimetype
  let streamPass = new stream.PassThrough();
  let params = {
    Bucket: BUCKET_NAME,
    Key: key,
    Body: streamPass,
    ACL: 'public-read',     // <- you can remove this
    ContentType: mime_type  // <- adding the mimetype
  };
  let streamPromise = s3.upload(params, (err, data) => {
    if (err) {
      console.error("ERROR: uploadStream:", err);
    } else {
      console.log("INFO: uploadStream:", data);
    }
  }).promise();
  return {
    streamPass: streamPass,
    streamPromise: streamPromise
  };
};
// router.post("/upload", async (req, res) => {
router.post("/upload", multer().single('file'), async (req, res) => { // <- we're adding multer
  try {
    let key = req.query.file_name;
    // === change starts here
    // console.log(req.file); // <- uncomment this if you want to inspect the file
    let { streamPass, streamPromise } = uploadStream(key, req.file.mimetype); // adding the mimetype
    var bufferStream = new stream.PassThrough();
    bufferStream.end(req.file.buffer);
    bufferStream.pipe(streamPass); // no longer req.pipe(streamPass);
    // === change ends here
    await streamPromise;
    res.status(200).send({ result: "Success!" });
  } catch (e) {
    console.log(e);
    res.status(500).send({ result: "Fail!" });
  }
});
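One caveat to note (my addition, not part of the original answer): multer() with no storage option uses its in-memory storage, so req.file.buffer holds the entire file in RAM before the S3 upload starts, which gives up the streaming benefit the question was after. If you take this route, a size cap is a sensible guard. A minimal sketch, where the 10 MB figure is an arbitrary assumption:

const multer = require("multer");

// limits.fileSize is passed through to Busboy; uploads that exceed it
// fail with a LIMIT_FILE_SIZE error instead of filling memory
const upload = multer({ limits: { fileSize: 10 * 1024 * 1024 } });

// drop-in replacement for the multer().single('file') call above:
// router.post("/upload", upload.single("file"), async (req, res) => { ... });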