admin管理员组

文章数量:1122846

I'm building a web-based video editor where users can:

- Add multiple videos
- Add images
- Add text overlays with background color

The frontend sends coordinates where each element's (x, y) represents its center position. On clicking the Export button I want all the data to be exported as one final video; on click I send the data to the backend like this:

 const exportAllVideos = async () => {
    try {
      const formData = new FormData();

      // Normalise timing fields to numbers and order videos by start time
      // so the backend receives them in timeline order.
      const normalizedVideos = videos.map(video => ({
          ...video,
          startTime: parseFloat(video.startTime),
          endTime: parseFloat(video.endTime),
          duration: parseFloat(video.duration)
      })).sort((a, b) => a.startTime - b.startTime);

      // Fetch each blob URL back into a File so it can be sent as multipart
      // form data.  Append order matches normalizedVideos, which is the same
      // order the backend zips against metadata.videos.
      for (const video of normalizedVideos) {
          const response = await fetch(video.src);
          const blobData = await response.blob();
          const file = new File([blobData], `${video.id}.mp4`, { type: "video/mp4" });
          formData.append("videos", file);
      }

      // Coerce image geometry/timing to numbers; (x, y) is the element's
      // centre in preview-container pixels.
      const normalizedImages = images.map(image => ({
          ...image,
          startTime: parseFloat(image.startTime),
          endTime: parseFloat(image.endTime),
          x: parseInt(image.x),
          y: parseInt(image.y),
          width: parseInt(image.width),
          height: parseInt(image.height),
          opacity: parseInt(image.opacity)
      }));

      for (const image of normalizedImages) {
          const response = await fetch(image.src);
          const blobData = await response.blob();
          const file = new File([blobData], `${image.id}.png`, { type: "image/png" });
          formData.append("images", file);
      }

      const normalizedTexts = texts.map(text => ({
          ...text,
          startTime: parseFloat(text.startTime),
          endTime: parseFloat(text.endTime),
          x: parseInt(text.x),
          y: parseInt(text.y),
          fontSize: parseInt(text.fontSize),
          opacity: parseInt(text.opacity)
      }));

      // Single JSON blob describing every element; the binaries travel in
      // the "videos"/"images" parts appended above.
      formData.append("metadata", JSON.stringify({
          videos: normalizedVideos,
          images: normalizedImages,
          texts: normalizedTexts
      }));

      const response = await fetch("my_flask_endpoint", {
          method: "POST",
          body: formData
      });

      // BUG FIX: the original only logged the failure and then fell through,
      // downloading the server's error body as "final_video.mp4".  Abort
      // instead and surface the server's message.
      if (!response.ok) {
          const errBody = await response.text();
          throw new Error(`Export failed (${response.status}): ${errBody}`);
      }

      // Turn the returned MP4 into a one-off download link, then release
      // the object URL to avoid leaking the blob.
      const finalVideo = await response.blob();
      const url = URL.createObjectURL(finalVideo);
      const a = document.createElement("a");
      a.href = url;
      a.download = "final_video.mp4";
      a.click();
      URL.revokeObjectURL(url);

    } catch (e) {
      console.log(e, "err");
    }
  };

On the frontend, each object type — text, image, and video — is stored as an array of objects. Below is the data structure for each object:
  const newVideo = {
      id: uuidv4(),
      src: URL.createObjectURL(videoData.videoBlob),
      originalDuration: videoData.duration,
      duration: videoData.duration,
      startTime: 0,
      playbackOffset: 0,
      endTime: videoData.endTime || videoData.duration,
      isPlaying: false,
      isDragging: false,
      speed: 1,
      volume: 100,
      // (x, y) is the element's CENTRE position.
      // BUG FIX: x previously used window.innerHeight, which mis-centres the
      // video horizontally on any non-square window.
      x: window.innerWidth / 2,
      y: window.innerHeight / 2,
      width: videoData.width,
      height: videoData.height,
    };
    // Text overlay element.  (x, y) is the element's CENTRE, expressed in
    // preview-container pixels (containerWidth.width/.height) — NOTE(review):
    // the backend renders at 1920x1080, so these coordinates must be scaled
    // server-side or sent along with the container size; verify.
    const newTextObject = {
      id: uuidv4(),
      description: text,
      opacity: 100,          // percent, 0-100
      x: containerWidth.width / 2,
      y: containerWidth.height / 2,
      fontSize: 18,          // px in preview space
      duration: 20,          // seconds
      endTime: 20,
      startTime: 0,
      color: "#ffffff",
      backgroundColor: hasBG, // falsy = no box behind the text
      padding: 8,
      fontWeight: "normal",
      width: 200,
      height: 40,
    };

    // Image overlay element.  (x, y) is the CENTRE in preview-container
    // pixels — same scaling caveat as text/video elements: the backend's
    // 1920x1080 canvas uses a different coordinate space.
    const newImage = {
      id: uuidv4(),
      src: URL.createObjectURL(imageData),
      x: containerWidth.width / 2,
      y: containerWidth.height / 2,
      width: 200,
      height: 200,
      borderRadius: 0,       // NOTE(review): not consumed by the backend
      startTime: 0,          // seconds on the shared timeline
      endTime: 20,
      duration: 20,
      opacity: 100,          // percent, 0-100
    };

BACKEND CODE -

import os
import shutil
import subprocess
from flask import Flask, request, send_file
import ffmpeg
import json
from werkzeug.utils import secure_filename
import uuid
from flask_cors import CORS


app = Flask(__name__)
# NOTE(review): wildcard CORS is fine for local development but should be
# restricted to the frontend's origin before deploying.
CORS(app, resources={r"/*": {"origins": "*"}})


# Scratch area for per-request working directories.
UPLOAD_FOLDER = 'temp_uploads'
# exist_ok replaces the racy exists()-then-makedirs check.
os.makedirs(UPLOAD_FOLDER, exist_ok=True)


@app.route('/')
def home():
    """Trivial health-check endpoint confirming the server is running."""
    greeting = 'Hello World'
    return greeting


# Fixed render resolution of the exported video.  Frontend coordinates are
# expressed in preview-container pixels, so they must be mapped into this
# space before being used in the ffmpeg filter graph.
OUTPUT_WIDTH = 1920
OUTPUT_HEIGHT = 1080



@app.route('/process', methods=['POST'])
def process_video():
    """Composite uploaded videos, images and text overlays into one MP4.

    Expects multipart/form-data with:
      * ``metadata`` -- JSON object with ``videos``, ``images`` and ``texts``
        lists; every (x, y) is the element's *centre*.  Optionally a
        ``canvas`` object ``{"width": w, "height": h}`` giving the preview
        container size; when present, all coordinates and sizes are scaled
        from preview space into the OUTPUT_WIDTH x OUTPUT_HEIGHT render
        target (this mismatch is what makes exported positions look wrong).
      * ``videos`` / ``images`` -- the binary files, in the same order as
        their metadata entries.

    Returns the rendered MP4 as an attachment, or ``{'error': ...}`` with
    HTTP 500 on failure.
    """
    work_dir = None
    try:
        # Per-request scratch directory so concurrent exports cannot clash.
        work_dir = os.path.abspath(os.path.join(UPLOAD_FOLDER, str(uuid.uuid4())))
        os.makedirs(work_dir)
        print(f"Created working directory: {work_dir}")

        metadata = json.loads(request.form['metadata'])
        print("Received metadata:", json.dumps(metadata, indent=2))

        # Preview-space -> output-space scale factors.  Backward compatible:
        # without a 'canvas' entry this degrades to the original 1:1 mapping.
        canvas = metadata.get('canvas') or {}
        scale_x = OUTPUT_WIDTH / canvas['width'] if canvas.get('width') else 1.0
        scale_y = OUTPUT_HEIGHT / canvas['height'] if canvas.get('height') else 1.0

        # Persist uploads; filenames are server-generated, so no user input
        # reaches the filesystem path.
        video_paths = []
        videos = request.files.getlist('videos')
        for idx, video in enumerate(videos):
            filename = f"video_{idx}.mp4"
            filepath = os.path.join(work_dir, filename)
            video.save(filepath)
            if os.path.exists(filepath) and os.path.getsize(filepath) > 0:
                video_paths.append(filepath)
                print(f"Saved video to: {filepath} Size: {os.path.getsize(filepath)}")
            else:
                raise Exception(f"Failed to save video {idx}")

        image_paths = []
        images = request.files.getlist('images')
        for idx, image in enumerate(images):
            filename = f"image_{idx}.png"
            filepath = os.path.join(work_dir, filename)
            image.save(filepath)
            if os.path.exists(filepath):
                image_paths.append(filepath)
                print(f"Saved image to: {filepath}")

        output_path = os.path.join(work_dir, 'output.mp4')

        filter_parts = []

        # NOTE(review): the total duration comes from the FIRST video only;
        # overlays whose endTime is later get cut off.  Presumably the
        # earliest-starting video defines the timeline -- confirm intent.
        base_duration = metadata["videos"][0]["duration"] if metadata["videos"] else 10
        filter_parts.append(f'color=c=black:s={OUTPUT_WIDTH}x{OUTPUT_HEIGHT}:d={base_duration}[canvas];')

        for idx, (path, meta) in enumerate(zip(video_paths, metadata['videos'])):
            raw_w = meta.get("width") or 0
            raw_h = meta.get("height") or 0
            # (x, y) is the element's centre; ffmpeg overlay wants top-left.
            # (x - w/2) * scale == centre*scale - scaled_w/2.
            x_pos = int((meta.get("x", 0) - raw_w / 2) * scale_x)
            y_pos = int((meta.get("y", 0) - raw_h / 2) * scale_y)
            # -1 lets ffmpeg keep the aspect ratio when no size was sent.
            scaled_w = int(raw_w * scale_x) if raw_w else -1
            scaled_h = int(raw_h * scale_y) if raw_h else -1

            filter_parts.extend([
                f'[{idx}:v]setpts=PTS-STARTPTS,scale={scaled_w}:{scaled_h}[v{idx}];',
                f'[{idx}:a]asetpts=PTS-STARTPTS[a{idx}];'
            ])

            if idx == 0:
                # First video sits directly on the black canvas.
                filter_parts.append(
                    f'[canvas][v{idx}]overlay=x={x_pos}:y={y_pos}:eval=init[temp{idx}];'
                )
            else:
                # Later videos only show during their [startTime, endTime].
                filter_parts.append(
                    f'[temp{idx-1}][v{idx}]overlay=x={x_pos}:y={y_pos}:'
                    f'enable=\'between(t,{meta["startTime"]},{meta["endTime"]})\':eval=init'
                    f'[temp{idx}];'
                )

        # BUG FIX: with no videos the original produced the label 'temp-1',
        # which never exists in the graph; the bare canvas is the base layer.
        last_video_temp = f'temp{len(video_paths) - 1}' if video_paths else 'canvas'

        if video_paths:
            # Mix all video audio tracks into a single [aout] stream.
            audio_mix_parts = []
            for idx in range(len(video_paths)):
                audio_mix_parts.append(f'[a{idx}]')
            filter_parts.append(f'{"".join(audio_mix_parts)}amix=inputs={len(video_paths)}[aout];')

        if image_paths:
            for idx, (img_path, img_meta) in enumerate(zip(image_paths, metadata['images'])):
                # Images follow the videos in the ffmpeg -i input order.
                input_idx = len(video_paths) + idx

                # Centre -> top-left, scaled into output space.
                x_pos = int((img_meta["x"] - img_meta["width"] / 2) * scale_x)
                y_pos = int((img_meta["y"] - img_meta["height"] / 2) * scale_y)
                img_w = int(img_meta["width"] * scale_x)
                img_h = int(img_meta["height"] * scale_y)

                # NOTE(review): overlay's 'alpha' option selects the alpha
                # FORMAT (straight/premultiplied), not an opacity level; a
                # fractional value may be rejected or ignored.  For opacity,
                # use format=rgba plus colorchannelmixer -- confirm.
                filter_parts.extend([
                    f'[{input_idx}:v]scale={img_w}:{img_h}[img{idx}];',
                    f'[{last_video_temp}][img{idx}]overlay=x={x_pos}:y={y_pos}:'
                    f'enable=\'between(t,{img_meta["startTime"]},{img_meta["endTime"]})\':'
                    f'alpha={img_meta["opacity"]/100}[imgout{idx}];'
                ])
                last_video_temp = f'imgout{idx}'

        if metadata.get('texts'):
            for idx, text in enumerate(metadata['texts']):
                # Last text in the chain writes the final [vout] label.
                next_output = f'text{idx}' if idx < len(metadata['texts']) - 1 else 'vout'

                # Escape drawtext metacharacters: backslash first, then the
                # quote, then ':' (an unescaped ':' ends the option value).
                escaped_text = (text["description"]
                                .replace("\\", "\\\\")
                                .replace("'", "\\'")
                                .replace(":", "\\:"))

                x_pos = int((text["x"] - text["width"] / 2) * scale_x)
                y_pos = int((text["y"] - text["height"] / 2) * scale_y)
                # Scale the font with the canvas so text keeps its relative size.
                font_size = max(1, int(text["fontSize"] * scale_y))

                text_filter = (
                    f'[{last_video_temp}]drawtext=text=\'{escaped_text}\':'
                    f'x={x_pos}:y={y_pos}:'
                    f'fontsize={font_size}:'
                    f'fontcolor={text["color"]}'
                )

                if text.get('backgroundColor'):
                    text_filter += f':box=1:boxcolor={text["backgroundColor"]}:boxborderw=5'

                # NOTE(review): drawtext's 'font' option requires an ffmpeg
                # built with fontconfig; otherwise 'fontfile=' is needed.
                if text.get('fontWeight') == 'bold':
                    text_filter += ':font=Arial-Bold'

                text_filter += (
                    f':enable=\'between(t,{text["startTime"]},{text["endTime"]})\''
                    f'[{next_output}];'
                )

                filter_parts.append(text_filter)
                last_video_temp = next_output
        else:
            # No texts: just rename the current chain head to [vout].
            filter_parts.append(f'[{last_video_temp}]null[vout];')

        filter_complex = ''.join(filter_parts)

        # Input order is videos-then-images, matching the [N:v] indices used
        # while building the filter graph above.  List form keeps
        # subprocess.run shell-free.
        cmd = [
            'ffmpeg',
            *sum([['-i', path] for path in video_paths], []),
            *sum([['-i', path] for path in image_paths], []),
            '-filter_complex', filter_complex,
            '-map', '[vout]'
        ]

        if video_paths:
            cmd.extend(['-map', '[aout]'])

        cmd.extend(['-y', output_path])

        print(f"Running ffmpeg command: {' '.join(cmd)}")
        result = subprocess.run(cmd, capture_output=True, text=True)

        if result.returncode != 0:
            print(f"FFmpeg error output: {result.stderr}")
            raise Exception(f"FFmpeg processing failed: {result.stderr}")

        # NOTE(review): the finally-block cleanup runs before the response
        # body is fully streamed; deleting an open file is fine on POSIX but
        # will break on Windows -- confirm the deployment target.
        return send_file(
            output_path,
            mimetype='video/mp4',
            as_attachment=True,
            download_name='final_video.mp4'
        )

    except Exception as e:
        print(f"Error in video processing: {str(e)}")
        return {'error': str(e)}, 500

    finally:
        # Best-effort cleanup; the directory is kept under FLASK_DEBUG so
        # intermediate files can be inspected.
        if work_dir and os.path.exists(work_dir):
            try:
                print(f"Directory contents before cleanup: {os.listdir(work_dir)}")
                if not os.environ.get('FLASK_DEBUG'):
                    shutil.rmtree(work_dir)
                else:
                    print(f"Keeping directory for debugging: {work_dir}")
            except Exception as e:
                print(f"Cleanup error: {str(e)}")

                
if __name__ == '__main__':
    # Development server only; debug=True must not be used in production.
    app.run(debug=True, port=8000)

I'm also attaching what the final result looks like in the frontend web app vs. in the downloaded video, and as you can see the downloaded video has all the coordinates and positions messed up — for the texts and images as well as the videos.

Can somebody please help me figure this out? :)

本文标签: