Convert images into Video using Python

Audio Output

In this article, I will show you how to convert multiple images into a video file using a Python script.

In this tutorial, I will use the pydub library together with PIL (the Python Imaging Library) and imageio to create a video from images.

If you want to create a video with input from multiple image files, you can accept a list of image file paths as input. Each image will be displayed for a certain duration in the resulting video. Here’s a version of the script that accepts multiple image files and produces a video with a total duration of 30 seconds.

from pydub import AudioSegment
from pydub.generators import Sine
from PIL import Image, ImageDraw, ImageFont
import imageio
import os

def create_video(title, duration, image_files, output_path):
    """Render a slideshow video from still images with a 440 Hz tone as audio.

    Each image is resized to half the video size, pasted onto the centre of
    a black 1280x720 canvas and shown for an equal share of the video's
    frames. Frames are written to a temporary "frames" directory inside
    ``output_path`` and removed again when the function finishes.

    Args:
        title: Base name of the output file; "<title>.mp4" is written.
        duration: Length of the video in seconds (encoded at 30 fps).
        image_files: Paths of the images to show, in display order.
        output_path: Directory that receives the video; created if missing.

    Raises:
        ValueError: If ``image_files`` is empty or ``duration`` is too
            short to produce a single frame.
    """
    if not image_files:
        raise ValueError("image_files must contain at least one image path")

    fps = 30
    num_frames = int(duration * fps)  # Total number of frames in the video
    if num_frames <= 0:
        raise ValueError("duration must be long enough to produce at least one frame")

    # Set the size and background color of the video
    video_width = 1280
    video_height = 720
    background_color = (0, 0, 0)  # Black

    # Create a blank image with the specified size and background color
    image = Image.new("RGB", (video_width, video_height), background_color)

    # Calculate the position to center the image
    image_width = video_width // 2
    image_height = video_height // 2
    image_x = (video_width - image_width) // 2
    image_y = (video_height - image_height) // 2

    # Frames that make up one second of display time (not a duration in seconds)
    frame_duration = fps
    num_images = len(image_files)  # Total number of images

    # Whole seconds every image is shown, plus the leftover frames that
    # still have to be shared out among the images
    image_repeat = num_frames // (num_images * frame_duration)
    remainder = num_frames % (num_images * frame_duration)

    # Save the image frames
    frames_path = os.path.join(output_path, "frames")
    os.makedirs(frames_path, exist_ok=True)

    try:
        frame_idx = 0
        for i, image_file in enumerate(image_files):
            # resize() returns a new image, so the opened file can be
            # closed as soon as the resized copy exists
            with Image.open(image_file) as img:
                resized = img.resize((image_width, image_height))

            # Paste the image onto the blank image
            image.paste(resized, (image_x, image_y))

            # Give this image an even share of the leftover frames; the last
            # image takes whatever is left, so exactly num_frames are written
            extra = remainder // (num_images - i)
            remainder -= extra

            # Save the current image frames
            for _ in range(image_repeat * frame_duration + extra):
                frame_path = os.path.join(frames_path, f"frame_{frame_idx}.png")
                image.save(frame_path)
                frame_idx += 1

        # Generate a 440 Hz tone exactly as long as the encoded video.
        # WAV export is done by pydub itself and needs no external tools.
        audio = Sine(440).to_audio_segment(num_frames * 1000 / fps)
        audio_file = os.path.join(frames_path, "audio.wav")
        audio.export(audio_file, format="wav")

        # Encode the frames one at a time (instead of loading them all into
        # memory) and let imageio's ffmpeg writer mux the audio track in.
        # pydub only handles audio, so it cannot add sound to a video file.
        output_file = os.path.join(output_path, f"{title}.mp4")
        with imageio.get_writer(output_file, fps=fps, audio_path=audio_file) as writer:
            for n in range(num_frames):
                writer.append_data(imageio.imread(os.path.join(frames_path, f"frame_{n}.png")))
    finally:
        # Delete the frames directory, even when rendering failed
        for leftover in os.listdir(frames_path):
            os.remove(os.path.join(frames_path, leftover))
        os.rmdir(frames_path)

# Example usage
# Guarded so that importing this file does not start rendering a video.
if __name__ == "__main__":
    title = "My Video"
    duration = 30  # In seconds
    image_files = ["path/to/image1.jpg", "path/to/image2.jpg", "path/to/image3.jpg"]
    output_path = "path/to/output"

    create_video(title, duration, image_files, output_path)

In this updated version, the frame_duration variable is set to 30. Despite its name, it is a frame count rather than a number of seconds: it is the number of frames that make up one second of display time at the video’s frame rate. The total number of frames (num_frames) is calculated based on the desired duration of the video and the frame rate (30 frames per second). The image_repeat variable is the number of whole seconds each image is shown (so each image gets image_repeat * frame_duration frames), and the remainder variable represents any remaining frames that need to be distributed among the images.

The script then saves the image frames, repeating each image the specified number of times (image_repeat) and allocating any remaining frames if applicable. Finally, the frames are converted into a video using imageio and audio is added to the resulting video.

Make sure to replace the “path/to/imageX.jpg” placeholders with the actual paths to your desired image files.

If you are getting this error:

RuntimeWarning: Couldn’t find ffprobe or avprobe – defaulting to ffprobe, but may not work warn(“Couldn’t find ffprobe or avprobe – defaulting to ffprobe, but may not work”, RuntimeWarning)

Then follow the below steps.

The warning you’re seeing indicates that the ffprobe or avprobe executable was not found. It is used by the pydub library to gather information about media files. The warning itself is harmless, but pydub relies on ffmpeg and ffprobe to read and write formats other than WAV (such as MP4), so you should install the necessary dependency to resolve it.

To fix the Runtime Warning, you can install ffmpeg on your system, which provides the ffprobe executable. Follow the steps below to install ffmpeg:

Visit the official FFmpeg website: https://ffmpeg.org/
Download the appropriate version of ffmpeg for your operating system.
Install ffmpeg following the instructions provided for your specific OS.

Once you have ffmpeg installed, you should no longer see the Runtime Warning when running the script.

If you have already installed ffmpeg and the warning persists, you might need to add the directory that contains the ffmpeg and ffprobe executables to your system’s PATH environment variable. This ensures that the script can locate the ffprobe executable. Refer to the documentation or instructions specific to your operating system for information on how to set the PATH variable.

Remember to restart your Python environment after installing ffmpeg or modifying the PATH variable to ensure the changes take effect.

Audio Output

You can also add audio output to the same script, so that the generated audio is later used as the soundtrack of the video.

To add text input as audio in English (male) for each image in the same script, you can use a text-to-speech library like pyttsx3. Here’s an updated version of the script that incorporates text-to-speech functionality.

from pydub import AudioSegment
from pydub.generators import Sine
from PIL import Image, ImageDraw, ImageFont
import imageio
import os
import pyttsx3

def create_video(title, duration, image_files, text_inputs, output_path):
    """Render a narrated slideshow video from still images.

    Each image is resized to half the video size, pasted onto the centre of
    a black 1280x720 canvas and shown for an equal share of the video's
    frames. The matching entry of ``text_inputs`` is turned into speech with
    ``generate_audio`` and fitted to the time its image is on screen.
    Frames are written to a temporary "frames" directory inside
    ``output_path`` and removed again when the function finishes.

    Args:
        title: Base name of the output file; "<title>.mp4" is written.
        duration: Length of the video in seconds (encoded at 30 fps).
        image_files: Paths of the images to show, in display order.
        text_inputs: Narration text per image, in the same order as
            ``image_files``. Extra entries are ignored.
        output_path: Directory that receives the video; created if missing.

    Raises:
        ValueError: If ``image_files`` is empty, ``text_inputs`` has fewer
            entries than ``image_files``, or ``duration`` is too short to
            produce a single frame.
    """
    if not image_files:
        raise ValueError("image_files must contain at least one image path")
    if len(text_inputs) < len(image_files):
        raise ValueError("text_inputs must provide one text for every image")

    fps = 30
    num_frames = int(duration * fps)  # Total number of frames in the video
    if num_frames <= 0:
        raise ValueError("duration must be long enough to produce at least one frame")

    # Set the size and background color of the video
    video_width = 1280
    video_height = 720
    background_color = (0, 0, 0)  # Black

    # Create a blank image with the specified size and background color
    image = Image.new("RGB", (video_width, video_height), background_color)

    # Calculate the position to center the image
    image_width = video_width // 2
    image_height = video_height // 2
    image_x = (video_width - image_width) // 2
    image_y = (video_height - image_height) // 2

    # Frames that make up one second of display time (not a duration in seconds)
    frame_duration = fps
    num_images = len(image_files)  # Total number of images

    # Whole seconds every image is shown, plus the leftover frames that
    # still have to be shared out among the images
    image_repeat = num_frames // (num_images * frame_duration)
    remainder = num_frames % (num_images * frame_duration)

    # Save the image frames and audio files
    frames_path = os.path.join(output_path, "frames")
    os.makedirs(frames_path, exist_ok=True)

    try:
        frame_idx = 0
        audio_segments = []
        # zip() stops after the last image, so surplus texts are ignored
        for i, (image_file, text_input) in enumerate(zip(image_files, text_inputs)):
            # resize() returns a new image, so the opened file can be
            # closed as soon as the resized copy exists
            with Image.open(image_file) as img:
                resized = img.resize((image_width, image_height))

            # Paste the image onto the blank image
            image.paste(resized, (image_x, image_y))

            # Give this image an even share of the leftover frames; the last
            # image takes whatever is left, so exactly num_frames are written
            extra = remainder // (num_images - i)
            remainder -= extra
            frames_for_image = image_repeat * frame_duration + extra

            # Save the current image frames
            for _ in range(frames_for_image):
                frame_path = os.path.join(frames_path, f"frame_{frame_idx}.png")
                image.save(frame_path)
                frame_idx += 1

            # Generate audio for the current text input, sized to the time
            # this image is actually on screen (in milliseconds)
            slot_ms = round(frames_for_image * 1000 / fps)
            audio_segment = generate_audio(text_input, duration=slot_ms)
            # Pad with silence and cut to the slot length so every narration
            # starts together with its image, regardless of how long the
            # synthesized speech turned out to be
            padding = AudioSegment.silent(duration=slot_ms, frame_rate=audio_segment.frame_rate)
            audio_segments.append((audio_segment + padding)[:slot_ms])

        # Combine audio segments for each image
        combined_audio = AudioSegment.silent(duration=0)
        for audio_segment in audio_segments:
            combined_audio += audio_segment

        # WAV export is done by pydub itself and needs no external tools
        audio_file = os.path.join(frames_path, "audio.wav")
        combined_audio.export(audio_file, format="wav")

        # Encode the frames one at a time (instead of loading them all into
        # memory) and let imageio's ffmpeg writer mux the audio track in.
        # pydub only handles audio, so it cannot add sound to a video file.
        output_file = os.path.join(output_path, f"{title}.mp4")
        with imageio.get_writer(output_file, fps=fps, audio_path=audio_file) as writer:
            for n in range(num_frames):
                writer.append_data(imageio.imread(os.path.join(frames_path, f"frame_{n}.png")))
    finally:
        # Delete the frames directory, even when rendering failed
        for leftover in os.listdir(frames_path):
            os.remove(os.path.join(frames_path, leftover))
        os.rmdir(frames_path)

def generate_audio(text, duration):
    """Synthesize ``text`` to speech and return it as a pydub AudioSegment.

    Args:
        text: The text to speak.
        duration: Target length of the returned audio in milliseconds.
            Shorter speech is padded with trailing silence and longer
            speech is cut off at this length. Pass ``None`` to keep the
            natural length of the speech.

    Returns:
        The synthesized speech as an ``AudioSegment``.
    """
    import tempfile

    engine = pyttsx3.init()
    engine.setProperty("rate", 150)  # Adjust the speech rate

    # Use a unique temporary file rather than a fixed "temp.wav" in the
    # current directory, which could clash with an existing file or with
    # another run of the script
    fd, wav_path = tempfile.mkstemp(suffix=".wav")
    os.close(fd)  # pyttsx3 opens the path itself
    try:
        engine.save_to_file(text, wav_path)
        engine.runAndWait()  # Blocks until the queued speech has been written
        audio_segment = AudioSegment.from_file(wav_path, format="wav")
    finally:
        # Remove the temporary file even when synthesis or decoding failed
        os.remove(wav_path)

    if duration is not None:
        # Append a full slot of silence, then cut to the slot length: this
        # pads short speech and trims long speech in one step
        target_ms = int(duration)
        padding = AudioSegment.silent(duration=target_ms, frame_rate=audio_segment.frame_rate)
        audio_segment = (audio_segment + padding)[:target_ms]

    return audio_segment

# Example usage
# Guarded so that importing this file does not start rendering a video.
if __name__ == "__main__":
    title = "My Video"
    duration = 30  # In seconds
    image_files = ["path/to/image1.jpg", "path/to/image2.jpg", "path/to/image3.jpg"]
    text_inputs = ["Text input for image 1", "Text input for image 2", "Text input for image 3"]
    output_path = "path/to/output"

    create_video(title, duration, image_files, text_inputs, output_path)

In this updated version, the audio segments for each text input are stored in the audio_segments list while iterating over the images. After generating the audio segment using generate_audio(), it is appended to the list.

Later, the audio segments are combined into a single audio segment (combined_audio). Finally, the combined_audio is merged with the video’s existing audio to produce the final video output.

Ensure that you have the necessary dependencies installed (pydub, Pillow, imageio, pyttsx3) and replace the placeholder paths (“path/to/imageX.jpg”) with the actual paths to your image files.

Convert images into Video using Python

Table of Contents

Audio Output

Leave a Reply Cancel Reply