Add images support

2026-04-27 05:01:45 +00:00 · 2026-01-25 17:34:00 -08:00 · 2026-01-25 17:34:00 -08:00 · 1404d7863e
commit 1404d7863e
parent 85d38918f7
4 changed files with 75 additions and 16 deletions
--- a/README.md
+++ b/README.md
@ -3,13 +3,14 @@ Generates an output video with overlayed audio and effects using a list of audio

 ## How it works
 Here's how it works:
-1. Gets all video & audio sources to use from the `input_video_sources/` and `input_audio_sources/` directories. This is where you put your sources.
+1. Gets all sources to use from the `input_video_sources/`, `input_audio_sources/` and `input_image_sources/` directories. This is where you put your sources.
 2. Gets a seed for the random number generator, which can be used if a user wants to re-generate the same exact video again.
 3. Chooses _n_ random videos where _n_ is the number of videos the user wants to merge into the final results (e.g. if you want to merge 40 different videos, it chooses 40 random videos). The script will first attempt to avoid duplicate videos. If the chosen video amount is higher than the available videos, then the script will add duplicate videos to meet that amount.
 4. Applies a bunch of random effects to the video files, which include trimming the video to be a random short length, mirroring the video, speeding it up or slowing it down, reversing the video, as well as other effects.
 5. Merges all the now effect-applied videos into one big video.
-6. Applies effects to the audio files, which include trimming the audio to be a random short length. The script will also attempt to repeat audio clips less than 5 seconds to give a repetitive-like nature to the end result.
-7. The merged video clip & merged audio clip is then merged together to produce the final result, which is put into the `output/` directory.
+6. Overlays images onto this big video at random times, positions and sizes.
+7. Also adds audio files at random spots, and can additionally apply effects to them, which include trimming the audio to be a random short length. The script will also attempt to repeat audio clips less than 5 seconds to give a repetitive-like nature to the end result.
+8. Everything is then merged together to produce the final result, which is put into the `output/` directory.

 ## Requirements
 Python `3.9` or greater is required. This project has been tested on Python `3.11`.
@ -35,7 +36,10 @@ If you'd prefer to use traditional pip/venv:
 Please take a look at the **requirements** above before following these steps. **uv** is the recommended install method.

 1. Download the GitHub repo (click the green 'code' button, then 'Download ZIP') and extract it somewhere.
-2. Put a list of desired video sources into the `input_video_sources/` directory, and a list of desired audio sources into the `input_audio_sources` directory. I recommend keeping the video file formats the same, but it shouldn't matter. The resolution of videos does not matter, either.
+2. Put a list of desired sources into their respective directories:
+    - Videos in `input_video_sources/`
+    - Sounds in `input_audio_sources/`
+    - Images in `input_image_sources/`
 3. Run the script, and choose a seed if you want to reproduce the same video later. Otherwise, type 'any' to choose a random seed.
 4. Choose the amount of videos you want the script to merge together to produce the final result.
 5. The script will do the rest, and generate the final video in the `output/` directory, named `result_seed-<seed>_<number-of-videos>_<effects-enabled>.mp4`.
--- a/input_image_sources/tennis-ball-bird.jpg
+++ b/input_image_sources/tennis-ball-bird.jpg
--- a/input_image_sources/troll-face.jpg
+++ b/input_image_sources/troll-face.jpg
--- a/main.py
+++ b/main.py
@ -4,8 +4,7 @@ from sys import maxsize
 from os import listdir, mkdir, path

 # MoviePy modules
-from moviepy import VideoFileClip, AudioFileClip, CompositeAudioClip, concatenate_videoclips, Effect, vfx
-from moviepy.Clip import Clip
+from moviepy import VideoFileClip, AudioFileClip, CompositeAudioClip, CompositeVideoClip, ImageClip, concatenate_videoclips, vfx, VideoClip

 from src import custom_effects

@ -15,28 +14,36 @@ from tqdm import tqdm
 """

    TODO:
-        flash random images in short bursts at long intervals
        try to overlap videos more and distort them
        spread out audio duplication a bit, so they don't end up directly next to one another
 """

 VIDEO_SOURCE_FOLDER = "input_video_sources"
 AUDIO_SOURCE_FOLDER = "input_audio_sources"
+IMAGE_SOURCE_FOLDER = "input_image_sources"

 videoFiles = [VIDEO_SOURCE_FOLDER + "/" + vid for vid in listdir(VIDEO_SOURCE_FOLDER)]
 audioFiles = [AUDIO_SOURCE_FOLDER + "/" + vid for vid in listdir(AUDIO_SOURCE_FOLDER)]
+imageFiles = [IMAGE_SOURCE_FOLDER + "/" + vid for vid in listdir(IMAGE_SOURCE_FOLDER)]

 # All video clips will be trimmed to be between these two lengths
-# default: 0.5, 3.5 | chaos: 0.3, 1.2
-video_clip_times = (0.4, 0.8)
-# All audio clips will be trimemd to be between these two lengths
-# default: 0.7, 13. chaos: 0.7, 3.0
+# default: 0.4, 3.0 | chaos: 0.3, 1.2
+video_clip_times = (0.4, 3.0)
+# All audio clips will be trimmed to be between these two lengths
+# default: 0.7, 3.0 | chaos: 0.2, 1.0
 audio_clip_times = (0.7, 3.0)
+# All image clips will be trimmed to be between these two lengths
+# default: 0.3, 0.8 | chaos: 0.1, 0.5
+image_clip_times = (0.3, 0.8)

 # Audio amount multiplier (based off the amount of videos)
 # e.g. 60 videos with a multiplier of 0.75 means 45 audio clips will be put into the final result.
-# default: 0.75. chaos: 1.5
-AUDIO_AMOUNT_MULTIPLIER = 1.5
+# default: 0.75 | chaos: 1.5
+AUDIO_AMOUNT_MULTIPLIER = 0.75
+
+# Image amount multiplier (works like the audio amount multiplier: based off the amount of videos)
+# default: 0.6 | chaos: 2.0
+IMAGE_AMOUNT_MULTIPLIER = 2

 # (min, max) random values range, inclusive
 #integer. default: 1, 7
@ -92,6 +99,7 @@ videoEffects = [

 videoObjects = []
 audioObjects = []
+imageObjects = []

 videoAmount = input("Amount of videos: ")
 while not videoAmount.isdecimal():
@ -114,7 +122,6 @@ if videoAmount > len(videoFiles): #if there is a higher chosen amount than total
    randomVideos += additionalVideos

 print("")
-print(f"Compiling {videoAmount} videos... ", end="\r")

 with tqdm(desc="Compiling videos", total=len(randomVideos)) as pbar:
    for index, video in enumerate(randomVideos):
@ -138,7 +145,7 @@ with tqdm(desc="Compiling videos", total=len(randomVideos)) as pbar:

 print("Finished compiling videos.")

-finalVideo = concatenate_videoclips(videoObjects, method="compose") # method="compose"
+finalVideo: VideoClip = concatenate_videoclips(videoObjects, method="compose")

 audioAmount = int(videoAmount*AUDIO_AMOUNT_MULTIPLIER)

@ -151,7 +158,6 @@ if audioAmount > len(audioFiles):
    randomSounds += additionalAudio

 print("")
-print(f"Compiling {audioAmount} sounds...", end="\r")

 copiedSoundAmount = 0
 with tqdm(desc="Compiling sounds", total=len(randomSounds)) as pbar:
@ -200,6 +206,53 @@ with tqdm(desc="Compiling sounds", total=len(randomSounds)) as pbar:

 print(f"Finished compiling audio. Added {copiedSoundAmount} duplicate sounds, total {audioAmount+copiedSoundAmount}.")

+imageAmount = int(videoAmount * IMAGE_AMOUNT_MULTIPLIER)
+# Pick unique images first, sized by imageAmount (not audioAmount) and capped at what is available
+randomImages = rng.sample(imageFiles, k=min(imageAmount, len(imageFiles)))
+
+if imageFiles and imageAmount > len(imageFiles):  # skip the top-up when there are no images to re-use
+    imageAmountToAdd = imageAmount - len(imageFiles)  # shortfall is filled with repeated picks
+    print(f"Chosen image amount is higher than available image amount - re-using {imageAmountToAdd} image sources...")
+    additionalImages = rng.choices(imageFiles, k=imageAmountToAdd)
+    randomImages += additionalImages
+
+print("")
+
+with tqdm(desc="Compiling images", total=len(randomImages)) as pbar:
+    for index, imagePath in enumerate(randomImages):
+        # Load the image as a clip; cap its length at the video's so the start range below is never negative
+        clipDuration = min(rng.uniform(*image_clip_times), finalVideo.duration)
+        newClip = ImageClip(imagePath, duration=clipDuration)
+
+        # Resize the image randomly, relative to the video's output resolution (aspect ratio is not preserved)
+        clipWidthNormal = rng.uniform(0.15, 1)
+        clipWidth = int(finalVideo.w * clipWidthNormal)
+        clipHeightNormal = rng.uniform(0.15, 1)
+        clipHeight = int(finalVideo.h * clipHeightNormal)
+        newClip = newClip.with_effects([vfx.Resize((clipWidth, clipHeight))])
+
+        # Place the image randomly (coordinates-wise), clamped so it stays fully inside the frame
+        clipOffsetXNormal = rng.uniform(0, 1)
+        clipOffsetX = min(
+            int(finalVideo.w * clipOffsetXNormal),
+            finalVideo.w - clipWidth
+        )
+        clipOffsetYNormal = rng.uniform(0, 1)
+        clipOffsetY = min(
+            int(finalVideo.h * clipOffsetYNormal),
+            finalVideo.h - clipHeight
+        )
+        newClip = newClip.with_position((clipOffsetX, clipOffsetY))
+
+        # Place the image at a random spot (duration-wise) so that it ends before the final video does
+        newClip = newClip.with_start(rng.uniform(0, finalVideo.duration - newClip.duration))
+
+        imageObjects.append(newClip)
+
+        pbar.update(1)
+
+finalVideo = CompositeVideoClip([finalVideo] + imageObjects)
+
 # The video's filename
 finalVideoFilename = f"output/result_seed-{seed}_{videoAmount}{'_effects' if shouldUseEffects else ''}.mp4"

@ -226,3 +279,5 @@ for video in videoObjects:
    video.close()
 for audio in audioObjects:
    audio.close()
+for image in imageObjects:
+    image.close()