734 lines
28 KiB
Python
734 lines
28 KiB
Python
#!/usr/bin/env python3
|
|
"""
|
|
render_tts_ascii.py — mode #6 demo: TTS narration ASCII video ("warm machine oracle").
|
|
|
|
Pipeline:
|
|
1. Synthesize narration locally with espeak-ng (or espeak fallback).
|
|
2. Decode the WAV and compute per-frame RMS + transient features.
|
|
3. Render an ASCII-glyph grid over a non-flat textured background, with
|
|
section transitions (amber data fog -> cyan/purple signal rings ->
|
|
punctuation star-map iris -> title card) reactive to the audio.
|
|
4. Overlay a typewriter quote and CRT post-process (scanlines, grain, bloom).
|
|
5. Mux PNG frames + padded audio into MP4 via ffmpeg.
|
|
"""
|
|
|
|
from __future__ import annotations
|
|
|
|
import math
|
|
import os
|
|
import shutil
|
|
import struct
|
|
import subprocess
|
|
import sys
|
|
import wave
|
|
from pathlib import Path
|
|
from typing import Optional
|
|
|
|
import numpy as np
|
|
from PIL import Image, ImageDraw, ImageFilter, ImageFont
|
|
|
|
# ------------------------------------------------------------------- config --
|
|
# Filesystem layout: every path is anchored at the directory holding this script.
ROOT = Path(__file__).resolve().parent
OUT_DIR = ROOT.joinpath("output")
TMP_DIR = ROOT.joinpath("_tmp")
FRAMES = TMP_DIR.joinpath("frames")
LOGS = TMP_DIR.joinpath("logs")
WAV_RAW = TMP_DIR.joinpath("narration_raw.wav")
WAV_PAD = TMP_DIR.joinpath("narration_pad.wav")
OUT_MP4 = OUT_DIR.joinpath("tts_ascii_example.mp4")

# Output geometry and timing.
WIDTH = 960
HEIGHT = 540
FPS = 24
DUR_MIN = 10.0  # seconds
DUR_MAX = 12.0  # seconds

# Text handed to the speech synthesizer (sentences joined by single spaces).
NARRATION = " ".join((
    "Listen closely.",
    "The terminal hums softly.",
    "Each tiny glyph, a small star.",
    "Small text can hold a whole universe.",
    "Welcome to the warm machine.",
))

# On-screen typewriter quote and the character index where it wraps to line two.
QUOTE = "small text can hold a whole universe"
QUOTE_LINE_BREAK = 21

TITLE_LINES = [
    "WARM MACHINE ORACLE",
    "ascii / tts / signal",
]

# Color palette (linear-ish RGB, 0..1), stored as float32 triples.
C_AMBER = np.asarray([1.00, 0.69, 0.25], dtype=np.float32)
C_TEAL = np.asarray([0.16, 0.83, 0.83], dtype=np.float32)
C_PURPLE = np.asarray([0.71, 0.27, 0.90], dtype=np.float32)
C_HOTAMB = np.asarray([1.00, 0.92, 0.78], dtype=np.float32)
C_DEEPBG = np.asarray([0.020, 0.025, 0.060], dtype=np.float32)

# Glyph ramps ordered from low to high intensity. The *_PREF ramps may contain
# non-ASCII glyphs; the *_ASCII ramps are the ASCII-safe fallbacks.
PALETTE_FOG_PREF = " .'`,:;-~+*=oO#"
PALETTE_CIRCUIT_PREF = " .,:-=+*xX#%@▒▓█"
PALETTE_STAR_PREF = " .,'`*+oO●★"
PALETTE_FOG_ASCII = " .'`,:;-~+*=oO#"
PALETTE_CIRCUIT_ASCII = " .,:-=+*xX#%@&"
PALETTE_STAR_ASCII = " .,'`*+oO#@"
|
|
|
|
|
|
# -------------------------------------------------------------- subprocess --
|
|
def run(cmd, log_name: str, check: bool = True) -> int:
    """Execute `cmd`, appending its stdout and stderr to ``LOGS / log_name``.

    The command line itself is written to the log (prefixed with ``$``) before
    the process starts.

    Args:
        cmd: Argument list handed to ``subprocess.run``.
        log_name: File name of the log inside ``LOGS`` (used as given).
        check: When true, a non-zero exit status aborts the program.

    Returns:
        The process return code.

    Raises:
        SystemExit: With the process return code, if `check` is true and the
            command exited non-zero.
    """
    LOGS.mkdir(parents=True, exist_ok=True)
    log_path = LOGS / log_name
    banner = "\n$ " + " ".join(str(part) for part in cmd) + "\n"
    with log_path.open("ab") as log_file:
        log_file.write(banner.encode())
        # Flush so the banner lands before the child's own output in the file.
        log_file.flush()
        completed = subprocess.run(cmd, stdout=log_file, stderr=subprocess.STDOUT)
    rc = completed.returncode
    if rc == 0 or not check:
        return rc
    sys.stderr.write(f"[error] {cmd[0]} failed (rc={rc}); see {log_path}\n")
    raise SystemExit(rc)
|
|
|
|
|
|
# ---------------------------------------------------------------- tts step --
|
|
def generate_tts(text: str, out_path: Path) -> Path:
    """Synthesize `text` to a WAV file at `out_path` using espeak-ng or espeak.

    Engines are tried in order (``espeak-ng`` first, then ``espeak``). An
    engine counts as successful only if it exits with status 0 and leaves a
    file larger than 1024 bytes at `out_path`; otherwise the next engine is
    tried.

    Args:
        text: Narration text to speak.
        out_path: Destination WAV path; its parent directory is created.

    Returns:
        `out_path`, once a usable WAV has been written.

    Raises:
        RuntimeError: If no engine is installed, or if every installed engine
            failed to produce usable audio.
    """
    out_path.parent.mkdir(parents=True, exist_ok=True)
    attempted = []
    for exe in ("espeak-ng", "espeak"):
        if not shutil.which(exe):
            continue
        attempted.append(exe)
        # Drop any leftover file so a WAV from an earlier run can never be
        # mistaken for the output of this attempt.
        if out_path.exists():
            out_path.unlink()
        # Slower, lower pitch — feels like a warm oracle.
        cmd = [exe, "-s", "148", "-p", "38", "-a", "180",
               "-v", "en+m3", "-w", str(out_path), text]
        # check=False: a failing engine must not abort the program, otherwise
        # the fallback engine would never get a chance to run.
        rc = run(cmd, "tts.log", check=False)
        if rc == 0 and out_path.exists() and out_path.stat().st_size > 1024:
            return out_path
        sys.stderr.write(
            f"[warn] {exe} did not produce usable audio (rc={rc}); see tts.log\n"
        )
    if attempted:
        raise RuntimeError(
            "TTS synthesis failed with: " + ", ".join(attempted) + " (see tts.log)."
        )
    raise RuntimeError("Neither espeak-ng nor espeak is available.")
|
|
|
|
|
|
# ------------------------------------------------------------- audio utils --
|
|
def load_wav_mono(path: Path) -> tuple[np.ndarray, int]:
    """Read a PCM WAV file and return it as mono float32 samples.

    Supports 8-bit unsigned and 16-, 24- and 32-bit signed little-endian PCM.
    Samples are scaled by the full-scale value of their bit depth, and
    multi-channel audio is reduced to mono by averaging the channels of each
    frame.

    Args:
        path: Location of the WAV file.

    Returns:
        ``(samples, sample_rate)`` where `samples` is a 1-D float32 array.

    Raises:
        RuntimeError: If the file uses a sample width other than 1-4 bytes.
    """
    with wave.open(str(path), "rb") as w:
        sr = w.getframerate()
        ch = w.getnchannels()
        sw = w.getsampwidth()
        raw = w.readframes(w.getnframes())

    if sw == 1:
        # 8-bit WAV is unsigned with its midpoint at 128.
        arr = (np.frombuffer(raw, dtype="u1").astype(np.float32) - 128.0) / 128.0
    elif sw == 2:
        arr = np.frombuffer(raw, dtype="<i2").astype(np.float32) / 32768.0
    elif sw == 3:
        # NumPy has no 24-bit integer type: assemble each sample from its
        # three little-endian bytes, then sign-extend from bit 23.
        b = np.frombuffer(raw, dtype="u1").reshape(-1, 3).astype(np.int32)
        val = b[:, 0] | (b[:, 1] << 8) | (b[:, 2] << 16)
        val -= (val & 0x800000) << 1
        arr = val.astype(np.float32) / 8388608.0
    elif sw == 4:
        arr = np.frombuffer(raw, dtype="<i4").astype(np.float32) / 2147483648.0
    else:
        raise RuntimeError(f"Unsupported sample width: {sw}")

    if ch > 1:
        # Frames are interleaved: one row per frame, one column per channel.
        arr = arr.reshape(-1, ch).mean(axis=1)
    return arr.astype(np.float32), sr
|
|
|
|
|
|
def write_wav_mono(path: Path, audio: np.ndarray, sr: int) -> None:
    """Write `audio` to `path` as a mono 16-bit PCM WAV file.

    Samples are clipped to [-1, 1], scaled by 32767 and converted to
    little-endian int16 (the conversion drops the fractional part).

    Args:
        path: Destination file.
        audio: Sample values, nominally in the range [-1, 1].
        sr: Sample rate in Hz.
    """
    limited = np.clip(audio, -1.0, 1.0)
    frames = (limited * 32767.0).astype("<i2").tobytes()
    with wave.open(str(path), "wb") as out:
        # (nchannels, sampwidth, framerate, nframes, comptype, compname);
        # the frame count in the header is corrected when the file is closed.
        out.setparams((1, 2, sr, 0, "NONE", "not compressed"))
        out.writeframes(frames)
|
|
|
|
|
|
def pad_or_trim_audio(in_path: Path, out_path: Path, target_dur: float) -> float:
    """Force the WAV at `in_path` to `target_dur` seconds and save it to `out_path`.

    Audio shorter than the target is extended with very quiet, fixed-seed
    white noise so the tail does not sound completely dead; longer audio is
    cut at the target length.

    Args:
        in_path: Source WAV file.
        out_path: Destination WAV file.
        target_dur: Desired duration in seconds.

    Returns:
        The duration in seconds of the audio that was written.
    """
    samples, rate = load_wav_mono(in_path)
    wanted = int(round(target_dur * rate))
    missing = wanted - len(samples)
    if missing > 0:
        # Seeded generator: the hum floor is identical on every run.
        hum = np.random.RandomState(7).randn(missing).astype(np.float32) * 0.0008
        samples = np.concatenate([samples, hum])
    else:
        samples = samples[:wanted]
    write_wav_mono(out_path, samples, rate)
    return len(samples) / rate
|
|
|
|
|
|
def compute_features(audio: np.ndarray, sr: int, fps: int,
                     n_frames: int) -> tuple[np.ndarray, np.ndarray]:
    """Compute per-video-frame loudness and transient envelopes.

    For each video frame, the RMS of the audio samples falling inside that
    frame is measured, lightly smoothed (only when there are more than five
    frames) and normalised to a peak of 1. Transients are the positive
    frame-to-frame increases of that curve, normalised to a peak of 1 and
    passed through a peak-hold with a 0.78 per-frame decay so that a hit stays
    visible for a few frames.

    Args:
        audio: 1-D array of audio samples.
        sr: Audio sample rate in Hz.
        fps: Video frame rate.
        n_frames: Number of video frames to produce values for.

    Returns:
        ``(rms_n, transients)``, each of length `n_frames`. Both are empty
        float32 arrays when `n_frames` is zero or negative.
    """
    if n_frames <= 0:
        # Nothing to analyse; max() and [0] below would fail on empty arrays.
        return np.zeros(0, dtype=np.float32), np.zeros(0, dtype=np.float32)

    spf = sr / fps  # audio samples per video frame (may be fractional)
    rms = np.zeros(n_frames, dtype=np.float32)
    for i in range(n_frames):
        s = int(i * spf)
        e = min(len(audio), int((i + 1) * spf))
        if e > s:
            seg = audio[s:e]
            # The epsilon keeps the square root away from exactly zero.
            rms[i] = float(np.sqrt(np.mean(seg * seg) + 1e-12))

    # Light smoothing
    if len(rms) > 5:
        k = np.array([0.15, 0.7, 0.15], dtype=np.float32)
        rms = np.convolve(rms, k, mode="same")

    # Normalize (silence keeps a divisor of 1 to avoid dividing by zero).
    peak = float(rms.max())
    rms_n = rms / (peak if peak > 0 else 1.0)

    # Transients: positive change only; the first frame has no predecessor,
    # so it is compared against itself and contributes zero.
    diff = np.diff(rms_n, prepend=rms_n[0])
    trans = np.clip(diff, 0.0, None)
    trans_peak = float(trans.max())
    trans_n = trans / (trans_peak if trans_peak > 0 else 1.0)

    # Smooth transients with a short decay so beats persist a few frames.
    decayed = np.zeros_like(trans_n)
    acc = 0.0
    for i, v in enumerate(trans_n):
        acc = max(v, acc * 0.78)
        decayed[i] = acc
    return rms_n, decayed
|
|
|
|
|
|
# ---------------------------------------------------------- font + sprites --
|
|
# Monospace faces for the glyph grid, in order of preference.
FONT_CANDIDATES = [
    "/usr/share/fonts/truetype/dejavu/DejaVuSansMono-Bold.ttf",
    "/usr/share/fonts/truetype/dejavu/DejaVuSansMono.ttf",
    "/usr/share/fonts/truetype/liberation/LiberationMono-Bold.ttf",
    "/usr/share/fonts/truetype/liberation/LiberationMono-Regular.ttf",
    "/usr/share/fonts/truetype/freefont/FreeMonoBold.ttf",
]

# The quote overlay accepts the first three grid candidates (independent copy).
QUOTE_FONT_CANDIDATES = FONT_CANDIDATES[:3]

# Proportional bold faces for the title card.
TITLE_FONT_CANDIDATES = [
    "/usr/share/fonts/truetype/dejavu/DejaVuSans-Bold.ttf",
    "/usr/share/fonts/truetype/liberation/LiberationSans-Bold.ttf",
    "/usr/share/fonts/truetype/freefont/FreeSansBold.ttf",
]
|
|
|
|
|
|
def find_font(candidates) -> str:
    """Return the first entry of `candidates` that exists on disk.

    Args:
        candidates: Iterable of font file paths, most preferred first.

    Returns:
        The first path for which ``Path(path).exists()`` is true.

    Raises:
        RuntimeError: If none of the candidates exists.
    """
    hit = next((path for path in candidates if Path(path).exists()), None)
    if hit is None:
        raise RuntimeError("No suitable font found from candidates.")
    return hit
|
|
|
|
|
|
def font_supports(font: ImageFont.FreeTypeFont, ch: str) -> bool:
    """Report whether rendering `ch` with `font` yields any non-zero pixel.

    Any exception raised by ``font.getmask`` is treated as "not supported".

    NOTE(review): a font that draws a visible placeholder for glyphs it lacks
    would presumably pass this check as well — confirm against the fonts used.

    Args:
        font: Font object exposing ``getmask(text, mode=...)``.
        ch: The character to probe.

    Returns:
        True if the rendered mask is non-empty and contains a non-zero value.
    """
    try:
        rendered = font.getmask(ch, mode="L")
    except Exception:
        return False
    if rendered is None:
        return False
    pixels = np.asarray(rendered, dtype=np.uint8)
    if pixels.size == 0:
        return False
    return int(pixels.max()) > 0
|
|
|
|
|
|
def filter_palette(palette_pref: str, palette_ascii: str,
                   font: ImageFont.FreeTypeFont) -> str:
    """Reduce a glyph ramp to the characters that `font` can actually draw.

    The result always starts with a space. Characters after the first one in
    `palette_pref` are kept when ``font_supports`` accepts them. If fewer than
    four characters remain, the ramp is rebuilt the same way from
    `palette_ascii`; if de-duplication still leaves fewer than four, a fixed
    built-in ramp is returned.

    Args:
        palette_pref: Preferred ramp, ordered low -> high intensity.
        palette_ascii: ASCII-safe fallback ramp in the same order.
        font: Font the glyphs will be rendered with.

    Returns:
        The filtered ramp as a string, order preserved, without duplicates.
    """
    # The leading space (the "empty" glyph) is always kept.
    kept = [" "] + [g for g in palette_pref[1:]
                    if g != " " and font_supports(font, g)]

    # Too few usable glyphs: retry with the ASCII-safe ramp.
    if len(kept) < 4:
        kept = [" "] + [g for g in palette_ascii[1:]
                        if g == " " or font_supports(font, g)]

    # Drop repeats while keeping first-seen order.
    kept = list(dict.fromkeys(kept))

    if len(kept) < 4:
        # Absolute last-ditch fallback.
        kept = list(" .:-=+*#@")
    return "".join(kept)
|
|
|
|
|
|
def build_sprite_array(font: ImageFont.FreeTypeFont, charset: str,
                       cell_w: int, cell_h: int) -> tuple[np.ndarray, dict]:
    """Rasterise every character of `charset` into a fixed-size alpha tile.

    Args:
        font: Font used to draw the glyphs.
        charset: Characters to render; the position in this string becomes
            the sprite index.
        cell_w: Tile width in pixels.
        cell_h: Tile height in pixels.

    Returns:
        ``(sprites, char_to_idx)`` where `sprites` is a uint8 array of shape
        ``(len(charset), cell_h, cell_w)`` and `char_to_idx` maps each
        character to its index in that array. The space character keeps an
        all-zero tile.
    """
    atlas = np.zeros((len(charset), cell_h, cell_w), dtype=np.uint8)
    index_of = {}
    centre = (cell_w / 2, cell_h / 2)
    for slot, glyph in enumerate(charset):
        index_of[glyph] = slot
        if glyph != " ":
            tile = Image.new("L", (cell_w, cell_h), 0)
            # anchor="mm" centres the glyph on the middle of its cell.
            ImageDraw.Draw(tile).text(centre, glyph, fill=255, font=font, anchor="mm")
            atlas[slot] = np.asarray(tile, dtype=np.uint8)
    return atlas, index_of
|
|
|
|
|
|
# -------------------------------------------------------- math + envelopes --
|
|
def trap(t: float, in0: float, in1: float, out0: float, out1: float) -> float:
    """Evaluate a trapezoidal envelope at time `t`.

    The envelope is 0 up to `in0`, rises linearly to 1 at `in1`, stays at 1
    until `out0`, falls linearly to 0 at `out1` and is 0 afterwards. Ramp
    lengths are floored at 1e-6 to avoid division by zero.
    """
    if t <= in0:
        level = 0.0
    elif t < in1:
        level = (t - in0) / max(in1 - in0, 1e-6)
    elif t < out0:
        level = 1.0
    elif t < out1:
        level = 1.0 - (t - out0) / max(out1 - out0, 1e-6)
    else:
        level = 0.0
    return level
|
|
|
|
|
|
def smoothstep(x: float) -> float:
    """Clamp `x` to [0, 1] and return the cubic ease ``3x^2 - 2x^3``."""
    capped = min(1.0, x)
    x = max(0.0, capped)
    squared = x * x
    return squared * (3.0 - 2.0 * x)
|
|
|
|
|
|
# ---------------------------------------------------------------- fields ---
|
|
def fog_field(rows: int, cols: int, t: float, rng_state: np.random.RandomState) -> np.ndarray:
    """Build the slowly rolling "data fog" intensity grid for time `t`.

    Five low-frequency sine/cosine layers are summed, rescaled to 0..1 and
    then weighted so that rows near the vertical middle are brightest.

    Args:
        rows: Grid height in cells.
        cols: Grid width in cells.
        t: Time in seconds.
        rng_state: Accepted for interface compatibility; not read here.

    Returns:
        A float32 array of shape ``(rows, cols)``.
    """
    col_ix = np.arange(cols, dtype=np.float32)[None, :]
    row_ix = np.arange(rows, dtype=np.float32)[:, None]

    fog = np.zeros((rows, cols), dtype=np.float32)
    layers = (
        np.sin(col_ix * 0.11 + t * 0.55),
        np.cos(row_ix * 0.17 + t * 0.41),
        np.sin((col_ix + row_ix) * 0.08 + t * 0.33),
        np.cos((col_ix * 0.31 - row_ix * 0.24) + t * 0.27),
        0.35 * np.sin(col_ix * 0.42 + row_ix * 0.39 - t * 0.9),
    )
    for layer in layers:
        fog += layer

    # Rescale to 0..1 (a perfectly flat field stays at zero).
    fog -= fog.min()
    if fog.max() > 0:
        fog /= fog.max()

    # Soft horizon: weight falls from 1.0 at the middle row to 0.55 at the edge.
    mid = rows / 2.0
    closeness = 1.0 - np.abs((np.arange(rows) - mid) / mid)  # 0..1
    fog = fog * (0.55 + 0.45 * closeness[:, None])
    return fog.astype(np.float32)
|
|
|
|
|
|
def rings_field(rows: int, cols: int, t: float, energy: float,
                pulse: float) -> tuple[np.ndarray, np.ndarray]:
    """Build the concentric "signal rings" field centred on the grid.

    Args:
        rows: Grid height in cells.
        cols: Grid width in cells.
        t: Time in seconds; advances the ring phase.
        energy: Loudness value; shifts the radius at which rings are brightest.
        pulse: Transient value; adds an extra phase offset to the rings.

    Returns:
        ``(intensity, angle_norm)``: two float32 arrays of shape
        ``(rows, cols)``. `intensity` is clipped to 0..1 and `angle_norm` is
        the polar angle of each cell mapped to 0..1.
    """
    mid_x = (cols - 1) / 2.0
    mid_y = (rows - 1) / 2.0
    dx = np.arange(cols, dtype=np.float32)[None, :] - mid_x
    dy = (np.arange(rows, dtype=np.float32)[:, None] - mid_y) * 1.9  # aspect correction
    radius = np.sqrt(dx * dx + dy * dy)

    # Fast fine rings modulated by a slower, wider ring pattern.
    fine = 0.5 + 0.5 * np.sin(radius * 0.55 - t * 4.5 - pulse * 2.2)
    coarse = 0.5 + 0.5 * np.sin(radius * 0.22 - t * 1.2)
    rings = fine * (0.55 + 0.45 * coarse)

    # Gaussian bump in normalised radius so the rings glow in a band and fade.
    radius_n = radius / (np.hypot(mid_x, mid_y * 1.9) + 1e-6)
    glow = np.exp(-((radius_n - 0.55 - 0.18 * energy) ** 2) / 0.18)
    rings = rings * (0.4 + 0.9 * glow)
    rings = np.clip(rings, 0, 1)

    angle_n = (np.arctan2(dy, dx) + math.pi) / (2 * math.pi)  # 0..1
    return rings.astype(np.float32), angle_n.astype(np.float32)
|
|
|
|
|
|
def star_field(rows: int, cols: int, t: float,
               seed: int = 13) -> np.ndarray:
    """Build a sparse, twinkling star map.

    Star positions come from a generator seeded with `seed`, so they are the
    same on every call; only the twinkle term depends on `t`.

    Args:
        rows: Grid height in cells.
        cols: Grid width in cells.
        t: Time in seconds.
        seed: Seed for the star layout.

    Returns:
        A float32 array of shape ``(rows, cols)`` with values in 0..1.
    """
    layout = np.random.RandomState(seed).rand(rows, cols).astype(np.float32)

    # Each cell twinkles with its own phase, derived from its flat index.
    cell_id = np.arange(rows * cols).reshape(rows, cols)
    twinkle = 0.5 + 0.5 * np.sin(cell_id * 0.7 + t * 4.0)

    # Bright stars: cells above 0.88; dimmer companions: cells above 0.78.
    bright = (layout > 0.88).astype(np.float32)
    dim = (layout > 0.78).astype(np.float32) * 0.35
    sky = bright * (0.7 + 0.3 * twinkle) + dim * twinkle
    return np.clip(sky, 0, 1).astype(np.float32)
|
|
|
|
|
|
# ----------------------------------------------------------- frame render --
|
|
def render_frame(f_idx: int,
                 total_frames: int,
                 total_dur: float,
                 rms: np.ndarray,
                 trans: np.ndarray,
                 sprites: np.ndarray,
                 char_to_idx: dict,
                 palettes: dict,
                 cell_w: int,
                 cell_h: int,
                 cols: int,
                 rows: int,
                 quote_font: ImageFont.FreeTypeFont,
                 title_font: ImageFont.FreeTypeFont,
                 cursor_glyph: str,
                 rng: np.random.RandomState) -> Image.Image:
    """Render one video frame as an RGB image of size WIDTH x HEIGHT.

    The frame time is ``f_idx / FPS``. Three intensity fields (fog, rings,
    stars) are blended with time-dependent section weights, mapped to glyphs
    and colours, composited onto a dark background, post-processed and
    finally overlaid with the quote and (late in the clip) the title.

    Args:
        f_idx: Index of the frame; also the index into `rms` and `trans`.
        total_frames: Not read by this function.
        total_dur: Clip duration in seconds; section boundaries are fractions
            of it.
        rms: Per-frame loudness values.
        trans: Per-frame transient values.
        sprites: Glyph alpha tiles, indexed as ``sprites[sprite_index]``.
        char_to_idx: Maps a character to its index in `sprites`.
        palettes: Glyph ramps under the keys "fog", "circuit" and "star".
        cell_w: Glyph cell width in pixels.
        cell_h: Glyph cell height in pixels.
        cols: Number of glyph columns.
        rows: Number of glyph rows.
        quote_font: Font forwarded to ``draw_quote_overlay``.
        title_font: Forwarded unchanged to ``draw_title_overlay``, which
            unpacks it into two fonts — so callers pass a pair despite the
            annotation.
        cursor_glyph: Cursor character forwarded to ``draw_quote_overlay``.
        rng: Random generator forwarded to ``fog_field`` and ``postprocess``.

    Returns:
        The finished frame as a PIL image.
    """
    t = f_idx / FPS
    D = total_dur
    e = float(rms[f_idx])
    p = float(trans[f_idx])

    # Section envelopes (relative to total duration).
    w_fog = trap(t, 0.0, 0.06 * D, 0.32 * D, 0.45 * D)
    w_rings = trap(t, 0.28 * D, 0.40 * D, 0.78 * D, 0.90 * D)
    w_iris = trap(t, 0.58 * D, 0.62 * D, 0.71 * D, 0.76 * D)
    # The title's fall-off points lie past D, so for t <= D it only fades in.
    w_title = trap(t, 0.82 * D, 0.92 * D, D + 1.0, D + 2.0)
    # Sum of section weights, used to normalise the colour mix below; the
    # epsilon keeps the division safe when every weight is zero.
    s = w_fog + w_rings + w_iris + 1e-6

    # --- Fields -------------------------------------------------------------
    fog_v = fog_field(rows, cols, t, rng)
    rings_v, ang = rings_field(rows, cols, t, energy=e, pulse=p)
    stars_v = star_field(rows, cols, t)

    # Per-cell intensity (weighted combination + audio modulation).
    intensity = (w_fog * fog_v
                 + w_rings * rings_v
                 + w_iris * stars_v)
    intensity *= (0.55 + 0.55 * e)
    # Transient pulse: brighten a radial wave outward.
    if p > 0.02:
        cy_, cx_ = (rows - 1) / 2.0, (cols - 1) / 2.0
        xx = np.arange(cols)[None, :] - cx_
        yy = (np.arange(rows)[:, None] - cy_) * 1.9
        rr = np.sqrt(xx * xx + yy * yy)
        rr_n = rr / (np.hypot(cx_, cy_ * 1.9) + 1e-6)
        # Gaussian ring whose normalised radius grows with the transient value.
        wave = np.exp(-((rr_n - (0.15 + 0.85 * p)) ** 2) / 0.04) * p * 0.75
        intensity += wave.astype(np.float32)

    intensity = np.clip(intensity, 0.0, 1.4)
    # Adaptive tonemap: percentile-based.
    hi = float(np.percentile(intensity, 96))
    if hi > 0.05:
        intensity = np.clip(intensity / hi, 0.0, 1.0)
    else:
        # Nearly empty frame: blank it rather than amplifying faint values.
        intensity = np.zeros_like(intensity)

    # --- Char selection (pick palette by dominant section) ------------------
    if w_iris > 0.40 and w_iris >= max(w_fog, w_rings) * 0.85:
        pal = palettes["star"]
    elif w_rings > w_fog:
        pal = palettes["circuit"]
    else:
        pal = palettes["fog"]
    n_pal = len(pal)
    # Quantise intensity to a position in the ramp, then translate that
    # position into an index into the shared sprite array.
    idx_in_pal = np.clip((intensity * (n_pal - 1)).astype(np.int32), 0, n_pal - 1)
    pal_lookup = np.array([char_to_idx[c] for c in pal], dtype=np.int32)
    sprite_idx = pal_lookup[idx_in_pal]  # (rows, cols)

    # --- Per-cell color -----------------------------------------------------
    col_field = np.zeros((rows, cols, 3), dtype=np.float32)
    col_field += w_fog * C_AMBER[None, None, :]
    # Ring colour blends teal -> purple with the cell's normalised angle.
    ring_color = (1.0 - ang)[..., None] * C_TEAL[None, None, :] + ang[..., None] * C_PURPLE[None, None, :]
    col_field += w_rings * ring_color
    col_field += w_iris * C_HOTAMB[None, None, :]
    col_field /= s
    # Subtle hue drift modulated by transient.
    col_field += (p * 0.18) * (C_HOTAMB[None, None, :] - col_field)
    col_field = np.clip(col_field, 0.0, 1.0)
    # Multiply by intensity for brightness.
    col_field *= intensity[..., None]

    # --- Compose ASCII grid into a pixel image -------------------------------
    # alpha tiles: (rows, cols, cell_h, cell_w)
    tiles = sprites[sprite_idx]  # (rows, cols, ch, cw)
    # Reorder to (rows, ch, cols, cw) so the reshape lays tiles out as one
    # contiguous pixel grid.
    alpha_big = tiles.transpose(0, 2, 1, 3).reshape(rows * cell_h, cols * cell_w)
    alpha_f = alpha_big.astype(np.float32) / 255.0
    # color_big: repeat each cell's color across its cell pixels
    color_big = np.repeat(np.repeat(col_field, cell_h, axis=0), cell_w, axis=1)

    # Canvas may be slightly smaller than WIDTH/HEIGHT — pad to full size.
    grid_h, grid_w = alpha_f.shape
    base = np.tile(C_DEEPBG, (HEIGHT, WIDTH, 1)).astype(np.float32)
    # Faint background haze (low intensity field) for the unused gutter pixels too.
    base += 0.03 * np.tile(C_PURPLE, (HEIGHT, WIDTH, 1)) * (0.4 + 0.6 * w_rings)
    base += 0.02 * np.tile(C_AMBER, (HEIGHT, WIDTH, 1)) * (0.4 + 0.6 * w_fog)

    # Offset grid to roughly center (in case grid_w/grid_h < canvas).
    ox = (WIDTH - grid_w) // 2
    oy = (HEIGHT - grid_h) // 2
    base[oy:oy + grid_h, ox:ox + grid_w, :] += alpha_f[..., None] * color_big

    # Iris brief darkening: pull background toward black, leave stars bright.
    # NOTE(review): the scale below is applied to the whole composited frame,
    # glyphs included — confirm that matches the intent stated above.
    if w_iris > 0.05:
        darken = 1.0 - 0.85 * w_iris
        base *= darken

    img_f = np.clip(base, 0.0, 1.0)

    # --- Postprocess: scanlines, grain, bloom -------------------------------
    img_f = postprocess(img_f, rng)

    # The +0.5 rounds to the nearest 8-bit value instead of truncating.
    img_u8 = (img_f * 255.0 + 0.5).astype(np.uint8)
    out = Image.fromarray(img_u8, mode="RGB")

    # --- Overlays: quote + title -------------------------------------------
    draw_quote_overlay(out, t, D, e, p, quote_font, cursor_glyph)
    if w_title > 0.01:
        draw_title_overlay(out, w_title, title_font)

    return out
|
|
|
|
|
|
# --------------------------------------------------------------- postproc --
|
|
def postprocess(img: np.ndarray, rng: np.random.RandomState) -> np.ndarray:
    """Apply the CRT look to a float RGB frame: scanlines, grain, bloom, vignette.

    Args:
        img: Frame as an array whose first two axes are height and width and
            whose last axis holds the colour channels.
        rng: Random generator used for the film grain (one ``rand(H, W)`` draw
            per call).

    Returns:
        The processed frame, clipped to 0..1.
    """
    height, width = img.shape[:2]

    # Scanlines: every other row (starting with row 0) is dimmed to 86%.
    row_gain = np.ones(height, dtype=np.float32)
    row_gain[0::2] = 0.86
    frame = img * row_gain[:, None, None]

    # Grain: the same small noise offset is added to all channels of a pixel.
    noise = (rng.rand(height, width).astype(np.float32) - 0.5) * 0.045
    frame = frame + noise[..., None]

    # Cheap bloom: blur whatever exceeds 0.55 and add it back on top.
    hot = np.maximum(0.0, frame - 0.55)
    hot_u8 = np.clip(hot * 255.0, 0, 255).astype(np.uint8)
    halo_img = Image.fromarray(hot_u8, "RGB").filter(ImageFilter.GaussianBlur(radius=5))
    halo = np.asarray(halo_img, dtype=np.float32) / 255.0
    frame = frame + halo * 0.65

    # Subtle vignette: up to 35% darker toward the corners.
    v_axis = np.linspace(-1.0, 1.0, height, dtype=np.float32)[:, None]
    h_axis = np.linspace(-1.0, 1.0, width, dtype=np.float32)[None, :]
    dist_sq = h_axis * h_axis + v_axis * v_axis
    shade = 1.0 - 0.35 * np.clip(dist_sq, 0.0, 1.0)
    frame = frame * shade[..., None]

    return np.clip(frame, 0.0, 1.0)
|
|
|
|
|
|
# ----------------------------------------------------------------- text ----
|
|
def draw_quote_overlay(img: Image.Image, t: float, D: float,
                       rms_v: float, pulse_v: float,
                       font: ImageFont.FreeTypeFont, cursor_glyph: str) -> None:
    """Draw the typewriter-style QUOTE card onto `img` in place.

    Nothing is drawn before ``0.32 * D``. From then until ``0.75 * D`` the
    quote is revealed character by character (eased with ``smoothstep``);
    afterwards the full quote stays visible. Text is laid out on two lines,
    split at ``QUOTE_LINE_BREAK``, and a blinking cursor follows the text
    while the reveal is still in progress.

    Args:
        img: Frame to draw on; modified in place.
        t: Current time in seconds.
        D: Total clip duration in seconds.
        rms_v: Current loudness value; raises the alpha of the border lines.
        pulse_v: Current transient value; switches the second line to a
            brighter colour above 0.15.
        font: Font for the quote text.
        cursor_glyph: Character shown as the cursor when it is blinked on.
    """
    start = 0.32 * D
    end = 0.75 * D
    if t < start:
        return
    progress = min(1.0, (t - start) / max(end - start, 1e-6))
    progress = smoothstep(progress)
    # Number of quote characters revealed so far.
    n_target = int(round(progress * len(QUOTE)))
    visible = QUOTE[:n_target]

    if len(visible) <= QUOTE_LINE_BREAK:
        line1, line2 = visible, ""
    else:
        line1 = QUOTE[:QUOTE_LINE_BREAK]
        line2 = visible[QUOTE_LINE_BREAK:]

    # The cursor toggles each time int(t * 2.6) advances.
    blink_on = (int(t * 2.6) % 2 == 0)
    cursor = cursor_glyph if blink_on else " "
    # The cursor is only appended while the quote is still being typed.
    if n_target < len(QUOTE):
        # Once line one is full the cursor moves to line two, even while
        # line two is still empty.
        if line2 or len(visible) >= QUOTE_LINE_BREAK:
            line2 = (line2 + cursor)[:32]
        else:
            line1 = (line1 + cursor)[:32]

    draw = ImageDraw.Draw(img, "RGBA")

    # Backdrop card.
    box_w, box_h = 700, 120
    cx = WIDTH // 2
    y_top = int(HEIGHT * 0.66)
    backdrop = Image.new("RGBA", (box_w, box_h), (6, 8, 22, 195))
    # The backdrop is passed as its own mask so its alpha channel is honoured.
    img.paste(backdrop, (cx - box_w // 2, y_top), backdrop)

    # Borders glow (top + bottom).
    glow = int(160 + 70 * rms_v)
    draw.line([(cx - box_w // 2 + 12, y_top + 2),
               (cx + box_w // 2 - 12, y_top + 2)],
              fill=(255, 180, 80, glow), width=2)
    draw.line([(cx - box_w // 2 + 12, y_top + box_h - 3),
               (cx + box_w // 2 - 12, y_top + box_h - 3)],
              fill=(70, 220, 220, glow), width=2)

    # Glow text (drawn twice: blurred behind, sharp on top).
    glow_layer = Image.new("RGBA", img.size, (0, 0, 0, 0))
    gd = ImageDraw.Draw(glow_layer)
    base_color = (255, 200, 110, 230)
    bright_color = (255, 235, 200, 255)
    line1_y = y_top + 30
    line2_y = y_top + 75
    gd.text((cx, line1_y), line1, font=font, anchor="mm", fill=base_color)
    gd.text((cx, line2_y), line2, font=font, anchor="mm", fill=base_color)
    blurred = glow_layer.filter(ImageFilter.GaussianBlur(radius=4))
    img.paste(blurred, (0, 0), blurred)

    # Sharp text pass on top of the blurred glow.
    draw.text((cx, line1_y), line1, font=font, anchor="mm", fill=bright_color)
    color2 = (255, 220, 160, 255) if pulse_v > 0.15 else (255, 200, 120, 255)
    draw.text((cx, line2_y), line2, font=font, anchor="mm", fill=color2)
|
|
|
|
|
|
def draw_title_overlay(img: Image.Image, weight: float,
                       font_pair: tuple[ImageFont.FreeTypeFont, ImageFont.FreeTypeFont]) -> None:
    """Draw the two-line title card onto `img` in place.

    Each line is drawn twice: once on a separate layer that is blurred and
    pasted as a glow, then once sharply on top. Opacity follows
    ``smoothstep(weight)``.

    Args:
        img: Frame to draw on; modified in place.
        weight: Title envelope value; eased and used as the opacity factor.
        font_pair: ``(big, small)`` fonts for the first and second title line.
    """
    big, small = font_pair
    pen = ImageDraw.Draw(img, "RGBA")

    eased = smoothstep(weight)
    alpha_main = int(255 * eased)
    alpha_sub = int(220 * eased)

    centre_x = WIDTH // 2
    main_y = int(HEIGHT * 0.18)
    sub_y = main_y + 48
    headline = TITLE_LINES[0]
    tagline = TITLE_LINES[1]

    # Glow pass: render onto a transparent layer, blur, then alpha-paste.
    halo = Image.new("RGBA", img.size, (0, 0, 0, 0))
    halo_pen = ImageDraw.Draw(halo)
    halo_pen.text((centre_x, main_y), headline, font=big, anchor="mm",
                  fill=(255, 190, 100, alpha_main))
    halo_pen.text((centre_x, sub_y), tagline, font=small, anchor="mm",
                  fill=(80, 220, 220, alpha_sub))
    soft_halo = halo.filter(ImageFilter.GaussianBlur(radius=6))
    img.paste(soft_halo, (0, 0), soft_halo)

    # Sharp pass on top of the glow.
    pen.text((centre_x, main_y), headline, font=big, anchor="mm",
             fill=(255, 230, 170, alpha_main))
    pen.text((centre_x, sub_y), tagline, font=small, anchor="mm",
             fill=(160, 240, 240, alpha_sub))
|
|
|
|
|
|
# ----------------------------------------------------------------- mux -----
|
|
def mux_video(frames_glob_dir: Path, audio_path: Path,
              out_path: Path, total_dur: float) -> None:
    """Encode the PNG frame sequence plus the audio track into an MP4 via ffmpeg.

    Frames are read as ``f_%05d.png`` from `frames_glob_dir` at ``FPS``. Video
    is encoded with libx264 (yuv420p, preset medium, CRF 20), audio with AAC
    at 160k, and the output is limited to `total_dur` seconds. The command is
    executed through ``run`` and logged to ``ffmpeg_mux.log``.

    Args:
        frames_glob_dir: Directory containing the numbered PNG frames.
        audio_path: Audio file to mux in.
        out_path: Destination MP4; its parent directory is created.
        total_dur: Output duration limit in seconds.
    """
    out_path.parent.mkdir(parents=True, exist_ok=True)

    frame_pattern = str(frames_glob_dir / "f_%05d.png")
    sources = ["-framerate", str(FPS), "-i", frame_pattern, "-i", str(audio_path)]
    # Take video from the first input and audio from the second.
    mapping = ["-map", "0:v:0", "-map", "1:a:0"]
    video_opts = ["-c:v", "libx264", "-pix_fmt", "yuv420p",
                  "-preset", "medium", "-crf", "20"]
    audio_opts = ["-c:a", "aac", "-b:a", "160k"]
    tail = ["-t", f"{total_dur:.3f}", "-movflags", "+faststart", str(out_path)]

    cmd = ["ffmpeg", "-y", *sources, *mapping, *video_opts, *audio_opts, *tail]
    run(cmd, "ffmpeg_mux.log")
|
|
|
|
|
|
def ffprobe_info(path: Path) -> dict:
    """Probe a media file with ffprobe and summarise its streams and duration.

    ffprobe is asked for JSON output so that every field stays attached to
    the stream it belongs to. (With the flat ``key=value`` writer and section
    wrappers suppressed, stream boundaries have to be guessed from field
    order, and ffprobe emits ``codec_name`` before ``codec_type``.)

    The command line, ffprobe's stderr and its stdout are appended to
    ``LOGS / "ffprobe.log"``.

    Args:
        path: Media file to inspect.

    Returns:
        A dict with two keys:
          * ``"streams"``: list of dicts, one per stream, holding
            ``"codec_type"`` and, when reported, ``"codec_name"`` and
            ``"stream_duration"`` (values as strings from ffprobe).
          * ``"format_duration"``: the container duration as reported by
            ffprobe, or None if it is missing.
        If ffprobe's output cannot be parsed, the stream list is empty and
        the duration is None.
    """
    import json  # local import: only this function needs it

    cmd = ["ffprobe", "-v", "error",
           "-show_entries", "stream=codec_type,codec_name,duration:format=duration",
           "-of", "json", str(path)]
    # Make sure the log directory exists even when called on its own.
    LOGS.mkdir(parents=True, exist_ok=True)
    log_path = LOGS / "ffprobe.log"
    with open(log_path, "ab") as f:
        f.write(("\n$ " + " ".join(cmd) + "\n").encode())
        f.flush()
        r = subprocess.run(cmd, stdout=subprocess.PIPE, stderr=f)
        out = r.stdout.decode("utf-8", "replace")
        f.write(out.encode())

    info = {"streams": [], "format_duration": None}
    try:
        parsed = json.loads(out) if out.strip() else {}
    except json.JSONDecodeError:
        parsed = {}
    if not isinstance(parsed, dict):
        parsed = {}

    for stream in parsed.get("streams") or []:
        if not isinstance(stream, dict) or "codec_type" not in stream:
            continue
        entry = {"codec_type": stream["codec_type"]}
        if "codec_name" in stream:
            entry["codec_name"] = stream["codec_name"]
        if "duration" in stream:
            entry["stream_duration"] = stream["duration"]
        info["streams"].append(entry)

    fmt = parsed.get("format")
    if isinstance(fmt, dict):
        info["format_duration"] = fmt.get("duration")
    return info
|
|
|
|
|
|
# ----------------------------------------------------------------- main ----
|
|
def main() -> int:
    """Run the whole pipeline: TTS, audio features, rendering, muxing, verification.

    Returns:
        0 on success; 2 if the output has no video stream, 3 if it has no
        audio stream, 4 if its duration falls outside
        ``[DUR_MIN - 0.5, DUR_MAX + 0.5]`` seconds.
    """
    for d in (OUT_DIR, TMP_DIR, FRAMES, LOGS):
        d.mkdir(parents=True, exist_ok=True)
    # Clean prior frames.
    for p in FRAMES.glob("*.png"):
        p.unlink()

    print("[1/6] Generating TTS narration with espeak-ng...")
    generate_tts(NARRATION, WAV_RAW)
    audio, sr = load_wav_mono(WAV_RAW)
    audio_dur = len(audio) / sr
    print(f"      raw audio: {audio_dur:.2f}s @ {sr}Hz")

    # Video length: narration plus a 0.8 s tail, clamped to [DUR_MIN, DUR_MAX].
    total_dur = max(DUR_MIN, min(DUR_MAX, audio_dur + 0.8))
    total_frames = int(round(total_dur * FPS))
    print(f"      target video: {total_dur:.2f}s, {total_frames} frames @ {FPS}fps")

    print("[2/6] Padding/trimming audio to match video duration...")
    final_audio_dur = pad_or_trim_audio(WAV_RAW, WAV_PAD, total_dur)
    print(f"      padded audio: {final_audio_dur:.2f}s")

    # Features are computed from the padded track so they cover every frame.
    audio_pad, _ = load_wav_mono(WAV_PAD)
    rms_n, trans_n = compute_features(audio_pad, sr, FPS, total_frames)

    print("[3/6] Loading fonts and building glyph sprites...")
    grid_font_path = find_font(FONT_CANDIDATES)
    quote_font_path = find_font(QUOTE_FONT_CANDIDATES)
    title_font_path = find_font(TITLE_FONT_CANDIDATES)

    grid_font = ImageFont.truetype(grid_font_path, size=18)
    quote_font = ImageFont.truetype(quote_font_path, size=26)
    title_big = ImageFont.truetype(title_font_path, size=40)
    title_sm = ImageFont.truetype(title_font_path, size=22)

    # Figure out cell size from font metrics.
    asc, desc = grid_font.getmetrics()
    cell_h = max(asc + desc, 18)
    # Use 'M' advance, clamp wider so dense glyphs don't clip.
    try:
        adv = int(round(grid_font.getlength("M")))
    except Exception:
        # Fall back to the right edge of the bounding box of "M".
        adv = grid_font.getbbox("M")[2]
    cell_w = max(adv, 10)
    cols = WIDTH // cell_w
    rows = HEIGHT // cell_h
    print(f"      grid: {cols}x{rows} cells, cell={cell_w}x{cell_h}")

    # Filter palettes against the chosen font.
    pal_fog = filter_palette(PALETTE_FOG_PREF, PALETTE_FOG_ASCII, grid_font)
    pal_circuit = filter_palette(PALETTE_CIRCUIT_PREF, PALETTE_CIRCUIT_ASCII, grid_font)
    pal_star = filter_palette(PALETTE_STAR_PREF, PALETTE_STAR_ASCII, grid_font)
    # Union charset for sprite atlas.
    charset = "".join(sorted(set(pal_fog + pal_circuit + pal_star)))
    if " " not in charset:
        charset = " " + charset
    print(f"      palettes — fog:{len(pal_fog)} circuit:{len(pal_circuit)} star:{len(pal_star)}; atlas={len(charset)}")

    sprites, char_to_idx = build_sprite_array(grid_font, charset, cell_w, cell_h)
    palettes = {"fog": pal_fog, "circuit": pal_circuit, "star": pal_star}
    cursor_glyph = "_" if font_supports(quote_font, "_") else "|"

    print("[4/6] Rendering frames...")
    # One seeded generator is shared by all frames, so the output is
    # reproducible as long as frames are rendered in order.
    rng = np.random.RandomState(42)
    for f_idx in range(total_frames):
        img = render_frame(
            f_idx=f_idx,
            total_frames=total_frames,
            total_dur=total_dur,
            rms=rms_n, trans=trans_n,
            sprites=sprites, char_to_idx=char_to_idx,
            palettes=palettes,
            cell_w=cell_w, cell_h=cell_h, cols=cols, rows=rows,
            quote_font=quote_font,
            title_font=(title_big, title_sm),
            cursor_glyph=cursor_glyph,
            rng=rng,
        )
        img.save(FRAMES / f"f_{f_idx:05d}.png", optimize=False, compress_level=1)
        # Progress line every 24 frames and on the last frame.
        if (f_idx + 1) % 24 == 0 or f_idx == total_frames - 1:
            print(f"      frame {f_idx + 1}/{total_frames}")

    print("[5/6] Muxing video + audio with ffmpeg...")
    mux_video(FRAMES, WAV_PAD, OUT_MP4, total_dur)

    print("[6/6] Verifying output with ffprobe...")
    info = ffprobe_info(OUT_MP4)
    stream_types = [s.get("codec_type") for s in info["streams"]]
    print(f"      streams: {info['streams']}")
    print(f"      format duration: {info['format_duration']}")
    has_video = "video" in stream_types
    has_audio = "audio" in stream_types
    # NOTE(review): float() raises ValueError on a non-numeric duration string
    # — confirm ffprobe cannot report one for this output.
    dur = float(info["format_duration"] or 0.0)
    if not has_video:
        print("[FAIL] no video stream in output"); return 2
    if not has_audio:
        print("[FAIL] no audio stream in output"); return 3
    if not (DUR_MIN - 0.5 <= dur <= DUR_MAX + 0.5):
        print(f"[FAIL] duration {dur:.2f}s outside expected window"); return 4
    print(f"[OK] {OUT_MP4} — video+audio, {dur:.2f}s")
    return 0
|
|
|
|
|
|
# Script entry point: main()'s return value becomes the process exit status.
if __name__ == "__main__":
    sys.exit(main())
|