improvements
All checks were successful
ci/woodpecker/push/build Pipeline was successful

This commit is contained in:
Jake Walker 2025-01-09 11:28:18 +00:00
parent 07587f2c90
commit 80085fcad1
5 changed files with 142 additions and 26 deletions

101
main.py
View file

@ -1,9 +1,13 @@
import hashlib
import shutil
import time
import urllib.parse
from datetime import datetime, timezone
import uuid
from datetime import datetime, timedelta, timezone
from pathlib import Path
from typing import Optional
import ffmpeg
import structlog
from podgen import Episode, Media, Podcast
from pydantic import BaseModel, Field
@ -13,7 +17,22 @@ from watchdog.observers import Observer
from process import AudioProcessor
from settings import Settings
EXTENSIONS = [".m4a"]
EXTENSIONS = [
".aac",
".ac3",
".aif",
".aiff",
".ape",
".flac",
".m4a",
".mp3",
".ogg",
".opus",
".ra",
".ram",
".wav",
".wma",
]
META_FILENAME = "meta.json"
DESCRIPTION_EXTENSION = "txt"
@ -24,6 +43,7 @@ class PodcastMeta(BaseModel):
name: str
description: str
explicit: bool = Field(default=True)
output_name: str = Field(default_factory=lambda: str(uuid.uuid4()))
class PodcastGenerator:
@ -56,6 +76,23 @@ class PodcastGenerator:
with open(self.settings.directory / feed_name / META_FILENAME, "r") as f:
return PodcastMeta.model_validate_json(f.read())
def get_audio_duration(self, filename: Path) -> Optional[timedelta]:
    """Return the duration of the first audio stream in *filename*.

    The file is inspected with ``ffmpeg.probe``. The duration is read from
    the first stream whose ``codec_type`` is ``"audio"``; when that stream
    carries no ``duration`` field, the container-level ``format.duration``
    reported by the probe is used instead.

    Args:
        filename: Path of the media file to inspect.

    Returns:
        The duration as a ``timedelta``, or ``None`` when the file cannot
        be probed, contains no audio stream, or reports no usable duration.
    """
    try:
        probe = ffmpeg.probe(str(filename))
    except ffmpeg.Error:
        # The duration is optional metadata: one unreadable file must not
        # abort generation of the whole feed.
        log.warning("Could not probe audio duration for %s", filename)
        return None

    audio_stream = next(
        (
            stream
            for stream in probe.get("streams", [])
            if stream.get("codec_type") == "audio"
        ),
        None,
    )
    if audio_stream is None:
        return None

    raw_duration = audio_stream.get("duration")
    if raw_duration is None:
        # Fall back to the container-level value when the stream has none.
        raw_duration = probe.get("format", {}).get("duration")
    if raw_duration is None:
        return None

    try:
        return timedelta(seconds=float(raw_duration))
    except (TypeError, ValueError, OverflowError):
        # Non-numeric or non-finite value (e.g. "N/A"): treat as unknown.
        return None
def generate_all_feeds(self) -> None:
    """Rebuild every configured feed from a clean output directory.

    Removes ``settings.output_directory`` (errors during removal are
    ignored) and then calls ``generate_feed`` once for each entry in
    ``settings.feeds``.
    """
    output_root = self.settings.output_directory
    # Start from scratch so nothing from a previous run is left behind.
    shutil.rmtree(output_root, ignore_errors=True)

    for name in self.settings.feeds:
        self.generate_feed(name)
def generate_feed(self, feed_name: str) -> None:
log.info("Generating feed for %s", feed_name)
@ -64,16 +101,20 @@ class PodcastGenerator:
feed = Podcast(
name=podcast_meta.name,
description=podcast_meta.description,
website=urllib.parse.urljoin(self.settings.url_base, feed_name),
website=urllib.parse.urljoin(
self.settings.url_base, podcast_meta.output_name
),
explicit=podcast_meta.explicit,
feed_url=urllib.parse.urljoin(self.settings.url_base, f"{feed_name}.xml"),
feed_url=urllib.parse.urljoin(
self.settings.url_base, f"{podcast_meta.output_name}/feed.xml"
),
)
output_dir = self.settings.output_directory / feed_name
output_dir = self.settings.output_directory / podcast_meta.output_name
feed_episodes_dir = self.settings.directory / feed_name / "episodes"
shutil.rmtree(output_dir, ignore_errors=True)
output_dir.mkdir()
output_dir.mkdir(parents=True)
for file in feed_episodes_dir.glob("*"):
if file.suffix not in EXTENSIONS:
@ -89,7 +130,13 @@ class PodcastGenerator:
except AttributeError:
file_date = datetime.now().timestamp()
h = hashlib.sha256()
with open(file, "rb") as f:
for byte_block in iter(lambda: f.read(4096), b""):
h.update(byte_block)
episode = Episode(
id=h.hexdigest(),
title=file.stem,
media=Media(
urllib.parse.urljoin(
@ -97,6 +144,7 @@ class PodcastGenerator:
urllib.parse.quote(f"{feed_name}/{file.name}"),
),
file.stat().st_size,
duration=self.get_audio_duration(file),
),
publication_date=datetime.fromtimestamp(file_date, timezone.utc),
)
@ -106,12 +154,14 @@ class PodcastGenerator:
)
if description_filename.is_file():
with open(description_filename, "r") as f:
episode.long_summary = f.read()
content = f.read()
if content.strip() != "":
episode.long_summary = content.strip()
shutil.copyfile(file, output_dir / file.name)
feed.add_episode(episode)
output_feed_file = self.settings.output_directory / f"{feed_name}.xml"
output_feed_file = output_dir / "feed.xml"
log.info("Saving feed to %s", output_feed_file)
with open(output_feed_file, "w") as f:
feed.rss_file(f)
@ -120,16 +170,22 @@ class PodcastGenerator:
class GeneratorEventHandler(FileSystemEventHandler):
def __init__(self, settings: Settings):
self.settings = settings
self.audio_processor = AudioProcessor()
self.generator = PodcastGenerator(settings=settings)
self.audio_processor = AudioProcessor(
generate_callback=lambda: self.generator.generate_all_feeds()
)
self.generate_time: Optional[datetime] = None
self.audio_processor.start_processing()
self.generator.generate_all_feeds()
super().__init__()
def on_any_event(self, event):
src_path = Path(event.src_path)
# log.debug("Got file watch event", e=event)
for feed_name in self.settings.feeds:
feed_consume_dir = self.settings.directory / feed_name / "consume"
feed_meta_path = self.settings.directory / feed_name / META_FILENAME
@ -137,7 +193,11 @@ class GeneratorEventHandler(FileSystemEventHandler):
# if a file is created in a consume directory
if event.event_type == "created":
if src_path.parent != feed_consume_dir:
if (
src_path.parent != feed_consume_dir
or src_path.suffix not in EXTENSIONS
or src_path.name.startswith(".")
):
continue
output_path = (
@ -155,11 +215,14 @@ class GeneratorEventHandler(FileSystemEventHandler):
output_path.parent / f"{output_path.stem}.{DESCRIPTION_EXTENSION}",
"a",
).close()
self.generator.generate_feed(feed_name)
# if a file is modified in the episodes directory or meta has changed
if src_path == feed_meta_path or feed_episodes_dir in src_path.parents:
self.generator.generate_feed(feed_name)
if (
src_path == feed_meta_path
or feed_episodes_dir in src_path.parents
and not event.is_directory
):
self.generate_time = datetime.now() + timedelta(minutes=1)
if __name__ == "__main__":
@ -167,17 +230,23 @@ if __name__ == "__main__":
log.info("Loaded settings", settings=settings)
event_handler = GeneratorEventHandler(settings)
observer = Observer()
observer.schedule(
GeneratorEventHandler(settings), settings.directory, recursive=True
)
observer.schedule(event_handler, settings.directory, recursive=True)
observer.start()
log.info("Listening for changes at %s...", settings.directory)
try:
while True:
if (
event_handler.generate_time is not None
and datetime.now() >= event_handler.generate_time
):
event_handler.generate_time = None
event_handler.generator.generate_all_feeds()
time.sleep(1)
finally:
observer.stop()
observer.join()
observer.join()

View file

@ -1,7 +1,10 @@
import queue
import shutil
import tempfile
import threading
import time
from pathlib import Path
from typing import Optional
from typing import Callable, Optional
import structlog
from ffmpeg_normalize import FFmpegNormalize
@ -9,15 +12,17 @@ from ffmpeg_normalize import FFmpegNormalize
from settings import Settings
DELETE_INPUTS = Settings().delete_consume_files
CONSUME_DELAY = Settings().consume_delay
log = structlog.get_logger()
class AudioProcessor:
def __init__(self):
def __init__(self, generate_callback: Callable[[], None]):
self.queue: queue.Queue[(Path, Path)] = queue.Queue()
self.is_running = False
self.processor_thread: Optional[threading.Thread] = None
self.generate_callback = generate_callback
def add_file(self, input_filename: Path, output_filename: Path) -> None:
    """Enqueue one file for processing.

    Puts the ``(input_filename, output_filename)`` pair on ``self.queue``.

    Args:
        input_filename: Path of the file to be processed.
        output_filename: Path the processed result should be written to.
    """
    job = (input_filename, output_filename)
    self.queue.put(job)
@ -46,15 +51,41 @@ class AudioProcessor:
input_filename=input_filename,
output_filename=output_filename,
)
ffmpeg_normalize = FFmpegNormalize(
"ebu", audio_codec="aac", audio_bitrate="192k"
)
ffmpeg_normalize.add_media_file(str(input_filename), str(output_filename))
ffmpeg_normalize.run_normalization()
# delete the original
if DELETE_INPUTS:
output_filename.unlink()
if not input_filename.is_file():
log.error("Could not process non-file", input_filename=input_filename)
return
# wait for file to finish uploading
current_size = input_filename.stat().st_size
while True:
time.sleep(CONSUME_DELAY)
if input_filename.stat().st_size != current_size:
log.debug(
"Waiting for file to finish uploading",
input_filename=input_filename,
)
current_size = input_filename.stat().st_size
continue
break
with tempfile.TemporaryDirectory() as tmp:
input_temp_path = Path(tmp) / input_filename.name
output_temp_path = Path(tmp) / output_filename.name
# copy to temp directory
shutil.move(input_filename, input_temp_path)
ffmpeg_normalize = FFmpegNormalize(
"ebu", audio_codec="aac", audio_bitrate="192k"
)
ffmpeg_normalize.add_media_file(str(input_temp_path), str(output_temp_path))
ffmpeg_normalize.run_normalization()
shutil.move(output_temp_path, output_filename)
self.generate_callback()
def _process_queue(self) -> None:
while self.is_running:

View file

@ -6,6 +6,7 @@ readme = "README.md"
requires-python = ">=3.13"
dependencies = [
"ffmpeg-normalize>=1.31.0",
"ffmpeg-python>=0.2.0",
"podgen>=1.1.0",
"pydantic>=2.10.5",
"pydantic-settings>=2.7.1",

View file

@ -11,5 +11,6 @@ class Settings(BaseSettings):
feeds: Set[str] = Field(default={"default"})
url_base: str = Field(default="https://example.com")
delete_consume_files: bool = Field(default=False)
consume_delay: int = Field(default=300)
model_config = SettingsConfigDict(env_nested_delimiter="__", env_prefix="PG_")

14
uv.lock
View file

@ -99,6 +99,18 @@ wheels = [
{ url = "https://files.pythonhosted.org/packages/83/8f/2e78af50943e498855802c293f3256e97a12101dccb233e5abd848fd4b8e/ffmpeg_progress_yield-0.11.3-py2.py3-none-any.whl", hash = "sha256:a7277e386d30b27ce513ec50a4a97fee403e48172a5370e05584350ee85db205", size = 11693 },
]
[[package]]
name = "ffmpeg-python"
version = "0.2.0"
source = { registry = "https://pypi.org/simple" }
dependencies = [
{ name = "future" },
]
sdist = { url = "https://files.pythonhosted.org/packages/dd/5e/d5f9105d59c1325759d838af4e973695081fbbc97182baf73afc78dec266/ffmpeg-python-0.2.0.tar.gz", hash = "sha256:65225db34627c578ef0e11c8b1eb528bb35e024752f6f10b78c011f6f64c4127", size = 21543 }
wheels = [
{ url = "https://files.pythonhosted.org/packages/d7/0c/56be52741f75bad4dc6555991fabd2e07b432d333da82c11ad701123888a/ffmpeg_python-0.2.0-py3-none-any.whl", hash = "sha256:ac441a0404e053f8b6a1113a77c0f452f1cfc62f6344a769475ffdc0f56c23c5", size = 25024 },
]
[[package]]
name = "future"
version = "1.0.0"
@ -148,6 +160,7 @@ version = "0.1.0"
source = { virtual = "." }
dependencies = [
{ name = "ffmpeg-normalize" },
{ name = "ffmpeg-python" },
{ name = "podgen" },
{ name = "pydantic" },
{ name = "pydantic-settings" },
@ -158,6 +171,7 @@ dependencies = [
[package.metadata]
requires-dist = [
{ name = "ffmpeg-normalize", specifier = ">=1.31.0" },
{ name = "ffmpeg-python", specifier = ">=0.2.0" },
{ name = "podgen", specifier = ">=1.1.0" },
{ name = "pydantic", specifier = ">=2.10.5" },
{ name = "pydantic-settings", specifier = ">=2.7.1" },