improvements
All checks were successful
ci/woodpecker/push/build Pipeline was successful

This commit is contained in:
Jake Walker 2025-01-09 11:28:18 +00:00
parent 07587f2c90
commit 80085fcad1
5 changed files with 142 additions and 26 deletions

101
main.py
View file

@ -1,9 +1,13 @@
import hashlib
import shutil
import time
import urllib.parse
from datetime import datetime, timezone import uuid
from datetime import datetime, timedelta, timezone
from pathlib import Path
from typing import Optional
import ffmpeg
import structlog
from podgen import Episode, Media, Podcast
from pydantic import BaseModel, Field
@ -13,7 +17,22 @@ from watchdog.observers import Observer
from process import AudioProcessor
from settings import Settings
EXTENSIONS = [".m4a"] EXTENSIONS = [
".aac",
".ac3",
".aif",
".aiff",
".ape",
".flac",
".m4a",
".mp3",
".ogg",
".opus",
".ra",
".ram",
".wav",
".wma",
]
META_FILENAME = "meta.json"
DESCRIPTION_EXTENSION = "txt"
@ -24,6 +43,7 @@ class PodcastMeta(BaseModel):
name: str
description: str
explicit: bool = Field(default=True)
output_name: str = Field(default_factory=lambda: str(uuid.uuid4()))
class PodcastGenerator:
@ -56,6 +76,23 @@ class PodcastGenerator:
with open(self.settings.directory / feed_name / META_FILENAME, "r") as f:
return PodcastMeta.model_validate_json(f.read())
def get_audio_duration(self, filename: Path) -> Optional[timedelta]:
probe = ffmpeg.probe(str(filename))
stream = next(
(stream for stream in probe["streams"] if stream["codec_type"] == "audio"),
None,
)
return (
timedelta(seconds=float(stream["duration"]))
if stream is not None and "duration" in stream
else None
)
def generate_all_feeds(self) -> None:
shutil.rmtree(self.settings.output_directory, ignore_errors=True)
for feed_name in self.settings.feeds:
self.generate_feed(feed_name)
def generate_feed(self, feed_name: str) -> None:
log.info("Generating feed for %s", feed_name)
@ -64,16 +101,20 @@ class PodcastGenerator:
feed = Podcast(
name=podcast_meta.name,
description=podcast_meta.description,
website=urllib.parse.urljoin(self.settings.url_base, feed_name), website=urllib.parse.urljoin(
self.settings.url_base, podcast_meta.output_name
),
explicit=podcast_meta.explicit,
feed_url=urllib.parse.urljoin(self.settings.url_base, f"{feed_name}.xml"), feed_url=urllib.parse.urljoin(
self.settings.url_base, f"{podcast_meta.output_name}/feed.xml"
),
)
output_dir = self.settings.output_directory / feed_name output_dir = self.settings.output_directory / podcast_meta.output_name
feed_episodes_dir = self.settings.directory / feed_name / "episodes"
shutil.rmtree(output_dir, ignore_errors=True)
output_dir.mkdir() output_dir.mkdir(parents=True)
for file in feed_episodes_dir.glob("*"):
if file.suffix not in EXTENSIONS:
@ -89,7 +130,13 @@ class PodcastGenerator:
except AttributeError:
file_date = datetime.now().timestamp()
h = hashlib.sha256()
with open(file, "rb") as f:
for byte_block in iter(lambda: f.read(4096), b""):
h.update(byte_block)
episode = Episode(
id=h.hexdigest(),
title=file.stem,
media=Media(
urllib.parse.urljoin(
@ -97,6 +144,7 @@ class PodcastGenerator:
urllib.parse.quote(f"{feed_name}/{file.name}"),
),
file.stat().st_size,
duration=self.get_audio_duration(file),
),
publication_date=datetime.fromtimestamp(file_date, timezone.utc),
)
@ -106,12 +154,14 @@ class PodcastGenerator:
)
if description_filename.is_file():
with open(description_filename, "r") as f:
episode.long_summary = f.read() content = f.read()
if content.strip() != "":
episode.long_summary = content.strip()
shutil.copyfile(file, output_dir / file.name)
feed.add_episode(episode)
output_feed_file = self.settings.output_directory / f"{feed_name}.xml" output_feed_file = output_dir / "feed.xml"
log.info("Saving feed to %s", output_feed_file)
with open(output_feed_file, "w") as f:
feed.rss_file(f)
@ -120,16 +170,22 @@ class PodcastGenerator:
class GeneratorEventHandler(FileSystemEventHandler):
def __init__(self, settings: Settings):
self.settings = settings
self.audio_processor = AudioProcessor()
self.generator = PodcastGenerator(settings=settings)
self.audio_processor = AudioProcessor(
generate_callback=lambda: self.generator.generate_all_feeds()
)
self.generate_time: Optional[datetime] = None
self.audio_processor.start_processing()
self.generator.generate_all_feeds()
super().__init__()
def on_any_event(self, event):
src_path = Path(event.src_path)
# log.debug("Got file watch event", e=event)
for feed_name in self.settings.feeds:
feed_consume_dir = self.settings.directory / feed_name / "consume"
feed_meta_path = self.settings.directory / feed_name / META_FILENAME
@ -137,7 +193,11 @@ class GeneratorEventHandler(FileSystemEventHandler):
# if a file is created in a consume directory
if event.event_type == "created":
if src_path.parent != feed_consume_dir: if (
src_path.parent != feed_consume_dir
or src_path.suffix not in EXTENSIONS
or src_path.name.startswith(".")
):
continue
output_path = (
@ -155,11 +215,14 @@ class GeneratorEventHandler(FileSystemEventHandler):
output_path.parent / f"{output_path.stem}.{DESCRIPTION_EXTENSION}",
"a",
).close()
self.generator.generate_feed(feed_name)
# if a file is modified in the episodes directory or meta has changed
if src_path == feed_meta_path or feed_episodes_dir in src_path.parents: if (
self.generator.generate_feed(feed_name) src_path == feed_meta_path
or feed_episodes_dir in src_path.parents
and not event.is_directory
):
self.generate_time = datetime.now() + timedelta(minutes=1)
if __name__ == "__main__":
@ -167,17 +230,23 @@ if __name__ == "__main__":
log.info("Loaded settings", settings=settings)
event_handler = GeneratorEventHandler(settings)
observer = Observer()
observer.schedule( observer.schedule(event_handler, settings.directory, recursive=True)
GeneratorEventHandler(settings), settings.directory, recursive=True
)
observer.start()
log.info("Listening for changes at %s...", settings.directory)
try:
while True:
if (
event_handler.generate_time is not None
and datetime.now() >= event_handler.generate_time
):
event_handler.generate_time = None
event_handler.generator.generate_all_feeds()
time.sleep(1)
finally:
observer.stop()
observer.join()
observer.join()

View file

@ -1,7 +1,10 @@
import queue
import shutil
import tempfile
import threading
import time
from pathlib import Path
from typing import Optional from typing import Callable, Optional
import structlog
from ffmpeg_normalize import FFmpegNormalize
@ -9,15 +12,17 @@ from ffmpeg_normalize import FFmpegNormalize
from settings import Settings
DELETE_INPUTS = Settings().delete_consume_files
CONSUME_DELAY = Settings().consume_delay
log = structlog.get_logger()
class AudioProcessor:
def __init__(self): def __init__(self, generate_callback: Callable[[], None]):
self.queue: queue.Queue[(Path, Path)] = queue.Queue()
self.is_running = False
self.processor_thread: Optional[threading.Thread] = None
self.generate_callback = generate_callback
def add_file(self, input_filename: Path, output_filename: Path) -> None:
self.queue.put((input_filename, output_filename))
@ -46,15 +51,41 @@ class AudioProcessor:
input_filename=input_filename,
output_filename=output_filename,
)
if not input_filename.is_file():
log.error("Could not process non-file", input_filename=input_filename)
return
# wait for file to finish uploading
current_size = input_filename.stat().st_size
while True:
time.sleep(CONSUME_DELAY)
if input_filename.stat().st_size != current_size:
log.debug(
"Waiting for file to finish uploading",
input_filename=input_filename,
)
current_size = input_filename.stat().st_size
continue
break
with tempfile.TemporaryDirectory() as tmp:
input_temp_path = Path(tmp) / input_filename.name
output_temp_path = Path(tmp) / output_filename.name
# copy to temp directory
shutil.move(input_filename, input_temp_path)
ffmpeg_normalize = FFmpegNormalize(
"ebu", audio_codec="aac", audio_bitrate="192k"
)
ffmpeg_normalize.add_media_file(str(input_filename), str(output_filename)) ffmpeg_normalize.add_media_file(str(input_temp_path), str(output_temp_path))
ffmpeg_normalize.run_normalization()
# delete the original shutil.move(output_temp_path, output_filename)
if DELETE_INPUTS:
output_filename.unlink() self.generate_callback()
def _process_queue(self) -> None:
while self.is_running:

View file

@ -6,6 +6,7 @@ readme = "README.md"
requires-python = ">=3.13"
dependencies = [
"ffmpeg-normalize>=1.31.0",
"ffmpeg-python>=0.2.0",
"podgen>=1.1.0",
"pydantic>=2.10.5",
"pydantic-settings>=2.7.1",

View file

@ -11,5 +11,6 @@ class Settings(BaseSettings):
feeds: Set[str] = Field(default={"default"})
url_base: str = Field(default="https://example.com")
delete_consume_files: bool = Field(default=False)
consume_delay: int = Field(default=300)
model_config = SettingsConfigDict(env_nested_delimiter="__", env_prefix="PG_")

14
uv.lock
View file

@ -99,6 +99,18 @@ wheels = [
{ url = "https://files.pythonhosted.org/packages/83/8f/2e78af50943e498855802c293f3256e97a12101dccb233e5abd848fd4b8e/ffmpeg_progress_yield-0.11.3-py2.py3-none-any.whl", hash = "sha256:a7277e386d30b27ce513ec50a4a97fee403e48172a5370e05584350ee85db205", size = 11693 },
]
[[package]]
name = "ffmpeg-python"
version = "0.2.0"
source = { registry = "https://pypi.org/simple" }
dependencies = [
{ name = "future" },
]
sdist = { url = "https://files.pythonhosted.org/packages/dd/5e/d5f9105d59c1325759d838af4e973695081fbbc97182baf73afc78dec266/ffmpeg-python-0.2.0.tar.gz", hash = "sha256:65225db34627c578ef0e11c8b1eb528bb35e024752f6f10b78c011f6f64c4127", size = 21543 }
wheels = [
{ url = "https://files.pythonhosted.org/packages/d7/0c/56be52741f75bad4dc6555991fabd2e07b432d333da82c11ad701123888a/ffmpeg_python-0.2.0-py3-none-any.whl", hash = "sha256:ac441a0404e053f8b6a1113a77c0f452f1cfc62f6344a769475ffdc0f56c23c5", size = 25024 },
]
[[package]]
name = "future"
version = "1.0.0"
@ -148,6 +160,7 @@ version = "0.1.0"
source = { virtual = "." }
dependencies = [
{ name = "ffmpeg-normalize" },
{ name = "ffmpeg-python" },
{ name = "podgen" },
{ name = "pydantic" },
{ name = "pydantic-settings" },
@ -158,6 +171,7 @@ dependencies = [
[package.metadata]
requires-dist = [
{ name = "ffmpeg-normalize", specifier = ">=1.31.0" },
{ name = "ffmpeg-python", specifier = ">=0.2.0" },
{ name = "podgen", specifier = ">=1.1.0" },
{ name = "pydantic", specifier = ">=2.10.5" },
{ name = "pydantic-settings", specifier = ">=2.7.1" },