diff --git a/main.py b/main.py index e68e9de..605646e 100644 --- a/main.py +++ b/main.py @@ -1,9 +1,13 @@ +import hashlib import shutil import time import urllib.parse -from datetime import datetime, timezone +import uuid +from datetime import datetime, timedelta, timezone from pathlib import Path +from typing import Optional +import ffmpeg import structlog from podgen import Episode, Media, Podcast from pydantic import BaseModel, Field @@ -13,7 +17,22 @@ from watchdog.observers import Observer from process import AudioProcessor from settings import Settings -EXTENSIONS = [".m4a"] +EXTENSIONS = [ + ".aac", + ".ac3", + ".aif", + ".aiff", + ".ape", + ".flac", + ".m4a", + ".mp3", + ".ogg", + ".opus", + ".ra", + ".ram", + ".wav", + ".wma", +] META_FILENAME = "meta.json" DESCRIPTION_EXTENSION = "txt" @@ -24,6 +43,7 @@ class PodcastMeta(BaseModel): name: str description: str explicit: bool = Field(default=True) + output_name: str = Field(default_factory=lambda: str(uuid.uuid4())) class PodcastGenerator: @@ -56,6 +76,23 @@ class PodcastGenerator: with open(self.settings.directory / feed_name / META_FILENAME, "r") as f: return PodcastMeta.model_validate_json(f.read()) + def get_audio_duration(self, filename: Path) -> Optional[timedelta]: + probe = ffmpeg.probe(str(filename)) + stream = next( + (stream for stream in probe["streams"] if stream["codec_type"] == "audio"), + None, + ) + return ( + timedelta(seconds=float(stream["duration"])) + if stream is not None and "duration" in stream + else None + ) + + def generate_all_feeds(self) -> None: + shutil.rmtree(self.settings.output_directory, ignore_errors=True) + for feed_name in self.settings.feeds: + self.generate_feed(feed_name) + def generate_feed(self, feed_name: str) -> None: log.info("Generating feed for %s", feed_name) @@ -64,16 +101,20 @@ class PodcastGenerator: feed = Podcast( name=podcast_meta.name, description=podcast_meta.description, - website=urllib.parse.urljoin(self.settings.url_base, feed_name), + website=urllib.parse.urljoin( + self.settings.url_base, podcast_meta.output_name + ), explicit=podcast_meta.explicit, - feed_url=urllib.parse.urljoin(self.settings.url_base, f"{feed_name}.xml"), + feed_url=urllib.parse.urljoin( + self.settings.url_base, f"{podcast_meta.output_name}/feed.xml" + ), ) - output_dir = self.settings.output_directory / feed_name + output_dir = self.settings.output_directory / podcast_meta.output_name feed_episodes_dir = self.settings.directory / feed_name / "episodes" shutil.rmtree(output_dir, ignore_errors=True) - output_dir.mkdir() + output_dir.mkdir(parents=True) for file in feed_episodes_dir.glob("*"): if file.suffix not in EXTENSIONS: @@ -89,7 +130,13 @@ class PodcastGenerator: except AttributeError: file_date = datetime.now().timestamp() + h = hashlib.sha256() + with open(file, "rb") as f: + for byte_block in iter(lambda: f.read(4096), b""): + h.update(byte_block) + episode = Episode( + id=h.hexdigest(), title=file.stem, media=Media( urllib.parse.urljoin( @@ -97,6 +144,7 @@ class PodcastGenerator: urllib.parse.quote(f"{feed_name}/{file.name}"), ), file.stat().st_size, + duration=self.get_audio_duration(file), ), publication_date=datetime.fromtimestamp(file_date, timezone.utc), ) @@ -106,12 +154,14 @@ class PodcastGenerator: ) if description_filename.is_file(): with open(description_filename, "r") as f: - episode.long_summary = f.read() + content = f.read() + if content.strip() != "": + episode.long_summary = content.strip() shutil.copyfile(file, output_dir / file.name) feed.add_episode(episode) - output_feed_file = self.settings.output_directory / f"{feed_name}.xml" + output_feed_file = output_dir / "feed.xml" log.info("Saving feed to %s", output_feed_file) with open(output_feed_file, "w") as f: feed.rss_file(f) @@ -120,16 +170,22 @@ class PodcastGenerator: class GeneratorEventHandler(FileSystemEventHandler): def __init__(self, settings: Settings): self.settings = settings - self.audio_processor = AudioProcessor() self.generator = PodcastGenerator(settings=settings) + self.audio_processor = AudioProcessor( + generate_callback=lambda: self.generator.generate_all_feeds() + ) + self.generate_time: Optional[datetime] = None self.audio_processor.start_processing() + self.generator.generate_all_feeds() super().__init__() def on_any_event(self, event): src_path = Path(event.src_path) + # log.debug("Got file watch event", e=event) + for feed_name in self.settings.feeds: feed_consume_dir = self.settings.directory / feed_name / "consume" feed_meta_path = self.settings.directory / feed_name / META_FILENAME @@ -137,7 +193,11 @@ class GeneratorEventHandler(FileSystemEventHandler): # if a file is created in a consume directory if event.event_type == "created": - if src_path.parent != feed_consume_dir: + if ( + src_path.parent != feed_consume_dir + or src_path.suffix not in EXTENSIONS + or src_path.name.startswith(".") + ): continue output_path = ( @@ -155,11 +215,14 @@ class GeneratorEventHandler(FileSystemEventHandler): output_path.parent / f"{output_path.stem}.{DESCRIPTION_EXTENSION}", "a", ).close() - self.generator.generate_feed(feed_name) # if a file is modified in the episodes directory or meta has changed - if src_path == feed_meta_path or feed_episodes_dir in src_path.parents: - self.generator.generate_feed(feed_name) + if ( + src_path == feed_meta_path + or feed_episodes_dir in src_path.parents + and not event.is_directory + ): + self.generate_time = datetime.now() + timedelta(minutes=1) if __name__ == "__main__": @@ -167,17 +230,23 @@ if __name__ == "__main__": log.info("Loaded settings", settings=settings) + event_handler = GeneratorEventHandler(settings) observer = Observer() - observer.schedule( - GeneratorEventHandler(settings), settings.directory, recursive=True - ) + observer.schedule(event_handler, settings.directory, recursive=True) observer.start() log.info("Listening for changes at %s...", settings.directory) try: while True: + if ( + event_handler.generate_time is not None + and datetime.now() >= event_handler.generate_time + ): + event_handler.generate_time = None + event_handler.generator.generate_all_feeds() time.sleep(1) finally: observer.stop() observer.join() + observer.join() diff --git a/process.py b/process.py index 43e7638..7151dc4 100644 --- a/process.py +++ b/process.py @@ -1,7 +1,10 @@ import queue +import shutil +import tempfile import threading +import time from pathlib import Path -from typing import Optional +from typing import Callable, Optional import structlog from ffmpeg_normalize import FFmpegNormalize @@ -9,15 +12,17 @@ from ffmpeg_normalize import FFmpegNormalize from settings import Settings DELETE_INPUTS = Settings().delete_consume_files +CONSUME_DELAY = Settings().consume_delay log = structlog.get_logger() class AudioProcessor: - def __init__(self): + def __init__(self, generate_callback: Callable[[], None]): self.queue: queue.Queue[(Path, Path)] = queue.Queue() self.is_running = False self.processor_thread: Optional[threading.Thread] = None + self.generate_callback = generate_callback def add_file(self, input_filename: Path, output_filename: Path) -> None: self.queue.put((input_filename, output_filename)) @@ -46,15 +51,41 @@ class AudioProcessor: input_filename=input_filename, output_filename=output_filename, ) - ffmpeg_normalize = FFmpegNormalize( - "ebu", audio_codec="aac", audio_bitrate="192k" - ) - ffmpeg_normalize.add_media_file(str(input_filename), str(output_filename)) - ffmpeg_normalize.run_normalization() - # delete the original - if DELETE_INPUTS: - output_filename.unlink() + if not input_filename.is_file(): + log.error("Could not process non-file", input_filename=input_filename) + return + + # wait for file to finish uploading + current_size = input_filename.stat().st_size + while True: + time.sleep(CONSUME_DELAY) + if input_filename.stat().st_size != current_size: + log.debug( + "Waiting for file to finish uploading", + input_filename=input_filename, + ) + current_size = input_filename.stat().st_size + continue + + break + + with tempfile.TemporaryDirectory() as tmp: + input_temp_path = Path(tmp) / input_filename.name + output_temp_path = Path(tmp) / output_filename.name + + # copy to temp directory + shutil.move(input_filename, input_temp_path) + + ffmpeg_normalize = FFmpegNormalize( + "ebu", audio_codec="aac", audio_bitrate="192k" + ) + ffmpeg_normalize.add_media_file(str(input_temp_path), str(output_temp_path)) + ffmpeg_normalize.run_normalization() + + shutil.move(output_temp_path, output_filename) + + self.generate_callback() def _process_queue(self) -> None: while self.is_running: diff --git a/pyproject.toml b/pyproject.toml index c40666f..a92ca08 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -6,6 +6,7 @@ readme = "README.md" requires-python = ">=3.13" dependencies = [ "ffmpeg-normalize>=1.31.0", + "ffmpeg-python>=0.2.0", "podgen>=1.1.0", "pydantic>=2.10.5", "pydantic-settings>=2.7.1", diff --git a/settings.py b/settings.py index 4d3a368..0e85e3c 100644 --- a/settings.py +++ b/settings.py @@ -11,5 +11,6 @@ class Settings(BaseSettings): feeds: Set[str] = Field(default={"default"}) url_base: str = Field(default="https://example.com") delete_consume_files: bool = Field(default=False) + consume_delay: int = Field(default=300) model_config = SettingsConfigDict(env_nested_delimiter="__", env_prefix="PG_") diff --git a/uv.lock b/uv.lock index 761eebb..9f8ef32 100644 --- a/uv.lock +++ b/uv.lock @@ -99,6 +99,18 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/83/8f/2e78af50943e498855802c293f3256e97a12101dccb233e5abd848fd4b8e/ffmpeg_progress_yield-0.11.3-py2.py3-none-any.whl", hash = "sha256:a7277e386d30b27ce513ec50a4a97fee403e48172a5370e05584350ee85db205", size = 11693 }, ] +[[package]] +name = "ffmpeg-python" +version = "0.2.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "future" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/dd/5e/d5f9105d59c1325759d838af4e973695081fbbc97182baf73afc78dec266/ffmpeg-python-0.2.0.tar.gz", hash = "sha256:65225db34627c578ef0e11c8b1eb528bb35e024752f6f10b78c011f6f64c4127", size = 21543 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/d7/0c/56be52741f75bad4dc6555991fabd2e07b432d333da82c11ad701123888a/ffmpeg_python-0.2.0-py3-none-any.whl", hash = "sha256:ac441a0404e053f8b6a1113a77c0f452f1cfc62f6344a769475ffdc0f56c23c5", size = 25024 }, +] + [[package]] name = "future" version = "1.0.0" @@ -148,6 +160,7 @@ version = "0.1.0" source = { virtual = "." } dependencies = [ { name = "ffmpeg-normalize" }, + { name = "ffmpeg-python" }, { name = "podgen" }, { name = "pydantic" }, { name = "pydantic-settings" }, @@ -158,6 +171,7 @@ dependencies = [ [package.metadata] requires-dist = [ { name = "ffmpeg-normalize", specifier = ">=1.31.0" }, + { name = "ffmpeg-python", specifier = ">=0.2.0" }, { name = "podgen", specifier = ">=1.1.0" }, { name = "pydantic", specifier = ">=2.10.5" }, { name = "pydantic-settings", specifier = ">=2.7.1" },