initial commit

This commit is contained in:
Jake Walker 2025-01-21 23:40:56 +00:00
commit 423ef7455f
9 changed files with 300 additions and 0 deletions

12
.editorconfig Normal file
View file

@ -0,0 +1,12 @@
# EditorConfig is awesome: https://EditorConfig.org
# top-most EditorConfig file
root = true
[*]
indent_style = space
indent_size = 4
end_of_line = lf
charset = utf-8
trim_trailing_whitespace = true
insert_final_newline = true

122
.gitignore vendored Normal file
View file

@ -0,0 +1,122 @@
# File created using '.gitignore Generator' for Visual Studio Code: https://bit.ly/vscode-gig
# Created by https://www.toptal.com/developers/gitignore/api/windows,visualstudiocode,rust,macos,linux,rust-analyzer
# Edit at https://www.toptal.com/developers/gitignore?templates=windows,visualstudiocode,rust,macos,linux,rust-analyzer
### Linux ###
*~
# temporary files which can be created if a process still has a handle open of a deleted file
.fuse_hidden*
# KDE directory preferences
.directory
# Linux trash folder which might appear on any partition or disk
.Trash-*
# .nfs files are created when an open file is removed but is still being accessed
.nfs*
### macOS ###
# General
.DS_Store
.AppleDouble
.LSOverride
# Icon must end with two \r
Icon
# Thumbnails
._*
# Files that might appear in the root of a volume
.DocumentRevisions-V100
.fseventsd
.Spotlight-V100
.TemporaryItems
.Trashes
.VolumeIcon.icns
.com.apple.timemachine.donotpresent
# Directories potentially created on remote AFP share
.AppleDB
.AppleDesktop
Network Trash Folder
Temporary Items
.apdisk
### macOS Patch ###
# iCloud generated files
*.icloud
### Rust ###
# Generated by Cargo
# will have compiled files and executables
debug/
target/
# Remove Cargo.lock from gitignore if creating an executable, leave it for libraries
# More information here https://doc.rust-lang.org/cargo/guide/cargo-toml-vs-cargo-lock.html
Cargo.lock
# These are backup files generated by rustfmt
**/*.rs.bk
# MSVC Windows builds of rustc generate these, which store debugging information
*.pdb
### rust-analyzer ###
# Can be generated by other build systems other than cargo (ex: bazelbuild/rust_rules)
rust-project.json
### VisualStudioCode ###
.vscode/*
!.vscode/settings.json
!.vscode/tasks.json
!.vscode/launch.json
!.vscode/extensions.json
!.vscode/*.code-snippets
# Local History for Visual Studio Code
.history/
# Built Visual Studio Code Extensions
*.vsix
### VisualStudioCode Patch ###
# Ignore all local history of files
.history
.ionide
### Windows ###
# Windows thumbnail cache files
Thumbs.db
Thumbs.db:encryptable
ehthumbs.db
ehthumbs_vista.db
# Dump file
*.stackdump
# Folder config file
[Dd]esktop.ini
# Recycle Bin used on file shares
$RECYCLE.BIN/
# Windows Installer files
*.cab
*.msi
*.msix
*.msm
*.msp
# Windows shortcuts
*.lnk
# End of https://www.toptal.com/developers/gitignore/api/windows,visualstudiocode,rust,macos,linux,rust-analyzer
# Custom rules (everything added below won't be overriden by 'Generate .gitignore File' if you use 'Update' option)
sample.html

3
.vscode/settings.json vendored Normal file
View file

@ -0,0 +1,3 @@
{
"licenser.license": "WTFPL"
}

11
Cargo.toml Normal file
View file

@ -0,0 +1,11 @@
[package]
name = "mininews"
version = "0.1.0"
edition = "2021"
[dependencies]
anyhow = "1.0.95"
chrono = { version = "0.4.39", default-features = false, features = ["now"] }
kuchikiki = "0.8.2"
rss = { version = "2.0.11" }
uuid = { version = "1.12.1", features = ["v7"] }

13
LICENSE Normal file
View file

@ -0,0 +1,13 @@
DO WHAT THE FUCK YOU WANT TO PUBLIC LICENSE
Version 2, December 2004
Copyright (C) 2025 Jake Walker
Everyone is permitted to copy and distribute verbatim or modified
copies of this license document, and changing it is allowed as long
as the name is changed.
DO WHAT THE FUCK YOU WANT TO PUBLIC LICENSE
TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION
0. You just DO WHAT THE FUCK YOU WANT TO.

3
README.md Normal file
View file

@ -0,0 +1,3 @@
# Mininews
A simple RSS generator for [Wikipedia's Current Events](https://en.wikipedia.org/wiki/Portal:Current_events). Inspired by [tom-james-watson's detoxed.news](https://github.com/tom-james-watson/detoxed.news) project.

1
src/lib.rs Normal file
View file

@ -0,0 +1 @@
pub mod parser;

58
src/main.rs Normal file
View file

@ -0,0 +1,58 @@
use anyhow::{Context, Error, Result};
use chrono::{Days, NaiveTime, Utc};
use mininews::parser::{parse, EventBlock, PAGE_URL};
use rss::{ChannelBuilder, Guid, Item, ItemBuilder};
use uuid::{Timestamp, Uuid};
/// Builds an RSS channel from the parsed event blocks and prints it to stdout.
///
/// Only blocks dated strictly before today's UTC date are included. Each item's
/// publication date is midnight UTC on the day after the block's date, its title
/// is the block's date, and its content is the block's HTML.
///
/// # Errors
///
/// Returns an error if adding one day to a block's date overflows, or if the
/// resulting publication timestamp is negative and therefore cannot be used as
/// an unsigned Unix time for the GUID.
fn generate_feed(items: Vec<EventBlock>) -> Result<()> {
    let mut channel = ChannelBuilder::default()
        .title("Mininews")
        .link("https://example.com")
        .description("An RSS feed from Wikipedia's Current Events")
        .build();

    // Evaluate "today" once so every block is compared against the same date,
    // even if the run straddles midnight UTC.
    let today = Utc::now().date_naive();

    // Consume the vector so each block's content is moved into its item
    // instead of being cloned.
    let feed_items = items
        .into_iter()
        .filter(|block| block.date < today)
        .map(|block| {
            let pub_date = block
                .date
                .checked_add_days(Days::new(1))
                .context("failed to add to date")?
                .and_time(NaiveTime::MIN)
                .and_utc();

            // A plain `as u64` cast would silently wrap a negative timestamp.
            let unix_seconds = u64::try_from(pub_date.timestamp())
                .context("publication date is before the Unix epoch")?;

            Ok::<Item, Error>(
                ItemBuilder::default()
                    .title(block.date.to_string())
                    .link(Some(PAGE_URL.to_string()))
                    .pub_date(pub_date.to_rfc2822())
                    .guid(Guid {
                        permalink: false,
                        // NOTE(review): a v7 UUID presumably carries random bits in
                        // addition to the timestamp, so this GUID would differ between
                        // runs for the same date — confirm whether a stable GUID is
                        // wanted for feed readers.
                        value: Uuid::new_v7(Timestamp::from_unix_time(unix_seconds, 0, 0, 0))
                            .to_string(),
                    })
                    .description(format!("Wikipedia current events from {}", block.date))
                    .content(block.content)
                    .build(),
            )
        })
        .collect::<Result<Vec<Item>, _>>()?;
    channel.set_items(feed_items);

    // NOTE(review): `{:?}` prints the XML as a quoted, escaped string literal;
    // kept as-is, but `{}` is probably what a feed consumer needs — confirm.
    println!("{:?}", channel.to_string());
    Ok(())
}
/// Program entry point: parses the event blocks and generates the feed from them.
///
/// Any error from parsing or feed generation is returned from `main`.
fn main() -> Result<()> {
    let blocks = parse()?;
    generate_feed(blocks)
}

77
src/parser.rs Normal file
View file

@ -0,0 +1,77 @@
use anyhow::{anyhow, Context, Result};
use chrono::NaiveDate;
use kuchikiki::parse_html;
use kuchikiki::traits::*;
use kuchikiki::NodeRef;
/// Origin prepended to `href` values that start with `/` when rewriting links.
const RELATIVE_URL_BASE: &str = "https://en.wikipedia.org";
/// URL of the Wikipedia "Portal:Current events" page.
pub const PAGE_URL: &str = "https://en.wikipedia.org/wiki/Portal:Current_events";
pub struct EventBlock {
pub date: NaiveDate,
pub content: String,
}
/// Converts one event-block node into an [`EventBlock`].
///
/// The date is read from the element's `id` attribute using the format
/// `%Y_%B_%-d`. The content is the first descendant matching
/// `div.current-events-content`, serialized to a string after its links have
/// been made absolute:
///
/// * `//host/path` (protocol-relative) becomes `https://host/path`;
/// * `/path` (root-relative) is prefixed with [`RELATIVE_URL_BASE`];
/// * anything else, including `<a>` elements with no `href`, is left untouched.
///
/// # Errors
///
/// Returns an error if `node` is not an element, has no `id` attribute, the
/// `id` does not parse as a date, or the content element or its links cannot
/// be selected.
fn parse_event_block(node: &NodeRef) -> Result<EventBlock> {
    let element = node.as_element().context("failed to parse element")?;

    // Scope the attribute borrow so it is released before the content subtree
    // is mutated below.
    let date = {
        let attributes = element.attributes.borrow();
        let date_str = attributes
            .get("id")
            .context("could not get event block id")?;
        NaiveDate::parse_from_str(date_str, "%Y_%B_%-d")
            .map_err(|e| anyhow!(e.to_string()))
            .with_context(|| "failed to parse event block date")?
    };

    let content = node
        .select_first("div.current-events-content")
        .map_err(|_| anyhow!("failed to select event block content"))?;
    let content_node = content.as_node();

    // rewrite relative links
    for link in content_node
        .select("a")
        .map_err(|_| anyhow!("failed to select event block links"))?
    {
        let mut link_attributes = link.attributes.borrow_mut();
        let absolute = match link_attributes.get("href") {
            // Check `//` first: it also starts with `/`, but it already names a
            // host and only lacks a scheme.
            Some(href) if href.starts_with("//") => format!("https:{}", href),
            Some(href) if href.starts_with('/') => format!("{}{}", RELATIVE_URL_BASE, href),
            // Already absolute, fragment-only, or no href at all: nothing to rewrite.
            _ => continue,
        };
        link_attributes.insert("href", absolute);
    }

    Ok(EventBlock {
        date,
        content: content_node.to_string(),
    })
}
/// Parses the HTML embedded at compile time from `../sample.html` into event blocks.
///
/// Every element matching
/// `div.p-current-events-events div.current-events-main.vevent` is converted
/// with `parse_event_block`, in the order the selector yields them.
///
/// # Errors
///
/// Returns an error if the selector cannot be applied, or the first error
/// produced while converting a block.
pub fn parse() -> Result<Vec<EventBlock>> {
    let html = include_str!("../sample.html");
    let dom = parse_html().one(html);

    let matches = dom
        .select("div.p-current-events-events div.current-events-main.vevent")
        .map_err(|_| anyhow!("failed to select event blocks"))?;

    let mut blocks = Vec::new();
    for matched in matches {
        // Stop at the first block that fails to parse.
        blocks.push(parse_event_block(matched.as_node())?);
    }
    Ok(blocks)
}