initial commit
All checks were successful
ci/woodpecker/push/deploy Pipeline was successful
ci/woodpecker/cron/deploy Pipeline was successful

This commit is contained in:
Jake Walker 2025-01-21 23:40:23 +00:00
commit d635f1cbc8
10 changed files with 363 additions and 0 deletions

15
.editorconfig Normal file
View file

@ -0,0 +1,15 @@
# EditorConfig is awesome: https://EditorConfig.org
# top-most EditorConfig file
root = true
[*]
indent_style = space
indent_size = 4
end_of_line = lf
charset = utf-8
trim_trailing_whitespace = true
insert_final_newline = true
[*.{yml,yaml}]
indent_size = 2

122
.gitignore vendored Normal file
View file

@ -0,0 +1,122 @@
# File created using '.gitignore Generator' for Visual Studio Code: https://bit.ly/vscode-gig
# Created by https://www.toptal.com/developers/gitignore/api/windows,visualstudiocode,rust,macos,linux,rust-analyzer
# Edit at https://www.toptal.com/developers/gitignore?templates=windows,visualstudiocode,rust,macos,linux,rust-analyzer
### Linux ###
*~
# temporary files which can be created if a process still has a handle open of a deleted file
.fuse_hidden*
# KDE directory preferences
.directory
# Linux trash folder which might appear on any partition or disk
.Trash-*
# .nfs files are created when an open file is removed but is still being accessed
.nfs*
### macOS ###
# General
.DS_Store
.AppleDouble
.LSOverride
# Icon must end with two \r
Icon
# Thumbnails
._*
# Files that might appear in the root of a volume
.DocumentRevisions-V100
.fseventsd
.Spotlight-V100
.TemporaryItems
.Trashes
.VolumeIcon.icns
.com.apple.timemachine.donotpresent
# Directories potentially created on remote AFP share
.AppleDB
.AppleDesktop
Network Trash Folder
Temporary Items
.apdisk
### macOS Patch ###
# iCloud generated files
*.icloud
### Rust ###
# Generated by Cargo
# will have compiled files and executables
debug/
target/
# Remove Cargo.lock from gitignore if creating an executable, leave it for libraries
# More information here https://doc.rust-lang.org/cargo/guide/cargo-toml-vs-cargo-lock.html
Cargo.lock
# These are backup files generated by rustfmt
**/*.rs.bk
# MSVC Windows builds of rustc generate these, which store debugging information
*.pdb
### rust-analyzer ###
# Can be generated by other build systems other than cargo (ex: bazelbuild/rust_rules)
rust-project.json
### VisualStudioCode ###
.vscode/*
!.vscode/settings.json
!.vscode/tasks.json
!.vscode/launch.json
!.vscode/extensions.json
!.vscode/*.code-snippets
# Local History for Visual Studio Code
.history/
# Built Visual Studio Code Extensions
*.vsix
### VisualStudioCode Patch ###
# Ignore all local history of files
.history
.ionide
### Windows ###
# Windows thumbnail cache files
Thumbs.db
Thumbs.db:encryptable
ehthumbs.db
ehthumbs_vista.db
# Dump file
*.stackdump
# Folder config file
[Dd]esktop.ini
# Recycle Bin used on file shares
$RECYCLE.BIN/
# Windows Installer files
*.cab
*.msi
*.msix
*.msm
*.msp
# Windows shortcuts
*.lnk
# End of https://www.toptal.com/developers/gitignore/api/windows,visualstudiocode,rust,macos,linux,rust-analyzer
# Custom rules (everything added below won't be overridden by 'Generate .gitignore File' if you use the 'Update' option)
dist

3
.vscode/settings.json vendored Normal file
View file

@ -0,0 +1,3 @@
{
"licenser.license": "0BSD"
}

19
.woodpecker/deploy.yml Normal file
View file

@ -0,0 +1,19 @@
# Run on pushes to the repository's default branch, on the daily cron, and on
# manual triggers. CI_REPO_DEFAULT_BRANCH is Woodpecker's built-in variable for
# the default branch name.
when:
  - event: push
    branch: ${CI_REPO_DEFAULT_BRANCH}
  - event: cron
    cron: daily
  - event: manual

steps:
  # Runs the generator, which writes the feed files into ./dist.
  - name: generate
    image: rust
    commands:
      - cargo run
  # Uploads ./dist to the Cloudflare Pages project.
  - name: deploy
    image: node:alpine
    commands:
      - npx wrangler pages deploy ./dist --project-name mininews
    environment:
      CLOUDFLARE_API_TOKEN:
        from_secret: cf_token

12
Cargo.toml Normal file
View file

@ -0,0 +1,12 @@
[package]
name = "mininews"
version = "0.1.0"
edition = "2021"
[dependencies]
anyhow = "1.0.95"
chrono = { version = "0.4.39", default-features = false, features = ["now"] }
kuchikiki = "0.8.2"
reqwest = { version = "0.12.12", features = ["blocking"] }
rss = { version = "2.0.11" }
uuid = { version = "1.12.1", features = ["v7"] }

12
LICENSE Normal file
View file

@ -0,0 +1,12 @@
Zero-Clause BSD / Free Public License 1.0.0 (0BSD)
Permission to use, copy, modify, and/or distribute this software for any purpose
with or without fee is hereby granted.
THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY
AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT,
INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM
LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR
OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
PERFORMANCE OF THIS SOFTWARE.

3
README.md Normal file
View file

@ -0,0 +1,3 @@
# Mininews
A simple RSS generator for [Wikipedia's Current Events](https://en.wikipedia.org/wiki/Portal:Current_events). Inspired by [tom-james-watson's detoxed.news](https://github.com/tom-james-watson/detoxed.news) project.

1
src/lib.rs Normal file
View file

@ -0,0 +1 @@
pub mod parser;

99
src/main.rs Normal file
View file

@ -0,0 +1,99 @@
use std::{fs, io::ErrorKind};
use anyhow::{bail, Context, Error, Result};
use chrono::{Datelike, Days, NaiveTime, Utc};
use mininews::parser::{parse, EventBlock, PAGE_URL};
use reqwest::blocking::get;
use rss::{ChannelBuilder, Guid, Item, ItemBuilder};
use uuid::{Timestamp, Uuid};
/// Renders the given event blocks as an RSS channel and returns the XML text.
///
/// When `exclude_today` is `true`, only blocks dated strictly before the
/// current UTC date are included.
///
/// Each item is published at midnight UTC of the day *after* the block's date.
///
/// # Errors
///
/// Returns an error if adding one day to a block's date overflows.
fn generate_feed(items: &[EventBlock], exclude_today: bool) -> Result<String> {
    // Evaluate "today" once so every item is compared against the same date.
    let today = Utc::now().date_naive();

    let feed_items = items
        .iter()
        .filter(|block| !exclude_today || block.date < today)
        .map(|block| {
            let pub_date = block
                .date
                .checked_add_days(Days::new(1))
                .context("failed to add to date")?
                .and_time(NaiveTime::MIN)
                .and_utc();

            // English ordinal suffix; 11-13 fall through to "th".
            let ordinal = match block.date.day() {
                1 | 21 | 31 => "st",
                2 | 22 => "nd",
                3 | 23 => "rd",
                _ => "th",
            };

            Ok::<Item, Error>(
                ItemBuilder::default()
                    .title(format!(
                        "Current Events - {}{} {}",
                        block.date.format("%A %-d"),
                        ordinal,
                        block.date.format("%B")
                    ))
                    .link(Some(PAGE_URL.to_string()))
                    .pub_date(pub_date.to_rfc2822())
                    .guid(Guid {
                        permalink: false,
                        // The GUID must be identical on every run so feed
                        // readers recognise an item they have already seen.
                        // A v7 UUID mixes in random bits and therefore changed
                        // on each regeneration; derive it from the date instead.
                        value: format!("{}#{}", PAGE_URL, block.date.format("%Y_%B_%-d")),
                    })
                    .description(format!(
                        "Wikipedia current events from {}",
                        block.date
                    ))
                    .content(block.content.clone())
                    .build(),
            )
        })
        .collect::<Result<Vec<Item>, _>>()?;

    let mut channel = ChannelBuilder::default()
        .title("Mininews")
        // NOTE(review): placeholder URL; presumably should point at the deployed site.
        .link("https://example.com")
        .description("An RSS feed from Wikipedia's Current Events")
        .build();
    channel.set_items(feed_items);

    Ok(channel.to_string())
}
/// Downloads the page at `PAGE_URL` and returns its body as text.
///
/// # Errors
///
/// Returns an error if the request fails, if the server answers with a
/// 4xx/5xx status, or if the response body cannot be read as text.
fn fetch_document() -> Result<String> {
    get(PAGE_URL)
        .context("failed to get page")?
        // Without this check an HTTP error page would be handed to the parser
        // as if it were the real document.
        .error_for_status()
        .context("page request returned an error status")?
        .text()
        .context("failed to get page content")
}
/// Fetches the page, parses it into event blocks, and writes two feeds into
/// `dist/`: `feed.xml` (today's block excluded) and `feed_with_today.xml`.
fn main() -> Result<()> {
    println!("fetching document...");
    let html = fetch_document()?;

    println!("parsing...");
    let blocks = parse(&html)?;

    // A pre-existing output directory is fine; any other failure is fatal.
    match fs::create_dir("dist") {
        Err(err) if err.kind() != ErrorKind::AlreadyExists => {
            bail!("failed to create directory: {}", err)
        }
        _ => {}
    }

    for exclude_today in [true, false] {
        let xml = generate_feed(&blocks, exclude_today)?;
        let suffix = if exclude_today { "" } else { "_with_today" };
        let path = format!("dist/feed{}.xml", suffix);
        fs::write(path, xml).context("failed to write file")?;
    }

    Ok(())
}

77
src/parser.rs Normal file
View file

@ -0,0 +1,77 @@
use anyhow::{anyhow, Context, Result};
use chrono::NaiveDate;
use kuchikiki::parse_html;
use kuchikiki::traits::*;
use kuchikiki::NodeRef;
const RELATIVE_URL_BASE: &str = "https://en.wikipedia.org";
pub const PAGE_URL: &str = "https://en.wikipedia.org/wiki/Portal:Current_events";
pub struct EventBlock {
pub date: NaiveDate,
pub content: String,
}
/// Converts one event-block element into an [`EventBlock`].
///
/// The date is read from the element's `id` attribute (format `%Y_%B_%-d`).
/// The content is the serialized `div.current-events-content` descendant,
/// with site-relative and protocol-relative link targets made absolute.
///
/// # Errors
///
/// Returns an error if `node` is not an element, has no `id`, the `id` is not
/// a valid date, or the content element cannot be found.
fn parse_event_block(node: &NodeRef) -> Result<EventBlock> {
    let element = node.as_element().context("failed to parse element")?;

    // Keep the attribute borrow scoped to the date extraction.
    let date = {
        let attributes = element.attributes.borrow();
        let id = attributes
            .get("id")
            .context("could not get event block id")?;
        NaiveDate::parse_from_str(id, "%Y_%B_%-d").context("failed to parse event block date")?
    };

    let content = node
        .select_first("div.current-events-content")
        .map_err(|_| anyhow!("failed to select event block content"))?;
    let content_node = content.as_node();

    // Rewrite relative links so they still resolve outside of Wikipedia.
    for link in content_node
        .select("a")
        .map_err(|_| anyhow!("failed to select event block links"))?
    {
        let mut link_attributes = link
            .as_node()
            .as_element()
            .context("failed to parse event block link")?
            .attributes
            .borrow_mut();

        let absolute = match link_attributes.get("href") {
            // Protocol-relative URL: only the scheme is missing.
            Some(href) if href.starts_with("//") => format!("https:{}", href),
            // Site-relative URL: prefix the Wikipedia origin.
            Some(href) if href.starts_with('/') => format!("{}{}", RELATIVE_URL_BASE, href),
            // Already absolute, some other relative form, or an anchor
            // without an href: leave the element untouched.
            _ => continue,
        };
        link_attributes.insert("href", absolute);
    }

    Ok(EventBlock {
        date,
        content: content_node.to_string(),
    })
}
/// Parses the HTML in `content` and returns one [`EventBlock`] per event
/// block found, in document order.
///
/// Blocks containing an `li.mw-empty-elt` element are skipped.
///
/// # Errors
///
/// Returns an error if the block selector cannot be applied or if any
/// remaining block fails to parse.
pub fn parse(content: &str) -> Result<Vec<EventBlock>> {
    let document = parse_html().one(content);
    let candidates = document
        .select("div.p-current-events-events div.current-events-main.vevent")
        .map_err(|_| anyhow!("failed to select event blocks"))?;

    let mut blocks = Vec::new();
    for candidate in candidates {
        let node = candidate.as_node();
        if node.select_first("li.mw-empty-elt").is_ok() {
            continue;
        }
        blocks.push(parse_event_block(node)?);
    }
    Ok(blocks)
}