initial commit

This commit is contained in:
Jake Walker 2025-01-21 23:40:56 +00:00
commit c633438a4f
9 changed files with 299 additions and 0 deletions

12
.editorconfig Normal file
View file

@ -0,0 +1,12 @@
# EditorConfig is awesome: https://EditorConfig.org
# top-most EditorConfig file
root = true
[*]
indent_style = space
indent_size = 4
end_of_line = lf
charset = utf-8
trim_trailing_whitespace = true
insert_final_newline = true

122
.gitignore vendored Normal file
View file

@ -0,0 +1,122 @@
# File created using '.gitignore Generator' for Visual Studio Code: https://bit.ly/vscode-gig
# Created by https://www.toptal.com/developers/gitignore/api/windows,visualstudiocode,rust,macos,linux,rust-analyzer
# Edit at https://www.toptal.com/developers/gitignore?templates=windows,visualstudiocode,rust,macos,linux,rust-analyzer
### Linux ###
*~
# temporary files which can be created if a process still has a handle open of a deleted file
.fuse_hidden*
# KDE directory preferences
.directory
# Linux trash folder which might appear on any partition or disk
.Trash-*
# .nfs files are created when an open file is removed but is still being accessed
.nfs*
### macOS ###
# General
.DS_Store
.AppleDouble
.LSOverride
# Icon must end with two \r
Icon
# Thumbnails
._*
# Files that might appear in the root of a volume
.DocumentRevisions-V100
.fseventsd
.Spotlight-V100
.TemporaryItems
.Trashes
.VolumeIcon.icns
.com.apple.timemachine.donotpresent
# Directories potentially created on remote AFP share
.AppleDB
.AppleDesktop
Network Trash Folder
Temporary Items
.apdisk
### macOS Patch ###
# iCloud generated files
*.icloud
### Rust ###
# Generated by Cargo
# will have compiled files and executables
debug/
target/
# Remove Cargo.lock from gitignore if creating an executable, leave it for libraries
# More information here https://doc.rust-lang.org/cargo/guide/cargo-toml-vs-cargo-lock.html
Cargo.lock
# These are backup files generated by rustfmt
**/*.rs.bk
# MSVC Windows builds of rustc generate these, which store debugging information
*.pdb
### rust-analyzer ###
# Can be generated by other build systems other than cargo (ex: bazelbuild/rust_rules)
rust-project.json
### VisualStudioCode ###
.vscode/*
!.vscode/settings.json
!.vscode/tasks.json
!.vscode/launch.json
!.vscode/extensions.json
!.vscode/*.code-snippets
# Local History for Visual Studio Code
.history/
# Built Visual Studio Code Extensions
*.vsix
### VisualStudioCode Patch ###
# Ignore all local history of files
.history
.ionide
### Windows ###
# Windows thumbnail cache files
Thumbs.db
Thumbs.db:encryptable
ehthumbs.db
ehthumbs_vista.db
# Dump file
*.stackdump
# Folder config file
[Dd]esktop.ini
# Recycle Bin used on file shares
$RECYCLE.BIN/
# Windows Installer files
*.cab
*.msi
*.msix
*.msm
*.msp
# Windows shortcuts
*.lnk
# End of https://www.toptal.com/developers/gitignore/api/windows,visualstudiocode,rust,macos,linux,rust-analyzer
# Custom rules (everything added below won't be overridden by 'Generate .gitignore File' if you use 'Update' option)
sample.html

3
.vscode/settings.json vendored Normal file
View file

@ -0,0 +1,3 @@
{
"licenser.license": "0BSD"
}

11
Cargo.toml Normal file
View file

@ -0,0 +1,11 @@
[package]
name = "mininews"
version = "0.1.0"
edition = "2021"
[dependencies]
anyhow = "1.0.95"
chrono = { version = "0.4.39", default-features = false, features = ["now"] }
kuchikiki = "0.8.2"
rss = { version = "2.0.11" }
uuid = { version = "1.12.1", features = ["v7"] }

12
LICENSE Normal file
View file

@ -0,0 +1,12 @@
Zero-Clause BSD / Free Public License 1.0.0 (0BSD)
Permission to use, copy, modify, and/or distribute this software for any purpose
with or without fee is hereby granted.
THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY
AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT,
INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM
LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR
OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
PERFORMANCE OF THIS SOFTWARE.

3
README.md Normal file
View file

@ -0,0 +1,3 @@
# Mininews
A simple RSS generator for [Wikipedia's Current Events](https://en.wikipedia.org/wiki/Portal:Current_events). Inspired by [tom-james-watson's detoxed.news](https://github.com/tom-james-watson/detoxed.news) project.

1
src/lib.rs Normal file
View file

@ -0,0 +1 @@
/// Parsing of Wikipedia current-events HTML into event blocks.
pub mod parser;

58
src/main.rs Normal file
View file

@ -0,0 +1,58 @@
use anyhow::{Context, Error, Result};
use chrono::{Days, NaiveTime, Utc};
use mininews::parser::{parse, EventBlock, PAGE_URL};
use rss::{ChannelBuilder, Guid, Item, ItemBuilder};
use uuid::{Timestamp, Uuid};
/// Builds an RSS channel from the parsed event blocks and prints it to stdout.
///
/// Only blocks dated strictly before today (UTC) are included. Each item is
/// stamped with a publication time of midnight UTC on the day *after* the
/// block's date.
///
/// # Errors
///
/// Returns an error if a block's date cannot be advanced by one day, or if the
/// resulting publication time lies before the Unix epoch and therefore cannot
/// be expressed as the unsigned timestamp the GUID is built from.
fn generate_feed(items: Vec<EventBlock>) -> Result<()> {
    // Evaluate "today" once so every block is compared against the same date.
    let today = Utc::now().date_naive();

    // The vector is owned, so consume it and move each block's content into
    // its feed item instead of cloning it.
    let feed_items = items
        .into_iter()
        .filter(|block| block.date < today)
        .map(|block| {
            let pub_date = block
                .date
                .checked_add_days(Days::new(1))
                .context("failed to add to date")?
                .and_time(NaiveTime::MIN)
                .and_utc();
            // A plain `as u64` cast would silently wrap a negative timestamp.
            let unix_seconds = u64::try_from(pub_date.timestamp())
                .context("publication date is before the Unix epoch")?;

            // NOTE(review): only the timestamp part of this UUID is derived
            // from the date; `Uuid::new_v7` presumably fills the rest with
            // random bits, so the GUID likely changes on every run. Confirm
            // whether feed readers need a stable GUID per day.
            let guid = Guid {
                permalink: false,
                value: Uuid::new_v7(Timestamp::from_unix_time(unix_seconds, 0, 0, 0)).to_string(),
            };

            Ok::<Item, Error>(
                ItemBuilder::default()
                    .title(block.date.to_string())
                    .link(Some(PAGE_URL.to_string()))
                    .pub_date(pub_date.to_rfc2822())
                    .guid(guid)
                    .description(format!("Wikipedia current events from {}", block.date))
                    .content(block.content)
                    .build(),
            )
        })
        .collect::<Result<Vec<Item>, _>>()?;

    let mut channel = ChannelBuilder::default()
        .title("Mininews")
        .link("https://example.com")
        .description("An RSS feed from Wikipedia's Current Events")
        .build();
    channel.set_items(feed_items);

    // Print the XML itself; `{:?}` would emit a quoted, escaped Rust string.
    println!("{}", channel.to_string());
    Ok(())
}
/// Program entry point: parses the event blocks and emits the feed built
/// from them, propagating any error from either step.
fn main() -> Result<()> {
    let event_blocks = parse()?;
    generate_feed(event_blocks)
}

77
src/parser.rs Normal file
View file

@ -0,0 +1,77 @@
use anyhow::{anyhow, Context, Result};
use chrono::NaiveDate;
use kuchikiki::parse_html;
use kuchikiki::traits::*;
use kuchikiki::NodeRef;
/// Origin prepended to link targets that start with `/` in event content.
const RELATIVE_URL_BASE: &str = "https://en.wikipedia.org";
/// Address of the Wikipedia current-events portal page.
pub const PAGE_URL: &str = "https://en.wikipedia.org/wiki/Portal:Current_events";
pub struct EventBlock {
pub date: NaiveDate,
pub content: String,
}
/// Parses a single event block element into an [`EventBlock`].
///
/// The date is read from the element's `id` attribute using the format
/// `%Y_%B_%-d`. The content is the serialized `div.current-events-content`
/// descendant, with its link targets made absolute:
///
/// * `//host/path` (protocol-relative) becomes `https://host/path`;
/// * `/path` (root-relative) is prefixed with `RELATIVE_URL_BASE`;
/// * anything else, including anchors without an `href`, is left untouched.
///
/// # Errors
///
/// Returns an error if `node` is not an element, has no `id`, the `id` is not
/// a valid date, or the content element cannot be selected.
fn parse_event_block(node: &NodeRef) -> Result<EventBlock> {
    let date = {
        // Borrow the attributes in place (no clone of the element data) and
        // release the borrow before the subtree is traversed below.
        let attributes = node
            .as_element()
            .context("failed to parse element")?
            .attributes
            .borrow();
        let date_str = attributes
            .get("id")
            .context("could not get event block id")?;
        NaiveDate::parse_from_str(date_str, "%Y_%B_%-d")
            .map_err(|e| anyhow!(e.to_string()))
            .context("failed to parse event block date")?
    };

    let content = node
        .select_first("div.current-events-content")
        .map_err(|_| anyhow!("failed to select event block content"))?;
    let content_node = content.as_node();

    // Rewrite relative links so they still resolve outside of Wikipedia.
    for link in content_node
        .select("a")
        .map_err(|_| anyhow!("failed to select event block links"))?
    {
        let mut link_attributes = link.attributes.borrow_mut();
        let absolute = match link_attributes.get("href") {
            // Check `//` first: it also starts with `/`, but it names another
            // host and must not be glued onto the Wikipedia origin.
            Some(href) if href.starts_with("//") => format!("https:{}", href),
            Some(href) if href.starts_with('/') => format!("{}{}", RELATIVE_URL_BASE, href),
            // Already absolute, a fragment, or an anchor with no target:
            // nothing to rewrite, and no reason to fail the whole block.
            _ => continue,
        };
        link_attributes.insert("href", absolute);
    }

    Ok(EventBlock {
        date,
        content: content_node.to_string(),
    })
}
/// Parses the HTML embedded at compile time from `../sample.html` and returns
/// one [`EventBlock`] per matching event block element, in document order.
///
/// # Errors
///
/// Returns an error if the event block selector cannot be applied, or as soon
/// as any individual block fails to parse.
pub fn parse() -> Result<Vec<EventBlock>> {
    let html = include_str!("../sample.html");
    let dom = parse_html().one(html);

    let matches = dom
        .select("div.p-current-events-events div.current-events-main.vevent")
        .map_err(|_| anyhow!("failed to select event blocks"))?;

    let mut blocks = Vec::new();
    for matched in matches {
        // Stop at the first block that fails, propagating its error.
        blocks.push(parse_event_block(matched.as_node())?);
    }
    Ok(blocks)
}