initial commit
This commit is contained in:
commit
d635f1cbc8
10 changed files with 363 additions and 0 deletions
15
.editorconfig
Normal file
15
.editorconfig
Normal file
|
@ -0,0 +1,15 @@
|
||||||
|
# EditorConfig is awesome: https://EditorConfig.org
|
||||||
|
|
||||||
|
# top-most EditorConfig file
|
||||||
|
root = true
|
||||||
|
|
||||||
|
[*]
|
||||||
|
indent_style = space
|
||||||
|
indent_size = 4
|
||||||
|
end_of_line = lf
|
||||||
|
charset = utf-8
|
||||||
|
trim_trailing_whitespace = true
|
||||||
|
insert_final_newline = true
|
||||||
|
|
||||||
|
[*.{yml,yaml}]
|
||||||
|
indent_size = 2
|
122
.gitignore
vendored
Normal file
122
.gitignore
vendored
Normal file
|
@ -0,0 +1,122 @@
|
||||||
|
# File created using '.gitignore Generator' for Visual Studio Code: https://bit.ly/vscode-gig
|
||||||
|
# Created by https://www.toptal.com/developers/gitignore/api/windows,visualstudiocode,rust,macos,linux,rust-analyzer
|
||||||
|
# Edit at https://www.toptal.com/developers/gitignore?templates=windows,visualstudiocode,rust,macos,linux,rust-analyzer
|
||||||
|
|
||||||
|
### Linux ###
|
||||||
|
*~
|
||||||
|
|
||||||
|
# temporary files which can be created if a process still has a handle open of a deleted file
|
||||||
|
.fuse_hidden*
|
||||||
|
|
||||||
|
# KDE directory preferences
|
||||||
|
.directory
|
||||||
|
|
||||||
|
# Linux trash folder which might appear on any partition or disk
|
||||||
|
.Trash-*
|
||||||
|
|
||||||
|
# .nfs files are created when an open file is removed but is still being accessed
|
||||||
|
.nfs*
|
||||||
|
|
||||||
|
### macOS ###
|
||||||
|
# General
|
||||||
|
.DS_Store
|
||||||
|
.AppleDouble
|
||||||
|
.LSOverride
|
||||||
|
|
||||||
|
# Icon must end with two \r
|
||||||
|
Icon
|
||||||
|
|
||||||
|
|
||||||
|
# Thumbnails
|
||||||
|
._*
|
||||||
|
|
||||||
|
# Files that might appear in the root of a volume
|
||||||
|
.DocumentRevisions-V100
|
||||||
|
.fseventsd
|
||||||
|
.Spotlight-V100
|
||||||
|
.TemporaryItems
|
||||||
|
.Trashes
|
||||||
|
.VolumeIcon.icns
|
||||||
|
.com.apple.timemachine.donotpresent
|
||||||
|
|
||||||
|
# Directories potentially created on remote AFP share
|
||||||
|
.AppleDB
|
||||||
|
.AppleDesktop
|
||||||
|
Network Trash Folder
|
||||||
|
Temporary Items
|
||||||
|
.apdisk
|
||||||
|
|
||||||
|
### macOS Patch ###
|
||||||
|
# iCloud generated files
|
||||||
|
*.icloud
|
||||||
|
|
||||||
|
### Rust ###
|
||||||
|
# Generated by Cargo
|
||||||
|
# will have compiled files and executables
|
||||||
|
debug/
|
||||||
|
target/
|
||||||
|
|
||||||
|
# Remove Cargo.lock from gitignore if creating an executable, leave it for libraries
|
||||||
|
# More information here https://doc.rust-lang.org/cargo/guide/cargo-toml-vs-cargo-lock.html
|
||||||
|
Cargo.lock
|
||||||
|
|
||||||
|
# These are backup files generated by rustfmt
|
||||||
|
**/*.rs.bk
|
||||||
|
|
||||||
|
# MSVC Windows builds of rustc generate these, which store debugging information
|
||||||
|
*.pdb
|
||||||
|
|
||||||
|
### rust-analyzer ###
|
||||||
|
# Can be generated by other build systems other than cargo (ex: bazelbuild/rust_rules)
|
||||||
|
rust-project.json
|
||||||
|
|
||||||
|
|
||||||
|
### VisualStudioCode ###
|
||||||
|
.vscode/*
|
||||||
|
!.vscode/settings.json
|
||||||
|
!.vscode/tasks.json
|
||||||
|
!.vscode/launch.json
|
||||||
|
!.vscode/extensions.json
|
||||||
|
!.vscode/*.code-snippets
|
||||||
|
|
||||||
|
# Local History for Visual Studio Code
|
||||||
|
.history/
|
||||||
|
|
||||||
|
# Built Visual Studio Code Extensions
|
||||||
|
*.vsix
|
||||||
|
|
||||||
|
### VisualStudioCode Patch ###
|
||||||
|
# Ignore all local history of files
|
||||||
|
.history
|
||||||
|
.ionide
|
||||||
|
|
||||||
|
### Windows ###
|
||||||
|
# Windows thumbnail cache files
|
||||||
|
Thumbs.db
|
||||||
|
Thumbs.db:encryptable
|
||||||
|
ehthumbs.db
|
||||||
|
ehthumbs_vista.db
|
||||||
|
|
||||||
|
# Dump file
|
||||||
|
*.stackdump
|
||||||
|
|
||||||
|
# Folder config file
|
||||||
|
[Dd]esktop.ini
|
||||||
|
|
||||||
|
# Recycle Bin used on file shares
|
||||||
|
$RECYCLE.BIN/
|
||||||
|
|
||||||
|
# Windows Installer files
|
||||||
|
*.cab
|
||||||
|
*.msi
|
||||||
|
*.msix
|
||||||
|
*.msm
|
||||||
|
*.msp
|
||||||
|
|
||||||
|
# Windows shortcuts
|
||||||
|
*.lnk
|
||||||
|
|
||||||
|
# End of https://www.toptal.com/developers/gitignore/api/windows,visualstudiocode,rust,macos,linux,rust-analyzer
|
||||||
|
|
||||||
|
# Custom rules (everything added below won't be overriden by 'Generate .gitignore File' if you use 'Update' option)
|
||||||
|
dist
|
3
.vscode/settings.json
vendored
Normal file
3
.vscode/settings.json
vendored
Normal file
|
@ -0,0 +1,3 @@
|
||||||
|
{
|
||||||
|
"licenser.license": "0BSD"
|
||||||
|
}
|
19
.woodpecker/deploy.yml
Normal file
19
.woodpecker/deploy.yml
Normal file
|
@ -0,0 +1,19 @@
|
||||||
|
when:
|
||||||
|
- event: push
|
||||||
|
# Woodpecker exposes the repository's default branch as CI_REPO_DEFAULT_BRANCH.
branch: ${CI_REPO_DEFAULT_BRANCH}
|
||||||
|
- event: cron
|
||||||
|
cron: daily
|
||||||
|
- event: manual
|
||||||
|
|
||||||
|
steps:
|
||||||
|
- name: generate
|
||||||
|
image: rust
|
||||||
|
commands:
|
||||||
|
- cargo run
|
||||||
|
- name: deploy
|
||||||
|
image: node:alpine
|
||||||
|
commands:
|
||||||
|
- npx wrangler pages deploy ./dist --project-name mininews
|
||||||
|
environment:
|
||||||
|
CLOUDFLARE_API_TOKEN:
|
||||||
|
from_secret: cf_token
|
12
Cargo.toml
Normal file
12
Cargo.toml
Normal file
|
@ -0,0 +1,12 @@
|
||||||
|
[package]
|
||||||
|
name = "mininews"
|
||||||
|
version = "0.1.0"
|
||||||
|
edition = "2021"
|
||||||
|
|
||||||
|
[dependencies]
|
||||||
|
anyhow = "1.0.95"
|
||||||
|
chrono = { version = "0.4.39", default-features = false, features = ["now"] }
|
||||||
|
kuchikiki = "0.8.2"
|
||||||
|
reqwest = { version = "0.12.12", features = ["blocking"] }
|
||||||
|
rss = { version = "2.0.11" }
|
||||||
|
uuid = { version = "1.12.1", features = ["v7"] }
|
12
LICENSE
Normal file
12
LICENSE
Normal file
|
@ -0,0 +1,12 @@
|
||||||
|
Zero-Clause BSD / Free Public License 1.0.0 (0BSD)
|
||||||
|
|
||||||
|
Permission to use, copy, modify, and/or distribute this software for any purpose
|
||||||
|
with or without fee is hereby granted.
|
||||||
|
|
||||||
|
THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
|
||||||
|
WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY
|
||||||
|
AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT,
|
||||||
|
INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM
|
||||||
|
LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR
|
||||||
|
OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
|
||||||
|
PERFORMANCE OF THIS SOFTWARE.
|
3
README.md
Normal file
3
README.md
Normal file
|
@ -0,0 +1,3 @@
|
||||||
|
# Mininews
|
||||||
|
|
||||||
|
A simple RSS generator for [Wikipedia's Current Events](https://en.wikipedia.org/wiki/Portal:Current_events). Inspired by [tom-james-watson's detoxed.news](https://github.com/tom-james-watson/detoxed.news) project.
|
1
src/lib.rs
Normal file
1
src/lib.rs
Normal file
|
@ -0,0 +1 @@
|
||||||
|
/// Parsing of the Wikipedia "Current events" portal page into dated event blocks.
pub mod parser;
|
99
src/main.rs
Normal file
99
src/main.rs
Normal file
|
@ -0,0 +1,99 @@
|
||||||
|
use std::{fs, io::ErrorKind};
|
||||||
|
|
||||||
|
use anyhow::{bail, Context, Error, Result};
|
||||||
|
use chrono::{Datelike, Days, NaiveTime, Utc};
|
||||||
|
use mininews::parser::{parse, EventBlock, PAGE_URL};
|
||||||
|
use reqwest::blocking::get;
|
||||||
|
use rss::{ChannelBuilder, Guid, Item, ItemBuilder};
|
||||||
|
use uuid::{Timestamp, Uuid};
|
||||||
|
|
||||||
|
fn generate_feed(items: &Vec<EventBlock>, exclude_today: bool) -> Result<String> {
|
||||||
|
let mut channel = ChannelBuilder::default()
|
||||||
|
.title("Mininews")
|
||||||
|
.link("https://example.com")
|
||||||
|
.description("An RSS feed from Wikipedia's Current Events")
|
||||||
|
.build();
|
||||||
|
|
||||||
|
channel.set_items(
|
||||||
|
items
|
||||||
|
.iter()
|
||||||
|
.filter(|x| !exclude_today || x.date < Utc::now().date_naive())
|
||||||
|
.map(|x| {
|
||||||
|
let pub_date = x
|
||||||
|
.date
|
||||||
|
.checked_add_days(Days::new(1))
|
||||||
|
.context("failed to add to date")?
|
||||||
|
.and_time(NaiveTime::MIN)
|
||||||
|
.and_utc();
|
||||||
|
Ok::<Item, Error>(
|
||||||
|
ItemBuilder::default()
|
||||||
|
.title(format!(
|
||||||
|
"Current Events - {}{} {}",
|
||||||
|
x.date.format("%A %-d").to_string(),
|
||||||
|
match x.date.day() {
|
||||||
|
1 | 21 | 31 => "st",
|
||||||
|
2 | 22 => "nd",
|
||||||
|
3 | 23 => "rd",
|
||||||
|
_ => "th",
|
||||||
|
},
|
||||||
|
x.date.format("%B").to_string()
|
||||||
|
))
|
||||||
|
.link(Some(PAGE_URL.to_string()))
|
||||||
|
.pub_date(pub_date.to_rfc2822())
|
||||||
|
.guid(Guid {
|
||||||
|
permalink: false,
|
||||||
|
value: Uuid::new_v7(Timestamp::from_unix_time(
|
||||||
|
pub_date.timestamp() as u64,
|
||||||
|
0,
|
||||||
|
0,
|
||||||
|
0,
|
||||||
|
))
|
||||||
|
.to_string(),
|
||||||
|
})
|
||||||
|
.description(format!(
|
||||||
|
"Wikipedia current events from {}",
|
||||||
|
x.date.to_string()
|
||||||
|
))
|
||||||
|
.content(x.content.clone())
|
||||||
|
.build(),
|
||||||
|
)
|
||||||
|
})
|
||||||
|
.collect::<Result<Vec<Item>, _>>()?,
|
||||||
|
);
|
||||||
|
|
||||||
|
Ok(channel.to_string())
|
||||||
|
}
|
||||||
|
|
||||||
|
fn fetch_document() -> Result<String> {
|
||||||
|
Ok(get(PAGE_URL)
|
||||||
|
.context("failed to get page")?
|
||||||
|
.text()
|
||||||
|
.context("failed to get page content")?)
|
||||||
|
}
|
||||||
|
|
||||||
|
fn main() -> Result<()> {
|
||||||
|
println!("fetching document...");
|
||||||
|
let document = fetch_document()?;
|
||||||
|
println!("parsing...");
|
||||||
|
let items = parse(&document)?;
|
||||||
|
|
||||||
|
if let Err(err) = fs::create_dir("dist") {
|
||||||
|
match err.kind() {
|
||||||
|
ErrorKind::AlreadyExists => (),
|
||||||
|
_ => bail!("failed to create directory: {}", err),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
for exclude_today in [true, false] {
|
||||||
|
let feed = generate_feed(&items, exclude_today)?;
|
||||||
|
let suffix = {
|
||||||
|
match exclude_today {
|
||||||
|
false => "_with_today",
|
||||||
|
_ => "",
|
||||||
|
}
|
||||||
|
};
|
||||||
|
fs::write(format!("dist/feed{}.xml", suffix), feed).context("failed to write file")?;
|
||||||
|
}
|
||||||
|
|
||||||
|
Ok(())
|
||||||
|
}
|
77
src/parser.rs
Normal file
77
src/parser.rs
Normal file
|
@ -0,0 +1,77 @@
|
||||||
|
use anyhow::{anyhow, Context, Result};
|
||||||
|
use chrono::NaiveDate;
|
||||||
|
use kuchikiki::parse_html;
|
||||||
|
use kuchikiki::traits::*;
|
||||||
|
use kuchikiki::NodeRef;
|
||||||
|
|
||||||
|
/// Origin prepended to hrefs that start with `/` when rewriting links in event content.
const RELATIVE_URL_BASE: &str = "https://en.wikipedia.org";
|
||||||
|
/// URL of the Wikipedia "Current events" portal page.
pub const PAGE_URL: &str = "https://en.wikipedia.org/wiki/Portal:Current_events";
|
||||||
|
|
||||||
|
pub struct EventBlock {
|
||||||
|
pub date: NaiveDate,
|
||||||
|
pub content: String,
|
||||||
|
}
|
||||||
|
|
||||||
|
fn parse_event_block(node: &NodeRef) -> Result<EventBlock> {
|
||||||
|
let element = node
|
||||||
|
.as_element()
|
||||||
|
.context("failed to parse element")?
|
||||||
|
.clone();
|
||||||
|
|
||||||
|
let element_attributes = element.attributes.borrow();
|
||||||
|
|
||||||
|
let date_str = element_attributes
|
||||||
|
.get("id")
|
||||||
|
.context("could not get event block id")?;
|
||||||
|
let date = NaiveDate::parse_from_str(date_str, "%Y_%B_%-d")
|
||||||
|
.map_err(|e| anyhow!(e.to_string()))
|
||||||
|
.with_context(|| "failed to parse event block date")?;
|
||||||
|
|
||||||
|
let content = node
|
||||||
|
.select_first("div.current-events-content")
|
||||||
|
.map_err(|_| anyhow!("failed to select event blocks"))?;
|
||||||
|
let content_node = content.as_node();
|
||||||
|
|
||||||
|
// rewrite relative links
|
||||||
|
for link in content_node
|
||||||
|
.select("a")
|
||||||
|
.map_err(|_| anyhow!("failed to select event block links"))?
|
||||||
|
{
|
||||||
|
let mut link_attributes = link
|
||||||
|
.as_node()
|
||||||
|
.as_element()
|
||||||
|
.context("failed to parse event block link")?
|
||||||
|
.attributes
|
||||||
|
.borrow_mut();
|
||||||
|
|
||||||
|
let mut href = link_attributes
|
||||||
|
.get("href")
|
||||||
|
.context("link has no href")?
|
||||||
|
.to_string();
|
||||||
|
|
||||||
|
if !href.starts_with("/") {
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
|
href = format!("{}{}", RELATIVE_URL_BASE, href);
|
||||||
|
|
||||||
|
link_attributes.insert("href", href);
|
||||||
|
}
|
||||||
|
|
||||||
|
Ok(EventBlock {
|
||||||
|
date,
|
||||||
|
content: content_node.to_string(),
|
||||||
|
})
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn parse(content: &str) -> Result<Vec<EventBlock>> {
|
||||||
|
let document = parse_html().one(content);
|
||||||
|
|
||||||
|
document
|
||||||
|
.select("div.p-current-events-events div.current-events-main.vevent")
|
||||||
|
.map_err(|_| anyhow!("failed to select event blocks"))?
|
||||||
|
.into_iter()
|
||||||
|
.filter(|el| !el.as_node().select_first("li.mw-empty-elt").is_ok())
|
||||||
|
.map(|el| parse_event_block(el.as_node()))
|
||||||
|
.collect()
|
||||||
|
}
|
Loading…
Reference in a new issue