initial commit
This commit is contained in:
commit
d635f1cbc8
10 changed files with 363 additions and 0 deletions
15
.editorconfig
Normal file
15
.editorconfig
Normal file
|
@ -0,0 +1,15 @@
|
|||
# EditorConfig is awesome: https://EditorConfig.org
|
||||
|
||||
# top-most EditorConfig file
|
||||
root = true
|
||||
|
||||
[*]
|
||||
indent_style = space
|
||||
indent_size = 4
|
||||
end_of_line = lf
|
||||
charset = utf-8
|
||||
trim_trailing_whitespace = true
|
||||
insert_final_newline = true
|
||||
|
||||
[*.{yml,yaml}]
|
||||
indent_size = 2
|
122
.gitignore
vendored
Normal file
122
.gitignore
vendored
Normal file
|
@ -0,0 +1,122 @@
|
|||
# File created using '.gitignore Generator' for Visual Studio Code: https://bit.ly/vscode-gig
|
||||
# Created by https://www.toptal.com/developers/gitignore/api/windows,visualstudiocode,rust,macos,linux,rust-analyzer
|
||||
# Edit at https://www.toptal.com/developers/gitignore?templates=windows,visualstudiocode,rust,macos,linux,rust-analyzer
|
||||
|
||||
### Linux ###
|
||||
*~
|
||||
|
||||
# temporary files which can be created if a process still has a handle open of a deleted file
|
||||
.fuse_hidden*
|
||||
|
||||
# KDE directory preferences
|
||||
.directory
|
||||
|
||||
# Linux trash folder which might appear on any partition or disk
|
||||
.Trash-*
|
||||
|
||||
# .nfs files are created when an open file is removed but is still being accessed
|
||||
.nfs*
|
||||
|
||||
### macOS ###
|
||||
# General
|
||||
.DS_Store
|
||||
.AppleDouble
|
||||
.LSOverride
|
||||
|
||||
# Icon must end with two \r
|
||||
Icon
|
||||
|
||||
|
||||
# Thumbnails
|
||||
._*
|
||||
|
||||
# Files that might appear in the root of a volume
|
||||
.DocumentRevisions-V100
|
||||
.fseventsd
|
||||
.Spotlight-V100
|
||||
.TemporaryItems
|
||||
.Trashes
|
||||
.VolumeIcon.icns
|
||||
.com.apple.timemachine.donotpresent
|
||||
|
||||
# Directories potentially created on remote AFP share
|
||||
.AppleDB
|
||||
.AppleDesktop
|
||||
Network Trash Folder
|
||||
Temporary Items
|
||||
.apdisk
|
||||
|
||||
### macOS Patch ###
|
||||
# iCloud generated files
|
||||
*.icloud
|
||||
|
||||
### Rust ###
|
||||
# Generated by Cargo
|
||||
# will have compiled files and executables
|
||||
debug/
|
||||
target/
|
||||
|
||||
# Remove Cargo.lock from gitignore if creating an executable, leave it for libraries
|
||||
# More information here https://doc.rust-lang.org/cargo/guide/cargo-toml-vs-cargo-lock.html
|
||||
Cargo.lock
|
||||
|
||||
# These are backup files generated by rustfmt
|
||||
**/*.rs.bk
|
||||
|
||||
# MSVC Windows builds of rustc generate these, which store debugging information
|
||||
*.pdb
|
||||
|
||||
### rust-analyzer ###
|
||||
# Can be generated by other build systems other than cargo (ex: bazelbuild/rust_rules)
|
||||
rust-project.json
|
||||
|
||||
|
||||
### VisualStudioCode ###
|
||||
.vscode/*
|
||||
!.vscode/settings.json
|
||||
!.vscode/tasks.json
|
||||
!.vscode/launch.json
|
||||
!.vscode/extensions.json
|
||||
!.vscode/*.code-snippets
|
||||
|
||||
# Local History for Visual Studio Code
|
||||
.history/
|
||||
|
||||
# Built Visual Studio Code Extensions
|
||||
*.vsix
|
||||
|
||||
### VisualStudioCode Patch ###
|
||||
# Ignore all local history of files
|
||||
.history
|
||||
.ionide
|
||||
|
||||
### Windows ###
|
||||
# Windows thumbnail cache files
|
||||
Thumbs.db
|
||||
Thumbs.db:encryptable
|
||||
ehthumbs.db
|
||||
ehthumbs_vista.db
|
||||
|
||||
# Dump file
|
||||
*.stackdump
|
||||
|
||||
# Folder config file
|
||||
[Dd]esktop.ini
|
||||
|
||||
# Recycle Bin used on file shares
|
||||
$RECYCLE.BIN/
|
||||
|
||||
# Windows Installer files
|
||||
*.cab
|
||||
*.msi
|
||||
*.msix
|
||||
*.msm
|
||||
*.msp
|
||||
|
||||
# Windows shortcuts
|
||||
*.lnk
|
||||
|
||||
# End of https://www.toptal.com/developers/gitignore/api/windows,visualstudiocode,rust,macos,linux,rust-analyzer
|
||||
|
||||
# Custom rules (everything added below won't be overriden by 'Generate .gitignore File' if you use 'Update' option)
|
||||
dist
|
3
.vscode/settings.json
vendored
Normal file
3
.vscode/settings.json
vendored
Normal file
|
@ -0,0 +1,3 @@
|
|||
{
|
||||
"licenser.license": "0BSD"
|
||||
}
|
19
.woodpecker/deploy.yml
Normal file
19
.woodpecker/deploy.yml
Normal file
|
@ -0,0 +1,19 @@
|
|||
when:
|
||||
- event: push
|
||||
branch: ${CI_REPO_DEFAULT_BRANCH}
|
||||
- event: cron
|
||||
cron: daily
|
||||
- event: manual
|
||||
|
||||
steps:
|
||||
- name: generate
|
||||
image: rust
|
||||
commands:
|
||||
- cargo run
|
||||
- name: deploy
|
||||
image: node:alpine
|
||||
commands:
|
||||
- npx wrangler pages deploy ./dist --project-name mininews
|
||||
environment:
|
||||
CLOUDFLARE_API_TOKEN:
|
||||
from_secret: cf_token
|
12
Cargo.toml
Normal file
12
Cargo.toml
Normal file
|
@ -0,0 +1,12 @@
|
|||
[package]
|
||||
name = "mininews"
|
||||
version = "0.1.0"
|
||||
edition = "2021"
|
||||
|
||||
[dependencies]
|
||||
anyhow = "1.0.95"
|
||||
chrono = { version = "0.4.39", default-features = false, features = ["now"] }
|
||||
kuchikiki = "0.8.2"
|
||||
reqwest = { version = "0.12.12", features = ["blocking"] }
|
||||
rss = { version = "2.0.11" }
|
||||
uuid = { version = "1.12.1", features = ["v7"] }
|
12
LICENSE
Normal file
12
LICENSE
Normal file
|
@ -0,0 +1,12 @@
|
|||
Zero-Clause BSD / Free Public License 1.0.0 (0BSD)
|
||||
|
||||
Permission to use, copy, modify, and/or distribute this software for any purpose
|
||||
with or without fee is hereby granted.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
|
||||
WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY
|
||||
AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT,
|
||||
INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM
|
||||
LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR
|
||||
OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
|
||||
PERFORMANCE OF THIS SOFTWARE.
|
3
README.md
Normal file
3
README.md
Normal file
|
@ -0,0 +1,3 @@
|
|||
# Mininews
|
||||
|
||||
A simple RSS generator for [Wikipedia's Current Events](https://en.wikipedia.org/wiki/Portal:Current_events). Inspired by [tom-james-watson's detoxed.news](https://github.com/tom-james-watson/detoxed.news) project.
|
1
src/lib.rs
Normal file
1
src/lib.rs
Normal file
|
@ -0,0 +1 @@
|
|||
pub mod parser;
|
99
src/main.rs
Normal file
99
src/main.rs
Normal file
|
@ -0,0 +1,99 @@
|
|||
use std::{fs, io::ErrorKind};
|
||||
|
||||
use anyhow::{bail, Context, Error, Result};
|
||||
use chrono::{Datelike, Days, NaiveTime, Utc};
|
||||
use mininews::parser::{parse, EventBlock, PAGE_URL};
|
||||
use reqwest::blocking::get;
|
||||
use rss::{ChannelBuilder, Guid, Item, ItemBuilder};
|
||||
use uuid::{Timestamp, Uuid};
|
||||
|
||||
fn generate_feed(items: &Vec<EventBlock>, exclude_today: bool) -> Result<String> {
|
||||
let mut channel = ChannelBuilder::default()
|
||||
.title("Mininews")
|
||||
.link("https://example.com")
|
||||
.description("An RSS feed from Wikipedia's Current Events")
|
||||
.build();
|
||||
|
||||
channel.set_items(
|
||||
items
|
||||
.iter()
|
||||
.filter(|x| !exclude_today || x.date < Utc::now().date_naive())
|
||||
.map(|x| {
|
||||
let pub_date = x
|
||||
.date
|
||||
.checked_add_days(Days::new(1))
|
||||
.context("failed to add to date")?
|
||||
.and_time(NaiveTime::MIN)
|
||||
.and_utc();
|
||||
Ok::<Item, Error>(
|
||||
ItemBuilder::default()
|
||||
.title(format!(
|
||||
"Current Events - {}{} {}",
|
||||
x.date.format("%A %-d").to_string(),
|
||||
match x.date.day() {
|
||||
1 | 21 | 31 => "st",
|
||||
2 | 22 => "nd",
|
||||
3 | 23 => "rd",
|
||||
_ => "th",
|
||||
},
|
||||
x.date.format("%B").to_string()
|
||||
))
|
||||
.link(Some(PAGE_URL.to_string()))
|
||||
.pub_date(pub_date.to_rfc2822())
|
||||
.guid(Guid {
|
||||
permalink: false,
|
||||
value: Uuid::new_v7(Timestamp::from_unix_time(
|
||||
pub_date.timestamp() as u64,
|
||||
0,
|
||||
0,
|
||||
0,
|
||||
))
|
||||
.to_string(),
|
||||
})
|
||||
.description(format!(
|
||||
"Wikipedia current events from {}",
|
||||
x.date.to_string()
|
||||
))
|
||||
.content(x.content.clone())
|
||||
.build(),
|
||||
)
|
||||
})
|
||||
.collect::<Result<Vec<Item>, _>>()?,
|
||||
);
|
||||
|
||||
Ok(channel.to_string())
|
||||
}
|
||||
|
||||
fn fetch_document() -> Result<String> {
|
||||
Ok(get(PAGE_URL)
|
||||
.context("failed to get page")?
|
||||
.text()
|
||||
.context("failed to get page content")?)
|
||||
}
|
||||
|
||||
fn main() -> Result<()> {
|
||||
println!("fetching document...");
|
||||
let document = fetch_document()?;
|
||||
println!("parsing...");
|
||||
let items = parse(&document)?;
|
||||
|
||||
if let Err(err) = fs::create_dir("dist") {
|
||||
match err.kind() {
|
||||
ErrorKind::AlreadyExists => (),
|
||||
_ => bail!("failed to create directory: {}", err),
|
||||
}
|
||||
}
|
||||
|
||||
for exclude_today in [true, false] {
|
||||
let feed = generate_feed(&items, exclude_today)?;
|
||||
let suffix = {
|
||||
match exclude_today {
|
||||
false => "_with_today",
|
||||
_ => "",
|
||||
}
|
||||
};
|
||||
fs::write(format!("dist/feed{}.xml", suffix), feed).context("failed to write file")?;
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
77
src/parser.rs
Normal file
77
src/parser.rs
Normal file
|
@ -0,0 +1,77 @@
|
|||
use anyhow::{anyhow, Context, Result};
|
||||
use chrono::NaiveDate;
|
||||
use kuchikiki::parse_html;
|
||||
use kuchikiki::traits::*;
|
||||
use kuchikiki::NodeRef;
|
||||
|
||||
const RELATIVE_URL_BASE: &str = "https://en.wikipedia.org";
|
||||
pub const PAGE_URL: &str = "https://en.wikipedia.org/wiki/Portal:Current_events";
|
||||
|
||||
pub struct EventBlock {
|
||||
pub date: NaiveDate,
|
||||
pub content: String,
|
||||
}
|
||||
|
||||
fn parse_event_block(node: &NodeRef) -> Result<EventBlock> {
|
||||
let element = node
|
||||
.as_element()
|
||||
.context("failed to parse element")?
|
||||
.clone();
|
||||
|
||||
let element_attributes = element.attributes.borrow();
|
||||
|
||||
let date_str = element_attributes
|
||||
.get("id")
|
||||
.context("could not get event block id")?;
|
||||
let date = NaiveDate::parse_from_str(date_str, "%Y_%B_%-d")
|
||||
.map_err(|e| anyhow!(e.to_string()))
|
||||
.with_context(|| "failed to parse event block date")?;
|
||||
|
||||
let content = node
|
||||
.select_first("div.current-events-content")
|
||||
.map_err(|_| anyhow!("failed to select event blocks"))?;
|
||||
let content_node = content.as_node();
|
||||
|
||||
// rewrite relative links
|
||||
for link in content_node
|
||||
.select("a")
|
||||
.map_err(|_| anyhow!("failed to select event block links"))?
|
||||
{
|
||||
let mut link_attributes = link
|
||||
.as_node()
|
||||
.as_element()
|
||||
.context("failed to parse event block link")?
|
||||
.attributes
|
||||
.borrow_mut();
|
||||
|
||||
let mut href = link_attributes
|
||||
.get("href")
|
||||
.context("link has no href")?
|
||||
.to_string();
|
||||
|
||||
if !href.starts_with("/") {
|
||||
continue;
|
||||
}
|
||||
|
||||
href = format!("{}{}", RELATIVE_URL_BASE, href);
|
||||
|
||||
link_attributes.insert("href", href);
|
||||
}
|
||||
|
||||
Ok(EventBlock {
|
||||
date,
|
||||
content: content_node.to_string(),
|
||||
})
|
||||
}
|
||||
|
||||
pub fn parse(content: &str) -> Result<Vec<EventBlock>> {
|
||||
let document = parse_html().one(content);
|
||||
|
||||
document
|
||||
.select("div.p-current-events-events div.current-events-main.vevent")
|
||||
.map_err(|_| anyhow!("failed to select event blocks"))?
|
||||
.into_iter()
|
||||
.filter(|el| !el.as_node().select_first("li.mw-empty-elt").is_ok())
|
||||
.map(|el| parse_event_block(el.as_node()))
|
||||
.collect()
|
||||
}
|
Loading…
Reference in a new issue