initial commit
This commit is contained in:
commit
c633438a4f
9 changed files with 299 additions and 0 deletions
12
.editorconfig
Normal file
12
.editorconfig
Normal file
|
@ -0,0 +1,12 @@
|
|||
# EditorConfig is awesome: https://EditorConfig.org
|
||||
|
||||
# top-most EditorConfig file
|
||||
root = true
|
||||
|
||||
[*]
|
||||
indent_style = space
|
||||
indent_size = 4
|
||||
end_of_line = lf
|
||||
charset = utf-8
|
||||
trim_trailing_whitespace = true
|
||||
insert_final_newline = true
|
122
.gitignore
vendored
Normal file
122
.gitignore
vendored
Normal file
|
@ -0,0 +1,122 @@
|
|||
# File created using '.gitignore Generator' for Visual Studio Code: https://bit.ly/vscode-gig
|
||||
# Created by https://www.toptal.com/developers/gitignore/api/windows,visualstudiocode,rust,macos,linux,rust-analyzer
|
||||
# Edit at https://www.toptal.com/developers/gitignore?templates=windows,visualstudiocode,rust,macos,linux,rust-analyzer
|
||||
|
||||
### Linux ###
|
||||
*~
|
||||
|
||||
# temporary files which can be created if a process still has a handle open of a deleted file
|
||||
.fuse_hidden*
|
||||
|
||||
# KDE directory preferences
|
||||
.directory
|
||||
|
||||
# Linux trash folder which might appear on any partition or disk
|
||||
.Trash-*
|
||||
|
||||
# .nfs files are created when an open file is removed but is still being accessed
|
||||
.nfs*
|
||||
|
||||
### macOS ###
|
||||
# General
|
||||
.DS_Store
|
||||
.AppleDouble
|
||||
.LSOverride
|
||||
|
||||
# Icon must end with two \r
|
||||
Icon
|
||||
|
||||
# Thumbnails
|
||||
._*
|
||||
|
||||
# Files that might appear in the root of a volume
|
||||
.DocumentRevisions-V100
|
||||
.fseventsd
|
||||
.Spotlight-V100
|
||||
.TemporaryItems
|
||||
.Trashes
|
||||
.VolumeIcon.icns
|
||||
.com.apple.timemachine.donotpresent
|
||||
|
||||
# Directories potentially created on remote AFP share
|
||||
.AppleDB
|
||||
.AppleDesktop
|
||||
Network Trash Folder
|
||||
Temporary Items
|
||||
.apdisk
|
||||
|
||||
### macOS Patch ###
|
||||
# iCloud generated files
|
||||
*.icloud
|
||||
|
||||
### Rust ###
|
||||
# Generated by Cargo
|
||||
# will have compiled files and executables
|
||||
debug/
|
||||
target/
|
||||
|
||||
# Remove Cargo.lock from gitignore if creating an executable, leave it for libraries
|
||||
# More information here https://doc.rust-lang.org/cargo/guide/cargo-toml-vs-cargo-lock.html
|
||||
Cargo.lock
|
||||
|
||||
# These are backup files generated by rustfmt
|
||||
**/*.rs.bk
|
||||
|
||||
# MSVC Windows builds of rustc generate these, which store debugging information
|
||||
*.pdb
|
||||
|
||||
### rust-analyzer ###
|
||||
# Can be generated by other build systems other than cargo (ex: bazelbuild/rust_rules)
|
||||
rust-project.json
|
||||
|
||||
|
||||
### VisualStudioCode ###
|
||||
.vscode/*
|
||||
!.vscode/settings.json
|
||||
!.vscode/tasks.json
|
||||
!.vscode/launch.json
|
||||
!.vscode/extensions.json
|
||||
!.vscode/*.code-snippets
|
||||
|
||||
# Local History for Visual Studio Code
|
||||
.history/
|
||||
|
||||
# Built Visual Studio Code Extensions
|
||||
*.vsix
|
||||
|
||||
### VisualStudioCode Patch ###
|
||||
# Ignore all local history of files
|
||||
.history
|
||||
.ionide
|
||||
|
||||
### Windows ###
|
||||
# Windows thumbnail cache files
|
||||
Thumbs.db
|
||||
Thumbs.db:encryptable
|
||||
ehthumbs.db
|
||||
ehthumbs_vista.db
|
||||
|
||||
# Dump file
|
||||
*.stackdump
|
||||
|
||||
# Folder config file
|
||||
[Dd]esktop.ini
|
||||
|
||||
# Recycle Bin used on file shares
|
||||
$RECYCLE.BIN/
|
||||
|
||||
# Windows Installer files
|
||||
*.cab
|
||||
*.msi
|
||||
*.msix
|
||||
*.msm
|
||||
*.msp
|
||||
|
||||
# Windows shortcuts
|
||||
*.lnk
|
||||
|
||||
# End of https://www.toptal.com/developers/gitignore/api/windows,visualstudiocode,rust,macos,linux,rust-analyzer
|
||||
|
||||
# Custom rules (everything added below won't be overridden by 'Generate .gitignore File' if you use 'Update' option)
|
||||
|
||||
sample.html
|
3
.vscode/settings.json
vendored
Normal file
3
.vscode/settings.json
vendored
Normal file
|
@ -0,0 +1,3 @@
|
|||
{
|
||||
"licenser.license": "0BSD"
|
||||
}
|
11
Cargo.toml
Normal file
11
Cargo.toml
Normal file
|
@ -0,0 +1,11 @@
|
|||
[package]
|
||||
name = "mininews"
|
||||
version = "0.1.0"
|
||||
edition = "2021"
|
||||
|
||||
[dependencies]
|
||||
anyhow = "1.0.95"
|
||||
chrono = { version = "0.4.39", default-features = false, features = ["now"] }
|
||||
kuchikiki = "0.8.2"
|
||||
rss = { version = "2.0.11" }
|
||||
uuid = { version = "1.12.1", features = ["v7"] }
|
12
LICENSE
Normal file
12
LICENSE
Normal file
|
@ -0,0 +1,12 @@
|
|||
Zero-Clause BSD / Free Public License 1.0.0 (0BSD)
|
||||
|
||||
Permission to use, copy, modify, and/or distribute this software for any purpose
|
||||
with or without fee is hereby granted.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
|
||||
WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY
|
||||
AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT,
|
||||
INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM
|
||||
LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR
|
||||
OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
|
||||
PERFORMANCE OF THIS SOFTWARE.
|
3
README.md
Normal file
3
README.md
Normal file
|
@ -0,0 +1,3 @@
|
|||
# Mininews
|
||||
|
||||
A simple RSS generator for [Wikipedia's Current Events](https://en.wikipedia.org/wiki/Portal:Current_events). Inspired by [tom-james-watson's detoxed.news](https://github.com/tom-james-watson/detoxed.news) project.
|
1
src/lib.rs
Normal file
1
src/lib.rs
Normal file
|
@ -0,0 +1 @@
|
|||
/// Extraction of dated event blocks from the Current Events page HTML.
pub mod parser;
|
58
src/main.rs
Normal file
58
src/main.rs
Normal file
|
@ -0,0 +1,58 @@
|
|||
use anyhow::{Context, Error, Result};
|
||||
use chrono::{Days, NaiveTime, Utc};
|
||||
use mininews::parser::{parse, EventBlock, PAGE_URL};
|
||||
use rss::{ChannelBuilder, Guid, Item, ItemBuilder};
|
||||
use uuid::{Timestamp, Uuid};
|
||||
|
||||
fn generate_feed(items: Vec<EventBlock>) -> Result<()> {
|
||||
let mut channel = ChannelBuilder::default()
|
||||
.title("Mininews")
|
||||
.link("https://example.com")
|
||||
.description("An RSS feed from Wikipedia's Current Events")
|
||||
.build();
|
||||
|
||||
channel.set_items(
|
||||
items
|
||||
.iter()
|
||||
.filter(|x| x.date < Utc::now().date_naive())
|
||||
.map(|x| {
|
||||
let pub_date = x
|
||||
.date
|
||||
.checked_add_days(Days::new(1))
|
||||
.context("failed to add to date")?
|
||||
.and_time(NaiveTime::MIN)
|
||||
.and_utc();
|
||||
Ok::<Item, Error>(
|
||||
ItemBuilder::default()
|
||||
.title(x.date.to_string())
|
||||
.link(Some(PAGE_URL.to_string()))
|
||||
.pub_date(pub_date.to_rfc2822())
|
||||
.guid(Guid {
|
||||
permalink: false,
|
||||
value: Uuid::new_v7(Timestamp::from_unix_time(
|
||||
pub_date.timestamp() as u64,
|
||||
0,
|
||||
0,
|
||||
0,
|
||||
))
|
||||
.to_string(),
|
||||
})
|
||||
.description(format!(
|
||||
"Wikipedia current events from {}",
|
||||
x.date.to_string()
|
||||
))
|
||||
.content(x.content.clone())
|
||||
.build(),
|
||||
)
|
||||
})
|
||||
.collect::<Result<Vec<Item>, _>>()?,
|
||||
);
|
||||
|
||||
println!("{:?}", channel.to_string());
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
fn main() -> Result<()> {
|
||||
generate_feed(parse()?)
|
||||
}
|
77
src/parser.rs
Normal file
77
src/parser.rs
Normal file
|
@ -0,0 +1,77 @@
|
|||
use anyhow::{anyhow, Context, Result};
|
||||
use chrono::NaiveDate;
|
||||
use kuchikiki::parse_html;
|
||||
use kuchikiki::traits::*;
|
||||
use kuchikiki::NodeRef;
|
||||
|
||||
/// Origin prepended to link `href`s that start with `/` when event content is rewritten.
const RELATIVE_URL_BASE: &str = "https://en.wikipedia.org";
|
||||
/// URL of Wikipedia's Current Events portal page.
pub const PAGE_URL: &str = "https://en.wikipedia.org/wiki/Portal:Current_events";
|
||||
|
||||
pub struct EventBlock {
|
||||
pub date: NaiveDate,
|
||||
pub content: String,
|
||||
}
|
||||
|
||||
fn parse_event_block(node: &NodeRef) -> Result<EventBlock> {
|
||||
let element = node
|
||||
.as_element()
|
||||
.context("failed to parse element")?
|
||||
.clone();
|
||||
|
||||
let element_attributes = element.attributes.borrow();
|
||||
|
||||
let date_str = element_attributes
|
||||
.get("id")
|
||||
.context("could not get event block id")?;
|
||||
let date = NaiveDate::parse_from_str(date_str, "%Y_%B_%-d")
|
||||
.map_err(|e| anyhow!(e.to_string()))
|
||||
.with_context(|| "failed to parse event block date")?;
|
||||
|
||||
let content = node
|
||||
.select_first("div.current-events-content")
|
||||
.map_err(|_| anyhow!("failed to select event blocks"))?;
|
||||
let content_node = content.as_node();
|
||||
|
||||
// rewrite relative links
|
||||
for link in content_node
|
||||
.select("a")
|
||||
.map_err(|_| anyhow!("failed to select event block links"))?
|
||||
{
|
||||
let mut link_attributes = link
|
||||
.as_node()
|
||||
.as_element()
|
||||
.context("failed to parse event block link")?
|
||||
.attributes
|
||||
.borrow_mut();
|
||||
|
||||
let mut href = link_attributes
|
||||
.get("href")
|
||||
.context("link has no href")?
|
||||
.to_string();
|
||||
|
||||
if !href.starts_with("/") {
|
||||
continue;
|
||||
}
|
||||
|
||||
href = format!("{}{}", RELATIVE_URL_BASE, href);
|
||||
|
||||
link_attributes.insert("href", href);
|
||||
}
|
||||
|
||||
Ok(EventBlock {
|
||||
date,
|
||||
content: content_node.to_string(),
|
||||
})
|
||||
}
|
||||
|
||||
pub fn parse() -> Result<Vec<EventBlock>> {
|
||||
let data = include_str!("../sample.html");
|
||||
let document = parse_html().one(data);
|
||||
|
||||
document
|
||||
.select("div.p-current-events-events div.current-events-main.vevent")
|
||||
.map_err(|_| anyhow!("failed to select event blocks"))?
|
||||
.into_iter()
|
||||
.map(|el| parse_event_block(el.as_node()))
|
||||
.collect()
|
||||
}
|
Loading…
Reference in a new issue