From: Simon Tatham Date: Sat, 23 Dec 2023 15:22:29 +0000 (+0000) Subject: First cut at parsing HTML of a toot. X-Git-Url: https://www.chiark.greenend.org.uk/ucgi/~ian/git?a=commitdiff_plain;h=4c5fa83c99a6785caa0f930be961f875aa9acd24;p=mastodonochrome.git First cut at parsing HTML of a toot. But it doesn't work right, because the CSS is ignored. I think that's because html2text only processes CSS classes on elements, not on where the Mastodon server puts them. --- diff --git a/Cargo.toml b/Cargo.toml index 164d99f..a59e453 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -7,7 +7,7 @@ edition = "2021" [dependencies] chrono = { version = "0.4.31", features = ["serde"] } crossterm = "0.27.0" -html2text = "0.9.0" +html2text = { version = "0.9.0", features = ["css"] } ratatui = "0.25.0" reqwest = { version = "0.11.23", features = ["blocking"] } serde = { version = "1.0.193", features = ["derive"] } diff --git a/src/html.rs b/src/html.rs new file mode 100644 index 0000000..11b6a1b --- /dev/null +++ b/src/html.rs @@ -0,0 +1,124 @@ +use html2text::{config, Colour}; +use html2text::render::text_renderer::{TextDecorator, TaggedLine}; + +#[derive(Clone, Debug, Default)] +pub struct OurDecorator { +} + +impl OurDecorator { + pub fn new() -> OurDecorator { + OurDecorator { } + } +} + +impl TextDecorator for OurDecorator { + type Annotation = char; + + /// Return an annotation and rendering prefix for a link. + fn decorate_link_start(&mut self, _url: &str) + -> (String, Self::Annotation) { + ("".to_string(), 'U') + } + + /// Return a suffix for after a link. + fn decorate_link_end(&mut self) -> String { "".to_string() } + + /// Return an annotation and rendering prefix for em + fn decorate_em_start(&mut self) -> (String, Self::Annotation) { + ("".to_string(), '_') + } + + /// Return a suffix for after an em. + fn decorate_em_end(&mut self) -> String { "".to_string() } + + /// Return an annotation and rendering prefix for strong + fn decorate_strong_start(&mut self) -> (String, Self::Annotation) { + ("".to_string(), 's') + } + + /// Return a suffix for after a strong. + fn decorate_strong_end(&mut self) -> String { "".to_string() } + + /// Return an annotation and rendering prefix for strikeout + fn decorate_strikeout_start(&mut self) -> (String, Self::Annotation) { + ("~".to_string(), ' ') + } + + /// Return a suffix for after a strikeout. + fn decorate_strikeout_end(&mut self) -> String { "~".to_string() } + + /// Return an annotation and rendering prefix for code + fn decorate_code_start(&mut self) -> (String, Self::Annotation) { + ("".to_string(), 'c') + } + + /// Return a suffix for after a code. + fn decorate_code_end(&mut self) -> String { "".to_string() } + + /// Return an annotation for the initial part of a preformatted line + fn decorate_preformat_first(&mut self) -> Self::Annotation { 'c' } + + /// Return an annotation for a continuation line when a preformatted + /// line doesn't fit. + fn decorate_preformat_cont(&mut self) -> Self::Annotation { 'c' } + + /// Return an annotation and rendering prefix for a link. + fn decorate_image(&mut self, _src: &str, _title: &str) + -> (String, Self::Annotation) { + ("".to_string(), 'm') + } + + /// Return prefix string of header in specific level. + fn header_prefix(&mut self, level: usize) -> String { + "#".repeat(level) + " " + } + + /// Return prefix string of quoted block. + fn quote_prefix(&mut self) -> String { "> ".to_string() } + + /// Return prefix string of unordered list item. + fn unordered_item_prefix(&mut self) -> String { " - ".to_string() } + + /// Return prefix string of ith ordered list item. + fn ordered_item_prefix(&mut self, i: i64) -> String { + format!(" {}. ", i) + } + + /// Return a new decorator of the same type which can be used + /// for sub blocks. + fn make_subblock_decorator(&self) -> Self { + OurDecorator::new() + } + + /// Return an annotation corresponding to adding colour, or none. + fn push_colour(&mut self, col: Colour) -> Option { + dbg!(col); + match col.r { + 1 => Some('@'), + 4 => Some('#'), + _ => None, + } + } + + /// Pop the last colour pushed if we pushed one. + fn pop_colour(&mut self) -> bool { + true + } + + /// Finish with a document, and return extra lines (eg footnotes) + /// to add to the rendered text. + fn finalise(&mut self, _links: Vec) + -> Vec> { + Vec::new() + } +} + +pub fn render(html: &str) + -> Result>>, html2text::Error> { + config::with_decorator(OurDecorator::new()) + .add_css(r##" +.mention { color: #010203; } +.hashtag { color: #040506; } +"##)? + .lines_from_read(html.as_bytes(), 80) +} diff --git a/src/lib.rs b/src/lib.rs index bf2f457..976d77a 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -1,5 +1,6 @@ pub mod types; pub mod auth; +pub mod html; #[derive(Debug)] pub enum OurError { diff --git a/src/main.rs b/src/main.rs index d73eb86..04d66cb 100644 --- a/src/main.rs +++ b/src/main.rs @@ -1,6 +1,7 @@ -// use mastodonochrome::types::*; +use mastodonochrome::types::*; use mastodonochrome::OurError; use mastodonochrome::auth::AuthConfig; +use mastodonochrome::html::render; use std::io::Read; use std::io::Write; @@ -19,7 +20,7 @@ use ratatui::{ use std::io::stdout; #[allow(unused)] -fn streaming(auth: &AuthConfig) -> Result<(), mastodonochrome::OurError> { +fn streaming() -> Result<(), mastodonochrome::OurError> { let auth = AuthConfig::load()?; let client = reqwest::blocking::Client::new(); @@ -61,7 +62,8 @@ fn streaming(auth: &AuthConfig) -> Result<(), mastodonochrome::OurError> { Ok(()) } -fn main() -> std::io::Result<()> { +#[allow(unused)] +fn tui() -> std::io::Result<()> { stdout().execute(EnterAlternateScreen)?; enable_raw_mode()?; let mut terminal = Terminal::new(CrosstermBackend::new(stdout()))?; @@ -109,3 +111,13 @@ fn main() -> std::io::Result<()> { disable_raw_mode()?; Ok(()) } + +fn main() -> std::io::Result<()> { + let client = reqwest::blocking::Client::new(); + let body = client.get( + "https://hachyderm.io/api/v1/statuses/111602135142646031") + .send().unwrap().text().unwrap(); + let st: Status = serde_json::from_str(&body).unwrap(); + dbg!(render(&st.content).unwrap()); + Ok(()) +} diff --git a/src/types.rs b/src/types.rs index 4da4ea1..bedb692 100644 --- a/src/types.rs +++ b/src/types.rs @@ -5,45 +5,45 @@ use std::option::Option; #[derive(Serialize, Deserialize, Debug)] pub struct AccountField { - name: String, - value: String, - verified_at: Option>, + pub name: String, + pub value: String, + pub verified_at: Option>, } #[derive(Serialize, Deserialize, Debug)] pub struct Account { - id: String, - username: String, - acct: String, - url: String, - display_name: String, - note: String, - avatar: String, - avatar_static: String, - header: String, - header_static: String, - locked: bool, - fields: Vec, - // emojis: Vec, - bot: bool, - group: bool, - discoverable: Option, - noindex: Option, - moved: Option>, - suspended: Option, - limited: Option, - created_at: DateTime, - last_status_at: Option, // this lacks a timezone, so serde - // can't deserialize it in the obvious way - statuses_count: u64, - followers_count: u64, - following_count: u64, + pub id: String, + pub username: String, + pub acct: String, + pub url: String, + pub display_name: String, + pub note: String, + pub avatar: String, + pub avatar_static: String, + pub header: String, + pub header_static: String, + pub locked: bool, + pub fields: Vec, + // pub emojis: Vec, + pub bot: bool, + pub group: bool, + pub discoverable: Option, + pub noindex: Option, + pub moved: Option>, + pub suspended: Option, + pub limited: Option, + pub created_at: DateTime, + pub last_status_at: Option, // lacks a timezone, so serde can't + // deserialize it in the obvious way + pub statuses_count: u64, + pub followers_count: u64, + pub following_count: u64, } #[derive(Serialize, Deserialize, Debug)] pub struct Application { - name: String, - website: Option, + pub name: String, + pub website: Option, } #[derive(Serialize, Deserialize, Debug)] @@ -65,53 +65,53 @@ pub enum MediaType { #[derive(Serialize, Deserialize, Debug)] pub struct MediaAttachment { - id: String, - #[serde(rename="type")] mediatype: MediaType, - url: String, - preview_url: String, - remote_url: Option, - description: Option, + pub id: String, + #[serde(rename="type")] pub mediatype: MediaType, + pub url: String, + pub preview_url: String, + pub remote_url: Option, + pub description: Option, } #[derive(Serialize, Deserialize, Debug)] pub struct StatusMention { - id: String, - username: String, - url: String, - acct: String, + pub id: String, + pub username: String, + pub url: String, + pub acct: String, } #[derive(Serialize, Deserialize, Debug)] pub struct Status { - id: String, - uri: String, - created_at: DateTime, - account: Account, - content: String, - visibility: Visibility, - sensitive: bool, - spoiler_text: String, - media_attachments: Vec, - application: Option, - mentions: Vec, - // tags: Vec, - // emojis: Vec, - reblogs_count: u64, - favourites_count: u64, - replies_count: u64, - url: String, - in_reply_to_id: Option, - in_reply_to_account_id: Option, - reblog: Option>, - // poll: Option, - // card: Option, - language: Option, - text: Option, - edited_at: Option>, - favourited: Option, - reblogged: Option, - muted: Option, - bookmarked: Option, - pinned: Option, - filtered: Option, + pub id: String, + pub uri: String, + pub created_at: DateTime, + pub account: Account, + pub content: String, + pub visibility: Visibility, + pub sensitive: bool, + pub spoiler_text: String, + pub media_attachments: Vec, + pub application: Option, + pub mentions: Vec, + // pub tags: Vec, + // pub emojis: Vec, + pub reblogs_count: u64, + pub favourites_count: u64, + pub replies_count: u64, + pub url: String, + pub in_reply_to_id: Option, + pub in_reply_to_account_id: Option, + pub reblog: Option>, + // pub poll: Option, + // pub card: Option, + pub language: Option, + pub text: Option, + pub edited_at: Option>, + pub favourited: Option, + pub reblogged: Option, + pub muted: Option, + pub bookmarked: Option, + pub pinned: Option, + pub filtered: Option, }