From 0a3e2eaaf22a91e404ff50407b2de314a29f895e Mon Sep 17 00:00:00 2001 From: Simon Tatham Date: Sat, 23 Dec 2023 20:01:06 +0000 Subject: [PATCH] Try walking the tree. It crashes, oops. --- src/html.rs | 45 +++++++++++++++++++++++++++++++++++---------- src/main.rs | 21 +++++++++++++++++++-- 2 files changed, 54 insertions(+), 12 deletions(-) diff --git a/src/html.rs b/src/html.rs index c663ea3..6a01cd8 100644 --- a/src/html.rs +++ b/src/html.rs @@ -1,9 +1,9 @@ use html5ever::{Attribute, ExpandedName, ParseOpts, parse_document}; -use html5ever::interface::{ElementFlags, NodeOrText, QualName, QuirksMode, - TreeSink}; +use html5ever::interface::{ElementFlags, NodeOrText, QuirksMode, TreeSink}; +pub use html5ever::QualName; use html5ever::tendril::{StrTendril, TendrilSink}; use std::borrow::Cow; -use std::collections::HashSet; +use std::collections::{HashMap, HashSet}; #[derive(Debug)] enum TreeNodeContents { @@ -30,6 +30,12 @@ struct Tree { error: Option, } +pub trait Receiver { + fn start_tag(&mut self, tag: &QualName, attrs: &HashMap); + fn end_tag(&mut self, tag: &QualName, attrs: &HashMap); + fn text(&mut self, text: &str); +} + impl Tree { fn new_node(&mut self, contents: TreeNodeContents) -> Handle { let handle = self.nodes.len(); @@ -90,6 +96,25 @@ impl Tree { } } } + + fn walk_recurse(&self, node: Handle, receiver: &mut R) { + match &self.nodes[node].contents { + TreeNodeContents::Text { text } => receiver.text(&text), + TreeNodeContents::Element { name, attrs, children } => { + let mut attrmap = HashMap::new(); + for attr in attrs { + attrmap.insert(attr.name.clone(), + attr.value.to_string()); + } + receiver.start_tag(&name, &attrmap); + for child in children { + self.walk_recurse(*child, receiver); + } + receiver.end_tag(&name, &attrmap); + }, + _ => (), + }; + } } impl TreeSink for Tree { @@ -256,15 +281,15 @@ impl TreeSink for Tree { fn set_quirks_mode(&mut self, _mode: QuirksMode) {} } -pub fn render_read(mut input: impl std::io::Read) { - let root = parse_document(Tree::default(), ParseOpts::default()) +fn render_read(mut input: impl std::io::Read) -> Tree { + parse_document(Tree::default(), ParseOpts::default()) .from_utf8() .read_from(&mut input) - .unwrap(); - - dbg!(root); + .unwrap() } -pub fn render(html: &str) { - render_read(html.as_bytes()) +pub fn render(html: &str, receiver: &mut R) { + let tree = render_read(html.as_bytes()); + dbg!(&tree); + tree.walk_recurse(0, receiver); } diff --git a/src/main.rs b/src/main.rs index d49d314..fe2606c 100644 --- a/src/main.rs +++ b/src/main.rs @@ -1,7 +1,9 @@ use mastodonochrome::types::*; use mastodonochrome::OurError; use mastodonochrome::auth::AuthConfig; -use mastodonochrome::html::render; +use mastodonochrome::html::{render, QualName}; +use mastodonochrome::html; +use std::collections::HashMap; use std::io::Read; use std::io::Write; @@ -112,12 +114,27 @@ fn tui() -> std::io::Result<()> { Ok(()) } +struct TestReceiver {} + +impl html::Receiver for TestReceiver { + fn start_tag(&mut self, tag: &QualName, attrs: &HashMap) { + dbg!("start", tag, attrs); + } + fn end_tag(&mut self, tag: &QualName, attrs: &HashMap) { + dbg!("end", tag, attrs); + } + fn text(&mut self, text: &str) { + dbg!("text", text); + } +} + fn main() -> std::io::Result<()> { let client = reqwest::blocking::Client::new(); let body = client.get( "https://hachyderm.io/api/v1/statuses/111602135142646031") .send().unwrap().text().unwrap(); let st: Status = serde_json::from_str(&body).unwrap(); - render(&st.content); + let mut t = TestReceiver {}; + render(&st.content, &mut t); Ok(()) } -- 2.30.2