chiark / gitweb /
Try walking the tree. It crashes, oops.
author Simon Tatham <anakin@pobox.com>
Sat, 23 Dec 2023 20:01:06 +0000 (20:01 +0000)
committer Simon Tatham <anakin@pobox.com>
Sat, 23 Dec 2023 20:01:06 +0000 (20:01 +0000)
src/html.rs
src/main.rs

index c663ea32d76628b0fabd412412c7ec7a6f8166ec..6a01cd8cb2295ec3812a6bbfc8539a99bc4f97f2 100644 (file)
@@ -1,9 +1,9 @@
 use html5ever::{Attribute, ExpandedName, ParseOpts, parse_document};
-use html5ever::interface::{ElementFlags, NodeOrText, QualName, QuirksMode,
-                           TreeSink};
+use html5ever::interface::{ElementFlags, NodeOrText, QuirksMode, TreeSink};
+pub use html5ever::QualName;
 use html5ever::tendril::{StrTendril, TendrilSink};
 use std::borrow::Cow;
-use std::collections::HashSet;
+use std::collections::{HashMap, HashSet};
 
 #[derive(Debug)]
 enum TreeNodeContents {
@@ -30,6 +30,12 @@ struct Tree {
     error: Option<String>,
 }
 
+pub trait Receiver {
+    fn start_tag(&mut self, tag: &QualName, attrs: &HashMap<QualName, String>);
+    fn end_tag(&mut self, tag: &QualName, attrs: &HashMap<QualName, String>);
+    fn text(&mut self, text: &str);
+}
+
 impl Tree {
     fn new_node(&mut self, contents: TreeNodeContents) -> Handle {
         let handle = self.nodes.len();
@@ -90,6 +96,25 @@ impl Tree {
             }
         }
     }
+
+    fn walk_recurse<R: Receiver>(&self, node: Handle, receiver: &mut R) {
+        match &self.nodes[node].contents {
+            TreeNodeContents::Text { text } => receiver.text(&text),
+            TreeNodeContents::Element { name, attrs, children } => {
+                let mut attrmap = HashMap::new();
+                for attr in attrs {
+                    attrmap.insert(attr.name.clone(),
+                                   attr.value.to_string());
+                }
+                receiver.start_tag(&name, &attrmap);
+                for child in children {
+                    self.walk_recurse(*child, receiver);
+                }
+                receiver.end_tag(&name, &attrmap);
+            },
+            _ => (),
+        };
+    }
 }
 
 impl TreeSink for Tree {
@@ -256,15 +281,15 @@ impl TreeSink for Tree {
     fn set_quirks_mode(&mut self, _mode: QuirksMode) {}
 }
 
-pub fn render_read(mut input: impl std::io::Read) {
-    let root = parse_document(Tree::default(), ParseOpts::default())
+fn render_read(mut input: impl std::io::Read) -> Tree {
+    parse_document(Tree::default(), ParseOpts::default())
         .from_utf8()
         .read_from(&mut input)
-        .unwrap();
-
-    dbg!(root);
+        .unwrap()
 }
 
-pub fn render(html: &str) {
-    render_read(html.as_bytes())
+pub fn render<R: Receiver>(html: &str, receiver: &mut R) {
+    let tree = render_read(html.as_bytes());
+    dbg!(&tree);
+    tree.walk_recurse(0, receiver);
 }
index d49d3141292708c8d7f564695a340f8781d5b22a..fe2606c8517fb32b7de44d9b675a23121a44eec5 100644 (file)
@@ -1,7 +1,9 @@
 use mastodonochrome::types::*;
 use mastodonochrome::OurError;
 use mastodonochrome::auth::AuthConfig;
-use mastodonochrome::html::render;
+use mastodonochrome::html::{render, QualName};
+use mastodonochrome::html;
+use std::collections::HashMap;
 use std::io::Read;
 use std::io::Write;
 
@@ -112,12 +114,27 @@ fn tui() -> std::io::Result<()> {
     Ok(())
 }
 
+struct TestReceiver {}
+
+impl html::Receiver for TestReceiver {
+    fn start_tag(&mut self, tag: &QualName, attrs: &HashMap<QualName, String>) {
+        dbg!("start", tag, attrs);
+    }
+    fn end_tag(&mut self, tag: &QualName, attrs: &HashMap<QualName, String>) {
+        dbg!("end", tag, attrs);
+    }
+    fn text(&mut self, text: &str) {
+        dbg!("text", text);
+    }
+}
+
 fn main() -> std::io::Result<()> {
     let client = reqwest::blocking::Client::new();
     let body = client.get(
         "https://hachyderm.io/api/v1/statuses/111602135142646031")
         .send().unwrap().text().unwrap();
     let st: Status = serde_json::from_str(&body).unwrap();
-    render(&st.content);
+    let mut t = TestReceiver {};
+    render(&st.content, &mut t);
     Ok(())
 }