-use html2text::{config, Colour};
-use html2text::render::text_renderer::{TextDecorator, TaggedLine};
+use html5ever::{Attribute, ExpandedName, ParseOpts, parse_document};
+use html5ever::interface::{ElementFlags, NodeOrText, QualName, QuirksMode,
+ TreeSink};
+use html5ever::tendril::{StrTendril, TendrilSink};
+use std::borrow::Cow;
+use std::collections::HashSet;
-#[derive(Clone, Debug, Default)]
-pub struct OurDecorator {
+/// The payload of one node in a `Tree`.
+#[derive(Debug)]
+enum TreeNodeContents {
+ /// A run of character data.
+ Text { text: StrTendril },
+ /// An element: its qualified name, its attributes, and the handles of its
+ /// children in document order.
+ Element {
+ name: QualName,
+ attrs: Vec<Attribute>,
+ children: Vec<Handle>,
+ },
+ /// A node whose content is not kept. Used for comments, processing
+ /// instructions and template contents, and for text nodes that have been
+ /// merged into the preceding text node.
+ Boring,
}
-impl OurDecorator {
- pub fn new() -> OurDecorator {
- OurDecorator { }
- }
+/// One entry in `Tree::nodes`: a node's payload plus a link to its parent.
+#[derive(Debug)]
+struct TreeNode {
+ /// What kind of node this is, and the data that goes with it.
+ contents: TreeNodeContents,
+ /// Handle of the node this one is currently attached under, or `None` if
+ /// it has not been attached (or has been detached again).
+ parent: Option<Handle>,
}
-impl TextDecorator for OurDecorator {
- type Annotation = char;
-
- /// Return an annotation and rendering prefix for a link.
- fn decorate_link_start(&mut self, _url: &str)
- -> (String, Self::Annotation) {
- ("".to_string(), 'U')
- }
+/// Identifies a node by its index in `Tree::nodes`.
+type Handle = usize;
- /// Return a suffix for after a link.
- fn decorate_link_end(&mut self) -> String { "".to_string() }
+/// An HTML document tree stored as a flat list of nodes that refer to each
+/// other by index (`Handle`). It is filled in by html5ever through the
+/// `TreeSink` implementation in this file.
+#[derive(Debug, Default)]
+struct Tree {
+ /// Every node created so far, indexed by `Handle`. Nothing in this file
+ /// removes entries, so a handle stays valid once it has been issued.
+ nodes: Vec<TreeNode>,
+ /// Text of the most recent parse error reported by the parser, if any.
+ error: Option<String>,
+}
- /// Return an annotation and rendering prefix for em
- fn decorate_em_start(&mut self) -> (String, Self::Annotation) {
- ("".to_string(), '_')
+impl Tree {
+    /// Store `contents` as a new node with no parent and return its handle.
+    ///
+    /// The handle is the node's index in `self.nodes`.
+    fn new_node(&mut self, contents: TreeNodeContents) -> Handle {
+        self.nodes.push(TreeNode { contents, parent: None });
+        self.nodes.len() - 1
}
- /// Return a suffix for after an em.
- fn decorate_em_end(&mut self) -> String { "".to_string() }
-
- /// Return an annotation and rendering prefix for strong
- fn decorate_strong_start(&mut self) -> (String, Self::Annotation) {
- ("".to_string(), 's')
+    /// Turn the parser's node-or-text argument into a node handle.
+    ///
+    /// The handle of an existing node is returned unchanged; text is wrapped
+    /// in a newly allocated `Text` node.
+    fn not2handle(&mut self, not: NodeOrText<Handle>) -> Handle {
+        match not {
+            NodeOrText::AppendText(text) => {
+                self.new_node(TreeNodeContents::Text { text })
+            }
+            NodeOrText::AppendNode(handle) => handle,
+        }
}
- /// Return a suffix for after a strong.
- fn decorate_strong_end(&mut self) -> String { "".to_string() }
+    /// If the children of `parent` at positions `index2 - 1` and `index2` are
+    /// both text nodes, fold the second into the first.
+    ///
+    /// On a merge the second node's text is appended to the first, the second
+    /// node is turned into a parentless `Boring` node, and it is removed from
+    /// the parent's child list. Nothing happens if `index2` is 0, is past the
+    /// end of the child list, or either node is not text.
+    ///
+    /// # Panics
+    ///
+    /// Panics if `parent` is not an element.
+    fn merge_text_node_into_prev(&mut self, parent: Handle, index2: usize) {
+        // Look up the two adjacent handles; bail out if there is no such pair.
+        let (prev, curr) = match &self.nodes[parent].contents {
+            TreeNodeContents::Element { children, .. } => {
+                if index2 == 0 || index2 >= children.len() {
+                    return;
+                }
+                (children[index2 - 1], children[index2])
+            }
+            _ => panic!("merge_text_node_into_prev with a bogus parent type"),
+        };
- /// Return an annotation and rendering prefix for strikeout
- fn decorate_strikeout_start(&mut self) -> (String, Self::Annotation) {
- ("~".to_string(), ' ')
+
+        // Copy the later node's text out first so that the earlier node can
+        // be borrowed mutably afterwards.
+        let tail = match &self.nodes[curr].contents {
+            TreeNodeContents::Text { text } => text.clone(),
+            _ => return,
+        };
+        match &mut self.nodes[prev].contents {
+            TreeNodeContents::Text { text } => text.push_slice(&tail),
+            _ => return,
+        }
+
+        // The later node is now redundant: blank it and unlink it.
+        self.nodes[curr].contents = TreeNodeContents::Boring;
+        self.nodes[curr].parent = None;
+        match &mut self.nodes[parent].contents {
+            TreeNodeContents::Element { children, .. } => {
+                children.remove(index2);
+            }
+            _ => panic!("we already checked this!"),
+        }
}
+}
+
+/// Lets html5ever's tree builder construct a `Tree`.
+///
+/// Handles are indices into `Tree::nodes`. Comments and processing
+/// instructions are stored as contentless `Boring` nodes, and the doctype and
+/// quirks mode are ignored.
+impl TreeSink for Tree {
+    type Handle = Handle;
+    type Output = Self;
- /// Return a suffix for after a strikeout.
- fn decorate_strikeout_end(&mut self) -> String { "~".to_string() }
+
+    /// Parsing is finished: the tree itself is the parser's output.
+    fn finish(self) -> Self { self }
+
+    /// Return the handle of the document node, which is always 0.
+    ///
+    /// A default-constructed `Tree` has no nodes. Without a dedicated root,
+    /// the first node the parser creates would also receive handle 0: the
+    /// root element would then be appended to itself, and a leading comment
+    /// would make `append` panic. To avoid that, the first call on an empty
+    /// tree allocates a placeholder root, an element named `#document` in
+    /// the empty namespace, which can hold children like any other element.
+    ///
+    /// NOTE(review): this relies on the parser asking for the document
+    /// before it creates any node; confirm for the html5ever version in use.
+    fn get_document(&mut self) -> Handle {
+        if self.nodes.is_empty() {
+            self.new_node(TreeNodeContents::Element {
+                name: QualName::new(
+                    None,
+                    html5ever::Namespace::from(""),
+                    html5ever::LocalName::from("#document"),
+                ),
+                attrs: Vec::new(),
+                children: Vec::new(),
+            });
+        }
+        0
+    }
- /// Return an annotation and rendering prefix for code
- fn decorate_code_start(&mut self) -> (String, Self::Annotation) {
- ("".to_string(), 'c')
+
+    /// Remember a parse error reported by the parser.
+    ///
+    /// Only the most recent message is kept; earlier ones are overwritten.
+    fn parse_error(&mut self, msg: Cow<'static, str>) {
+        self.error = Some(msg.into_owned());
+    }
+
+    /// Return the expanded name of the element `target`.
+    ///
+    /// # Panics
+    ///
+    /// Panics if `target` is not an element.
+    fn elem_name<'a>(&'a self, target: &'a Handle) -> ExpandedName<'a> {
+        match self.nodes[*target].contents {
+            TreeNodeContents::Element { ref name, .. } => name.expanded(),
+            _ => panic!("html5ever promised to only call elem_name on Element"),
+        }
}
- /// Return a suffix for after a code.
- fn decorate_code_end(&mut self) -> String { "".to_string() }
+
+    /// Create a detached element with the given name and attributes and no
+    /// children. The element flags are ignored.
+    fn create_element(&mut self, name: QualName,
+                      attrs: Vec<Attribute>, _flags: ElementFlags)
+                      -> Handle {
+        self.new_node(TreeNodeContents::Element {
+            name,
+            attrs,
+            children: Vec::new(),
+        })
+    }
+
+    /// Comments are not kept: create a `Boring` placeholder node.
+    fn create_comment(&mut self, _text: StrTendril) -> Handle {
+        self.new_node(TreeNodeContents::Boring)
+    }
+
+    /// Processing instructions are not kept: create a `Boring` placeholder
+    /// node.
+    fn create_pi(&mut self, _target: StrTendril, _data: StrTendril)
+                 -> Handle {
+        self.new_node(TreeNodeContents::Boring)
+    }
- /// Return an annotation for the initial part of a preformatted line
- fn decorate_preformat_first(&mut self) -> Self::Annotation { 'c' }
+
+    /// Append `child` as the last child of `parent`.
+    ///
+    /// If `child` is text and the previous last child is a text node, the
+    /// two are merged instead of being kept as separate nodes.
+    ///
+    /// # Panics
+    ///
+    /// Panics if `parent` is not an element.
+    fn append(&mut self, parent: &Handle, child: NodeOrText<Handle>) {
+        let parent = *parent;
+        let child = self.not2handle(child);
+        let index = match self.nodes[parent].contents {
+            TreeNodeContents::Element { ref mut children, .. } => {
+                children.push(child);
+                children.len() - 1
+            }
+            _ => panic!("append with a bogus parent type"),
+        };
+        self.nodes[child].parent = Some(parent);
+        self.merge_text_node_into_prev(parent, index);
+    }
+
+    /// Detach `target` from its parent, if it has one.
+    fn remove_from_parent(&mut self, target: &Handle) {
+        let target = *target;
+        // `take` clears the parent link while telling us what it was.
+        if let Some(parent) = self.nodes[target].parent.take() {
+            if let TreeNodeContents::Element { ref mut children, .. } =
+                self.nodes[parent].contents
+            {
+                children.retain(|&h| h != target);
+            }
+        }
+    }
+
+    /// Insert `new_node` into `sibling`'s parent, immediately before
+    /// `sibling`.
+    ///
+    /// As with `append`, adjacent text nodes are merged, so text inserted
+    /// right after an existing text node is folded into that node.
+    ///
+    /// # Panics
+    ///
+    /// Panics if `sibling` has no parent, if its parent is not an element,
+    /// or if the parent's child list does not contain `sibling`.
+    fn append_before_sibling(&mut self, sibling: &Handle,
+                             new_node: NodeOrText<Handle>) {
+        let child = self.not2handle(new_node);
+        let sibling = *sibling;
- /// Return an annotation for a continuation line when a preformatted
- /// line doesn't fit.
- fn decorate_preformat_cont(&mut self) -> Self::Annotation { 'c' }
+
+        // Unlink from any previous parent.
+        self.remove_from_parent(&child);
- /// Return an annotation and rendering prefix for a link.
- fn decorate_image(&mut self, _src: &str, _title: &str)
- -> (String, Self::Annotation) {
- ("".to_string(), 'm')
+
+        // Link to the new parent, next to the sibling.
+        let parent = match self.nodes[sibling].parent {
+            Some(parent) => parent,
+            None => panic!(
+                "html5ever tried to append_before_sibling to the root"),
+        };
+        let index = match self.nodes[parent].contents {
+            TreeNodeContents::Element { ref mut children, .. } => {
+                match children.iter().position(|h| *h == sibling) {
+                    Some(i) => {
+                        children.insert(i, child);
+                        i
+                    }
+                    None => panic!("node not a child of its parent"),
+                }
+            }
+            _ => panic!("node had a wrong parent type"),
+        };
+        self.nodes[child].parent = Some(parent);
+
+        // Fold the following sibling into the new node if both are text.
+        // This leaves the new node at `index` either way.
+        self.merge_text_node_into_prev(parent, index + 1);
+        // Fold the new node into a text node that now precedes it. This is
+        // the merge the `TreeSink` contract asks for; the sibling itself is
+        // promised not to be a text node.
+        self.merge_text_node_into_prev(parent, index);
+    }
+
+    /// Insert `child` before `element` if `element` has a parent; otherwise
+    /// append it to `prev_element`.
+    fn append_based_on_parent_node(&mut self, element: &Handle,
+                                   prev_element: &Handle,
+                                   child: NodeOrText<Handle>) {
+        if self.nodes[*element].parent.is_some() {
+            self.append_before_sibling(element, child);
+        } else {
+            self.append(prev_element, child);
+        }
}
+
+    /// Add each of `attrs` to the element `target` unless it already has an
+    /// attribute of that name. If `attrs` contains the same name more than
+    /// once, only the first occurrence is added.
+    ///
+    /// # Panics
+    ///
+    /// Panics if `target` is not an element.
+    fn add_attrs_if_missing(&mut self, target: &Handle,
+                            attrs: Vec<Attribute>) {
+        let existing = match self.nodes[*target].contents {
+            TreeNodeContents::Element { attrs: ref mut existing, .. } => {
+                existing
+            }
+            _ => panic!("add_attrs_if_missing to a bogus node type"),
+        };
- /// Return prefix string of header in specific level.
- fn header_prefix(&mut self, level: usize) -> String {
- "#".repeat(level) + " "
+
+        let mut present: HashSet<QualName> =
+            existing.iter().map(|attr| attr.name.clone()).collect();
+        for attr in attrs {
+            // `insert` returns true only for a name not seen before.
+            if present.insert(attr.name.clone()) {
+                existing.push(attr);
+            }
+        }
}
+
+    /// Move every child of `old_parent` to the end of `new_parent`'s child
+    /// list, keeping their order, and leave `old_parent` with no children.
+    ///
+    /// # Panics
+    ///
+    /// Panics if either node is not an element.
+    fn reparent_children(&mut self, old_parent: &Handle,
+                         new_parent: &Handle) {
+        let old_parent = *old_parent;
+        let new_parent = *new_parent;
- /// Return prefix string of quoted block.
- fn quote_prefix(&mut self) -> String { "> ".to_string() }
+
+        // Take the whole child list away from the old parent.
+        let mut moved = match self.nodes[old_parent].contents {
+            TreeNodeContents::Element { ref mut children, .. } => {
+                std::mem::replace(children, Vec::new())
+            }
+            _ => panic!("reparent_children from a bogus node type"),
+        };
- /// Return prefix string of unordered list item.
- fn unordered_item_prefix(&mut self) -> String { " - ".to_string() }
+
+        for &child in &moved {
+            self.nodes[child].parent = Some(new_parent);
+        }
- /// Return prefix string of ith ordered list item.
- fn ordered_item_prefix(&mut self, i: i64) -> String {
- format!(" {}. ", i)
+
+        // Attach the children to the *new* parent, so the child list agrees
+        // with the parent links set above.
+        match self.nodes[new_parent].contents {
+            TreeNodeContents::Element { ref mut children, .. } => {
+                children.append(&mut moved);
+            }
+            _ => panic!("reparent_children to a bogus node type"),
+        }
}
- /// Return a new decorator of the same type which can be used
- /// for sub blocks.
- fn make_subblock_decorator(&self) -> Self {
- OurDecorator::new()
- }
+
+    /// The doctype is not recorded.
+    fn append_doctype_to_document(&mut self, _name: StrTendril,
+                                  _public_id: StrTendril,
+                                  _system_id: StrTendril) {}
- /// Return an annotation corresponding to adding colour, or none.
- fn push_colour(&mut self, col: Colour) -> Option<Self::Annotation> {
- dbg!(col);
- match col.r {
- 1 => Some('@'),
- 4 => Some('#'),
- _ => None,
- }
- }
+
+    /// Return a node standing in for the contents of a template element.
+    ///
+    /// NOTE(review): each call allocates a new `Boring` node, and `append`
+    /// panics when its parent is not an element, so input that puts content
+    /// inside a template element presumably panics. Confirm, and decide how
+    /// template contents should be stored.
+    fn get_template_contents(&mut self, _target: &Handle) -> Handle {
+        self.new_node(TreeNodeContents::Boring)
+    }
- /// Pop the last colour pushed if we pushed one.
- fn pop_colour(&mut self) -> bool {
- true
- }
+
+    /// Two handles refer to the same node exactly when they are equal.
+    fn same_node(&self, x: &Handle, y: &Handle) -> bool { x == y }
+
+    /// The quirks mode is not recorded.
+    fn set_quirks_mode(&mut self, _mode: QuirksMode) {}
+}
- /// Finish with a document, and return extra lines (eg footnotes)
- /// to add to the rendered text.
- fn finalise(&mut self, _links: Vec<String>)
- -> Vec<TaggedLine<Self::Annotation>> {
- Vec::new()
- }
+/// Parse the UTF-8 HTML document read from `input` into a `Tree` and dump
+/// the resulting tree with `dbg!`.
+///
+/// # Panics
+///
+/// Panics if `read_from` returns an error.
+pub fn render_read(mut input: impl std::io::Read) {
+    let parser =
+        parse_document(Tree::default(), ParseOpts::default()).from_utf8();
+    let root = parser.read_from(&mut input).unwrap();
+
+    dbg!(root);
}
-pub fn render(html: &str)
- -> Result<Vec<TaggedLine<Vec<char>>>, html2text::Error> {
- config::with_decorator(OurDecorator::new())
- .add_css(r##"
-.mention { color: #010203; }
-.hashtag { color: #040506; }
-"##)?
- .lines_from_read(html.as_bytes(), 80)
+/// Parse the HTML document in `html` by passing its bytes to `render_read`.
+pub fn render(html: &str) {
+    let bytes = html.as_bytes();
+    render_read(bytes)
}