chiark / gitweb /
First cut at printing post content.
author Simon Tatham <anakin@pobox.com>
Fri, 1 Dec 2023 17:39:19 +0000 (17:39 +0000)
committer Simon Tatham <anakin@pobox.com>
Fri, 1 Dec 2023 17:39:19 +0000 (17:39 +0000)
mastodonochrome
text.py

index 6614d08df8cdb4c1afe5be33731c5f4946be6fe2..8621062849100c1893e305a675608b0645cbf0bc 100755 (executable)
@@ -58,17 +58,26 @@ class Post:
 
         self.account = data['account']
 
+        hp = text.HTMLParser()
+        hp.feed(data['content'])
+        hp.done()
+        self.content = hp.paras
+
     def text(self):
         yield text.SeparatorLine(self.datestamp)
         yield text.FromLine('@' + self.account['acct'],
                             self.account['display_name'])
+        yield text.BlankLine()
+        yield from self.content
+        yield text.BlankLine()
 
 class MainUI(Client):
     def run(self):
         for item in self.get_public("timelines/public", limit=10):
             p = Post(item)
             for thing in p.text():
-                print(thing.render(79))
+                for line in thing.render(80):
+                    print(line)
 
 def main():
     parser = argparse.ArgumentParser(
diff --git a/text.py b/text.py
index 00f9c60b44531fb2f359aae7eeea3b60a78e8422..faa3eb3d3fddbe8cc8f27e0b8b2651232df8518b 100644 (file)
--- a/text.py
+++ b/text.py
@@ -1,6 +1,12 @@
 # Represent colourised terminal text in a width-independent form.
 
+import html.parser
 import time
+import wcwidth
+
+class BlankLine:
+    def render(self, width):
+        yield ""
 
 class SeparatorLine:
     def __init__(self, timestamp):
@@ -11,7 +17,7 @@ class SeparatorLine:
                              time.localtime(self.timestamp))
         # FIXME: colours
         suffix = "[" + date + "]--"
-        return "-" * (width - len(suffix)) + suffix
+        yield "-" * (width - 1 - len(suffix)) + suffix
 
 class FromLine:
     def __init__(self, account, nameline):
@@ -21,4 +27,86 @@ class FromLine:
     def render(self, width):
         # FIXME: truncate
 
-        return f"From: {self.nameline} ({self.account})"
+        yield f"From: {self.nameline} ({self.account})"
+
+class Paragraph:
+    def __init__(self):
+        self.words = []
+        self.unfinished_word = ''
+
+    def render(self, width):
+        # For the moment, greedy algorithm. We can worry about cleverness later
+        line, space = '', ''
+        for word in self.words:
+            if line != "" and wcwidth.wcswidth(line + space + word) >= width:
+                yield line
+                line, space = '', ''
+
+            line += space + word
+            space = ' '
+
+            if wcwidth.wcswidth(line) >= width:
+                # FIXME: wrap explicitly?
+                yield line
+                line, space = '', ''
+
+        if line != "":
+            yield line
+
+    def empty(self):
+        return len(self.words) == 0
+
+    def end_word(self):
+        if len(self.unfinished_word) > 0:
+            self.words.append(self.unfinished_word)
+            self.unfinished_word = ''
+
+    def add(self, text):
+        for c in text:
+            if c == ' ':
+                self.end_word()
+            else:
+                self.unfinished_word += c
+
+class HTMLParser(html.parser.HTMLParser):
+    def __init__(self):
+        super().__init__()
+        self.paras = [Paragraph()]
+
+    def handle_starttag(self, tag, attrs):
+        if tag in {"a", "span"}:
+            # FIXME: maybe handle some cases of this to spot hashtags,
+            # usernames etc?
+            return
+
+        if tag == "p":
+            if not self.paras[-1].empty():
+                self.paras.append(Paragraph())
+                self.paras.append(Paragraph())
+            return
+
+        if tag == "br":
+            self.paras.append(Paragraph())
+            return
+
+        print("UNKNOWN START", tag, attrs)
+
+    def handle_endtag(self, tag):
+        if tag in {"a", "span"}:
+            return
+
+        if tag == "p":
+            if not self.paras[-1].empty():
+                self.paras.append(Paragraph())
+            return
+
+    def handle_data(self, data):
+        self.paras[-1].add(data)
+
+    def done(self):
+        for para in self.paras:
+            para.end_word()
+        while len(self.paras) > 0 and self.paras[0].empty():
+            self.paras.pop(0)
+        while len(self.paras) > 0 and self.paras[-1].empty():
+            self.paras.pop()