chiark / gitweb /
Report markup parse failures inline.
author: Simon Tatham <anakin@pobox.com>
Sat, 2 Dec 2023 17:58:05 +0000 (17:58 +0000)
committer: Simon Tatham <anakin@pobox.com>
Sat, 2 Dec 2023 17:58:05 +0000 (17:58 +0000)
Don't want to crash the whole client for them!

text.py

diff --git a/text.py b/text.py
index a57a878618da3c709fbc95e79c511113235f6c8e..4c0e47339aa570baffe9a221ea5e753b4fea05fa 100644
--- a/text.py
+++ b/text.py
@@ -24,6 +24,7 @@ colourmap = {
     'u': [0, 1, 4, 34], # URL
     'M': [0, 1, 4, 35], # media URL
     'm': [0, 35], # media description
+    '!': [0, 1, 7, 43, 31], # error report
 }
 
 class ColouredString:
@@ -186,6 +187,7 @@ class HTMLParser(html.parser.HTMLParser):
         super().__init__()
         self.paras = [Paragraph()]
         self.colourstack = [' ']
+        self.bad_tags = set()
 
     def handle_starttag(self, tag, attrs):
         attrdict = dict(attrs)
@@ -224,7 +226,7 @@ class HTMLParser(html.parser.HTMLParser):
         # and _perhaps_ that ought to generate paragraphs with a
         # 'truncate, don't wrap' attribute?
 
-        print("UNKNOWN START", tag, attrs)
+        self.bad_tags.add(tag)
 
     def handle_endtag(self, tag):
         if tag == "span":
@@ -250,6 +252,14 @@ class HTMLParser(html.parser.HTMLParser):
         while len(self.paras) > 0 and self.paras[-1].empty():
             self.paras.pop()
 
+        if len(self.bad_tags) > 0:
+            error_para = Paragraph()
+            text = "Unsupported markup tags: " + " ".join(
+                f"<{tag}>" for tag in sorted(self.bad_tags))
+            error_para.add(ColouredString(text, '!'))
+            error_para.end_word()
+            self.paras[0:0] = [error_para, Paragraph()]
+
 class RenderTests(unittest.TestCase):
     def testBlank(self):
         bl = BlankLine()
@@ -328,6 +338,15 @@ class RenderTests(unittest.TestCase):
                            '          @@@@@@@@@'),
         ])
 
+    def testError(self):
+        html = """<p>Test of some <nonsense>unsupported</nonsense> <blither>HTML tags</blither></p>"""
+        self.assertEqual(self.parse_html(html), [
+            ColouredString('Unsupported markup tags: <blither> <nonsense>',
+                           '!!!!!!!!!!! !!!!!! !!!!! !!!!!!!!! !!!!!!!!!!'),
+            ColouredString(''),
+            ColouredString('Test of some unsupported HTML tags'),
+        ])
+
     def testMedia(self):
         ma = Media('https://a.b/c', 'foo foo foo foo foo foo foo')
         self.assertEqual(list(ma.render(16)), [