From: Vladimír Vondruš <mosra@centrum.cz>
Date: Tue, 19 Dec 2017 15:46:32 +0000 (+0100)
Subject: m.htmlsanity: improve detection for applying typography improvements.
X-Git-Url: https://www.chiark.greenend.org.uk/ucgi/~cjwatson/git?a=commitdiff_plain;h=73796295ad667e0af5681c65774f3afbe323aa90;p=blog.git

m.htmlsanity: improve detection for applying typography improvements.

Previously, smart quotes were applied to arbitrary fields and even to field
names. Ugh. Also, the author name was hyphenated and smart-quoted, while
categories and tags were not.

Also, have fun spotting the differences in the test files :P
---

diff --git a/doc/plugins/htmlsanity.rst b/doc/plugins/htmlsanity.rst
index bfc786c2..ace16bdb 100644
--- a/doc/plugins/htmlsanity.rst
+++ b/doc/plugins/htmlsanity.rst
@@ -112,8 +112,9 @@ French-style quotes.
 
 This plugin contains a patched version of
 `smart_quotes option <http://docutils.sourceforge.net/docs/user/smartquotes.html>`_
-from Docutils, which is based off SmartyPants, but with proper language awareness
-on top. See for yourself:
+from Docutils, which is based off SmartyPants, but with proper language
+awareness on top. It is applied to whole document contents and fields that are
+included in the :py:`FORMATTED_FIELDS`. See for yourself:
 
 .. code-figure::
 
diff --git a/pelican-plugins/m/htmlsanity.py b/pelican-plugins/m/htmlsanity.py
index 6419314a..42046a98 100644
--- a/pelican-plugins/m/htmlsanity.py
+++ b/pelican-plugins/m/htmlsanity.py
@@ -72,6 +72,28 @@ def extract_document_language(document):
 
     return language
 
+def can_apply_typography(txtnode):
+    # Exclude:
+    #  - literals and spans inside literals
+    #  - raw code (such as SVG)
+    #  - field names
+    #  - bibliographic elements (author, date, ... fields)
+    if isinstance(txtnode.parent, nodes.literal) or \
+       isinstance(txtnode.parent.parent, nodes.literal) or \
+       isinstance(txtnode.parent, nodes.raw) or \
+       isinstance(txtnode.parent, nodes.field_name) or \
+       isinstance(txtnode.parent, nodes.Bibliographic):
+        return False
+
+    # From fields include only the ones that are in FORMATTED_FIELDS
+    if isinstance(txtnode.parent.parent, nodes.field_body):
+        field_name_index = txtnode.parent.parent.parent.first_child_matching_class(nodes.field_name)
+        if txtnode.parent.parent.parent[field_name_index][0] in settings['FORMATTED_FIELDS']:
+            return True
+        return False
+
+    return True
+
 class SmartQuotes(docutils.transforms.universal.SmartQuotes):
     """Smart quote transform
 
@@ -109,15 +131,14 @@ class SmartQuotes(docutils.transforms.universal.SmartQuotes):
                 continue
 
             # list of text nodes in the "text block":
-            # Patched here to exclude text spans inside literal nodes.
-            # Hopefully two nesting levels are enough.
-            txtnodes = [txtnode for txtnode in node.traverse(nodes.Text)
-                        if not isinstance(txtnode.parent,
-                                          nodes.option_string) and
-                           not isinstance(txtnode.parent,
-                                          nodes.literal) and
-                           not isinstance(txtnode.parent.parent,
-                                          nodes.literal)]
+            # Patched here to exclude more stuff.
+            txtnodes = []
+            for txtnode in node.traverse(nodes.Text):
+                if not can_apply_typography(txtnode): continue
+                # Don't convert -- in option strings
+                if isinstance(txtnode.parent, nodes.option_string): continue
+
+                txtnodes += [txtnode]
 
             # language: use typographical quotes for language "lang"
             lang = node.get_language_code(document_language)
@@ -169,27 +190,14 @@ class Pyphen(Transform):
 
         # Go through all text words and hyphenate them
         for node in self.document.traverse(nodes.TextElement):
-            # Skip preformatted text blocks, special elements and field names
-            if isinstance(node, (nodes.FixedTextElement, nodes.Special, nodes.field_name)):
+            # Skip preformatted text blocks and special elements
+            if isinstance(node, (nodes.FixedTextElement, nodes.Special)):
                 continue
 
             for txtnode in node.traverse(nodes.Text):
-                # Exclude:
-                #  - document title
-                #  - literals and spans inside literals
-                #  - raw code (such as SVG)
-                if isinstance(txtnode.parent, nodes.title) or \
-                   isinstance(txtnode.parent, nodes.literal) or \
-                   isinstance(txtnode.parent.parent, nodes.literal) or \
-                   isinstance(txtnode.parent, nodes.raw):
-                    continue
-
-                # From fields include only the ones that are in
-                # FORMATTED_FIELDS
-                if isinstance(txtnode.parent.parent, nodes.field_body):
-                    field_name_index = txtnode.parent.parent.parent.first_child_matching_class(nodes.field_name)
-                    if txtnode.parent.parent.parent[field_name_index][0] not in settings['FORMATTED_FIELDS']:
-                        continue
+                if not can_apply_typography(txtnode): continue
+                # Don't hyphenate document title
+                if isinstance(txtnode.parent, nodes.title): continue
 
                 # Useful for debugging, don't remove ;)
                 #print(repr(txtnode.parent), repr(txtnode.parent.parent), repr(txtnode.parent.parent.parent))
diff --git a/pelican-plugins/m/test/htmlsanity_typography/article-jumbo.html b/pelican-plugins/m/test/htmlsanity_typography/article-jumbo.html
index c06c07bf..5a8f029b 100644
--- a/pelican-plugins/m/test/htmlsanity_typography/article-jumbo.html
+++ b/pelican-plugins/m/test/htmlsanity_typography/article-jumbo.html
@@ -13,8 +13,8 @@
   <meta property="og:url" content="article-jumbo.html" />
   <meta property="og:description" content="Article content." />
   <meta name="twitter:description" content="Article content." />
-  <meta property="og:image" content="image.jpg" />
-  <meta name="twitter:image" content="image.jpg" />
+  <meta property="og:image" content="image.jpg?and&amp;in&amp;url=&#34;&#34;" />
+  <meta name="twitter:image" content="image.jpg?and&amp;in&amp;url=&#34;&#34;" />
   <meta name="twitter:card" content="summary_large_image" />
   <meta property="og:type" content="article" />
 </head>
@@ -29,12 +29,12 @@
 <main>
   <article id="m-jumbo">
     <header>
-      <div id="m-jumbo-image" style="background-image: url('image.jpg');">
+      <div id="m-jumbo-image" style="background-image: url('image.jpg?and&amp;in&amp;url=&#34;&#34;');">
         <div id="m-jumbo-cover">
           <div class="m-container">
             <div class="m-row">
               <div class="m-col-t-6 m-col-s-5 m-push-s-1 m-text-left">Dec 10, 2017</div>
-              <div class="m-col-t-6 m-col-s-5 m-push-s-1 m-text-right"><a href="author-an-author.html">An AuÂ­thor</a></div>
+              <div class="m-col-t-6 m-col-s-5 m-push-s-1 m-text-right"><a href="author-an-author.html">An Author</a></div>
             </div>
             <div class="m-row">
               <div class="m-col-t-12 m-col-s-10 m-push-s-1 m-col-m-8 m-push-m-2">
@@ -64,7 +64,7 @@
     <footer class="m-container">
       <div class="m-row">
         <div class="m-col-m-10 m-push-m-1 m-nopadb">
-          <p>Posted by <a href="author-an-author.html">An AuÂ­thor</a> on <time datetime="2017-12-10T00:00:00+00:00">Dec 10, 2017</time> in <a href="category-a-category.html">A category</a>. Tags: <a href="tag-tagging-a-name.html">Tagging a name</a>.</p>
+          <p>Posted by <a href="author-an-author.html">An Author</a> on <time datetime="2017-12-10T00:00:00+00:00">Dec 10, 2017</time> in <a href="category-a-category.html">A category</a>. Tags: <a href="tag-tagging-a-name.html">Tagging a name</a>.</p>
         </div>
       </div>
     </footer>
@@ -80,7 +80,7 @@
       <div class="m-col-s-4 m-col-l-2 m-push-l-3">
         <h3>Au&shy;thors</h3>
         <ol class="m-block-bar-s">
-          <li><a href="author-an-author.html">An AuÂ­thor</a></li>
+          <li><a href="author-an-author.html">An Author</a></li>
         </ol>
       </div>
       <div class="m-col-s-4 m-col-l-2 m-push-l-5">
diff --git a/pelican-plugins/m/test/htmlsanity_typography/article-lang.html b/pelican-plugins/m/test/htmlsanity_typography/article-lang.html
index 96b273ae..49fe820d 100644
--- a/pelican-plugins/m/test/htmlsanity_typography/article-lang.html
+++ b/pelican-plugins/m/test/htmlsanity_typography/article-lang.html
@@ -44,7 +44,7 @@
 te&shy;dy mít čes&shy;ké dě&shy;le&shy;ní slov. „A ta&shy;ké čes&shy;ké uvo&shy;zov&shy;ky.“</p>
 <!-- /content -->
       <footer>
-        <p>Posted by <a href="author-an-author.html">An AuÂ­thor</a> on <time datetime="2017-12-10T00:00:00+00:00">Dec 10, 2017</time> in <a href="category-a-category.html">A category</a>. Tags: <a href="tag-tagging-a-name.html">Tagging a name</a>.</p>
+        <p>Posted by <a href="author-an-author.html">An Author</a> on <time datetime="2017-12-10T00:00:00+00:00">Dec 10, 2017</time> in <a href="category-a-category.html">A category</a>. Tags: <a href="tag-tagging-a-name.html">Tagging a name</a>.</p>
       </footer>
     </article>
     <nav class="m-navpanel m-col-m-2">
@@ -54,7 +54,7 @@ te&shy;dy mÃ­t Äes&shy;kÃ© dÄ&shy;le&shy;nÃ­ slov. âA ta&shy;kÃ© Äes&shy;k
       </ol>
       <h3>Au&shy;thors</h3>
       <ol class="m-block-bar-m">
-        <li><a href="author-an-author.html">An AuÂ­thor</a></li>
+        <li><a href="author-an-author.html">An Author</a></li>
       </ol>
       <h3>Tag cloud</h3>
       <ul class="m-tagcloud">
diff --git a/pelican-plugins/m/test/htmlsanity_typography/articles/jumbo.rst b/pelican-plugins/m/test/htmlsanity_typography/articles/jumbo.rst
index 1cc51cc0..4dce8cf0 100644
--- a/pelican-plugins/m/test/htmlsanity_typography/articles/jumbo.rst
+++ b/pelican-plugins/m/test/htmlsanity_typography/articles/jumbo.rst
@@ -2,7 +2,7 @@ A jumbo article
 ###############
 
 :date: 2017-12-10
-:cover: image.jpg
+:cover: image.jpg?and&in&url=""
 :category: A category
 :author: An Author
 :tags: Tagging a name
diff --git a/pelican-plugins/m/test/htmlsanity_typography/author-an-author.html b/pelican-plugins/m/test/htmlsanity_typography/author-an-author.html
index dc623200..e8f442b1 100644
--- a/pelican-plugins/m/test/htmlsanity_typography/author-an-author.html
+++ b/pelican-plugins/m/test/htmlsanity_typography/author-an-author.html
@@ -2,13 +2,13 @@
 <html lang="en" prefix="og: http://ogp.me/ns#">
 <head>
   <meta charset="UTF-8" />
-  <title>Posts by An AuÂ­thor | A Pelican Blog</title>
+  <title>Posts by An Author | A Pelican Blog</title>
   <link rel="stylesheet" href="https://fonts.googleapis.com/css?family=Source+Code+Pro:400,400i,600%7CSource+Sans+Pro:400,400i,600,600i&amp;subset=latin-ext" />
   <link rel="stylesheet" href="static/m-dark.css" />
   <meta name="viewport" content="width=device-width, initial-scale=1.0" />
   <meta property="og:site_name" content="A Pelican Blog" />
-  <meta property="og:title" content="An AuÂ­thor" />
-  <meta name="twitter:title" content="An AuÂ­thor" />
+  <meta property="og:title" content="An Author" />
+  <meta name="twitter:title" content="An Author" />
   <meta property="og:url" content="author-an-author.html" />
   <meta name="twitter:card" content="summary" />
   <meta property="og:type" content="website" />
@@ -26,7 +26,7 @@
   <div class="m-row">
     <div class="m-col-m-10">
       <div class="m-info m-note">
-        Showing only posts by <em>An AuÂ­thor</em>. <a href="./">Show all posts.</a>
+        Showing only posts by <em>An Author</em>. <a href="./">Show all posts.</a>
       </div>
       <article>
         <header>
@@ -39,7 +39,7 @@
           <p>Ar&shy;ti&shy;cle con&shy;tent.</p>
         </header>
         <footer>
-          <p>Posted by <a href="author-an-author.html">An AuÂ­thor</a> on <time datetime="2017-12-10T00:00:00+00:00">Dec 10, 2017</time> in <a href="category-a-category.html">A category</a>. Tags: <a href="tag-tagging-a-name.html">Tagging a name</a>.</p>
+          <p>Posted by <a href="author-an-author.html">An Author</a> on <time datetime="2017-12-10T00:00:00+00:00">Dec 10, 2017</time> in <a href="category-a-category.html">A category</a>. Tags: <a href="tag-tagging-a-name.html">Tagging a name</a>.</p>
         </footer>
         <div class="m-clearfix-l"></div>
       </article>
@@ -55,7 +55,7 @@
           te&shy;dy mít čes&shy;ké dě&shy;le&shy;ní slov. „A ta&shy;ké čes&shy;ké uvo&shy;zov&shy;ky.“</p>
         </header>
         <footer>
-          <p>Posted by <a href="author-an-author.html">An AuÂ­thor</a> on <time datetime="2017-12-10T00:00:00+00:00">Dec 10, 2017</time> in <a href="category-a-category.html">A category</a>. Tags: <a href="tag-tagging-a-name.html">Tagging a name</a>.</p>
+          <p>Posted by <a href="author-an-author.html">An Author</a> on <time datetime="2017-12-10T00:00:00+00:00">Dec 10, 2017</time> in <a href="category-a-category.html">A category</a>. Tags: <a href="tag-tagging-a-name.html">Tagging a name</a>.</p>
         </footer>
         <div class="m-clearfix-l"></div>
       </article>
@@ -67,7 +67,7 @@
       </ol>
       <h3>Au&shy;thors</h3>
       <ol class="m-block-bar-m">
-        <li><a href="author-an-author.html">An AuÂ­thor</a></li>
+        <li><a href="author-an-author.html">An Author</a></li>
       </ol>
       <h3>Tag cloud</h3>
       <ul class="m-tagcloud">
diff --git a/pelican-plugins/m/test/htmlsanity_typography/category-a-category.html b/pelican-plugins/m/test/htmlsanity_typography/category-a-category.html
index 90464e17..f8546ca0 100644
--- a/pelican-plugins/m/test/htmlsanity_typography/category-a-category.html
+++ b/pelican-plugins/m/test/htmlsanity_typography/category-a-category.html
@@ -39,7 +39,7 @@
           <p>Ar&shy;ti&shy;cle con&shy;tent.</p>
         </header>
         <footer>
-          <p>Posted by <a href="author-an-author.html">An AuÂ­thor</a> on <time datetime="2017-12-10T00:00:00+00:00">Dec 10, 2017</time> in <a href="category-a-category.html">A category</a>. Tags: <a href="tag-tagging-a-name.html">Tagging a name</a>.</p>
+          <p>Posted by <a href="author-an-author.html">An Author</a> on <time datetime="2017-12-10T00:00:00+00:00">Dec 10, 2017</time> in <a href="category-a-category.html">A category</a>. Tags: <a href="tag-tagging-a-name.html">Tagging a name</a>.</p>
         </footer>
         <div class="m-clearfix-l"></div>
       </article>
@@ -55,7 +55,7 @@
           te&shy;dy mít čes&shy;ké dě&shy;le&shy;ní slov. „A ta&shy;ké čes&shy;ké uvo&shy;zov&shy;ky.“</p>
         </header>
         <footer>
-          <p>Posted by <a href="author-an-author.html">An AuÂ­thor</a> on <time datetime="2017-12-10T00:00:00+00:00">Dec 10, 2017</time> in <a href="category-a-category.html">A category</a>. Tags: <a href="tag-tagging-a-name.html">Tagging a name</a>.</p>
+          <p>Posted by <a href="author-an-author.html">An Author</a> on <time datetime="2017-12-10T00:00:00+00:00">Dec 10, 2017</time> in <a href="category-a-category.html">A category</a>. Tags: <a href="tag-tagging-a-name.html">Tagging a name</a>.</p>
         </footer>
         <div class="m-clearfix-l"></div>
       </article>
@@ -67,7 +67,7 @@
       </ol>
       <h3>Au&shy;thors</h3>
       <ol class="m-block-bar-m">
-        <li><a href="author-an-author.html">An AuÂ­thor</a></li>
+        <li><a href="author-an-author.html">An Author</a></li>
       </ol>
       <h3>Tag cloud</h3>
       <ul class="m-tagcloud">
diff --git a/pelican-plugins/m/test/htmlsanity_typography/tag-tagging-a-name.html b/pelican-plugins/m/test/htmlsanity_typography/tag-tagging-a-name.html
index 328745a4..1c93f8a1 100644
--- a/pelican-plugins/m/test/htmlsanity_typography/tag-tagging-a-name.html
+++ b/pelican-plugins/m/test/htmlsanity_typography/tag-tagging-a-name.html
@@ -39,7 +39,7 @@
           <p>Ar&shy;ti&shy;cle con&shy;tent.</p>
         </header>
         <footer>
-          <p>Posted by <a href="author-an-author.html">An AuÂ­thor</a> on <time datetime="2017-12-10T00:00:00+00:00">Dec 10, 2017</time> in <a href="category-a-category.html">A category</a>. Tags: <a href="tag-tagging-a-name.html">Tagging a name</a>.</p>
+          <p>Posted by <a href="author-an-author.html">An Author</a> on <time datetime="2017-12-10T00:00:00+00:00">Dec 10, 2017</time> in <a href="category-a-category.html">A category</a>. Tags: <a href="tag-tagging-a-name.html">Tagging a name</a>.</p>
         </footer>
         <div class="m-clearfix-l"></div>
       </article>
@@ -55,7 +55,7 @@
           te&shy;dy mít čes&shy;ké dě&shy;le&shy;ní slov. „A ta&shy;ké čes&shy;ké uvo&shy;zov&shy;ky.“</p>
         </header>
         <footer>
-          <p>Posted by <a href="author-an-author.html">An AuÂ­thor</a> on <time datetime="2017-12-10T00:00:00+00:00">Dec 10, 2017</time> in <a href="category-a-category.html">A category</a>. Tags: <a href="tag-tagging-a-name.html">Tagging a name</a>.</p>
+          <p>Posted by <a href="author-an-author.html">An Author</a> on <time datetime="2017-12-10T00:00:00+00:00">Dec 10, 2017</time> in <a href="category-a-category.html">A category</a>. Tags: <a href="tag-tagging-a-name.html">Tagging a name</a>.</p>
         </footer>
         <div class="m-clearfix-l"></div>
       </article>
@@ -67,7 +67,7 @@
       </ol>
       <h3>Au&shy;thors</h3>
       <ol class="m-block-bar-m">
-        <li><a href="author-an-author.html">An AuÂ­thor</a></li>
+        <li><a href="author-an-author.html">An Author</a></li>
       </ol>
       <h3>Tag cloud</h3>
       <ul class="m-tagcloud">