chiark / gitweb /
m.htmlsanity: improve detection for applying typography improvements.
author: Vladimír Vondruš <mosra@centrum.cz>
Tue, 19 Dec 2017 15:46:32 +0000 (16:46 +0100)
committer: Vladimír Vondruš <mosra@centrum.cz>
Tue, 19 Dec 2017 15:49:44 +0000 (16:49 +0100)
Previously, smart quotes were applied to arbitrary fields and even to field
names. Ugh. Also, the author was hyphenated and smart-quoted, but a
category or tag was not.

Also, have fun spotting the differences in the test files :P

doc/plugins/htmlsanity.rst
pelican-plugins/m/htmlsanity.py
pelican-plugins/m/test/htmlsanity_typography/article-jumbo.html
pelican-plugins/m/test/htmlsanity_typography/article-lang.html
pelican-plugins/m/test/htmlsanity_typography/articles/jumbo.rst
pelican-plugins/m/test/htmlsanity_typography/author-an-author.html
pelican-plugins/m/test/htmlsanity_typography/category-a-category.html
pelican-plugins/m/test/htmlsanity_typography/tag-tagging-a-name.html

index bfc786c267216cac8d8933505738c2c02cc4022e..ace16bdb0a29c2d419cb8e17562e9c41d050c956 100644 (file)
@@ -112,8 +112,9 @@ French-style quotes.
 
 This plugin contains a patched version of
 `smart_quotes option <http://docutils.sourceforge.net/docs/user/smartquotes.html>`_
-from Docutils, which is based off SmartyPants, but with proper language awareness
-on top. See for yourself:
+from Docutils, which is based off SmartyPants, but with proper language
+awareness on top. It is applied to whole document contents and fields that are
+included in the :py:`FORMATTED_FIELDS`. See for yourself:
 
 .. code-figure::
 
index 6419314a19394518782838626fc2971fb0eb976c..42046a9824b94e15dabc1ad98b9a9e8b5a31b20b 100644 (file)
@@ -72,6 +72,28 @@ def extract_document_language(document):
 
     return language
 
+def can_apply_typography(txtnode):
+    # Exclude:
+    #  - literals and spans inside literals
+    #  - raw code (such as SVG)
+    #  - field names
+    #  - bibliographic elements (author, date, ... fields)
+    if isinstance(txtnode.parent, nodes.literal) or \
+       isinstance(txtnode.parent.parent, nodes.literal) or \
+       isinstance(txtnode.parent, nodes.raw) or \
+       isinstance(txtnode.parent, nodes.field_name) or \
+       isinstance(txtnode.parent, nodes.Bibliographic):
+        return False
+
+    # From fields include only the ones that are in FORMATTED_FIELDS
+    if isinstance(txtnode.parent.parent, nodes.field_body):
+        field_name_index = txtnode.parent.parent.parent.first_child_matching_class(nodes.field_name)
+        if txtnode.parent.parent.parent[field_name_index][0] in settings['FORMATTED_FIELDS']:
+            return True
+        return False
+
+    return True
+
 class SmartQuotes(docutils.transforms.universal.SmartQuotes):
     """Smart quote transform
 
@@ -109,15 +131,14 @@ class SmartQuotes(docutils.transforms.universal.SmartQuotes):
                 continue
 
             # list of text nodes in the "text block":
-            # Patched here to exclude text spans inside literal nodes.
-            # Hopefully two nesting levels are enough.
-            txtnodes = [txtnode for txtnode in node.traverse(nodes.Text)
-                        if not isinstance(txtnode.parent,
-                                          nodes.option_string) and
-                           not isinstance(txtnode.parent,
-                                          nodes.literal) and
-                           not isinstance(txtnode.parent.parent,
-                                          nodes.literal)]
+            # Patched here to exclude more stuff.
+            txtnodes = []
+            for txtnode in node.traverse(nodes.Text):
+                if not can_apply_typography(txtnode): continue
+                # Don't convert -- in option strings
+                if isinstance(txtnode.parent, nodes.option_string): continue
+
+                txtnodes += [txtnode]
 
             # language: use typographical quotes for language "lang"
             lang = node.get_language_code(document_language)
@@ -169,27 +190,14 @@ class Pyphen(Transform):
 
         # Go through all text words and hyphenate them
         for node in self.document.traverse(nodes.TextElement):
-            # Skip preformatted text blocks, special elements and field names
-            if isinstance(node, (nodes.FixedTextElement, nodes.Special, nodes.field_name)):
+            # Skip preformatted text blocks and special elements
+            if isinstance(node, (nodes.FixedTextElement, nodes.Special)):
                 continue
 
             for txtnode in node.traverse(nodes.Text):
-                # Exclude:
-                #  - document title
-                #  - literals and spans inside literals
-                #  - raw code (such as SVG)
-                if isinstance(txtnode.parent, nodes.title) or \
-                   isinstance(txtnode.parent, nodes.literal) or \
-                   isinstance(txtnode.parent.parent, nodes.literal) or \
-                   isinstance(txtnode.parent, nodes.raw):
-                    continue
-
-                # From fields include only the ones that are in
-                # FORMATTED_FIELDS
-                if isinstance(txtnode.parent.parent, nodes.field_body):
-                    field_name_index = txtnode.parent.parent.parent.first_child_matching_class(nodes.field_name)
-                    if txtnode.parent.parent.parent[field_name_index][0] not in settings['FORMATTED_FIELDS']:
-                        continue
+                if not can_apply_typography(txtnode): continue
+                # Don't hyphenate document title
+                if isinstance(txtnode.parent, nodes.title): continue
 
                 # Useful for debugging, don't remove ;)
                 #print(repr(txtnode.parent), repr(txtnode.parent.parent), repr(txtnode.parent.parent.parent))
index c06c07bfbc8ec4a8a89561edb79772a329a61692..5a8f029b71ad2d3d604424c465a053bd9a57fc08 100644 (file)
@@ -13,8 +13,8 @@
   <meta property="og:url" content="article-jumbo.html" />
   <meta property="og:description" content="Article content." />
   <meta name="twitter:description" content="Article content." />
-  <meta property="og:image" content="image.jpg" />
-  <meta name="twitter:image" content="image.jpg" />
+  <meta property="og:image" content="image.jpg?and&amp;in&amp;url=&#34;&#34;" />
+  <meta name="twitter:image" content="image.jpg?and&amp;in&amp;url=&#34;&#34;" />
   <meta name="twitter:card" content="summary_large_image" />
   <meta property="og:type" content="article" />
 </head>
 <main>
   <article id="m-jumbo">
     <header>
-      <div id="m-jumbo-image" style="background-image: url('image.jpg');">
+      <div id="m-jumbo-image" style="background-image: url('image.jpg?and&amp;in&amp;url=&#34;&#34;');">
         <div id="m-jumbo-cover">
           <div class="m-container">
             <div class="m-row">
               <div class="m-col-t-6 m-col-s-5 m-push-s-1 m-text-left">Dec 10, 2017</div>
-              <div class="m-col-t-6 m-col-s-5 m-push-s-1 m-text-right"><a href="author-an-author.html">An Au­thor</a></div>
+              <div class="m-col-t-6 m-col-s-5 m-push-s-1 m-text-right"><a href="author-an-author.html">An Author</a></div>
             </div>
             <div class="m-row">
               <div class="m-col-t-12 m-col-s-10 m-push-s-1 m-col-m-8 m-push-m-2">
@@ -64,7 +64,7 @@
     <footer class="m-container">
       <div class="m-row">
         <div class="m-col-m-10 m-push-m-1 m-nopadb">
-          <p>Posted by <a href="author-an-author.html">An Au­thor</a> on <time datetime="2017-12-10T00:00:00+00:00">Dec 10, 2017</time> in <a href="category-a-category.html">A category</a>. Tags: <a href="tag-tagging-a-name.html">Tagging a name</a>.</p>
+          <p>Posted by <a href="author-an-author.html">An Author</a> on <time datetime="2017-12-10T00:00:00+00:00">Dec 10, 2017</time> in <a href="category-a-category.html">A category</a>. Tags: <a href="tag-tagging-a-name.html">Tagging a name</a>.</p>
         </div>
       </div>
     </footer>
@@ -80,7 +80,7 @@
       <div class="m-col-s-4 m-col-l-2 m-push-l-3">
         <h3>Au&shy;thors</h3>
         <ol class="m-block-bar-s">
-          <li><a href="author-an-author.html">An Au­thor</a></li>
+          <li><a href="author-an-author.html">An Author</a></li>
         </ol>
       </div>
       <div class="m-col-s-4 m-col-l-2 m-push-l-5">
index 96b273ae229fd4b52be4fe134bcf3c62c1a9e65c..49fe820d26e5802c4af37b4e88b8836d346161e6 100644 (file)
@@ -44,7 +44,7 @@
 te&shy;dy mít čes&shy;ké dě&shy;le&shy;ní slov. „A ta&shy;ké čes&shy;ké uvo&shy;zov&shy;ky.“</p>
 <!-- /content -->
       <footer>
-        <p>Posted by <a href="author-an-author.html">An Au­thor</a> on <time datetime="2017-12-10T00:00:00+00:00">Dec 10, 2017</time> in <a href="category-a-category.html">A category</a>. Tags: <a href="tag-tagging-a-name.html">Tagging a name</a>.</p>
+        <p>Posted by <a href="author-an-author.html">An Author</a> on <time datetime="2017-12-10T00:00:00+00:00">Dec 10, 2017</time> in <a href="category-a-category.html">A category</a>. Tags: <a href="tag-tagging-a-name.html">Tagging a name</a>.</p>
       </footer>
     </article>
     <nav class="m-navpanel m-col-m-2">
@@ -54,7 +54,7 @@ te&shy;dy mít čes&shy;ké dě&shy;le&shy;ní slov. „A ta&shy;ké čes&shy;k
       </ol>
       <h3>Au&shy;thors</h3>
       <ol class="m-block-bar-m">
-        <li><a href="author-an-author.html">An Au­thor</a></li>
+        <li><a href="author-an-author.html">An Author</a></li>
       </ol>
       <h3>Tag cloud</h3>
       <ul class="m-tagcloud">
index 1cc51cc0c6e31be227624102f5737a7a08773e23..4dce8cf03c4546a8c77763faeb9d5c013ab42690 100644 (file)
@@ -2,7 +2,7 @@ A jumbo article
 ###############
 
 :date: 2017-12-10
-:cover: image.jpg
+:cover: image.jpg?and&in&url=""
 :category: A category
 :author: An Author
 :tags: Tagging a name
index dc6232000d31640f15472b5303abcb16f3851fce..e8f442b184cf9a2e2d0c7667c1531e26f2c5eed0 100644 (file)
@@ -2,13 +2,13 @@
 <html lang="en" prefix="og: http://ogp.me/ns#">
 <head>
   <meta charset="UTF-8" />
-  <title>Posts by An Au­thor | A Pelican Blog</title>
+  <title>Posts by An Author | A Pelican Blog</title>
   <link rel="stylesheet" href="https://fonts.googleapis.com/css?family=Source+Code+Pro:400,400i,600%7CSource+Sans+Pro:400,400i,600,600i&amp;subset=latin-ext" />
   <link rel="stylesheet" href="static/m-dark.css" />
   <meta name="viewport" content="width=device-width, initial-scale=1.0" />
   <meta property="og:site_name" content="A Pelican Blog" />
-  <meta property="og:title" content="An Au­thor" />
-  <meta name="twitter:title" content="An Au­thor" />
+  <meta property="og:title" content="An Author" />
+  <meta name="twitter:title" content="An Author" />
   <meta property="og:url" content="author-an-author.html" />
   <meta name="twitter:card" content="summary" />
   <meta property="og:type" content="website" />
@@ -26,7 +26,7 @@
   <div class="m-row">
     <div class="m-col-m-10">
       <div class="m-info m-note">
-        Showing only posts by <em>An Au­thor</em>. <a href="./">Show all posts.</a>
+        Showing only posts by <em>An Author</em>. <a href="./">Show all posts.</a>
       </div>
       <article>
         <header>
@@ -39,7 +39,7 @@
           <p>Ar&shy;ti&shy;cle con&shy;tent.</p>
         </header>
         <footer>
-          <p>Posted by <a href="author-an-author.html">An Au­thor</a> on <time datetime="2017-12-10T00:00:00+00:00">Dec 10, 2017</time> in <a href="category-a-category.html">A category</a>. Tags: <a href="tag-tagging-a-name.html">Tagging a name</a>.</p>
+          <p>Posted by <a href="author-an-author.html">An Author</a> on <time datetime="2017-12-10T00:00:00+00:00">Dec 10, 2017</time> in <a href="category-a-category.html">A category</a>. Tags: <a href="tag-tagging-a-name.html">Tagging a name</a>.</p>
         </footer>
         <div class="m-clearfix-l"></div>
       </article>
@@ -55,7 +55,7 @@
           te&shy;dy mít čes&shy;ké dě&shy;le&shy;ní slov. „A ta&shy;ké čes&shy;ké uvo&shy;zov&shy;ky.“</p>
         </header>
         <footer>
-          <p>Posted by <a href="author-an-author.html">An Au­thor</a> on <time datetime="2017-12-10T00:00:00+00:00">Dec 10, 2017</time> in <a href="category-a-category.html">A category</a>. Tags: <a href="tag-tagging-a-name.html">Tagging a name</a>.</p>
+          <p>Posted by <a href="author-an-author.html">An Author</a> on <time datetime="2017-12-10T00:00:00+00:00">Dec 10, 2017</time> in <a href="category-a-category.html">A category</a>. Tags: <a href="tag-tagging-a-name.html">Tagging a name</a>.</p>
         </footer>
         <div class="m-clearfix-l"></div>
       </article>
@@ -67,7 +67,7 @@
       </ol>
       <h3>Au&shy;thors</h3>
       <ol class="m-block-bar-m">
-        <li><a href="author-an-author.html">An Au­thor</a></li>
+        <li><a href="author-an-author.html">An Author</a></li>
       </ol>
       <h3>Tag cloud</h3>
       <ul class="m-tagcloud">
index 90464e17eafcd8cf1f6fe1661ea6a561c8403856..f8546ca03e6fe014f3d8e6474a3ce8471fcaae4e 100644 (file)
@@ -39,7 +39,7 @@
           <p>Ar&shy;ti&shy;cle con&shy;tent.</p>
         </header>
         <footer>
-          <p>Posted by <a href="author-an-author.html">An Au­thor</a> on <time datetime="2017-12-10T00:00:00+00:00">Dec 10, 2017</time> in <a href="category-a-category.html">A category</a>. Tags: <a href="tag-tagging-a-name.html">Tagging a name</a>.</p>
+          <p>Posted by <a href="author-an-author.html">An Author</a> on <time datetime="2017-12-10T00:00:00+00:00">Dec 10, 2017</time> in <a href="category-a-category.html">A category</a>. Tags: <a href="tag-tagging-a-name.html">Tagging a name</a>.</p>
         </footer>
         <div class="m-clearfix-l"></div>
       </article>
@@ -55,7 +55,7 @@
           te&shy;dy mít čes&shy;ké dě&shy;le&shy;ní slov. „A ta&shy;ké čes&shy;ké uvo&shy;zov&shy;ky.“</p>
         </header>
         <footer>
-          <p>Posted by <a href="author-an-author.html">An Au­thor</a> on <time datetime="2017-12-10T00:00:00+00:00">Dec 10, 2017</time> in <a href="category-a-category.html">A category</a>. Tags: <a href="tag-tagging-a-name.html">Tagging a name</a>.</p>
+          <p>Posted by <a href="author-an-author.html">An Author</a> on <time datetime="2017-12-10T00:00:00+00:00">Dec 10, 2017</time> in <a href="category-a-category.html">A category</a>. Tags: <a href="tag-tagging-a-name.html">Tagging a name</a>.</p>
         </footer>
         <div class="m-clearfix-l"></div>
       </article>
@@ -67,7 +67,7 @@
       </ol>
       <h3>Au&shy;thors</h3>
       <ol class="m-block-bar-m">
-        <li><a href="author-an-author.html">An Au­thor</a></li>
+        <li><a href="author-an-author.html">An Author</a></li>
       </ol>
       <h3>Tag cloud</h3>
       <ul class="m-tagcloud">
index 328745a4ba1bc4938f2ddf678a4635489072c980..1c93f8a1ee5f6ef58798d217ef978acb982e696a 100644 (file)
@@ -39,7 +39,7 @@
           <p>Ar&shy;ti&shy;cle con&shy;tent.</p>
         </header>
         <footer>
-          <p>Posted by <a href="author-an-author.html">An Au­thor</a> on <time datetime="2017-12-10T00:00:00+00:00">Dec 10, 2017</time> in <a href="category-a-category.html">A category</a>. Tags: <a href="tag-tagging-a-name.html">Tagging a name</a>.</p>
+          <p>Posted by <a href="author-an-author.html">An Author</a> on <time datetime="2017-12-10T00:00:00+00:00">Dec 10, 2017</time> in <a href="category-a-category.html">A category</a>. Tags: <a href="tag-tagging-a-name.html">Tagging a name</a>.</p>
         </footer>
         <div class="m-clearfix-l"></div>
       </article>
@@ -55,7 +55,7 @@
           te&shy;dy mít čes&shy;ké dě&shy;le&shy;ní slov. „A ta&shy;ké čes&shy;ké uvo&shy;zov&shy;ky.“</p>
         </header>
         <footer>
-          <p>Posted by <a href="author-an-author.html">An Au­thor</a> on <time datetime="2017-12-10T00:00:00+00:00">Dec 10, 2017</time> in <a href="category-a-category.html">A category</a>. Tags: <a href="tag-tagging-a-name.html">Tagging a name</a>.</p>
+          <p>Posted by <a href="author-an-author.html">An Author</a> on <time datetime="2017-12-10T00:00:00+00:00">Dec 10, 2017</time> in <a href="category-a-category.html">A category</a>. Tags: <a href="tag-tagging-a-name.html">Tagging a name</a>.</p>
         </footer>
         <div class="m-clearfix-l"></div>
       </article>
@@ -67,7 +67,7 @@
       </ol>
       <h3>Au&shy;thors</h3>
       <ol class="m-block-bar-m">
-        <li><a href="author-an-author.html">An Au­thor</a></li>
+        <li><a href="author-an-author.html">An Author</a></li>
       </ol>
       <h3>Tag cloud</h3>
       <ul class="m-tagcloud">