m.htmlsanity: properly use DEFAULT_LANG and :lang: metadata.

author Vladimír Vondruš <mosra@centrum.cz>

Wed, 29 Nov 2017 23:29:29 +0000 (00:29 +0100)

committer Vladimír Vondruš <mosra@centrum.cz>

Wed, 29 Nov 2017 23:29:29 +0000 (00:29 +0100)
author Vladimír Vondruš <mosra@centrum.cz>
Wed, 29 Nov 2017 23:29:29 +0000 (00:29 +0100)
committer Vladimír Vondruš <mosra@centrum.cz>
Wed, 29 Nov 2017 23:29:29 +0000 (00:29 +0100)
diff --git a/doc/plugins/htmlsanity.rst b/doc/plugins/htmlsanity.rst

index de4b6b6f2ba31b3250e13183a98b71d36e681649..31fff7bb9902752761d7ab217d9d6fdf8886ee0b 100644 (file)
--- a/doc/plugins/htmlsanity.rst
+++ b/doc/plugins/htmlsanity.rst
@@ -143,10 +143,11 @@ on top. See for yourself:
  
      *"Autres temps, autres mœurs"*
  
-The default language is of course taken from the standard :py:`DEFAULT_LANG`
-option, which defaults to :py:`'en'`. This feature is controlled by the
-:py:`M_HTMLSANITY_SMART_QUOTES` option, which, similarly to the builtin
-:py:`TYPOGRIFY` option, defaults to :py:`False`.
+The default language is taken from the standard :py:`DEFAULT_LANG` option,
+which defaults to :py:`'en'`, and can also be overridden on a per-page or
+per-article basis using the :rst:`:lang:` metadata option. This feature is
+controlled by the :py:`M_HTMLSANITY_SMART_QUOTES` option, which, similarly to
+the builtin :py:`TYPOGRIFY` option, defaults to :py:`False`.
  
  .. note-warning::
  
@@ -214,8 +215,10 @@ that are candidates for a word break:
      <p lang="fr">an&shy;ti&shy;cons&shy;ti&shy;tu&shy;tion&shy;nel&shy;le&shy;ment</p>
  
  Thanks to Unicode magic this is either hidden or converted to a real hyphen and
-*doesn't* break search or SEO. This feature is controlled by the
-:py:`M_HTMLSANITY_HYPHENATION` option, which also defaults to :py:`False`.
+*doesn't* break search or SEO. Similarly to smart quotes, the default language
+is taken from the standard :py:`DEFAULT_LANG` option or the :rst:`:lang:`
+metadata option. This feature is controlled by the
+:py:`M_HTMLSANITY_HYPHENATION` option, which also defaults to :py:`False`.
  
  .. note-success::
  
@@ -244,8 +247,8 @@ settings). Just pipe your variable through the ``render_rst`` filter:
      </html>
  
  The filter is fully equivalent to the builtin reST rendering and the above
-:py:`M_HTMLSANITY_SMART_QUOTES` and :py:`M_HTMLSANITY_HYPHENATION` options
-affect it as well.
+:py:`M_HTMLSANITY_SMART_QUOTES`, :py:`M_HTMLSANITY_HYPHENATION` and
+:py:`DEFAULT_LANG` options affect it as well.
  
  .. note-warning::
  
diff --git a/pelican-plugins/m/htmlsanity.py b/pelican-plugins/m/htmlsanity.py

index 4ed9d1b56111ccbb8b426cad050bf6072e5857d3..426e9e19e29733b8cd35f0204d452dd22d9c36f4 100644 (file)
--- a/pelican-plugins/m/htmlsanity.py
+++ b/pelican-plugins/m/htmlsanity.py
@@ -50,6 +50,28 @@ except ImportError:
  settings = {}
  words_re = re.compile("""\w+""", re.UNICODE|re.X)
  
+# TODO: remove when 3.8 with https://github.com/getpelican/pelican/pull/2256
+# is released
+pelican371_default_lang_patch = False
+
+def extract_document_language(document):
+    # Take the one from settings as default
+    # TODO: remove when 3.8 with https://github.com/getpelican/pelican/pull/2256
+    # is released
+    if pelican371_default_lang_patch:
+        language = settings['DEFAULT_LANG']
+    else:
+        language = document.settings.language_code
+
+    # Then try to find the :lang: metadata option
+    for field in document.traverse(nodes.field):
+        assert isinstance(field[0], nodes.field_name)
+        assert isinstance(field[1], nodes.field_body)
+        # field_body -> paragraph -> text
+        if field[0][0] == 'lang': return str(field[1][0][0])
+
+    return language
+
  class SmartQuotes(docutils.transforms.universal.SmartQuotes):
      """Smart quote transform
  
@@ -74,7 +96,7 @@ class SmartQuotes(docutils.transforms.universal.SmartQuotes):
              alternative = False
          # print repr(alternative)
  
-        document_language = self.document.settings.language_code
+        document_language = extract_document_language(self.document)
  
          # "Educate" quotes in normal text. Handle each block of text
          # (TextElement node) as a unit to keep context around inline nodes:
@@ -141,7 +163,7 @@ class Pyphen(Transform):
          if not settings['M_HTMLSANITY_HYPHENATION']:
              return
  
-        document_language = self.document.settings.language_code
+        document_language = extract_document_language(self.document)
  
          pyphen_for_lang = {}
  
@@ -581,6 +603,7 @@ def render_rst(value):
      extra_params = {'initial_header_level': '2',
                      'syntax_highlight': 'short',
                      'input_encoding': 'utf-8',
+                    'language_code': settings['DEFAULT_LANG'],
                      'exit_status_level': 2,
                      'embed_stylesheet': False}
      if settings['DOCUTILS_SETTINGS']:
@@ -711,6 +734,15 @@ def configure_pelican(pelicanobj):
      pelicanobj.settings['JINJA_FILTERS']['hyphenate'] = hyphenate
      pelicanobj.settings['JINJA_FILTERS']['dehyphenate'] = dehyphenate
  
+    # TODO: remove when 3.8 with https://github.com/getpelican/pelican/pull/2256
+    # is released
+    reader = RstReader(pelicanobj.settings)
+    pub = reader._get_publisher(os.devnull)
+    if pub.settings.language_code != pelicanobj.settings['DEFAULT_LANG']:
+        logger.warning("Unpatched Pelican <= 3.7.1 detected, monkey-patching for DEFAULT_LANG-aware reST parsing")
+        global pelican371_default_lang_patch
+        pelican371_default_lang_patch = True
+
      # TODO: remove when 3.8 with https://github.com/getpelican/pelican/pull/2164
      # (or the _link_replacer part of it) is released
      if not hasattr(Content, '_link_replacer'):
author	Vladimír Vondruš <mosra@centrum.cz>
	Wed, 29 Nov 2017 23:29:29 +0000 (00:29 +0100)
committer	Vladimír Vondruš <mosra@centrum.cz>
	Wed, 29 Nov 2017 23:29:29 +0000 (00:29 +0100)
doc/plugins/htmlsanity.rst		patch \| blob \| history
pelican-plugins/m/htmlsanity.py		patch \| blob \| history