From: Vladimír Vondruš Date: Wed, 29 Nov 2017 23:29:29 +0000 (+0100) Subject: m.htmlsanity: properly use DEFAULT_LANG and :lang: metadata. X-Git-Url: https://www.chiark.greenend.org.uk/ucgi/~cjwatson/git?a=commitdiff_plain;h=4d4ddf0a04e2749a2e6327eb80b7d94beff4d06c;p=blog.git m.htmlsanity: properly use DEFAULT_LANG and :lang: metadata. There's a patch to Pelican at https://github.com/getpelican/pelican/pull/2256 which propagates DEFAULT_LANG to docutils, but until that's accepted and released, I'm detecting its presence and falling back to monkey-patched version that enables the same from within the m.htmlsanity plugin. Besides that, the per-page/per-article :lang: metadata are also taken into account. The docs were updated to reflect this. --- diff --git a/doc/plugins/htmlsanity.rst b/doc/plugins/htmlsanity.rst index de4b6b6f..31fff7bb 100644 --- a/doc/plugins/htmlsanity.rst +++ b/doc/plugins/htmlsanity.rst @@ -143,10 +143,11 @@ on top. See for yourself: *"Autres temps, autres mœurs"* -The default language is of course taken from the standard :py:`DEFAULT_LANG` -option, which defaults to :py:`'en'`. This feature is controlled by the -:py:`M_HTMLSANITY_SMART_QUOTES` option, which, similarly to the builtin -:py:`TYPOGRIFY` option, defaults to :py:`False`. +The default language is taken from the standard :py:`DEFAULT_LANG` option, +which defaults to :py:`'en'`, and can also be overridden on a per-page or +per-article basis using the :rst:`:lang:` metadata option. This feature is +controlled by the :py:`M_HTMLSANITY_SMART_QUOTES` option, which, similarly to +the builtin :py:`TYPOGRIFY` option, defaults to :py:`False`. .. note-warning:: @@ -214,8 +215,10 @@ that are candidates for a word break:

an­ti­cons­ti­tu­tion­nel­le­ment

Thanks to Unicode magic this is either hidden or converted to a real hyphen and -*doesn't* break search or SEO. This feature is controlled by the -:py:`M_HTMLSANITY_HYPHENATION` option, which also defaults to :py:`False`. +*doesn't* break search or SEO. Similarly to smart quotes, the default language +is taken from the standard :py:`DEFAULT_LANG` option or the :rst:`:lang:` +metadata option. This feature is controlled by the :py:`M_HTMLSANITY_HYPHENATION` +option, which also defaults to :py:`False`. .. note-success:: @@ -244,8 +247,8 @@ settings). Just pipe your variable through the ``render_rst`` filter: The filter is fully equivalent to the builtin reST rendering and the above -:py:`M_HTMLSANITY_SMART_QUOTES` and :py:`M_HTMLSANITY_HYPHENATION` options -affect it as well. +:py:`M_HTMLSANITY_SMART_QUOTES`, :py:`M_HTMLSANITY_HYPHENATION` and +:py:`DEFAULT_LANG` options affect it as well. .. note-warning:: diff --git a/pelican-plugins/m/htmlsanity.py b/pelican-plugins/m/htmlsanity.py index 4ed9d1b5..426e9e19 100644 --- a/pelican-plugins/m/htmlsanity.py +++ b/pelican-plugins/m/htmlsanity.py @@ -50,6 +50,28 @@ except ImportError: settings = {} words_re = re.compile("""\w+""", re.UNICODE|re.X) +# TODO: remove when 3.8 with https://github.com/getpelican/pelican/pull/2256 +# is released +pelican371_default_lang_patch = False + +def extract_document_language(document): + # Take the one from settings as default + # TODO: remove when 3.8 with https://github.com/getpelican/pelican/pull/2256 + # is released + if pelican371_default_lang_patch: + language = settings['DEFAULT_LANG'] + else: + language = document.settings.language_code + + # Then try to find the :lang: metadata option + for field in document.traverse(nodes.field): + assert isinstance(field[0], nodes.field_name) + assert isinstance(field[1], nodes.field_body) + # field_body -> paragraph -> text + if field[0][0] == 'lang': return str(field[1][0][0]) + + return language + class 
SmartQuotes(docutils.transforms.universal.SmartQuotes): """Smart quote transform @@ -74,7 +96,7 @@ class SmartQuotes(docutils.transforms.universal.SmartQuotes): alternative = False # print repr(alternative) - document_language = self.document.settings.language_code + document_language = extract_document_language(self.document) # "Educate" quotes in normal text. Handle each block of text # (TextElement node) as a unit to keep context around inline nodes: @@ -141,7 +163,7 @@ class Pyphen(Transform): if not settings['M_HTMLSANITY_HYPHENATION']: return - document_language = self.document.settings.language_code + document_language = extract_document_language(self.document) pyphen_for_lang = {} @@ -581,6 +603,7 @@ def render_rst(value): extra_params = {'initial_header_level': '2', 'syntax_highlight': 'short', 'input_encoding': 'utf-8', + 'language_code': settings['DEFAULT_LANG'], 'exit_status_level': 2, 'embed_stylesheet': False} if settings['DOCUTILS_SETTINGS']: @@ -711,6 +734,15 @@ def configure_pelican(pelicanobj): pelicanobj.settings['JINJA_FILTERS']['hyphenate'] = hyphenate pelicanobj.settings['JINJA_FILTERS']['dehyphenate'] = dehyphenate + # TODO: remove when 3.8 with https://github.com/getpelican/pelican/pull/2256 + # is released + reader = RstReader(pelicanobj.settings) + pub = reader._get_publisher(os.devnull) + if pub.settings.language_code != pelicanobj.settings['DEFAULT_LANG']: + logger.warning("Unpatched Pelican <= 3.7.1 detected, monkey-patching for DEFAULT_LANG-aware reST parsing") + global pelican371_default_lang_patch + pelican371_default_lang_patch = True + # TODO: remove when 3.8 with https://github.com/getpelican/pelican/pull/2164 # (or the _link_replacer part of it) is released if not hasattr(Content, '_link_replacer'):