From bc76a80fc2b69e32b78e6115ca05fe2fe3a1af3b Mon Sep 17 00:00:00 2001 From: =?utf8?q?Vladim=C3=ADr=20Vondru=C5=A1?= Date: Mon, 11 Sep 2017 19:02:23 +0200 Subject: [PATCH] m.htmlsanity: take inline language into account for hyphenation. And also don't crash if given language is not supported. --- pelican-plugins/m/htmlsanity.py | 20 +++++++++++--------- 1 file changed, 11 insertions(+), 9 deletions(-) diff --git a/pelican-plugins/m/htmlsanity.py b/pelican-plugins/m/htmlsanity.py index 21aa04ae..e57da755 100644 --- a/pelican-plugins/m/htmlsanity.py +++ b/pelican-plugins/m/htmlsanity.py @@ -126,15 +126,6 @@ class Pyphen(Transform): if isinstance(node, (nodes.FixedTextElement, nodes.Special, nodes.field_name)): continue - # Proper language-dependent hyphenation - lang = node.get_language_code(document_language) - - # Create new Pyphen object for given lang, if not yet cached. I'm - # assuming this is faster than recreating the instance for every - # text node - if lang not in pyphen_for_lang: - pyphen_for_lang[lang] = pyphen.Pyphen(lang=lang) - for txtnode in node.traverse(nodes.Text): # Exclude: # - document title @@ -155,6 +146,17 @@ class Pyphen(Transform): # Useful for debugging, don't remove ;) #print(repr(txtnode.parent), repr(txtnode.parent.parent), repr(txtnode.parent.parent.parent)) + # Proper language-dependent hyphenation. Can't be done for + # `node` as a paragraph can consist of more than one language. + lang = txtnode.parent.get_language_code(document_language) + + # Create new Pyphen object for given lang, if not yet cached. + # I'm assuming this is faster than recreating the instance for + # every text node + if lang not in pyphen_for_lang: + if lang not in pyphen.LANGUAGES: continue + pyphen_for_lang[lang] = pyphen.Pyphen(lang=lang) + txtnode.parent.replace(txtnode, nodes.Text(words_re.sub(lambda m: pyphen_for_lang[lang].inserted(m.group(0), '\u00AD'), txtnode.astext()))) class SaneHtmlTranslator(HTMLTranslator): -- 2.30.2