From 1360965cb77f6e2a05165a36f326c8ad72482a57 Mon Sep 17 00:00:00 2001 From: =?utf8?q?Vladim=C3=ADr=20Vondru=C5=A1?= Date: Sun, 21 Oct 2018 22:20:52 +0200 Subject: [PATCH] m.htmlsanity: don't apply typography on links with URLs in title. And e-mail addresses. That's a very bad thing to do. --- doc/plugins/htmlsanity.rst | 5 +++-- pelican-plugins/m/htmlsanity.py | 10 ++++++++-- pelican-plugins/m/test/htmlsanity_typography/page.html | 6 ++++++ pelican-plugins/m/test/htmlsanity_typography/page.rst | 6 ++++++ 4 files changed, 23 insertions(+), 4 deletions(-) diff --git a/doc/plugins/htmlsanity.rst b/doc/plugins/htmlsanity.rst index 95d8619b..5ccf92aa 100644 --- a/doc/plugins/htmlsanity.rst +++ b/doc/plugins/htmlsanity.rst @@ -167,8 +167,9 @@ of long words being wrapped on new lines. The hyphenation is done using `Pyphen `_ and is applied to whole document contents and fields that are included in the :py:`FORMATTED_FIELDS`. All other fields including document title are excluded from hyphenation, the -same goes for literal and raw blocks. You can see it in practice in the -following convoluted example, it's also language-aware: +same goes for literal and raw blocks and links with URL (or e-mail) as a title. +You can see it in practice in the following convoluted example, it's also +language-aware: .. code-figure:: diff --git a/pelican-plugins/m/htmlsanity.py b/pelican-plugins/m/htmlsanity.py index 642f49a7..b6c3e68e 100644 --- a/pelican-plugins/m/htmlsanity.py +++ b/pelican-plugins/m/htmlsanity.py @@ -78,11 +78,14 @@ def can_apply_typography(txtnode): # - raw code (such as SVG) # - field names # - bibliographic elements (author, date, ... 
fields) + # - links with title that's the same as URL (or e-mail) if isinstance(txtnode.parent, nodes.literal) or \ isinstance(txtnode.parent.parent, nodes.literal) or \ isinstance(txtnode.parent, nodes.raw) or \ isinstance(txtnode.parent, nodes.field_name) or \ - isinstance(txtnode.parent, nodes.Bibliographic): + isinstance(txtnode.parent, nodes.Bibliographic) or \ + (isinstance(txtnode.parent, nodes.reference) and + (txtnode.astext() == txtnode.parent['refuri'] or 'mailto:' + txtnode.astext() == txtnode.parent['refuri'])): return False # From fields include only the ones that are in FORMATTED_FIELDS @@ -196,7 +199,10 @@ class Pyphen(Transform): for txtnode in node.traverse(nodes.Text): if not can_apply_typography(txtnode): continue - # Don't hyphenate document title + + # Don't hyphenate document title. Not part of + # can_apply_typography() because we *do* want smart quotes for + # a document title. if isinstance(txtnode.parent, nodes.title): continue # Useful for debugging, don't remove ;) diff --git a/pelican-plugins/m/test/htmlsanity_typography/page.html b/pelican-plugins/m/test/htmlsanity_typography/page.html index 45d13281..c92c4d92 100644 --- a/pelican-plugins/m/test/htmlsanity_typography/page.html +++ b/pelican-plugins/m/test/htmlsanity_typography/page.html @@ -52,6 +52,12 @@ Nest­ed con­tent should be hy­phen­at­ed al­so! And al&s ver­ba­tim stuff shouldn’t: hello "this" is not hyphenated. Nei­ther ver­ba­tim blocks:

"quote" hyphenation

Od­sta­vec v češ­ti­ně. „Uvo­zov­ky“ fun­gu­jí ji­nak a dě­le­ní slov jakbys­met.

+

Links with ti­tles that are URLs (or e-mail ad­dress­es) shouldn’t be hy­phen­at­ed +ei­ther:

+ diff --git a/pelican-plugins/m/test/htmlsanity_typography/page.rst b/pelican-plugins/m/test/htmlsanity_typography/page.rst index f4291818..cdab8f14 100644 --- a/pelican-plugins/m/test/htmlsanity_typography/page.rst +++ b/pelican-plugins/m/test/htmlsanity_typography/page.rst @@ -33,3 +33,9 @@ verbatim blocks: .. class:: language-cs Odstavec v češtině. "Uvozovky" fungují jinak a dělení slov jakbysmet. + +Links with titles that are URLs (or e-mail addresses) shouldn't be hyphenated +either: + +- info@magnum.graphics +- https://magnum.graphics -- 2.30.2