From c381fb3165a1544de5a32b8dfcd8ad74398fefc8 Mon Sep 17 00:00:00 2001 From: Simon Tatham Date: Sat, 23 Dec 2023 16:31:06 +0000 Subject: [PATCH] First cut at transcribing the scanning regexes. --- Cargo.toml | 1 + src/lib.rs | 1 + src/scan_re.rs | 138 +++++++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 140 insertions(+) create mode 100644 src/scan_re.rs diff --git a/Cargo.toml b/Cargo.toml index a59e453..7d7387d 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -9,6 +9,7 @@ chrono = { version = "0.4.31", features = ["serde"] } crossterm = "0.27.0" html2text = { version = "0.9.0", features = ["css"] } ratatui = "0.25.0" +regex = "1.10.2" reqwest = { version = "0.11.23", features = ["blocking"] } serde = { version = "1.0.193", features = ["derive"] } serde_json = "1.0.108" diff --git a/src/lib.rs b/src/lib.rs index 976d77a..4a6c115 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -1,6 +1,7 @@ pub mod types; pub mod auth; pub mod html; +pub mod scan_re; #[derive(Debug)] pub enum OurError { diff --git a/src/scan_re.rs b/src/scan_re.rs new file mode 100644 index 0000000..a32cf05 --- /dev/null +++ b/src/scan_re.rs @@ -0,0 +1,138 @@ +use regex::Regex; + +pub struct Scan { + pub mention: Regex, + pub hashtag: Regex, + pub url: Regex, +} + +impl Scan { + pub fn new() -> Self { + let word = "0-9A-Z_a-z\u{aa}\u{b5}\u{ba}\u{c0}-\u{d6}\u{d8}-\u{f6}\u{f8}-\u{02c1}\u{02c6}-\u{02d1}\u{02e0}-\u{02e4}\u{02ec}\u{02ee}\u{0300}-\u{0374}\u{0376}-\u{0377}\u{037a}-\u{037d}\u{037f}\u{0386}\u{0388}-\u{038a}\u{038c}\u{038e}-\u{03a1}\u{03a3}-\u{03f5}\u{03f7}-\u{0481}\u{0483}-\u{052f}\u{0531}-\u{0556}\u{0559}\u{0560}-\u{0588}\u{0591}-\u{05bd}\u{05bf}\u{05c1}-\u{05c2}\u{05c4}-\u{05c5}\u{05c7}\u{05d0}-\u{05ea}\u{05ef}-\u{05f2}\u{0610}-\u{061a}\u{0620}-\u{0669}\u{066e}-\u{06d3}\u{06d5}-\u{06dc}\u{06df}-\u{06e8}\u{06ea}-\u{06fc}\u{06ff}\u{0710}-\u{074a}\u{074d}-\u{07b1}\u{07c0}-\u{07f5}\u{07fa}\u{07fd}\u{0800}-\u{082d}\u{0840}-\u{085b}\u{0860}-\u{086a}\u{08a0}-\u{08b4}\u{08b6}-\u{08bd}\u{08d3}-\u{08e1}\u{08e3}-\u{0963}\u{0966}-\u{096f}\u{0971}-\u{0983}\u{0985}-\u{098c}\u{098f}-\u{0990}\u{0993}-\u{09a8}\u{09aa}-\u{09b0}\u{09b2}\u{09b6}-\u{09b9}\u{09bc}-\u{09c4}\u{09c7}-\u{09c8}\u{09cb}-\u{09ce}\u{09d7}\u{09dc}-\u{09dd}\u{09df}-\u{09e3}\u{09e6}-\u{09f1}\u{09fc}\u{09fe}\u{0a01}-\u{0a03}\u{0a05}-\u{0a0a}\u{0a0f}-\u{0a10}\u{0a13}-\u{0a28}\u{0a2a}-\u{0a30}\u{0a32}-\u{0a33}\u{0a35}-\u{0a36}\u{0a38}-\u{0a39}\u{0a3c}\u{0a3e}-\u{0a42}\u{0a47}-\u{0a48}\u{0a4b}-\u{0a4d}\u{0a51}\u{0a59}-\u{0a5c}\u{0a5e}\u{0a66}-\u{0a75}\u{0a81}-\u{0a83}\u{0a85}-\u{0a8d}\u{0a8f}-\u{0a91}\u{0a93}-\u{0aa8}\u{0aaa}-\u{0ab0}\u{0ab2}-\u{0ab3}\u{0ab5}-\u{0ab9}\u{0abc}-\u{0ac5}\u{0ac7}-\u{0ac9}\u{0acb}-\u{0acd}\u{0ad0}\u{0ae0}-\u{0ae3}\u{0ae6}-\u{0aef}\u{0af9}-\u{0aff}\u{0b01}-\u{0b03}\u{0b05}-\u{0b0c}\u{0b0f}-\u{0b10}\u{0b13}-\u{0b28}\u{0b2a}-\u{0b30}\u{0b32}-\u{0b33}\u{0b35}-\u{0b39}\u{0b3c}-\u{0b44}\u{0b47}-\u{0b48}\u{0b4b}-\u{0b4d}\u{0b56}-\u{0b57}\u{0b5c}-\u{0b5d}\u{0b5f}-\u{0b63}\u{0b66}-\u{0b6f}\u{0b71}\u{0b82}-\u{0b83}\u{0b85}-\u{0b8a}\u{0b8e}-\u{0b90}\u{0b92}-\u{0b95}\u{0b99}-\u{0b9a}\u{0b9c}\u{0b9e}-\u{0b9f}\u{0ba3}-\u{0ba4}\u{0ba8}-\u{0baa}\u{0bae}-\u{0bb9}\u{0bbe}-\u{0bc2}\u{0bc6}-\u{0bc8}\u{0bca}-\u{0bcd}\u{0bd0}\u{0bd7}\u{0be6}-\u{0bef}\u{0c00}-\u{0c0c}\u{0c0e}-\u{0c10}\u{0c12}-\u{0c28}\u{0c2a}-\u{0c39}\u{0c3d}-\u{0c44}\u{0c46}-\u{0c48}\u{0c4a}-\u{0c4d}\u{0c55}-\u{0c56}\u{0c58}-\u{0c5a}\u{0c60}-\u{0c63}\u{0c66}-\u{0c6f}\u{0c80}-\u{0c83}\u{0c85}-\u{0c8c}\u{0c8e}-\u{0c90}\u{0c92}-\u{0ca8}\u{0caa}-\u{0cb3}\u{0cb5}-\u{0cb9}\u{0cbc}-\u{0cc4}\u{0cc6}-\u{0cc8}\u{0cca}-\u{0ccd}\u{0cd5}-\u{0cd6}\u{0cde}\u{0ce0}-\u{0ce3}\u{0ce6}-\u{0cef}\u{0cf1}-\u{0cf2}\u{0d00}-\u{0d03}\u{0d05}-\u{0d0c}\u{0d0e}-\u{0d10}\u{0d12}-\u{0d44}\u{0d46}-\u{0d48}\u{0d4a}-\u{0d4e}\u{0d54}-\u{0d57}\u{0d5f}-\u{0d63}\u{0d66}-\u{0d6f}\u{0d7a}-\u{0d7f}\u{0d82}-\u{0d83}\u{0d85}-\u{0d96}\u{0d9a}-\u{0db1}\u{0db3}-\u{0dbb}\u{0dbd}\u{0dc0}-\u{0dc6}\u{0dca}\u{0dcf}-\u{0dd4}\u{0dd6}\u{0dd8}-\u{0ddf}\u{0de6}-\u{0def}\u{0df2}-\u{0df3}\u{0e01}-\u{0e3a}\u{0e40}-\u{0e4e}\u{0e50}-\u{0e59}\u{0e81}-\u{0e82}\u{0e84}\u{0e86}-\u{0e8a}\u{0e8c}-\u{0ea3}\u{0ea5}\u{0ea7}-\u{0ebd}\u{0ec0}-\u{0ec4}\u{0ec6}\u{0ec8}-\u{0ecd}\u{0ed0}-\u{0ed9}\u{0edc}-\u{0edf}\u{0f00}\u{0f18}-\u{0f19}\u{0f20}-\u{0f29}\u{0f35}\u{0f37}\u{0f39}\u{0f3e}-\u{0f47}\u{0f49}-\u{0f6c}\u{0f71}-\u{0f84}\u{0f86}-\u{0f97}\u{0f99}-\u{0fbc}\u{0fc6}\u{1000}-\u{1049}\u{1050}-\u{109d}\u{10a0}-\u{10c5}\u{10c7}\u{10cd}\u{10d0}-\u{10fa}\u{10fc}-\u{1248}\u{124a}-\u{124d}\u{1250}-\u{1256}\u{1258}\u{125a}-\u{125d}\u{1260}-\u{1288}\u{128a}-\u{128d}\u{1290}-\u{12b0}\u{12b2}-\u{12b5}\u{12b8}-\u{12be}\u{12c0}\u{12c2}-\u{12c5}\u{12c8}-\u{12d6}\u{12d8}-\u{1310}\u{1312}-\u{1315}\u{1318}-\u{135a}\u{135d}-\u{135f}\u{1380}-\u{138f}\u{13a0}-\u{13f5}\u{13f8}-\u{13fd}\u{1401}-\u{166c}\u{166f}-\u{167f}\u{1681}-\u{169a}\u{16a0}-\u{16ea}\u{16ee}-\u{16f8}\u{1700}-\u{170c}\u{170e}-\u{1714}\u{1720}-\u{1734}\u{1740}-\u{1753}\u{1760}-\u{176c}\u{176e}-\u{1770}\u{1772}-\u{1773}\u{1780}-\u{17d3}\u{17d7}\u{17dc}-\u{17dd}\u{17e0}-\u{17e9}\u{180b}-\u{180d}\u{1810}-\u{1819}\u{1820}-\u{1878}\u{1880}-\u{18aa}\u{18b0}-\u{18f5}\u{1900}-\u{191e}\u{1920}-\u{192b}\u{1930}-\u{193b}\u{1946}-\u{196d}\u{1970}-\u{1974}\u{1980}-\u{19ab}\u{19b0}-\u{19c9}\u{19d0}-\u{19d9}\u{1a00}-\u{1a1b}\u{1a20}-\u{1a5e}\u{1a60}-\u{1a7c}\u{1a7f}-\u{1a89}\u{1a90}-\u{1a99}\u{1aa7}\u{1ab0}-\u{1abe}\u{1b00}-\u{1b4b}\u{1b50}-\u{1b59}\u{1b6b}-\u{1b73}\u{1b80}-\u{1bf3}\u{1c00}-\u{1c37}\u{1c40}-\u{1c49}\u{1c4d}-\u{1c7d}\u{1c80}-\u{1c88}\u{1c90}-\u{1cba}\u{1cbd}-\u{1cbf}\u{1cd0}-\u{1cd2}\u{1cd4}-\u{1cfa}\u{1d00}-\u{1df9}\u{1dfb}-\u{1f15}\u{1f18}-\u{1f1d}\u{1f20}-\u{1f45}\u{1f48}-\u{1f4d}\u{1f50}-\u{1f57}\u{1f59}\u{1f5b}\u{1f5d}\u{1f5f}-\u{1f7d}\u{1f80}-\u{1fb4}\u{1fb6}-\u{1fbc}\u{1fbe}\u{1fc2}-\u{1fc4}\u{1fc6}-\u{1fcc}\u{1fd0}-\u{1fd3}\u{1fd6}-\u{1fdb}\u{1fe0}-\u{1fec}\u{1ff2}-\u{1ff4}\u{1ff6}-\u{1ffc}\u{203f}-\u{2040}\u{2054}\u{2071}\u{207f}\u{2090}-\u{209c}\u{20d0}-\u{20f0}\u{2102}\u{2107}\u{210a}-\u{2113}\u{2115}\u{2119}-\u{211d}\u{2124}\u{2126}\u{2128}\u{212a}-\u{212d}\u{212f}-\u{2139}\u{213c}-\u{213f}\u{2145}-\u{2149}\u{214e}\u{2160}-\u{2188}\u{24b6}-\u{24e9}\u{2c00}-\u{2c2e}\u{2c30}-\u{2c5e}\u{2c60}-\u{2ce4}\u{2ceb}-\u{2cf3}\u{2d00}-\u{2d25}\u{2d27}\u{2d2d}\u{2d30}-\u{2d67}\u{2d6f}\u{2d7f}-\u{2d96}\u{2da0}-\u{2da6}\u{2da8}-\u{2dae}\u{2db0}-\u{2db6}\u{2db8}-\u{2dbe}\u{2dc0}-\u{2dc6}\u{2dc8}-\u{2dce}\u{2dd0}-\u{2dd6}\u{2dd8}-\u{2dde}\u{2de0}-\u{2dff}\u{2e2f}\u{3005}-\u{3007}\u{3021}-\u{302f}\u{3031}-\u{3035}\u{3038}-\u{303c}\u{3041}-\u{3096}\u{3099}-\u{309a}\u{309d}-\u{309f}\u{30a1}-\u{30fa}\u{30fc}-\u{30ff}\u{3105}-\u{312f}\u{3131}-\u{318e}\u{31a0}-\u{31ba}\u{31f0}-\u{31ff}\u{3400}-\u{4db5}\u{4e00}-\u{9fef}\u{a000}-\u{a48c}\u{a4d0}-\u{a4fd}\u{a500}-\u{a60c}\u{a610}-\u{a62b}\u{a640}-\u{a672}\u{a674}-\u{a67d}\u{a67f}-\u{a6f1}\u{a717}-\u{a71f}\u{a722}-\u{a788}\u{a78b}-\u{a7bf}\u{a7c2}-\u{a7c6}\u{a7f7}-\u{a827}\u{a840}-\u{a873}\u{a880}-\u{a8c5}\u{a8d0}-\u{a8d9}\u{a8e0}-\u{a8f7}\u{a8fb}\u{a8fd}-\u{a92d}\u{a930}-\u{a953}\u{a960}-\u{a97c}\u{a980}-\u{a9c0}\u{a9cf}-\u{a9d9}\u{a9e0}-\u{a9fe}\u{aa00}-\u{aa36}\u{aa40}-\u{aa4d}\u{aa50}-\u{aa59}\u{aa60}-\u{aa76}\u{aa7a}-\u{aac2}\u{aadb}-\u{aadd}\u{aae0}-\u{aaef}\u{aaf2}-\u{aaf6}\u{ab01}-\u{ab06}\u{ab09}-\u{ab0e}\u{ab11}-\u{ab16}\u{ab20}-\u{ab26}\u{ab28}-\u{ab2e}\u{ab30}-\u{ab5a}\u{ab5c}-\u{ab67}\u{ab70}-\u{abea}\u{abec}-\u{abed}\u{abf0}-\u{abf9}\u{ac00}-\u{d7a3}\u{d7b0}-\u{d7c6}\u{d7cb}-\u{d7fb}\u{f900}-\u{fa6d}\u{fa70}-\u{fad9}\u{fb00}-\u{fb06}\u{fb13}-\u{fb17}\u{fb1d}-\u{fb28}\u{fb2a}-\u{fb36}\u{fb38}-\u{fb3c}\u{fb3e}\u{fb40}-\u{fb41}\u{fb43}-\u{fb44}\u{fb46}-\u{fbb1}\u{fbd3}-\u{fd3d}\u{fd50}-\u{fd8f}\u{fd92}-\u{fdc7}\u{fdf0}-\u{fdfb}\u{fe00}-\u{fe0f}\u{fe20}-\u{fe2f}\u{fe33}-\u{fe34}\u{fe4d}-\u{fe4f}\u{fe70}-\u{fe74}\u{fe76}-\u{fefc}\u{ff10}-\u{ff19}\u{ff21}-\u{ff3a}\u{ff3f}\u{ff41}-\u{ff5a}\u{ff66}-\u{ffbe}\u{ffc2}-\u{ffc7}\u{ffca}-\u{ffcf}\u{ffd2}-\u{ffd7}\u{ffda}-\u{ffdc}\u{010000}-\u{01000b}\u{01000d}-\u{010026}\u{010028}-\u{01003a}\u{01003c}-\u{01003d}\u{01003f}-\u{01004d}\u{010050}-\u{01005d}\u{010080}-\u{0100fa}\u{010140}-\u{010174}\u{0101fd}\u{010280}-\u{01029c}\u{0102a0}-\u{0102d0}\u{0102e0}\u{010300}-\u{01031f}\u{01032d}-\u{01034a}\u{010350}-\u{01037a}\u{010380}-\u{01039d}\u{0103a0}-\u{0103c3}\u{0103c8}-\u{0103cf}\u{0103d1}-\u{0103d5}\u{010400}-\u{01049d}\u{0104a0}-\u{0104a9}\u{0104b0}-\u{0104d3}\u{0104d8}-\u{0104fb}\u{010500}-\u{010527}\u{010530}-\u{010563}\u{010600}-\u{010736}\u{010740}-\u{010755}\u{010760}-\u{010767}\u{010800}-\u{010805}\u{010808}\u{01080a}-\u{010835}\u{010837}-\u{010838}\u{01083c}\u{01083f}-\u{010855}\u{010860}-\u{010876}\u{010880}-\u{01089e}\u{0108e0}-\u{0108f2}\u{0108f4}-\u{0108f5}\u{010900}-\u{010915}\u{010920}-\u{010939}\u{010980}-\u{0109b7}\u{0109be}-\u{0109bf}\u{010a00}-\u{010a03}\u{010a05}-\u{010a06}\u{010a0c}-\u{010a13}\u{010a15}-\u{010a17}\u{010a19}-\u{010a35}\u{010a38}-\u{010a3a}\u{010a3f}\u{010a60}-\u{010a7c}\u{010a80}-\u{010a9c}\u{010ac0}-\u{010ac7}\u{010ac9}-\u{010ae6}\u{010b00}-\u{010b35}\u{010b40}-\u{010b55}\u{010b60}-\u{010b72}\u{010b80}-\u{010b91}\u{010c00}-\u{010c48}\u{010c80}-\u{010cb2}\u{010cc0}-\u{010cf2}\u{010d00}-\u{010d27}\u{010d30}-\u{010d39}\u{010f00}-\u{010f1c}\u{010f27}\u{010f30}-\u{010f50}\u{010fe0}-\u{010ff6}\u{011000}-\u{011046}\u{011066}-\u{01106f}\u{01107f}-\u{0110ba}\u{0110d0}-\u{0110e8}\u{0110f0}-\u{0110f9}\u{011100}-\u{011134}\u{011136}-\u{01113f}\u{011144}-\u{011146}\u{011150}-\u{011173}\u{011176}\u{011180}-\u{0111c4}\u{0111c9}-\u{0111cc}\u{0111d0}-\u{0111da}\u{0111dc}\u{011200}-\u{011211}\u{011213}-\u{011237}\u{01123e}\u{011280}-\u{011286}\u{011288}\u{01128a}-\u{01128d}\u{01128f}-\u{01129d}\u{01129f}-\u{0112a8}\u{0112b0}-\u{0112ea}\u{0112f0}-\u{0112f9}\u{011300}-\u{011303}\u{011305}-\u{01130c}\u{01130f}-\u{011310}\u{011313}-\u{011328}\u{01132a}-\u{011330}\u{011332}-\u{011333}\u{011335}-\u{011339}\u{01133b}-\u{011344}\u{011347}-\u{011348}\u{01134b}-\u{01134d}\u{011350}\u{011357}\u{01135d}-\u{011363}\u{011366}-\u{01136c}\u{011370}-\u{011374}\u{011400}-\u{01144a}\u{011450}-\u{011459}\u{01145e}-\u{01145f}\u{011480}-\u{0114c5}\u{0114c7}\u{0114d0}-\u{0114d9}\u{011580}-\u{0115b5}\u{0115b8}-\u{0115c0}\u{0115d8}-\u{0115dd}\u{011600}-\u{011640}\u{011644}\u{011650}-\u{011659}\u{011680}-\u{0116b8}\u{0116c0}-\u{0116c9}\u{011700}-\u{01171a}\u{01171d}-\u{01172b}\u{011730}-\u{011739}\u{011800}-\u{01183a}\u{0118a0}-\u{0118e9}\u{0118ff}\u{0119a0}-\u{0119a7}\u{0119aa}-\u{0119d7}\u{0119da}-\u{0119e1}\u{0119e3}-\u{0119e4}\u{011a00}-\u{011a3e}\u{011a47}\u{011a50}-\u{011a99}\u{011a9d}\u{011ac0}-\u{011af8}\u{011c00}-\u{011c08}\u{011c0a}-\u{011c36}\u{011c38}-\u{011c40}\u{011c50}-\u{011c59}\u{011c72}-\u{011c8f}\u{011c92}-\u{011ca7}\u{011ca9}-\u{011cb6}\u{011d00}-\u{011d06}\u{011d08}-\u{011d09}\u{011d0b}-\u{011d36}\u{011d3a}\u{011d3c}-\u{011d3d}\u{011d3f}-\u{011d47}\u{011d50}-\u{011d59}\u{011d60}-\u{011d65}\u{011d67}-\u{011d68}\u{011d6a}-\u{011d8e}\u{011d90}-\u{011d91}\u{011d93}-\u{011d98}\u{011da0}-\u{011da9}\u{011ee0}-\u{011ef6}\u{012000}-\u{012399}\u{012400}-\u{01246e}\u{012480}-\u{012543}\u{013000}-\u{01342e}\u{014400}-\u{014646}\u{016800}-\u{016a38}\u{016a40}-\u{016a5e}\u{016a60}-\u{016a69}\u{016ad0}-\u{016aed}\u{016af0}-\u{016af4}\u{016b00}-\u{016b36}\u{016b40}-\u{016b43}\u{016b50}-\u{016b59}\u{016b63}-\u{016b77}\u{016b7d}-\u{016b8f}\u{016e40}-\u{016e7f}\u{016f00}-\u{016f4a}\u{016f4f}-\u{016f87}\u{016f8f}-\u{016f9f}\u{016fe0}-\u{016fe1}\u{016fe3}\u{017000}-\u{0187f7}\u{018800}-\u{018af2}\u{01b000}-\u{01b11e}\u{01b150}-\u{01b152}\u{01b164}-\u{01b167}\u{01b170}-\u{01b2fb}\u{01bc00}-\u{01bc6a}\u{01bc70}-\u{01bc7c}\u{01bc80}-\u{01bc88}\u{01bc90}-\u{01bc99}\u{01bc9d}-\u{01bc9e}\u{01d165}-\u{01d169}\u{01d16d}-\u{01d172}\u{01d17b}-\u{01d182}\u{01d185}-\u{01d18b}\u{01d1aa}-\u{01d1ad}\u{01d242}-\u{01d244}\u{01d400}-\u{01d454}\u{01d456}-\u{01d49c}\u{01d49e}-\u{01d49f}\u{01d4a2}\u{01d4a5}-\u{01d4a6}\u{01d4a9}-\u{01d4ac}\u{01d4ae}-\u{01d4b9}\u{01d4bb}\u{01d4bd}-\u{01d4c3}\u{01d4c5}-\u{01d505}\u{01d507}-\u{01d50a}\u{01d50d}-\u{01d514}\u{01d516}-\u{01d51c}\u{01d51e}-\u{01d539}\u{01d53b}-\u{01d53e}\u{01d540}-\u{01d544}\u{01d546}\u{01d54a}-\u{01d550}\u{01d552}-\u{01d6a5}\u{01d6a8}-\u{01d6c0}\u{01d6c2}-\u{01d6da}\u{01d6dc}-\u{01d6fa}\u{01d6fc}-\u{01d714}\u{01d716}-\u{01d734}\u{01d736}-\u{01d74e}\u{01d750}-\u{01d76e}\u{01d770}-\u{01d788}\u{01d78a}-\u{01d7a8}\u{01d7aa}-\u{01d7c2}\u{01d7c4}-\u{01d7cb}\u{01d7ce}-\u{01d7ff}\u{01da00}-\u{01da36}\u{01da3b}-\u{01da6c}\u{01da75}\u{01da84}\u{01da9b}-\u{01da9f}\u{01daa1}-\u{01daaf}\u{01e000}-\u{01e006}\u{01e008}-\u{01e018}\u{01e01b}-\u{01e021}\u{01e023}-\u{01e024}\u{01e026}-\u{01e02a}\u{01e100}-\u{01e12c}\u{01e130}-\u{01e13d}\u{01e140}-\u{01e149}\u{01e14e}\u{01e2c0}-\u{01e2f9}\u{01e800}-\u{01e8c4}\u{01e8d0}-\u{01e8d6}\u{01e900}-\u{01e94b}\u{01e950}-\u{01e959}\u{01ee00}-\u{01ee03}\u{01ee05}-\u{01ee1f}\u{01ee21}-\u{01ee22}\u{01ee24}\u{01ee27}\u{01ee29}-\u{01ee32}\u{01ee34}-\u{01ee37}\u{01ee39}\u{01ee3b}\u{01ee42}\u{01ee47}\u{01ee49}\u{01ee4b}\u{01ee4d}-\u{01ee4f}\u{01ee51}-\u{01ee52}\u{01ee54}\u{01ee57}\u{01ee59}\u{01ee5b}\u{01ee5d}\u{01ee5f}\u{01ee61}-\u{01ee62}\u{01ee64}\u{01ee67}-\u{01ee6a}\u{01ee6c}-\u{01ee72}\u{01ee74}-\u{01ee77}\u{01ee79}-\u{01ee7c}\u{01ee7e}\u{01ee80}-\u{01ee89}\u{01ee8b}-\u{01ee9b}\u{01eea1}-\u{01eea3}\u{01eea5}-\u{01eea9}\u{01eeab}-\u{01eebb}\u{01f130}-\u{01f149}\u{01f150}-\u{01f169}\u{01f170}-\u{01f189}\u{020000}-\u{02a6d6}\u{02a700}-\u{02b734}\u{02b740}-\u{02b81d}\u{02b820}-\u{02cea1}\u{02ceb0}-\u{02ebe0}\u{02f800}-\u{02fa1d}\u{0e0100}-\u{0e01ef}"; + + let alpha = "A-Za-z\u{aa}\u{b5}\u{ba}\u{c0}-\u{d6}\u{d8}-\u{f6}\u{f8}-\u{02c1}\u{02c6}-\u{02d1}\u{02e0}-\u{02e4}\u{02ec}\u{02ee}\u{0345}\u{0370}-\u{0374}\u{0376}-\u{0377}\u{037a}-\u{037d}\u{037f}\u{0386}\u{0388}-\u{038a}\u{038c}\u{038e}-\u{03a1}\u{03a3}-\u{03f5}\u{03f7}-\u{0481}\u{048a}-\u{052f}\u{0531}-\u{0556}\u{0559}\u{0560}-\u{0588}\u{05b0}-\u{05bd}\u{05bf}\u{05c1}-\u{05c2}\u{05c4}-\u{05c5}\u{05c7}\u{05d0}-\u{05ea}\u{05ef}-\u{05f2}\u{0610}-\u{061a}\u{0620}-\u{0657}\u{0659}-\u{065f}\u{066e}-\u{06d3}\u{06d5}-\u{06dc}\u{06e1}-\u{06e8}\u{06ed}-\u{06ef}\u{06fa}-\u{06fc}\u{06ff}\u{0710}-\u{073f}\u{074d}-\u{07b1}\u{07ca}-\u{07ea}\u{07f4}-\u{07f5}\u{07fa}\u{0800}-\u{0817}\u{081a}-\u{082c}\u{0840}-\u{0858}\u{0860}-\u{086a}\u{08a0}-\u{08b4}\u{08b6}-\u{08bd}\u{08d4}-\u{08df}\u{08e3}-\u{08e9}\u{08f0}-\u{093b}\u{093d}-\u{094c}\u{094e}-\u{0950}\u{0955}-\u{0963}\u{0971}-\u{0983}\u{0985}-\u{098c}\u{098f}-\u{0990}\u{0993}-\u{09a8}\u{09aa}-\u{09b0}\u{09b2}\u{09b6}-\u{09b9}\u{09bd}-\u{09c4}\u{09c7}-\u{09c8}\u{09cb}-\u{09cc}\u{09ce}\u{09d7}\u{09dc}-\u{09dd}\u{09df}-\u{09e3}\u{09f0}-\u{09f1}\u{09fc}\u{0a01}-\u{0a03}\u{0a05}-\u{0a0a}\u{0a0f}-\u{0a10}\u{0a13}-\u{0a28}\u{0a2a}-\u{0a30}\u{0a32}-\u{0a33}\u{0a35}-\u{0a36}\u{0a38}-\u{0a39}\u{0a3e}-\u{0a42}\u{0a47}-\u{0a48}\u{0a4b}-\u{0a4c}\u{0a51}\u{0a59}-\u{0a5c}\u{0a5e}\u{0a70}-\u{0a75}\u{0a81}-\u{0a83}\u{0a85}-\u{0a8d}\u{0a8f}-\u{0a91}\u{0a93}-\u{0aa8}\u{0aaa}-\u{0ab0}\u{0ab2}-\u{0ab3}\u{0ab5}-\u{0ab9}\u{0abd}-\u{0ac5}\u{0ac7}-\u{0ac9}\u{0acb}-\u{0acc}\u{0ad0}\u{0ae0}-\u{0ae3}\u{0af9}-\u{0afc}\u{0b01}-\u{0b03}\u{0b05}-\u{0b0c}\u{0b0f}-\u{0b10}\u{0b13}-\u{0b28}\u{0b2a}-\u{0b30}\u{0b32}-\u{0b33}\u{0b35}-\u{0b39}\u{0b3d}-\u{0b44}\u{0b47}-\u{0b48}\u{0b4b}-\u{0b4c}\u{0b56}-\u{0b57}\u{0b5c}-\u{0b5d}\u{0b5f}-\u{0b63}\u{0b71}\u{0b82}-\u{0b83}\u{0b85}-\u{0b8a}\u{0b8e}-\u{0b90}\u{0b92}-\u{0b95}\u{0b99}-\u{0b9a}\u{0b9c}\u{0b9e}-\u{0b9f}\u{0ba3}-\u{0ba4}\u{0ba8}-\u{0baa}\u{0bae}-\u{0bb9}\u{0bbe}-\u{0bc2}\u{0bc6}-\u{0bc8}\u{0bca}-\u{0bcc}\u{0bd0}\u{0bd7}\u{0c00}-\u{0c03}\u{0c05}-\u{0c0c}\u{0c0e}-\u{0c10}\u{0c12}-\u{0c28}\u{0c2a}-\u{0c39}\u{0c3d}-\u{0c44}\u{0c46}-\u{0c48}\u{0c4a}-\u{0c4c}\u{0c55}-\u{0c56}\u{0c58}-\u{0c5a}\u{0c60}-\u{0c63}\u{0c80}-\u{0c83}\u{0c85}-\u{0c8c}\u{0c8e}-\u{0c90}\u{0c92}-\u{0ca8}\u{0caa}-\u{0cb3}\u{0cb5}-\u{0cb9}\u{0cbd}-\u{0cc4}\u{0cc6}-\u{0cc8}\u{0cca}-\u{0ccc}\u{0cd5}-\u{0cd6}\u{0cde}\u{0ce0}-\u{0ce3}\u{0cf1}-\u{0cf2}\u{0d00}-\u{0d03}\u{0d05}-\u{0d0c}\u{0d0e}-\u{0d10}\u{0d12}-\u{0d3a}\u{0d3d}-\u{0d44}\u{0d46}-\u{0d48}\u{0d4a}-\u{0d4c}\u{0d4e}\u{0d54}-\u{0d57}\u{0d5f}-\u{0d63}\u{0d7a}-\u{0d7f}\u{0d82}-\u{0d83}\u{0d85}-\u{0d96}\u{0d9a}-\u{0db1}\u{0db3}-\u{0dbb}\u{0dbd}\u{0dc0}-\u{0dc6}\u{0dcf}-\u{0dd4}\u{0dd6}\u{0dd8}-\u{0ddf}\u{0df2}-\u{0df3}\u{0e01}-\u{0e3a}\u{0e40}-\u{0e46}\u{0e4d}\u{0e81}-\u{0e82}\u{0e84}\u{0e86}-\u{0e8a}\u{0e8c}-\u{0ea3}\u{0ea5}\u{0ea7}-\u{0eb9}\u{0ebb}-\u{0ebd}\u{0ec0}-\u{0ec4}\u{0ec6}\u{0ecd}\u{0edc}-\u{0edf}\u{0f00}\u{0f40}-\u{0f47}\u{0f49}-\u{0f6c}\u{0f71}-\u{0f81}\u{0f88}-\u{0f97}\u{0f99}-\u{0fbc}\u{1000}-\u{1036}\u{1038}\u{103b}-\u{103f}\u{1050}-\u{108f}\u{109a}-\u{109d}\u{10a0}-\u{10c5}\u{10c7}\u{10cd}\u{10d0}-\u{10fa}\u{10fc}-\u{1248}\u{124a}-\u{124d}\u{1250}-\u{1256}\u{1258}\u{125a}-\u{125d}\u{1260}-\u{1288}\u{128a}-\u{128d}\u{1290}-\u{12b0}\u{12b2}-\u{12b5}\u{12b8}-\u{12be}\u{12c0}\u{12c2}-\u{12c5}\u{12c8}-\u{12d6}\u{12d8}-\u{1310}\u{1312}-\u{1315}\u{1318}-\u{135a}\u{1380}-\u{138f}\u{13a0}-\u{13f5}\u{13f8}-\u{13fd}\u{1401}-\u{166c}\u{166f}-\u{167f}\u{1681}-\u{169a}\u{16a0}-\u{16ea}\u{16ee}-\u{16f8}\u{1700}-\u{170c}\u{170e}-\u{1713}\u{1720}-\u{1733}\u{1740}-\u{1753}\u{1760}-\u{176c}\u{176e}-\u{1770}\u{1772}-\u{1773}\u{1780}-\u{17b3}\u{17b6}-\u{17c8}\u{17d7}\u{17dc}\u{1820}-\u{1878}\u{1880}-\u{18aa}\u{18b0}-\u{18f5}\u{1900}-\u{191e}\u{1920}-\u{192b}\u{1930}-\u{1938}\u{1950}-\u{196d}\u{1970}-\u{1974}\u{1980}-\u{19ab}\u{19b0}-\u{19c9}\u{1a00}-\u{1a1b}\u{1a20}-\u{1a5e}\u{1a61}-\u{1a74}\u{1aa7}\u{1b00}-\u{1b33}\u{1b35}-\u{1b43}\u{1b45}-\u{1b4b}\u{1b80}-\u{1ba9}\u{1bac}-\u{1baf}\u{1bba}-\u{1be5}\u{1be7}-\u{1bf1}\u{1c00}-\u{1c36}\u{1c4d}-\u{1c4f}\u{1c5a}-\u{1c7d}\u{1c80}-\u{1c88}\u{1c90}-\u{1cba}\u{1cbd}-\u{1cbf}\u{1ce9}-\u{1cec}\u{1cee}-\u{1cf3}\u{1cf5}-\u{1cf6}\u{1cfa}\u{1d00}-\u{1dbf}\u{1de7}-\u{1df4}\u{1e00}-\u{1f15}\u{1f18}-\u{1f1d}\u{1f20}-\u{1f45}\u{1f48}-\u{1f4d}\u{1f50}-\u{1f57}\u{1f59}\u{1f5b}\u{1f5d}\u{1f5f}-\u{1f7d}\u{1f80}-\u{1fb4}\u{1fb6}-\u{1fbc}\u{1fbe}\u{1fc2}-\u{1fc4}\u{1fc6}-\u{1fcc}\u{1fd0}-\u{1fd3}\u{1fd6}-\u{1fdb}\u{1fe0}-\u{1fec}\u{1ff2}-\u{1ff4}\u{1ff6}-\u{1ffc}\u{2071}\u{207f}\u{2090}-\u{209c}\u{2102}\u{2107}\u{210a}-\u{2113}\u{2115}\u{2119}-\u{211d}\u{2124}\u{2126}\u{2128}\u{212a}-\u{212d}\u{212f}-\u{2139}\u{213c}-\u{213f}\u{2145}-\u{2149}\u{214e}\u{2160}-\u{2188}\u{24b6}-\u{24e9}\u{2c00}-\u{2c2e}\u{2c30}-\u{2c5e}\u{2c60}-\u{2ce4}\u{2ceb}-\u{2cee}\u{2cf2}-\u{2cf3}\u{2d00}-\u{2d25}\u{2d27}\u{2d2d}\u{2d30}-\u{2d67}\u{2d6f}\u{2d80}-\u{2d96}\u{2da0}-\u{2da6}\u{2da8}-\u{2dae}\u{2db0}-\u{2db6}\u{2db8}-\u{2dbe}\u{2dc0}-\u{2dc6}\u{2dc8}-\u{2dce}\u{2dd0}-\u{2dd6}\u{2dd8}-\u{2dde}\u{2de0}-\u{2dff}\u{2e2f}\u{3005}-\u{3007}\u{3021}-\u{3029}\u{3031}-\u{3035}\u{3038}-\u{303c}\u{3041}-\u{3096}\u{309d}-\u{309f}\u{30a1}-\u{30fa}\u{30fc}-\u{30ff}\u{3105}-\u{312f}\u{3131}-\u{318e}\u{31a0}-\u{31ba}\u{31f0}-\u{31ff}\u{3400}-\u{4db5}\u{4e00}-\u{9fef}\u{a000}-\u{a48c}\u{a4d0}-\u{a4fd}\u{a500}-\u{a60c}\u{a610}-\u{a61f}\u{a62a}-\u{a62b}\u{a640}-\u{a66e}\u{a674}-\u{a67b}\u{a67f}-\u{a6ef}\u{a717}-\u{a71f}\u{a722}-\u{a788}\u{a78b}-\u{a7bf}\u{a7c2}-\u{a7c6}\u{a7f7}-\u{a805}\u{a807}-\u{a827}\u{a840}-\u{a873}\u{a880}-\u{a8c3}\u{a8c5}\u{a8f2}-\u{a8f7}\u{a8fb}\u{a8fd}-\u{a8ff}\u{a90a}-\u{a92a}\u{a930}-\u{a952}\u{a960}-\u{a97c}\u{a980}-\u{a9b2}\u{a9b4}-\u{a9bf}\u{a9cf}\u{a9e0}-\u{a9ef}\u{a9fa}-\u{a9fe}\u{aa00}-\u{aa36}\u{aa40}-\u{aa4d}\u{aa60}-\u{aa76}\u{aa7a}-\u{aabe}\u{aac0}\u{aac2}\u{aadb}-\u{aadd}\u{aae0}-\u{aaef}\u{aaf2}-\u{aaf5}\u{ab01}-\u{ab06}\u{ab09}-\u{ab0e}\u{ab11}-\u{ab16}\u{ab20}-\u{ab26}\u{ab28}-\u{ab2e}\u{ab30}-\u{ab5a}\u{ab5c}-\u{ab67}\u{ab70}-\u{abea}\u{ac00}-\u{d7a3}\u{d7b0}-\u{d7c6}\u{d7cb}-\u{d7fb}\u{f900}-\u{fa6d}\u{fa70}-\u{fad9}\u{fb00}-\u{fb06}\u{fb13}-\u{fb17}\u{fb1d}-\u{fb28}\u{fb2a}-\u{fb36}\u{fb38}-\u{fb3c}\u{fb3e}\u{fb40}-\u{fb41}\u{fb43}-\u{fb44}\u{fb46}-\u{fbb1}\u{fbd3}-\u{fd3d}\u{fd50}-\u{fd8f}\u{fd92}-\u{fdc7}\u{fdf0}-\u{fdfb}\u{fe70}-\u{fe74}\u{fe76}-\u{fefc}\u{ff21}-\u{ff3a}\u{ff41}-\u{ff5a}\u{ff66}-\u{ffbe}\u{ffc2}-\u{ffc7}\u{ffca}-\u{ffcf}\u{ffd2}-\u{ffd7}\u{ffda}-\u{ffdc}\u{010000}-\u{01000b}\u{01000d}-\u{010026}\u{010028}-\u{01003a}\u{01003c}-\u{01003d}\u{01003f}-\u{01004d}\u{010050}-\u{01005d}\u{010080}-\u{0100fa}\u{010140}-\u{010174}\u{010280}-\u{01029c}\u{0102a0}-\u{0102d0}\u{010300}-\u{01031f}\u{01032d}-\u{01034a}\u{010350}-\u{01037a}\u{010380}-\u{01039d}\u{0103a0}-\u{0103c3}\u{0103c8}-\u{0103cf}\u{0103d1}-\u{0103d5}\u{010400}-\u{01049d}\u{0104b0}-\u{0104d3}\u{0104d8}-\u{0104fb}\u{010500}-\u{010527}\u{010530}-\u{010563}\u{010600}-\u{010736}\u{010740}-\u{010755}\u{010760}-\u{010767}\u{010800}-\u{010805}\u{010808}\u{01080a}-\u{010835}\u{010837}-\u{010838}\u{01083c}\u{01083f}-\u{010855}\u{010860}-\u{010876}\u{010880}-\u{01089e}\u{0108e0}-\u{0108f2}\u{0108f4}-\u{0108f5}\u{010900}-\u{010915}\u{010920}-\u{010939}\u{010980}-\u{0109b7}\u{0109be}-\u{0109bf}\u{010a00}-\u{010a03}\u{010a05}-\u{010a06}\u{010a0c}-\u{010a13}\u{010a15}-\u{010a17}\u{010a19}-\u{010a35}\u{010a60}-\u{010a7c}\u{010a80}-\u{010a9c}\u{010ac0}-\u{010ac7}\u{010ac9}-\u{010ae4}\u{010b00}-\u{010b35}\u{010b40}-\u{010b55}\u{010b60}-\u{010b72}\u{010b80}-\u{010b91}\u{010c00}-\u{010c48}\u{010c80}-\u{010cb2}\u{010cc0}-\u{010cf2}\u{010d00}-\u{010d27}\u{010f00}-\u{010f1c}\u{010f27}\u{010f30}-\u{010f45}\u{010fe0}-\u{010ff6}\u{011000}-\u{011045}\u{011082}-\u{0110b8}\u{0110d0}-\u{0110e8}\u{011100}-\u{011132}\u{011144}-\u{011146}\u{011150}-\u{011172}\u{011176}\u{011180}-\u{0111bf}\u{0111c1}-\u{0111c4}\u{0111da}\u{0111dc}\u{011200}-\u{011211}\u{011213}-\u{011234}\u{011237}\u{01123e}\u{011280}-\u{011286}\u{011288}\u{01128a}-\u{01128d}\u{01128f}-\u{01129d}\u{01129f}-\u{0112a8}\u{0112b0}-\u{0112e8}\u{011300}-\u{011303}\u{011305}-\u{01130c}\u{01130f}-\u{011310}\u{011313}-\u{011328}\u{01132a}-\u{011330}\u{011332}-\u{011333}\u{011335}-\u{011339}\u{01133d}-\u{011344}\u{011347}-\u{011348}\u{01134b}-\u{01134c}\u{011350}\u{011357}\u{01135d}-\u{011363}\u{011400}-\u{011441}\u{011443}-\u{011445}\u{011447}-\u{01144a}\u{01145f}\u{011480}-\u{0114c1}\u{0114c4}-\u{0114c5}\u{0114c7}\u{011580}-\u{0115b5}\u{0115b8}-\u{0115be}\u{0115d8}-\u{0115dd}\u{011600}-\u{01163e}\u{011640}\u{011644}\u{011680}-\u{0116b5}\u{0116b8}\u{011700}-\u{01171a}\u{01171d}-\u{01172a}\u{011800}-\u{011838}\u{0118a0}-\u{0118df}\u{0118ff}\u{0119a0}-\u{0119a7}\u{0119aa}-\u{0119d7}\u{0119da}-\u{0119df}\u{0119e1}\u{0119e3}-\u{0119e4}\u{011a00}-\u{011a32}\u{011a35}-\u{011a3e}\u{011a50}-\u{011a97}\u{011a9d}\u{011ac0}-\u{011af8}\u{011c00}-\u{011c08}\u{011c0a}-\u{011c36}\u{011c38}-\u{011c3e}\u{011c40}\u{011c72}-\u{011c8f}\u{011c92}-\u{011ca7}\u{011ca9}-\u{011cb6}\u{011d00}-\u{011d06}\u{011d08}-\u{011d09}\u{011d0b}-\u{011d36}\u{011d3a}\u{011d3c}-\u{011d3d}\u{011d3f}-\u{011d41}\u{011d43}\u{011d46}-\u{011d47}\u{011d60}-\u{011d65}\u{011d67}-\u{011d68}\u{011d6a}-\u{011d8e}\u{011d90}-\u{011d91}\u{011d93}-\u{011d96}\u{011d98}\u{011ee0}-\u{011ef6}\u{012000}-\u{012399}\u{012400}-\u{01246e}\u{012480}-\u{012543}\u{013000}-\u{01342e}\u{014400}-\u{014646}\u{016800}-\u{016a38}\u{016a40}-\u{016a5e}\u{016ad0}-\u{016aed}\u{016b00}-\u{016b2f}\u{016b40}-\u{016b43}\u{016b63}-\u{016b77}\u{016b7d}-\u{016b8f}\u{016e40}-\u{016e7f}\u{016f00}-\u{016f4a}\u{016f4f}-\u{016f87}\u{016f8f}-\u{016f9f}\u{016fe0}-\u{016fe1}\u{016fe3}\u{017000}-\u{0187f7}\u{018800}-\u{018af2}\u{01b000}-\u{01b11e}\u{01b150}-\u{01b152}\u{01b164}-\u{01b167}\u{01b170}-\u{01b2fb}\u{01bc00}-\u{01bc6a}\u{01bc70}-\u{01bc7c}\u{01bc80}-\u{01bc88}\u{01bc90}-\u{01bc99}\u{01bc9e}\u{01d400}-\u{01d454}\u{01d456}-\u{01d49c}\u{01d49e}-\u{01d49f}\u{01d4a2}\u{01d4a5}-\u{01d4a6}\u{01d4a9}-\u{01d4ac}\u{01d4ae}-\u{01d4b9}\u{01d4bb}\u{01d4bd}-\u{01d4c3}\u{01d4c5}-\u{01d505}\u{01d507}-\u{01d50a}\u{01d50d}-\u{01d514}\u{01d516}-\u{01d51c}\u{01d51e}-\u{01d539}\u{01d53b}-\u{01d53e}\u{01d540}-\u{01d544}\u{01d546}\u{01d54a}-\u{01d550}\u{01d552}-\u{01d6a5}\u{01d6a8}-\u{01d6c0}\u{01d6c2}-\u{01d6da}\u{01d6dc}-\u{01d6fa}\u{01d6fc}-\u{01d714}\u{01d716}-\u{01d734}\u{01d736}-\u{01d74e}\u{01d750}-\u{01d76e}\u{01d770}-\u{01d788}\u{01d78a}-\u{01d7a8}\u{01d7aa}-\u{01d7c2}\u{01d7c4}-\u{01d7cb}\u{01e000}-\u{01e006}\u{01e008}-\u{01e018}\u{01e01b}-\u{01e021}\u{01e023}-\u{01e024}\u{01e026}-\u{01e02a}\u{01e100}-\u{01e12c}\u{01e137}-\u{01e13d}\u{01e14e}\u{01e2c0}-\u{01e2eb}\u{01e800}-\u{01e8c4}\u{01e900}-\u{01e943}\u{01e947}\u{01e94b}\u{01ee00}-\u{01ee03}\u{01ee05}-\u{01ee1f}\u{01ee21}-\u{01ee22}\u{01ee24}\u{01ee27}\u{01ee29}-\u{01ee32}\u{01ee34}-\u{01ee37}\u{01ee39}\u{01ee3b}\u{01ee42}\u{01ee47}\u{01ee49}\u{01ee4b}\u{01ee4d}-\u{01ee4f}\u{01ee51}-\u{01ee52}\u{01ee54}\u{01ee57}\u{01ee59}\u{01ee5b}\u{01ee5d}\u{01ee5f}\u{01ee61}-\u{01ee62}\u{01ee64}\u{01ee67}-\u{01ee6a}\u{01ee6c}-\u{01ee72}\u{01ee74}-\u{01ee77}\u{01ee79}-\u{01ee7c}\u{01ee7e}\u{01ee80}-\u{01ee89}\u{01ee8b}-\u{01ee9b}\u{01eea1}-\u{01eea3}\u{01eea5}-\u{01eea9}\u{01eeab}-\u{01eebb}\u{01f130}-\u{01f149}\u{01f150}-\u{01f169}\u{01f170}-\u{01f189}\u{020000}-\u{02a6d6}\u{02a700}-\u{02b734}\u{02b740}-\u{02b81d}\u{02b820}-\u{02cea1}\u{02ceb0}-\u{02ebe0}\u{02f800}-\u{02fa1d}"; + + let cyrillic = "\u{0400}-\u{0484}\u{0487}-\u{052f}\u{1c80}-\u{1c88}\u{1d2b}\u{1d78}\u{2de0}-\u{2dff}\u{a640}-\u{a69f}\u{fe2e}-\u{fe2f}"; + + let accented = "\u{C0}-\u{D6}\u{D8}-\u{F6}\u{F8}-\u{FF}\u{0100}-\u{024F}\u{0253}-\u{0254}\u{0256}-\u{0257}\u{0259}\u{025B}\u{0263}\u{0268}\u{026F}\u{0272}\u{0289}\u{028B}\u{02BB}\u{0300}-\u{036F}\u{1E00}-\u{1EFF}"; + + let pd = "\\-\u{058A}\u{05BE}\u{1400}\u{1806}\u{2010}-\u{2015}\u{2E17}\u{2E1A}\u{2E3A}\u{2E3B}\u{2E40}\u{2E5D}\u{301C}\u{3030}\u{30A0}\u{FE31}\u{FE32}\u{FE58}\u{FE63}\u{FF0D}\u{010EAD}"; + + let directional = "\u{061C}\u{200E}\u{200F}\u{202A}\u{202B}\u{202C}\u{202D}\u{202E}\u{2066}\u{2067}\u{2068}\u{2069}"; + let ctrl = "\u{00}-\u{1F}\u{7F}"; + let space = "\u{09}-\u{0D}\u{20}\u{85}\u{A0}\u{1680}\u{180E}\u{2000}-\u{200A}\u{2028}\u{2029}\u{202F}\u{205F}\u{3000}"; + + let username = "'(?i:[a-z0-9_]+([a-z0-9_.-]+[a-z0-9_]+)?)"; + + let mention = Regex::new( + &("(?i:(??@\\[\\]^\\`{|}~"; + let domain_invalid_end_chars = + domain_invalid_middle_chars.to_owned() + "_-"; + let domain_component = "[^".to_owned() + &domain_invalid_end_chars + + "](?:[^" + &domain_invalid_middle_chars + "]*" + + "[^" + &domain_invalid_end_chars + "])?"; + + // This is not quite the way the server does it, because the + // server has a huge list of valid TLDs! I can't face that. + // And I think it's only there so that it can match URLs + // _without_ an http[s] prefix and avoid too many false + // positives. So my compromise is to trust the user, when + // composing a toot, to only enter URLs with sensible domains, + // otherwise we'll mis-highlight them and get the character + // counts wrong. + let domain = domain_component.to_owned() + "(?:\\." + + &domain_component + ")*"; + + let path_end_chars = "a-z".to_owned() + &cyrillic + &accented + + "0-9=_#/\\+\\-"; + let path_mid_chars = path_end_chars.to_owned() + &pd + + "!\\*\\';:\\,\\.\\$\\%\\[\\]~&\\|@"; + + let path_bracketed_once = "\\([".to_owned() + + &path_mid_chars + "]*\\)"; + let path_char_or_bracketed_once = "(?:[".to_owned() + + &path_mid_chars + "]|" + &path_bracketed_once + ")"; + let path_bracketed = "\\(".to_owned() + + &path_char_or_bracketed_once + "*\\)"; + + let path = "(?:[".to_owned() + &path_mid_chars + "]|" + + &path_bracketed + ")*" + "(?:[" + &path_end_chars + "]|" + + &path_bracketed + ")"; + + let query_end_chars = "a-z0-9_&=#/\\-"; + let query_mid_chars = query_end_chars.to_owned() + + "!?\\*\\'\\(\\);:\\+\\$%\\[\\]\\.,~|@"; + + let url = Regex::new( + &("(?i:".to_owned() + + "(?