fn is_char_boundary(&self, pos: usize) -> bool {
if !self.text.is_char_boundary(pos) {
false
+ } else if pos == 0 {
+ true // beginning of string
} else {
match self.text[pos..].chars().next() {
None => true, // end of string
// and may use it to adjust the colours we give to
// add_region_inner.
let mut add_region = |start: usize, end: usize, colour: char| {
+ // Count the actual characters in the region.
+ let region_chars = self.core.text[start..end].chars().count();
// Determine the total cost of the current region.
let cost = match colour {
'u' => self.conf.characters_reserved_per_url,
Some(pos) => pos + 1, // just the part before the @domain
None => end - start, // otherwise the whole thing counts
}
- _ => end - start,
+ _ => region_chars,
};
// Maybe recolour the region as 'out of bounds', or partly so.
} else {
// Break the region into an ok and a bad subsection
let nbad_chars = nchars + cost - self.conf.max_characters;
- let nok_chars = end.saturating_sub(start + nbad_chars);
- let midpoint = start + nok_chars;
+ let nok_chars = region_chars.saturating_sub(nbad_chars);
+ let nok_bytes = {
+ let slice = &core.text[start..];
+ let mut char_iter = slice.char_indices();
+ for _ in 0..nok_chars {
+ char_iter.next();
+ }
+ let mut pos = char_iter.next()
+ .map_or_else(|| slice.len(), |(i,_)| i);
+
+ // If we've landed on a Unicode char boundary but
+ // not on an _editor_ char boundary (i.e. between
+ // a printing character and a combining one), move
+ // the position backwards so that the printable
+ // character is marked as overlong.
+ while pos > start && !core.is_char_boundary(start + pos) {
+ pos -= 1;
+ }
+
+ pos
+ };
+ let midpoint = start + nok_bytes;
add_region_inner(start, midpoint, colour);
add_region_inner(midpoint, end, '!');
}
ComposeBufferRegion { start: 44, end: 51-1, colour: ' ' },
ComposeBufferRegion { start: 51-1, end: 91, colour: '!' },
});
+
+ // Test handling of non-single-byte Unicode characters. Note here
+ // that ² and ³ take two bytes each in UTF-8 (they live in the ISO
+ // 8859-1 top half), but all the other superscript digits need
+ // three bytes (U+2070 onwards).
+ let unicode_sample_text = "⁰ⁱ²³⁴⁵⁶⁷⁸⁹⁰ⁱ²³⁴⁵⁶⁷⁸⁹⁰ⁱ²³⁴⁵⁶⁷⁸⁹";
+ let composer = Composer::test_new(InstanceStatusConfig {
+ max_characters: 23,
+ max_media_attachments: 4,
+ characters_reserved_per_url: 23,
+ }, unicode_sample_text);
+ assert_eq!(composer.make_regions(&composer.core), vec! {
+ // 28 bytes for a full ⁰ⁱ²³⁴⁵⁶⁷⁸⁹, 3+3+2=8 bytes for ⁰ⁱ²
+ ComposeBufferRegion { start: 0, end: 2*28+8, colour: ' ' },
+ ComposeBufferRegion { start: 2*28+8, end: 3*28, colour: '!' },
+ });
+
+ // An even more awkward case, where there's a combining mark at
+ // the join.
+ let unicode_sample_text = "Besźel";
+ let composer = Composer::test_new(InstanceStatusConfig {
+ max_characters: 4,
+ max_media_attachments: 4,
+ characters_reserved_per_url: 23,
+ }, unicode_sample_text);
+ assert_eq!(composer.make_regions(&composer.core), vec! {
+ // We expect only the "Bes" to be marked as ok, even though
+ // the 'z' part of "ź" is within bounds in principle
+ ComposeBufferRegion { start: 0, end: 3, colour: ' ' },
+ ComposeBufferRegion { start: 3, end: 8, colour: '!' },
+ });
}
#[test]