Correctly handle multi-byte characters in string encoding

This commit is contained in:
Dirkjan Ochtman 2023-09-15 10:37:06 +02:00
parent 8be61f6017
commit 82ca8a224e
1 changed file with 8 additions and 2 deletions

View File

@ -529,7 +529,7 @@ impl<T: ToXml> ToXml for Option<T> {
fn encode(input: &str) -> Result<Cow<'_, str>, Error> { fn encode(input: &str) -> Result<Cow<'_, str>, Error> {
let mut result = String::with_capacity(input.len()); let mut result = String::with_capacity(input.len());
let mut last_end = 0; let mut last_end = 0;
for (start, c) in input.chars().enumerate() { for (start, c) in input.char_indices() {
let to = match c { let to = match c {
'&' => "&amp;", '&' => "&amp;",
'"' => "&quot;", '"' => "&quot;",
@ -849,7 +849,7 @@ impl<'xml> FromXml<'xml> for IpAddr {
#[cfg(test)] #[cfg(test)]
mod tests { mod tests {
use super::decode; use super::*;
#[test] #[test]
fn test_decode() { fn test_decode() {
@ -867,4 +867,10 @@ mod tests {
assert!(decode("&foobar;").is_err()); assert!(decode("&foobar;").is_err());
assert!(decode("cbdtéd&ampü").is_err()); assert!(decode("cbdtéd&ampü").is_err());
} }
// Regression check: `&` must still be escaped to `&amp;` when it is preceded
// by characters that occupy more than one byte in UTF-8.
#[test]
fn encode_unicode() {
    // Each accented letter in this literal is a multi-byte UTF-8 character,
    // so character positions and byte offsets diverge before the `&`.
    let escaped = encode("Iñtërnâ&tiônàlizætiøn").unwrap();
    assert_eq!(escaped, "Iñtërnâ&amp;tiônàlizætiøn");
}
} }