newline_converter/
lib.rs

//! A library for converting newline characters.
2//!
3//! This crate provides two functions: [`dos2unix`] and [`unix2dos`] that perform the conversion on strings.
4//!
//! The conversion functions are **lazy** - they don't perform any allocations if the input is already in the correct format.
6//!
7//! [`dos2unix`]: fn.dos2unix.html
8//! [`unix2dos`]: fn.unix2dos.html
9
10use std::borrow::Cow;
11use unicode_segmentation::UnicodeSegmentation;
12
// NOTE(review): these are outer attributes (`#[...]`), so they attach to the
// next item only -- the `UNPACK_MSG` constant below -- and not to the crate as
// a whole. If a crate-wide lint level was intended, the inner form
// (`#![deny(...)]`) placed before the first item would be needed -- confirm.
#[deny(clippy::unwrap_used)]
#[deny(clippy::expect_used)]

// Panic message handed to `unreachable!` by the `unpack_grapheme!` macro.
const UNPACK_MSG: &str = "Grapheme should always be found -- Please file a bug report";
17
// Unwraps an `Option`, evaluating to the contained value.
//
// `None` is treated as a broken internal invariant: the expansion hits
// `unreachable!`, using `UNPACK_MSG` as the panic message.
macro_rules! unpack_grapheme {
    ($lookup:expr) => {
        if let Some(found) = $lookup {
            found
        } else {
            unreachable!("{}", UNPACK_MSG)
        }
    };
}
26
/// Converts DOS-style line endings (`\r\n`) to UNIX-style (`\n`).
///
/// Every `\r\n` pair is replaced by a single `\n`. A `\r` that is not
/// immediately followed by `\n` is left untouched.
///
/// The input string may already be in the correct format, so this function
/// returns `Cow<str>`: the input is borrowed as-is when it contains no `\r\n`,
/// and a new `String` is allocated only when a conversion is actually needed.
///
/// # Examples
/// ```
/// assert_eq!(newline_converter::dos2unix("\r\nfoo\r\nbar\r\n"), "\nfoo\nbar\n");
/// ```
///
/// Lone `\r` bytes will be preserved:
/// ```
///  assert_eq!(
///    newline_converter::dos2unix("\nfoo\rbar\r\n"),
///    "\nfoo\rbar\n"
///  );
/// ```
pub fn dos2unix<T: AsRef<str> + ?Sized>(input: &T) -> Cow<'_, str> {
    let input = input.as_ref();
    let mut chars = input.char_indices().peekable();
    // Stays `None` until the first `\r\n` is met, so clean input is never copied.
    let mut output: Option<String> = None;

    while let Some((offset, current)) = chars.next() {
        let starts_crlf =
            current == '\r' && chars.peek().map(|&(_, next)| next) == Some('\n');

        if starts_crlf {
            if output.is_none() {
                // `offset` is the byte position of this `\r`, so it is a valid
                // char boundary and everything before it needs no conversion.
                let (unchanged, rest) = input.split_at(offset);
                // One byte disappears per `\r\n` pair; `rest` starts with one,
                // so the subtraction cannot underflow.
                let dropped = rest.matches("\r\n").count();
                let mut buffer = String::with_capacity(input.len() - dropped);
                buffer.push_str(unchanged);
                output = Some(buffer);
            }
            // Skip the `\r`; the following `\n` is copied on the next iteration.
            continue;
        }

        if let Some(buffer) = output.as_mut() {
            buffer.push(current);
        }
    }

    match output {
        Some(converted) => Cow::Owned(converted),
        None => Cow::Borrowed(input),
    }
}
78
/// Converts UNIX-style line endings (`\n`) to DOS-style (`\r\n`).
///
/// A `\r` is inserted in front of every `\n` that is not already preceded by
/// one. All other characters, including lone `\r`, are copied unchanged.
///
/// The input string may already be in the correct format, so this function
/// returns `Cow<str>`: the input is borrowed as-is when no `\n` needs a `\r`,
/// and a new `String` is allocated only when a conversion is actually needed.
///
/// # Examples
/// ```
/// assert_eq!(newline_converter::unix2dos("\nfoo\nbar\n"), "\r\nfoo\r\nbar\r\n");
/// ```
///
/// Already present DOS line breaks are respected:
/// ```
/// assert_eq!(newline_converter::unix2dos("\nfoo\r\nbar\n"), "\r\nfoo\r\nbar\r\n");
/// ```
pub fn unix2dos<T: AsRef<str> + ?Sized>(input: &T) -> Cow<'_, str> {
    let input = input.as_ref();
    // Stays `None` until the first bare `\n` is met, so clean input is never copied.
    let mut output: Option<String> = None;
    // `None` before the first character, which also covers a leading `\n`.
    let mut previous: Option<char> = None;

    for (offset, current) in input.char_indices() {
        if current == '\n' && previous != Some('\r') {
            let buffer = output.get_or_insert_with(|| {
                // `offset` is the byte position of this `\n`, so it is a valid
                // char boundary and everything before it needs no conversion.
                let (unchanged, rest) = input.split_at(offset);
                // Upper bound on growth: at most one `\r` per remaining `\n`.
                let line_feeds = rest.matches('\n').count();
                let mut buffer = String::with_capacity(input.len() + line_feeds);
                buffer.push_str(unchanged);
                buffer
            });
            buffer.push('\r');
        }
        previous = Some(current);

        if let Some(buffer) = output.as_mut() {
            buffer.push(current);
        }
    }

    match output {
        Some(converted) => Cow::Owned(converted),
        None => Cow::Borrowed(input),
    }
}
132
#[cfg(test)]
mod tests {
    use super::*;

    /// Tells whether `value` borrows its data rather than owning a copy.
    ///
    /// `Cow`'s `PartialEq` compares only the string contents, so comparing
    /// against `Cow::Borrowed(..)` / `Cow::Owned(..)` with `assert_eq!` cannot
    /// tell the variants apart; the variant has to be inspected explicitly.
    #[allow(clippy::match_like_matches_macro)] // MSRV 1.38, matches! macro available in 1.42
    fn is_borrowed(value: &Cow<'_, str>) -> bool {
        match value {
            Cow::Borrowed(_) => true,
            Cow::Owned(_) => false,
        }
    }

    #[test]
    fn middle() {
        assert_eq!(dos2unix("foo\r\nbar"), "foo\nbar");
        assert_eq!(unix2dos("foo\nbar"), "foo\r\nbar");
    }

    #[test]
    fn beginning() {
        assert_eq!(dos2unix("\r\nfoobar"), "\nfoobar");
        assert_eq!(unix2dos("\nfoobar"), "\r\nfoobar");
    }

    #[test]
    fn end() {
        assert_eq!(dos2unix("foobar\r\n"), "foobar\n");
        assert_eq!(unix2dos("foobar\n"), "foobar\r\n");
    }

    #[test]
    fn all() {
        assert_eq!(dos2unix("\r\nfoo\r\nbar\r\n"), "\nfoo\nbar\n");
        assert_eq!(unix2dos("\nfoo\nbar\n"), "\r\nfoo\r\nbar\r\n");
    }

    #[test]
    fn advanced() {
        assert_eq!(unix2dos("\rfoo\r\nbar\n"), "\rfoo\r\nbar\r\n");
        assert_eq!(dos2unix("\nfoo\rbar\r\n"), "\nfoo\rbar\n");
    }

    #[test]
    fn consecutive_line_breaks() {
        assert_eq!(dos2unix("\r\n\r\n"), "\n\n");
        assert_eq!(dos2unix("\r\r\n"), "\r\n");
        assert_eq!(unix2dos("\n\n"), "\r\n\r\n");
    }

    #[test]
    fn empty_input_is_borrowed() {
        let from_dos = dos2unix("");
        assert!(is_borrowed(&from_dos));
        assert_eq!(from_dos, "");

        let from_unix = unix2dos("");
        assert!(is_borrowed(&from_unix));
        assert_eq!(from_unix, "");
    }

    #[test]
    fn not_mutated_dos2unix() {
        let converted = dos2unix("\nfoo\nbar\n");
        assert!(is_borrowed(&converted), "clean input must not be copied");
        assert_eq!(converted, "\nfoo\nbar\n");
    }

    #[test]
    fn mutated_dos2unix() {
        let converted = dos2unix("\r\nfoo\r\nbar\r\n");
        assert!(!is_borrowed(&converted), "converted input must be owned");
        assert_eq!(converted, "\nfoo\nbar\n");
    }

    #[test]
    fn not_mutated_unix2dos() {
        let converted = unix2dos("\r\nfoo\r\nbar\r\n");
        assert!(is_borrowed(&converted), "clean input must not be copied");
        assert_eq!(converted, "\r\nfoo\r\nbar\r\n");
    }

    #[test]
    fn mutated_unix2dos() {
        let converted = unix2dos("\nfoo\nbar\n");
        assert!(!is_borrowed(&converted), "converted input must be owned");
        assert_eq!(converted, "\r\nfoo\r\nbar\r\n");
    }

    #[test]
    fn non_ascii_characters_unix2dos() {
        assert_eq!(
            unix2dos("Zażółć\ngęślą\njaźń\n"),
            "Zażółć\r\ngęślą\r\njaźń\r\n"
        );
    }

    #[test]
    fn non_ascii_characters_dos2unix() {
        assert_eq!(
            dos2unix("Zażółć\r\ngęślą\r\njaźń\r\n"),
            "Zażółć\ngęślą\njaźń\n"
        );
    }

    #[test]
    // https://github.com/spitfire05/rnc/issues/14
    fn panics_in_0_2_1_unix2dos() {
        assert_eq!(unix2dos("ä\n"), "ä\r\n");
    }

    #[test]
    // https://github.com/spitfire05/rnc/issues/14
    fn panics_in_0_2_1_dos2unix() {
        assert_eq!(dos2unix("ä\r\n"), "ä\n");
    }
}