serde_jsonlines/
lib.rs

1#![cfg_attr(docsrs, feature(doc_cfg))]
2//! JSON Lines (a.k.a. newline-delimited JSON) is a simple format for storing
3//! sequences of JSON values in which each value is serialized on a single line
4//! and terminated by a newline sequence.  The `serde-jsonlines` crate provides
5//! functionality for reading & writing these documents (whether all at once or
6//! line by line) using [`serde`]'s serialization & deserialization features.
7//!
8//! Basic usage involves simply importing the [`BufReadExt`] or [`WriteExt`]
9//! extension trait and then using the [`json_lines()`][BufReadExt::json_lines]
10//! or [`write_json_lines()`][WriteExt::write_json_lines] method on a `BufRead`
11//! or `Write` value to read or write a sequence of JSON Lines values.
12//! Convenience functions are also provided for the common case of reading or
13//! writing a JSON Lines file given as a filepath.
14//!
15//! At a lower level, values can be read or written one at a time (which is
16//! useful if, say, different lines are different types) by wrapping a
17//! `BufRead` or `Write` value in a [`JsonLinesReader`] or [`JsonLinesWriter`]
18//! and then calling the wrapped structure's [`read()`][JsonLinesReader::read]
19//! or [`write()`][JsonLinesWriter::write] method, respectively.
20//!
21//! When the `async` feature is enabled, analogous types for working with JSON
22//! Lines asynchronously under [`tokio`] become available.
23//!
24//! Example
25//! =======
26//!
27//! ```no_run
28//! use serde::{Deserialize, Serialize};
29//! use serde_jsonlines::{json_lines, write_json_lines};
30//! use std::io::Result;
31//!
32//! #[derive(Debug, Deserialize, Eq, PartialEq, Serialize)]
33//! pub struct Structure {
34//!     pub name: String,
35//!     pub size: i32,
36//!     pub on: bool,
37//! }
38//!
39//! fn main() -> Result<()> {
40//!     let values = vec![
41//!         Structure {
42//!             name: "Foo Bar".into(),
43//!             size: 42,
44//!             on: true,
45//!         },
46//!         Structure {
47//!             name: "Quux".into(),
48//!             size: 23,
49//!             on: false,
50//!         },
51//!         Structure {
52//!             name: "Gnusto Cleesh".into(),
53//!             size: 17,
54//!             on: true,
55//!         },
56//!     ];
57//!     write_json_lines("example.jsonl", &values)?;
58//!     let values2 = json_lines("example.jsonl")?.collect::<Result<Vec<Structure>>>()?;
59//!     assert_eq!(values, values2);
60//!     Ok(())
61//! }
62//! ```
63
64use serde::{de::DeserializeOwned, Serialize};
65use std::fs::{File, OpenOptions};
66use std::io::{BufRead, BufReader, BufWriter, Result, Write};
67use std::marker::PhantomData;
68use std::path::Path;
69
70#[cfg(feature = "async")]
71mod asynclib;
72#[cfg(feature = "async")]
73pub use asynclib::*;
74
/// A type alias for a [`JsonLinesIter`] on a buffered file object.
///
/// This is the return type of [`json_lines()`], which opens the file at a
/// given path and wraps it in a [`BufReader`] before iterating over it.
pub type JsonLinesFileIter<T> = JsonLinesIter<BufReader<File>, T>;
79
80/// A structure for writing JSON values as JSON Lines.
81///
82/// A `JsonLinesWriter` wraps a [`std::io::Write`] instance and writes
83/// [`serde::Serialize`] values to it by serializing each one as a single line
84/// of JSON and appending a newline.
85///
86/// # Example
87///
88/// ```no_run
89/// use serde::Serialize;
90/// use serde_jsonlines::JsonLinesWriter;
91/// use std::fs::{read_to_string, File};
92///
93/// #[derive(Serialize)]
94/// pub struct Structure {
95///     pub name: String,
96///     pub size: i32,
97///     pub on: bool,
98/// }
99///
100/// fn main() -> std::io::Result<()> {
101///     {
102///         let fp = File::create("example.jsonl")?;
103///         let mut writer = JsonLinesWriter::new(fp);
104///         writer.write_all([
105///             Structure {
106///                 name: "Foo Bar".into(),
107///                 size: 42,
108///                 on: true,
109///             },
110///             Structure {
111///                 name: "Quux".into(),
112///                 size: 23,
113///                 on: false,
114///             },
115///             Structure {
116///                 name: "Gnusto Cleesh".into(),
117///                 size: 17,
118///                 on: true,
119///             },
120///         ])?;
121///         writer.flush()?;
122///     }
123///     // End the block to close the writer
124///     assert_eq!(
125///         read_to_string("example.jsonl")?,
126///         concat!(
127///             "{\"name\":\"Foo Bar\",\"size\":42,\"on\":true}\n",
128///             "{\"name\":\"Quux\",\"size\":23,\"on\":false}\n",
129///             "{\"name\":\"Gnusto Cleesh\",\"size\":17,\"on\":true}\n",
130///         )
131///     );
132///     Ok(())
133/// }
134/// ```
#[derive(Clone, Debug, Eq, PartialEq)]
pub struct JsonLinesWriter<W> {
    // The wrapped writer that serialized lines are sent to
    inner: W,
}

impl<W> JsonLinesWriter<W> {
    /// Wrap a [`std::io::Write`] instance in a new `JsonLinesWriter`
    pub fn new(writer: W) -> Self {
        Self { inner: writer }
    }

    /// Unwrap the `JsonLinesWriter`, giving back the writer it was built
    /// around
    pub fn into_inner(self) -> W {
        let Self { inner } = self;
        inner
    }

    /// Borrow the underlying writer immutably
    pub fn get_ref(&self) -> &W {
        let Self { inner } = self;
        inner
    }

    /// Borrow the underlying writer mutably
    pub fn get_mut(&mut self) -> &mut W {
        let Self { inner } = self;
        inner
    }
}
161
162impl<W: Write> JsonLinesWriter<W> {
163    /// Serialize a value as a line of JSON and write it to the underlying
164    /// writer, followed by a newline.
165    ///
166    /// Note that separate calls to this method may write different types of
167    /// values.
168    ///
169    /// # Errors
170    ///
171    /// Has the same error conditions as [`serde_json::to_writer()`] and
172    /// [`std::io::Write::write_all()`].
173    pub fn write<T>(&mut self, value: &T) -> Result<()>
174    where
175        T: ?Sized + Serialize,
176    {
177        serde_json::to_writer(&mut self.inner, value)?;
178        self.inner.write_all(b"\n")?;
179        Ok(())
180    }
181
182    /// Serialize each item in an iterator as a line of JSON, and write out
183    /// each one followed by a newline to the underlying writer.
184    ///
185    /// All values in a single call to `write_all()` must be the same type, but
186    /// separate calls may write different types.
187    ///
188    /// # Errors
189    ///
190    /// Has the same error conditions as [`write()`][JsonLinesWriter::write].
191    pub fn write_all<T, I>(&mut self, items: I) -> Result<()>
192    where
193        I: IntoIterator<Item = T>,
194        T: Serialize,
195    {
196        for value in items {
197            self.write(&value)?;
198        }
199        Ok(())
200    }
201
202    /// Flush the underlying writer.
203    ///
204    /// Neither [`write()`][JsonLinesWriter::write] nor
205    /// [`write_all()`][JsonLinesWriter::write_all] flush the writer, so you
206    /// must explicitly call this method if you need output flushed.
207    ///
208    /// # Errors
209    ///
210    /// Has the same error conditions as [`std::io::Write::flush()`].
211    pub fn flush(&mut self) -> Result<()> {
212        self.inner.flush()
213    }
214}
215
216/// A structure for reading JSON values from JSON Lines input.
217///
218/// A `JsonLinesReader` wraps a [`std::io::BufRead`] instance and parses each
219/// line as a [`serde::de::DeserializeOwned`] value in JSON.
220///
221/// # Example
222///
223/// ```no_run
224/// use serde::Deserialize;
225/// use serde_jsonlines::JsonLinesReader;
226/// use std::fs::{write, File};
227/// use std::io::BufReader;
228///
229/// #[derive(Debug, Deserialize, PartialEq)]
230/// pub struct Structure {
231///     pub name: String,
232///     pub size: i32,
233///     pub on: bool,
234/// }
235///
236/// fn main() -> std::io::Result<()> {
237///     write(
238///         "example.jsonl",
239///         concat!(
240///             "{\"name\": \"Foo Bar\", \"on\":true,\"size\": 42 }\n",
241///             "{ \"name\":\"Quux\", \"on\" : false ,\"size\": 23}\n",
242///             " {\"name\": \"Gnusto Cleesh\" , \"on\": true, \"size\": 17}\n",
243///         ),
244///     )?;
245///     let fp = BufReader::new(File::open("example.jsonl")?);
246///     let reader = JsonLinesReader::new(fp);
247///     let items = reader
248///         .read_all::<Structure>()
249///         .collect::<std::io::Result<Vec<_>>>()?;
250///     assert_eq!(
251///         items,
252///         [
253///             Structure {
254///                 name: "Foo Bar".into(),
255///                 size: 42,
256///                 on: true,
257///             },
258///             Structure {
259///                 name: "Quux".into(),
260///                 size: 23,
261///                 on: false,
262///             },
263///             Structure {
264///                 name: "Gnusto Cleesh".into(),
265///                 size: 17,
266///                 on: true,
267///             },
268///         ]
269///     );
270///     Ok(())
271/// }
272/// ```
#[derive(Clone, Debug, Eq, PartialEq)]
pub struct JsonLinesReader<R> {
    // The wrapped reader that lines of JSON are read from
    inner: R,
}
277
278impl<R> JsonLinesReader<R> {
279    /// Construct a new `JsonLinesReader` from a [`std::io::BufRead`] instance
280    pub fn new(reader: R) -> Self {
281        JsonLinesReader { inner: reader }
282    }
283
284    /// Consume the `JsonLinesReader` and return the underlying reader
285    pub fn into_inner(self) -> R {
286        self.inner
287    }
288
289    /// Get a reference to the underlying reader
290    pub fn get_ref(&self) -> &R {
291        &self.inner
292    }
293
294    /// Get a mutable reference to the underlying reader
295    pub fn get_mut(&mut self) -> &mut R {
296        &mut self.inner
297    }
298
299    /// Consume the `JsonLinesReader` and return an iterator over the
300    /// deserialized JSON values from each line.
301    ///
302    /// The returned iterator has an `Item` type of `std::io::Result<T>`.  Each
303    /// call to `next()` has the same error conditions as
304    /// [`read()`][JsonLinesReader::read].
305    ///
306    /// Note that all deserialized values will be of the same type.  If you
307    /// wish to read lines of varying types, use the
308    /// [`read()`][JsonLinesReader::read] method instead.
309    pub fn read_all<T>(self) -> JsonLinesIter<R, T> {
310        JsonLinesIter {
311            reader: self,
312            _output: PhantomData,
313        }
314    }
315}
316
317impl<R: BufRead> JsonLinesReader<R> {
318    /// Read & deserialize a line of JSON from the underlying reader.
319    ///
320    /// If end-of-file is reached, this method returns `Ok(None)`.
321    ///
322    /// Note that separate calls to this method may read different types of
323    /// values.
324    ///
325    /// # Errors
326    ///
327    /// Has the same error conditions as [`std::io::BufRead::read_line()`] and
328    /// [`serde_json::from_str()`].  Note that, in the latter case (which can
329    /// be identified by the [`std::io::Error`] having a [`serde_json::Error`]
330    /// value as its payload), continuing to read from the `JsonLinesReader`
331    /// afterwards will pick up on the next line as though the error never
332    /// happened, so invalid JSON can be easily ignored if you so wish.
333    pub fn read<T>(&mut self) -> Result<Option<T>>
334    where
335        T: DeserializeOwned,
336    {
337        let mut s = String::new();
338        let r = self.inner.read_line(&mut s)?;
339        if r == 0 {
340            Ok(None)
341        } else {
342            Ok(Some(serde_json::from_str::<T>(&s)?))
343        }
344    }
345}
346
/// An iterator over the lines of a [`BufRead`] value `R` that decodes each
/// line as JSON of type `T`.
///
/// This iterator yields items of type `Result<T, std::io::Error>`.  Errors
/// occur under the same conditions as for [`JsonLinesReader::read()`].
///
/// Iterators of this type are returned by [`JsonLinesReader::read_all()`],
/// [`BufReadExt::json_lines()`], and [`json_lines()`].
#[derive(Clone, Debug, Eq, PartialEq)]
pub struct JsonLinesIter<R, T> {
    // The reader that each call to `next()` pulls one line from
    reader: JsonLinesReader<R>,
    // Zero-sized marker recording the output type `T`; no `T` is stored
    _output: PhantomData<T>,
}
360
361impl<R, T> Iterator for JsonLinesIter<R, T>
362where
363    T: DeserializeOwned,
364    R: BufRead,
365{
366    type Item = Result<T>;
367
368    fn next(&mut self) -> Option<Result<T>> {
369        self.reader.read().transpose()
370    }
371}
372
373/// An extension trait for the [`std::io::Write`] trait that adds a
374/// `write_json_lines()` method
375///
376/// # Example
377///
378/// ```no_run
379/// use serde::Serialize;
380/// use serde_jsonlines::WriteExt;
381/// use std::fs::{read_to_string, File};
382/// use std::io::Write;
383///
384/// #[derive(Serialize)]
385/// pub struct Structure {
386///     pub name: String,
387///     pub size: i32,
388///     pub on: bool,
389/// }
390///
391/// fn main() -> std::io::Result<()> {
392///     {
393///         let mut fp = File::create("example.jsonl")?;
394///         fp.write_json_lines([
395///             Structure {
396///                 name: "Foo Bar".into(),
397///                 size: 42,
398///                 on: true,
399///             },
400///             Structure {
401///                 name: "Quux".into(),
402///                 size: 23,
403///                 on: false,
404///             },
405///             Structure {
406///                 name: "Gnusto Cleesh".into(),
407///                 size: 17,
408///                 on: true,
409///             },
410///         ])?;
411///         fp.flush()?;
412///     }
413///     // End the block to close the writer
414///     assert_eq!(
415///         read_to_string("example.jsonl")?,
416///         concat!(
417///             "{\"name\":\"Foo Bar\",\"size\":42,\"on\":true}\n",
418///             "{\"name\":\"Quux\",\"size\":23,\"on\":false}\n",
419///             "{\"name\":\"Gnusto Cleesh\",\"size\":17,\"on\":true}\n",
420///         )
421///     );
422///     Ok(())
423/// }
424/// ```
425pub trait WriteExt: Write {
426    /// Serialize each item in an iterator as a line of JSON, and write out
427    /// each one followed by a newline.
428    ///
429    /// All values in a single call to `write_json_lines()` must be the same
430    /// type, but separate calls may write different types.
431    ///
432    /// This method does not flush.
433    ///
434    /// # Errors
435    ///
436    /// Has the same error conditions as [`serde_json::to_writer()`] and
437    /// [`std::io::Write::write_all()`].
438    fn write_json_lines<T, I>(&mut self, items: I) -> Result<()>
439    where
440        I: IntoIterator<Item = T>,
441        T: Serialize,
442    {
443        for value in items {
444            serde_json::to_writer(&mut *self, &value)?;
445            self.write_all(b"\n")?;
446        }
447        Ok(())
448    }
449}
450
451impl<W: Write> WriteExt for W {}
452
453/// An extension trait for the [`std::io::BufRead`] trait that adds a
454/// `json_lines()` method
455///
456/// # Example
457///
458/// ```no_run
459/// use serde::Deserialize;
460/// use serde_jsonlines::BufReadExt;
461/// use std::fs::{write, File};
462/// use std::io::{BufReader, Result};
463///
464/// #[derive(Debug, Deserialize, PartialEq)]
465/// pub struct Structure {
466///     pub name: String,
467///     pub size: i32,
468///     pub on: bool,
469/// }
470///
471/// fn main() -> Result<()> {
472///     write(
473///         "example.jsonl",
474///         concat!(
475///             "{\"name\": \"Foo Bar\", \"on\":true,\"size\": 42 }\n",
476///             "{ \"name\":\"Quux\", \"on\" : false ,\"size\": 23}\n",
477///             " {\"name\": \"Gnusto Cleesh\" , \"on\": true, \"size\": 17}\n",
478///         ),
479///     )?;
480///     let fp = BufReader::new(File::open("example.jsonl")?);
481///     let items = fp.json_lines::<Structure>().collect::<Result<Vec<_>>>()?;
482///     assert_eq!(
483///         items,
484///         [
485///             Structure {
486///                 name: "Foo Bar".into(),
487///                 size: 42,
488///                 on: true,
489///             },
490///             Structure {
491///                 name: "Quux".into(),
492///                 size: 23,
493///                 on: false,
494///             },
495///             Structure {
496///                 name: "Gnusto Cleesh".into(),
497///                 size: 17,
498///                 on: true,
499///             },
500///         ]
501///     );
502///     Ok(())
503/// }
504/// ```
505pub trait BufReadExt: BufRead {
506    /// Consume the reader and return an iterator over the deserialized JSON
507    /// values from each line.
508    ///
509    /// The returned iterator has an `Item` type of `std::io::Result<T>`.  Each
510    /// call to `next()` has the same error conditions as
511    /// [`JsonLinesReader::read()`].
512    ///
513    /// Note that all deserialized values will be of the same type.
514    fn json_lines<T>(self) -> JsonLinesIter<Self, T>
515    where
516        Self: Sized,
517    {
518        JsonLinesReader::new(self).read_all()
519    }
520}
521
522impl<R: BufRead> BufReadExt for R {}
523
524/// Write an iterator of values to the file at `path` as JSON Lines.
525///
526/// If the file does not already exist, it is created.  If it does exist, any
527/// contents are discarded.
528///
529/// # Errors
530///
531/// Has the same error conditions as [`File::create()`],
532/// [`serde_json::to_writer()`], [`std::io::Write::write_all()`], and
533/// [`std::io::Write::flush()`].
534///
535/// # Example
536///
537/// ```no_run
538/// use serde::Serialize;
539/// use serde_jsonlines::write_json_lines;
540/// use std::fs::read_to_string;
541///
542/// #[derive(Serialize)]
543/// pub struct Structure {
544///     pub name: String,
545///     pub size: i32,
546///     pub on: bool,
547/// }
548///
549/// fn main() -> std::io::Result<()> {
550///     write_json_lines(
551///         "example.jsonl",
552///         [
553///             Structure {
554///                 name: "Foo Bar".into(),
555///                 size: 42,
556///                 on: true,
557///             },
558///             Structure {
559///                 name: "Quux".into(),
560///                 size: 23,
561///                 on: false,
562///             },
563///             Structure {
564///                 name: "Gnusto Cleesh".into(),
565///                 size: 17,
566///                 on: true,
567///             },
568///         ],
569///     )?;
570///     assert_eq!(
571///         read_to_string("example.jsonl")?,
572///         concat!(
573///             "{\"name\":\"Foo Bar\",\"size\":42,\"on\":true}\n",
574///             "{\"name\":\"Quux\",\"size\":23,\"on\":false}\n",
575///             "{\"name\":\"Gnusto Cleesh\",\"size\":17,\"on\":true}\n",
576///         )
577///     );
578///     Ok(())
579/// }
580/// ```
581pub fn write_json_lines<P, I, T>(path: P, items: I) -> Result<()>
582where
583    P: AsRef<Path>,
584    I: IntoIterator<Item = T>,
585    T: Serialize,
586{
587    let mut fp = BufWriter::new(File::create(path)?);
588    fp.write_json_lines(items)?;
589    fp.flush()
590}
591
592/// Append an iterator of values to the file at `path` as JSON Lines.
593///
594/// If the file does not already exist, it is created.  If it does exist, the
595/// new lines are added after any lines that are already present.
596///
597/// # Errors
598///
/// Has the same error conditions as [`OpenOptions::open()`],
600/// [`serde_json::to_writer()`], [`std::io::Write::write_all()`], and
601/// [`std::io::Write::flush()`].
602///
603/// # Example
604///
605/// ```no_run
606/// use serde::Serialize;
607/// use serde_jsonlines::append_json_lines;
608/// use std::fs::read_to_string;
609///
610/// #[derive(Serialize)]
611/// pub struct Structure {
612///     pub name: String,
613///     pub size: i32,
614///     pub on: bool,
615/// }
616///
617/// fn main() -> std::io::Result<()> {
618///     append_json_lines(
619///         "example.jsonl",
620///         [
621///             Structure {
622///                 name: "Foo Bar".into(),
623///                 size: 42,
624///                 on: true,
625///             },
626///             Structure {
627///                 name: "Quux".into(),
628///                 size: 23,
629///                 on: false,
630///             },
631///         ],
632///     )?;
633///     assert_eq!(
634///         read_to_string("example.jsonl")?,
635///         concat!(
636///             "{\"name\":\"Foo Bar\",\"size\":42,\"on\":true}\n",
637///             "{\"name\":\"Quux\",\"size\":23,\"on\":false}\n",
638///         )
639///     );
640///     append_json_lines(
641///         "example.jsonl",
642///         [
643///             Structure {
644///                 name: "Gnusto Cleesh".into(),
645///                 size: 17,
646///                 on: true,
647///             },
648///             Structure {
649///                 name: "baz".into(),
650///                 size: 69105,
651///                 on: false,
652///             },
653///         ],
654///     )?;
655///     assert_eq!(
656///         read_to_string("example.jsonl")?,
657///         concat!(
658///             "{\"name\":\"Foo Bar\",\"size\":42,\"on\":true}\n",
659///             "{\"name\":\"Quux\",\"size\":23,\"on\":false}\n",
660///             "{\"name\":\"Gnusto Cleesh\",\"size\":17,\"on\":true}\n",
661///             "{\"name\":\"baz\",\"size\":69105,\"on\":false}\n",
662///         )
663///     );
664///     Ok(())
665/// }
666/// ```
667pub fn append_json_lines<P, I, T>(path: P, items: I) -> Result<()>
668where
669    P: AsRef<Path>,
670    I: IntoIterator<Item = T>,
671    T: Serialize,
672{
673    let mut fp = BufWriter::new(OpenOptions::new().append(true).create(true).open(path)?);
674    fp.write_json_lines(items)?;
675    fp.flush()
676}
677
678/// Iterate over JSON Lines values from a file.
679///
680/// `json_lines(path)` returns an iterator of values deserialized from the JSON
681/// Lines in the file at `path`.
682///
683/// The returned iterator has an `Item` type of `std::io::Result<T>`.  Each
684/// call to `next()` has the same error conditions as
685/// [`JsonLinesReader::read()`].
686///
687/// # Errors
688///
689/// Has the same error conditions as [`File::open()`].
690///
691/// # Example
692///
693/// ```no_run
694/// use serde::Deserialize;
695/// use serde_jsonlines::json_lines;
696/// use std::fs::write;
697/// use std::io::Result;
698///
699/// #[derive(Debug, Deserialize, PartialEq)]
700/// pub struct Structure {
701///     pub name: String,
702///     pub size: i32,
703///     pub on: bool,
704/// }
705///
706/// fn main() -> Result<()> {
707///     write(
708///         "example.jsonl",
709///         concat!(
710///             "{\"name\": \"Foo Bar\", \"on\":true,\"size\": 42 }\n",
711///             "{ \"name\":\"Quux\", \"on\" : false ,\"size\": 23}\n",
712///             " {\"name\": \"Gnusto Cleesh\" , \"on\": true, \"size\": 17}\n",
713///         ),
714///     )?;
715///     let items = json_lines::<Structure, _>("example.jsonl")?.collect::<Result<Vec<_>>>()?;
716///     assert_eq!(
717///         items,
718///         [
719///             Structure {
720///                 name: "Foo Bar".into(),
721///                 size: 42,
722///                 on: true,
723///             },
724///             Structure {
725///                 name: "Quux".into(),
726///                 size: 23,
727///                 on: false,
728///             },
729///             Structure {
730///                 name: "Gnusto Cleesh".into(),
731///                 size: 17,
732///                 on: true,
733///             },
734///         ]
735///     );
736///     Ok(())
737/// }
738/// ```
739pub fn json_lines<T, P: AsRef<Path>>(path: P) -> Result<JsonLinesFileIter<T>> {
740    let fp = BufReader::new(File::open(path)?);
741    Ok(fp.json_lines())
742}