serde_jsonlines/lib.rs
1#![cfg_attr(docsrs, feature(doc_cfg))]
2//! JSON Lines (a.k.a. newline-delimited JSON) is a simple format for storing
3//! sequences of JSON values in which each value is serialized on a single line
4//! and terminated by a newline sequence. The `serde-jsonlines` crate provides
5//! functionality for reading & writing these documents (whether all at once or
6//! line by line) using [`serde`]'s serialization & deserialization features.
7//!
8//! Basic usage involves simply importing the [`BufReadExt`] or [`WriteExt`]
9//! extension trait and then using the [`json_lines()`][BufReadExt::json_lines]
10//! or [`write_json_lines()`][WriteExt::write_json_lines] method on a `BufRead`
11//! or `Write` value to read or write a sequence of JSON Lines values.
12//! Convenience functions are also provided for the common case of reading or
13//! writing a JSON Lines file given as a filepath.
14//!
15//! At a lower level, values can be read or written one at a time (which is
16//! useful if, say, different lines are different types) by wrapping a
17//! `BufRead` or `Write` value in a [`JsonLinesReader`] or [`JsonLinesWriter`]
18//! and then calling the wrapped structure's [`read()`][JsonLinesReader::read]
19//! or [`write()`][JsonLinesWriter::write] method, respectively.
20//!
21//! When the `async` feature is enabled, analogous types for working with JSON
22//! Lines asynchronously under [`tokio`] become available.
23//!
24//! Example
25//! =======
26//!
27//! ```no_run
28//! use serde::{Deserialize, Serialize};
29//! use serde_jsonlines::{json_lines, write_json_lines};
30//! use std::io::Result;
31//!
32//! #[derive(Debug, Deserialize, Eq, PartialEq, Serialize)]
33//! pub struct Structure {
34//! pub name: String,
35//! pub size: i32,
36//! pub on: bool,
37//! }
38//!
39//! fn main() -> Result<()> {
40//! let values = vec![
41//! Structure {
42//! name: "Foo Bar".into(),
43//! size: 42,
44//! on: true,
45//! },
46//! Structure {
47//! name: "Quux".into(),
48//! size: 23,
49//! on: false,
50//! },
51//! Structure {
52//! name: "Gnusto Cleesh".into(),
53//! size: 17,
54//! on: true,
55//! },
56//! ];
57//! write_json_lines("example.jsonl", &values)?;
58//! let values2 = json_lines("example.jsonl")?.collect::<Result<Vec<Structure>>>()?;
59//! assert_eq!(values, values2);
60//! Ok(())
61//! }
62//! ```
63
64use serde::{de::DeserializeOwned, Serialize};
65use std::fs::{File, OpenOptions};
66use std::io::{BufRead, BufReader, BufWriter, Result, Write};
67use std::marker::PhantomData;
68use std::path::Path;
69
70#[cfg(feature = "async")]
71mod asynclib;
72#[cfg(feature = "async")]
73pub use asynclib::*;
74
/// A type alias for a [`JsonLinesIter`] whose underlying reader is a [`File`]
/// wrapped in a [`BufReader`].
///
/// This is the iterator type that [`json_lines()`] returns on success.
pub type JsonLinesFileIter<T> = JsonLinesIter<BufReader<File>, T>;
79
80/// A structure for writing JSON values as JSON Lines.
81///
82/// A `JsonLinesWriter` wraps a [`std::io::Write`] instance and writes
83/// [`serde::Serialize`] values to it by serializing each one as a single line
84/// of JSON and appending a newline.
85///
86/// # Example
87///
88/// ```no_run
89/// use serde::Serialize;
90/// use serde_jsonlines::JsonLinesWriter;
91/// use std::fs::{read_to_string, File};
92///
93/// #[derive(Serialize)]
94/// pub struct Structure {
95/// pub name: String,
96/// pub size: i32,
97/// pub on: bool,
98/// }
99///
100/// fn main() -> std::io::Result<()> {
101/// {
102/// let fp = File::create("example.jsonl")?;
103/// let mut writer = JsonLinesWriter::new(fp);
104/// writer.write_all([
105/// Structure {
106/// name: "Foo Bar".into(),
107/// size: 42,
108/// on: true,
109/// },
110/// Structure {
111/// name: "Quux".into(),
112/// size: 23,
113/// on: false,
114/// },
115/// Structure {
116/// name: "Gnusto Cleesh".into(),
117/// size: 17,
118/// on: true,
119/// },
120/// ])?;
121/// writer.flush()?;
122/// }
123/// // End the block to close the writer
124/// assert_eq!(
125/// read_to_string("example.jsonl")?,
126/// concat!(
127/// "{\"name\":\"Foo Bar\",\"size\":42,\"on\":true}\n",
128/// "{\"name\":\"Quux\",\"size\":23,\"on\":false}\n",
129/// "{\"name\":\"Gnusto Cleesh\",\"size\":17,\"on\":true}\n",
130/// )
131/// );
132/// Ok(())
133/// }
134/// ```
#[derive(Clone, Debug, Eq, PartialEq)]
pub struct JsonLinesWriter<W> {
    // The wrapped writer that receives the serialized lines.
    inner: W,
}

impl<W> JsonLinesWriter<W> {
    /// Wrap a [`std::io::Write`] instance in a new `JsonLinesWriter`
    pub fn new(writer: W) -> Self {
        Self { inner: writer }
    }

    /// Unwrap the `JsonLinesWriter`, giving back the writer it was built from
    pub fn into_inner(self) -> W {
        let Self { inner } = self;
        inner
    }

    /// Borrow the underlying writer immutably
    pub fn get_ref(&self) -> &W {
        let Self { inner } = self;
        inner
    }

    /// Borrow the underlying writer mutably
    pub fn get_mut(&mut self) -> &mut W {
        let Self { inner } = self;
        inner
    }
}
161
162impl<W: Write> JsonLinesWriter<W> {
163 /// Serialize a value as a line of JSON and write it to the underlying
164 /// writer, followed by a newline.
165 ///
166 /// Note that separate calls to this method may write different types of
167 /// values.
168 ///
169 /// # Errors
170 ///
171 /// Has the same error conditions as [`serde_json::to_writer()`] and
172 /// [`std::io::Write::write_all()`].
173 pub fn write<T>(&mut self, value: &T) -> Result<()>
174 where
175 T: ?Sized + Serialize,
176 {
177 serde_json::to_writer(&mut self.inner, value)?;
178 self.inner.write_all(b"\n")?;
179 Ok(())
180 }
181
182 /// Serialize each item in an iterator as a line of JSON, and write out
183 /// each one followed by a newline to the underlying writer.
184 ///
185 /// All values in a single call to `write_all()` must be the same type, but
186 /// separate calls may write different types.
187 ///
188 /// # Errors
189 ///
190 /// Has the same error conditions as [`write()`][JsonLinesWriter::write].
191 pub fn write_all<T, I>(&mut self, items: I) -> Result<()>
192 where
193 I: IntoIterator<Item = T>,
194 T: Serialize,
195 {
196 for value in items {
197 self.write(&value)?;
198 }
199 Ok(())
200 }
201
202 /// Flush the underlying writer.
203 ///
204 /// Neither [`write()`][JsonLinesWriter::write] nor
205 /// [`write_all()`][JsonLinesWriter::write_all] flush the writer, so you
206 /// must explicitly call this method if you need output flushed.
207 ///
208 /// # Errors
209 ///
210 /// Has the same error conditions as [`std::io::Write::flush()`].
211 pub fn flush(&mut self) -> Result<()> {
212 self.inner.flush()
213 }
214}
215
216/// A structure for reading JSON values from JSON Lines input.
217///
218/// A `JsonLinesReader` wraps a [`std::io::BufRead`] instance and parses each
219/// line as a [`serde::de::DeserializeOwned`] value in JSON.
220///
221/// # Example
222///
223/// ```no_run
224/// use serde::Deserialize;
225/// use serde_jsonlines::JsonLinesReader;
226/// use std::fs::{write, File};
227/// use std::io::BufReader;
228///
229/// #[derive(Debug, Deserialize, PartialEq)]
230/// pub struct Structure {
231/// pub name: String,
232/// pub size: i32,
233/// pub on: bool,
234/// }
235///
236/// fn main() -> std::io::Result<()> {
237/// write(
238/// "example.jsonl",
239/// concat!(
240/// "{\"name\": \"Foo Bar\", \"on\":true,\"size\": 42 }\n",
241/// "{ \"name\":\"Quux\", \"on\" : false ,\"size\": 23}\n",
242/// " {\"name\": \"Gnusto Cleesh\" , \"on\": true, \"size\": 17}\n",
243/// ),
244/// )?;
245/// let fp = BufReader::new(File::open("example.jsonl")?);
246/// let reader = JsonLinesReader::new(fp);
247/// let items = reader
248/// .read_all::<Structure>()
249/// .collect::<std::io::Result<Vec<_>>>()?;
250/// assert_eq!(
251/// items,
252/// [
253/// Structure {
254/// name: "Foo Bar".into(),
255/// size: 42,
256/// on: true,
257/// },
258/// Structure {
259/// name: "Quux".into(),
260/// size: 23,
261/// on: false,
262/// },
263/// Structure {
264/// name: "Gnusto Cleesh".into(),
265/// size: 17,
266/// on: true,
267/// },
268/// ]
269/// );
270/// Ok(())
271/// }
272/// ```
273#[derive(Clone, Debug, Eq, PartialEq)]
274pub struct JsonLinesReader<R> {
275 inner: R,
276}
277
278impl<R> JsonLinesReader<R> {
279 /// Construct a new `JsonLinesReader` from a [`std::io::BufRead`] instance
280 pub fn new(reader: R) -> Self {
281 JsonLinesReader { inner: reader }
282 }
283
284 /// Consume the `JsonLinesReader` and return the underlying reader
285 pub fn into_inner(self) -> R {
286 self.inner
287 }
288
289 /// Get a reference to the underlying reader
290 pub fn get_ref(&self) -> &R {
291 &self.inner
292 }
293
294 /// Get a mutable reference to the underlying reader
295 pub fn get_mut(&mut self) -> &mut R {
296 &mut self.inner
297 }
298
299 /// Consume the `JsonLinesReader` and return an iterator over the
300 /// deserialized JSON values from each line.
301 ///
302 /// The returned iterator has an `Item` type of `std::io::Result<T>`. Each
303 /// call to `next()` has the same error conditions as
304 /// [`read()`][JsonLinesReader::read].
305 ///
306 /// Note that all deserialized values will be of the same type. If you
307 /// wish to read lines of varying types, use the
308 /// [`read()`][JsonLinesReader::read] method instead.
309 pub fn read_all<T>(self) -> JsonLinesIter<R, T> {
310 JsonLinesIter {
311 reader: self,
312 _output: PhantomData,
313 }
314 }
315}
316
317impl<R: BufRead> JsonLinesReader<R> {
318 /// Read & deserialize a line of JSON from the underlying reader.
319 ///
320 /// If end-of-file is reached, this method returns `Ok(None)`.
321 ///
322 /// Note that separate calls to this method may read different types of
323 /// values.
324 ///
325 /// # Errors
326 ///
327 /// Has the same error conditions as [`std::io::BufRead::read_line()`] and
328 /// [`serde_json::from_str()`]. Note that, in the latter case (which can
329 /// be identified by the [`std::io::Error`] having a [`serde_json::Error`]
330 /// value as its payload), continuing to read from the `JsonLinesReader`
331 /// afterwards will pick up on the next line as though the error never
332 /// happened, so invalid JSON can be easily ignored if you so wish.
333 pub fn read<T>(&mut self) -> Result<Option<T>>
334 where
335 T: DeserializeOwned,
336 {
337 let mut s = String::new();
338 let r = self.inner.read_line(&mut s)?;
339 if r == 0 {
340 Ok(None)
341 } else {
342 Ok(Some(serde_json::from_str::<T>(&s)?))
343 }
344 }
345}
346
347/// An iterator over the lines of a [`BufRead`] value `R` that decodes each
348/// line as JSON of type `T`.
349///
350/// This iterator yields items of type `Result<T, std::io::Error>`. Errors
/// occur under the same conditions as for [`JsonLinesReader::read()`].
352///
353/// Iterators of this type are returned by [`JsonLinesReader::read_all()`],
354/// [`BufReadExt::json_lines()`], and [`json_lines()`].
355#[derive(Clone, Debug, Eq, PartialEq)]
356pub struct JsonLinesIter<R, T> {
357 reader: JsonLinesReader<R>,
358 _output: PhantomData<T>,
359}
360
361impl<R, T> Iterator for JsonLinesIter<R, T>
362where
363 T: DeserializeOwned,
364 R: BufRead,
365{
366 type Item = Result<T>;
367
368 fn next(&mut self) -> Option<Result<T>> {
369 self.reader.read().transpose()
370 }
371}
372
373/// An extension trait for the [`std::io::Write`] trait that adds a
374/// `write_json_lines()` method
375///
376/// # Example
377///
378/// ```no_run
379/// use serde::Serialize;
380/// use serde_jsonlines::WriteExt;
381/// use std::fs::{read_to_string, File};
382/// use std::io::Write;
383///
384/// #[derive(Serialize)]
385/// pub struct Structure {
386/// pub name: String,
387/// pub size: i32,
388/// pub on: bool,
389/// }
390///
391/// fn main() -> std::io::Result<()> {
392/// {
393/// let mut fp = File::create("example.jsonl")?;
394/// fp.write_json_lines([
395/// Structure {
396/// name: "Foo Bar".into(),
397/// size: 42,
398/// on: true,
399/// },
400/// Structure {
401/// name: "Quux".into(),
402/// size: 23,
403/// on: false,
404/// },
405/// Structure {
406/// name: "Gnusto Cleesh".into(),
407/// size: 17,
408/// on: true,
409/// },
410/// ])?;
411/// fp.flush()?;
412/// }
413/// // End the block to close the writer
414/// assert_eq!(
415/// read_to_string("example.jsonl")?,
416/// concat!(
417/// "{\"name\":\"Foo Bar\",\"size\":42,\"on\":true}\n",
418/// "{\"name\":\"Quux\",\"size\":23,\"on\":false}\n",
419/// "{\"name\":\"Gnusto Cleesh\",\"size\":17,\"on\":true}\n",
420/// )
421/// );
422/// Ok(())
423/// }
424/// ```
425pub trait WriteExt: Write {
426 /// Serialize each item in an iterator as a line of JSON, and write out
427 /// each one followed by a newline.
428 ///
429 /// All values in a single call to `write_json_lines()` must be the same
430 /// type, but separate calls may write different types.
431 ///
432 /// This method does not flush.
433 ///
434 /// # Errors
435 ///
436 /// Has the same error conditions as [`serde_json::to_writer()`] and
437 /// [`std::io::Write::write_all()`].
438 fn write_json_lines<T, I>(&mut self, items: I) -> Result<()>
439 where
440 I: IntoIterator<Item = T>,
441 T: Serialize,
442 {
443 for value in items {
444 serde_json::to_writer(&mut *self, &value)?;
445 self.write_all(b"\n")?;
446 }
447 Ok(())
448 }
449}
450
451impl<W: Write> WriteExt for W {}
452
453/// An extension trait for the [`std::io::BufRead`] trait that adds a
454/// `json_lines()` method
455///
456/// # Example
457///
458/// ```no_run
459/// use serde::Deserialize;
460/// use serde_jsonlines::BufReadExt;
461/// use std::fs::{write, File};
462/// use std::io::{BufReader, Result};
463///
464/// #[derive(Debug, Deserialize, PartialEq)]
465/// pub struct Structure {
466/// pub name: String,
467/// pub size: i32,
468/// pub on: bool,
469/// }
470///
471/// fn main() -> Result<()> {
472/// write(
473/// "example.jsonl",
474/// concat!(
475/// "{\"name\": \"Foo Bar\", \"on\":true,\"size\": 42 }\n",
476/// "{ \"name\":\"Quux\", \"on\" : false ,\"size\": 23}\n",
477/// " {\"name\": \"Gnusto Cleesh\" , \"on\": true, \"size\": 17}\n",
478/// ),
479/// )?;
480/// let fp = BufReader::new(File::open("example.jsonl")?);
481/// let items = fp.json_lines::<Structure>().collect::<Result<Vec<_>>>()?;
482/// assert_eq!(
483/// items,
484/// [
485/// Structure {
486/// name: "Foo Bar".into(),
487/// size: 42,
488/// on: true,
489/// },
490/// Structure {
491/// name: "Quux".into(),
492/// size: 23,
493/// on: false,
494/// },
495/// Structure {
496/// name: "Gnusto Cleesh".into(),
497/// size: 17,
498/// on: true,
499/// },
500/// ]
501/// );
502/// Ok(())
503/// }
504/// ```
505pub trait BufReadExt: BufRead {
506 /// Consume the reader and return an iterator over the deserialized JSON
507 /// values from each line.
508 ///
509 /// The returned iterator has an `Item` type of `std::io::Result<T>`. Each
510 /// call to `next()` has the same error conditions as
511 /// [`JsonLinesReader::read()`].
512 ///
513 /// Note that all deserialized values will be of the same type.
514 fn json_lines<T>(self) -> JsonLinesIter<Self, T>
515 where
516 Self: Sized,
517 {
518 JsonLinesReader::new(self).read_all()
519 }
520}
521
522impl<R: BufRead> BufReadExt for R {}
523
524/// Write an iterator of values to the file at `path` as JSON Lines.
525///
526/// If the file does not already exist, it is created. If it does exist, any
527/// contents are discarded.
528///
529/// # Errors
530///
531/// Has the same error conditions as [`File::create()`],
532/// [`serde_json::to_writer()`], [`std::io::Write::write_all()`], and
533/// [`std::io::Write::flush()`].
534///
535/// # Example
536///
537/// ```no_run
538/// use serde::Serialize;
539/// use serde_jsonlines::write_json_lines;
540/// use std::fs::read_to_string;
541///
542/// #[derive(Serialize)]
543/// pub struct Structure {
544/// pub name: String,
545/// pub size: i32,
546/// pub on: bool,
547/// }
548///
549/// fn main() -> std::io::Result<()> {
550/// write_json_lines(
551/// "example.jsonl",
552/// [
553/// Structure {
554/// name: "Foo Bar".into(),
555/// size: 42,
556/// on: true,
557/// },
558/// Structure {
559/// name: "Quux".into(),
560/// size: 23,
561/// on: false,
562/// },
563/// Structure {
564/// name: "Gnusto Cleesh".into(),
565/// size: 17,
566/// on: true,
567/// },
568/// ],
569/// )?;
570/// assert_eq!(
571/// read_to_string("example.jsonl")?,
572/// concat!(
573/// "{\"name\":\"Foo Bar\",\"size\":42,\"on\":true}\n",
574/// "{\"name\":\"Quux\",\"size\":23,\"on\":false}\n",
575/// "{\"name\":\"Gnusto Cleesh\",\"size\":17,\"on\":true}\n",
576/// )
577/// );
578/// Ok(())
579/// }
580/// ```
581pub fn write_json_lines<P, I, T>(path: P, items: I) -> Result<()>
582where
583 P: AsRef<Path>,
584 I: IntoIterator<Item = T>,
585 T: Serialize,
586{
587 let mut fp = BufWriter::new(File::create(path)?);
588 fp.write_json_lines(items)?;
589 fp.flush()
590}
591
592/// Append an iterator of values to the file at `path` as JSON Lines.
593///
594/// If the file does not already exist, it is created. If it does exist, the
595/// new lines are added after any lines that are already present.
596///
597/// # Errors
598///
/// Has the same error conditions as [`OpenOptions::open()`],
600/// [`serde_json::to_writer()`], [`std::io::Write::write_all()`], and
601/// [`std::io::Write::flush()`].
602///
603/// # Example
604///
605/// ```no_run
606/// use serde::Serialize;
607/// use serde_jsonlines::append_json_lines;
608/// use std::fs::read_to_string;
609///
610/// #[derive(Serialize)]
611/// pub struct Structure {
612/// pub name: String,
613/// pub size: i32,
614/// pub on: bool,
615/// }
616///
617/// fn main() -> std::io::Result<()> {
618/// append_json_lines(
619/// "example.jsonl",
620/// [
621/// Structure {
622/// name: "Foo Bar".into(),
623/// size: 42,
624/// on: true,
625/// },
626/// Structure {
627/// name: "Quux".into(),
628/// size: 23,
629/// on: false,
630/// },
631/// ],
632/// )?;
633/// assert_eq!(
634/// read_to_string("example.jsonl")?,
635/// concat!(
636/// "{\"name\":\"Foo Bar\",\"size\":42,\"on\":true}\n",
637/// "{\"name\":\"Quux\",\"size\":23,\"on\":false}\n",
638/// )
639/// );
640/// append_json_lines(
641/// "example.jsonl",
642/// [
643/// Structure {
644/// name: "Gnusto Cleesh".into(),
645/// size: 17,
646/// on: true,
647/// },
648/// Structure {
649/// name: "baz".into(),
650/// size: 69105,
651/// on: false,
652/// },
653/// ],
654/// )?;
655/// assert_eq!(
656/// read_to_string("example.jsonl")?,
657/// concat!(
658/// "{\"name\":\"Foo Bar\",\"size\":42,\"on\":true}\n",
659/// "{\"name\":\"Quux\",\"size\":23,\"on\":false}\n",
660/// "{\"name\":\"Gnusto Cleesh\",\"size\":17,\"on\":true}\n",
661/// "{\"name\":\"baz\",\"size\":69105,\"on\":false}\n",
662/// )
663/// );
664/// Ok(())
665/// }
666/// ```
667pub fn append_json_lines<P, I, T>(path: P, items: I) -> Result<()>
668where
669 P: AsRef<Path>,
670 I: IntoIterator<Item = T>,
671 T: Serialize,
672{
673 let mut fp = BufWriter::new(OpenOptions::new().append(true).create(true).open(path)?);
674 fp.write_json_lines(items)?;
675 fp.flush()
676}
677
678/// Iterate over JSON Lines values from a file.
679///
680/// `json_lines(path)` returns an iterator of values deserialized from the JSON
681/// Lines in the file at `path`.
682///
683/// The returned iterator has an `Item` type of `std::io::Result<T>`. Each
684/// call to `next()` has the same error conditions as
685/// [`JsonLinesReader::read()`].
686///
687/// # Errors
688///
689/// Has the same error conditions as [`File::open()`].
690///
691/// # Example
692///
693/// ```no_run
694/// use serde::Deserialize;
695/// use serde_jsonlines::json_lines;
696/// use std::fs::write;
697/// use std::io::Result;
698///
699/// #[derive(Debug, Deserialize, PartialEq)]
700/// pub struct Structure {
701/// pub name: String,
702/// pub size: i32,
703/// pub on: bool,
704/// }
705///
706/// fn main() -> Result<()> {
707/// write(
708/// "example.jsonl",
709/// concat!(
710/// "{\"name\": \"Foo Bar\", \"on\":true,\"size\": 42 }\n",
711/// "{ \"name\":\"Quux\", \"on\" : false ,\"size\": 23}\n",
712/// " {\"name\": \"Gnusto Cleesh\" , \"on\": true, \"size\": 17}\n",
713/// ),
714/// )?;
715/// let items = json_lines::<Structure, _>("example.jsonl")?.collect::<Result<Vec<_>>>()?;
716/// assert_eq!(
717/// items,
718/// [
719/// Structure {
720/// name: "Foo Bar".into(),
721/// size: 42,
722/// on: true,
723/// },
724/// Structure {
725/// name: "Quux".into(),
726/// size: 23,
727/// on: false,
728/// },
729/// Structure {
730/// name: "Gnusto Cleesh".into(),
731/// size: 17,
732/// on: true,
733/// },
734/// ]
735/// );
736/// Ok(())
737/// }
738/// ```
739pub fn json_lines<T, P: AsRef<Path>>(path: P) -> Result<JsonLinesFileIter<T>> {
740 let fp = BufReader::new(File::open(path)?);
741 Ok(fp.json_lines())
742}