use core::fmt;
use core::str;
use memchr::memchr;
use crate::{QuoteStyle, Terminator};
/// Builder that accumulates configuration for a [`Writer`].
///
/// The builder stores a template `Writer`; each setter overwrites one field
/// of that template and `build` hands out a finalized clone of it.
#[derive(Debug)]
pub struct WriterBuilder {
// Template writer carrying the configuration chosen so far.
wtr: Writer,
}
impl WriterBuilder {
pub fn new() -> WriterBuilder {
let wtr = Writer {
state: WriterState::default(),
requires_quotes: [false; 256],
delimiter: b',',
term: Terminator::Any(b'\n'),
style: QuoteStyle::default(),
quote: b'"',
escape: b'\\',
double_quote: true,
};
WriterBuilder { wtr: wtr }
}
pub fn build(&self) -> Writer {
use crate::Terminator::*;
let mut wtr = self.wtr.clone();
wtr.requires_quotes[self.wtr.delimiter as usize] = true;
wtr.requires_quotes[self.wtr.quote as usize] = true;
if !self.wtr.double_quote {
wtr.requires_quotes[self.wtr.escape as usize] = true;
}
match self.wtr.term {
CRLF | Any(b'\n') | Any(b'\r') => {
wtr.requires_quotes[b'\r' as usize] = true;
wtr.requires_quotes[b'\n' as usize] = true;
}
Any(b) => {
wtr.requires_quotes[b as usize] = true;
}
_ => unreachable!(),
}
wtr
}
pub fn delimiter(&mut self, delimiter: u8) -> &mut WriterBuilder {
self.wtr.delimiter = delimiter;
self
}
pub fn terminator(&mut self, term: Terminator) -> &mut WriterBuilder {
self.wtr.term = term;
self
}
pub fn quote_style(&mut self, style: QuoteStyle) -> &mut WriterBuilder {
self.wtr.style = style;
self
}
pub fn quote(&mut self, quote: u8) -> &mut WriterBuilder {
self.wtr.quote = quote;
self
}
pub fn escape(&mut self, escape: u8) -> &mut WriterBuilder {
self.wtr.escape = escape;
self
}
pub fn double_quote(&mut self, yes: bool) -> &mut WriterBuilder {
self.wtr.double_quote = yes;
self
}
}
impl Default for WriterBuilder {
fn default() -> WriterBuilder {
WriterBuilder::new()
}
}
/// Outcome of a single write call.
#[derive(Clone, Debug, Eq, PartialEq)]
pub enum WriteResult {
/// Everything that was asked to be written fit in the output buffer.
InputEmpty,
/// The output buffer was too small for the data; the caller is expected
/// to supply more output space and call again.
OutputFull,
}
/// A writer that encodes fields, delimiters and record terminators into
/// caller-provided byte buffers, quoting fields according to its
/// configuration.
pub struct Writer {
// Progress within the current field and record.
state: WriterState,
// Lookup table indexed by byte value; `true` marks a byte whose presence
// makes a field need quotes. Filled in by `WriterBuilder::build`.
requires_quotes: [bool; 256],
// Byte written between fields.
delimiter: u8,
// Record terminator.
term: Terminator,
// Policy deciding which fields are quoted.
style: QuoteStyle,
// Byte that opens and closes a quoted field.
quote: u8,
// Byte written before a quote inside a quoted field when `double_quote`
// is false.
escape: u8,
// When true, a quote inside a quoted field is written twice instead of
// being prefixed with `escape`.
double_quote: bool,
}
impl Clone for Writer {
    /// Returns an independent copy of this writer, including its current
    /// state and its quoting lookup table.
    fn clone(&self) -> Writer {
        Writer {
            state: self.state.clone(),
            // `[bool; 256]` is `Copy`, so the table is duplicated by a plain
            // copy; no element-by-element loop is needed.
            requires_quotes: self.requires_quotes,
            delimiter: self.delimiter,
            term: self.term,
            style: self.style,
            quote: self.quote,
            escape: self.escape,
            double_quote: self.double_quote,
        }
    }
}
impl fmt::Debug for Writer {
    /// Formats the writer as a struct named `Writer`, listing its state and
    /// configuration fields. The 256-entry `requires_quotes` table is not
    /// included in the output.
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        let mut out = f.debug_struct("Writer");
        out.field("state", &self.state);
        out.field("delimiter", &self.delimiter);
        out.field("term", &self.term);
        out.field("style", &self.style);
        out.field("quote", &self.quote);
        out.field("escape", &self.escape);
        out.field("double_quote", &self.double_quote);
        out.finish()
    }
}
/// Mutable progress tracked by a `Writer` between calls.
#[derive(Clone, Debug)]
struct WriterState {
// True once a field has been started and until a delimiter or terminator
// has been written after it.
in_field: bool,
// True while the current field is quoted and its closing quote has not
// been written yet.
quoting: bool,
// Number of bytes written for the current record so far; reset to zero
// when a terminator is written.
record_bytes: u64,
}
impl Writer {
    /// Creates a writer with the default configuration.
    pub fn new() -> Writer {
        Self::default()
    }

    /// Completes whatever is still pending for the current field.
    ///
    /// If the record consists of a single field that produced no bytes, an
    /// empty quoted field (two quote bytes) is written. If a quoted field is
    /// open, its closing quote is written and the writer state is reset.
    ///
    /// Returns the result and the number of bytes written to `output`.
    /// `OutputFull` means `output` was too small; call again with more room.
    pub fn finish(&mut self, output: &mut [u8]) -> (WriteResult, usize) {
        let mut written = 0;

        let lone_empty_field =
            self.state.in_field && self.state.record_bytes == 0;
        if lone_empty_field {
            assert!(!self.state.quoting);
            match self.write(&[self.quote, self.quote], output) {
                (status, 0) => return (status, 0),
                (_, n) => {
                    written += n;
                    self.state.record_bytes += n as u64;
                }
            }
        }

        if !self.state.quoting {
            return (WriteResult::InputEmpty, written);
        }
        match self.write(&[self.quote], &mut output[written..]) {
            // No room for the closing quote: state is left as is so the
            // call can be repeated.
            (status, 0) => (status, written),
            (status, n) => {
                self.state.record_bytes = 0;
                self.state.in_field = false;
                self.state.quoting = false;
                (status, written + n)
            }
        }
    }

    /// Writes (part of) a field from `input` into `output`.
    ///
    /// On the first call for a field the quoting decision is made from
    /// `input` and, when quoting, the opening quote is written. Later calls
    /// before the next delimiter/terminator continue the same field.
    ///
    /// Returns the result, the number of input bytes consumed and the number
    /// of output bytes written.
    pub fn field(
        &mut self,
        input: &[u8],
        output: &mut [u8],
    ) -> (WriteResult, usize, usize) {
        // Bytes taken by the opening quote, if one is written by this call.
        let mut opened = 0;

        if !self.state.in_field {
            self.state.quoting = self.should_quote(input);
            if self.state.quoting {
                let (status, n) = self.write(&[self.quote], output);
                if n == 0 {
                    return (status, 0, 0);
                }
                opened = n;
                self.state.record_bytes += n as u64;
            }
            self.state.in_field = true;
        }

        let room = &mut output[opened..];
        let (status, consumed, produced) = if self.state.quoting {
            quote(input, room, self.quote, self.escape, self.double_quote)
        } else {
            write_optimistic(input, room)
        };
        self.state.record_bytes += produced as u64;
        (status, consumed, opened + produced)
    }

    /// Writes the field delimiter, first closing the current field's quote
    /// if one is open.
    ///
    /// Returns the result and the number of bytes written to `output`.
    pub fn delimiter(
        &mut self,
        output: &mut [u8],
    ) -> (WriteResult, usize) {
        let mut written = 0;

        if self.state.quoting {
            match self.write(&[self.quote], output) {
                (status, 0) => return (status, 0),
                (_, n) => {
                    written += n;
                    self.state.record_bytes += n as u64;
                    self.state.quoting = false;
                }
            }
        }

        match self.write(&[self.delimiter], &mut output[written..]) {
            (status, 0) => (status, written),
            (status, n) => {
                self.state.record_bytes += n as u64;
                self.state.in_field = false;
                (status, written + n)
            }
        }
    }

    /// Writes the record terminator.
    ///
    /// A record for which no bytes have been written yet is emitted as an
    /// empty quoted field (two quote bytes) first. An open quoted field gets
    /// its closing quote before the terminator.
    ///
    /// Returns the result and the number of bytes written to `output`.
    pub fn terminator(
        &mut self,
        output: &mut [u8],
    ) -> (WriteResult, usize) {
        let mut written = 0;

        if self.state.record_bytes == 0 {
            assert!(!self.state.quoting);
            match self.write(&[self.quote, self.quote], output) {
                (status, 0) => return (status, 0),
                (_, n) => {
                    written += n;
                    self.state.record_bytes += n as u64;
                }
            }
        }

        if self.state.quoting {
            match self.write(&[self.quote], &mut output[written..]) {
                (status, 0) => return (status, 0),
                (_, n) => {
                    written += n;
                    self.state.record_bytes += n as u64;
                    self.state.quoting = false;
                }
            }
        }

        let room = &mut output[written..];
        let (status, n) = match self.term {
            Terminator::CRLF => write_pessimistic(b"\r\n", room),
            Terminator::Any(byte) => write_pessimistic(&[byte], room),
            _ => unreachable!(),
        };
        if n == 0 {
            return (status, written);
        }
        self.state.record_bytes = 0;
        self.state.in_field = false;
        (status, written + n)
    }

    /// Reports whether `input` contains any byte marked in the
    /// `requires_quotes` table. Full 8-byte groups are tested with one
    /// combined check each; the leftover bytes are tested one at a time.
    #[inline]
    fn needs_quotes(&self, input: &[u8]) -> bool {
        let table = &self.requires_quotes;
        let mut groups = input.chunks_exact(8);
        let hit_in_group = groups.by_ref().any(|g| {
            table[usize::from(g[0])]
                || table[usize::from(g[1])]
                || table[usize::from(g[2])]
                || table[usize::from(g[3])]
                || table[usize::from(g[4])]
                || table[usize::from(g[5])]
                || table[usize::from(g[6])]
                || table[usize::from(g[7])]
        });
        hit_in_group
            || groups.remainder().iter().any(|&b| self.is_special_byte(b))
    }

    /// Returns true if the presence of `b` in a field makes that field need
    /// quotes under this writer's configuration.
    #[inline]
    pub fn is_special_byte(&self, b: u8) -> bool {
        self.requires_quotes[usize::from(b)]
    }

    /// Decides, according to the configured quote style, whether a field
    /// whose content is `input` should be quoted.
    #[inline]
    pub fn should_quote(&self, input: &[u8]) -> bool {
        match self.style {
            QuoteStyle::Necessary => self.needs_quotes(input),
            QuoteStyle::NonNumeric => is_non_numeric(input),
            QuoteStyle::Always => true,
            QuoteStyle::Never => false,
            _ => unreachable!(),
        }
    }

    /// Returns the configured field delimiter.
    #[inline]
    pub fn get_delimiter(&self) -> u8 {
        self.delimiter
    }

    /// Returns the configured record terminator.
    #[inline]
    pub fn get_terminator(&self) -> Terminator {
        self.term
    }

    /// Returns the configured quote style.
    #[inline]
    pub fn get_quote_style(&self) -> QuoteStyle {
        self.style
    }

    /// Returns the configured quote byte.
    #[inline]
    pub fn get_quote(&self) -> u8 {
        self.quote
    }

    /// Returns the configured escape byte.
    #[inline]
    pub fn get_escape(&self) -> u8 {
        self.escape
    }

    /// Returns whether quotes inside quoted fields are doubled.
    #[inline]
    pub fn get_double_quote(&self) -> bool {
        self.double_quote
    }

    /// Copies all of `data` to the front of `output`, or nothing at all when
    /// `output` is too short to hold it.
    fn write(&self, data: &[u8], output: &mut [u8]) -> (WriteResult, usize) {
        match output.get_mut(..data.len()) {
            Some(dst) => {
                dst.copy_from_slice(data);
                (WriteResult::InputEmpty, data.len())
            }
            None => (WriteResult::OutputFull, 0),
        }
    }
}
impl Default for Writer {
    /// Builds a writer from an unmodified `WriterBuilder`.
    fn default() -> Writer {
        let builder = WriterBuilder::new();
        builder.build()
    }
}
impl Default for WriterState {
    /// Initial state: not inside a field, not quoting, and no bytes written
    /// for the current record.
    fn default() -> WriterState {
        WriterState {
            record_bytes: 0,
            quoting: false,
            in_field: false,
        }
    }
}
/// Returns true when `input` is not numeric.
///
/// `input` counts as numeric only if it is valid UTF-8 and the whole text
/// parses as either an `f64` or an `i128`. Anything else, including invalid
/// UTF-8 and the empty slice, is reported as non-numeric.
pub fn is_non_numeric(input: &[u8]) -> bool {
    match str::from_utf8(input) {
        // The standard parsers are used rather than hand-written number
        // validation.
        Ok(text) => {
            text.parse::<f64>().is_err() && text.parse::<i128>().is_err()
        }
        Err(_) => true,
    }
}
/// Copies `input` into `output`, escaping every occurrence of the `quote`
/// byte.
///
/// Each quote is written as two quote bytes when `double_quote` is true and
/// as `escape` followed by the quote otherwise. No surrounding quotes are
/// added.
///
/// Returns the result, the number of input bytes consumed and the number of
/// output bytes written. Runs of ordinary bytes are copied as far as they
/// fit; an escaped quote is written only when both of its bytes fit. On
/// `OutputFull` the call can be repeated with the unconsumed input and more
/// output space.
pub fn quote(
    input: &[u8],
    output: &mut [u8],
    quote: u8,
    escape: u8,
    double_quote: bool,
) -> (WriteResult, usize, usize) {
    // The replacement for a quote is the same on every iteration.
    let escaped = if double_quote { [quote, quote] } else { [escape, quote] };
    // Cursors into `input` and `output`.
    let (mut nin, mut nout) = (0, 0);

    loop {
        let pending = &input[nin..];
        let run_len = match memchr(quote, pending) {
            Some(pos) => pos,
            None => {
                // No more quotes: copy whatever is left.
                let (res, i, o) =
                    write_optimistic(pending, &mut output[nout..]);
                return (res, nin + i, nout + o);
            }
        };

        // Copy the ordinary bytes in front of the quote.
        let (res, i, o) =
            write_optimistic(&pending[..run_len], &mut output[nout..]);
        nin += i;
        nout += o;
        if res == WriteResult::OutputFull {
            return (res, nin, nout);
        }

        // Emit the escaped quote; the quote is consumed only if both bytes
        // were written.
        let (res, o) = write_pessimistic(&escaped, &mut output[nout..]);
        if res == WriteResult::OutputFull {
            return (res, nin, nout);
        }
        nout += o;
        nin += 1;
    }
}
/// Copies as much of `input` as fits into the front of `output`.
///
/// Returns the result followed by the number of bytes consumed and the
/// number of bytes written (always equal). The result is `OutputFull` when
/// `input` had to be truncated and `InputEmpty` when all of it was copied.
fn write_optimistic(
    input: &[u8],
    output: &mut [u8],
) -> (WriteResult, usize, usize) {
    let copied = input.len().min(output.len());
    output[..copied].copy_from_slice(&input[..copied]);
    let status = if copied < input.len() {
        WriteResult::OutputFull
    } else {
        WriteResult::InputEmpty
    };
    (status, copied, copied)
}
/// Copies all of `input` to the front of `output`, or nothing at all when it
/// does not fit.
///
/// Returns `(InputEmpty, input.len())` on success and `(OutputFull, 0)` when
/// `output` is shorter than `input`.
fn write_pessimistic(input: &[u8], output: &mut [u8]) -> (WriteResult, usize) {
    match output.get_mut(..input.len()) {
        Some(dst) => {
            dst.copy_from_slice(input);
            (WriteResult::InputEmpty, input.len())
        }
        None => (WriteResult::OutputFull, 0),
    }
}
// Identity function: returns its argument unchanged.
//
// Call sites in this file use it as `output = &mut moving(output)[o..];`,
// passing the mutable slice by value before re-slicing it.
// NOTE(review): presumably this forces the `&mut` reference to be moved
// rather than reborrowed, so the shortened slice can be assigned back to the
// same variable — confirm before removing.
fn moving<T>(x: T) -> T {
x
}
// Unit tests for `Writer` (field/delimiter/terminator/finish sequencing) and
// for the free-standing `quote` function.
#[cfg(test)]
mod tests {
use crate::writer::WriteResult::*;
use crate::writer::{quote, QuoteStyle, Writer, WriterBuilder};
// Shorthand: view a string literal as bytes.
fn b(s: &str) -> &[u8] {
s.as_bytes()
}
// Shorthand: view bytes as a string; panics if they are not valid UTF-8.
fn s(b: &[u8]) -> &str {
::core::str::from_utf8(b).unwrap()
}
// Calls `$wtr.field($inp, $out)` and checks the result, the number of input
// bytes consumed, the number of output bytes written and the written data.
macro_rules! assert_field {
(
$wtr:expr, $inp:expr, $out:expr,
$expect_in:expr, $expect_out:expr,
$expect_res:expr, $expect_data:expr
) => {{
let (res, i, o) = $wtr.field($inp, $out);
assert_eq!($expect_res, res, "result");
assert_eq!($expect_in, i, "input");
assert_eq!($expect_out, o, "output");
assert_eq!($expect_data, s(&$out[..o]), "data");
}};
}
// Calls the writer method named by `$which` (`delimiter`, `terminator` or
// `finish`) on `$out` and checks the result, the number of bytes written and
// the written data.
macro_rules! assert_write {
(
$wtr:expr, $which:ident, $out:expr,
$expect_out:expr, $expect_res:expr, $expect_data:expr
) => {{
let (res, o) = $wtr.$which($out);
assert_eq!($expect_res, res, "result");
assert_eq!($expect_out, o, "output");
assert_eq!($expect_data, s(&$out[..o]), "data");
}};
}
// A field without special bytes is copied verbatim; `finish` adds nothing.
#[test]
fn writer_one_field() {
let mut wtr = Writer::new();
let out = &mut [0; 1024];
let mut n = 0;
assert_field!(wtr, b("abc"), &mut out[n..], 3, 3, InputEmpty, "abc");
n += 3;
assert_write!(wtr, finish, &mut out[n..], 0, InputEmpty, "");
}
// A record holding one empty field is written as `""` before the terminator.
#[test]
fn writer_one_empty_field_terminator() {
let mut wtr = Writer::new();
let out = &mut [0; 1024];
assert_field!(wtr, b(""), &mut out[..], 0, 0, InputEmpty, "");
assert_write!(wtr, terminator, &mut out[..], 3, InputEmpty, "\"\"\n");
assert_write!(wtr, finish, &mut out[..], 0, InputEmpty, "");
}
// A lone empty field followed directly by `finish` is written as `""`.
#[test]
fn writer_one_empty_field_finish() {
let mut wtr = Writer::new();
let out = &mut [0; 1024];
assert_field!(wtr, b(""), &mut out[..], 0, 0, InputEmpty, "");
assert_write!(wtr, finish, &mut out[..], 2, InputEmpty, "\"\"");
}
// Two single-empty-field records, the second one ended by `finish`.
#[test]
fn writer_many_one_empty_field_finish() {
let mut wtr = Writer::new();
let out = &mut [0; 1024];
assert_field!(wtr, b(""), &mut out[..], 0, 0, InputEmpty, "");
assert_write!(wtr, terminator, &mut out[..], 3, InputEmpty, "\"\"\n");
assert_field!(wtr, b(""), &mut out[..], 0, 0, InputEmpty, "");
assert_write!(wtr, finish, &mut out[..], 2, InputEmpty, "\"\"");
}
// Two single-empty-field records, both ended by a terminator.
#[test]
fn writer_many_one_empty_field_terminator() {
let mut wtr = Writer::new();
let out = &mut [0; 1024];
assert_field!(wtr, b(""), &mut out[..], 0, 0, InputEmpty, "");
assert_write!(wtr, terminator, &mut out[..], 3, InputEmpty, "\"\"\n");
assert_field!(wtr, b(""), &mut out[..], 0, 0, InputEmpty, "");
assert_write!(wtr, terminator, &mut out[..], 3, InputEmpty, "\"\"\n");
assert_write!(wtr, finish, &mut out[..], 0, InputEmpty, "");
}
// A field containing a quote is quoted and the quote is doubled; `finish`
// writes the closing quote.
#[test]
fn writer_one_field_quote() {
let mut wtr = Writer::new();
let out = &mut [0; 1024];
let mut n = 0;
assert_field!(
wtr,
b("a\"bc"),
&mut out[n..],
4,
6,
InputEmpty,
"\"a\"\"bc"
);
n += 6;
assert_write!(wtr, finish, &mut out[n..], 1, InputEmpty, "\"");
}
// One unquoted field supplied across two `field` calls.
#[test]
fn writer_one_field_stream() {
let mut wtr = Writer::new();
let out = &mut [0; 1024];
let mut n = 0;
assert_field!(wtr, b("abc"), &mut out[n..], 3, 3, InputEmpty, "abc");
n += 3;
assert_field!(wtr, b("x"), &mut out[n..], 1, 1, InputEmpty, "x");
n += 1;
assert_write!(wtr, finish, &mut out[n..], 0, InputEmpty, "");
}
// One quoted field supplied across two `field` calls; only the first call
// writes the opening quote.
#[test]
fn writer_one_field_stream_quote() {
let mut wtr = Writer::new();
let out = &mut [0; 1024];
let mut n = 0;
assert_field!(
wtr,
b("abc\""),
&mut out[n..],
4,
6,
InputEmpty,
"\"abc\"\""
);
n += 6;
assert_field!(wtr, b("x"), &mut out[n..], 1, 1, InputEmpty, "x");
n += 1;
assert_write!(wtr, finish, &mut out[n..], 1, InputEmpty, "\"");
}
// A quoted field written through a 4-byte output buffer: each call reports
// `OutputFull` until the remaining input fits.
#[test]
fn writer_one_field_stream_quote_partial() {
let mut wtr = Writer::new();
let out = &mut [0; 4];
assert_field!(wtr, b("ab\"xyz"), out, 2, 3, OutputFull, "\"ab");
assert_field!(wtr, b("\"xyz"), out, 3, 4, OutputFull, "\"\"xy");
assert_field!(wtr, b("z"), out, 1, 1, InputEmpty, "z");
assert_write!(wtr, finish, out, 1, InputEmpty, "\"");
}
// Two unquoted fields separated by the delimiter.
#[test]
fn writer_two_fields() {
let mut wtr = Writer::new();
let out = &mut [0; 1024];
let mut n = 0;
assert_field!(wtr, b("abc"), &mut out[n..], 3, 3, InputEmpty, "abc");
n += 3;
assert_write!(wtr, delimiter, &mut out[n..], 1, InputEmpty, ",");
n += 1;
assert_field!(wtr, b("yz"), &mut out[n..], 2, 2, InputEmpty, "yz");
n += 2;
assert_write!(wtr, finish, &mut out[n..], 0, InputEmpty, "");
assert_eq!("abc,yz", s(&out[..n]));
}
// With `QuoteStyle::NonNumeric`, text fields are quoted while fields that
// parse as numbers are left bare.
#[test]
fn writer_two_fields_non_numeric() {
let mut wtr =
WriterBuilder::new().quote_style(QuoteStyle::NonNumeric).build();
let out = &mut [0; 1024];
let mut n = 0;
assert_field!(wtr, b("abc"), &mut out[n..], 3, 4, InputEmpty, "\"abc");
n += 4;
assert_write!(wtr, delimiter, &mut out[n..], 2, InputEmpty, "\",");
n += 2;
assert_field!(wtr, b("5.2"), &mut out[n..], 3, 3, InputEmpty, "5.2");
n += 3;
assert_write!(wtr, delimiter, &mut out[n..], 1, InputEmpty, ",");
n += 1;
assert_field!(wtr, b("98"), &mut out[n..], 2, 2, InputEmpty, "98");
n += 2;
assert_write!(wtr, finish, &mut out[n..], 0, InputEmpty, "");
assert_eq!("\"abc\",5.2,98", s(&out[..n]));
}
// Fields containing the delimiter or a newline are quoted; the delimiter call
// writes the pending closing quote first.
#[test]
fn writer_two_fields_quote() {
let mut wtr = Writer::new();
let out = &mut [0; 1024];
let mut n = 0;
assert_field!(
wtr,
b("a,bc"),
&mut out[n..],
4,
5,
InputEmpty,
"\"a,bc"
);
n += 5;
assert_write!(wtr, delimiter, &mut out[n..], 2, InputEmpty, "\",");
n += 2;
assert_field!(wtr, b("\nz"), &mut out[n..], 2, 3, InputEmpty, "\"\nz");
n += 3;
assert_write!(wtr, finish, &mut out[n..], 1, InputEmpty, "\"");
n += 1;
assert_eq!("\"a,bc\",\"\nz\"", s(&out[..n]));
}
// Two records of two unquoted fields each.
#[test]
fn writer_two_fields_two_records() {
let mut wtr = Writer::new();
let out = &mut [0; 1024];
let mut n = 0;
assert_field!(wtr, b("abc"), &mut out[n..], 3, 3, InputEmpty, "abc");
n += 3;
assert_write!(wtr, delimiter, &mut out[n..], 1, InputEmpty, ",");
n += 1;
assert_field!(wtr, b("yz"), &mut out[n..], 2, 2, InputEmpty, "yz");
n += 2;
assert_write!(wtr, terminator, &mut out[n..], 1, InputEmpty, "\n");
n += 1;
assert_field!(wtr, b("foo"), &mut out[n..], 3, 3, InputEmpty, "foo");
n += 3;
assert_write!(wtr, delimiter, &mut out[n..], 1, InputEmpty, ",");
n += 1;
assert_field!(wtr, b("quux"), &mut out[n..], 4, 4, InputEmpty, "quux");
n += 4;
assert_write!(wtr, finish, &mut out[n..], 0, InputEmpty, "");
assert_eq!("abc,yz\nfoo,quux", s(&out[..n]));
}
// Two records in which every field needs quoting; delimiter, terminator and
// finish each write the pending closing quote.
#[test]
fn writer_two_fields_two_records_quote() {
let mut wtr = Writer::new();
let out = &mut [0; 1024];
let mut n = 0;
assert_field!(
wtr,
b("a,bc"),
&mut out[n..],
4,
5,
InputEmpty,
"\"a,bc"
);
n += 5;
assert_write!(wtr, delimiter, &mut out[n..], 2, InputEmpty, "\",");
n += 2;
assert_field!(wtr, b("\nz"), &mut out[n..], 2, 3, InputEmpty, "\"\nz");
n += 3;
assert_write!(wtr, terminator, &mut out[n..], 2, InputEmpty, "\"\n");
n += 2;
assert_field!(
wtr,
b("f\"oo"),
&mut out[n..],
4,
6,
InputEmpty,
"\"f\"\"oo"
);
n += 6;
assert_write!(wtr, delimiter, &mut out[n..], 2, InputEmpty, "\",");
n += 2;
assert_field!(
wtr,
b("quux,"),
&mut out[n..],
5,
6,
InputEmpty,
"\"quux,"
);
n += 6;
assert_write!(wtr, finish, &mut out[n..], 1, InputEmpty, "\"");
n += 1;
assert_eq!("\"a,bc\",\"\nz\"\n\"f\"\"oo\",\"quux,\"", s(&out[..n]));
}
// Calls `quote` with `"` as the quote byte and `\` as the escape byte and
// checks the result, the bytes consumed, the bytes written and the data.
// The six-argument form uses quote doubling; a seventh argument selects
// `double_quote` explicitly.
macro_rules! assert_quote {
(
$inp:expr, $out:expr,
$expect_in:expr, $expect_out:expr,
$expect_res:expr, $expect_data:expr
) => {
assert_quote!(
$inp,
$out,
$expect_in,
$expect_out,
$expect_res,
$expect_data,
true
);
};
(
$inp:expr, $out:expr,
$expect_in:expr, $expect_out:expr,
$expect_res:expr, $expect_data:expr,
$double_quote:expr
) => {{
let (res, i, o) = quote($inp, $out, b'"', b'\\', $double_quote);
assert_eq!($expect_res, res, "result");
assert_eq!($expect_in, i, "input");
assert_eq!($expect_out, o, "output");
assert_eq!(b($expect_data), &$out[..o], "data");
}};
}
// Empty input produces no output.
#[test]
fn quote_empty() {
let inp = b("");
let out = &mut [0; 1024];
assert_quote!(inp, out, 0, 0, InputEmpty, "");
}
// Input without quotes is copied unchanged.
#[test]
fn quote_no_quotes() {
let inp = b("foobar");
let out = &mut [0; 1024];
assert_quote!(inp, out, 6, 6, InputEmpty, "foobar");
}
// A single quote is doubled.
#[test]
fn quote_one_quote() {
let inp = b("\"");
let out = &mut [0; 1024];
assert_quote!(inp, out, 1, 2, InputEmpty, r#""""#);
}
// Two adjacent quotes are each doubled.
#[test]
fn quote_two_quotes() {
let inp = b("\"\"");
let out = &mut [0; 1024];
assert_quote!(inp, out, 2, 4, InputEmpty, r#""""""#);
}
// With doubling disabled, a quote is prefixed with the escape byte.
#[test]
fn quote_escaped_one() {
let inp = b("\"");
let out = &mut [0; 1024];
assert_quote!(inp, out, 1, 2, InputEmpty, r#"\""#, false);
}
// With doubling disabled, each of two quotes is prefixed with the escape
// byte.
#[test]
fn quote_escaped_two() {
let inp = b("\"\"");
let out = &mut [0; 1024];
assert_quote!(inp, out, 2, 4, InputEmpty, r#"\"\""#, false);
}
// Quotes mixed with ordinary text.
#[test]
fn quote_misc() {
let inp = b(r#"foo "bar" baz "quux"?"#);
let out = &mut [0; 1024];
assert_quote!(
inp,
out,
21,
25,
InputEmpty,
r#"foo ""bar"" baz ""quux""?"#
);
}
// Quote-free input pushed through a 2-byte output buffer in several calls.
#[test]
fn quote_stream_no_quotes() {
let mut inp = b("fooba");
let out = &mut [0; 2];
assert_quote!(inp, out, 2, 2, OutputFull, "fo");
inp = &inp[2..];
assert_quote!(inp, out, 2, 2, OutputFull, "ob");
inp = &inp[2..];
assert_quote!(inp, out, 1, 1, InputEmpty, "a");
}
// Input with quotes pushed through a 2-byte output buffer: a doubled quote is
// only written when both of its bytes fit.
#[test]
fn quote_stream_quotes() {
let mut inp = b(r#"a"bc"d""#);
let out = &mut [0; 2];
assert_quote!(inp, out, 1, 1, OutputFull, "a");
inp = &inp[1..];
assert_quote!(inp, out, 1, 2, OutputFull, r#""""#);
inp = &inp[1..];
assert_quote!(inp, out, 2, 2, OutputFull, "bc");
inp = &inp[2..];
assert_quote!(inp, out, 1, 2, OutputFull, r#""""#);
inp = &inp[1..];
assert_quote!(inp, out, 1, 1, OutputFull, "d");
inp = &inp[1..];
assert_quote!(inp, out, 1, 2, InputEmpty, r#""""#);
}
}