use core::fmt;
use crate::Terminator;
/// A pull-based CSV parser that reads from caller-supplied input slices and
/// copies field bytes into caller-supplied output slices.
///
/// Build one with `Reader::new` for the default configuration, or with
/// `ReaderBuilder` to customise it.
#[derive(Clone, Debug)]
pub struct Reader {
/// Transition table used by the default (DFA) parsing path.
dfa: Dfa,
/// Current state when parsing with the DFA.
dfa_state: DfaState,
/// Current state when parsing with the NFA.
nfa_state: NfaState,
/// Byte that separates fields within a record.
delimiter: u8,
/// Record terminator.
term: Terminator,
/// Byte that opens and closes a quoted field.
quote: u8,
/// Optional byte that escapes the following byte inside a quoted field.
escape: Option<u8>,
/// Whether a doubled quote inside a quoted field yields one literal quote.
double_quote: bool,
/// Optional byte that, at the start of a record, marks a comment line.
comment: Option<u8>,
/// Whether quote (and escape) bytes receive any special treatment.
quoting: bool,
/// Whether to parse with the NFA instead of the DFA.
use_nfa: bool,
/// Current line number; starts at 1 and is incremented by the DFA read
/// paths for every `\n` they consume.
line: u64,
/// Set after the first read call; a UTF-8 BOM is only stripped before that.
has_read: bool,
/// Number of output bytes produced so far for the record currently being
/// read through `read_record`.
output_pos: usize,
}
impl Default for Reader {
    /// Returns a reader configured for conventional CSV: comma delimiter,
    /// `"` as the quote byte with doubled-quote escaping, no escape byte, no
    /// comment byte, quoting enabled and DFA-based parsing.
    ///
    /// The DFA stored in the returned value is still empty; it is compiled by
    /// `ReaderBuilder::build`.
    fn default() -> Reader {
        // Parser machinery, all in its pristine state.
        let dfa = Dfa::new();
        let dfa_state = DfaState::start();
        let nfa_state = NfaState::StartRecord;

        Reader {
            dfa,
            dfa_state,
            nfa_state,
            // Syntax configuration.
            delimiter: b',',
            term: Terminator::default(),
            quote: b'"',
            escape: None,
            double_quote: true,
            comment: None,
            quoting: true,
            use_nfa: false,
            // Position bookkeeping.
            line: 1,
            has_read: false,
            output_pos: 0,
        }
    }
}
/// Builds a `Reader` with a custom configuration.
///
/// The builder holds a template `Reader`; each setter mutates the template
/// and `build` clones it and compiles its DFA.
#[derive(Debug, Default)]
pub struct ReaderBuilder {
/// Template reader holding the configuration chosen so far.
rdr: Reader,
}
impl ReaderBuilder {
pub fn new() -> ReaderBuilder {
ReaderBuilder::default()
}
pub fn build(&self) -> Reader {
let mut rdr = self.rdr.clone();
rdr.build_dfa();
rdr
}
pub fn delimiter(&mut self, delimiter: u8) -> &mut ReaderBuilder {
self.rdr.delimiter = delimiter;
self
}
pub fn terminator(&mut self, term: Terminator) -> &mut ReaderBuilder {
self.rdr.term = term;
self
}
pub fn quote(&mut self, quote: u8) -> &mut ReaderBuilder {
self.rdr.quote = quote;
self
}
pub fn escape(&mut self, escape: Option<u8>) -> &mut ReaderBuilder {
self.rdr.escape = escape;
self
}
pub fn double_quote(&mut self, yes: bool) -> &mut ReaderBuilder {
self.rdr.double_quote = yes;
self
}
pub fn quoting(&mut self, yes: bool) -> &mut ReaderBuilder {
self.rdr.quoting = yes;
self
}
pub fn comment(&mut self, comment: Option<u8>) -> &mut ReaderBuilder {
self.rdr.comment = comment;
self
}
pub fn ascii(&mut self) -> &mut ReaderBuilder {
self.delimiter(b'\x1F').terminator(Terminator::Any(b'\x1E'))
}
#[doc(hidden)]
pub fn nfa(&mut self, yes: bool) -> &mut ReaderBuilder {
self.rdr.use_nfa = yes;
self
}
}
/// The outcome of a single `Reader::read_field` call.
#[derive(Clone, Debug, Eq, PartialEq)]
pub enum ReadFieldResult {
/// The given input was used up before a field was completed.
InputEmpty,
/// The output buffer filled up before the field was completed, and input
/// remains.
OutputFull,
/// A complete field was read.
Field {
/// True when this field is also the last field of its record.
record_end: bool,
},
/// The end of the data was reached; no further fields will be produced.
End,
}
impl ReadFieldResult {
    /// Translates the NFA state reached after a read into a field result.
    ///
    /// `inpdone` and `outdone` report whether the input and output buffers
    /// were exhausted; they only matter when `state` is not a final state.
    fn from_nfa(
        state: NfaState,
        inpdone: bool,
        outdone: bool,
    ) -> ReadFieldResult {
        // Final states map directly onto a result.
        match state {
            NfaState::End => return ReadFieldResult::End,
            NfaState::EndFieldDelim => {
                return ReadFieldResult::Field { record_end: false };
            }
            NfaState::EndRecord | NfaState::CRLF => {
                return ReadFieldResult::Field { record_end: true };
            }
            _ => {}
        }
        assert!(!state.is_field_final());
        // Mid-field: report which buffer stopped the read. A full output
        // buffer only counts when there is still input left to consume.
        if outdone && !inpdone {
            ReadFieldResult::OutputFull
        } else {
            ReadFieldResult::InputEmpty
        }
    }
}
/// Like `ReadFieldResult`, but without an `OutputFull` variant.
///
/// NOTE(review): nothing in this file constructs this type; presumably it is
/// the result of a read API that does not copy into an output buffer — confirm.
#[derive(Clone, Debug, Eq, PartialEq)]
pub enum ReadFieldNoCopyResult {
/// The given input was used up before a field was completed.
InputEmpty,
/// A complete field was read.
Field {
/// True when this field is also the last field of its record.
record_end: bool,
},
/// The end of the data was reached.
End,
}
/// The outcome of a single `Reader::read_record` call.
#[derive(Clone, Debug, Eq, PartialEq)]
pub enum ReadRecordResult {
/// The given input was used up before a record was completed.
InputEmpty,
/// The output buffer filled up before the record was completed, and input
/// remains.
OutputFull,
/// The buffer of field end offsets filled up before the record was
/// completed.
OutputEndsFull,
/// A complete record was read.
Record,
/// The end of the data was reached; no further records will be produced.
End,
}
impl ReadRecordResult {
    /// Returns true only for the `Record` variant.
    fn is_record(&self) -> bool {
        match *self {
            ReadRecordResult::Record => true,
            _ => false,
        }
    }

    /// Translates the NFA state reached after a read into a record result.
    ///
    /// The three flags report whether the input, the output buffer and the
    /// ends buffer were exhausted; they only matter when `state` is not a
    /// final record state.
    fn from_nfa(
        state: NfaState,
        inpdone: bool,
        outdone: bool,
        endsdone: bool,
    ) -> ReadRecordResult {
        match state {
            NfaState::End => return ReadRecordResult::End,
            NfaState::EndRecord | NfaState::CRLF => {
                return ReadRecordResult::Record;
            }
            _ => {}
        }
        assert!(!state.is_record_final());
        // Exhausted input takes precedence; otherwise report the first full
        // destination buffer, checking field data before field ends.
        if inpdone {
            ReadRecordResult::InputEmpty
        } else if outdone {
            ReadRecordResult::OutputFull
        } else if endsdone {
            ReadRecordResult::OutputEndsFull
        } else {
            ReadRecordResult::InputEmpty
        }
    }
}
/// Like `ReadRecordResult`, but without the two "buffer full" variants.
///
/// NOTE(review): nothing in this file constructs this type; presumably it is
/// the result of a read API that does not copy into output buffers — confirm.
#[derive(Clone, Debug, Eq, PartialEq)]
pub enum ReadRecordNoCopyResult {
/// The given input was used up before a record was completed.
InputEmpty,
/// A complete record was read.
Record,
/// The end of the data was reached.
End,
}
/// What an NFA transition does with the input byte it was given.
#[derive(Clone, Debug, Eq, PartialEq)]
enum NfaInputAction {
/// Change state without consuming the byte; the same byte is fed to the
/// next transition.
Epsilon,
/// Consume the byte and copy it to the output.
CopyToOutput,
/// Consume the byte without copying it.
Discard,
}
/// States of the parsing NFA.
///
/// The numeric values matter: `Dfa::new_state` multiplies a state's value by
/// the number of byte classes to obtain its row offset in the transition
/// table, and the DFA read paths detect finished fields and records with
/// `>=` comparisons against the `EndFieldDelim` and `EndRecord` rows.
#[derive(Copy, Clone, Debug, Eq, PartialEq)]
enum NfaState {
// The next three states are not listed in `NFA_STATES` and therefore never
// get a row of their own in the DFA table; their values are arbitrary.
EndFieldTerm = 200,
InRecordTerm = 201,
End = 202,
// States from here on are listed in `NFA_STATES` and become DFA rows.
StartRecord = 0,
StartField = 1,
InField = 2,
InQuotedField = 3,
InEscapedQuote = 4,
InDoubleEscapedQuote = 5,
InComment = 6,
// From here on, every state means a field has just been completed.
EndFieldDelim = 7,
// From here on, every state means a record has just been completed.
EndRecord = 8,
CRLF = 9,
}
/// The NFA states for which `Reader::build_dfa` generates a row in the DFA
/// transition table.
const NFA_STATES: &'static [NfaState] = &[
NfaState::StartRecord,
NfaState::StartField,
NfaState::EndFieldDelim,
NfaState::InField,
NfaState::InQuotedField,
NfaState::InEscapedQuote,
NfaState::InDoubleEscapedQuote,
NfaState::InComment,
NfaState::EndRecord,
NfaState::CRLF,
];
impl NfaState {
    /// Returns true if reaching this state means a field has been completed.
    ///
    /// That is the case after a delimiter and in every state that ends a
    /// record.
    fn is_field_final(&self) -> bool {
        *self == NfaState::EndFieldDelim || self.is_record_final()
    }

    /// Returns true if reaching this state means a record has been completed
    /// (or the data has ended).
    fn is_record_final(&self) -> bool {
        *self == NfaState::End
            || *self == NfaState::EndRecord
            || *self == NfaState::CRLF
    }
}
impl Reader {
/// Creates a reader with the default configuration and a compiled DFA.
pub fn new() -> Reader {
    let builder = ReaderBuilder::new();
    builder.build()
}
/// Resets the parser so that it behaves as if it had never read anything.
///
/// Both state machines return to the start-of-record state, the line
/// counter goes back to 1, a leading UTF-8 BOM will be stripped again on
/// the next read, and any partially read record is forgotten. The
/// configuration (delimiter, quoting, ...) is left untouched.
pub fn reset(&mut self) {
    self.dfa_state = self.dfa.new_state(NfaState::StartRecord);
    self.nfa_state = NfaState::StartRecord;
    self.line = 1;
    self.has_read = false;
    // Without this, resetting in the middle of a record would leave a stale
    // byte count behind and offset every field end reported by the next
    // `read_record` call.
    self.output_pos = 0;
}
/// Returns the current line number as tracked by this reader.
pub fn line(&self) -> u64 {
self.line
}
/// Overrides the line number tracked by this reader.
pub fn set_line(&mut self, line: u64) {
self.line = line;
}
/// Parses a single field from `input`, copying its bytes into `output`.
///
/// Returns the parse result, the number of input bytes consumed and the
/// number of bytes written to `output`. Passing an empty `input` signals
/// the end of the data. On the very first read a leading UTF-8 BOM is
/// skipped and counted as consumed input.
pub fn read_field(
    &mut self,
    input: &[u8],
    output: &mut [u8],
) -> (ReadFieldResult, usize, usize) {
    let (rest, skipped) = self.strip_utf8_bom(input);
    let outcome = match self.use_nfa {
        true => self.read_field_nfa(rest, output),
        false => self.read_field_dfa(rest, output),
    };
    self.has_read = true;
    let (res, nin, nout) = outcome;
    (res, skipped + nin, nout)
}
/// Parses a single record from `input`, copying field bytes into `output`
/// and the end offset of each field into `ends`.
///
/// Returns the parse result, the number of input bytes consumed, the
/// number of bytes written to `output` and the number of offsets written
/// to `ends`. Passing an empty `input` signals the end of the data. On the
/// very first read a leading UTF-8 BOM is skipped and counted as consumed
/// input.
pub fn read_record(
    &mut self,
    input: &[u8],
    output: &mut [u8],
    ends: &mut [usize],
) -> (ReadRecordResult, usize, usize, usize) {
    let (rest, skipped) = self.strip_utf8_bom(input);
    let outcome = match self.use_nfa {
        true => self.read_record_nfa(rest, output, ends),
        false => self.read_record_dfa(rest, output, ends),
    };
    self.has_read = true;
    let (res, nin, nout, nend) = outcome;
    (res, skipped + nin, nout, nend)
}
/// Skips a UTF-8 byte order mark at the front of `input`, but only if this
/// reader has not performed any read yet.
///
/// Returns the remaining input and the number of bytes skipped (0 or 3).
fn strip_utf8_bom<'a>(&self, input: &'a [u8]) -> (&'a [u8], usize) {
    const BOM: &[u8] = b"\xef\xbb\xbf";
    if self.has_read || !input.starts_with(BOM) {
        return (input, 0);
    }
    (&input[BOM.len()..], BOM.len())
}
/// Reads one record from `input` using the DFA.
///
/// Field bytes are copied into `output` and the end offset of every
/// completed field is stored in `ends`. Offsets count output bytes from the
/// start of the current record, including bytes produced by earlier calls.
/// Returns the result plus the number of input bytes consumed, output bytes
/// written and end offsets written.
#[inline(always)]
fn read_record_dfa(
&mut self,
input: &[u8],
output: &mut [u8],
ends: &mut [usize],
) -> (ReadRecordResult, usize, usize, usize) {
if input.is_empty() {
// Empty input signals the end of the data: take the final transition.
let s = self.transition_final_dfa(self.dfa_state);
let res =
self.dfa.new_read_record_result(s, true, false, false, false);
// If the final transition completes a record, the end offset of its last
// field has not been reported yet. Report it here, but only commit to the
// new state when `ends` has room for it, so the caller can retry.
return match res {
ReadRecordResult::Record => {
if ends.is_empty() {
return (ReadRecordResult::OutputEndsFull, 0, 0, 0);
}
self.dfa_state = s;
ends[0] = self.output_pos;
self.output_pos = 0;
(res, 0, 0, 1)
}
_ => {
self.dfa_state = s;
(res, 0, 0, 0)
}
};
}
// Nothing can be done without room in both destination buffers.
if output.is_empty() {
return (ReadRecordResult::OutputFull, 0, 0, 0);
}
if ends.is_empty() {
return (ReadRecordResult::OutputEndsFull, 0, 0, 0);
}
let (mut nin, mut nout, mut nend) = (0, 0, 0);
let mut state = self.dfa_state;
while nin < input.len() && nout < output.len() && nend < ends.len() {
let (s, has_out) = self.dfa.get_output(state, input[nin]);
self.line += (input[nin] == b'\n') as u64;
state = s;
if has_out {
output[nout] = input[nin];
nout += 1;
}
nin += 1;
// A state at or above `final_field` means a field just ended; anything
// strictly above it also ends the record.
if state >= self.dfa.final_field {
ends[nend] = self.output_pos + nout;
nend += 1;
if state > self.dfa.final_field {
break;
}
}
// Inside field data, bulk-copy the following bytes that belong to the
// catch-all class without going through the transition table.
if state == self.dfa.in_field || state == self.dfa.in_quoted {
self.dfa
.classes
.scan_and_copy(input, &mut nin, output, &mut nout);
}
}
let res = self.dfa.new_read_record_result(
state,
false,
nin >= input.len(),
nout >= output.len(),
nend >= ends.len(),
);
self.dfa_state = state;
// Track how much of the current record has been written so that field
// ends reported by later calls stay relative to the record's start.
if res.is_record() {
self.output_pos = 0;
} else {
self.output_pos += nout;
}
(res, nin, nout, nend)
}
/// Reads one field from `input` using the DFA, copying its bytes into
/// `output`.
///
/// Returns the result plus the number of input bytes consumed and output
/// bytes written. An empty `input` signals the end of the data.
#[inline(always)]
fn read_field_dfa(
    &mut self,
    input: &[u8],
    output: &mut [u8],
) -> (ReadFieldResult, usize, usize) {
    if input.is_empty() {
        // End of data: take the final transition and report what it means.
        let last = self.transition_final_dfa(self.dfa_state);
        self.dfa_state = last;
        let res = self.dfa.new_read_field_result(last, true, false, false);
        return (res, 0, 0);
    }
    if output.is_empty() {
        return (ReadFieldResult::OutputFull, 0, 0);
    }

    let mut consumed = 0;
    let mut written = 0;
    let mut state = self.dfa_state;
    while consumed < input.len() && written < output.len() {
        let byte = input[consumed];
        consumed += 1;
        if byte == b'\n' {
            self.line += 1;
        }
        let (next, copy) = self.dfa.get_output(state, byte);
        state = next;
        if copy {
            output[written] = byte;
            written += 1;
        }
        // Any state at or above `final_field` marks a finished field.
        if state >= self.dfa.final_field {
            break;
        }
    }

    let input_done = consumed >= input.len();
    let output_done = written >= output.len();
    let res =
        self.dfa.new_read_field_result(state, false, input_done, output_done);
    self.dfa_state = state;
    (res, consumed, written)
}
/// Computes the DFA state to move to when the caller signals end of data.
///
/// If a record is still in progress it is closed by moving to the
/// end-of-record state. Otherwise (a record was just emitted, or parsing is
/// at the start state) the result is the start state, which doubles as the
/// terminal state.
fn transition_final_dfa(&self, state: DfaState) -> DfaState {
    let record_in_progress =
        !state.is_start() && state < self.dfa.final_record;
    if record_in_progress {
        self.dfa.new_state_final_record()
    } else {
        self.dfa.new_state_final_end()
    }
}
/// Compiles the DFA transition table from the NFA for the current
/// configuration and puts the reader into the DFA start state.
fn build_dfa(&mut self) {
// Register every byte the parser treats specially as its own equivalence
// class. All other bytes stay in class 0, which keeps the table at
// `number of states * number of classes` cells rather than one per byte.
self.dfa.classes.add(self.delimiter);
if self.quoting {
self.dfa.classes.add(self.quote);
if let Some(escape) = self.escape {
self.dfa.classes.add(escape);
}
}
if let Some(comment) = self.comment {
self.dfa.classes.add(comment);
}
match self.term {
Terminator::Any(b) => self.dfa.classes.add(b),
Terminator::CRLF => {
self.dfa.classes.add(b'\r');
self.dfa.classes.add(b'\n');
}
// NOTE(review): `Terminator` is declared elsewhere; this arm presumably
// covers a hidden variant that is never constructed — confirm.
_ => unreachable!(),
}
// Fill the table by running the NFA for every (state, byte) pair.
for &state in NFA_STATES {
for c in (0..256).map(|c| c as u8) {
let mut nfa_result = (state, NfaInputAction::Epsilon);
// Follow epsilon transitions until the byte is actually consumed (or the
// NFA reaches `End`).
while nfa_result.0 != NfaState::End
&& nfa_result.1 == NfaInputAction::Epsilon
{
nfa_result = self.transition_nfa(nfa_result.0, c);
}
let from = self.dfa.new_state(state);
let to = self.dfa.new_state(nfa_result.0);
self.dfa.set(
from,
c,
to,
nfa_result.1 == NfaInputAction::CopyToOutput,
);
}
}
self.dfa_state = self.dfa.new_state(NfaState::StartRecord);
// Cache the row offsets that the read loops compare against.
self.dfa.finish();
}
/// Reads one record from `input` by stepping the NFA directly.
///
/// Field bytes are copied into `output` and field end offsets are stored in
/// `ends`. Returns the result plus counts for input, output and ends. This
/// path does not update `self.line`.
///
/// NOTE(review): unlike `read_record_dfa`, `nout` starts at `self.output_pos`
/// rather than 0, so both the write position in `output` and the returned
/// output count include bytes produced by earlier calls for the same record —
/// confirm this is intended.
#[inline(always)]
fn read_record_nfa(
&mut self,
input: &[u8],
output: &mut [u8],
ends: &mut [usize],
) -> (ReadRecordResult, usize, usize, usize) {
if input.is_empty() {
// Empty input signals the end of the data: take the final transition.
let s = self.transition_final_nfa(self.nfa_state);
let res = ReadRecordResult::from_nfa(s, false, false, false);
// If that completes a record, its last field end is still unreported.
// Only commit to the new state when `ends` has room for it.
return match res {
ReadRecordResult::Record => {
if ends.is_empty() {
return (ReadRecordResult::OutputEndsFull, 0, 0, 0);
}
self.nfa_state = s;
ends[0] = self.output_pos;
self.output_pos = 0;
(res, 0, 0, 1)
}
_ => {
self.nfa_state = s;
(res, 0, 0, 0)
}
};
}
if output.is_empty() {
return (ReadRecordResult::OutputFull, 0, 0, 0);
}
if ends.is_empty() {
return (ReadRecordResult::OutputEndsFull, 0, 0, 0);
}
let (mut nin, mut nout, mut nend) = (0, self.output_pos, 0);
let mut state = self.nfa_state;
while nin < input.len() && nout < output.len() && nend < ends.len() {
let (s, io) = self.transition_nfa(state, input[nin]);
match io {
NfaInputAction::CopyToOutput => {
output[nout] = input[nin];
nout += 1;
nin += 1;
}
NfaInputAction::Discard => {
nin += 1;
}
// Epsilon: the same byte is presented again to the next state.
NfaInputAction::Epsilon => {}
}
state = s;
if state.is_field_final() {
ends[nend] = nout;
nend += 1;
// Every field-final state other than `EndFieldDelim` also ends the record.
if state != NfaState::EndFieldDelim {
break;
}
}
}
let res = ReadRecordResult::from_nfa(
state,
nin >= input.len(),
nout >= output.len(),
nend >= ends.len(),
);
self.nfa_state = state;
self.output_pos = if res.is_record() { 0 } else { nout };
(res, nin, nout, nend)
}
/// Reads one field from `input` by stepping the NFA directly, copying the
/// field's bytes into `output`.
///
/// Returns the result plus the number of input bytes consumed and output
/// bytes written. An empty `input` signals the end of the data.
#[inline(always)]
fn read_field_nfa(
    &mut self,
    input: &[u8],
    output: &mut [u8],
) -> (ReadFieldResult, usize, usize) {
    if input.is_empty() {
        let last = self.transition_final_nfa(self.nfa_state);
        self.nfa_state = last;
        return (ReadFieldResult::from_nfa(last, false, false), 0, 0);
    }
    if output.is_empty() {
        return (ReadFieldResult::OutputFull, 0, 0);
    }

    let mut consumed = 0;
    let mut written = 0;
    let mut state = self.nfa_state;
    while consumed < input.len() && written < output.len() {
        let byte = input[consumed];
        let (next, action) = self.transition_nfa(state, byte);
        if action == NfaInputAction::CopyToOutput {
            output[written] = byte;
            written += 1;
        }
        // Epsilon transitions leave the byte in place for the next state.
        if action != NfaInputAction::Epsilon {
            consumed += 1;
        }
        state = next;
        if state.is_field_final() {
            break;
        }
    }

    let input_done = consumed >= input.len();
    let output_done = written >= output.len();
    let res = ReadFieldResult::from_nfa(state, input_done, output_done);
    self.nfa_state = state;
    (res, consumed, written)
}
/// Computes the NFA state to move to when the caller signals end of data.
///
/// States in which a record is still in progress close that record; states
/// between records (including comments) go straight to `End`.
#[inline(always)]
fn transition_final_nfa(&self, state: NfaState) -> NfaState {
    use self::NfaState::*;
    match state {
        // A record is in progress and has to be emitted first.
        StartField
        | InField
        | InQuotedField
        | InEscapedQuote
        | InDoubleEscapedQuote
        | EndFieldDelim
        | EndFieldTerm
        | InRecordTerm => EndRecord,
        // Nothing is pending.
        StartRecord | InComment | EndRecord | CRLF | End => End,
    }
}
/// Performs one NFA step: given the current state and the input byte `c`,
/// returns the next state and what to do with `c`.
///
/// Within each state the order of the checks decides which role a byte plays
/// when the configuration assigns it more than one.
#[inline(always)]
fn transition_nfa(
&self,
state: NfaState,
c: u8,
) -> (NfaState, NfaInputAction) {
use self::NfaState::*;
match state {
// `End` is a sink.
End => (End, NfaInputAction::Epsilon),
StartRecord => {
// Terminators seen before a record starts are dropped, which is how
// empty lines get skipped.
if self.term.equals(c) {
(StartRecord, NfaInputAction::Discard)
} else if self.comment == Some(c) {
(InComment, NfaInputAction::Discard)
} else {
(StartField, NfaInputAction::Epsilon)
}
}
EndRecord => (StartRecord, NfaInputAction::Epsilon),
StartField => {
if self.quoting && self.quote == c {
(InQuotedField, NfaInputAction::Discard)
} else if self.delimiter == c {
(EndFieldDelim, NfaInputAction::Discard)
} else if self.term.equals(c) {
(EndFieldTerm, NfaInputAction::Epsilon)
} else {
(InField, NfaInputAction::CopyToOutput)
}
}
EndFieldDelim => (StartField, NfaInputAction::Epsilon),
EndFieldTerm => (InRecordTerm, NfaInputAction::Epsilon),
InField => {
if self.delimiter == c {
(EndFieldDelim, NfaInputAction::Discard)
} else if self.term.equals(c) {
(EndFieldTerm, NfaInputAction::Epsilon)
} else {
(InField, NfaInputAction::CopyToOutput)
}
}
InQuotedField => {
// A quote either closes the field or starts a doubled quote; the next
// byte decides which (see `InDoubleEscapedQuote`).
if self.quoting && self.quote == c {
(InDoubleEscapedQuote, NfaInputAction::Discard)
} else if self.quoting && self.escape == Some(c) {
(InEscapedQuote, NfaInputAction::Discard)
} else {
(InQuotedField, NfaInputAction::CopyToOutput)
}
}
// The byte after an escape is always taken literally.
InEscapedQuote => (InQuotedField, NfaInputAction::CopyToOutput),
InDoubleEscapedQuote => {
// A second quote (when doubling is enabled) is a literal quote inside
// the still-open quoted field; anything else is handled as unquoted
// field content.
if self.quoting && self.double_quote && self.quote == c {
(InQuotedField, NfaInputAction::CopyToOutput)
} else if self.delimiter == c {
(EndFieldDelim, NfaInputAction::Discard)
} else if self.term.equals(c) {
(EndFieldTerm, NfaInputAction::Epsilon)
} else {
(InField, NfaInputAction::CopyToOutput)
}
}
InComment => {
// Comments run up to `\n`, whatever the configured terminator is.
if b'\n' == c {
(StartRecord, NfaInputAction::Discard)
} else {
(InComment, NfaInputAction::Discard)
}
}
InRecordTerm => {
// In CRLF mode a `\r` may be followed by a `\n` belonging to the same
// terminator, which the `CRLF` state takes care of.
if self.term.is_crlf() && b'\r' == c {
(CRLF, NfaInputAction::Discard)
} else {
(EndRecord, NfaInputAction::Discard)
}
}
CRLF => {
if b'\n' == c {
(StartRecord, NfaInputAction::Discard)
} else {
(StartRecord, NfaInputAction::Epsilon)
}
}
}
}
}
/// Maximum number of byte equivalence classes the transition table has room
/// for.
const TRANS_CLASSES: usize = 7;
/// Number of rows in the transition table; matches the length of
/// `NFA_STATES`.
const DFA_STATES: usize = 10;
/// Total number of cells in the transition table.
const TRANS_SIZE: usize = TRANS_CLASSES * DFA_STATES;
/// Number of distinct byte values; the size of the byte-to-class map.
const CLASS_SIZE: usize = 256;
/// A table-driven DFA compiled from the parsing NFA.
///
/// The table is indexed by `state row offset + byte class`.
struct Dfa {
/// Next state for every (state, class) cell.
trans: [DfaState; TRANS_SIZE],
/// Whether the transition in a cell copies the input byte to the output.
has_output: [bool; TRANS_SIZE],
/// Mapping from input bytes to equivalence classes.
classes: DfaClasses,
/// Row offset of `NfaState::InField`.
in_field: DfaState,
/// Row offset of `NfaState::InQuotedField`.
in_quoted: DfaState,
/// Row offset of `NfaState::EndFieldDelim`; states at or above it mean a
/// field has ended.
final_field: DfaState,
/// Row offset of `NfaState::EndRecord`; states at or above it mean a record
/// has ended.
final_record: DfaState,
}
impl Dfa {
fn new() -> Dfa {
Dfa {
trans: [DfaState(0); TRANS_SIZE],
has_output: [false; TRANS_SIZE],
classes: DfaClasses::new(),
in_field: DfaState(0),
in_quoted: DfaState(0),
final_field: DfaState(0),
final_record: DfaState(0),
}
}
fn new_state(&self, nfa_state: NfaState) -> DfaState {
let nclasses = self.classes.num_classes() as u8;
let idx = (nfa_state as u8).checked_mul(nclasses).unwrap();
DfaState(idx)
}
fn new_state_final_end(&self) -> DfaState {
self.new_state(NfaState::StartRecord)
}
fn new_state_final_record(&self) -> DfaState {
self.new_state(NfaState::EndRecord)
}
fn get_output(&self, state: DfaState, c: u8) -> (DfaState, bool) {
let cls = self.classes.classes[c as usize];
let idx = state.0 as usize + cls as usize;
(self.trans[idx], self.has_output[idx])
}
fn set(&mut self, from: DfaState, c: u8, to: DfaState, output: bool) {
let cls = self.classes.classes[c as usize];
let idx = from.0 as usize + cls as usize;
self.trans[idx] = to;
self.has_output[idx] = output;
}
fn finish(&mut self) {
self.in_field = self.new_state(NfaState::InField);
self.in_quoted = self.new_state(NfaState::InQuotedField);
self.final_field = self.new_state(NfaState::EndFieldDelim);
self.final_record = self.new_state(NfaState::EndRecord);
}
fn new_read_field_result(
&self,
state: DfaState,
is_final_trans: bool,
inpdone: bool,
outdone: bool,
) -> ReadFieldResult {
if state >= self.final_record {
ReadFieldResult::Field { record_end: true }
} else if state == self.final_field {
ReadFieldResult::Field { record_end: false }
} else if is_final_trans && state.is_start() {
ReadFieldResult::End
} else {
debug_assert!(state < self.final_field);
if !inpdone && outdone {
ReadFieldResult::OutputFull
} else {
ReadFieldResult::InputEmpty
}
}
}
fn new_read_record_result(
&self,
state: DfaState,
is_final_trans: bool,
inpdone: bool,
outdone: bool,
endsdone: bool,
) -> ReadRecordResult {
if state >= self.final_record {
ReadRecordResult::Record
} else if is_final_trans && state.is_start() {
ReadRecordResult::End
} else {
debug_assert!(state < self.final_record);
if !inpdone && outdone {
ReadRecordResult::OutputFull
} else if !inpdone && endsdone {
ReadRecordResult::OutputEndsFull
} else {
ReadRecordResult::InputEmpty
}
}
}
}
/// Maps every input byte to an equivalence class. Bytes the parser does not
/// treat specially all share class 0.
struct DfaClasses {
/// Class of each byte value, indexed by the byte.
classes: [u8; CLASS_SIZE],
/// Class number handed out by the next `add`; also the number of classes in
/// use, counting class 0.
next_class: usize,
}
impl DfaClasses {
    /// Creates a mapping in which every byte belongs to class 0.
    fn new() -> DfaClasses {
        DfaClasses { classes: [0; CLASS_SIZE], next_class: 1 }
    }

    /// Gives byte `b` a fresh class of its own.
    ///
    /// A byte that is added again simply moves to the newest class.
    fn add(&mut self, b: u8) {
        if self.next_class > CLASS_SIZE {
            panic!("added too many classes")
        }
        let class = self.next_class;
        self.classes[b as usize] = class as u8;
        self.next_class = class + 1;
    }

    /// Number of classes in use, counting the catch-all class 0.
    fn num_classes(&self) -> usize {
        self.next_class
    }

    /// Copies bytes from `input[*nin..]` to `output[*nout..]` for as long as
    /// they belong to class 0 and both buffers have room, advancing both
    /// cursors past the copied bytes.
    #[inline(always)]
    fn scan_and_copy(
        &self,
        input: &[u8],
        nin: &mut usize,
        output: &mut [u8],
        nout: &mut usize,
    ) {
        loop {
            if *nin >= input.len() || *nout >= output.len() {
                break;
            }
            let byte = input[*nin];
            // Stop at the first byte the DFA has to look at.
            if self.classes[byte as usize] != 0 {
                break;
            }
            output[*nout] = byte;
            *nin += 1;
            *nout += 1;
        }
    }
}
/// A DFA state, stored as the offset of its row in the transition table.
///
/// The derived ordering is what the `>= final_field` / `>= final_record`
/// comparisons in the read paths rely on.
#[derive(Clone, Copy, Debug, Eq, Ord, PartialEq, PartialOrd)]
struct DfaState(u8);
impl DfaState {
fn start() -> DfaState {
DfaState(0)
}
fn is_start(&self) -> bool {
self.0 == 0
}
}
impl fmt::Debug for Dfa {
    /// Prints a fixed placeholder instead of dumping the transition table.
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        f.write_str("Dfa(N/A)")
    }
}
impl fmt::Debug for DfaClasses {
    /// Prints the class counter but elides the 256-entry byte map.
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        let next_class = &self.next_class;
        f.write_fmt(format_args!(
            "DfaClasses {{ classes: N/A, next_class: {:?} }}",
            next_class
        ))
    }
}
impl Clone for Dfa {
fn clone(&self) -> Dfa {
let mut dfa = Dfa::new();
dfa.trans.copy_from_slice(&self.trans);
dfa
}
}
impl Clone for DfaClasses {
fn clone(&self) -> DfaClasses {
let mut x = DfaClasses::new();
x.classes.copy_from_slice(&self.classes);
x
}
}
#[cfg(test)]
mod tests {
use core::str;
use arrayvec::{ArrayString, ArrayVec};
use super::{ReadFieldResult, Reader, ReaderBuilder, Terminator};
// Fixed-capacity containers for parsed test data: at most 10 rows of at most
// 10 fields of at most 10 bytes each.
type Csv = ArrayVec<[Row; 10]>;
type Row = ArrayVec<[Field; 10]>;
type Field = ArrayString<[u8; 10]>;
/// Shorthand for viewing a string literal as bytes.
fn b(s: &str) -> &[u8] {
s.as_bytes()
}
/// Builds a `Csv` value from nested literals, e.g. `csv![["a", "b"], ["c"]]`.
///
/// The construction is wrapped in a local function `x` that is called
/// immediately.
macro_rules! csv {
($([$($field:expr),*]),*) => {{
#[allow(unused_mut)]
fn x() -> Csv {
let mut csv = Csv::new();
$(
let mut row = Row::new();
$(
row.push(Field::from($field).unwrap());
)*
csv.push(row);
)*
csv
}
x()
}}
}
/// Generates a `#[test]` named `$name` that parses `$data` four ways (NFA and
/// DFA, each field-by-field and record-by-record) and checks that every run
/// yields `$expected`.
///
/// The optional fourth argument is a closure that configures the
/// `ReaderBuilder` before each reader is built.
macro_rules! parses_to {
($name:ident, $data:expr, $expected:expr) => {
parses_to!($name, $data, $expected, |builder| builder);
};
($name:ident, $data:expr, $expected:expr, $config:expr) => {
#[test]
fn $name() {
// NFA, one field at a time.
let mut builder = ReaderBuilder::new();
builder.nfa(true);
$config(&mut builder);
let mut rdr = builder.build();
let got = parse_by_field(&mut rdr, $data);
let expected = $expected;
assert_eq!(expected, got, "nfa by field");
// NFA, one record at a time.
let mut builder = ReaderBuilder::new();
builder.nfa(true);
$config(&mut builder);
let mut rdr = builder.build();
let got = parse_by_record(&mut rdr, $data);
let expected = $expected;
assert_eq!(expected, got, "nfa by record");
// DFA, one field at a time.
let mut builder = ReaderBuilder::new();
$config(&mut builder);
let mut rdr = builder.build();
let got = parse_by_field(&mut rdr, $data);
let expected = $expected;
assert_eq!(expected, got, "dfa by field");
// DFA, one record at a time.
let mut builder = ReaderBuilder::new();
$config(&mut builder);
let mut rdr = builder.build();
let got = parse_by_record(&mut rdr, $data);
let expected = $expected;
assert_eq!(expected, got, "dfa by record");
}
};
}
/// Parses all of `data` with repeated `read_field` calls and collects the
/// fields into rows.
///
/// Panics if a field does not fit the 10-byte scratch buffer or if the reader
/// reports `InputEmpty` while input remains.
fn parse_by_field(rdr: &mut Reader, data: &str) -> Csv {
let mut data = data.as_bytes();
let mut field = [0u8; 10];
let mut csv = Csv::new();
let mut row = Row::new();
// Number of bytes of the current field already sitting in `field`.
let mut outpos = 0;
loop {
let (res, nin, nout) = rdr.read_field(data, &mut field[outpos..]);
data = &data[nin..];
outpos += nout;
match res {
ReadFieldResult::InputEmpty => {
// Once `data` is empty, the next iteration passes empty input, which
// tells the reader that the data has ended.
if !data.is_empty() {
panic!("missing input data")
}
}
ReadFieldResult::OutputFull => panic!("field too large"),
ReadFieldResult::Field { record_end } => {
let s = str::from_utf8(&field[..outpos]).unwrap();
row.push(Field::from(s).unwrap());
outpos = 0;
if record_end {
csv.push(row);
row = Row::new();
}
}
ReadFieldResult::End => {
return csv;
}
}
}
}
/// Parses all of `data` with repeated `read_record` calls and splits each
/// record into fields using the reported end offsets.
///
/// Panics if a record overflows either scratch buffer or if the reader
/// reports `InputEmpty` while input remains.
fn parse_by_record(rdr: &mut Reader, data: &str) -> Csv {
use crate::ReadRecordResult::*;
let mut data = data.as_bytes();
let mut record = [0; 1024];
let mut ends = [0; 10];
let mut csv = Csv::new();
// How much of `record` and `ends` the current record occupies so far.
let (mut outpos, mut endpos) = (0, 0);
loop {
let (res, nin, nout, nend) = rdr.read_record(
data,
&mut record[outpos..],
&mut ends[endpos..],
);
data = &data[nin..];
outpos += nout;
endpos += nend;
match res {
InputEmpty => {
// Once `data` is empty, the next iteration passes empty input, which
// tells the reader that the data has ended.
if !data.is_empty() {
panic!("missing input data")
}
}
OutputFull => panic!("record too large (out buffer)"),
OutputEndsFull => panic!("record too large (end buffer)"),
Record => {
let s = str::from_utf8(&record[..outpos]).unwrap();
// Each end offset is also the start of the following field.
let mut start = 0;
let mut row = Row::new();
for &end in &ends[..endpos] {
row.push(Field::from(&s[start..end]).unwrap());
start = end;
}
csv.push(row);
outpos = 0;
endpos = 0;
}
End => return csv,
}
}
}
// Basic shapes: one or many rows, one or many fields, with and without a
// trailing delimiter, under each of the terminators accepted by default
// (none, LF, CRLF, CR).
parses_to!(one_row_one_field, "a", csv![["a"]]);
parses_to!(one_row_many_fields, "a,b,c", csv![["a", "b", "c"]]);
parses_to!(one_row_trailing_comma, "a,b,", csv![["a", "b", ""]]);
parses_to!(one_row_one_field_lf, "a\n", csv![["a"]]);
parses_to!(one_row_many_fields_lf, "a,b,c\n", csv![["a", "b", "c"]]);
parses_to!(one_row_trailing_comma_lf, "a,b,\n", csv![["a", "b", ""]]);
parses_to!(one_row_one_field_crlf, "a\r\n", csv![["a"]]);
parses_to!(one_row_many_fields_crlf, "a,b,c\r\n", csv![["a", "b", "c"]]);
parses_to!(one_row_trailing_comma_crlf, "a,b,\r\n", csv![["a", "b", ""]]);
parses_to!(one_row_one_field_cr, "a\r", csv![["a"]]);
parses_to!(one_row_many_fields_cr, "a,b,c\r", csv![["a", "b", "c"]]);
parses_to!(one_row_trailing_comma_cr, "a,b,\r", csv![["a", "b", ""]]);
parses_to!(many_rows_one_field, "a\nb", csv![["a"], ["b"]]);
parses_to!(
many_rows_many_fields,
"a,b,c\nx,y,z",
csv![["a", "b", "c"], ["x", "y", "z"]]
);
parses_to!(
many_rows_trailing_comma,
"a,b,\nx,y,",
csv![["a", "b", ""], ["x", "y", ""]]
);
parses_to!(many_rows_one_field_lf, "a\nb\n", csv![["a"], ["b"]]);
parses_to!(
many_rows_many_fields_lf,
"a,b,c\nx,y,z\n",
csv![["a", "b", "c"], ["x", "y", "z"]]
);
parses_to!(
many_rows_trailing_comma_lf,
"a,b,\nx,y,\n",
csv![["a", "b", ""], ["x", "y", ""]]
);
parses_to!(many_rows_one_field_crlf, "a\r\nb\r\n", csv![["a"], ["b"]]);
parses_to!(
many_rows_many_fields_crlf,
"a,b,c\r\nx,y,z\r\n",
csv![["a", "b", "c"], ["x", "y", "z"]]
);
parses_to!(
many_rows_trailing_comma_crlf,
"a,b,\r\nx,y,\r\n",
csv![["a", "b", ""], ["x", "y", ""]]
);
parses_to!(many_rows_one_field_cr, "a\rb\r", csv![["a"], ["b"]]);
parses_to!(
many_rows_many_fields_cr,
"a,b,c\rx,y,z\r",
csv![["a", "b", "c"], ["x", "y", "z"]]
);
parses_to!(
many_rows_trailing_comma_cr,
"a,b,\rx,y,\r",
csv![["a", "b", ""], ["x", "y", ""]]
);
// Blank lines before and after the data produce no records.
parses_to!(
trailing_lines_no_record,
"\n\n\na,b,c\nx,y,z\n\n\n",
csv![["a", "b", "c"], ["x", "y", "z"]]
);
parses_to!(
trailing_lines_no_record_cr,
"\r\r\ra,b,c\rx,y,z\r\r\r",
csv![["a", "b", "c"], ["x", "y", "z"]]
);
parses_to!(
trailing_lines_no_record_crlf,
"\r\n\r\n\r\na,b,c\r\nx,y,z\r\n\r\n\r\n",
csv![["a", "b", "c"], ["x", "y", "z"]]
);
// Empty input and empty lines: they never produce records, regardless of
// which line ending is used or how the line endings are mixed.
parses_to!(empty, "", csv![]);
parses_to!(empty_lines, "\n\n\n\n", csv![]);
parses_to!(
empty_lines_interspersed,
"\n\na,b\n\n\nx,y\n\n\nm,n\n",
csv![["a", "b"], ["x", "y"], ["m", "n"]]
);
parses_to!(empty_lines_crlf, "\r\n\r\n\r\n\r\n", csv![]);
parses_to!(
empty_lines_interspersed_crlf,
"\r\n\r\na,b\r\n\r\n\r\nx,y\r\n\r\n\r\nm,n\r\n",
csv![["a", "b"], ["x", "y"], ["m", "n"]]
);
parses_to!(empty_lines_mixed, "\r\n\n\r\n\n", csv![]);
parses_to!(
empty_lines_interspersed_mixed,
"\n\r\na,b\r\n\n\r\nx,y\r\n\n\r\nm,n\r\n",
csv![["a", "b"], ["x", "y"], ["m", "n"]]
);
parses_to!(empty_lines_cr, "\r\r\r\r", csv![]);
parses_to!(
empty_lines_interspersed_cr,
"\r\ra,b\r\r\rx,y\r\r\rm,n\r",
csv![["a", "b"], ["x", "y"], ["m", "n"]]
);
// Custom record terminators, including the ASCII unit/record separator
// preset.
parses_to!(
term_weird,
"zza,bzc,dzz",
csv![["a", "b"], ["c", "d"]],
|b: &mut ReaderBuilder| {
b.terminator(Terminator::Any(b'z'));
}
);
parses_to!(
ascii_delimited,
"a\x1fb\x1ec\x1fd",
csv![["a", "b"], ["c", "d"]],
|b: &mut ReaderBuilder| {
b.ascii();
}
);
// A UTF-8 BOM is only stripped at the very start of the data; anywhere else
// it is ordinary field content.
parses_to!(bom_at_start, "\u{feff}a", csv![["a"]]);
parses_to!(bom_in_field, "a\u{feff}", csv![["a\u{feff}"]]);
parses_to!(bom_at_field_start, "a,\u{feff}b", csv![["a", "\u{feff}b"]]);
// Quoting: quoted fields, custom quote bytes, escape bytes, doubled quotes,
// and quoting switched off entirely.
parses_to!(quote_empty, "\"\"", csv![[""]]);
parses_to!(quote_lf, "\"\"\n", csv![[""]]);
parses_to!(quote_space, "\" \"", csv![[" "]]);
parses_to!(quote_inner_space, "\" a \"", csv![[" a "]]);
// A quote that is not the first byte of a field is ordinary field content.
parses_to!(quote_outer_space, " \"a\" ", csv![[" \"a\" "]]);
parses_to!(quote_change, "zaz", csv![["a"]], |b: &mut ReaderBuilder| {
b.quote(b'z');
});
// When the quote byte equals the delimiter, the quote role wins at the start
// of a field.
parses_to!(
quote_delimiter,
",a,,b",
csv![["a,b"]],
|b: &mut ReaderBuilder| {
b.quote(b',');
}
);
// With no escape byte configured, a backslash is ordinary field content.
parses_to!(quote_no_escapes, r#""a\"b""#, csv![[r#"a\b""#]]);
parses_to!(
quote_escapes_no_double,
r#""a""b""#,
csv![[r#"a"b""#]],
|b: &mut ReaderBuilder| {
b.double_quote(false);
}
);
parses_to!(
quote_escapes,
r#""a\"b""#,
csv![[r#"a"b"#]],
|b: &mut ReaderBuilder| {
b.escape(Some(b'\\'));
}
);
parses_to!(
quote_escapes_change,
r#""az"b""#,
csv![[r#"a"b"#]],
|b: &mut ReaderBuilder| {
b.escape(Some(b'z'));
}
);
parses_to!(
quote_escapes_with_comma,
r#""\"A,B\"""#,
csv![[r#""A,B""#]],
|b: &mut ReaderBuilder| {
b.escape(Some(b'\\')).double_quote(false);
}
);
parses_to!(
quoting_disabled,
r#""abc,foo""#,
csv![[r#""abc"#, r#"foo""#]],
|b: &mut ReaderBuilder| {
b.quoting(false);
}
);
// Custom field delimiters.
parses_to!(
delimiter_tabs,
"a\tb",
csv![["a", "b"]],
|b: &mut ReaderBuilder| {
b.delimiter(b'\t');
}
);
parses_to!(
delimiter_weird,
"azb",
csv![["a", "b"]],
|b: &mut ReaderBuilder| {
b.delimiter(b'z');
}
);
// A terminator after the last record does not create an extra record.
parses_to!(extra_record_crlf_1, "foo\n1\n", csv![["foo"], ["1"]]);
parses_to!(extra_record_crlf_2, "foo\r\n1\r\n", csv![["foo"], ["1"]]);
// Comments: the comment byte only has an effect as the first byte of a
// record.
parses_to!(
comment_1,
"foo\n# hi\nbar\n",
csv![["foo"], ["bar"]],
|b: &mut ReaderBuilder| {
b.comment(Some(b'#'));
}
);
parses_to!(
comment_2,
"foo\n # hi\nbar\n",
csv![["foo"], [" # hi"], ["bar"]],
|b: &mut ReaderBuilder| {
b.comment(Some(b'#'));
}
);
// A comment byte that is also the record terminator never starts a comment:
// the terminator check comes first.
parses_to!(
comment_3,
"foo\n# hi\nbar\n",
csv![["foo"], ["# hi"], ["bar"]],
|b: &mut ReaderBuilder| {
b.comment(Some(b'\n'));
}
);
parses_to!(
comment_4,
"foo,b#ar,baz",
csv![["foo", "b#ar", "baz"]],
|b: &mut ReaderBuilder| {
b.comment(Some(b'#'));
}
);
parses_to!(
comment_5,
"foo,#bar,baz",
csv![["foo", "#bar", "baz"]],
|b: &mut ReaderBuilder| {
b.comment(Some(b'#'));
}
);
/// Calls `read_field` once and asserts the number of input bytes consumed,
/// the number of output bytes written and the result, in that order.
macro_rules! assert_read {
(
$rdr:expr, $input:expr, $output:expr,
$expect_in:expr, $expect_out:expr, $expect_res:expr
) => {{
let (res, nin, nout) = $rdr.read_field($input, $output);
assert_eq!($expect_in, nin);
assert_eq!($expect_out, nout);
assert_eq!($expect_res, res);
}};
}
// Empty input on a fresh reader reports `End` immediately.
#[test]
fn stream_empty() {
use crate::ReadFieldResult::*;
let mut rdr = Reader::new();
assert_read!(rdr, &[], &mut [], 0, 0, End);
}
// A lone space is field data; the field is only emitted once empty input
// signals the end of the data.
#[test]
fn stream_space() {
use crate::ReadFieldResult::*;
let mut rdr = Reader::new();
assert_read!(rdr, b(" "), &mut [0], 1, 1, InputEmpty);
assert_read!(rdr, &[], &mut [0], 0, 0, Field { record_end: true });
assert_read!(rdr, &[], &mut [0], 0, 0, End);
}
// A lone delimiter yields two empty fields: one ended by the delimiter and
// one ended by the end of the data.
#[test]
fn stream_comma() {
use crate::ReadFieldResult::*;
let mut rdr = Reader::new();
assert_read!(rdr, b(","), &mut [0], 1, 0, Field { record_end: false });
assert_read!(rdr, &[], &mut [0], 0, 0, Field { record_end: true });
assert_read!(rdr, &[], &mut [0], 0, 0, End);
}
// Reads one field through a 2-byte output buffer: the reader reports
// `OutputFull` until the remaining input fits.
#[test]
fn stream_output_chunks() {
use crate::ReadFieldResult::*;
let mut inp = b("fooquux");
let out = &mut [0; 2];
let mut rdr = Reader::new();
assert_read!(rdr, inp, out, 2, 2, OutputFull);
assert_eq!(out, b("fo"));
inp = &inp[2..];
assert_read!(rdr, inp, out, 2, 2, OutputFull);
assert_eq!(out, b("oq"));
inp = &inp[2..];
assert_read!(rdr, inp, out, 2, 2, OutputFull);
assert_eq!(out, b("uu"));
inp = &inp[2..];
assert_read!(rdr, inp, out, 1, 1, InputEmpty);
assert_eq!(&out[..1], b("x"));
inp = &inp[1..];
assert!(inp.is_empty());
assert_read!(rdr, &[], out, 0, 0, Field { record_end: true });
assert_read!(rdr, inp, out, 0, 0, End);
}
// Feeds one field in several input chunks; the caller advances the output
// slice between calls.
#[test]
fn stream_input_chunks() {
use crate::ReadFieldResult::*;
let out = &mut [0; 10];
let mut rdr = Reader::new();
assert_read!(rdr, b("fo"), out, 2, 2, InputEmpty);
assert_eq!(&out[..2], b("fo"));
assert_read!(rdr, b("oq"), &mut out[2..], 2, 2, InputEmpty);
assert_eq!(&out[..4], b("fooq"));
assert_read!(rdr, b("uu"), &mut out[4..], 2, 2, InputEmpty);
assert_eq!(&out[..6], b("fooquu"));
assert_read!(rdr, b("x"), &mut out[6..], 1, 1, InputEmpty);
assert_eq!(&out[..7], b("fooquux"));
assert_read!(rdr, &[], out, 0, 0, Field { record_end: true });
assert_read!(rdr, &[], out, 0, 0, End);
}
// A doubled quote split across two input chunks still becomes one literal
// quote.
#[test]
fn stream_doubled_quotes() {
use crate::ReadFieldResult::*;
let out = &mut [0; 10];
let mut rdr = Reader::new();
assert_read!(rdr, b("\"fo\""), out, 4, 2, InputEmpty);
assert_eq!(&out[..2], b("fo"));
assert_read!(rdr, b("\"o"), &mut out[2..], 2, 2, InputEmpty);
assert_eq!(&out[..4], b("fo\"o"));
assert_read!(rdr, &[], out, 0, 0, Field { record_end: true });
assert_read!(rdr, &[], out, 0, 0, End);
}
// An escape byte at the end of one chunk applies to the first byte of the
// next chunk.
#[test]
fn stream_escaped_quotes() {
use crate::ReadFieldResult::*;
let out = &mut [0; 10];
let mut builder = ReaderBuilder::new();
let mut rdr = builder.escape(Some(b'\\')).build();
assert_read!(rdr, b("\"fo\\"), out, 4, 2, InputEmpty);
assert_eq!(&out[..2], b("fo"));
assert_read!(rdr, b("\"o"), &mut out[2..], 2, 2, InputEmpty);
assert_eq!(&out[..4], b("fo\"o"));
assert_read!(rdr, &[], out, 0, 0, Field { record_end: true });
assert_read!(rdr, &[], out, 0, 0, End);
}
// An empty output buffer yields `OutputFull` without consuming input, and
// the same read can then be retried with a usable buffer.
#[test]
fn stream_empty_output() {
use crate::ReadFieldResult::*;
let out = &mut [0; 10];
let mut rdr = Reader::new();
assert_read!(
rdr,
b("foo,bar"),
out,
4,
3,
Field { record_end: false }
);
assert_eq!(&out[..3], b("foo"));
assert_read!(rdr, b("bar"), &mut [], 0, 0, OutputFull);
assert_read!(rdr, b("bar"), out, 3, 3, InputEmpty);
assert_eq!(&out[..3], b("bar"));
assert_read!(rdr, &[], out, 0, 0, Field { record_end: true });
assert_read!(rdr, &[], out, 0, 0, End);
}
// `reset` makes the reader forget that it was inside a quoted field.
#[test]
fn reset_works() {
use crate::ReadFieldResult::*;
let out = &mut [0; 10];
let mut rdr = Reader::new();
assert_read!(rdr, b("\"foo"), out, 4, 3, InputEmpty);
assert_eq!(&out[..3], b("foo"));
// After the reset the leading `""` is read as an empty quoted section at the
// start of a fresh field rather than as an escaped quote inside the old one,
// so only the trailing quote survives as field data.
rdr.reset();
assert_read!(rdr, b("\"\"bar\""), out, 6, 4, InputEmpty);
assert_eq!(&out[..4], b("bar\""));
}
// The line counter starts at 1 and advances once per `\n` consumed.
#[test]
fn line_numbers() {
use crate::ReadFieldResult::*;
let out = &mut [0; 10];
let mut rdr = Reader::new();
assert_eq!(1, rdr.line());
assert_read!(rdr, b("\n\n\n\n"), out, 4, 0, InputEmpty);
assert_eq!(5, rdr.line());
assert_read!(rdr, b("foo,"), out, 4, 3, Field { record_end: false });
assert_eq!(5, rdr.line());
assert_read!(rdr, b("bar\n"), out, 4, 3, Field { record_end: true });
assert_eq!(6, rdr.line());
assert_read!(rdr, &[], &mut [0], 0, 0, End);
assert_eq!(6, rdr.line());
}
/// Calls `read_record` once and asserts the result and the input, output and
/// ends counts it returns.
macro_rules! assert_read_record {
(
$rdr:expr, $input:expr, $output:expr, $ends:expr,
$expect_in:expr, $expect_out:expr,
$expect_end:expr, $expect_res:expr
) => {{
let (res, nin, nout, nend) =
$rdr.read_record($input, $output, $ends);
assert_eq!($expect_res, res, "result");
assert_eq!($expect_in, nin, "input");
assert_eq!($expect_out, nout, "output");
assert_eq!($expect_end, nend, "ends");
}};
}
// Reads two records; the last one has no terminator, so its single field end
// is only reported when empty input signals the end of the data.
#[test]
fn stream_record() {
use crate::ReadRecordResult::*;
let mut inp = b("foo,bar\nbaz");
let out = &mut [0; 1024];
let ends = &mut [0; 10];
let mut rdr = Reader::new();
assert_read_record!(rdr, &inp, out, ends, 8, 6, 2, Record);
assert_eq!(ends[0], 3);
assert_eq!(ends[1], 6);
inp = &inp[8..];
assert_read_record!(rdr, &inp, out, ends, 3, 3, 0, InputEmpty);
inp = &inp[3..];
assert_read_record!(rdr, &inp, out, ends, 0, 0, 1, Record);
assert_eq!(ends[0], 3);
assert_read_record!(rdr, &inp, out, ends, 0, 0, 0, End);
}
// Same as `stream_record`, but the final call is first made with an empty
// `ends` buffer: the reader must report `OutputEndsFull` without advancing,
// so the retry still yields the record.
#[test]
fn stream_record_last_end_output_full() {
use crate::ReadRecordResult::*;
let mut inp = b("foo,bar\nbaz");
let out = &mut [0; 1024];
let ends = &mut [0; 10];
let mut rdr = Reader::new();
assert_read_record!(rdr, &inp, out, ends, 8, 6, 2, Record);
assert_eq!(ends[0], 3);
assert_eq!(ends[1], 6);
inp = &inp[8..];
assert_read_record!(rdr, &inp, out, ends, 3, 3, 0, InputEmpty);
inp = &inp[3..];
assert_read_record!(rdr, &inp, out, &mut [], 0, 0, 0, OutputEndsFull);
assert_read_record!(rdr, &inp, out, ends, 0, 0, 1, Record);
assert_eq!(ends[0], 3);
assert_read_record!(rdr, &inp, out, ends, 0, 0, 0, End);
}
}