using System;
using System.Collections.Generic;
using System.IO;
using System.Linq;
using System.Text;
using Swan.Reflection;
namespace Swan.Formatters {
/// <summary>
/// Represents a reader designed for CSV text.
/// It is capable of deserializing objects from individual lines of CSV text,
/// transforming CSV lines of text into objects,
/// or simply reading the lines of CSV as an array of strings.
/// </summary>
/// <seealso cref="System.IDisposable" />
/// <example>
/// The following example describes how to load a list of objects from a CSV file.
/// <code>
/// using Swan.Formatters;
///
/// class Example
/// {
///     class Person
///     {
///         public string Name { get; set; }
///         public int Age { get; set; }
///     }
///
///     static void Main()
///     {
///         // load records from a CSV file
///         var loadedRecords =
///             CsvReader.LoadRecords&lt;Person&gt;("C:\\Users\\user\\Documents\\file.csv");
///
///         // loadedRecords =
///         // [
///         //     { Age = 20, Name = "George" }
///         //     { Age = 18, Name = "Juan" }
///         // ]
///     }
/// }
/// </code>
/// The following code explains how to read a CSV formatted string.
/// <code>
/// using Swan.Formatters;
/// using System.IO;
/// using System.Text;
///
/// class Example
/// {
///     static void Main()
///     {
///         // data to be read
///         var data = @"Company,OpenPositions,MainTechnology,Revenue
/// Co,2,""C#, MySQL, JavaScript, HTML5 and CSS3"",500
/// Ca,2,""C#, MySQL, JavaScript, HTML5 and CSS3"",600";
///
///         using(var stream = new MemoryStream(Encoding.UTF8.GetBytes(data)))
///         {
///             // create a CSV reader
///             var reader = new CsvReader(stream, false, Encoding.UTF8);
///         }
///     }
/// }
/// </code>
/// </example>
public class CsvReader : IDisposable {
// Shared cache used by the typed ReadObject overloads to look up the properties of the target type.
private static readonly PropertyTypeCache TypeCache = new PropertyTypeCache();
// Lock object guarding the reader, the record counter and the heading state.
private readonly Object _syncLock = new Object();
// Number of records returned through ReadLine so far (SkipRecord does not touch it).
private UInt64 _count;
// Character that opens/closes quoted fields and escapes itself when doubled.
private Char _escapeCharacter = '"';
// Character that separates two fields of a record.
private Char _separatorCharacter = ',';
private Boolean _hasDisposed; // To detect redundant calls
// Column names captured by ReadHeadings; null until that method has been called.
private String[] _headings;
// Identity map (heading -> heading) built by ReadHeadings and used by the map-less ReadObject overloads.
private Dictionary<String, String> _defaultMap;
// Underlying text reader; set to null when the instance is disposed.
private StreamReader _reader;
#region Constructors
/// <summary>
/// Initializes a new instance of the <see cref="CsvReader"/> class.
/// </summary>
/// <param name="inputStream">The stream.</param>
/// <param name="leaveOpen">if set to <c>true</c> leaves the input stream open.</param>
/// <param name="textEncoding">The text encoding.</param>
/// <exception cref="ArgumentNullException">
/// <paramref name="inputStream"/> or <paramref name="textEncoding"/> is <c>null</c>.
/// </exception>
public CsvReader(Stream inputStream, Boolean leaveOpen, Encoding textEncoding) {
    // Validated in declaration order: a null stream is reported before a null encoding.
    Stream source = inputStream ?? throw new ArgumentNullException(nameof(inputStream));
    Encoding encoding = textEncoding ?? throw new ArgumentNullException(nameof(textEncoding));

    this._reader = new StreamReader(
        source,
        encoding,
        detectEncodingFromByteOrderMarks: true,
        bufferSize: 2048,
        leaveOpen: leaveOpen);
}
/// <summary>
/// Initializes a new instance of the <see cref="CsvReader"/> class.
/// It will automatically close the stream upon disposing.
/// </summary>
/// <param name="stream">The stream.</param>
/// <param name="textEncoding">The text encoding.</param>
public CsvReader(Stream stream, Encoding textEncoding)
    : this(inputStream: stream, leaveOpen: false, textEncoding: textEncoding) {
    // Nothing else to do: the three-argument constructor performs validation and setup.
}
/// <summary>
/// Initializes a new instance of the <see cref="CsvReader"/> class.
/// It automatically closes the stream when disposing this reader
/// and uses the Windows 1252 encoding.
/// </summary>
/// <param name="stream">The stream.</param>
public CsvReader(Stream stream) : this(stream, false, Definitions.Windows1252Encoding) {
    // Nothing else to do: the three-argument constructor performs validation and setup.
}
/// <summary>
/// Initializes a new instance of the <see cref="CsvReader"/> class.
/// It uses the Windows 1252 Encoding by default and it automatically closes the file
/// when this reader is disposed of.
/// </summary>
/// <param name="filename">The filename.</param>
public CsvReader(String filename)
    : this(inputStream: File.OpenRead(filename), leaveOpen: false, textEncoding: Definitions.Windows1252Encoding) {
    // Nothing else to do: the three-argument constructor performs validation and setup.
}
/// <summary>
/// Initializes a new instance of the <see cref="CsvReader"/> class.
/// It automatically closes the file when disposing this reader.
/// </summary>
/// <param name="filename">The filename.</param>
/// <param name="encoding">The encoding.</param>
/// <exception cref="ArgumentNullException"><paramref name="encoding"/> is <c>null</c>.</exception>
public CsvReader(String filename, Encoding encoding)
    : this(OpenFileForReader(filename, encoding), false, encoding) {
    // Nothing else to do: the three-argument constructor performs validation and setup.
}

/// <summary>
/// Opens <paramref name="filename"/> for reading and, when <paramref name="encoding"/> is missing,
/// closes the file again before throwing so that a rejected call does not leak the file handle.
/// </summary>
/// <param name="filename">The path of the file to open.</param>
/// <param name="encoding">The encoding that will be handed to the reader; only checked for null here.</param>
/// <returns>The opened, read-only file stream.</returns>
/// <exception cref="ArgumentNullException"><paramref name="encoding"/> is <c>null</c>.</exception>
private static Stream OpenFileForReader(String filename, Encoding encoding) {
    // The file is opened first so that path-related exceptions keep taking precedence,
    // exactly as they did when File.OpenRead was evaluated inline.
    FileStream stream = File.OpenRead(filename);
    if(encoding == null) {
        stream.Dispose();
        throw new ArgumentNullException(nameof(encoding));
    }
    return stream;
}
#endregion
#region Properties
/// <summary>
/// Gets the number of records that have been read, including the headings.
/// </summary>
/// <value>
/// The count.
/// </value>
public UInt64 Count {
    get {
        UInt64 snapshot;
        // Read under the same lock the read methods hold while they increment the counter.
        lock(this._syncLock) {
            snapshot = this._count;
        }
        return snapshot;
    }
}
/// <summary>
/// Gets or sets the escape character.
/// By default it is the double quote '"'.
/// </summary>
/// <value>
/// The escape character.
/// </value>
public Char EscapeCharacter {
    get {
        // The getter reads the field directly, without taking the lock.
        return this._escapeCharacter;
    }
    set {
        // Writes are serialized with the read operations, which hold the same lock.
        lock(this._syncLock) {
            this._escapeCharacter = value;
        }
    }
}
/// <summary>
/// Gets or sets the separator character.
/// By default it is the comma character ','.
/// </summary>
/// <value>
/// The separator character.
/// </value>
public Char SeparatorCharacter {
    get {
        // The getter reads the field directly, without taking the lock.
        return this._separatorCharacter;
    }
    set {
        // Writes are serialized with the read operations, which hold the same lock.
        lock(this._syncLock) {
            this._separatorCharacter = value;
        }
    }
}
/// <summary>
/// Gets a value indicating whether the stream reader is at the end of the stream.
/// In other words, if no more data can be read, this will be set to true.
/// </summary>
/// <value>
/// <c>true</c> if [end of stream]; otherwise, <c>false</c>.
/// </value>
public Boolean EndOfStream {
    get {
        Boolean atEnd;
        // Queried under the lock so the answer is not taken in the middle of a read operation.
        lock(this._syncLock) {
            atEnd = this._reader.EndOfStream;
        }
        return atEnd;
    }
}
#endregion
#region Generic, Main ReadLine method
/// <summary>
/// Reads a line of CSV text into an array of strings.
/// </summary>
/// <returns>An array holding the field values of the record that was read.</returns>
/// <exception cref="EndOfStreamException">Cannot read past the end of the stream.</exception>
public String[] ReadLine() {
    lock(this._syncLock) {
        if(!this._reader.EndOfStream) {
            String[] fields = ParseRecord(this._reader, this._escapeCharacter, this._separatorCharacter);
            // Only records handed back to the caller are counted.
            this._count++;
            return fields;
        }

        throw new EndOfStreamException("Cannot read past the end of the stream");
    }
}
#endregion
#region Read Methods
/// <summary>
/// Skips a line of CSV text.
/// This operation does not increment the Count property, which makes it useful for skipping
/// over a few leading lines before reading the headings, because reading headings is only
/// supported as the first read operation (i.e. while count is still 0).
/// </summary>
/// <exception cref="EndOfStreamException">Cannot read past the end of the stream.</exception>
public void SkipRecord() {
    lock(this._syncLock) {
        if(!this._reader.EndOfStream) {
            // The record is parsed only to advance the reader; its values are discarded.
            ParseRecord(this._reader, this._escapeCharacter, this._separatorCharacter);
            return;
        }

        throw new EndOfStreamException("Cannot read past the end of the stream");
    }
}
/// <summary>
/// Reads a line of CSV text and stores the values read as a representation of the column names
/// to be used for parsing objects. You have to call this method before calling ReadObject methods.
/// Repeated column names are accepted; they share a single entry in the default heading map.
/// </summary>
/// <returns>A copy of the column names that were read.</returns>
/// <exception cref="InvalidOperationException">
/// Reading headings is only supported as the first read operation.
/// or
/// ReadHeadings had already been called.
/// </exception>
/// <exception cref="EndOfStreamException">Cannot read past the end of the stream.</exception>
public String[] ReadHeadings() {
    lock(this._syncLock) {
        if(this._headings != null) {
            throw new InvalidOperationException($"The {nameof(ReadHeadings)} method had already been called.");
        }
        if(this._count != 0) {
            throw new InvalidOperationException("Reading headings is only supported as the first read operation.");
        }

        String[] headings = this.ReadLine();

        // Build the identity map with indexer assignment instead of ToDictionary: the latter throws
        // on duplicate column names, and by then the heading line has already been consumed.
        Dictionary<String, String> identityMap = new Dictionary<String, String>(headings.Length);
        foreach(String heading in headings) {
            identityMap[heading] = heading;
        }

        // Publish both pieces of state together so they can never disagree.
        this._headings = headings;
        this._defaultMap = identityMap;

        // Hand out a copy so callers cannot alter the stored headings.
        return headings.ToArray();
    }
}
/// <summary>
/// Reads a line of CSV text, converting it into a dictionary whose keys are the names of the headings
/// and whose values are the corresponding field values of the line.
/// </summary>
/// <param name="map">
/// The mappings between CSV headings (keys) and object properties (values).
/// NOTE(review): this overload only checks the map for null; the keys of the result are always the
/// CSV headings themselves.
/// </param>
/// <returns>A dictionary with one entry per heading that has a value on the line that was read.</returns>
/// <exception cref="InvalidOperationException">ReadHeadings has not been called.</exception>
/// <exception cref="EndOfStreamException">Cannot read past the end of the stream.</exception>
/// <exception cref="ArgumentNullException">map.</exception>
public IDictionary<String, Object> ReadObject(IDictionary<String, String> map) {
    lock(this._syncLock) {
        if(this._headings == null) {
            throw new InvalidOperationException($"Call the {nameof(ReadHeadings)} method before reading as an object.");
        }
        if(map == null) {
            throw new ArgumentNullException(nameof(map));
        }

        Dictionary<String, Object> result = new Dictionary<String, Object>();
        String[] values = this.ReadLine();

        // A line may be shorter than the heading row; only pair up what is actually there.
        Int32 pairCount = Math.Min(this._headings.Length, values.Length);
        for(Int32 i = 0; i < pairCount; i++) {
            // Indexer assignment: when a heading is repeated, the last value wins.
            result[this._headings[i]] = values[i];
        }

        return result;
    }
}
/// <summary>
/// Reads a line of CSV text, converting it into a dictionary.
/// The keys correspond to the names of the CSV headings.
/// </summary>
/// <returns>A dictionary with one entry per heading that has a value on the line that was read.</returns>
public IDictionary<String, Object> ReadObject() => this.ReadObject(this._defaultMap);
/// <summary>
/// Reads a line of CSV text converting it into an object of the given type, using a map (or Dictionary)
/// where the keys are the names of the headings and the values are the names of the instance properties
/// in the given Type. The result object must be already instantiated.
/// Headings that are missing from the map, or mapped to an empty name, are skipped.
/// </summary>
/// <typeparam name="T">The type of object to map.</typeparam>
/// <param name="map">The map.</param>
/// <param name="result">The result.</param>
/// <exception cref="ArgumentNullException">
/// map
/// or
/// result.
/// </exception>
/// <exception cref="InvalidOperationException">ReadHeadings has not been called.</exception>
/// <exception cref="EndOfStreamException">Cannot read past the end of the stream.</exception>
public void ReadObject<T>(IDictionary<String, String> map, ref T result) {
    lock(this._syncLock) {
        // Check arguments and state before consuming anything from the stream.
        if(map == null) {
            throw new ArgumentNullException(nameof(map));
        }
        if(this._reader.EndOfStream) {
            throw new EndOfStreamException("Cannot read past the end of the stream");
        }
        if(this._headings == null) {
            throw new InvalidOperationException($"Call the {nameof(ReadHeadings)} method before reading as an object.");
        }
        if(Equals(result, default(T))) {
            throw new ArgumentNullException(nameof(result));
        }

        // Read line and extract values
        String[] values = this.ReadLine();

        // Extract the candidate properties from the cache: writable and of a supported basic type.
        var properties = TypeCache.RetrieveFilteredProperties(
            typeof(T),
            true,
            x => x.CanWrite && Definitions.BasicTypesInfo.Value.ContainsKey(x.PropertyType));

        // A line may be shorter than the heading row; only process headings that have a value.
        Int32 pairCount = Math.Min(this._headings.Length, values.Length);
        for(Int32 i = 0; i < pairCount; i++) {
            // Skip headings that are not mapped or are mapped to an empty property name.
            // TryGetValue avoids the KeyNotFoundException the indexer raises for unmapped headings.
            if(!map.TryGetValue(this._headings[i], out String propertyName) || String.IsNullOrWhiteSpace(propertyName)) {
                continue;
            }

            // Parse and assign the basic type value to the property if it exists
            _ = properties
                .FirstOrDefault(p => p.Name == propertyName)?
                .TrySetBasicType(values[i], result);
        }
    }
}
/// <summary>
/// Reads a line of CSV text converting it into an object of the given type, using a map (or Dictionary)
/// where the keys are the names of the headings and the values are the names of the instance properties
/// in the given Type.
/// </summary>
/// <typeparam name="T">The type of object to map.</typeparam>
/// <param name="map">The map of CSV headings (keys) and Type property names (values).</param>
/// <returns>A newly created instance of <typeparamref name="T"/> populated from the line.</returns>
/// <exception cref="ArgumentNullException">map.</exception>
/// <exception cref="InvalidOperationException">ReadHeadings has not been called.</exception>
/// <exception cref="EndOfStreamException">Cannot read past the end of the stream.</exception>
public T ReadObject<T>(IDictionary<String, String> map) where T : new() {
    // The instance is created first and then filled by the ref-based overload.
    T result = Activator.CreateInstance<T>();
    this.ReadObject(map, ref result);
    return result;
}
/// <summary>
/// Reads a line of CSV text converting it into an object of the given type, and assuming
/// the property names of the target type match the heading names of the file.
/// </summary>
/// <typeparam name="T">The type of object.</typeparam>
/// <returns>A newly created instance of <typeparamref name="T"/> populated from the line.</returns>
public T ReadObject<T>() where T : new() => this.ReadObject<T>(this._defaultMap);
#endregion
#region Support Methods
/// <summary>
/// Parses a line of standard CSV text into an array of strings.
/// Note that quoted values might have new line sequences in them. Field values will contain such sequences.
/// </summary>
/// <param name="reader">The reader.</param>
/// <param name="escapeCharacter">The escape character.</param>
/// <param name="separatorCharacter">The separator character.</param>
/// <returns>
/// An array holding the field values of the record; it is empty when the reader has no more lines.
/// </returns>
private static String[] ParseRecord(StreamReader reader, Char escapeCharacter = '"', Char separatorCharacter = ',') {
    List<String> values = new List<String>();
    StringBuilder currentValue = new StringBuilder(1024);
    ReadState currentState = ReadState.WaitingForNewField;
    String line;

    // A record normally spans one line; it spans more only while a quoted field is still open.
    while((line = reader.ReadLine()) != null) {
        for(Int32 charIndex = 0; charIndex < line.Length; charIndex++) {
            // Get the current and next character (the next one is null at the end of the line)
            Char currentChar = line[charIndex];
            Char? nextChar = charIndex < line.Length - 1 ? line[charIndex + 1] : (Char?)null;

            // Perform logic based on state and decide on next state
            switch(currentState) {
                case ReadState.WaitingForNewField: {
                    _ = currentValue.Clear();

                    // An opening quote starts a quoted field; the quote itself is not part of the value
                    if(currentChar == escapeCharacter) {
                        currentState = ReadState.PushingQuoted;
                        continue;
                    }

                    // A separator right away means the field is empty
                    if(currentChar == separatorCharacter) {
                        values.Add(currentValue.ToString());
                        currentState = ReadState.WaitingForNewField;
                        continue;
                    }

                    _ = currentValue.Append(currentChar);
                    currentState = ReadState.PushingNormal;
                    continue;
                }
                case ReadState.PushingNormal: {
                    // Handle field content delimited by the separator
                    if(currentChar == separatorCharacter) {
                        currentState = ReadState.WaitingForNewField;
                        values.Add(currentValue.ToString());
                        _ = currentValue.Clear();
                        continue;
                    }

                    // Handle double quote escaping
                    if(currentChar == escapeCharacter && nextChar.HasValue && nextChar == escapeCharacter) {
                        // advance 1 character now. The loop will advance one more.
                        _ = currentValue.Append(currentChar);
                        charIndex++;
                        continue;
                    }

                    _ = currentValue.Append(currentChar);
                    break;
                }
                case ReadState.PushingQuoted: {
                    // Handle field content delimited by the closing quote: a quote that is not
                    // followed by another quote ends the quoted part of the field
                    if(currentChar == escapeCharacter && (nextChar.HasValue == false || nextChar != escapeCharacter)) {
                        currentState = ReadState.PushingNormal;
                        continue;
                    }

                    // Handle double quote escaping
                    if(currentChar == escapeCharacter && nextChar.HasValue && nextChar == escapeCharacter) {
                        // advance 1 character now. The loop will advance one more.
                        _ = currentValue.Append(currentChar);
                        charIndex++;
                        continue;
                    }

                    _ = currentValue.Append(currentChar);
                    break;
                }
            }
        }

        // determine if we need to continue reading a new line if it is part of the quoted
        // field value
        if(currentState == ReadState.PushingQuoted) {
            // we need to add the new line sequence to the output of the field
            // because we were pushing a quoted value
            _ = currentValue.Append(Environment.NewLine);
        } else {
            // push anything that has not been pushed (flush) into a last value
            values.Add(currentValue.ToString());
            _ = currentValue.Clear();

            // stop reading more lines: we have reached the end of the CSV record
            break;
        }
    }

    // If we ran out of lines while a quoted value was still open (no closing quote),
    // we might have additional text in it that has not been pushed yet
    if(currentValue.Length > 0) {
        values.Add(currentValue.ToString());
    }

    return values.ToArray();
}
#endregion
#region Helpers
/// <summary>
/// Loads the records from the stream.
/// This method uses Windows 1252 encoding.
/// The first record is read as the headings; every following record becomes one item.
/// The reader created here is disposed before returning, which also closes the stream.
/// </summary>
/// <typeparam name="T">The type of IList items to load.</typeparam>
/// <param name="stream">The stream.</param>
/// <returns>A generic collection of objects that can be individually accessed by index.</returns>
public static IList<T> LoadRecords<T>(Stream stream) where T : new() {
    List<T> result = new List<T>();

    using(CsvReader reader = new CsvReader(stream)) {
        // The heading row drives the property mapping of every subsequent record.
        _ = reader.ReadHeadings();
        while(!reader.EndOfStream) {
            result.Add(reader.ReadObject<T>());
        }
    }

    return result;
}
/// <summary>
/// Loads the records from the given file path.
/// This method uses Windows 1252 encoding.
/// </summary>
/// <typeparam name="T">The type of IList items to load.</typeparam>
/// <param name="filePath">The file path.</param>
/// <returns>A generic collection of objects that can be individually accessed by index.</returns>
public static IList<T> LoadRecords<T>(String filePath) where T : new() => LoadRecords<T>(File.OpenRead(filePath));
#endregion
#region IDisposable Support
/// <summary>
/// Releases unmanaged and - optionally - managed resources.
/// </summary>
/// <param name="disposing">
/// <c>true</c> to release both managed and unmanaged resources; <c>false</c> to release only unmanaged resources.
/// </param>
protected virtual void Dispose(Boolean disposing) {
    if(!this._hasDisposed) {
        if(disposing) {
            // Drop the reference even when disposing the reader throws.
            try {
                this._reader.Dispose();
            } finally {
                this._reader = null;
            }
        }

        // Marks the instance so that later calls return without doing anything.
        this._hasDisposed = true;
    }
}
/// <inheritdoc />
public void Dispose() {
// Release the managed reader through the overridable Dispose(Boolean) overload.
this.Dispose(true);
// Cleanup has already run, so finalization of this instance is no longer requested.
GC.SuppressFinalize(this);
}
#endregion
/// <summary>
/// Defines the 3 different read states
/// for the parsing state machine.
/// </summary>
private enum ReadState {
    /// <summary>No character of the next field has been consumed yet.</summary>
    WaitingForNewField = 0,

    /// <summary>Characters are being appended to an unquoted field, or to a field whose closing quote has been seen.</summary>
    PushingNormal = 1,

    /// <summary>Characters are being appended to a quoted field whose closing quote has not been seen yet.</summary>
    PushingQuoted = 2,
}
}
}