namespace Unosquare.Swan.Formatters
{
    using Reflection;
    using System;
    using System.Collections.Generic;
    using System.IO;
    using System.Linq;
    using System.Reflection;
    using System.Text;

    /// <summary>
    /// Represents a reader designed for CSV text.
    /// It is capable of deserializing objects from individual lines of CSV text,
    /// transforming CSV lines of text into objects,
    /// or simply reading the lines of CSV as an array of strings.
    /// </summary>
    /// <seealso cref="System.IDisposable" />
    /// <example>
    /// The following example describes how to load a list of objects from a CSV file.
    /// <code>
    /// using Unosquare.Swan.Formatters;
    ///
    /// class Example
    /// {
    ///     class Person
    ///     {
    ///         public string Name { get; set; }
    ///         public int Age { get; set; }
    ///     }
    ///
    ///     static void Main()
    ///     {
    ///         // load records from a CSV file
    ///         var loadedRecords =
    ///             CsvReader.LoadRecords&lt;Person&gt;("C:\\Users\\user\\Documents\\file.csv");
    ///
    ///         // loadedRecords =
    ///         // [
    ///         //     { Age = 20, Name = "George" }
    ///         //     { Age = 18, Name = "Juan" }
    ///         // ]
    ///     }
    /// }
    /// </code>
    /// The following code explains how to read a CSV formatted string.
    /// <code>
    /// using System.IO;
    /// using System.Text;
    /// using Unosquare.Swan.Formatters;
    ///
    /// class Example
    /// {
    ///     static void Main()
    ///     {
    ///         // data to be read
    ///         var data = @"Company,OpenPositions,MainTechnology,Revenue
    ///         Co,2,""C#, MySQL, JavaScript, HTML5 and CSS3"",500
    ///         Ca,2,""C#, MySQL, JavaScript, HTML5 and CSS3"",600";
    ///
    ///         using(var stream = new MemoryStream(Encoding.UTF8.GetBytes(data)))
    ///         {
    ///             // create a CSV reader
    ///             var reader = new CsvReader(stream, false, Encoding.UTF8);
    ///         }
    ///     }
    /// }
    /// </code>
    /// </example>
    public class CsvReader : IDisposable
    {
        private static readonly PropertyTypeCache TypeCache = new PropertyTypeCache();

        // Guards the reader, the record counter and the heading state.
        private readonly object _syncLock = new object();

        private ulong _count;
        private char _escapeCharacter = '"';
        private char _separatorCharacter = ',';
        private bool _hasDisposed; // To detect redundant calls
        private string[] _headings;

        // Identity map (heading -> heading) built by ReadHeadings; used by the map-less ReadObject overloads.
        private Dictionary<string, string> _defaultMap;
        private StreamReader _reader;

        #region Constructors

        /// <summary>
        /// Initializes a new instance of the <see cref="CsvReader" /> class.
        /// </summary>
        /// <param name="inputStream">The stream.</param>
        /// <param name="leaveOpen">if set to <c>true</c> leaves the input stream open.</param>
        /// <param name="textEncoding">The text encoding.</param>
        /// <exception cref="System.ArgumentNullException">inputStream or textEncoding.</exception>
        public CsvReader(Stream inputStream, bool leaveOpen, Encoding textEncoding)
        {
            if (inputStream == null)
                throw new ArgumentNullException(nameof(inputStream));

            if (textEncoding == null)
                throw new ArgumentNullException(nameof(textEncoding));

            // Byte-order-mark detection is enabled; the internal buffer is 2048 bytes.
            _reader = new StreamReader(inputStream, textEncoding, true, 2048, leaveOpen);
        }

        /// <summary>
        /// Initializes a new instance of the <see cref="CsvReader" /> class.
        /// It will automatically close the stream upon disposing.
        /// </summary>
        /// <param name="stream">The stream.</param>
        /// <param name="textEncoding">The text encoding.</param>
        public CsvReader(Stream stream, Encoding textEncoding)
            : this(stream, false, textEncoding)
        {
            // placeholder
        }

        /// <summary>
        /// Initializes a new instance of the <see cref="CsvReader" /> class.
        /// It automatically closes the stream when disposing this reader
        /// and uses the Windows 1252 encoding.
        /// </summary>
        /// <param name="stream">The stream.</param>
        public CsvReader(Stream stream)
            : this(stream, false, Definitions.Windows1252Encoding)
        {
        }

        /// <summary>
        /// Initializes a new instance of the <see cref="CsvReader" /> class.
        /// It uses the Windows 1252 Encoding by default and it automatically closes the file
        /// when this reader is disposed of.
        /// </summary>
        /// <param name="filename">The filename.</param>
        public CsvReader(string filename)
            : this(File.OpenRead(filename), false, Definitions.Windows1252Encoding)
        {
            // placeholder
        }

        /// <summary>
        /// Initializes a new instance of the <see cref="CsvReader" /> class.
        /// It automatically closes the file when disposing this reader.
        /// </summary>
        /// <param name="filename">The filename.</param>
        /// <param name="encoding">The encoding.</param>
        public CsvReader(string filename, Encoding encoding)
            : this(File.OpenRead(filename), false, encoding)
        {
            // placeholder
        }

        #endregion

        #region Properties

        /// <summary>
        /// Gets the number of records that have been read, including the headings.
        /// Records consumed through <see cref="SkipRecord" /> are not counted.
        /// </summary>
        /// <value>
        /// The count.
        /// </value>
        public ulong Count
        {
            get
            {
                lock (_syncLock)
                {
                    return _count;
                }
            }
        }

        /// <summary>
        /// Gets or sets the escape character.
        /// By default it is the double quote '"'.
        /// </summary>
        /// <value>
        /// The escape character.
        /// </value>
        public char EscapeCharacter
        {
            get => _escapeCharacter;
            set
            {
                lock (_syncLock)
                {
                    _escapeCharacter = value;
                }
            }
        }

        /// <summary>
        /// Gets or sets the separator character.
        /// By default it is the comma character ','.
        /// </summary>
        /// <value>
        /// The separator character.
        /// </value>
        public char SeparatorCharacter
        {
            get => _separatorCharacter;
            set
            {
                lock (_syncLock)
                {
                    _separatorCharacter = value;
                }
            }
        }

        /// <summary>
        /// Gets a value indicating whether the stream reader is at the end of the stream.
        /// In other words, if no more data can be read, this will be set to true.
        /// </summary>
        /// <value>
        ///   <c>true</c> if [end of stream]; otherwise, <c>false</c>.
        /// </value>
        public bool EndOfStream
        {
            get
            {
                lock (_syncLock)
                {
                    return _reader.EndOfStream;
                }
            }
        }

        #endregion

        #region Generic, Main ReadLine method

        /// <summary>
        /// Reads one CSV record into an array of strings and increments <see cref="Count" />.
        /// A record may span several physical lines when a quoted field contains line breaks.
        /// </summary>
        /// <returns>The field values of the record that was read.</returns>
        /// <exception cref="System.IO.EndOfStreamException">Cannot read past the end of the stream.</exception>
        public string[] ReadLine()
        {
            lock (_syncLock)
            {
                if (_reader.EndOfStream)
                    throw new EndOfStreamException("Cannot read past the end of the stream");

                var values = ParseRecord(_reader, _escapeCharacter, _separatorCharacter);
                _count++;
                return values;
            }
        }

        #endregion

        #region Read Methods

        /// <summary>
        /// Skips a line of CSV text.
        /// This operation does not increment the Count property and it is useful when you need to read the headings
        /// skipping over a few lines as Reading headings is only supported
        /// as the first read operation (i.e. while count is still 0).
        /// </summary>
        /// <exception cref="System.IO.EndOfStreamException">Cannot read past the end of the stream.</exception>
        public void SkipRecord()
        {
            lock (_syncLock)
            {
                if (_reader.EndOfStream)
                    throw new EndOfStreamException("Cannot read past the end of the stream");

                // Parse and discard; _count is deliberately left untouched.
                ParseRecord(_reader, _escapeCharacter, _separatorCharacter);
            }
        }

        /// <summary>
        /// Reads a line of CSV text and stores the values read as a representation of the column names
        /// to be used for parsing objects. You have to call this method before calling ReadObject methods.
        /// </summary>
        /// <returns>A copy of the heading names that were read.</returns>
        /// <exception cref="System.InvalidOperationException">
        /// The headings have already been read,
        /// or
        /// another record has already been read (headings must be the first read operation).
        /// </exception>
        /// <exception cref="System.ArgumentException">Two headings share the same name.</exception>
        /// <exception cref="System.IO.EndOfStreamException">Cannot read past the end of the stream.</exception>
        public string[] ReadHeadings()
        {
            lock (_syncLock)
            {
                if (_headings != null)
                    throw new InvalidOperationException($"The {nameof(ReadHeadings)} method had already been called.");

                if (_count != 0)
                    throw new InvalidOperationException("Reading headings is only supported as the first read operation.");

                _headings = ReadLine();

                // Default mapping: every heading maps to a property of the same name.
                _defaultMap = _headings.ToDictionary(x => x, x => x);

                // Hand out a copy so callers cannot mutate the stored headings.
                return _headings.ToArray();
            }
        }

        /// <summary>
        /// Reads a line of CSV text, converting it into a dictionary in which the keys correspond
        /// to the names of the headings and the values to the fields that were read.
        /// </summary>
        /// <param name="map">
        /// The mappings between CSV headings (keys) and object properties (values).
        /// It must not be null; the result is keyed by heading name, so the mapping
        /// itself is not consulted by this overload.
        /// </param>
        /// <returns>A dictionary of heading names and their field values for the record read.</returns>
        /// <exception cref="System.InvalidOperationException">ReadHeadings has not been called.</exception>
        /// <exception cref="System.ArgumentNullException">map.</exception>
        /// <exception cref="System.IO.EndOfStreamException">Cannot read past the end of the stream.</exception>
        public IDictionary<string, object> ReadObject(IDictionary<string, string> map)
        {
            lock (_syncLock)
            {
                if (_headings == null)
                    throw new InvalidOperationException($"Call the {nameof(ReadHeadings)} method before reading as an object.");

                if (map == null)
                    throw new ArgumentNullException(nameof(map));

                var result = new Dictionary<string, object>();
                var values = ReadLine();

                for (var i = 0; i < _headings.Length; i++)
                {
                    // The record may be shorter than the heading row.
                    if (i > values.Length - 1)
                        break;

                    result[_headings[i]] = values[i];
                }

                return result;
            }
        }

        /// <summary>
        /// Reads a line of CSV text, converting it into a dictionary.
        /// The keys correspond to the names of the CSV headings.
        /// </summary>
        /// <returns>A dictionary of heading names and their field values for the record read.</returns>
        public IDictionary<string, object> ReadObject() => ReadObject(_defaultMap);

        /// <summary>
        /// Reads a line of CSV text converting it into an object of the given type, using a map (or Dictionary)
        /// where the keys are the names of the headings and the values are the names of the instance properties
        /// in the given Type. The result object must be already instantiated.
        /// Headings that are missing from the map, or mapped to an empty property name, are skipped.
        /// </summary>
        /// <typeparam name="T">The type of object to map.</typeparam>
        /// <param name="map">The map.</param>
        /// <param name="result">The result.</param>
        /// <exception cref="System.ArgumentNullException">map
        /// or
        /// result.</exception>
        /// <exception cref="System.InvalidOperationException">ReadHeadings has not been called.</exception>
        /// <exception cref="System.IO.EndOfStreamException">Cannot read past the end of the stream.</exception>
        public void ReadObject<T>(IDictionary<string, string> map, ref T result)
        {
            lock (_syncLock)
            {
                // Check arguments
                {
                    if (map == null)
                        throw new ArgumentNullException(nameof(map));

                    if (_reader.EndOfStream)
                        throw new EndOfStreamException("Cannot read past the end of the stream");

                    if (_headings == null)
                        throw new InvalidOperationException($"Call the {nameof(ReadHeadings)} method before reading as an object.");

                    if (Equals(result, default(T)))
                        throw new ArgumentNullException(nameof(result));
                }

                // Read line and extract values
                var values = ReadLine();

                // Extract properties from cache
                var properties = TypeCache
                    .RetrieveFilteredProperties(typeof(T), true, x => x.CanWrite && Definitions.BasicTypesInfo.ContainsKey(x.PropertyType));

                // Assign property values for each heading
                for (var i = 0; i < _headings.Length; i++)
                {
                    // break if the record has fewer values than there are headings
                    if (i > values.Length - 1)
                        break;

                    // Skip if the heading is not mapped or is mapped to an empty property name.
                    // TryGetValue avoids indexing the map with a missing key, which would throw
                    // KeyNotFoundException instead of skipping the column.
                    if (map.TryGetValue(_headings[i], out var propertyName) == false ||
                        string.IsNullOrWhiteSpace(propertyName))
                        continue;

                    // Parse and assign the basic type value to the property if exists
                    properties
                        .FirstOrDefault(p => p.Name.Equals(propertyName))?
                        .TrySetBasicType(values[i], result);
                }
            }
        }

        /// <summary>
        /// Reads a line of CSV text converting it into an object of the given type, using a map (or Dictionary)
        /// where the keys are the names of the headings and the values are the names of the instance properties
        /// in the given Type.
        /// </summary>
        /// <typeparam name="T">The type of object to map.</typeparam>
        /// <param name="map">The map of CSV headings (keys) and Type property names (values).</param>
        /// <returns>A new instance of <typeparamref name="T" /> populated from the record read.</returns>
        /// <exception cref="System.ArgumentNullException">map.</exception>
        /// <exception cref="System.InvalidOperationException">ReadHeadings has not been called.</exception>
        /// <exception cref="System.IO.EndOfStreamException">Cannot read past the end of the stream.</exception>
        public T ReadObject<T>(IDictionary<string, string> map)
            where T : new()
        {
            var result = Activator.CreateInstance<T>();
            ReadObject(map, ref result);
            return result;
        }

        /// <summary>
        /// Reads a line of CSV text converting it into an object of the given type, and assuming
        /// the property names of the target type match the heading names of the file.
        /// </summary>
        /// <typeparam name="T">The type of object.</typeparam>
        /// <returns>A new instance of <typeparamref name="T" /> populated from the record read.</returns>
        public T ReadObject<T>()
            where T : new()
        {
            return ReadObject<T>(_defaultMap);
        }

        #endregion

        #region Support Methods

        /// <summary>
        /// Parses a line of standard CSV text into an array of strings.
        /// Note that quoted values might have new line sequences in them. Field values will contain such sequences.
        /// </summary>
        /// <param name="reader">The reader.</param>
        /// <param name="escapeCharacter">The escape character.</param>
        /// <param name="separatorCharacter">The separator character.</param>
        /// <returns>The field values of the parsed record, in order of appearance.</returns>
        private static string[] ParseRecord(StreamReader reader, char escapeCharacter = '"', char separatorCharacter = ',')
        {
            var values = new List<string>();
            var currentValue = new StringBuilder(1024);
            var currentState = ReadState.WaitingForNewField;
            string line;

            while ((line = reader.ReadLine()) != null)
            {
                for (var charIndex = 0; charIndex < line.Length; charIndex++)
                {
                    // Get the current and next character (null when at the end of the line)
                    var currentChar = line[charIndex];
                    var nextChar = charIndex < line.Length - 1 ? line[charIndex + 1] : new char?();

                    // Perform logic based on state and decide on next state
                    switch (currentState)
                    {
                        case ReadState.WaitingForNewField:
                            {
                                currentValue.Clear();

                                // An opening quote starts a quoted field
                                if (currentChar == escapeCharacter)
                                {
                                    currentState = ReadState.PushingQuoted;
                                    continue;
                                }

                                // A separator right away means an empty field
                                if (currentChar == separatorCharacter)
                                {
                                    values.Add(currentValue.ToString());
                                    currentState = ReadState.WaitingForNewField;
                                    continue;
                                }

                                currentValue.Append(currentChar);
                                currentState = ReadState.PushingNormal;
                                continue;
                            }

                        case ReadState.PushingNormal:
                            {
                                // Handle field content delimiter by comma
                                if (currentChar == separatorCharacter)
                                {
                                    currentState = ReadState.WaitingForNewField;
                                    values.Add(currentValue.ToString());
                                    currentValue.Clear();
                                    continue;
                                }

                                // Handle double quote escaping
                                if (currentChar == escapeCharacter && nextChar.HasValue && nextChar == escapeCharacter)
                                {
                                    // advance 1 character now. The loop will advance one more.
                                    currentValue.Append(currentChar);
                                    charIndex++;
                                    continue;
                                }

                                currentValue.Append(currentChar);
                                break;
                            }

                        case ReadState.PushingQuoted:
                            {
                                // Handle field content delimiter by ending double quotes
                                if (currentChar == escapeCharacter && (nextChar.HasValue == false || nextChar != escapeCharacter))
                                {
                                    currentState = ReadState.PushingNormal;
                                    continue;
                                }

                                // Handle double quote escaping
                                if (currentChar == escapeCharacter && nextChar.HasValue && nextChar == escapeCharacter)
                                {
                                    // advance 1 character now. The loop will advance one more.
                                    currentValue.Append(currentChar);
                                    charIndex++;
                                    continue;
                                }

                                currentValue.Append(currentChar);
                                break;
                            }
                    }
                }

                // determine if we need to continue reading a new line if it is part of the quoted
                // field value
                if (currentState == ReadState.PushingQuoted)
                {
                    // we need to add the new line sequence to the output of the field
                    // because we were pushing a quoted value
                    currentValue.Append(Environment.NewLine);
                }
                else
                {
                    // push anything that has not been pushed (flush) into a last value
                    values.Add(currentValue.ToString());
                    currentValue.Clear();

                    // stop reading more lines we have reached the end of the CSV record
                    break;
                }
            }

            // If we ended up pushing quoted and no closing quotes we might
            // have additional text in it
            if (currentValue.Length > 0)
            {
                values.Add(currentValue.ToString());
            }

            return values.ToArray();
        }

        #endregion

        #region Helpers

        /// <summary>
        /// Loads the records from the stream.
        /// The first record is read as the headings and every following record becomes one item.
        /// This method uses Windows 1252 encoding and closes the stream when done.
        /// </summary>
        /// <typeparam name="T">The type of IList items to load.</typeparam>
        /// <param name="stream">The stream.</param>
        /// <returns>A generic collection of objects that can be individually accessed by index.</returns>
        public static IList<T> LoadRecords<T>(Stream stream)
            where T : new()
        {
            var result = new List<T>();

            using (var reader = new CsvReader(stream))
            {
                reader.ReadHeadings();
                while (!reader.EndOfStream)
                {
                    result.Add(reader.ReadObject<T>());
                }
            }

            return result;
        }

        /// <summary>
        /// Loads the records from the given file path.
        /// This method uses Windows 1252 encoding.
        /// </summary>
        /// <typeparam name="T">The type of IList items to load.</typeparam>
        /// <param name="filePath">The file path.</param>
        /// <returns>A generic collection of objects that can be individually accessed by index.</returns>
        public static IList<T> LoadRecords<T>(string filePath)
            where T : new()
        {
            return LoadRecords<T>(File.OpenRead(filePath));
        }

        #endregion

        #region IDisposable Support

        /// <summary>
        /// Releases unmanaged and - optionally - managed resources.
        /// </summary>
        /// <param name="disposing"><c>true</c> to release both managed and unmanaged resources; <c>false</c> to release only unmanaged resources.</param>
        protected virtual void Dispose(bool disposing)
        {
            if (_hasDisposed) return;

            if (disposing)
            {
                try
                {
                    _reader.Dispose();
                }
                finally
                {
                    // Drop the reference even if disposing the reader failed.
                    _reader = null;
                }
            }

            _hasDisposed = true;
        }

        /// <summary>
        /// Performs application-defined tasks associated with freeing, releasing, or resetting unmanaged resources.
        /// </summary>
        public void Dispose()
        {
            Dispose(true);

            // Spares derived types that add a finalizer from a redundant finalization pass.
            GC.SuppressFinalize(this);
        }

        #endregion

        /// <summary>
        /// Defines the 3 different read states
        /// for the parsing state machine.
        /// </summary>
        private enum ReadState
        {
            WaitingForNewField,
            PushingNormal,
            PushingQuoted,
        }
    }
}