using System;
using System.Collections.Generic;
using System.IO;
using System.Linq;
using System.Text;

using Swan.Reflection;

namespace Swan.Formatters {
    /// <summary>
    /// Represents a reader designed for CSV text.
    /// It is capable of deserializing objects from individual lines of CSV text,
    /// transforming CSV lines of text into objects,
    /// or simply reading the lines of CSV as an array of strings.
    /// </summary>
    /// <example>
    /// The following example describes how to load a list of objects from a CSV file.
    /// <code>
    /// using Swan.Formatters;
    ///
    /// class Example
    /// {
    ///     class Person
    ///     {
    ///         public string Name { get; set; }
    ///         public int Age { get; set; }
    ///     }
    ///
    ///     static void Main()
    ///     {
    ///         // load records from a CSV file
    ///         var loadedRecords =
    ///             CsvReader.LoadRecords&lt;Person&gt;("C:\\Users\\user\\Documents\\file.csv");
    ///
    ///         // loadedRecords =
    ///         // [
    ///         //   { Age = 20, Name = "George" }
    ///         //   { Age = 18, Name = "Juan" }
    ///         // ]
    ///     }
    /// }
    /// </code>
    /// The following code explains how to read a CSV formatted string.
    /// <code>
    /// using Swan.Formatters;
    /// using System.IO;
    /// using System.Text;
    ///
    /// class Example
    /// {
    ///     static void Main()
    ///     {
    ///         // data to be read
    ///         var data = @"Company,OpenPositions,MainTechnology,Revenue
    ///         Co,2,""C#, MySQL, JavaScript, HTML5 and CSS3"",500
    ///         Ca,2,""C#, MySQL, JavaScript, HTML5 and CSS3"",600";
    ///
    ///         using(var stream = new MemoryStream(Encoding.UTF8.GetBytes(data)))
    ///         {
    ///             // create a CSV reader
    ///             var reader = new CsvReader(stream, false, Encoding.UTF8);
    ///         }
    ///     }
    /// }
    /// </code>
    /// </example>
    public class CsvReader : IDisposable {
        private static readonly PropertyTypeCache TypeCache = new PropertyTypeCache();

        private readonly Object _syncLock = new Object();

        private UInt64 _count;
        private Char _escapeCharacter = '"';
        private Char _separatorCharacter = ',';

        private Boolean _hasDisposed; // To detect redundant calls
        private String[] _headings;
        private Dictionary<String, String> _defaultMap;
        private StreamReader _reader;

        #region Constructors

        /// <summary>
        /// Initializes a new instance of the <see cref="CsvReader" /> class.
        /// </summary>
        /// <param name="inputStream">The stream.</param>
        /// <param name="leaveOpen">if set to <c>true</c> leaves the input stream open.</param>
        /// <param name="textEncoding">The text encoding.</param>
        /// <exception cref="ArgumentNullException">
        /// <paramref name="inputStream"/> or <paramref name="textEncoding"/> is <c>null</c>.
        /// </exception>
        public CsvReader(Stream inputStream, Boolean leaveOpen, Encoding textEncoding) {
            if(inputStream == null) {
                throw new ArgumentNullException(nameof(inputStream));
            }

            if(textEncoding == null) {
                throw new ArgumentNullException(nameof(textEncoding));
            }

            this._reader = new StreamReader(inputStream, textEncoding, true, 2048, leaveOpen);
        }

        /// <summary>
        /// Initializes a new instance of the <see cref="CsvReader"/> class.
        /// It will automatically close the stream upon disposing.
        /// </summary>
        /// <param name="stream">The stream.</param>
        /// <param name="textEncoding">The text encoding.</param>
        public CsvReader(Stream stream, Encoding textEncoding) : this(stream, false, textEncoding) {
            // placeholder
        }

        /// <summary>
        /// Initializes a new instance of the <see cref="CsvReader"/> class.
        /// It automatically closes the stream when disposing this reader
        /// and uses the Windows 1252 encoding.
        /// </summary>
        /// <param name="stream">The stream.</param>
        public CsvReader(Stream stream) : this(stream, false, Definitions.Windows1252Encoding) {
        }

        /// <summary>
        /// Initializes a new instance of the <see cref="CsvReader"/> class.
        /// It uses the Windows 1252 Encoding by default and it automatically closes the file
        /// when this reader is disposed of.
        /// </summary>
        /// <param name="filename">The filename.</param>
        public CsvReader(String filename) : this(File.OpenRead(filename), false, Definitions.Windows1252Encoding) {
            // placeholder
        }

        /// <summary>
        /// Initializes a new instance of the <see cref="CsvReader"/> class.
        /// It automatically closes the file when disposing this reader.
        /// </summary>
        /// <param name="filename">The filename.</param>
        /// <param name="encoding">The encoding.</param>
        public CsvReader(String filename, Encoding encoding) : this(File.OpenRead(filename), false, encoding) {
            // placeholder
        }

        #endregion

        #region Properties

        /// <summary>
        /// Gets number of lines that have been read, including the headings.
        /// </summary>
        /// <value>
        /// The count.
        /// </value>
        public UInt64 Count {
            get {
                lock(this._syncLock) {
                    return this._count;
                }
            }
        }

        /// <summary>
        /// Gets or sets the escape character.
        /// By default it is the double quote '"'.
        /// </summary>
        /// <value>
        /// The escape character.
        /// </value>
        public Char EscapeCharacter {
            get => this._escapeCharacter;
            set {
                lock(this._syncLock) {
                    this._escapeCharacter = value;
                }
            }
        }

        /// <summary>
        /// Gets or sets the separator character.
        /// By default it is the comma character ','.
        /// </summary>
        /// <value>
        /// The separator character.
        /// </value>
        public Char SeparatorCharacter {
            get => this._separatorCharacter;
            set {
                lock(this._syncLock) {
                    this._separatorCharacter = value;
                }
            }
        }

        /// <summary>
        /// Gets a value indicating whether the stream reader is at the end of the stream.
        /// In other words, if no more data can be read, this will be set to true.
        /// </summary>
        /// <value>
        ///   <c>true</c> if [end of stream]; otherwise, <c>false</c>.
        /// </value>
        public Boolean EndOfStream {
            get {
                lock(this._syncLock) {
                    return this._reader.EndOfStream;
                }
            }
        }

        #endregion

        #region Generic, Main ReadLine method

        /// <summary>
        /// Reads a line of CSV text into an array of strings and increments <see cref="Count"/>.
        /// </summary>
        /// <returns>The field values of the record that was read, in column order.</returns>
        /// <exception cref="EndOfStreamException">Cannot read past the end of the stream.</exception>
        public String[] ReadLine() {
            lock(this._syncLock) {
                if(this._reader.EndOfStream) {
                    throw new EndOfStreamException("Cannot read past the end of the stream");
                }

                String[] values = ParseRecord(this._reader, this._escapeCharacter, this._separatorCharacter);
                this._count++;
                return values;
            }
        }

        #endregion

        #region Read Methods

        /// <summary>
        /// Skips a line of CSV text.
        /// This operation does not increment the Count property and it is useful when you need to read the headings
        /// skipping over a few lines as Reading headings is only supported
        /// as the first read operation (i.e. while count is still 0).
        /// </summary>
        /// <exception cref="EndOfStreamException">Cannot read past the end of the stream.</exception>
        public void SkipRecord() {
            lock(this._syncLock) {
                if(this._reader.EndOfStream) {
                    throw new EndOfStreamException("Cannot read past the end of the stream");
                }

                _ = ParseRecord(this._reader, this._escapeCharacter, this._separatorCharacter);
            }
        }

        /// <summary>
        /// Reads a line of CSV text and stores the values read as a representation of the column names
        /// to be used for parsing objects. You have to call this method before calling ReadObject methods.
        /// </summary>
        /// <returns>A copy of the heading names that were read, in column order.</returns>
        /// <exception cref="InvalidOperationException">
        /// Reading headings is only supported as the first read operation,
        /// or ReadHeadings had already been called.
        /// </exception>
        /// <exception cref="EndOfStreamException">Cannot read past the end of the stream.</exception>
        public String[] ReadHeadings() {
            lock(this._syncLock) {
                if(this._headings != null) {
                    throw new InvalidOperationException($"The {nameof(ReadHeadings)} method had already been called.");
                }

                if(this._count != 0) {
                    throw new InvalidOperationException("Reading headings is only supported as the first read operation.");
                }

                this._headings = this.ReadLine();

                // The default map pairs every heading with a property of the same name.
                this._defaultMap = this._headings.ToDictionary(x => x, x => x);

                // Hand out a copy so callers cannot mutate the stored headings.
                return this._headings.ToArray();
            }
        }

        /// <summary>
        /// Reads a line of CSV text, converting it into a dynamic object in which properties correspond to the names of the headings.
        /// </summary>
        /// <param name="map">The mappings between CSV headings (keys) and object properties (values).</param>
        /// <returns>A dictionary keyed by heading name holding the values of the record that was read.</returns>
        /// <exception cref="InvalidOperationException">ReadHeadings has not been called.</exception>
        /// <exception cref="EndOfStreamException">Cannot read past the end of the stream.</exception>
        /// <exception cref="ArgumentNullException">map.</exception>
        public IDictionary<String, Object> ReadObject(IDictionary<String, String> map) {
            lock(this._syncLock) {
                if(this._headings == null) {
                    throw new InvalidOperationException($"Call the {nameof(ReadHeadings)} method before reading as an object.");
                }

                if(map == null) {
                    throw new ArgumentNullException(nameof(map));
                }

                Dictionary<String, Object> result = new Dictionary<String, Object>();
                String[] values = this.ReadLine();

                for(Int32 i = 0; i < this._headings.Length; i++) {
                    // The record may have fewer fields than there are headings.
                    if(i > values.Length - 1) {
                        break;
                    }

                    result[this._headings[i]] = values[i];
                }

                return result;
            }
        }

        /// <summary>
        /// Reads a line of CSV text, converting it into a dynamic object.
        /// The property names correspond to the names of the CSV headings.
        /// </summary>
        /// <returns>A dictionary keyed by heading name holding the values of the record that was read.</returns>
        public IDictionary<String, Object> ReadObject() => this.ReadObject(this._defaultMap);

        /// <summary>
        /// Reads a line of CSV text converting it into an object of the given type, using a map (or Dictionary)
        /// where the keys are the names of the headings and the values are the names of the instance properties
        /// in the given Type. The result object must be already instantiated.
        /// </summary>
        /// <typeparam name="T">The type of object to map.</typeparam>
        /// <param name="map">The map.</param>
        /// <param name="result">The result.</param>
        /// <exception cref="ArgumentNullException">map or result.</exception>
        /// <exception cref="InvalidOperationException">ReadHeadings has not been called.</exception>
        /// <exception cref="EndOfStreamException">Cannot read past the end of the stream.</exception>
        public void ReadObject<T>(IDictionary<String, String> map, ref T result) {
            lock(this._syncLock) {
                // Check arguments
                {
                    if(map == null) {
                        throw new ArgumentNullException(nameof(map));
                    }

                    if(this._reader.EndOfStream) {
                        throw new EndOfStreamException("Cannot read past the end of the stream");
                    }

                    if(this._headings == null) {
                        throw new InvalidOperationException($"Call the {nameof(ReadHeadings)} method before reading as an object.");
                    }

                    if(Equals(result, default(T))) {
                        throw new ArgumentNullException(nameof(result));
                    }
                }

                // Read line and extract values
                String[] values = this.ReadLine();

                // Extract properties from cache
                var properties = TypeCache
                    .RetrieveFilteredProperties(typeof(T), true, x => x.CanWrite && Definitions.BasicTypesInfo.Value.ContainsKey(x.PropertyType));

                // Assign property values for each heading
                for(Int32 i = 0; i < this._headings.Length; i++) {
                    // break if no more headings are matched
                    if(i > values.Length - 1) {
                        break;
                    }

                    // Skip headings that are not mapped, or that are mapped to a blank property name.
                    // A single TryGetValue avoids indexing the map with a missing key, which would throw.
                    if(map.TryGetValue(this._headings[i], out String propertyName) == false || String.IsNullOrWhiteSpace(propertyName)) {
                        continue;
                    }

                    // Parse and assign the basic type value to the property if exists
                    _ = properties
                        .FirstOrDefault(p => p.Name == propertyName)?
                        .TrySetBasicType(values[i], result);
                }
            }
        }

        /// <summary>
        /// Reads a line of CSV text converting it into an object of the given type, using a map (or Dictionary)
        /// where the keys are the names of the headings and the values are the names of the instance properties
        /// in the given Type.
        /// </summary>
        /// <typeparam name="T">The type of object to map.</typeparam>
        /// <param name="map">The map of CSV headings (keys) and Type property names (values).</param>
        /// <returns>The conversion of specific type of object.</returns>
        /// <exception cref="ArgumentNullException">map.</exception>
        /// <exception cref="InvalidOperationException">ReadHeadings has not been called.</exception>
        /// <exception cref="EndOfStreamException">Cannot read past the end of the stream.</exception>
        public T ReadObject<T>(IDictionary<String, String> map) where T : new() {
            T result = Activator.CreateInstance<T>();
            this.ReadObject(map, ref result);
            return result;
        }

        /// <summary>
        /// Reads a line of CSV text converting it into an object of the given type, and assuming
        /// the property names of the target type match the heading names of the file.
        /// </summary>
        /// <typeparam name="T">The type of object.</typeparam>
        /// <returns>The conversion of specific type of object.</returns>
        public T ReadObject<T>() where T : new() => this.ReadObject<T>(this._defaultMap);

        #endregion

        #region Support Methods

        /// <summary>
        /// Parses a line of standard CSV text into an array of strings.
        /// Note that quoted values might have new line sequences in them. Field values will contain such sequences.
        /// </summary>
        /// <param name="reader">The reader.</param>
        /// <param name="escapeCharacter">The escape character.</param>
        /// <param name="separatorCharacter">The separator character.</param>
        /// <returns>The field values of the record, in the order they appear.</returns>
        private static String[] ParseRecord(StreamReader reader, Char escapeCharacter = '"', Char separatorCharacter = ',') {
            List<String> values = new List<String>();
            StringBuilder currentValue = new StringBuilder(1024);
            ReadState currentState = ReadState.WaitingForNewField;
            String line;

            while((line = reader.ReadLine()) != null) {
                for(Int32 charIndex = 0; charIndex < line.Length; charIndex++) {
                    // Get the current and next character
                    Char currentChar = line[charIndex];
                    Char? nextChar = charIndex < line.Length - 1 ? line[charIndex + 1] : (Char?)null;

                    // Perform logic based on state and decide on next state
                    switch(currentState) {
                        case ReadState.WaitingForNewField: {
                            _ = currentValue.Clear();

                            if(currentChar == escapeCharacter) {
                                currentState = ReadState.PushingQuoted;
                                continue;
                            }

                            if(currentChar == separatorCharacter) {
                                // Two separators in a row: push an empty field.
                                values.Add(currentValue.ToString());
                                currentState = ReadState.WaitingForNewField;
                                continue;
                            }

                            _ = currentValue.Append(currentChar);
                            currentState = ReadState.PushingNormal;
                            continue;
                        }

                        case ReadState.PushingNormal: {
                            // Handle field content delimiter by comma
                            if(currentChar == separatorCharacter) {
                                currentState = ReadState.WaitingForNewField;
                                values.Add(currentValue.ToString());
                                _ = currentValue.Clear();
                                continue;
                            }

                            // Handle double quote escaping
                            if(currentChar == escapeCharacter && nextChar.HasValue && nextChar == escapeCharacter) {
                                // advance 1 character now. The loop will advance one more.
                                _ = currentValue.Append(currentChar);
                                charIndex++;
                                continue;
                            }

                            _ = currentValue.Append(currentChar);
                            break;
                        }

                        case ReadState.PushingQuoted: {
                            // Handle field content delimiter by ending double quotes
                            if(currentChar == escapeCharacter && (nextChar.HasValue == false || nextChar != escapeCharacter)) {
                                currentState = ReadState.PushingNormal;
                                continue;
                            }

                            // Handle double quote escaping
                            if(currentChar == escapeCharacter && nextChar.HasValue && nextChar == escapeCharacter) {
                                // advance 1 character now. The loop will advance one more.
                                _ = currentValue.Append(currentChar);
                                charIndex++;
                                continue;
                            }

                            _ = currentValue.Append(currentChar);
                            break;
                        }
                    }
                }

                // determine if we need to continue reading a new line if it is part of the quoted
                // field value
                if(currentState == ReadState.PushingQuoted) {
                    // we need to add the new line sequence to the output of the field
                    // because we were pushing a quoted value
                    _ = currentValue.Append(Environment.NewLine);
                } else {
                    // push anything that has not been pushed (flush) into a last value
                    values.Add(currentValue.ToString());
                    _ = currentValue.Clear();

                    // stop reading more lines we have reached the end of the CSV record
                    break;
                }
            }

            // If we ended up pushing quoted and no closing quotes we might
            // have additional text in it
            if(currentValue.Length > 0) {
                values.Add(currentValue.ToString());
            }

            return values.ToArray();
        }

        #endregion

        #region Helpers

        /// <summary>
        /// Loads the records from the stream.
        /// This method uses Windows 1252 encoding.
        /// </summary>
        /// <typeparam name="T">The type of IList items to load.</typeparam>
        /// <param name="stream">The stream.</param>
        /// <returns>A generic collection of objects that can be individually accessed by index.</returns>
        public static IList<T> LoadRecords<T>(Stream stream) where T : new() {
            List<T> result = new List<T>();

            using(CsvReader reader = new CsvReader(stream)) {
                _ = reader.ReadHeadings();

                while(!reader.EndOfStream) {
                    result.Add(reader.ReadObject<T>());
                }
            }

            return result;
        }

        /// <summary>
        /// Loads the records from the given file path.
        /// This method uses Windows 1252 encoding.
        /// </summary>
        /// <typeparam name="T">The type of IList items to load.</typeparam>
        /// <param name="filePath">The file path.</param>
        /// <returns>A generic collection of objects that can be individually accessed by index.</returns>
        public static IList<T> LoadRecords<T>(String filePath) where T : new() => LoadRecords<T>(File.OpenRead(filePath));

        #endregion

        #region IDisposable Support

        /// <summary>
        /// Releases unmanaged and - optionally - managed resources.
        /// </summary>
        /// <param name="disposing"><c>true</c> to release both managed and unmanaged resources; <c>false</c> to release only unmanaged resources.</param>
        protected virtual void Dispose(Boolean disposing) {
            if(this._hasDisposed) {
                return;
            }

            if(disposing) {
                try {
                    this._reader.Dispose();
                } finally {
                    this._reader = null;
                }
            }

            this._hasDisposed = true;
        }

        /// <inheritdoc />
        public void Dispose() {
            this.Dispose(true);
            GC.SuppressFinalize(this);
        }

        #endregion

        /// <summary>
        /// Defines the 3 different read states
        /// for the parsing state machine.
        /// </summary>
        private enum ReadState {
            // At the start of a field; no character of it has been consumed yet.
            WaitingForNewField,

            // Appending characters of a field outside of quotes.
            PushingNormal,

            // Appending characters of a field inside quotes; separators are literal here.
            PushingQuoted,
        }
    }
}