Source code

001package gudusoft.gsqlparser.util.csv;
002
003import java.io.BufferedReader;
004import java.io.File;
005import java.io.FileInputStream;
006import java.io.FileNotFoundException;
007import java.io.IOException;
008import java.io.InputStream;
009import java.io.InputStreamReader;
010import java.io.Reader;
011import java.io.StringReader;
012import java.nio.charset.Charset;
013import java.text.NumberFormat;
014import java.util.HashMap;
015
016/**
017 * A stream based parser for parsing delimited text data from a file or a
018 * stream.
019 */
020public class CsvReader {
021    private Reader inputStream = null; // assigned by the Reader-based constructor
022
023    private String fileName = null; // assigned by the file-name based constructor
024
025    // holds the values of every switch the caller is allowed to set
026    private UserSettings userSettings = new UserSettings();
027
028    private Charset charset = null; // assigned by the file-name based constructor
029
030    private boolean useCustomRecordDelimiter = false; // set to true by setRecordDelimiter(char)
031
032    // working buffers used while parsing; dataBuffer holds the chunks of
033    // data read in from the data source
034
035    private DataBuffer dataBuffer = new DataBuffer();
036
037    private ColumnBuffer columnBuffer = new ColumnBuffer();
038
039    private RawRecordBuffer rawBuffer = new RawRecordBuffer();
040
041    private boolean[] isQualified = null; // allocated in the constructors with values.length entries
042
043    private String rawRecord = ""; // returned as-is by getRawRecord()
044
045    private HeadersHolder headersHolder = new HeadersHolder();
046
047    // these are all more or less global loop variables,
048    // kept as fields so they do not have to be passed into
049    // the various methods used during parsing
050
051    private boolean startedColumn = false;
052
053    private boolean startedWithQualifier = false;
054
055    private boolean hasMoreData = true;
056
057    private char lastLetter = '\0';
058
059    private boolean hasReadNextLine = false;
060
061    private int columnsCount = 0; // reset to 0 at the start of readRecord(); reported by getColumnCount()
062
063    private long currentRecord = 0; // getCurrentRecord() reports this value minus one
064
065    private String[] values = new String[StaticSettings.INITIAL_COLUMN_COUNT];
066
067    private boolean initialized = false; // set to true by the Reader-based constructor
068
069    private boolean closed = false;
070
071    /**
072     * Double up the text qualifier to represent an occurrence of the text
073     * qualifier.
074     */
075    public static final int ESCAPE_MODE_DOUBLED = 1;
076
077    /**
078     * Use a backslash character before the text qualifier to represent an
079     * occurrence of the text qualifier.
080     */
081    public static final int ESCAPE_MODE_BACKSLASH = 2;
082
083    /**
084     * Creates a {@link CsvReader CsvReader} object using a file
085     * as the data source.
086     *
087     * @param fileName
088     *            The path to the file to use as the data source.
089     * @param delimiter
090     *            The character to use as the column delimiter.
091     * @param charset
092     *            The {@link java.nio.charset.Charset Charset} to use while
093     *            parsing the data.
094     */
095    public CsvReader(String fileName, char delimiter, Charset charset)
096            throws FileNotFoundException {
097        if (fileName == null) {
098            throw new IllegalArgumentException(
099                    "Parameter fileName can not be null.");
100        }
101
102        if (charset == null) {
103            throw new IllegalArgumentException(
104                    "Parameter charset can not be null.");
105        }
106
107        if (!new File(fileName).exists()) {
108            throw new FileNotFoundException("File " + fileName
109                    + " does not exist.");
110        }
111
112        this.fileName = fileName;
113        this.userSettings.Delimiter = delimiter;
114        this.charset = charset;
115
116        isQualified = new boolean[values.length];
117    }
118
/**
 * Builds a {@link CsvReader CsvReader} that takes its data from a file,
 * decoding it with the ISO-8859-1
 * {@link java.nio.charset.Charset Charset}.
 *
 * @param fileName
 *            Path of the file to use as the data source.
 * @param delimiter
 *            Character that separates columns.
 * @exception FileNotFoundException
 *                When no file exists at fileName.
 */
public CsvReader(String fileName, char delimiter)
        throws FileNotFoundException {
    // same as the three-argument form with a fixed charset
    this(fileName, delimiter, Charset.forName("ISO-8859-1"));
}
133
/**
 * Builds a {@link CsvReader CsvReader} that takes its data from a file,
 * with a comma as the column delimiter and ISO-8859-1 as the
 * {@link java.nio.charset.Charset Charset}.
 *
 * @param fileName
 *            Path of the file to use as the data source.
 * @exception FileNotFoundException
 *                When no file exists at fileName.
 */
public CsvReader(String fileName) throws FileNotFoundException {
    // the charset default is supplied by the two-argument constructor
    this(fileName, Letters.COMMA);
}
145
/**
 * Builds a {@link CsvReader CsvReader} on top of an existing
 * {@link java.io.Reader Reader}. The reader is marked as initialized
 * immediately, since the data source is already supplied.
 *
 * @param inputStream
 *            The reader to use as the data source.
 * @param delimiter
 *            Character that separates columns.
 * @exception IllegalArgumentException
 *                When inputStream is null.
 */
public CsvReader(Reader inputStream, char delimiter) {
    if (null == inputStream) {
        throw new IllegalArgumentException(
                "Parameter inputStream can not be null.");
    }

    // one qualified-flag slot per value slot
    this.isQualified = new boolean[this.values.length];

    this.initialized = true;
    this.userSettings.Delimiter = delimiter;
    this.inputStream = inputStream;
}
167
/**
 * Builds a {@link CsvReader CsvReader} on top of an existing
 * {@link java.io.Reader Reader}, with a comma as the column delimiter.
 *
 * @param inputStream
 *            The reader to use as the data source.
 * @exception IllegalArgumentException
 *                When inputStream is null.
 */
public CsvReader(Reader inputStream) {
    // comma is the default column delimiter
    this(inputStream, Letters.COMMA);
}
179
/**
 * Constructs a {@link CsvReader CsvReader} object using an
 * {@link java.io.InputStream InputStream} object as the data source.
 *
 * @param inputStream
 *            The stream to use as the data source.
 * @param delimiter
 *            The character to use as the column delimiter.
 * @param charset
 *            The {@link java.nio.charset.Charset Charset} to use while
 *            parsing the data.
 * @exception IllegalArgumentException
 *                When inputStream or charset is null.
 */
public CsvReader(InputStream inputStream, char delimiter, Charset charset) {
    // Validation has to happen inside the argument expression because
    // this(...) must be the first statement of the constructor.
    this(newValidatedReader(inputStream, charset), delimiter);
}

/**
 * Wraps a byte stream in a character reader after rejecting null
 * arguments with the same IllegalArgumentException style the other
 * constructors use (instead of a bare NullPointerException raised from
 * inside InputStreamReader).
 */
private static Reader newValidatedReader(InputStream inputStream,
        Charset charset) {
    if (inputStream == null) {
        throw new IllegalArgumentException(
                "Parameter inputStream can not be null.");
    }

    if (charset == null) {
        throw new IllegalArgumentException(
                "Parameter charset can not be null.");
    }

    return new InputStreamReader(inputStream, charset);
}
195
/**
 * Builds a {@link CsvReader CsvReader} on top of an
 * {@link java.io.InputStream InputStream}, with a comma as the column
 * delimiter.
 *
 * @param inputStream
 *            The stream to use as the data source.
 * @param charset
 *            The {@link java.nio.charset.Charset Charset} used to decode
 *            the stream.
 */
public CsvReader(InputStream inputStream, Charset charset) {
    // decode the bytes with the given charset and use the default comma
    this(new InputStreamReader(inputStream, charset), Letters.COMMA);
}
210
211    public boolean getCaptureRawRecord() {
212        return userSettings.CaptureRawRecord;
213    }
214
215    public void setCaptureRawRecord(boolean captureRawRecord) {
216        userSettings.CaptureRawRecord = captureRawRecord;
217    }
218
219    public String getRawRecord() {
220        return rawRecord;
221    }
222
223    /**
224     * Gets whether leading and trailing whitespace characters are being trimmed
225     * from non-textqualified column data. Default is true.
226     *
227     * @return Whether leading and trailing whitespace characters are being
228     *         trimmed from non-textqualified column data.
229     */
230    public boolean getTrimWhitespace() {
231        return userSettings.TrimWhitespace;
232    }
233
234    /**
235     * Sets whether leading and trailing whitespace characters should be trimmed
236     * from non-textqualified column data or not. Default is true.
237     *
238     * @param trimWhitespace
239     *            Whether leading and trailing whitespace characters should be
240     *            trimmed from non-textqualified column data or not.
241     */
242    public void setTrimWhitespace(boolean trimWhitespace) {
243        userSettings.TrimWhitespace = trimWhitespace;
244    }
245
246    /**
247     * Gets the character being used as the column delimiter. Default is comma,
248     * ','.
249     *
250     * @return The character being used as the column delimiter.
251     */
252    public char getDelimiter() {
253        return userSettings.Delimiter;
254    }
255
256    /**
257     * Sets the character to use as the column delimiter. Default is comma, ','.
258     *
259     * @param delimiter
260     *            The character to use as the column delimiter.
261     */
262    public void setDelimiter(char delimiter) {
263        userSettings.Delimiter = delimiter;
264    }
265
266    public char getRecordDelimiter() {
267        return userSettings.RecordDelimiter;
268    }
269
270    /**
271     * Sets the character to use as the record delimiter.
272     *
273     * @param recordDelimiter
274     *            The character to use as the record delimiter. Default is
275     *            combination of standard end of line characters for Windows,
276     *            Unix, or Mac.
277     */
278    public void setRecordDelimiter(char recordDelimiter) {
279        useCustomRecordDelimiter = true;
280        userSettings.RecordDelimiter = recordDelimiter;
281    }
282
283    /**
284     * Gets the character to use as a text qualifier in the data.
285     *
286     * @return The character to use as a text qualifier in the data.
287     */
288    public char getTextQualifier() {
289        return userSettings.TextQualifier;
290    }
291
292    /**
293     * Sets the character to use as a text qualifier in the data.
294     *
295     * @param textQualifier
296     *            The character to use as a text qualifier in the data.
297     */
298    public void setTextQualifier(char textQualifier) {
299        userSettings.TextQualifier = textQualifier;
300    }
301
302    /**
303     * Whether text qualifiers will be used while parsing or not.
304     *
305     * @return Whether text qualifiers will be used while parsing or not.
306     */
307    public boolean getUseTextQualifier() {
308        return userSettings.UseTextQualifier;
309    }
310
311    /**
312     * Sets whether text qualifiers will be used while parsing or not.
313     *
314     * @param useTextQualifier
315     *            Whether to use a text qualifier while parsing or not.
316     */
317    public void setUseTextQualifier(boolean useTextQualifier) {
318        userSettings.UseTextQualifier = useTextQualifier;
319    }
320
321    /**
322     * Gets the character being used as a comment signal.
323     *
324     * @return The character being used as a comment signal.
325     */
326    public char getComment() {
327        return userSettings.Comment;
328    }
329
330    /**
331     * Sets the character to use as a comment signal.
332     *
333     * @param comment
334     *            The character to use as a comment signal.
335     */
336    public void setComment(char comment) {
337        userSettings.Comment = comment;
338    }
339
340    /**
341     * Gets whether comments are being looked for while parsing or not.
342     *
343     * @return Whether comments are being looked for while parsing or not.
344     */
345    public boolean getUseComments() {
346        return userSettings.UseComments;
347    }
348
349    /**
350     * Sets whether comments are being looked for while parsing or not.
351     *
352     * @param useComments
353     *            Whether comments are being looked for while parsing or not.
354     */
355    public void setUseComments(boolean useComments) {
356        userSettings.UseComments = useComments;
357    }
358
359    /**
360     * Gets the current way to escape an occurance of the text qualifier inside
361     * qualified data.
362     *
363     * @return The current way to escape an occurance of the text qualifier
364     *         inside qualified data.
365     */
366    public int getEscapeMode() {
367        return userSettings.EscapeMode;
368    }
369
370    /**
371     * Sets the current way to escape an occurance of the text qualifier inside
372     * qualified data.
373     *
374     * @param escapeMode
375     *            The way to escape an occurance of the text qualifier inside
376     *            qualified data.
377     * @exception IllegalArgumentException
378     *                When an illegal value is specified for escapeMode.
379     */
380    public void setEscapeMode(int escapeMode) throws IllegalArgumentException {
381        if (escapeMode != ESCAPE_MODE_DOUBLED
382                && escapeMode != ESCAPE_MODE_BACKSLASH) {
383            throw new IllegalArgumentException(
384                    "Parameter escapeMode must be a valid value.");
385        }
386
387        userSettings.EscapeMode = escapeMode;
388    }
389
390    public boolean getSkipEmptyRecords() {
391        return userSettings.SkipEmptyRecords;
392    }
393
394    public void setSkipEmptyRecords(boolean skipEmptyRecords) {
395        userSettings.SkipEmptyRecords = skipEmptyRecords;
396    }
397
398    /**
399     * Safety caution to prevent the parser from using large amounts of memory
400     * in the case where parsing settings like file encodings don't end up
401     * matching the actual format of a file. This switch can be turned off if
402     * the file format is known and tested. With the switch off, the max column
403     * lengths and max column count per record supported by the parser will
404     * greatly increase. Default is true.
405     *
406     * @return The current setting of the safety switch.
407     */
408    public boolean getSafetySwitch() {
409        return userSettings.SafetySwitch;
410    }
411
412    /**
413     * Safety caution to prevent the parser from using large amounts of memory
414     * in the case where parsing settings like file encodings don't end up
415     * matching the actual format of a file. This switch can be turned off if
416     * the file format is known and tested. With the switch off, the max column
417     * lengths and max column count per record supported by the parser will
418     * greatly increase. Default is true.
419     *
420     * @param safetySwitch
421     */
422    public void setSafetySwitch(boolean safetySwitch) {
423        userSettings.SafetySwitch = safetySwitch;
424    }
425
426    /**
427     * Gets the count of columns found in this record.
428     *
429     * @return The count of columns found in this record.
430     */
431    public int getColumnCount() {
432        return columnsCount;
433    }
434
435    /**
436     * Gets the index of the current record.
437     *
438     * @return The index of the current record.
439     */
440    public long getCurrentRecord() {
441        return currentRecord - 1;
442    }
443
444    /**
445     * Gets the count of headers read in by a previous call to
446     * {@link CsvReader#readHeaders readHeaders()}.
447     *
448     * @return The count of headers read in by a previous call to
449     *         {@link CsvReader#readHeaders readHeaders()}.
450     */
451    public int getHeaderCount() {
452        return headersHolder.Length;
453    }
454
455    /**
456     * Returns the header values as a string array.
457     *
458     * @return The header values as a String array.
459     * @exception IOException
460     *                Thrown if this object has already been closed.
461     */
462    public String[] getHeaders() throws IOException {
463        checkClosed();
464
465        if (headersHolder.Headers == null) {
466            return null;
467        } else {
468            // use clone here to prevent the outside code from
469            // setting values on the array directly, which would
470            // throw off the index lookup based on header name
471            String[] clone = new String[headersHolder.Length];
472            System.arraycopy(headersHolder.Headers, 0, clone, 0,
473                    headersHolder.Length);
474            return clone;
475        }
476    }
477
478    public void setHeaders(String[] headers) {
479        headersHolder.Headers = headers;
480
481        headersHolder.IndexByName.clear();
482
483        if (headers != null) {
484            headersHolder.Length = headers.length;
485        } else {
486            headersHolder.Length = 0;
487        }
488
489        // use headersHolder.Length here in case headers is null
490        for (int i = 0; i < headersHolder.Length; i++) {
491            headersHolder.IndexByName.put(headers[i], new Integer(i));
492        }
493    }
494
495    public String[] getValues() throws IOException {
496        checkClosed();
497
498        // need to return a clone, and can't use clone because values.Length
499        // might be greater than columnsCount
500        String[] clone = new String[columnsCount];
501        System.arraycopy(values, 0, clone, 0, columnsCount);
502        return clone;
503    }
504
505    /**
506     * Returns the current column value for a given column index.
507     *
508     * @param columnIndex
509     *            The index of the column.
510     * @return The current column value.
511     * @exception IOException
512     *                Thrown if this object has already been closed.
513     */
514    public String get(int columnIndex) throws IOException {
515        checkClosed();
516
517        if (columnIndex > -1 && columnIndex < columnsCount) {
518            return values[columnIndex];
519        } else {
520            return "";
521        }
522    }
523
524    /**
525     * Returns the current column value for a given column header name.
526     *
527     * @param headerName
528     *            The header name of the column.
529     * @return The current column value.
530     * @exception IOException
531     *                Thrown if this object has already been closed.
532     */
533    public String get(String headerName) throws IOException {
534        checkClosed();
535
536        return get(getIndex(headerName));
537    }
538
539    /**
540     * Creates a {@link CsvReader CsvReader} object using a string
541     * of data as the source.&nbsp;Uses ISO-8859-1 as the
542     * {@link java.nio.charset.Charset Charset}.
543     *
544     * @param data
545     *            The String of data to use as the source.
546     * @return A {@link CsvReader CsvReader} object using the
547     *         String of data as the source.
548     */
549    public static CsvReader parse(String data) {
550        if (data == null) {
551            throw new IllegalArgumentException(
552                    "Parameter data can not be null.");
553        }
554
555        return new CsvReader(new StringReader(data));
556    }
557
558    /**
559     * Reads another record.
560     *
561     * @return Whether another record was successfully read or not.
562     * @exception IOException
563     *                Thrown if an error occurs while reading data from the
564     *                source stream.
565     */
566    public boolean readRecord() throws IOException {
567        checkClosed();
568
569        columnsCount = 0;
570        rawBuffer.Position = 0;
571
572        dataBuffer.LineStart = dataBuffer.Position;
573
574        hasReadNextLine = false;
575
576        // check to see if we've already found the end of data
577
578        if (hasMoreData) {
579            // loop over the data stream until the end of data is found
580            // or the end of the record is found
581
582            do {
583                if (dataBuffer.Position == dataBuffer.Count) {
584                    checkDataLength();
585                } else {
586                    startedWithQualifier = false;
587
588                    // grab the current letter as a char
589
590                    char currentLetter = dataBuffer.Buffer[dataBuffer.Position];
591
592                    if (userSettings.UseTextQualifier
593                            && currentLetter == userSettings.TextQualifier) {
594                        // this will be a text qualified column, so
595                        // we need to set startedWithQualifier to make it
596                        // enter the seperate branch to handle text
597                        // qualified columns
598
599                        lastLetter = currentLetter;
600
601                        // read qualified
602                        startedColumn = true;
603                        dataBuffer.ColumnStart = dataBuffer.Position + 1;
604                        startedWithQualifier = true;
605                        boolean lastLetterWasQualifier = false;
606
607                        char escapeChar = userSettings.TextQualifier;
608
609                        if (userSettings.EscapeMode == ESCAPE_MODE_BACKSLASH) {
610                            escapeChar = Letters.BACKSLASH;
611                        }
612
613                        boolean eatingTrailingJunk = false;
614                        boolean lastLetterWasEscape = false;
615                        boolean readingComplexEscape = false;
616                        int escape = ComplexEscape.UNICODE;
617                        int escapeLength = 0;
618                        char escapeValue = (char) 0;
619
620                        dataBuffer.Position++;
621
622                        do {
623                            if (dataBuffer.Position == dataBuffer.Count) {
624                                checkDataLength();
625                            } else {
626                                // grab the current letter as a char
627
628                                currentLetter = dataBuffer.Buffer[dataBuffer.Position];
629
630                                if (eatingTrailingJunk) {
631                                    dataBuffer.ColumnStart = dataBuffer.Position + 1;
632
633                                    if (currentLetter == userSettings.Delimiter) {
634                                        endColumn();
635                                    } else if ((!useCustomRecordDelimiter && (currentLetter == Letters.CR || currentLetter == Letters.LF))
636                                            || (useCustomRecordDelimiter && currentLetter == userSettings.RecordDelimiter)) {
637                                        endColumn();
638
639                                        endRecord();
640                                    }
641                                } else if (readingComplexEscape) {
642                                    escapeLength++;
643
644                                    switch (escape) {
645                                        case ComplexEscape.UNICODE:
646                                            escapeValue *= (char) 16;
647                                            escapeValue += hexToDec(currentLetter);
648
649                                            if (escapeLength == 4) {
650                                                readingComplexEscape = false;
651                                            }
652
653                                            break;
654                                        case ComplexEscape.OCTAL:
655                                            escapeValue *= (char) 8;
656                                            escapeValue += (char) (currentLetter - '0');
657
658                                            if (escapeLength == 3) {
659                                                readingComplexEscape = false;
660                                            }
661
662                                            break;
663                                        case ComplexEscape.DECIMAL:
664                                            escapeValue *= (char) 10;
665                                            escapeValue += (char) (currentLetter - '0');
666
667                                            if (escapeLength == 3) {
668                                                readingComplexEscape = false;
669                                            }
670
671                                            break;
672                                        case ComplexEscape.HEX:
673                                            escapeValue *= (char) 16;
674                                            escapeValue += hexToDec(currentLetter);
675
676                                            if (escapeLength == 2) {
677                                                readingComplexEscape = false;
678                                            }
679
680                                            break;
681                                    }
682
683                                    if (!readingComplexEscape) {
684                                        appendLetter(escapeValue);
685                                    } else {
686                                        dataBuffer.ColumnStart = dataBuffer.Position + 1;
687                                    }
688                                } else if (currentLetter == userSettings.TextQualifier) {
689                                    if (lastLetterWasEscape) {
690                                        lastLetterWasEscape = false;
691                                        lastLetterWasQualifier = false;
692                                    } else {
693                                        updateCurrentValue();
694
695                                        if (userSettings.EscapeMode == ESCAPE_MODE_DOUBLED) {
696                                            lastLetterWasEscape = true;
697                                        }
698
699                                        lastLetterWasQualifier = true;
700                                    }
701                                } else if (userSettings.EscapeMode == ESCAPE_MODE_BACKSLASH
702                                        && lastLetterWasEscape) {
703                                    switch (currentLetter) {
704                                        case 'n':
705                                            appendLetter(Letters.LF);
706                                            break;
707                                        case 'r':
708                                            appendLetter(Letters.CR);
709                                            break;
710                                        case 't':
711                                            appendLetter(Letters.TAB);
712                                            break;
713                                        case 'b':
714                                            appendLetter(Letters.BACKSPACE);
715                                            break;
716                                        case 'f':
717                                            appendLetter(Letters.FORM_FEED);
718                                            break;
719                                        case 'e':
720                                            appendLetter(Letters.ESCAPE);
721                                            break;
722                                        case 'v':
723                                            appendLetter(Letters.VERTICAL_TAB);
724                                            break;
725                                        case 'a':
726                                            appendLetter(Letters.ALERT);
727                                            break;
728                                        case '0':
729                                        case '1':
730                                        case '2':
731                                        case '3':
732                                        case '4':
733                                        case '5':
734                                        case '6':
735                                        case '7':
736                                            escape = ComplexEscape.OCTAL;
737                                            readingComplexEscape = true;
738                                            escapeLength = 1;
739                                            escapeValue = (char) (currentLetter - '0');
740                                            dataBuffer.ColumnStart = dataBuffer.Position + 1;
741                                            break;
742                                        case 'u':
743                                        case 'x':
744                                        case 'o':
745                                        case 'd':
746                                        case 'U':
747                                        case 'X':
748                                        case 'O':
749                                        case 'D':
750                                            switch (currentLetter) {
751                                                case 'u':
752                                                case 'U':
753                                                    escape = ComplexEscape.UNICODE;
754                                                    break;
755                                                case 'x':
756                                                case 'X':
757                                                    escape = ComplexEscape.HEX;
758                                                    break;
759                                                case 'o':
760                                                case 'O':
761                                                    escape = ComplexEscape.OCTAL;
762                                                    break;
763                                                case 'd':
764                                                case 'D':
765                                                    escape = ComplexEscape.DECIMAL;
766                                                    break;
767                                            }
768
769                                            readingComplexEscape = true;
770                                            escapeLength = 0;
771                                            escapeValue = (char) 0;
772                                            dataBuffer.ColumnStart = dataBuffer.Position + 1;
773
774                                            break;
775                                        default:
776                                            break;
777                                    }
778
779                                    lastLetterWasEscape = false;
780
781                                    // can only happen for ESCAPE_MODE_BACKSLASH
782                                } else if (currentLetter == escapeChar) {
783                                    updateCurrentValue();
784                                    lastLetterWasEscape = true;
785                                } else {
786                                    if (lastLetterWasQualifier) {
787                                        if (currentLetter == userSettings.Delimiter) {
788                                            endColumn();
789                                        } else if ((!useCustomRecordDelimiter && (currentLetter == Letters.CR || currentLetter == Letters.LF))
790                                                || (useCustomRecordDelimiter && currentLetter == userSettings.RecordDelimiter)) {
791                                            endColumn();
792
793                                            endRecord();
794                                        } else {
795                                            dataBuffer.ColumnStart = dataBuffer.Position + 1;
796
797                                            eatingTrailingJunk = true;
798                                        }
799
800                                        // make sure to clear the flag for next
801                                        // run of the loop
802
803                                        lastLetterWasQualifier = false;
804                                    }
805                                }
806
807                                // keep track of the last letter because we need
808                                // it for several key decisions
809
810                                lastLetter = currentLetter;
811
812                                if (startedColumn) {
813                                    dataBuffer.Position++;
814
815                                    if (userSettings.SafetySwitch
816                                            && dataBuffer.Position
817                                            - dataBuffer.ColumnStart
818                                            + columnBuffer.Position > 100000) {
819                                        close();
820
821                                        throw new IOException(
822                                                "Maximum column length of 100,000 exceeded in column "
823                                                        + NumberFormat
824                                                        .getIntegerInstance()
825                                                        .format(
826                                                                columnsCount)
827                                                        + " in record "
828                                                        + NumberFormat
829                                                        .getIntegerInstance()
830                                                        .format(
831                                                                currentRecord)
832                                                        + ". Set the SafetySwitch property to false"
833                                                        + " if you're expecting column lengths greater than 100,000 characters to"
834                                                        + " avoid this error.");
835                                    }
836                                }
837                            } // end else
838
839                        } while (hasMoreData && startedColumn);
840                    } else if (currentLetter == userSettings.Delimiter) {
841                        // we encountered a column with no data, so
842                        // just send the end column
843
844                        lastLetter = currentLetter;
845
846                        endColumn();
847                    } else if (useCustomRecordDelimiter
848                            && currentLetter == userSettings.RecordDelimiter) {
849                        // this will skip blank lines
850                        if (startedColumn || columnsCount > 0
851                                || !userSettings.SkipEmptyRecords) {
852                            endColumn();
853
854                            endRecord();
855                        } else {
856                            dataBuffer.LineStart = dataBuffer.Position + 1;
857                        }
858
859                        lastLetter = currentLetter;
860                    } else if (!useCustomRecordDelimiter
861                            && (currentLetter == Letters.CR || currentLetter == Letters.LF)) {
862                        // this will skip blank lines
863                        if (startedColumn
864                                || columnsCount > 0
865                                || (!userSettings.SkipEmptyRecords && (currentLetter == Letters.CR || lastLetter != Letters.CR))) {
866                            endColumn();
867
868                            endRecord();
869                        } else {
870                            dataBuffer.LineStart = dataBuffer.Position + 1;
871                        }
872
873                        lastLetter = currentLetter;
874                    } else if (userSettings.UseComments && columnsCount == 0
875                            && currentLetter == userSettings.Comment) {
876                        // encountered a comment character at the beginning of
877                        // the line so just ignore the rest of the line
878
879                        lastLetter = currentLetter;
880
881                        skipLine();
882                    } else if (userSettings.TrimWhitespace
883                            && (currentLetter == Letters.SPACE || currentLetter == Letters.TAB)) {
884                        // do nothing, this will trim leading whitespace
885                        // for both text qualified columns and non
886
887                        startedColumn = true;
888                        dataBuffer.ColumnStart = dataBuffer.Position + 1;
889                    } else {
890                        // since the letter wasn't a special letter, this
891                        // will be the first letter of our current column
892
893                        startedColumn = true;
894                        dataBuffer.ColumnStart = dataBuffer.Position;
895                        boolean lastLetterWasBackslash = false;
896                        boolean readingComplexEscape = false;
897                        int escape = ComplexEscape.UNICODE;
898                        int escapeLength = 0;
899                        char escapeValue = (char) 0;
900
901                        boolean firstLoop = true;
902
903                        do {
904                            if (!firstLoop
905                                    && dataBuffer.Position == dataBuffer.Count) {
906                                checkDataLength();
907                            } else {
908                                if (!firstLoop) {
909                                    // grab the current letter as a char
910                                    currentLetter = dataBuffer.Buffer[dataBuffer.Position];
911                                }
912
913                                if (!userSettings.UseTextQualifier
914                                        && userSettings.EscapeMode == ESCAPE_MODE_BACKSLASH
915                                        && currentLetter == Letters.BACKSLASH) {
916                                    if (lastLetterWasBackslash) {
917                                        lastLetterWasBackslash = false;
918                                    } else {
919                                        updateCurrentValue();
920                                        lastLetterWasBackslash = true;
921                                    }
922                                } else if (readingComplexEscape) {
923                                    escapeLength++;
924
925                                    switch (escape) {
926                                        case ComplexEscape.UNICODE:
927                                            escapeValue *= (char) 16;
928                                            escapeValue += hexToDec(currentLetter);
929
930                                            if (escapeLength == 4) {
931                                                readingComplexEscape = false;
932                                            }
933
934                                            break;
935                                        case ComplexEscape.OCTAL:
936                                            escapeValue *= (char) 8;
937                                            escapeValue += (char) (currentLetter - '0');
938
939                                            if (escapeLength == 3) {
940                                                readingComplexEscape = false;
941                                            }
942
943                                            break;
944                                        case ComplexEscape.DECIMAL:
945                                            escapeValue *= (char) 10;
946                                            escapeValue += (char) (currentLetter - '0');
947
948                                            if (escapeLength == 3) {
949                                                readingComplexEscape = false;
950                                            }
951
952                                            break;
953                                        case ComplexEscape.HEX:
954                                            escapeValue *= (char) 16;
955                                            escapeValue += hexToDec(currentLetter);
956
957                                            if (escapeLength == 2) {
958                                                readingComplexEscape = false;
959                                            }
960
961                                            break;
962                                    }
963
964                                    if (!readingComplexEscape) {
965                                        appendLetter(escapeValue);
966                                    } else {
967                                        dataBuffer.ColumnStart = dataBuffer.Position + 1;
968                                    }
969                                } else if (userSettings.EscapeMode == ESCAPE_MODE_BACKSLASH
970                                        && lastLetterWasBackslash) {
971                                    switch (currentLetter) {
972                                        case 'n':
973                                            appendLetter(Letters.LF);
974                                            break;
975                                        case 'r':
976                                            appendLetter(Letters.CR);
977                                            break;
978                                        case 't':
979                                            appendLetter(Letters.TAB);
980                                            break;
981                                        case 'b':
982                                            appendLetter(Letters.BACKSPACE);
983                                            break;
984                                        case 'f':
985                                            appendLetter(Letters.FORM_FEED);
986                                            break;
987                                        case 'e':
988                                            appendLetter(Letters.ESCAPE);
989                                            break;
990                                        case 'v':
991                                            appendLetter(Letters.VERTICAL_TAB);
992                                            break;
993                                        case 'a':
994                                            appendLetter(Letters.ALERT);
995                                            break;
996                                        case '0':
997                                        case '1':
998                                        case '2':
999                                        case '3':
1000                                        case '4':
1001                                        case '5':
1002                                        case '6':
1003                                        case '7':
1004                                            escape = ComplexEscape.OCTAL;
1005                                            readingComplexEscape = true;
1006                                            escapeLength = 1;
1007                                            escapeValue = (char) (currentLetter - '0');
1008                                            dataBuffer.ColumnStart = dataBuffer.Position + 1;
1009                                            break;
1010                                        case 'u':
1011                                        case 'x':
1012                                        case 'o':
1013                                        case 'd':
1014                                        case 'U':
1015                                        case 'X':
1016                                        case 'O':
1017                                        case 'D':
1018                                            switch (currentLetter) {
1019                                                case 'u':
1020                                                case 'U':
1021                                                    escape = ComplexEscape.UNICODE;
1022                                                    break;
1023                                                case 'x':
1024                                                case 'X':
1025                                                    escape = ComplexEscape.HEX;
1026                                                    break;
1027                                                case 'o':
1028                                                case 'O':
1029                                                    escape = ComplexEscape.OCTAL;
1030                                                    break;
1031                                                case 'd':
1032                                                case 'D':
1033                                                    escape = ComplexEscape.DECIMAL;
1034                                                    break;
1035                                            }
1036
1037                                            readingComplexEscape = true;
1038                                            escapeLength = 0;
1039                                            escapeValue = (char) 0;
1040                                            dataBuffer.ColumnStart = dataBuffer.Position + 1;
1041
1042                                            break;
1043                                        default:
1044                                            break;
1045                                    }
1046
1047                                    lastLetterWasBackslash = false;
1048                                } else {
1049                                    if (currentLetter == userSettings.Delimiter) {
1050                                        endColumn();
1051                                    } else if ((!useCustomRecordDelimiter && (currentLetter == Letters.CR || currentLetter == Letters.LF))
1052                                            || (useCustomRecordDelimiter && currentLetter == userSettings.RecordDelimiter)) {
1053                                        endColumn();
1054
1055                                        endRecord();
1056                                    }
1057                                }
1058
1059                                // keep track of the last letter because we need
1060                                // it for several key decisions
1061
1062                                lastLetter = currentLetter;
1063                                firstLoop = false;
1064
1065                                if (startedColumn) {
1066                                    dataBuffer.Position++;
1067
1068                                    if (userSettings.SafetySwitch
1069                                            && dataBuffer.Position
1070                                            - dataBuffer.ColumnStart
1071                                            + columnBuffer.Position > 100000) {
1072                                        close();
1073
1074                                        throw new IOException(
1075                                                "Maximum column length of 100,000 exceeded in column "
1076                                                        + NumberFormat
1077                                                        .getIntegerInstance()
1078                                                        .format(
1079                                                                columnsCount)
1080                                                        + " in record "
1081                                                        + NumberFormat
1082                                                        .getIntegerInstance()
1083                                                        .format(
1084                                                                currentRecord)
1085                                                        + ". Set the SafetySwitch property to false"
1086                                                        + " if you're expecting column lengths greater than 100,000 characters to"
1087                                                        + " avoid this error.");
1088                                    }
1089                                }
1090                            } // end else
1091                        } while (hasMoreData && startedColumn);
1092                    }
1093
1094                    if (hasMoreData) {
1095                        dataBuffer.Position++;
1096                    }
1097                } // end else
1098            } while (hasMoreData && !hasReadNextLine);
1099
1100            // check to see if we hit the end of the file
1101            // without processing the current record
1102
1103            if (startedColumn || lastLetter == userSettings.Delimiter) {
1104                endColumn();
1105
1106                endRecord();
1107            }
1108        }
1109
1110        if (userSettings.CaptureRawRecord) {
1111            if (hasMoreData) {
1112                if (rawBuffer.Position == 0) {
1113                    rawRecord = new String(dataBuffer.Buffer,
1114                            dataBuffer.LineStart, dataBuffer.Position
1115                            - dataBuffer.LineStart - 1);
1116                } else {
1117                    rawRecord = new String(rawBuffer.Buffer, 0,
1118                            rawBuffer.Position)
1119                            + new String(dataBuffer.Buffer,
1120                            dataBuffer.LineStart, dataBuffer.Position
1121                            - dataBuffer.LineStart - 1);
1122                }
1123            } else {
1124                // for hasMoreData to ever be false, all data would have had to
1125                // have been
1126                // copied to the raw buffer
1127                rawRecord = new String(rawBuffer.Buffer, 0, rawBuffer.Position);
1128            }
1129        } else {
1130            rawRecord = "";
1131        }
1132
1133        return hasReadNextLine;
1134    }
1135
1136    /**
1137     * @exception IOException
1138     *                Thrown if an error occurs while reading data from the
1139     *                source stream.
1140     */
1141    private void checkDataLength() throws IOException {
1142        if (!initialized) {
1143            if (fileName != null) {
1144                inputStream = new BufferedReader(new InputStreamReader(
1145                        new FileInputStream(fileName), charset),
1146                        StaticSettings.MAX_FILE_BUFFER_SIZE);
1147            }
1148
1149            charset = null;
1150            initialized = true;
1151        }
1152
1153        updateCurrentValue();
1154
1155        if (userSettings.CaptureRawRecord && dataBuffer.Count > 0) {
1156            if (rawBuffer.Buffer.length - rawBuffer.Position < dataBuffer.Count
1157                    - dataBuffer.LineStart) {
1158                int newLength = rawBuffer.Buffer.length
1159                        + Math.max(dataBuffer.Count - dataBuffer.LineStart,
1160                        rawBuffer.Buffer.length);
1161
1162                char[] holder = new char[newLength];
1163
1164                System.arraycopy(rawBuffer.Buffer, 0, holder, 0,
1165                        rawBuffer.Position);
1166
1167                rawBuffer.Buffer = holder;
1168            }
1169
1170            System.arraycopy(dataBuffer.Buffer, dataBuffer.LineStart,
1171                    rawBuffer.Buffer, rawBuffer.Position, dataBuffer.Count
1172                            - dataBuffer.LineStart);
1173
1174            rawBuffer.Position += dataBuffer.Count - dataBuffer.LineStart;
1175        }
1176
1177        try {
1178            dataBuffer.Count = inputStream.read(dataBuffer.Buffer, 0,
1179                    dataBuffer.Buffer.length);
1180        } catch (IOException ex) {
1181            close();
1182
1183            throw ex;
1184        }
1185
1186        // if no more data could be found, set flag stating that
1187        // the end of the data was found
1188
1189        if (dataBuffer.Count == -1) {
1190            hasMoreData = false;
1191        }
1192
1193        dataBuffer.Position = 0;
1194        dataBuffer.LineStart = 0;
1195        dataBuffer.ColumnStart = 0;
1196    }
1197
1198    /**
1199     * Read the first record of data as column headers.
1200     *
1201     * @return Whether the header record was successfully read or not.
1202     * @exception IOException
1203     *                Thrown if an error occurs while reading data from the
1204     *                source stream.
1205     */
1206    public boolean readHeaders() throws IOException {
1207        boolean result = readRecord();
1208
1209        // copy the header data from the column array
1210        // to the header string array
1211
1212        headersHolder.Length = columnsCount;
1213
1214        headersHolder.Headers = new String[columnsCount];
1215
1216        for (int i = 0; i < headersHolder.Length; i++) {
1217            String columnValue = get(i);
1218
1219            headersHolder.Headers[i] = columnValue;
1220
1221            // if there are duplicate header names, we will save the last one
1222            headersHolder.IndexByName.put(columnValue, new Integer(i));
1223        }
1224
1225        if (result) {
1226            currentRecord--;
1227        }
1228
1229        columnsCount = 0;
1230
1231        return result;
1232    }
1233
1234    /**
1235     * Returns the column header value for a given column index.
1236     *
1237     * @param columnIndex
1238     *            The index of the header column being requested.
1239     * @return The value of the column header at the given column index.
1240     * @exception IOException
1241     *                Thrown if this object has already been closed.
1242     */
1243    public String getHeader(int columnIndex) throws IOException {
1244        checkClosed();
1245
1246        // check to see if we have read the header record yet
1247
1248        // check to see if the column index is within the bounds
1249        // of our header array
1250
1251        if (columnIndex > -1 && columnIndex < headersHolder.Length) {
1252            // return the processed header data for this column
1253
1254            return headersHolder.Headers[columnIndex];
1255        } else {
1256            return "";
1257        }
1258    }
1259
1260    public boolean isQualified(int columnIndex) throws IOException {
1261        checkClosed();
1262
1263        if (columnIndex < columnsCount && columnIndex > -1) {
1264            return isQualified[columnIndex];
1265        } else {
1266            return false;
1267        }
1268    }
1269
1270    /**
1271     * @exception IOException
1272     *                Thrown if a very rare extreme exception occurs during
1273     *                parsing, normally resulting from improper data format.
1274     */
1275    private void endColumn() throws IOException {
1276        String currentValue = "";
1277
1278        // must be called before setting startedColumn = false
1279        if (startedColumn) {
1280            if (columnBuffer.Position == 0) {
1281                if (dataBuffer.ColumnStart < dataBuffer.Position) {
1282                    int lastLetter = dataBuffer.Position - 1;
1283
1284                    if (userSettings.TrimWhitespace && !startedWithQualifier) {
1285                        while (lastLetter >= dataBuffer.ColumnStart
1286                                && (dataBuffer.Buffer[lastLetter] == Letters.SPACE || dataBuffer.Buffer[lastLetter] == Letters.TAB)) {
1287                            lastLetter--;
1288                        }
1289                    }
1290
1291                    currentValue = new String(dataBuffer.Buffer,
1292                            dataBuffer.ColumnStart, lastLetter
1293                            - dataBuffer.ColumnStart + 1);
1294                }
1295            } else {
1296                updateCurrentValue();
1297
1298                int lastLetter = columnBuffer.Position - 1;
1299
1300                if (userSettings.TrimWhitespace && !startedWithQualifier) {
1301                    while (lastLetter >= 0
1302                            && (columnBuffer.Buffer[lastLetter] == Letters.SPACE || columnBuffer.Buffer[lastLetter] == Letters.SPACE)) {
1303                        lastLetter--;
1304                    }
1305                }
1306
1307                currentValue = new String(columnBuffer.Buffer, 0,
1308                        lastLetter + 1);
1309            }
1310        }
1311
1312        columnBuffer.Position = 0;
1313
1314        startedColumn = false;
1315
1316        if (columnsCount >= 100000 && userSettings.SafetySwitch) {
1317            close();
1318
1319            throw new IOException(
1320                    "Maximum column count of 100,000 exceeded in record "
1321                            + NumberFormat.getIntegerInstance().format(
1322                            currentRecord)
1323                            + ". Set the SafetySwitch property to false"
1324                            + " if you're expecting more than 100,000 columns per record to"
1325                            + " avoid this error.");
1326        }
1327
1328        // check to see if our current holder array for
1329        // column chunks is still big enough to handle another
1330        // column chunk
1331
1332        if (columnsCount == values.length) {
1333            // holder array needs to grow to be able to hold another column
1334            int newLength = values.length * 2;
1335
1336            String[] holder = new String[newLength];
1337
1338            System.arraycopy(values, 0, holder, 0, values.length);
1339
1340            values = holder;
1341
1342            boolean[] qualifiedHolder = new boolean[newLength];
1343
1344            System.arraycopy(isQualified, 0, qualifiedHolder, 0,
1345                    isQualified.length);
1346
1347            isQualified = qualifiedHolder;
1348        }
1349
1350        values[columnsCount] = currentValue;
1351
1352        isQualified[columnsCount] = startedWithQualifier;
1353
1354        currentValue = "";
1355
1356        columnsCount++;
1357    }
1358
1359    private void appendLetter(char letter) {
1360        if (columnBuffer.Position == columnBuffer.Buffer.length) {
1361            int newLength = columnBuffer.Buffer.length * 2;
1362
1363            char[] holder = new char[newLength];
1364
1365            System.arraycopy(columnBuffer.Buffer, 0, holder, 0,
1366                    columnBuffer.Position);
1367
1368            columnBuffer.Buffer = holder;
1369        }
1370        columnBuffer.Buffer[columnBuffer.Position++] = letter;
1371        dataBuffer.ColumnStart = dataBuffer.Position + 1;
1372    }
1373
1374    private void updateCurrentValue() {
1375        if (startedColumn && dataBuffer.ColumnStart < dataBuffer.Position) {
1376            if (columnBuffer.Buffer.length - columnBuffer.Position < dataBuffer.Position
1377                    - dataBuffer.ColumnStart) {
1378                int newLength = columnBuffer.Buffer.length
1379                        + Math.max(
1380                        dataBuffer.Position - dataBuffer.ColumnStart,
1381                        columnBuffer.Buffer.length);
1382
1383                char[] holder = new char[newLength];
1384
1385                System.arraycopy(columnBuffer.Buffer, 0, holder, 0,
1386                        columnBuffer.Position);
1387
1388                columnBuffer.Buffer = holder;
1389            }
1390
1391            System.arraycopy(dataBuffer.Buffer, dataBuffer.ColumnStart,
1392                    columnBuffer.Buffer, columnBuffer.Position,
1393                    dataBuffer.Position - dataBuffer.ColumnStart);
1394
1395            columnBuffer.Position += dataBuffer.Position
1396                    - dataBuffer.ColumnStart;
1397        }
1398
1399        dataBuffer.ColumnStart = dataBuffer.Position + 1;
1400    }
1401
1402    /**
1403     * @exception IOException
1404     *                Thrown if an error occurs while reading data from the
1405     *                source stream.
1406     */
1407    private void endRecord() throws IOException {
1408        // this flag is used as a loop exit condition
1409        // during parsing
1410
1411        hasReadNextLine = true;
1412
1413        currentRecord++;
1414    }
1415
1416    /**
1417     * Gets the corresponding column index for a given column header name.
1418     *
1419     * @param headerName
1420     *            The header name of the column.
1421     * @return The column index for the given column header name.&nbsp;Returns
1422     *         -1 if not found.
1423     * @exception IOException
1424     *                Thrown if this object has already been closed.
1425     */
1426    public int getIndex(String headerName) throws IOException {
1427        checkClosed();
1428
1429        Object indexValue = headersHolder.IndexByName.get(headerName);
1430
1431        if (indexValue != null) {
1432            return ((Integer) indexValue).intValue();
1433        } else {
1434            return -1;
1435        }
1436    }
1437
1438    /**
1439     * Skips the next record of data by parsing each column.&nbsp;Does not
1440     * increment
1441     * {@link CsvReader#getCurrentRecord getCurrentRecord()}.
1442     *
1443     * @return Whether another record was successfully skipped or not.
1444     * @exception IOException
1445     *                Thrown if an error occurs while reading data from the
1446     *                source stream.
1447     */
1448    public boolean skipRecord() throws IOException {
1449        checkClosed();
1450
1451        boolean recordRead = false;
1452
1453        if (hasMoreData) {
1454            recordRead = readRecord();
1455
1456            if (recordRead) {
1457                currentRecord--;
1458            }
1459        }
1460
1461        return recordRead;
1462    }
1463
1464    /**
1465     * Skips the next line of data using the standard end of line characters and
1466     * does not do any column delimited parsing.
1467     *
1468     * @return Whether a line was successfully skipped or not.
1469     * @exception IOException
1470     *                Thrown if an error occurs while reading data from the
1471     *                source stream.
1472     */
1473    public boolean skipLine() throws IOException {
1474        checkClosed();
1475
1476        // clear public column values for current line
1477
1478        columnsCount = 0;
1479
1480        boolean skippedLine = false;
1481
1482        if (hasMoreData) {
1483            boolean foundEol = false;
1484
1485            do {
1486                if (dataBuffer.Position == dataBuffer.Count) {
1487                    checkDataLength();
1488                } else {
1489                    skippedLine = true;
1490
1491                    // grab the current letter as a char
1492
1493                    char currentLetter = dataBuffer.Buffer[dataBuffer.Position];
1494
1495                    if (currentLetter == Letters.CR
1496                            || currentLetter == Letters.LF) {
1497                        foundEol = true;
1498                    }
1499
1500                    // keep track of the last letter because we need
1501                    // it for several key decisions
1502
1503                    lastLetter = currentLetter;
1504
1505                    if (!foundEol) {
1506                        dataBuffer.Position++;
1507                    }
1508
1509                } // end else
1510            } while (hasMoreData && !foundEol);
1511
1512            columnBuffer.Position = 0;
1513
1514            dataBuffer.LineStart = dataBuffer.Position + 1;
1515        }
1516
1517        rawBuffer.Position = 0;
1518        rawRecord = "";
1519
1520        return skippedLine;
1521    }
1522
1523    /**
1524     * Closes and releases all related resources.
1525     */
1526    public void close() {
1527        if (!closed) {
1528            close(true);
1529
1530            closed = true;
1531        }
1532    }
1533
1534    /**
1535     *
1536     */
1537    private void close(boolean closing) {
1538        if (!closed) {
1539            if (closing) {
1540                charset = null;
1541                headersHolder.Headers = null;
1542                headersHolder.IndexByName = null;
1543                dataBuffer.Buffer = null;
1544                columnBuffer.Buffer = null;
1545                rawBuffer.Buffer = null;
1546            }
1547
1548            try {
1549                if (initialized) {
1550                    inputStream.close();
1551                }
1552            } catch (Exception e) {
1553                // just eat the exception
1554            }
1555
1556            inputStream = null;
1557
1558            closed = true;
1559        }
1560    }
1561
1562    /**
1563     * @exception IOException
1564     *                Thrown if this object has already been closed.
1565     */
1566    private void checkClosed() throws IOException {
1567        if (closed) {
1568            throw new IOException(
1569                    "This instance of the CsvReader class has already been closed.");
1570        }
1571    }
1572
1573    /**
1574     *
1575     */
1576    protected void finalize() {
1577        close(false);
1578    }
1579
1580    private class ComplexEscape {
1581        private static final int UNICODE = 1;
1582
1583        private static final int OCTAL = 2;
1584
1585        private static final int DECIMAL = 3;
1586
1587        private static final int HEX = 4;
1588    }
1589
1590    private static char hexToDec(char hex) {
1591        char result;
1592
1593        if (hex >= 'a') {
1594            result = (char) (hex - 'a' + 10);
1595        } else if (hex >= 'A') {
1596            result = (char) (hex - 'A' + 10);
1597        } else {
1598            result = (char) (hex - '0');
1599        }
1600
1601        return result;
1602    }
1603
1604    private class DataBuffer {
1605        public char[] Buffer;
1606
1607        public int Position;
1608
1609        // / <summary>
1610        // / How much usable data has been read into the stream,
1611        // / which will not always be as long as Buffer.Length.
1612        // / </summary>
1613        public int Count;
1614
1615        // / <summary>
1616        // / The position of the cursor in the buffer when the
1617        // / current column was started or the last time data
1618        // / was moved out to the column buffer.
1619        // / </summary>
1620        public int ColumnStart;
1621
1622        public int LineStart;
1623
1624        public DataBuffer() {
1625            Buffer = new char[StaticSettings.MAX_BUFFER_SIZE];
1626            Position = 0;
1627            Count = 0;
1628            ColumnStart = 0;
1629            LineStart = 0;
1630        }
1631    }
1632
1633    private class ColumnBuffer {
1634        public char[] Buffer;
1635
1636        public int Position;
1637
1638        public ColumnBuffer() {
1639            Buffer = new char[StaticSettings.INITIAL_COLUMN_BUFFER_SIZE];
1640            Position = 0;
1641        }
1642    }
1643
1644    private class RawRecordBuffer {
1645        public char[] Buffer;
1646
1647        public int Position;
1648
1649        public RawRecordBuffer() {
1650            Buffer = new char[StaticSettings.INITIAL_COLUMN_BUFFER_SIZE
1651                    * StaticSettings.INITIAL_COLUMN_COUNT];
1652            Position = 0;
1653        }
1654    }
1655
1656    private class Letters {
1657        public static final char LF = '\n';
1658
1659        public static final char CR = '\r';
1660
1661        public static final char QUOTE = '"';
1662
1663        public static final char COMMA = ',';
1664
1665        public static final char SPACE = ' ';
1666
1667        public static final char TAB = '\t';
1668
1669        public static final char POUND = '#';
1670
1671        public static final char BACKSLASH = '\\';
1672
1673        public static final char NULL = '\0';
1674
1675        public static final char BACKSPACE = '\b';
1676
1677        public static final char FORM_FEED = '\f';
1678
1679        public static final char ESCAPE = '\u001B'; // ASCII/ANSI escape
1680
1681        public static final char VERTICAL_TAB = '\u000B';
1682
1683        public static final char ALERT = '\u0007';
1684    }
1685
1686    private class UserSettings {
1687        // having these as publicly accessible members will prevent
1688        // the overhead of the method call that exists on properties
1689        public boolean CaseSensitive;
1690
1691        public char TextQualifier;
1692
1693        public boolean TrimWhitespace;
1694
1695        public boolean UseTextQualifier;
1696
1697        public char Delimiter;
1698
1699        public char RecordDelimiter;
1700
1701        public char Comment;
1702
1703        public boolean UseComments;
1704
1705        public int EscapeMode;
1706
1707        public boolean SafetySwitch;
1708
1709        public boolean SkipEmptyRecords;
1710
1711        public boolean CaptureRawRecord;
1712
1713        public UserSettings() {
1714            CaseSensitive = true;
1715            TextQualifier = Letters.QUOTE;
1716            TrimWhitespace = true;
1717            UseTextQualifier = true;
1718            Delimiter = Letters.COMMA;
1719            RecordDelimiter = Letters.NULL;
1720            Comment = Letters.POUND;
1721            UseComments = false;
1722            EscapeMode = CsvReader.ESCAPE_MODE_DOUBLED;
1723            SafetySwitch = true;
1724            SkipEmptyRecords = true;
1725            CaptureRawRecord = true;
1726        }
1727    }
1728
1729    private class HeadersHolder {
1730        public String[] Headers;
1731
1732        public int Length;
1733
1734        public HashMap IndexByName;
1735
1736        public HeadersHolder() {
1737            Headers = null;
1738            Length = 0;
1739            IndexByName = new HashMap();
1740        }
1741    }
1742
1743    private class StaticSettings {
1744        // these are static instead of final so they can be changed in unit test
1745        // isn't visible outside this class and is only accessed once during
1746        // CsvReader construction
1747        public static final int MAX_BUFFER_SIZE = 1024;
1748
1749        public static final int MAX_FILE_BUFFER_SIZE = 4 * 1024;
1750
1751        public static final int INITIAL_COLUMN_COUNT = 10;
1752
1753        public static final int INITIAL_COLUMN_BUFFER_SIZE = 50;
1754    }
1755}