001package gudusoft.gsqlparser.util.csv;
002
003import java.io.*;
004import java.nio.charset.Charset;
005import java.text.NumberFormat;
006import java.util.Arrays;
007import java.util.HashMap;
008import java.util.Map;
009
010public class CsvReader {
011
    /** Escape mode in which the text qualifier itself acts as the escape character inside a qualified column. */
    public static final int ESCAPE_MODE_DOUBLED = 1;
    /** Escape mode in which a backslash introduces escape sequences. */
    public static final int ESCAPE_MODE_BACKSLASH = 2;

    // Character source. Supplied directly by the Reader-based constructors, or opened
    // lazily from `path` on the first call to checkDataLength().
    private Reader reader = null;
    // File to open lazily; stays null when the reader was handed in by the caller.
    private String path = null;
    // Parser settings exposed through the getters/setters below.
    private Cfg cfg = new Cfg(this);
    // Charset used when opening `path`; reset to null once the reader has been initialised.
    private Charset cs = null;
    // Set by setRecordDelimiter(); when true, records end at cfg.RecordDelimiter instead of CR/LF.
    private boolean useCustomRecDelim = false;

    // Block of characters most recently read from `reader`, with Position/Count/LineStart/ColumnStart cursors.
    private Buf buf = new Buf();
    // Column accumulation buffer; its Position contributes to the SafetySwitch length check in readRecord().
    private ColBuf colBuf = new ColBuf();
    // Carries raw record text across buffer refills when cfg.CaptureRawRecord is on.
    private RawBuf rawBuf = new RawBuf();

    // One flag per slot of `fields`, allocated in the constructors.
    // NOTE(review): presumably marks columns that were text-qualified (see isQualified) -- confirm.
    private boolean[] qualified = null;
    // Raw text of the last record, as returned by getRawRecord(); "" when capture is disabled.
    private String rawRow = "";
    // Header names plus a name-to-index map.
    private Hdrs headers = new Hdrs();

    // True while a column is being parsed.
    // NOTE(review): presumably cleared by endColumn(), which is outside this view -- confirm.
    private boolean fieldStarted = false;
    // True when the column currently being parsed opened with the text qualifier.
    private boolean beganWithQualifier = false;
    // Cleared by checkDataLength() once the reader reports end of stream.
    private boolean hasMore = true;
    // Last character processed by readRecord(); used for CR/LF pairing and the trailing-delimiter check.
    private char prevChar = '\0';
    // Cleared at the start of readRecord() and returned as its result.
    // NOTE(review): presumably set by endRecord() -- confirm.
    private boolean readNextLine = false;

    // Number of columns in the current record.
    private int colCount = 0;
    // Record counter; getCurrentRecord() reports rowIndex - 1.
    private long rowIndex = 0;
    // Values of the current record; only the first colCount entries are meaningful.
    private String[] fields = new String[Statics.INITIAL_COLUMN_COUNT];

    // True once `reader` is ready for use (immediately for the Reader-based constructors).
    private boolean init = false;
    // NOTE(review): presumably set by close() and tested by checkClosed(); neither is visible here.
    private boolean closed = false;
041
042    public CsvReader(String fileName, char delimiter, Charset charset) throws FileNotFoundException {
043        if (fileName == null) {
044            throw new IllegalArgumentException("Parameter fileName can not be null.");
045        }
046        if (charset == null) {
047            throw new IllegalArgumentException("Parameter charset can not be null.");
048        }
049        if (!new File(fileName).exists()) {
050            throw new FileNotFoundException("File " + fileName + " does not exist.");
051        }
052        this.path = fileName;
053        this.cfg.Delimiter = delimiter;
054        this.cs = charset;
055        this.qualified = new boolean[fields.length];
056    }
057
058    public CsvReader(String fileName, char delimiter) throws FileNotFoundException {
059        this(fileName, delimiter, Charset.forName("UTF-8"));
060    }
061
062    public CsvReader(String fileName) throws FileNotFoundException {
063        this(fileName, Chars.COMMA);
064    }
065
066    public CsvReader(Reader inputStream, char delimiter) {
067        if (inputStream == null) {
068            throw new IllegalArgumentException("Parameter inputStream can not be null.");
069        }
070        this.reader = inputStream;
071        this.cfg.Delimiter = delimiter;
072        this.init = true;
073        this.qualified = new boolean[fields.length];
074    }
075
076    public CsvReader(Reader inputStream) {
077        this(inputStream, Chars.COMMA);
078    }
079
080    public CsvReader(InputStream inputStream, char delimiter, Charset charset) {
081        this(new InputStreamReader(inputStream, charset), delimiter);
082    }
083
084    public CsvReader(InputStream inputStream, Charset charset) {
085        this(new InputStreamReader(inputStream, charset));
086    }
087
088    public boolean getCaptureRawRecord() {
089        return cfg.CaptureRawRecord;
090    }
091
092    public void setCaptureRawRecord(boolean captureRawRecord) {
093        cfg.CaptureRawRecord = captureRawRecord;
094    }
095
096    public String getRawRecord() {
097        return rawRow;
098    }
099
100    public boolean getTrimWhitespace() {
101        return cfg.TrimWhitespace;
102    }
103
104    public void setTrimWhitespace(boolean trimWhitespace) {
105        cfg.TrimWhitespace = trimWhitespace;
106    }
107
108    public char getDelimiter() {
109        return cfg.Delimiter;
110    }
111
112    public void setDelimiter(char delimiter) {
113        cfg.Delimiter = delimiter;
114    }
115
116    public char getRecordDelimiter() {
117        return cfg.RecordDelimiter;
118    }
119
120    public void setRecordDelimiter(char recordDelimiter) {
121        useCustomRecDelim = true;
122        cfg.RecordDelimiter = recordDelimiter;
123    }
124
125    public char getTextQualifier() {
126        return cfg.TextQualifier;
127    }
128
129    public void setTextQualifier(char textQualifier) {
130        cfg.TextQualifier = textQualifier;
131    }
132
133    public boolean getUseTextQualifier() {
134        return cfg.UseTextQualifier;
135    }
136
137    public void setUseTextQualifier(boolean useTextQualifier) {
138        cfg.UseTextQualifier = useTextQualifier;
139    }
140
141    public char getComment() {
142        return cfg.Comment;
143    }
144
145    public void setComment(char comment) {
146        cfg.Comment = comment;
147    }
148
149    public boolean getUseComments() {
150        return cfg.UseComments;
151    }
152
153    public void setUseComments(boolean useComments) {
154        cfg.UseComments = useComments;
155    }
156
157    public int getEscapeMode() {
158        return cfg.EscapeMode;
159    }
160
161    public void setEscapeMode(int escapeMode) throws IllegalArgumentException {
162        if (escapeMode != ESCAPE_MODE_DOUBLED && escapeMode != ESCAPE_MODE_BACKSLASH) {
163            throw new IllegalArgumentException("Parameter escapeMode must be a valid value.");
164        }
165        cfg.EscapeMode = escapeMode;
166    }
167
168    public boolean getSkipEmptyRecords() {
169        return cfg.SkipEmptyRecords;
170    }
171
172    public void setSkipEmptyRecords(boolean skipEmptyRecords) {
173        cfg.SkipEmptyRecords = skipEmptyRecords;
174    }
175
176    public boolean getSafetySwitch() {
177        return cfg.SafetySwitch;
178    }
179
180    public void setSafetySwitch(boolean safetySwitch) {
181        cfg.SafetySwitch = safetySwitch;
182    }
183
184    public int getColumnCount() {
185        return colCount;
186    }
187
188    public long getCurrentRecord() {
189        return rowIndex - 1;
190    }
191
192    public int getHeaderCount() {
193        return headers.Length;
194    }
195
196    public String[] getHeaders() throws IOException {
197        checkClosed();
198        if (headers.Headers == null) {
199            return null;
200        } else {
201            String[] clone = new String[headers.Length];
202            System.arraycopy(headers.Headers, 0, clone, 0, headers.Length);
203            return clone;
204        }
205    }
206
207    public void setHeaders(String[] hdrs) {
208        headers.Headers = hdrs;
209        headers.IndexByName.clear();
210        headers.Length = (hdrs != null) ? hdrs.length : 0;
211        for (int i = 0; i < headers.Length; i++) {
212            headers.IndexByName.put(hdrs[i], Integer.valueOf(i));
213        }
214    }
215
216    public String[] getValues() throws IOException {
217        checkClosed();
218        String[] clone = new String[colCount];
219        System.arraycopy(fields, 0, clone, 0, colCount);
220        return clone;
221    }
222
223    public String get(int columnIndex) throws IOException {
224        checkClosed();
225        if (columnIndex > -1 && columnIndex < colCount) {
226            return fields[columnIndex];
227        } else {
228            return "";
229        }
230    }
231
232    public String get(String headerName) throws IOException {
233        checkClosed();
234        return get(getIndex(headerName));
235    }
236
237    public static CsvReader parse(String data) {
238        if (data == null) {
239            throw new IllegalArgumentException("Parameter data can not be null.");
240        }
241        return new CsvReader(new StringReader(data));
242    }
243
    /**
     * Parses forward from the current buffer position until the main loop's exit
     * condition is met: either the input is exhausted ({@code hasMore} is false) or
     * {@code readNextLine} has been raised.
     *
     * <p>Each pass of the outer loop looks at the first character of a column and
     * dispatches on it: text-qualified column, empty column (delimiter), record
     * terminator, comment line, leading whitespace to trim, or an unqualified column.
     *
     * <p>NOTE(review): {@code readNextLine} and {@code fieldStarted} are presumably
     * updated by {@code endRecord()} / {@code endColumn()}, which are outside this
     * view -- confirm.
     *
     * @return the value of {@code readNextLine} when parsing stops
     * @throws IOException if {@code checkClosed()} or {@code checkDataLength()} throws,
     *         or if SafetySwitch is on and a column grows beyond 100,000 characters
     */
    public boolean readRecord() throws IOException {
        checkClosed();
        // Reset the per-record state.
        colCount = 0;
        rawBuf.Position = 0;
        buf.LineStart = buf.Position;
        readNextLine = false;

        if (hasMore) {
            do {
                if (buf.Position == buf.Count) {
                    // Buffer consumed: refill. checkDataLength() rewinds the cursors to 0
                    // and clears hasMore when the reader reports end of stream.
                    checkDataLength();
                } else {
                    beganWithQualifier = false;
                    char ch = buf.Buffer[buf.Position];

                    if (cfg.UseTextQualifier && ch == cfg.TextQualifier) {
                        // ---- Text-qualified column ----
                        prevChar = ch;
                        fieldStarted = true;
                        // Content starts right after the opening qualifier.
                        buf.ColumnStart = buf.Position + 1;
                        beganWithQualifier = true;

                        boolean lastWasQualifier = false;
                        // Character that introduces an escape inside the qualified text:
                        // the qualifier itself, or a backslash in backslash mode.
                        char slashOrQuote = cfg.TextQualifier;
                        if (cfg.EscapeMode == ESCAPE_MODE_BACKSLASH) {
                            slashOrQuote = Chars.BACKSLASH;
                        }
                        boolean eatingTail = false;
                        boolean lastWasEscape = false;
                        boolean readingComplexEscape = false;
                        int escType = Esc.UNICODE;
                        int escLen = 0;
                        char escVal = (char) 0;

                        buf.Position++;
                        do {
                            if (buf.Position == buf.Count) {
                                checkDataLength();
                            } else {
                                ch = buf.Buffer[buf.Position];
                                if (eatingTail) {
                                    // Text between the closing qualifier and the next delimiter or
                                    // record end: ColumnStart is pushed past each character.
                                    buf.ColumnStart = buf.Position + 1;
                                    if (ch == cfg.Delimiter) {
                                        endColumn();
                                    } else if ((!useCustomRecDelim && (ch == Chars.CR || ch == Chars.LF))
                                            || (useCustomRecDelim && ch == cfg.RecordDelimiter)) {
                                        endColumn();
                                        endRecord();
                                    }
                                } else if (readingComplexEscape) {
                                    // Accumulate one digit of a numeric escape. Each kind has a fixed
                                    // digit count: unicode 4, octal 3, decimal 3, hex 2.
                                    escLen++;
                                    switch (escType) {
                                        case Esc.UNICODE:
                                            escVal *= (char) 16;
                                            escVal += hexToDec(ch);
                                            if (escLen == 4) readingComplexEscape = false;
                                            break;
                                        case Esc.OCTAL:
                                            escVal *= (char) 8;
                                            escVal += (char) (ch - '0');
                                            if (escLen == 3) readingComplexEscape = false;
                                            break;
                                        case Esc.DECIMAL:
                                            escVal *= (char) 10;
                                            escVal += (char) (ch - '0');
                                            if (escLen == 3) readingComplexEscape = false;
                                            break;
                                        case Esc.HEX:
                                            escVal *= (char) 16;
                                            escVal += hexToDec(ch);
                                            if (escLen == 2) readingComplexEscape = false;
                                            break;
                                    }
                                    if (!readingComplexEscape) {
                                        // Escape complete: emit the decoded character.
                                        appendLetter(escVal);
                                    } else {
                                        // Still inside the escape: keep its digits out of the column span.
                                        buf.ColumnStart = buf.Position + 1;
                                    }
                                } else if (ch == cfg.TextQualifier) {
                                    if (lastWasEscape) {
                                        // Qualifier preceded by the escape character: not a closing qualifier.
                                        lastWasEscape = false;
                                        lastWasQualifier = false;
                                    } else {
                                        // Possible closing qualifier. In doubled mode it may also be the
                                        // first half of an escaped qualifier, so remember both.
                                        updateCurrentValue();
                                        if (cfg.EscapeMode == ESCAPE_MODE_DOUBLED) {
                                            lastWasEscape = true;
                                        }
                                        lastWasQualifier = true;
                                    }
                                } else if (cfg.EscapeMode == ESCAPE_MODE_BACKSLASH && lastWasEscape) {
                                    // Character following a backslash.
                                    switch (ch) {
                                        case 'n':
                                            appendLetter(Chars.LF);
                                            break;
                                        case 'r':
                                            appendLetter(Chars.CR);
                                            break;
                                        case 't':
                                            appendLetter(Chars.TAB);
                                            break;
                                        case 'b':
                                            appendLetter(Chars.BACKSPACE);
                                            break;
                                        case 'f':
                                            appendLetter(Chars.FORM_FEED);
                                            break;
                                        case 'e':
                                            appendLetter(Chars.ESCAPE);
                                            break;
                                        case 'v':
                                            appendLetter(Chars.VERTICAL_TAB);
                                            break;
                                        case 'a':
                                            appendLetter(Chars.ALERT);
                                            break;
                                        case '0':
                                        case '1':
                                        case '2':
                                        case '3':
                                        case '4':
                                        case '5':
                                        case '6':
                                        case '7':
                                            // Octal escape without a type letter: this digit is already the first one.
                                            escType = Esc.OCTAL;
                                            readingComplexEscape = true;
                                            escLen = 1;
                                            escVal = (char) (ch - '0');
                                            buf.ColumnStart = buf.Position + 1;
                                            break;
                                        case 'u':
                                        case 'x':
                                        case 'o':
                                        case 'd':
                                        case 'U':
                                        case 'X':
                                        case 'O':
                                        case 'D':
                                            // Type letter of a numeric escape; its digits follow.
                                            switch (ch) {
                                                case 'u':
                                                case 'U':
                                                    escType = Esc.UNICODE;
                                                    break;
                                                case 'x':
                                                case 'X':
                                                    escType = Esc.HEX;
                                                    break;
                                                case 'o':
                                                case 'O':
                                                    escType = Esc.OCTAL;
                                                    break;
                                                case 'd':
                                                case 'D':
                                                    escType = Esc.DECIMAL;
                                                    break;
                                            }
                                            readingComplexEscape = true;
                                            escLen = 0;
                                            escVal = (char) 0;
                                            buf.ColumnStart = buf.Position + 1;
                                            break;
                                        default:
                                            break;
                                    }
                                    lastWasEscape = false;
                                } else if (ch == slashOrQuote) {
                                    // Outside backslash mode slashOrQuote equals the qualifier, which the
                                    // earlier branch already handled, so this only fires for a backslash.
                                    updateCurrentValue();
                                    lastWasEscape = true;
                                } else {
                                    if (lastWasQualifier) {
                                        // Previous character was an unescaped qualifier, so the qualified text is over.
                                        if (ch == cfg.Delimiter) {
                                            endColumn();
                                        } else if ((!useCustomRecDelim && (ch == Chars.CR || ch == Chars.LF))
                                                || (useCustomRecDelim && ch == cfg.RecordDelimiter)) {
                                            endColumn();
                                            endRecord();
                                        } else {
                                            // Anything else after the closing qualifier is skipped as tail.
                                            buf.ColumnStart = buf.Position + 1;
                                            eatingTail = true;
                                        }
                                        lastWasQualifier = false;
                                    }
                                }

                                prevChar = ch;
                                if (fieldStarted) {
                                    // Column still open: advance and enforce the safety limit.
                                    buf.Position++;
                                    if (cfg.SafetySwitch && buf.Position - buf.ColumnStart + colBuf.Position > 100000) {
                                        close();
                                        throw new IOException("Maximum column length of 100,000 exceeded in column "
                                                + NumberFormat.getIntegerInstance().format(colCount)
                                                + " in record "
                                                + NumberFormat.getIntegerInstance().format(rowIndex)
                                                + ". Set the SafetySwitch property to false"
                                                + " if you're expecting column lengths greater than 100,000 characters to"
                                                + " avoid this error.");
                                    }
                                }
                            }
                        } while (hasMore && fieldStarted);

                    } else if (ch == cfg.Delimiter) {
                        // ---- Delimiter at column start: a column with no data ----
                        prevChar = ch;
                        endColumn();

                    } else if (useCustomRecDelim && ch == cfg.RecordDelimiter) {
                        // ---- Custom record delimiter at column start ----
                        if (fieldStarted || colCount > 0 || !cfg.SkipEmptyRecords) {
                            endColumn();
                            endRecord();
                        } else {
                            // Empty record being skipped: move the line start past it.
                            buf.LineStart = buf.Position + 1;
                        }
                        prevChar = ch;

                    } else if (!useCustomRecDelim && (ch == Chars.CR || ch == Chars.LF)) {
                        // ---- CR or LF at column start ----
                        // The (ch == CR || prevChar != CR) clause keeps the LF of a CR LF pair
                        // from being reported as an extra empty record.
                        if (fieldStarted || colCount > 0
                                || (!cfg.SkipEmptyRecords && (ch == Chars.CR || prevChar != Chars.CR))) {
                            endColumn();
                            endRecord();
                        } else {
                            buf.LineStart = buf.Position + 1;
                        }
                        prevChar = ch;

                    } else if (cfg.UseComments && colCount == 0 && ch == cfg.Comment) {
                        // ---- Comment character before any column has been completed ----
                        prevChar = ch;
                        skipLine();

                    } else if (cfg.TrimWhitespace && (ch == Chars.SPACE || ch == Chars.TAB)) {
                        // ---- Leading whitespace: move the column start past it ----
                        fieldStarted = true;
                        buf.ColumnStart = buf.Position + 1;

                    } else {
                        // ---- Unqualified column: ch is its first character ----
                        fieldStarted = true;
                        buf.ColumnStart = buf.Position;
                        boolean lastWasBackslash = false;
                        boolean readingComplexEscape = false;
                        int escType = Esc.UNICODE;
                        int escLen = 0;
                        char escVal = (char) 0;
                        // The first iteration reuses ch, which was already read above.
                        boolean firstLoop = true;

                        do {
                            if (!firstLoop && buf.Position == buf.Count) {
                                checkDataLength();
                            } else {
                                if (!firstLoop) {
                                    ch = buf.Buffer[buf.Position];
                                }
                                // Backslash escapes in unqualified text are honoured only when the
                                // text qualifier is disabled.
                                if (!cfg.UseTextQualifier && cfg.EscapeMode == ESCAPE_MODE_BACKSLASH && ch == Chars.BACKSLASH) {
                                    if (lastWasBackslash) {
                                        lastWasBackslash = false;
                                    } else {
                                        updateCurrentValue();
                                        lastWasBackslash = true;
                                    }
                                } else if (readingComplexEscape) {
                                    // Same digit accumulation as in the qualified branch.
                                    escLen++;
                                    switch (escType) {
                                        case Esc.UNICODE:
                                            escVal *= (char) 16;
                                            escVal += hexToDec(ch);
                                            if (escLen == 4) readingComplexEscape = false;
                                            break;
                                        case Esc.OCTAL:
                                            escVal *= (char) 8;
                                            escVal += (char) (ch - '0');
                                            if (escLen == 3) readingComplexEscape = false;
                                            break;
                                        case Esc.DECIMAL:
                                            escVal *= (char) 10;
                                            escVal += (char) (ch - '0');
                                            if (escLen == 3) readingComplexEscape = false;
                                            break;
                                        case Esc.HEX:
                                            escVal *= (char) 16;
                                            escVal += hexToDec(ch);
                                            if (escLen == 2) readingComplexEscape = false;
                                            break;
                                    }
                                    if (!readingComplexEscape) {
                                        appendLetter(escVal);
                                    } else {
                                        buf.ColumnStart = buf.Position + 1;
                                    }
                                } else if (cfg.EscapeMode == ESCAPE_MODE_BACKSLASH && lastWasBackslash) {
                                    // Character following a backslash.
                                    switch (ch) {
                                        case 'n':
                                            appendLetter(Chars.LF);
                                            break;
                                        case 'r':
                                            appendLetter(Chars.CR);
                                            break;
                                        case 't':
                                            appendLetter(Chars.TAB);
                                            break;
                                        case 'b':
                                            appendLetter(Chars.BACKSPACE);
                                            break;
                                        case 'f':
                                            appendLetter(Chars.FORM_FEED);
                                            break;
                                        case 'e':
                                            appendLetter(Chars.ESCAPE);
                                            break;
                                        case 'v':
                                            appendLetter(Chars.VERTICAL_TAB);
                                            break;
                                        case 'a':
                                            appendLetter(Chars.ALERT);
                                            break;
                                        case '0':
                                        case '1':
                                        case '2':
                                        case '3':
                                        case '4':
                                        case '5':
                                        case '6':
                                        case '7':
                                            escType = Esc.OCTAL;
                                            readingComplexEscape = true;
                                            escLen = 1;
                                            escVal = (char) (ch - '0');
                                            buf.ColumnStart = buf.Position + 1;
                                            break;
                                        case 'u':
                                        case 'x':
                                        case 'o':
                                        case 'd':
                                        case 'U':
                                        case 'X':
                                        case 'O':
                                        case 'D':
                                            switch (ch) {
                                                case 'u':
                                                case 'U':
                                                    escType = Esc.UNICODE;
                                                    break;
                                                case 'x':
                                                case 'X':
                                                    escType = Esc.HEX;
                                                    break;
                                                case 'o':
                                                case 'O':
                                                    escType = Esc.OCTAL;
                                                    break;
                                                case 'd':
                                                case 'D':
                                                    escType = Esc.DECIMAL;
                                                    break;
                                            }
                                            readingComplexEscape = true;
                                            escLen = 0;
                                            escVal = (char) 0;
                                            buf.ColumnStart = buf.Position + 1;
                                            break;
                                        default:
                                            break;
                                    }
                                    lastWasBackslash = false;
                                } else {
                                    // Ordinary character: only delimiters and record terminators matter here.
                                    if (ch == cfg.Delimiter) {
                                        endColumn();
                                    } else if ((!useCustomRecDelim && (ch == Chars.CR || ch == Chars.LF))
                                            || (useCustomRecDelim && ch == cfg.RecordDelimiter)) {
                                        endColumn();
                                        endRecord();
                                    }
                                }
                                prevChar = ch;
                                firstLoop = false;
                                if (fieldStarted) {
                                    // Column still open: advance and enforce the safety limit.
                                    buf.Position++;
                                    if (cfg.SafetySwitch && buf.Position - buf.ColumnStart + colBuf.Position > 100000) {
                                        close();
                                        throw new IOException("Maximum column length of 100,000 exceeded in column "
                                                + NumberFormat.getIntegerInstance().format(colCount)
                                                + " in record "
                                                + NumberFormat.getIntegerInstance().format(rowIndex)
                                                + ". Set the SafetySwitch property to false"
                                                + " if you're expecting column lengths greater than 100,000 characters to"
                                                + " avoid this error.");
                                    }
                                }
                            }
                        } while (hasMore && fieldStarted);
                    }

                    // Step past the character that ended this pass, unless the data ran out.
                    if (hasMore) {
                        buf.Position++;
                    }
                }
            } while (hasMore && !readNextLine);

            // The loop ended with a column still open, or right after a delimiter:
            // close out the final column and record.
            if (fieldStarted || prevChar == cfg.Delimiter) {
                endColumn();
                endRecord();
            }
        }

        if (cfg.CaptureRawRecord) {
            if (hasMore) {
                // Raw text is whatever checkDataLength() carried over in rawBuf, plus the span of
                // the live buffer from LineStart up to, but not including, the character at Position - 1.
                // NOTE(review): that excluded character is presumably the record terminator -- confirm.
                if (rawBuf.Position == 0) {
                    rawRow = new String(buf.Buffer, buf.LineStart, buf.Position - buf.LineStart - 1);
                } else {
                    rawRow = new String(rawBuf.Buffer, 0, rawBuf.Position)
                            + new String(buf.Buffer, buf.LineStart, buf.Position - buf.LineStart - 1);
                }
            } else {
                // Input exhausted: the raw record is taken from rawBuf alone.
                rawRow = new String(rawBuf.Buffer, 0, rawBuf.Position);
            }
        } else {
            rawRow = "";
        }

        return readNextLine;
    }
659
660    private void checkDataLength() throws IOException {
661        if (!init) {
662            if (path != null) {
663                reader = new BufferedReader(new InputStreamReader(new FileInputStream(path), cs),
664                        Statics.MAX_FILE_BUFFER_SIZE);
665            }
666            cs = null;
667            init = true;
668        }
669
670        updateCurrentValue();
671
672        if (cfg.CaptureRawRecord && buf.Count > 0) {
673            if (rawBuf.Buffer.length - rawBuf.Position < buf.Count - buf.LineStart) {
674                int newLength = rawBuf.Buffer.length + Math.max(buf.Count - buf.LineStart, rawBuf.Buffer.length);
675                char[] holder = new char[newLength];
676                System.arraycopy(rawBuf.Buffer, 0, holder, 0, rawBuf.Position);
677                rawBuf.Buffer = holder;
678            }
679            System.arraycopy(buf.Buffer, buf.LineStart, rawBuf.Buffer, rawBuf.Position, buf.Count - buf.LineStart);
680            rawBuf.Position += buf.Count - buf.LineStart;
681        }
682
683        try {
684            buf.Count = reader.read(buf.Buffer, 0, buf.Buffer.length);
685        } catch (IOException ex) {
686            close();
687            throw ex;
688        }
689
690        if (buf.Count == -1) {
691            hasMore = false;
692        }
693
694        buf.Position = 0;
695        buf.LineStart = 0;
696        buf.ColumnStart = 0;
697    }
698
699    public boolean readHeaders() throws IOException {
700        boolean result = readRecord();
701        headers.Length = colCount;
702        headers.Headers = new String[colCount];
703        for (int i = 0; i < headers.Length; i++) {
704            String columnValue = get(i);
705            headers.Headers[i] = columnValue;
706            headers.IndexByName.put(columnValue, Integer.valueOf(i));
707        }
708        if (result) {
709            rowIndex--;
710        }
711        colCount = 0;
712        return result;
713    }
714
715    public String getHeader(int columnIndex) throws IOException {
716        checkClosed();
717        if (columnIndex > -1 && columnIndex < headers.Length) {
718            return headers.Headers[columnIndex];
719        } else {
720            return "";
721        }
722    }
723
724    public boolean isQualified(int columnIndex) throws IOException {
725        checkClosed();
726        if (columnIndex < colCount && columnIndex > -1) {
727            return qualified[columnIndex];
728        } else {
729            return false;
730        }
731    }
732
733    private void endColumn() throws IOException {
734        String currentValue = "";
735        if (fieldStarted) {
736            if (colBuf.Position == 0) {
737                if (buf.ColumnStart < buf.Position) {
738                    int last = buf.Position - 1;
739                    if (cfg.TrimWhitespace && !beganWithQualifier) {
740                        while (last >= buf.ColumnStart
741                                && (buf.Buffer[last] == Chars.SPACE || buf.Buffer[last] == Chars.TAB)) {
742                            last--;
743                        }
744                    }
745                    currentValue = new String(buf.Buffer, buf.ColumnStart, last - buf.ColumnStart + 1);
746                }
747            } else {
748                updateCurrentValue();
749                int last = colBuf.Position - 1;
750                if (cfg.TrimWhitespace && !beganWithQualifier) {
751                    while (last >= 0 && (colBuf.Buffer[last] == Chars.SPACE || colBuf.Buffer[last] == Chars.SPACE)) {
752                        last--;
753                    }
754                }
755                currentValue = new String(colBuf.Buffer, 0, last + 1);
756            }
757        }
758        colBuf.Position = 0;
759        fieldStarted = false;
760
761        if (colCount >= 100000 && cfg.SafetySwitch) {
762            close();
763            throw new IOException(
764                    "Maximum column count of 100,000 exceeded in record "
765                            + NumberFormat.getIntegerInstance().format(rowIndex)
766                            + ". Set the SafetySwitch property to false"
767                            + " if you're expecting more than 100,000 columns per record to"
768                            + " avoid this error.");
769        }
770
771        if (colCount == fields.length) {
772            int newLength = fields.length * 2;
773            String[] holder = new String[newLength];
774            System.arraycopy(fields, 0, holder, 0, fields.length);
775            fields = holder;
776
777            boolean[] qh = new boolean[newLength];
778            System.arraycopy(qualified, 0, qh, 0, qualified.length);
779            qualified = qh;
780        }
781
782        fields[colCount] = currentValue;
783        qualified[colCount] = beganWithQualifier;
784        currentValue = "";
785        colCount++;
786    }
787
788    private void appendLetter(char letter) {
789        if (colBuf.Position == colBuf.Buffer.length) {
790            int newLength = colBuf.Buffer.length * 2;
791            char[] holder = new char[newLength];
792            System.arraycopy(colBuf.Buffer, 0, holder, 0, colBuf.Position);
793            colBuf.Buffer = holder;
794        }
795        colBuf.Buffer[colBuf.Position++] = letter;
796        buf.ColumnStart = buf.Position + 1;
797    }
798
799    private void updateCurrentValue() {
800        if (fieldStarted && buf.ColumnStart < buf.Position) {
801            if (colBuf.Buffer.length - colBuf.Position < buf.Position - buf.ColumnStart) {
802                int newLength = colBuf.Buffer.length
803                        + Math.max(buf.Position - buf.ColumnStart, colBuf.Buffer.length);
804                char[] holder = new char[newLength];
805                System.arraycopy(colBuf.Buffer, 0, holder, 0, colBuf.Position);
806                colBuf.Buffer = holder;
807            }
808            System.arraycopy(buf.Buffer, buf.ColumnStart, colBuf.Buffer, colBuf.Position,
809                    buf.Position - buf.ColumnStart);
810            colBuf.Position += buf.Position - buf.ColumnStart;
811        }
812        buf.ColumnStart = buf.Position + 1;
813    }
814
815    private void endRecord() throws IOException {
816        readNextLine = true;
817        rowIndex++;
818    }
819
820    public int getIndex(String headerName) throws IOException {
821        checkClosed();
822        Object indexValue = headers.IndexByName.get(headerName);
823        if (indexValue != null) {
824            return ((Integer) indexValue).intValue();
825        } else {
826            return -1;
827        }
828    }
829
830    public boolean skipRecord() throws IOException {
831        checkClosed();
832        boolean recordRead = false;
833        if (hasMore) {
834            recordRead = readRecord();
835            if (recordRead) {
836                rowIndex--;
837            }
838        }
839        return recordRead;
840    }
841
842    public boolean skipLine() throws IOException {
843        checkClosed();
844        colCount = 0;
845        boolean skippedLine = false;
846        if (hasMore) {
847            boolean foundEol = false;
848            do {
849                if (buf.Position == buf.Count) {
850                    checkDataLength();
851                } else {
852                    skippedLine = true;
853                    char ch = buf.Buffer[buf.Position];
854                    if (ch == Chars.CR || ch == Chars.LF) {
855                        foundEol = true;
856                    }
857                    prevChar = ch;
858                    if (!foundEol) {
859                        buf.Position++;
860                    }
861                }
862            } while (hasMore && !foundEol);
863            colBuf.Position = 0;
864            buf.LineStart = buf.Position + 1;
865        }
866        rawBuf.Position = 0;
867        rawRow = "";
868        return skippedLine;
869    }
870
871    public void close() {
872        if (!closed) {
873            close(true);
874            closed = true;
875        }
876    }
877
878    private void close(boolean closing) {
879        if (!closed) {
880            if (closing) {
881                cs = null;
882                headers.Headers = null;
883                headers.IndexByName = null;
884                buf.Buffer = null;
885                colBuf.Buffer = null;
886                rawBuf.Buffer = null;
887            }
888            try {
889                if (init) {
890                    reader.close();
891                }
892            } catch (Exception e) {
893                // ignore
894            }
895            reader = null;
896            closed = true;
897        }
898    }
899
900    private void checkClosed() throws IOException {
901        if (closed) {
902            throw new IOException("This instance of the CsvReader class has already been closed.");
903        }
904    }
905
906    @Override
907    protected void finalize() {
908        close(false);
909    }
910
911    private static class Esc {
912        private static final int UNICODE = 1;
913        private static final int OCTAL = 2;
914        private static final int DECIMAL = 3;
915        private static final int HEX = 4;
916    }
917
918    private static char hexToDec(char hex) {
919        char result;
920        if (hex >= 'a') {
921            result = (char) (hex - 'a' + 10);
922        } else if (hex >= 'A') {
923            result = (char) (hex - 'A' + 10);
924        } else {
925            result = (char) (hex - '0');
926        }
927        return result;
928    }
929
930    private static class Cfg {
931        char Delimiter = Chars.COMMA;
932        char RecordDelimiter = '\n';
933        char TextQualifier = '"';
934        char Comment = '#';
935        boolean UseTextQualifier = true;
936        boolean UseComments = false;
937        boolean CaptureRawRecord = false;
938        boolean TrimWhitespace = true;
939        boolean SkipEmptyRecords = true;
940        boolean SafetySwitch = true;
941        int EscapeMode = ESCAPE_MODE_DOUBLED;
942        final CsvReader Owner;
943
944        Cfg(CsvReader owner) {
945            this.Owner = owner;
946        }
947    }
948
949    private static class Hdrs {
950        String[] Headers = null;
951        int Length = 0;
952        Map<String, Integer> IndexByName = new HashMap<>();
953    }
954
955    private static class Buf {
956        char[] Buffer = new char[1024];
957        int Position = 0;
958        int Count = 0;
959        int ColumnStart = 0;
960        int LineStart = 0;
961    }
962
963    private static class ColBuf {
964        char[] Buffer = new char[1024];
965        int Position = 0;
966    }
967
968    private static class RawBuf {
969        char[] Buffer = new char[1024];
970        int Position = 0;
971    }
972
973    private static class Statics {
974        static final int INITIAL_COLUMN_COUNT = 50;
975        static final int MAX_FILE_BUFFER_SIZE = 4 * 1024;
976    }
977
978    private static class Chars {
979        static final char COMMA = ',';
980        static final char CR = '\r';
981        static final char LF = '\n';
982        static final char TAB = '\t';
983        static final char SPACE = ' ';
984        static final char BACKSLASH = '\\';
985        static final char BACKSPACE = '\b';
986        static final char FORM_FEED = '\f';
987        static final char ESCAPE = 27;
988        static final char VERTICAL_TAB = 11;
989        static final char ALERT = 7;
990    }
991    
992    public static void main(String[] args) throws Exception {
993                CsvReader reader = new CsvReader("C:\\Users\\KK\\Desktop\\extract_answer_yuxi.csv", ',', Charset.forName("GBK"));
994                reader.readHeaders();
995                System.out.println(Arrays.toString(reader.getHeaders()));
996                 while (reader.readRecord()) {
997                         System.out.println(Arrays.toString(reader.getValues()));
998                 }
999        }
1000}