001package gudusoft.gsqlparser.util.csv; 002 003import java.io.*; 004import java.nio.charset.Charset; 005import java.text.NumberFormat; 006import java.util.Arrays; 007import java.util.HashMap; 008import java.util.Map; 009 010public class CsvReader { 011 012 public static final int ESCAPE_MODE_DOUBLED = 1; 013 public static final int ESCAPE_MODE_BACKSLASH = 2; 014 015 private Reader reader = null; 016 private String path = null; 017 private Cfg cfg = new Cfg(this); 018 private Charset cs = null; 019 private boolean useCustomRecDelim = false; 020 021 private Buf buf = new Buf(); 022 private ColBuf colBuf = new ColBuf(); 023 private RawBuf rawBuf = new RawBuf(); 024 025 private boolean[] qualified = null; 026 private String rawRow = ""; 027 private Hdrs headers = new Hdrs(); 028 029 private boolean fieldStarted = false; 030 private boolean beganWithQualifier = false; 031 private boolean hasMore = true; 032 private char prevChar = '\0'; 033 private boolean readNextLine = false; 034 035 private int colCount = 0; 036 private long rowIndex = 0; 037 private String[] fields = new String[Statics.INITIAL_COLUMN_COUNT]; 038 039 private boolean init = false; 040 private boolean closed = false; 041 042 public CsvReader(String fileName, char delimiter, Charset charset) throws FileNotFoundException { 043 if (fileName == null) { 044 throw new IllegalArgumentException("Parameter fileName can not be null."); 045 } 046 if (charset == null) { 047 throw new IllegalArgumentException("Parameter charset can not be null."); 048 } 049 if (!new File(fileName).exists()) { 050 throw new FileNotFoundException("File " + fileName + " does not exist."); 051 } 052 this.path = fileName; 053 this.cfg.Delimiter = delimiter; 054 this.cs = charset; 055 this.qualified = new boolean[fields.length]; 056 } 057 058 public CsvReader(String fileName, char delimiter) throws FileNotFoundException { 059 this(fileName, delimiter, Charset.forName("UTF-8")); 060 } 061 062 public CsvReader(String fileName) throws FileNotFoundException { 063 this(fileName, Chars.COMMA); 064 } 065 066 public CsvReader(Reader inputStream, char delimiter) { 067 if (inputStream == null) { 068 throw new IllegalArgumentException("Parameter inputStream can not be null."); 069 } 070 this.reader = inputStream; 071 this.cfg.Delimiter = delimiter; 072 this.init = true; 073 this.qualified = new boolean[fields.length]; 074 } 075 076 public CsvReader(Reader inputStream) { 077 this(inputStream, Chars.COMMA); 078 } 079 080 public CsvReader(InputStream inputStream, char delimiter, Charset charset) { 081 this(new InputStreamReader(inputStream, charset), delimiter); 082 } 083 084 public CsvReader(InputStream inputStream, Charset charset) { 085 this(new InputStreamReader(inputStream, charset)); 086 } 087 088 public boolean getCaptureRawRecord() { 089 return cfg.CaptureRawRecord; 090 } 091 092 public void setCaptureRawRecord(boolean captureRawRecord) { 093 cfg.CaptureRawRecord = captureRawRecord; 094 } 095 096 public String getRawRecord() { 097 return rawRow; 098 } 099 100 public boolean getTrimWhitespace() { 101 return cfg.TrimWhitespace; 102 } 103 104 public void setTrimWhitespace(boolean trimWhitespace) { 105 cfg.TrimWhitespace = trimWhitespace; 106 } 107 108 public char getDelimiter() { 109 return cfg.Delimiter; 110 } 111 112 public void setDelimiter(char delimiter) { 113 cfg.Delimiter = delimiter; 114 } 115 116 public char getRecordDelimiter() { 117 return cfg.RecordDelimiter; 118 } 119 120 public void setRecordDelimiter(char recordDelimiter) { 121 useCustomRecDelim = true; 122 cfg.RecordDelimiter = recordDelimiter; 123 } 124 125 public char getTextQualifier() { 126 return cfg.TextQualifier; 127 } 128 129 public void setTextQualifier(char textQualifier) { 130 cfg.TextQualifier = textQualifier; 131 } 132 133 public boolean getUseTextQualifier() { 134 return cfg.UseTextQualifier; 135 } 136 137 public void setUseTextQualifier(boolean useTextQualifier) { 138 cfg.UseTextQualifier = useTextQualifier; 139 } 140 141 public char getComment() { 142 return cfg.Comment; 143 } 144 145 public void setComment(char comment) { 146 cfg.Comment = comment; 147 } 148 149 public boolean getUseComments() { 150 return cfg.UseComments; 151 } 152 153 public void setUseComments(boolean useComments) { 154 cfg.UseComments = useComments; 155 } 156 157 public int getEscapeMode() { 158 return cfg.EscapeMode; 159 } 160 161 public void setEscapeMode(int escapeMode) throws IllegalArgumentException { 162 if (escapeMode != ESCAPE_MODE_DOUBLED && escapeMode != ESCAPE_MODE_BACKSLASH) { 163 throw new IllegalArgumentException("Parameter escapeMode must be a valid value."); 164 } 165 cfg.EscapeMode = escapeMode; 166 } 167 168 public boolean getSkipEmptyRecords() { 169 return cfg.SkipEmptyRecords; 170 } 171 172 public void setSkipEmptyRecords(boolean skipEmptyRecords) { 173 cfg.SkipEmptyRecords = skipEmptyRecords; 174 } 175 176 public boolean getSafetySwitch() { 177 return cfg.SafetySwitch; 178 } 179 180 public void setSafetySwitch(boolean safetySwitch) { 181 cfg.SafetySwitch = safetySwitch; 182 } 183 184 public int getColumnCount() { 185 return colCount; 186 } 187 188 public long getCurrentRecord() { 189 return rowIndex - 1; 190 } 191 192 public int getHeaderCount() { 193 return headers.Length; 194 } 195 196 public String[] getHeaders() throws IOException { 197 checkClosed(); 198 if (headers.Headers == null) { 199 return null; 200 } else { 201 String[] clone = new String[headers.Length]; 202 System.arraycopy(headers.Headers, 0, clone, 0, headers.Length); 203 return clone; 204 } 205 } 206 207 public void setHeaders(String[] hdrs) { 208 headers.Headers = hdrs; 209 headers.IndexByName.clear(); 210 headers.Length = (hdrs != null) ? hdrs.length : 0; 211 for (int i = 0; i < headers.Length; i++) { 212 headers.IndexByName.put(hdrs[i], Integer.valueOf(i)); 213 } 214 } 215 216 public String[] getValues() throws IOException { 217 checkClosed(); 218 String[] clone = new String[colCount]; 219 System.arraycopy(fields, 0, clone, 0, colCount); 220 return clone; 221 } 222 223 public String get(int columnIndex) throws IOException { 224 checkClosed(); 225 if (columnIndex > -1 && columnIndex < colCount) { 226 return fields[columnIndex]; 227 } else { 228 return ""; 229 } 230 } 231 232 public String get(String headerName) throws IOException { 233 checkClosed(); 234 return get(getIndex(headerName)); 235 } 236 237 public static CsvReader parse(String data) { 238 if (data == null) { 239 throw new IllegalArgumentException("Parameter data can not be null."); 240 } 241 return new CsvReader(new StringReader(data)); 242 } 243 244 public boolean readRecord() throws IOException { 245 checkClosed(); 246 colCount = 0; 247 rawBuf.Position = 0; 248 buf.LineStart = buf.Position; 249 readNextLine = false; 250 251 if (hasMore) { 252 do { 253 if (buf.Position == buf.Count) { 254 checkDataLength(); 255 } else { 256 beganWithQualifier = false; 257 char ch = buf.Buffer[buf.Position]; 258 259 if (cfg.UseTextQualifier && ch == cfg.TextQualifier) { 260 prevChar = ch; 261 fieldStarted = true; 262 buf.ColumnStart = buf.Position + 1; 263 beganWithQualifier = true; 264 265 boolean lastWasQualifier = false; 266 char slashOrQuote = cfg.TextQualifier; 267 if (cfg.EscapeMode == ESCAPE_MODE_BACKSLASH) { 268 slashOrQuote = Chars.BACKSLASH; 269 } 270 boolean eatingTail = false; 271 boolean lastWasEscape = false; 272 boolean readingComplexEscape = false; 273 int escType = Esc.UNICODE; 274 int escLen = 0; 275 char escVal = (char) 0; 276 277 buf.Position++; 278 do { 279 if (buf.Position == buf.Count) { 280 checkDataLength(); 281 } else { 282 ch = buf.Buffer[buf.Position]; 283 if (eatingTail) { 284 buf.ColumnStart = buf.Position + 1; 285 if (ch == cfg.Delimiter) { 286 endColumn(); 287 } else if ((!useCustomRecDelim && (ch == Chars.CR || ch == Chars.LF)) 288 || (useCustomRecDelim && ch == cfg.RecordDelimiter)) { 289 endColumn(); 290 endRecord(); 291 } 292 } else if (readingComplexEscape) { 293 escLen++; 294 switch (escType) { 295 case Esc.UNICODE: 296 escVal *= (char) 16; 297 escVal += hexToDec(ch); 298 if (escLen == 4) readingComplexEscape = false; 299 break; 300 case Esc.OCTAL: 301 escVal *= (char) 8; 302 escVal += (char) (ch - '0'); 303 if (escLen == 3) readingComplexEscape = false; 304 break; 305 case Esc.DECIMAL: 306 escVal *= (char) 10; 307 escVal += (char) (ch - '0'); 308 if (escLen == 3) readingComplexEscape = false; 309 break; 310 case Esc.HEX: 311 escVal *= (char) 16; 312 escVal += hexToDec(ch); 313 if (escLen == 2) readingComplexEscape = false; 314 break; 315 } 316 if (!readingComplexEscape) { 317 appendLetter(escVal); 318 } else { 319 buf.ColumnStart = buf.Position + 1; 320 } 321 } else if (ch == cfg.TextQualifier) { 322 if (lastWasEscape) { 323 lastWasEscape = false; 324 lastWasQualifier = false; 325 } else { 326 updateCurrentValue(); 327 if (cfg.EscapeMode == ESCAPE_MODE_DOUBLED) { 328 lastWasEscape = true; 329 } 330 lastWasQualifier = true; 331 } 332 } else if (cfg.EscapeMode == ESCAPE_MODE_BACKSLASH && lastWasEscape) { 333 switch (ch) { 334 case 'n': 335 appendLetter(Chars.LF); 336 break; 337 case 'r': 338 appendLetter(Chars.CR); 339 break; 340 case 't': 341 appendLetter(Chars.TAB); 342 break; 343 case 'b': 344 appendLetter(Chars.BACKSPACE); 345 break; 346 case 'f': 347 appendLetter(Chars.FORM_FEED); 348 break; 349 case 'e': 350 appendLetter(Chars.ESCAPE); 351 break; 352 case 'v': 353 appendLetter(Chars.VERTICAL_TAB); 354 break; 355 case 'a': 356 appendLetter(Chars.ALERT); 357 break; 358 case '0': 359 case '1': 360 case '2': 361 case '3': 362 case '4': 363 case '5': 364 case '6': 365 case '7': 366 escType = Esc.OCTAL; 367 readingComplexEscape = true; 368 escLen = 1; 369 escVal = (char) (ch - '0'); 370 buf.ColumnStart = buf.Position + 1; 371 break; 372 case 'u': 373 case 'x': 374 case 'o': 375 case 'd': 376 case 'U': 377 case 'X': 378 case 'O': 379 case 'D': 380 switch (ch) { 381 case 'u': 382 case 'U': 383 escType = Esc.UNICODE; 384 break; 385 case 'x': 386 case 'X': 387 escType = Esc.HEX; 388 break; 389 case 'o': 390 case 'O': 391 escType = Esc.OCTAL; 392 break; 393 case 'd': 394 case 'D': 395 escType = Esc.DECIMAL; 396 break; 397 } 398 readingComplexEscape = true; 399 escLen = 0; 400 escVal = (char) 0; 401 buf.ColumnStart = buf.Position + 1; 402 break; 403 default: 404 break; 405 } 406 lastWasEscape = false; 407 } else if (ch == slashOrQuote) { 408 updateCurrentValue(); 409 lastWasEscape = true; 410 } else { 411 if (lastWasQualifier) { 412 if (ch == cfg.Delimiter) { 413 endColumn(); 414 } else if ((!useCustomRecDelim && (ch == Chars.CR || ch == Chars.LF)) 415 || (useCustomRecDelim && ch == cfg.RecordDelimiter)) { 416 endColumn(); 417 endRecord(); 418 } else { 419 buf.ColumnStart = buf.Position + 1; 420 eatingTail = true; 421 } 422 lastWasQualifier = false; 423 } 424 } 425 426 prevChar = ch; 427 if (fieldStarted) { 428 buf.Position++; 429 if (cfg.SafetySwitch && buf.Position - buf.ColumnStart + colBuf.Position > 100000) { 430 close(); 431 throw new IOException("Maximum column length of 100,000 exceeded in column " 432 + NumberFormat.getIntegerInstance().format(colCount) 433 + " in record " 434 + NumberFormat.getIntegerInstance().format(rowIndex) 435 + ". Set the SafetySwitch property to false" 436 + " if you're expecting column lengths greater than 100,000 characters to" 437 + " avoid this error."); 438 } 439 } 440 } 441 } while (hasMore && fieldStarted); 442 443 } else if (ch == cfg.Delimiter) { 444 prevChar = ch; 445 endColumn(); 446 447 } else if (useCustomRecDelim && ch == cfg.RecordDelimiter) { 448 if (fieldStarted || colCount > 0 || !cfg.SkipEmptyRecords) { 449 endColumn(); 450 endRecord(); 451 } else { 452 buf.LineStart = buf.Position + 1; 453 } 454 prevChar = ch; 455 456 } else if (!useCustomRecDelim && (ch == Chars.CR || ch == Chars.LF)) { 457 if (fieldStarted || colCount > 0 458 || (!cfg.SkipEmptyRecords && (ch == Chars.CR || prevChar != Chars.CR))) { 459 endColumn(); 460 endRecord(); 461 } else { 462 buf.LineStart = buf.Position + 1; 463 } 464 prevChar = ch; 465 466 } else if (cfg.UseComments && colCount == 0 && ch == cfg.Comment) { 467 prevChar = ch; 468 skipLine(); 469 470 } else if (cfg.TrimWhitespace && (ch == Chars.SPACE || ch == Chars.TAB)) { 471 fieldStarted = true; 472 buf.ColumnStart = buf.Position + 1; 473 474 } else { 475 fieldStarted = true; 476 buf.ColumnStart = buf.Position; 477 boolean lastWasBackslash = false; 478 boolean readingComplexEscape = false; 479 int escType = Esc.UNICODE; 480 int escLen = 0; 481 char escVal = (char) 0; 482 boolean firstLoop = true; 483 484 do { 485 if (!firstLoop && buf.Position == buf.Count) { 486 checkDataLength(); 487 } else { 488 if (!firstLoop) { 489 ch = buf.Buffer[buf.Position]; 490 } 491 if (!cfg.UseTextQualifier && cfg.EscapeMode == ESCAPE_MODE_BACKSLASH && ch == Chars.BACKSLASH) { 492 if (lastWasBackslash) { 493 lastWasBackslash = false; 494 } else { 495 updateCurrentValue(); 496 lastWasBackslash = true; 497 } 498 } else if (readingComplexEscape) { 499 escLen++; 500 switch (escType) { 501 case Esc.UNICODE: 502 escVal *= (char) 16; 503 escVal += hexToDec(ch); 504 if (escLen == 4) readingComplexEscape = false; 505 break; 506 case Esc.OCTAL: 507 escVal *= (char) 8; 508 escVal += (char) (ch - '0'); 509 if (escLen == 3) readingComplexEscape = false; 510 break; 511 case Esc.DECIMAL: 512 escVal *= (char) 10; 513 escVal += (char) (ch - '0'); 514 if (escLen == 3) readingComplexEscape = false; 515 break; 516 case Esc.HEX: 517 escVal *= (char) 16; 518 escVal += hexToDec(ch); 519 if (escLen == 2) readingComplexEscape = false; 520 break; 521 } 522 if (!readingComplexEscape) { 523 appendLetter(escVal); 524 } else { 525 buf.ColumnStart = buf.Position + 1; 526 } 527 } else if (cfg.EscapeMode == ESCAPE_MODE_BACKSLASH && lastWasBackslash) { 528 switch (ch) { 529 case 'n': 530 appendLetter(Chars.LF); 531 break; 532 case 'r': 533 appendLetter(Chars.CR); 534 break; 535 case 't': 536 appendLetter(Chars.TAB); 537 break; 538 case 'b': 539 appendLetter(Chars.BACKSPACE); 540 break; 541 case 'f': 542 appendLetter(Chars.FORM_FEED); 543 break; 544 case 'e': 545 appendLetter(Chars.ESCAPE); 546 break; 547 case 'v': 548 appendLetter(Chars.VERTICAL_TAB); 549 break; 550 case 'a': 551 appendLetter(Chars.ALERT); 552 break; 553 case '0': 554 case '1': 555 case '2': 556 case '3': 557 case '4': 558 case '5': 559 case '6': 560 case '7': 561 escType = Esc.OCTAL; 562 readingComplexEscape = true; 563 escLen = 1; 564 escVal = (char) (ch - '0'); 565 buf.ColumnStart = buf.Position + 1; 566 break; 567 case 'u': 568 case 'x': 569 case 'o': 570 case 'd': 571 case 'U': 572 case 'X': 573 case 'O': 574 case 'D': 575 switch (ch) { 576 case 'u': 577 case 'U': 578 escType = Esc.UNICODE; 579 break; 580 case 'x': 581 case 'X': 582 escType = Esc.HEX; 583 break; 584 case 'o': 585 case 'O': 586 escType = Esc.OCTAL; 587 break; 588 case 'd': 589 case 'D': 590 escType = Esc.DECIMAL; 591 break; 592 } 593 readingComplexEscape = true; 594 escLen = 0; 595 escVal = (char) 0; 596 buf.ColumnStart = buf.Position + 1; 597 break; 598 default: 599 break; 600 } 601 lastWasBackslash = false; 602 } else { 603 if (ch == cfg.Delimiter) { 604 endColumn(); 605 } else if ((!useCustomRecDelim && (ch == Chars.CR || ch == Chars.LF)) 606 || (useCustomRecDelim && ch == cfg.RecordDelimiter)) { 607 endColumn(); 608 endRecord(); 609 } 610 } 611 prevChar = ch; 612 firstLoop = false; 613 if (fieldStarted) { 614 buf.Position++; 615 if (cfg.SafetySwitch && buf.Position - buf.ColumnStart + colBuf.Position > 100000) { 616 close(); 617 throw new IOException("Maximum column length of 100,000 exceeded in column " 618 + NumberFormat.getIntegerInstance().format(colCount) 619 + " in record " 620 + NumberFormat.getIntegerInstance().format(rowIndex) 621 + ". Set the SafetySwitch property to false" 622 + " if you're expecting column lengths greater than 100,000 characters to" 623 + " avoid this error."); 624 } 625 } 626 } 627 } while (hasMore && fieldStarted); 628 } 629 630 if (hasMore) { 631 buf.Position++; 632 } 633 } 634 } while (hasMore && !readNextLine); 635 636 if (fieldStarted || prevChar == cfg.Delimiter) { 637 endColumn(); 638 endRecord(); 639 } 640 } 641 642 if (cfg.CaptureRawRecord) { 643 if (hasMore) { 644 if (rawBuf.Position == 0) { 645 rawRow = new String(buf.Buffer, buf.LineStart, buf.Position - buf.LineStart - 1); 646 } else { 647 rawRow = new String(rawBuf.Buffer, 0, rawBuf.Position) 648 + new String(buf.Buffer, buf.LineStart, buf.Position - buf.LineStart - 1); 649 } 650 } else { 651 rawRow = new String(rawBuf.Buffer, 0, rawBuf.Position); 652 } 653 } else { 654 rawRow = ""; 655 } 656 657 return readNextLine; 658 } 659 660 private void checkDataLength() throws IOException { 661 if (!init) { 662 if (path != null) { 663 reader = new BufferedReader(new InputStreamReader(new FileInputStream(path), cs), 664 Statics.MAX_FILE_BUFFER_SIZE); 665 } 666 cs = null; 667 init = true; 668 } 669 670 updateCurrentValue(); 671 672 if (cfg.CaptureRawRecord && buf.Count > 0) { 673 if (rawBuf.Buffer.length - rawBuf.Position < buf.Count - buf.LineStart) { 674 int newLength = rawBuf.Buffer.length + Math.max(buf.Count - buf.LineStart, rawBuf.Buffer.length); 675 char[] holder = new char[newLength]; 676 System.arraycopy(rawBuf.Buffer, 0, holder, 0, rawBuf.Position); 677 rawBuf.Buffer = holder; 678 } 679 System.arraycopy(buf.Buffer, buf.LineStart, rawBuf.Buffer, rawBuf.Position, buf.Count - buf.LineStart); 680 rawBuf.Position += buf.Count - buf.LineStart; 681 } 682 683 try { 684 buf.Count = reader.read(buf.Buffer, 0, buf.Buffer.length); 685 } catch (IOException ex) { 686 close(); 687 throw ex; 688 } 689 690 if (buf.Count == -1) { 691 hasMore = false; 692 } 693 694 buf.Position = 0; 695 buf.LineStart = 0; 696 buf.ColumnStart = 0; 697 } 698 699 public boolean readHeaders() throws IOException { 700 boolean result = readRecord(); 701 headers.Length = colCount; 702 headers.Headers = new String[colCount]; 703 for (int i = 0; i < headers.Length; i++) { 704 String columnValue = get(i); 705 headers.Headers[i] = columnValue; 706 headers.IndexByName.put(columnValue, Integer.valueOf(i)); 707 } 708 if (result) { 709 rowIndex--; 710 } 711 colCount = 0; 712 return result; 713 } 714 715 public String getHeader(int columnIndex) throws IOException { 716 checkClosed(); 717 if (columnIndex > -1 && columnIndex < headers.Length) { 718 return headers.Headers[columnIndex]; 719 } else { 720 return ""; 721 } 722 } 723 724 public boolean isQualified(int columnIndex) throws IOException { 725 checkClosed(); 726 if (columnIndex < colCount && columnIndex > -1) { 727 return qualified[columnIndex]; 728 } else { 729 return false; 730 } 731 } 732 733 private void endColumn() throws IOException { 734 String currentValue = ""; 735 if (fieldStarted) { 736 if (colBuf.Position == 0) { 737 if (buf.ColumnStart < buf.Position) { 738 int last = buf.Position - 1; 739 if (cfg.TrimWhitespace && !beganWithQualifier) { 740 while (last >= buf.ColumnStart 741 && (buf.Buffer[last] == Chars.SPACE || buf.Buffer[last] == Chars.TAB)) { 742 last--; 743 } 744 } 745 currentValue = new String(buf.Buffer, buf.ColumnStart, last - buf.ColumnStart + 1); 746 } 747 } else { 748 updateCurrentValue(); 749 int last = colBuf.Position - 1; 750 if (cfg.TrimWhitespace && !beganWithQualifier) { 751 while (last >= 0 && (colBuf.Buffer[last] == Chars.SPACE || colBuf.Buffer[last] == Chars.SPACE)) { 752 last--; 753 } 754 } 755 currentValue = new String(colBuf.Buffer, 0, last + 1); 756 } 757 } 758 colBuf.Position = 0; 759 fieldStarted = false; 760 761 if (colCount >= 100000 && cfg.SafetySwitch) { 762 close(); 763 throw new IOException( 764 "Maximum column count of 100,000 exceeded in record " 765 + NumberFormat.getIntegerInstance().format(rowIndex) 766 + ". Set the SafetySwitch property to false" 767 + " if you're expecting more than 100,000 columns per record to" 768 + " avoid this error."); 769 } 770 771 if (colCount == fields.length) { 772 int newLength = fields.length * 2; 773 String[] holder = new String[newLength]; 774 System.arraycopy(fields, 0, holder, 0, fields.length); 775 fields = holder; 776 777 boolean[] qh = new boolean[newLength]; 778 System.arraycopy(qualified, 0, qh, 0, qualified.length); 779 qualified = qh; 780 } 781 782 fields[colCount] = currentValue; 783 qualified[colCount] = beganWithQualifier; 784 currentValue = ""; 785 colCount++; 786 } 787 788 private void appendLetter(char letter) { 789 if (colBuf.Position == colBuf.Buffer.length) { 790 int newLength = colBuf.Buffer.length * 2; 791 char[] holder = new char[newLength]; 792 System.arraycopy(colBuf.Buffer, 0, holder, 0, colBuf.Position); 793 colBuf.Buffer = holder; 794 } 795 colBuf.Buffer[colBuf.Position++] = letter; 796 buf.ColumnStart = buf.Position + 1; 797 } 798 799 private void updateCurrentValue() { 800 if (fieldStarted && buf.ColumnStart < buf.Position) { 801 if (colBuf.Buffer.length - colBuf.Position < buf.Position - buf.ColumnStart) { 802 int newLength = colBuf.Buffer.length 803 + Math.max(buf.Position - buf.ColumnStart, colBuf.Buffer.length); 804 char[] holder = new char[newLength]; 805 System.arraycopy(colBuf.Buffer, 0, holder, 0, colBuf.Position); 806 colBuf.Buffer = holder; 807 } 808 System.arraycopy(buf.Buffer, buf.ColumnStart, colBuf.Buffer, colBuf.Position, 809 buf.Position - buf.ColumnStart); 810 colBuf.Position += buf.Position - buf.ColumnStart; 811 } 812 buf.ColumnStart = buf.Position + 1; 813 } 814 815 private void endRecord() throws IOException { 816 readNextLine = true; 817 rowIndex++; 818 } 819 820 public int getIndex(String headerName) throws IOException { 821 checkClosed(); 822 Object indexValue = headers.IndexByName.get(headerName); 823 if (indexValue != null) { 824 return ((Integer) indexValue).intValue(); 825 } else { 826 return -1; 827 } 828 } 829 830 public boolean skipRecord() throws IOException { 831 checkClosed(); 832 boolean recordRead = false; 833 if (hasMore) { 834 recordRead = readRecord(); 835 if (recordRead) { 836 rowIndex--; 837 } 838 } 839 return recordRead; 840 } 841 842 public boolean skipLine() throws IOException { 843 checkClosed(); 844 colCount = 0; 845 boolean skippedLine = false; 846 if (hasMore) { 847 boolean foundEol = false; 848 do { 849 if (buf.Position == buf.Count) { 850 checkDataLength(); 851 } else { 852 skippedLine = true; 853 char ch = buf.Buffer[buf.Position]; 854 if (ch == Chars.CR || ch == Chars.LF) { 855 foundEol = true; 856 } 857 prevChar = ch; 858 if (!foundEol) { 859 buf.Position++; 860 } 861 } 862 } while (hasMore && !foundEol); 863 colBuf.Position = 0; 864 buf.LineStart = buf.Position + 1; 865 } 866 rawBuf.Position = 0; 867 rawRow = ""; 868 return skippedLine; 869 } 870 871 public void close() { 872 if (!closed) { 873 close(true); 874 closed = true; 875 } 876 } 877 878 private void close(boolean closing) { 879 if (!closed) { 880 if (closing) { 881 cs = null; 882 headers.Headers = null; 883 headers.IndexByName = null; 884 buf.Buffer = null; 885 colBuf.Buffer = null; 886 rawBuf.Buffer = null; 887 } 888 try { 889 if (init) { 890 reader.close(); 891 } 892 } catch (Exception e) { 893 // ignore 894 } 895 reader = null; 896 closed = true; 897 } 898 } 899 900 private void checkClosed() throws IOException { 901 if (closed) { 902 throw new IOException("This instance of the CsvReader class has already been closed."); 903 } 904 } 905 906 @Override 907 protected void finalize() { 908 close(false); 909 } 910 911 private static class Esc { 912 private static final int UNICODE = 1; 913 private static final int OCTAL = 2; 914 private static final int DECIMAL = 3; 915 private static final int HEX = 4; 916 } 917 918 private static char hexToDec(char hex) { 919 char result; 920 if (hex >= 'a') { 921 result = (char) (hex - 'a' + 10); 922 } else if (hex >= 'A') { 923 result = (char) (hex - 'A' + 10); 924 } else { 925 result = (char) (hex - '0'); 926 } 927 return result; 928 } 929 930 private static class Cfg { 931 char Delimiter = Chars.COMMA; 932 char RecordDelimiter = '\n'; 933 char TextQualifier = '"'; 934 char Comment = '#'; 935 boolean UseTextQualifier = true; 936 boolean UseComments = false; 937 boolean CaptureRawRecord = false; 938 boolean TrimWhitespace = true; 939 boolean SkipEmptyRecords = true; 940 boolean SafetySwitch = true; 941 int EscapeMode = ESCAPE_MODE_DOUBLED; 942 final CsvReader Owner; 943 944 Cfg(CsvReader owner) { 945 this.Owner = owner; 946 } 947 } 948 949 private static class Hdrs { 950 String[] Headers = null; 951 int Length = 0; 952 Map<String, Integer> IndexByName = new HashMap<>(); 953 } 954 955 private static class Buf { 956 char[] Buffer = new char[1024]; 957 int Position = 0; 958 int Count = 0; 959 int ColumnStart = 0; 960 int LineStart = 0; 961 } 962 963 private static class ColBuf { 964 char[] Buffer = new char[1024]; 965 int Position = 0; 966 } 967 968 private static class RawBuf { 969 char[] Buffer = new char[1024]; 970 int Position = 0; 971 } 972 973 private static class Statics { 974 static final int INITIAL_COLUMN_COUNT = 50; 975 static final int MAX_FILE_BUFFER_SIZE = 4 * 1024; 976 } 977 978 private static class Chars { 979 static final char COMMA = ','; 980 static final char CR = '\r'; 981 static final char LF = '\n'; 982 static final char TAB = '\t'; 983 static final char SPACE = ' '; 984 static final char BACKSLASH = '\\'; 985 static final char BACKSPACE = '\b'; 986 static final char FORM_FEED = '\f'; 987 static final char ESCAPE = 27; 988 static final char VERTICAL_TAB = 11; 989 static final char ALERT = 7; 990 } 991 992 public static void main(String[] args) throws Exception { 993 CsvReader reader = new CsvReader("C:\\Users\\KK\\Desktop\\extract_answer_yuxi.csv", ',', Charset.forName("GBK")); 994 reader.readHeaders(); 995 System.out.println(Arrays.toString(reader.getHeaders())); 996 while (reader.readRecord()) { 997 System.out.println(Arrays.toString(reader.getValues())); 998 } 999 } 1000}