001/* 002 * Licensed to the Apache Software Foundation (ASF) under one or more 003 * contributor license agreements. See the NOTICE file distributed with 004 * this work for additional information regarding copyright ownership. 005 * The ASF licenses this file to You under the Apache License, Version 2.0 006 * (the "License"); you may not use this file except in compliance with 007 * the License. You may obtain a copy of the License at 008 * 009 * https://www.apache.org/licenses/LICENSE-2.0 010 * 011 * Unless required by applicable law or agreed to in writing, software 012 * distributed under the License is distributed on an "AS IS" BASIS, 013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 014 * See the License for the specific language governing permissions and 015 * limitations under the License. 016 */ 017package org.apache.commons.io.input; 018 019import java.io.Closeable; 020import java.io.File; 021import java.io.IOException; 022import java.io.UnsupportedEncodingException; 023import java.nio.ByteBuffer; 024import java.nio.channels.SeekableByteChannel; 025import java.nio.charset.Charset; 026import java.nio.charset.CharsetEncoder; 027import java.nio.charset.StandardCharsets; 028import java.nio.file.Path; 029import java.nio.file.StandardOpenOption; 030import java.util.ArrayList; 031import java.util.Arrays; 032import java.util.Collections; 033import java.util.Iterator; 034import java.util.List; 035 036import org.apache.commons.io.Charsets; 037import org.apache.commons.io.FileSystem; 038import org.apache.commons.io.StandardLineSeparator; 039import org.apache.commons.io.build.AbstractStreamBuilder; 040import org.apache.commons.io.function.IOIterable; 041import org.apache.commons.io.function.IOIterator; 042 043/** 044 * Reads lines in a file reversely (similar to a BufferedReader, but starting at the last line). Useful for e.g. searching in log files. 045 * <p> 046 * To build an instance, use {@link Builder}. 047 * </p> 048 * <p> 049 * For example: 050 * </p> 051 * <pre> 052 * <code> 053 * try (ReversedLinesFileReader reader = ReversedLinesFileReader.builder() 054 * .setPath(path) 055 * .setBufferSize(4096) 056 * .setCharset(StandardCharsets.UTF_8) 057 * .get()) { 058 * reader.forEach(line -> System.out.println(line)); 059 * } 060 * </code> 061 * </pre> 062 * 063 * @see Builder 064 * @since 2.2 065 */ 066public class ReversedLinesFileReader implements Closeable, IOIterable<String> { 067 068 // @formatter:off 069 /** 070 * Builds a new {@link ReversedLinesFileReader}. 071 * 072 * <p> 073 * For example: 074 * </p> 075 * <pre>{@code 076 * ReversedLinesFileReader reader = ReversedLinesFileReader.builder() 077 * .setPath(path) 078 * .setBufferSize(4096) 079 * .setCharset(StandardCharsets.UTF_8) 080 * .get());} 081 * </pre> 082 * 083 * @see #get() 084 * @since 2.12.0 085 */ 086 // @formatter:on 087 public static class Builder extends AbstractStreamBuilder<ReversedLinesFileReader, Builder> { 088 089 /** 090 * Constructs a new builder of {@link ReversedLinesFileReader}. 091 */ 092 public Builder() { 093 setBufferSizeDefault(DEFAULT_BLOCK_SIZE); 094 setBufferSize(DEFAULT_BLOCK_SIZE); 095 setOpenOptions(StandardOpenOption.READ); 096 } 097 098 /** 099 * Builds a new {@link ReversedLinesFileReader}. 100 * <p> 101 * You must set an aspect that supports {@link #getInputStream()} on this builder, otherwise, this method throws an exception. 102 * </p> 103 * <p> 104 * This builder uses the following aspects: 105 * </p> 106 * <ul> 107 * <li>{@link #getPath()} gets the target aspect.</li> 108 * <li>{@link #getBufferSize()}</li> 109 * <li>{@link #getCharset()}</li> 110 * </ul> 111 * 112 * @return a new instance. 113 * @throws IllegalStateException if the {@code origin} is {@code null}. 114 * @throws UnsupportedOperationException if the origin cannot be converted to a {@link Path}. 115 * @throws IOException if an I/O error occurs converting to a {@link Path} using {@link #getPath()}. 116 * @see #getPath() 117 * @see #getBufferSize() 118 * @see #getCharset() 119 * @see #getUnchecked() 120 */ 121 @Override 122 public ReversedLinesFileReader get() throws IOException { 123 return new ReversedLinesFileReader(this); 124 } 125 126 } 127 128 private final class FilePart { 129 private final long partNumber; 130 131 private final byte[] data; 132 133 private byte[] leftOver; 134 135 private int currentLastBytePos; 136 137 /** 138 * Constructs a new instance. 139 * 140 * @param partNumber the part number. 141 * @param length its length. 142 * @param leftOverOfLastFilePart remainder. 143 * @throws IOException if there is a problem reading the file. 144 */ 145 private FilePart(final long partNumber, final int length, final byte[] leftOverOfLastFilePart) throws IOException { 146 this.partNumber = partNumber; 147 final int dataLength = length + (leftOverOfLastFilePart != null ? leftOverOfLastFilePart.length : 0); 148 this.data = new byte[dataLength]; 149 final long off = (partNumber - 1) * blockSize; 150 151 // read data 152 if (partNumber > 0 /* file not empty */) { 153 channel.position(off); 154 final int countRead = channel.read(ByteBuffer.wrap(data, 0, length)); 155 if (countRead != length) { 156 throw new IllegalStateException("Count of requested bytes and actually read bytes don't match"); 157 } 158 } 159 // copy left over part into data arr 160 if (leftOverOfLastFilePart != null) { 161 System.arraycopy(leftOverOfLastFilePart, 0, data, length, leftOverOfLastFilePart.length); 162 } 163 this.currentLastBytePos = data.length - 1; 164 this.leftOver = null; 165 } 166 167 /** 168 * Constructs the buffer containing any leftover bytes. 169 */ 170 private void createLeftOver() { 171 final int lineLengthBytes = currentLastBytePos + 1; 172 if (lineLengthBytes > 0) { 173 // create left over for next block 174 leftOver = Arrays.copyOf(data, lineLengthBytes); 175 } else { 176 leftOver = null; 177 } 178 currentLastBytePos = -1; 179 } 180 181 /** 182 * Finds the new-line sequence and return its length. 183 * 184 * @param data buffer to scan. 185 * @param i start offset in buffer. 186 * @return length of newline sequence or 0 if none found. 187 */ 188 private int getNewLineMatchByteCount(final byte[] data, final int i) { 189 for (final byte[] newLineSequence : newLineSequences) { 190 boolean match = true; 191 for (int j = newLineSequence.length - 1; j >= 0; j--) { 192 final int k = i + j - (newLineSequence.length - 1); 193 match &= k >= 0 && data[k] == newLineSequence[j]; 194 } 195 if (match) { 196 return newLineSequence.length; 197 } 198 } 199 return 0; 200 } 201 202 /** 203 * Reads a line. 204 * 205 * @return the line or null. 206 */ 207 private String readLine() { //NOPMD Bug in PMD 208 209 String line = null; 210 int newLineMatchByteCount; 211 212 final boolean isLastFilePart = partNumber == 1; 213 214 int i = currentLastBytePos; 215 216 if (i == -1 && isLastFilePart && leftOver != null) { 217 line = new String(leftOver, charset); 218 leftOver = null; 219 return line; 220 } 221 222 while (i > -1) { 223 224 if (!isLastFilePart && i < avoidNewlineSplitBufferSize) { 225 // avoidNewlineSplitBuffer: for all except the last file part we 226 // take a few bytes to the next file part to avoid splitting of newlines 227 createLeftOver(); 228 break; // skip last few bytes and leave it to the next file part 229 } 230 231 // check for newline 232 if ((newLineMatchByteCount = getNewLineMatchByteCount(data, i)) > 0 /* found newline */) { 233 final int lineStart = i + 1; 234 final int lineLengthBytes = currentLastBytePos - lineStart + 1; 235 236 if (lineLengthBytes < 0) { 237 throw new IllegalStateException("Unexpected negative line length=" + lineLengthBytes); 238 } 239 final byte[] lineData = Arrays.copyOfRange(data, lineStart, lineStart + lineLengthBytes); 240 241 line = new String(lineData, charset); 242 243 currentLastBytePos = i - newLineMatchByteCount; 244 245 if (isLastFilePart && currentLastBytePos == -1 && i == 0) { 246 leftOver = new byte[0]; 247 } 248 break; // found line 249 } 250 251 // move cursor 252 i -= byteDecrement; 253 254 // end of file part handling 255 if (i < 0) { 256 if (isLastFilePart) { 257 final int lineLengthBytes = currentLastBytePos + 1; 258 if (lineLengthBytes > 0) { 259 final byte[] lineData = Arrays.copyOf(data, lineLengthBytes); 260 line = new String(lineData, charset); 261 } 262 currentLastBytePos = -1; 263 } else { 264 createLeftOver(); 265 } 266 break; // end of file part 267 } 268 } 269 270 // there will be partNumber line break anymore, this is the first line of the file 271 if (line == null && isLastFilePart && leftOver != null) { 272 line = new String(leftOver, charset); 273 leftOver = null; 274 } 275 276 return line; 277 } 278 279 /** 280 * Handles block rollover 281 * 282 * @return the new FilePart or null. 283 * @throws IOException if there was a problem reading the file. 284 */ 285 private FilePart rollOver() throws IOException { 286 287 if (currentLastBytePos > -1) { 288 throw new IllegalStateException("Current currentLastCharPos unexpectedly positive... " 289 + "last readLine() should have returned something! currentLastCharPos=" + currentLastBytePos); 290 } 291 292 if (partNumber > 1) { 293 return new FilePart(partNumber - 1, blockSize, leftOver); 294 } 295 // NO 1 was the last FilePart, we're finished 296 if (leftOver != null) { 297 throw new IllegalStateException("Unexpected leftover of the last block: leftOverOfThisFilePart=" 298 + new String(leftOver, charset)); 299 } 300 return null; 301 } 302 } 303 304 private static final String EMPTY_STRING = ""; 305 306 private static final int DEFAULT_BLOCK_SIZE = FileSystem.getCurrent().getBlockSize(); 307 308 /** 309 * Constructs a new {@link Builder}. 310 * 311 * @return a new {@link Builder}. 312 * @since 2.12.0 313 */ 314 public static Builder builder() { 315 return new Builder(); 316 } 317 318 private final int blockSize; 319 private final Charset charset; 320 private final SeekableByteChannel channel; 321 private final long totalByteLength; 322 private final long totalBlockCount; 323 private final byte[][] newLineSequences; 324 private final int avoidNewlineSplitBufferSize; 325 private final int byteDecrement; 326 private FilePart currentFilePart; 327 private boolean trailingNewlineOfFileSkipped; 328 329 private ReversedLinesFileReader(final Builder builder) throws IOException { 330 this.blockSize = builder.getBufferSize(); 331 this.charset = Charsets.toCharset(builder.getCharset()); 332 // check & prepare encoding 333 final CharsetEncoder charsetEncoder = charset.newEncoder(); 334 final float maxBytesPerChar = charsetEncoder.maxBytesPerChar(); 335 if (maxBytesPerChar == 1f || charset == StandardCharsets.UTF_8) { 336 // all one byte encodings are partNumber problem 337 byteDecrement = 1; 338 } else if (charset == Charset.forName("Shift_JIS") || // Same as for UTF-8 339 // http://www.herongyang.com/Unicode/JIS-Shift-JIS-Encoding.html 340 charset == Charset.forName("windows-31j") || // Windows code page 932 (Japanese) 341 charset == Charset.forName("x-windows-949") || // Windows code page 949 (Korean) 342 charset == Charset.forName("gbk") || // Windows code page 936 (Simplified Chinese) 343 charset == Charset.forName("x-windows-950")) { // Windows code page 950 (Traditional Chinese) 344 byteDecrement = 1; 345 } else if (charset == StandardCharsets.UTF_16BE || charset == StandardCharsets.UTF_16LE) { 346 // UTF-16 new line sequences are not allowed as second tuple of four byte 347 // sequences, 348 // however byte order has to be specified 349 byteDecrement = 2; 350 } else if (charset == StandardCharsets.UTF_16) { 351 throw new UnsupportedEncodingException("For UTF-16, you need to specify the byte order (use UTF-16BE or UTF-16LE)"); 352 } else { 353 throw new UnsupportedEncodingException("Encoding " + charset + " is not supported yet (feel free to submit a patch)"); 354 } 355 // NOTE: The new line sequences are matched in the order given, so it is 356 // important that \r\n is BEFORE \n 357 this.newLineSequences = new byte[][] { StandardLineSeparator.CRLF.getBytes(charset), StandardLineSeparator.LF.getBytes(charset), 358 StandardLineSeparator.CR.getBytes(charset) }; 359 this.avoidNewlineSplitBufferSize = newLineSequences[0].length; 360 // Open file 361 this.channel = builder.getChannel(SeekableByteChannel.class); 362 this.totalByteLength = channel.size(); 363 int lastBlockLength = (int) (totalByteLength % blockSize); 364 if (lastBlockLength > 0) { 365 this.totalBlockCount = totalByteLength / blockSize + 1; 366 } else { 367 this.totalBlockCount = totalByteLength / blockSize; 368 if (totalByteLength > 0) { 369 lastBlockLength = blockSize; 370 } 371 } 372 this.currentFilePart = new FilePart(totalBlockCount, lastBlockLength, null); 373 } 374 375 /** 376 * Constructs a ReversedLinesFileReader with default block size of 4KB and the virtual machine's {@linkplain Charset#defaultCharset() default charset}. 377 * 378 * @param file the file to be read. 379 * @throws IOException if an I/O error occurs. 380 * @deprecated Use {@link #builder()}, {@link Builder}, and {@link Builder#get()} 381 */ 382 @Deprecated 383 public ReversedLinesFileReader(final File file) throws IOException { 384 this(file, DEFAULT_BLOCK_SIZE, Charset.defaultCharset()); 385 } 386 387 /** 388 * Constructs a ReversedLinesFileReader with default block size of 4KB and the 389 * specified encoding. 390 * 391 * @param file the file to be read. 392 * @param charset the charset to use, null uses the default Charset. 393 * @throws IOException if an I/O error occurs. 394 * @since 2.5 395 * @deprecated Use {@link #builder()}, {@link Builder}, and {@link Builder#get()} 396 */ 397 @Deprecated 398 public ReversedLinesFileReader(final File file, final Charset charset) throws IOException { 399 this(file.toPath(), charset); 400 } 401 402 /** 403 * Constructs a ReversedLinesFileReader with the given block size and encoding. 404 * 405 * @param file the file to be read. 406 * @param blockSize size of the internal buffer (for ideal performance this 407 * should match with the block size of the underlying file 408 * system). 409 * @param charset the encoding of the file, null uses the default Charset. 410 * @throws IOException if an I/O error occurs. 411 * @since 2.3 412 * @deprecated Use {@link #builder()}, {@link Builder}, and {@link Builder#get()} 413 */ 414 @Deprecated 415 public ReversedLinesFileReader(final File file, final int blockSize, final Charset charset) throws IOException { 416 this(file.toPath(), blockSize, charset); 417 } 418 419 /** 420 * Constructs a ReversedLinesFileReader with the given block size and encoding. 421 * 422 * @param file the file to be read. 423 * @param blockSize size of the internal buffer (for ideal performance this 424 * should match with the block size of the underlying file 425 * system). 426 * @param charsetName the encoding of the file, null uses the default Charset. 427 * @throws IOException if an I/O error occurs. 428 * @throws java.nio.charset.UnsupportedCharsetException if the encoding is not supported. 429 * @deprecated Use {@link #builder()}, {@link Builder}, and {@link Builder#get()} 430 */ 431 @Deprecated 432 public ReversedLinesFileReader(final File file, final int blockSize, final String charsetName) throws IOException { 433 this(file.toPath(), blockSize, charsetName); 434 } 435 436 /** 437 * Constructs a ReversedLinesFileReader with default block size of 4KB and the 438 * specified encoding. 439 * 440 * @param file the file to be read. 441 * @param charset the charset to use, null uses the default Charset. 442 * @throws IOException if an I/O error occurs. 443 * @since 2.7 444 * @deprecated Use {@link #builder()}, {@link Builder}, and {@link Builder#get()} 445 */ 446 @Deprecated 447 public ReversedLinesFileReader(final Path file, final Charset charset) throws IOException { 448 this(file, DEFAULT_BLOCK_SIZE, charset); 449 } 450 451 /** 452 * Constructs a ReversedLinesFileReader with the given block size and encoding. 453 * 454 * @param file the file to be read. 455 * @param blockSize size of the internal buffer (for ideal performance this 456 * should match with the block size of the underlying file 457 * system). 458 * @param charset the encoding of the file, null uses the default Charset. 459 * @throws IOException if an I/O error occurs. 460 * @since 2.7 461 * @deprecated Use {@link #builder()}, {@link Builder}, and {@link Builder#get()} 462 */ 463 @Deprecated 464 public ReversedLinesFileReader(final Path file, final int blockSize, final Charset charset) throws IOException { 465 this(builder().setPath(file).setBufferSize(blockSize).setCharset(charset)); 466 } 467 468 /** 469 * Constructs a ReversedLinesFileReader with the given block size and encoding. 470 * 471 * @param file the file to be read. 472 * @param blockSize size of the internal buffer (for ideal performance this 473 * should match with the block size of the underlying file 474 * system). 475 * @param charsetName the encoding of the file, null uses the default Charset. 476 * @throws IOException if an I/O error occurs. 477 * @throws java.nio.charset.UnsupportedCharsetException if the encoding is not supported. 478 * @since 2.7 479 * @deprecated Use {@link #builder()}, {@link Builder}, and {@link Builder#get()} 480 */ 481 @Deprecated 482 public ReversedLinesFileReader(final Path file, final int blockSize, final String charsetName) throws IOException { 483 this(file, blockSize, Charsets.toCharset(charsetName)); 484 } 485 486 /** 487 * Closes underlying resources. 488 * 489 * @throws IOException if an I/O error occurs. 490 */ 491 @Override 492 public void close() throws IOException { 493 channel.close(); 494 } 495 496 @Override 497 public IOIterator<String> iterator() { 498 return new IOIterator<String>() { 499 500 private String next; 501 502 @Override 503 public boolean hasNext() throws IOException { 504 if (next == null) { 505 next = readLine(); 506 } 507 return next != null; 508 } 509 510 @Override 511 public String next() throws IOException { 512 if (next == null) { 513 next = readLine(); 514 } 515 final String tmp = next; 516 next = null; 517 return tmp; 518 } 519 520 @Override 521 public Iterator<String> unwrap() { 522 return null; 523 } 524 525 }; 526 } 527 528 /** 529 * Returns the lines of the file from bottom to top. 530 * 531 * @return the next line or null if the start of the file is reached. 532 * @throws IOException if an I/O error occurs. 533 */ 534 public String readLine() throws IOException { 535 String line = currentFilePart.readLine(); 536 while (line == null) { 537 currentFilePart = currentFilePart.rollOver(); 538 if (currentFilePart == null) { 539 // partNumber more FileParts: we're done, leave line set to null 540 break; 541 } 542 line = currentFilePart.readLine(); 543 } 544 // aligned behavior with BufferedReader that doesn't return a last, empty line 545 if (EMPTY_STRING.equals(line) && !trailingNewlineOfFileSkipped) { 546 trailingNewlineOfFileSkipped = true; 547 line = readLine(); 548 } 549 return line; 550 } 551 552 /** 553 * Returns {@code lineCount} lines of the file from bottom to top. 554 * <p> 555 * If there are less than {@code lineCount} lines in the file, then that's what 556 * you get. 557 * </p> 558 * <p> 559 * Note: You can easily flip the result with {@link Collections#reverse(List)}. 560 * </p> 561 * 562 * @param lineCount How many lines to read. 563 * @return A new list. 564 * @throws IOException if an I/O error occurs. 565 * @since 2.8.0 566 */ 567 public List<String> readLines(final int lineCount) throws IOException { 568 if (lineCount < 0) { 569 throw new IllegalArgumentException("lineCount < 0"); 570 } 571 final ArrayList<String> arrayList = new ArrayList<>(lineCount); 572 for (int i = 0; i < lineCount; i++) { 573 final String line = readLine(); 574 if (line == null) { 575 return arrayList; 576 } 577 arrayList.add(line); 578 } 579 return arrayList; 580 } 581 582 /** 583 * Returns the last {@code lineCount} lines of the file. 584 * <p> 585 * If there are less than {@code lineCount} lines in the file, then that's what 586 * you get. 587 * </p> 588 * 589 * @param lineCount How many lines to read. 590 * @return A String. 591 * @throws IOException if an I/O error occurs. 592 * @since 2.8.0 593 */ 594 public String toString(final int lineCount) throws IOException { 595 final List<String> lines = readLines(lineCount); 596 Collections.reverse(lines); 597 return lines.isEmpty() ? EMPTY_STRING : String.join(System.lineSeparator(), lines) + System.lineSeparator(); 598 } 599 600 @Override 601 public Iterable<String> unwrap() { 602 return null; 603 } 604 605}