001 /*--------------------------------------------------------------------------+
002 $Id: LineSplitter.java 26268 2010-02-18 10:44:30Z juergens $
003 | |
004 | Copyright 2005-2010 Technische Universitaet Muenchen |
005 | |
006 | Licensed under the Apache License, Version 2.0 (the "License"); |
007 | you may not use this file except in compliance with the License. |
008 | You may obtain a copy of the License at |
009 | |
010 | http://www.apache.org/licenses/LICENSE-2.0 |
011 | |
012 | Unless required by applicable law or agreed to in writing, software |
013 | distributed under the License is distributed on an "AS IS" BASIS, |
014 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
015 | See the License for the specific language governing permissions and |
016 | limitations under the License. |
017 +--------------------------------------------------------------------------*/
018 package edu.tum.cs.commons.string;
019
020 /**
 * This class is used to split a string into lines.
 * <p>
 * <b>Note:</b> According to tests I performed this is the fastest method to
 * split a string. It is about nine times faster than the regex-based split
 * with:
026 *
027 * <pre>
028 * Pattern pattern = Pattern.compile("\r\n|\r|\n");
029 * pattern.split(content);
030 * </pre>
031 *
032 * @author Florian Deissenboeck
033 * @author $Author: juergens $
034 *
035 * @version $Revision: 26268 $
036 * @levd.rating GREEN Hash: F99C8B9E8F156988EBFA29796D5D1AEF
037 */
public class LineSplitter {

    /**
     * Copy of the content being split, or <code>null</code> if no content was
     * set or all lines have been handed out.
     */
    private char[] buffer;

    /** Index in {@link #buffer} at which the most recently returned line begins. */
    private int lineStart;

    /**
     * Number of characters consumed by the most recent call to
     * {@link #getNextLine()}: the line itself plus its line separator.
     */
    private int consumed;

    /**
     * Sets the string to split and rewinds the splitter to its beginning.
     *
     * @param content
     *            the string to split. If it is <code>null</code> or the empty
     *            string, {@link #getNextLine()} will return <code>null</code>.
     */
    public void setContent(String content) {
        buffer = (content == null) ? null : content.toCharArray();
        lineStart = 0;
        consumed = 0;
    }

    /**
     * Returns the next line of the content. Lines are terminated by
     * <code>\n</code>, <code>\r</code> or <code>\r\n</code>; the terminator is
     * not part of the returned string. A terminator at the very end of the
     * content does not produce an additional empty line.
     *
     * @return the next line, or <code>null</code> if no content was set or all
     *         lines were already returned. The call that detects the end of
     *         the content drops the internal character array so that it
     *         becomes eligible for garbage collection.
     */
    public String getNextLine() {
        if (buffer == null) {
            return null;
        }

        // Step over everything the previous call consumed.
        lineStart += consumed;

        final int end = buffer.length;
        if (lineStart >= end) {
            // Nothing left: release the array to allow garbage collection.
            buffer = null;
            return null;
        }

        // Find the end of the line: the first '\n' or '\r', or the end of the
        // content if the last line is not terminated.
        int lineEnd = lineStart;
        while (lineEnd < end && buffer[lineEnd] != '\n' && buffer[lineEnd] != '\r') {
            lineEnd++;
        }

        // Determine how many separator characters follow the line: none at the
        // end of the content, two for "\r\n", one otherwise.
        int separatorLength = 0;
        if (lineEnd < end) {
            boolean isCrLf = buffer[lineEnd] == '\r' && lineEnd + 1 < end
                    && buffer[lineEnd + 1] == '\n';
            separatorLength = isCrLf ? 2 : 1;
        }

        int lineLength = lineEnd - lineStart;
        consumed = lineLength + separatorLength;

        return new String(buffer, lineStart, lineLength);
    }
}