split函数
split(String regex, int limit)
第二个参数可以省略:省略时调用的是重载方法 split(String regex),其效果等同于 limit 为 0。本文主要讨论 limit 参数的含义及其对字符串拆分结果的影响。
完整源代码:
/**
* Splits this string around matches of the given
* <a href="../util/regex/Pattern.html#sum">regular expression</a>.
*
* <p> The array returned by this method contains each substring of this
* string that is terminated by another substring that matches the given
* expression or is terminated by the end of the string. The substrings in
* the array are in the order in which they occur in this string. If the
* expression does not match any part of the input then the resulting array
* has just one element, namely this string.
*
* <p> When there is a positive-width match at the beginning of this
* string then an empty leading substring is included at the beginning
* of the resulting array. A zero-width match at the beginning however
* never produces such empty leading substring.
*
* <p> The {@code limit} parameter controls the number of times the
* pattern is applied and therefore affects the length of the resulting
* array.
* <ul>
* <li><p>
* If the <i>limit</i> is positive then the pattern will be applied
* at most <i>limit</i> - 1 times, the array's length will be
* no greater than <i>limit</i>, and the array's last entry will contain
* all input beyond the last matched delimiter.</p></li>
*
* <li><p>
* If the <i>limit</i> is zero then the pattern will be applied as
* many times as possible, the array can have any length, and trailing
* empty strings will be discarded.</p></li>
*
* <li><p>
* If the <i>limit</i> is negative then the pattern will be applied
* as many times as possible and the array can have any length.</p></li>
* </ul>
*
* <p> The string {@code "boo:and:foo"}, for example, yields the
* following results with these parameters:
*
* <blockquote><table class="plain">
* <caption style="display:none">Split example showing regex, limit, and result</caption>
* <thead>
* <tr>
* <th scope="col">Regex</th>
* <th scope="col">Limit</th>
* <th scope="col">Result</th>
* </tr>
* </thead>
* <tbody>
* <tr><th scope="row" rowspan="3" style="font-weight:normal">:</th>
* <th scope="row" style="font-weight:normal; text-align:right; padding-right:1em">2</th>
* <td>{@code { "boo", "and:foo" }}</td></tr>
* <tr><!-- : -->
* <th scope="row" style="font-weight:normal; text-align:right; padding-right:1em">5</th>
* <td>{@code { "boo", "and", "foo" }}</td></tr>
* <tr><!-- : -->
* <th scope="row" style="font-weight:normal; text-align:right; padding-right:1em">-2</th>
* <td>{@code { "boo", "and", "foo" }}</td></tr>
* <tr><th scope="row" rowspan="3" style="font-weight:normal">o</th>
* <th scope="row" style="font-weight:normal; text-align:right; padding-right:1em">5</th>
* <td>{@code { "b", "", ":and:f", "", "" }}</td></tr>
* <tr><!-- o -->
* <th scope="row" style="font-weight:normal; text-align:right; padding-right:1em">-2</th>
* <td>{@code { "b", "", ":and:f", "", "" }}</td></tr>
* <tr><!-- o -->
* <th scope="row" style="font-weight:normal; text-align:right; padding-right:1em">0</th>
* <td>{@code { "b", "", ":and:f" }}</td></tr>
* </tbody>
* </table></blockquote>
*
* <p> An invocation of this method of the form
* <i>str.</i>{@code split(}<i>regex</i>{@code ,} <i>n</i>{@code )}
* yields the same result as the expression
*
* <blockquote>
* <code>
* {@link java.util.regex.Pattern}.{@link
* java.util.regex.Pattern#compile(String) compile}(<i>regex</i>).{@link
* java.util.regex.Pattern#split(java.lang.CharSequence,int) split}(<i>str</i>, <i>n</i>)
* </code>
* </blockquote>
*
*
* @param regex
* the delimiting regular expression
*
* @param limit
* the result threshold, as described above
*
* @return the array of strings computed by splitting this string
* around matches of the given regular expression
*
* @throws PatternSyntaxException
* if the regular expression's syntax is invalid
*
* @see java.util.regex.Pattern
*
* @since 1.4
*/
public String[] split(String regex, int limit) {
/* fastpath if the regex is a
* (1) one-char String and this character is not one of the
* RegEx's meta characters ".$|()[{^?*+\\", or
* (2) two-char String and the first char is the backslash and
* the second is not the ascii digit or ascii letter.
*/
// ch receives the literal delimiter character. It is assigned inside the
// condition below, as a side effect of whichever alternative is evaluated.
//
// The expressions of the form ((ch - lo) | (hi - ch)) < 0 are range tests:
// the OR of the two differences is negative exactly when at least one of
// them is negative, i.e. when ch lies outside [lo, hi].
//
// The final clause rejects surrogate code units
// (MIN_HIGH_SURROGATE..MAX_LOW_SURROGATE) as the delimiter.
char ch = 0;
if (((regex.length() == 1 &&
".$|()[{^?*+\\".indexOf(ch = regex.charAt(0)) == -1) ||
(regex.length() == 2 &&
regex.charAt(0) == '\\' &&
(((ch = regex.charAt(1))-'0')|('9'-ch)) < 0 &&
((ch-'a')|('z'-ch)) < 0 &&
((ch-'A')|('Z'-ch)) < 0)) &&
(ch < Character.MIN_HIGH_SURROGATE ||
ch > Character.MAX_LOW_SURROGATE))
{
// off: start index of the piece currently being collected.
// next: index of the next occurrence of ch at or after off.
int off = 0;
int next = 0;
// Only a positive limit caps the number of pieces; zero and negative
// limits split at every occurrence.
boolean limited = limit > 0;
ArrayList<String> list = new ArrayList<>();
while ((next = indexOf(ch, off)) != -1) {
if (!limited || list.size() < limit - 1) {
// Still below the cap: emit the piece before the delimiter and
// continue scanning just past the one-char delimiter.
list.add(substring(off, next));
off = next + 1;
} else { // last one
// Cap reached: everything from off to the end of the string
// becomes the final piece, with remaining delimiters left in it.
//assert (list.size() == limit - 1);
int last = length();
list.add(substring(off, last));
off = last;
break;
}
}
// If no match was found, return this
// (off is still 0 only when the loop body never ran).
if (off == 0)
return new String[]{this};
// Add remaining segment
// (skipped when the cap was hit above, because the tail is already in the
// list and list.size() == limit).
if (!limited || list.size() < limit)
list.add(substring(off, length()));
// Construct result
int resultSize = list.size();
// Only limit == 0 discards trailing empty strings; they are kept for
// positive and negative limits.
if (limit == 0) {
while (resultSize > 0 && list.get(resultSize - 1).isEmpty()) {
resultSize--;
}
}
String[] result = new String[resultSize];
return list.subList(0, resultSize).toArray(result);
}
// General case: the regex is not a single literal character, so delegate to
// the regex engine.
return Pattern.compile(regex).split(this, limit);
}
API Test
测试代码
import java.util.Arrays;
import java.util.Scanner;
/**
 * Demonstrates how the {@code limit} argument of
 * {@link String#split(String, int)} affects the result when splitting on
 * {@code "-"}.
 */
public class StringSplit {
    /** Delimiter used by every split in this demo. */
    private static final String DELIMITER = "-";

    /**
     * Splits {@code s} on {@code "-"} with several {@code limit} values,
     * prints each resulting array to standard output, and returns the first
     * piece.
     *
     * @param n unused by this method; kept so existing callers still compile
     * @param s the string to split
     * @return the text before the first {@code "-"}, or {@code s} itself when
     *         it contains no {@code "-"}
     */
    public static String splitString(int n, String s) {
        // limit = 2: split only at the first "-"; the rest stays in one piece.
        String[] firstAndRest = s.split(DELIMITER, 2);
        printParts("subStrings", firstAndRest);

        // Exercise the split API with the remaining limit values.
        printParts("subStrings0", s.split(DELIMITER));      // no limit: split everywhere
        printParts("subStrings-2", s.split(DELIMITER, -2));
        printParts("subStrings-1", s.split(DELIMITER, -1));
        printParts("subStrings1", s.split(DELIMITER, 1));   // one piece: the original string
        printParts("subStrings3", s.split(DELIMITER, 3));   // at most three pieces
        printParts("subStrings5", s.split(DELIMITER, 5));

        // The first piece is returned unprocessed.
        return firstAndRest[0];
    }

    /** Prints one labelled split result as {@code label = [a, b, ...]}. */
    private static void printParts(String label, String[] parts) {
        System.out.println(label + " = " + Arrays.toString(parts));
    }

    /**
     * Reads an integer K and a string S from standard input, echoes them, and
     * runs the split demonstration on S.
     *
     * @param args ignored
     */
    public static void main(String[] args) {
        // try-with-resources closes the Scanner once the input has been read.
        try (Scanner scanner = new Scanner(System.in)) {
            int k = scanner.nextInt();
            String s = scanner.next();
            System.out.println("K = " + k + ", S = " + s);
            splitString(k, s);
        }
    }
}
输入:
12
12abc-abCABc-4aB@
输出:
K = 12, S = 12abc-abCABc-4aB@
subStrings = [12abc, abCABc-4aB@]
subStrings0 = [12abc, abCABc, 4aB@]
subStrings-2 = [12abc, abCABc, 4aB@]
subStrings-1 = [12abc, abCABc, 4aB@]
subStrings1 = [12abc-abCABc-4aB@]
subStrings3 = [12abc, abCABc, 4aB@]
subStrings5 = [12abc, abCABc, 4aB@]
Process finished with exit code 0
从测试结果可以看出,对于以上输入的字符串,用“-”拆分的时候,
当limit=0、-1、-2时,结果与limit=3、5时是一样的,都拆分成三个。(limit=0与负数的区别在于:limit=0会丢弃结果末尾的空字符串,负数则会保留;本例末尾没有空字符串,所以结果相同。)
当limit=2时,拆分成两个,只在第一个“-”处进行了拆分,其余部分原样保留在第二个字符串中。
当limit=1时,没有拆分,输出一个原字符串。