JDK、commons-lang 和 Guava 的字符串 split 效率测试

最近要做一个数据采集分析系统:每五分钟采集约 6 GB 数据,采集到的数据每一行是一条完整记录,需要对每条记录做 split 处理。为了找到最优的 split 方法,这里对 JDK(String.split)、commons-lang(StringUtils.split)和 Guava(Splitter)三种字符串切分方式的效率做了测试。

测试读取一个 250 MB 的文件,共约 100 万行数据,测试数据对比:




/**
 * Benchmarks three ways of splitting a '|'-delimited line: {@code String.split}
 * (JDK), {@code StringUtils.split} (commons-lang) and Guava's {@code Splitter}.
 *
 * <p>Every test streams the same data file line by line and splits each line, so
 * the timings reported by {@link BenchmarkRule} are measured on identical input.
 *
 * <p>NOTE(review): the three APIs do not treat empty fields identically
 * ({@code String.split} keeps interior empty strings, the other two are configured
 * to drop them), so per-line token counts may differ slightly between tests.
 */
@AxisRange(min = 0)
@BenchmarkMethodChart(filePrefix = "split-benchmark-barchart")
public class SplitTest {
	@Rule
	public BenchmarkRule benchmarkRun = new BenchmarkRule();

	/** Single input shared by all benchmarks so that their timings are comparable. */
	private static final File DATA_FILE = new File("G:/huawu/PS_FileInnerMon1_20130723170003-9836.dat");

	private static final Splitter splitter = Splitter.on('|').omitEmptyStrings();

	/**
	 * Times {@code String.split} on every line of the data file.
	 *
	 * @throws IOException if the data file cannot be read
	 */
	@Test
	public void jdkSplitTest() throws IOException {
		Files.readLines(DATA_FILE, Charsets.UTF_8, new TokenCountingProcessor() {
			@Override
			int countTokens(String line) {
				// String.split takes a regex; a bare "|" is an empty alternation that
				// matches between every character, so the pipe must be escaped.
				return line.split("\\|").length;
			}
		});
	}

	/**
	 * Times commons-lang {@code StringUtils.split} on every line of the data file.
	 *
	 * @throws IOException if the data file cannot be read
	 */
	@Test
	public void langSplitTest() throws IOException {
		Files.readLines(DATA_FILE, Charsets.UTF_8, new TokenCountingProcessor() {
			@Override
			int countTokens(String line) {
				return StringUtils.split(line, '|').length;
			}
		});
	}

	/**
	 * Times Guava {@code Splitter} on every line of the data file.
	 *
	 * @throws IOException if the data file cannot be read
	 */
	@Test
	public void guavaSplitTest() throws IOException {
		Files.readLines(DATA_FILE, Charsets.UTF_8, new TokenCountingProcessor() {
			@Override
			int countTokens(String line) {
				// Splitter.split returns an Iterable that may be evaluated lazily;
				// walk it so the splitting work is actually performed and measured.
				int tokens = 0;
				for (String ignored : splitter.split(line)) {
					tokens++;
				}
				return tokens;
			}
		});
	}

	/**
	 * Line processor shared by the benchmarks: delegates the split of each line to
	 * {@link #countTokens(String)}, keeps a running total of the tokens produced so
	 * that every split result is used, and reports the number of lines processed.
	 */
	private abstract static class TokenCountingProcessor implements LineProcessor<Integer> {
		private int lineCount;
		private long tokenCount;

		/**
		 * Splits {@code line} with the strategy under test.
		 *
		 * @param line one line of the data file
		 * @return the number of tokens the split produced
		 */
		abstract int countTokens(String line);

		@Override
		public boolean processLine(String line) throws IOException {
			lineCount++;
			tokenCount += countTokens(line);
			// Returning true asks readLines to keep feeding lines.
			return true;
		}

		@Override
		public Integer getResult() {
			return lineCount;
		}
	}
}

猜你喜欢

转载自melin.iteye.com/blog/1934525