Java中 快速处理大数据文件方法工具类FileUtils

版权声明:本文为博主原创文章,未经博主允许不得转载。 https://blog.csdn.net/u013946356/article/details/88117309

对于亿级数量级数据进行模糊匹配的业务需求,传统sql查询肯定不行。一种解决方法是可以利用hive来匹配,另一种不妨试试Apache的FileUtils工具类,具体处理方法是:1.将要处理的数据导出为文件;2.利用FileUtils类进行读写操作,每读一行进行业务逻辑判断,若符合自己需求,则再次写入到另一文件中。只要磁盘io给力,分分钟筛选完毕。

FileUtils.writeStringToFile(file, sb.toString(), "UTF-8",true); //true参数表示追加内容

package com.rzx.update.job;

import java.io.File;
import java.io.IOException;
import java.util.Iterator;
import java.util.List;

import org.apache.commons.io.FileUtils;
import org.junit.Test;
import org.junit.runner.RunWith;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.springframework.stereotype.Component;
import org.springframework.test.context.ContextConfiguration;
import org.springframework.test.context.junit4.SpringJUnit4ClassRunner;

/**
 * Reads a company data file line by line and routes every line containing a
 * payment-related keyword into one of two output files, one output line per
 * matched keyword, delimited as {@code line + "\t\u000B" + keyword + "\u000B\t"}.
 *
 * <p>Input:  /home/full_amount/company_business.dat (UTF-8, one company per line)
 * <br>Output: /home/yhj/third_payee_data/third_payee_data.dat  (primary keywords)
 * <br>Output: /home/yhj/third_payee_data/third_payee_data1.dat (secondary keywords)
 */
@Component("fileDataThirdPayeeJob")
@RunWith(SpringJUnit4ClassRunner.class) // run with JUnit 4 under a Spring test context
@ContextConfiguration(locations = { "classpath:applicationContext.xml" }) // load Spring config
public class FileDataThirdPayeeJob {
	private static final Logger logger = LoggerFactory.getLogger(FileDataThirdPayeeJob.class);

	/** Primary payment keywords, routed to third_payee_data.dat. */
	private static final String[] PRIMARY_KEYWORDS =
			{ "预付卡", "支付", "收单", "受理", "银行卡", "POS" };

	/** Secondary payment keywords, routed to third_payee_data1.dat. */
	private static final String[] SECONDARY_KEYWORDS =
			{ "智能卡", "代理服务", "服务卡", "结算", "充值", "IC卡", "一卡通", "电子卡", "磁卡", "终端",
			  "转账", "充值卡", "缴费", "金融设备", "代缴", "读卡机", "收款机", "终端机", "记帐", "磁条卡ICO",
			  "现钞", "收转", "吸储", "网上支付", "加油卡", "就餐卡", "健身卡", "小额", "售卡", "代付",
			  "帐单", "P2P", "网贷" };

	/**
	 * Loads the whole input file into memory, scans each line against both
	 * keyword lists, and appends matches to the corresponding output file.
	 *
	 * @throws IOException if the input file cannot be read or an output file
	 *                     cannot be written
	 */
	@Test
	public void execute() throws IOException {
		logger.info("start读取文件================");

		List<String> lines = FileUtils.readLines(new File("/home/full_amount/company_business.dat"), "UTF-8");

		logger.info("start1================");

		// Output targets are loop-invariant; create them once, not per line.
		File primaryOut = new File("/home/yhj/third_payee_data/third_payee_data.dat");
		File secondaryOut = new File("/home/yhj/third_payee_data/third_payee_data1.dat");

		for (String companyInfo : lines) {
			logger.info(companyInfo);

			// BUGFIX: the loop bound was "i < str.length - 1", which silently
			// skipped the last keyword of each array ("POS" / "网贷").
			StringBuilder primary = new StringBuilder();
			for (String keyword : PRIMARY_KEYWORDS) {
				if (companyInfo.contains(keyword)) {
					primary.append(companyInfo).append("\t\u000B").append(keyword).append("\u000B\t\n");
				}
			}
			if (primary.length() > 0) {
				// BUGFIX: append (4th arg true) instead of overwriting the file
				// on every iteration, which kept only the last line's matches.
				FileUtils.writeStringToFile(primaryOut, primary.toString(), "UTF-8", true);
			}

			StringBuilder secondary = new StringBuilder();
			for (String keyword : SECONDARY_KEYWORDS) {
				if (companyInfo.contains(keyword)) {
					// BUGFIX: the original appended to the wrong buffer (sb), so
					// secondary matches were never written to the secondary file.
					secondary.append(companyInfo).append("\t\u000B").append(keyword).append("\u000B\t\n");
				}
			}
			if (secondary.length() > 0) {
				FileUtils.writeStringToFile(secondaryOut, secondary.toString(), "UTF-8", true);
			}
		}

		logger.info("end================");
	}
}

猜你喜欢

转载自blog.csdn.net/u013946356/article/details/88117309