逐行读取。
首先获取文件的编码格式;
然后按检测到的编码逐行读取文件内容。
log4j-1.2.17.jar
slf4j-api-1.4.3.jar
slf4j-log4j12-1.4.0.jar
1. 获取文件的编码格式
import java.io.BufferedInputStream; import java.io.File; import java.io.FileInputStream; import java.io.FileNotFoundException; import java.io.IOException; public class TxtCommonMethods { private static final Logger log = LoggerFactory .getLogger(TxtCommonMethods.class); /** * @Title: getFileCharset * @Description: 判断文件的编码格式 * @param filePath * 文件绝对路径 * @return String * @author * @date 2015年12月26日 */ public static String getFileCharset(String filePath) { File file = new File(filePath); if (!file.exists()) { System.out.println("File not found."); } // 默认编码格式为GBK String charset = "GBK"; FileInputStream is = null; BufferedInputStream bis = null; try { byte[] first3Bytes = new byte[3]; boolean checked = false; is = new FileInputStream(file); bis = new BufferedInputStream(is); bis.mark(0); int read = bis.read(first3Bytes, 0, 3); if (-1 == read) { charset = "GBK"; } else if (first3Bytes[0] == (byte) 0xFF && first3Bytes[1] == (byte) 0xFE) { charset = "UTF-16LE"; checked = true; } else if (first3Bytes[0] == (byte) 0xFE && first3Bytes[1] == (byte) 0xFF) { charset = "UTF-16BE"; checked = true; } else if (first3Bytes[0] == (byte) 0xEF && first3Bytes[1] == (byte) 0xBB && first3Bytes[2] == (byte) 0xBF) { charset = "UTF-8"; checked = true; } bis.reset(); if (!checked) { int loc = 0; while ((read = bis.read()) != -1) { loc++; if (read >= 0xF0) { break; } if (0x80 <= read && read <= 0xBF) { // 单独出现BF以下的,也算GBK break; } if (0x80 <= read && read <= 0xDF) { read = bis.read(); if (0x80 <= read && read <= 0xBF) { // GBK continue; } else { break; } } else if (0xE0 <= read && read <= 0xEF) { read = bis.read(); if (0x80 <= read && read <= 0xBF) { read = bis.read(); if (0x80 <= read && read <= 0xBF) { charset = "UTF-8"; break; } else { break; } } else { break; } } } } } catch (FileNotFoundException e) { log.error( "Get charset of '" + filePath + "' fail:" + e.getMessage(), e); } catch (IOException e) { log.error( "Get charset of '" + filePath + "' fail:" + e.getMessage(), e); } catch 
(Exception e) { log.error( "Get charset of '" + filePath + "' fail:" + e.getMessage(), e); } finally { TxtIOUtils.closeStream(bis, null); TxtIOUtils.closeStream(is, null); } return charset; } }
2. 读取文件内容
import java.io.BufferedReader; import java.io.FileInputStream; import java.io.FileNotFoundException; import java.io.IOException; import java.io.InputStream; import java.io.InputStreamReader; import java.io.Reader; import java.io.UnsupportedEncodingException; import java.util.List; import org.slf4j.Logger; import org.slf4j.LoggerFactory; public class ReadLoadTxtFileRunnable implements Runnable { private static final Logger log = LoggerFactory .getLogger(ReadLoadTxtFileRunnable.class); private String filePath; public ReadLoadTxtFileRunnable(String filePath) { this.filePath = filePath; } @Override public void run() { this.read(filePath); } /** * @Title: read * @Description: 读取txt文件内容 * @param filePath * 文件绝对路径 * @return List * @author * @date 2015年12月26日 */ private void read(String filePath) { log.info("Read Whole Grid Load txt file,filePath=" + filePath); filePath = null == filePath ? null : filePath.trim(); if (null == filePath || "".equals(filePath)) { log.error("The filePath is null."); return null; } InputStream is = null; Reader reader = null; BufferedReader bufRead = null; try { is = new FileInputStream(filePath); // 判断文件的编码格式 String charset = TxtCommonMethods.getFileCharset(filePath); log.info("The charset of '" + filePath + "' is:" + charset); reader = new InputStreamReader(is, charset); bufRead = new BufferedReader(reader); String line = null; String[] arrs = null; while ((line = bufRead.readLine()) != null) { System.out.println(line); } } catch (FileNotFoundException e) { log.error("Read file '" + filePath + "' fail:" + e.getMessage(), e); } catch (UnsupportedEncodingException e) { log.error("Read file '" + filePath + "' fail:" + e.getMessage(), e); } catch (IOException e) { log.error("Read file '" + filePath + "' fail:" + e.getMessage(), e); } catch (Exception e) { log.error("Read file '" + filePath + "' fail:" + e.getMessage(), e); } finally { TxtIOUtils.closeReader(bufRead); TxtIOUtils.closeReader(reader); TxtIOUtils.closeStream(is, null); } 
log.info("End of Read Whole Grid Load txt file"); } }
3. TxtIOUtils.java
public class TxtIOUtils { private static final Logger log = LoggerFactory.getLogger(TxtIOUtils.class); public static void closeStream(InputStream is, OutputStream out) { if (null != out) { try { out.close(); } catch (IOException e) { log.error("Close OutputStream fail:" + e.getMessage(), e); } out = null; } if (null != is) { try { is.close(); } catch (IOException e) { log.error("Close InputStream fail:" + e.getMessage(), e); } is = null; } } public static void closeReader(Reader reader) { if (null != reader) { try { reader.close(); } catch (IOException e) { log.error("Close Reader fail:" + e.getMessage(), e); } reader = null; } } public static void closeWriter(Writer writer) { if (null != writer) { try { writer.close(); } catch (IOException e) { log.error("Close Reader fail:" + e.getMessage(), e); } writer = null; } } public static void closeChannel(Channel c) { if (null != c) { try { c.close(); } catch (IOException e) { log.error("Close Channel fail:" + e.getMessage(), e); } c = null; } } }