用 HttpClient 抓取网页时,它不会自动解压经过压缩(gzip/deflate)的响应数据,需要根据响应头 Content-Encoding 自行解压。
//当响应头中,content_encoding = "deflate" 的时候: protected byte[] defalteUnCompress(byte[] src)throws Exception{ ByteArrayInputStream byteIn = new ByteArrayInputStream(src); ByteArrayOutputStream byteOut = new ByteArrayOutputStream(); InflaterInputStream gzipIn = null; byte[] content = new byte[1024]; int readLen = 0; try{ Inflater inf = new Inflater(true); //这个地方,直接new InflaterInputStream(in)就会出错 gzipIn = new InflaterInputStream(byteIn,inf); while ((readLen = gzipIn.read(content)) != -1) { byteOut.write(content, 0, readLen); } return byteOut.toByteArray(); }catch(Exception e){ throw e; }finally{ try{ if(byteIn != null){ byteIn.close(); byteIn = null; } if(byteOut != null){ byteOut.close(); byteOut = null; } if(gzipIn != null){ gzipIn.close(); gzipIn = null; } }catch(Exception e){} } } //当响应头中,content_encoding = "gzip,deflate" 的时候: protected byte[] gzipUnCompress(byte[] src)throws Exception{ ByteArrayInputStream byteIn = new ByteArrayInputStream(src); ByteArrayOutputStream byteOut = new ByteArrayOutputStream(); GZIPInputStream gzipIn = null; byte[] content = new byte[1024]; int readLen = 0; try{ gzipIn = new GZIPInputStream(byteIn); while ((readLen = gzipIn.read(content)) != -1) { byteOut.write(content, 0, readLen); } return byteOut.toByteArray(); }catch(Exception e){ throw e; }finally{ try{ if(byteIn != null){ byteIn.close(); byteIn = null; } if(byteOut != null){ byteOut.close(); byteOut = null; } if(gzipIn != null){ gzipIn.close(); gzipIn = null; } }catch(Exception e){} } }