用httpclient抓取网页时gzip、deflate的解压

用 HttpClient 抓取网页时,它不会自动解压响应数据,需要根据响应头 Content-Encoding 自行解压。

	
//When the response header is Content-Encoding: deflate

/**
 * Decompresses a response body whose Content-Encoding is "deflate".
 *
 * <p>The bytes are first inflated as a raw DEFLATE stream (no zlib header), which is the
 * variant a plain {@code new InflaterInputStream(in)} fails on. If that attempt fails, the
 * bytes are retried as a zlib-wrapped stream, so both variants are accepted.
 *
 * @param src the compressed bytes
 * @return the decompressed bytes
 * @throws Exception if src cannot be inflated in either format; the failure of the raw
 *         attempt is thrown, with the failure of the zlib attempt attached as suppressed
 */
protected byte[] defalteUnCompress(byte[] src)throws Exception{
	try{
		// Raw DEFLATE first: this is the only format the method accepted before.
		return inflateWith(src, true);
	}catch(java.io.IOException rawFailure){
		try{
			return inflateWith(src, false);
		}catch(java.io.IOException zlibFailure){
			// Neither format matched: report the same exception callers saw previously.
			rawFailure.addSuppressed(zlibFailure);
			throw rawFailure;
		}
	}
}

/** Inflates src with a dedicated Inflater (nowrap=true means raw DEFLATE) and always releases it. */
private byte[] inflateWith(byte[] src, boolean nowrap) throws java.io.IOException{
	Inflater inf = new Inflater(nowrap);
	try (InflaterInputStream inflaterIn = new InflaterInputStream(new ByteArrayInputStream(src), inf)) {
		ByteArrayOutputStream byteOut = new ByteArrayOutputStream();
		byte[] content = new byte[1024];
		int readLen;
		while ((readLen = inflaterIn.read(content)) != -1) {
			byteOut.write(content, 0, readLen);
		}
		return byteOut.toByteArray();
	}finally{
		// Closing the stream does not end an Inflater that was passed in by the caller,
		// so it has to be ended explicitly to free its native memory.
		inf.end();
	}
}
//When the response header is Content-Encoding: gzip (the request usually sent "Accept-Encoding: gzip,deflate" and the server chose gzip)
	/**
	 * Decompresses a gzip-encoded byte array.
	 *
	 * @param src the gzip-compressed bytes
	 * @return the decompressed bytes
	 * @throws Exception if src cannot be read as a gzip stream
	 */
	protected byte[] gzipUnCompress(byte[] src)throws Exception{
		ByteArrayOutputStream sink = new ByteArrayOutputStream();
		try (GZIPInputStream gunzip = new GZIPInputStream(new ByteArrayInputStream(src))) {
			byte[] chunk = new byte[1024];
			// Copy the inflated data over in 1 KiB chunks until the stream reports end of input.
			for (int n = gunzip.read(chunk); n != -1; n = gunzip.read(chunk)) {
				sink.write(chunk, 0, n);
			}
		}
		return sink.toByteArray();
	}

猜你喜欢

转载自eric-gcm.iteye.com/blog/2227303