Httpclient-4.3.6 爬虫

package kit;

import java.io.IOException;
import java.net.SocketTimeoutException;
import java.util.ArrayList;
import java.util.List;

import org.apache.http.HttpEntity;
import org.apache.http.HttpHost;
import org.apache.http.HttpStatus;
import org.apache.http.NameValuePair;
import org.apache.http.client.ClientProtocolException;
import org.apache.http.client.config.RequestConfig;
import org.apache.http.client.entity.UrlEncodedFormEntity;
import org.apache.http.client.methods.CloseableHttpResponse;
import org.apache.http.client.methods.HttpGet;
import org.apache.http.client.methods.HttpPost;
import org.apache.http.conn.HttpHostConnectException;
import org.apache.http.impl.client.CloseableHttpClient;
import org.apache.http.impl.client.HttpClients;
import org.apache.http.message.BasicNameValuePair;
import org.apache.http.util.EntityUtils;
import org.apache.log4j.Logger;

/**
 * Small helper around Apache HttpClient for fetching pages with GET
 * (directly or through an HTTP proxy) and for submitting a form with POST.
 *
 * <p>Every call creates its own {@link CloseableHttpClient} and closes it
 * before returning, so instances of this class hold no state.
 */
public class HttpclientUtil {
	private static final Logger logger = Logger.getLogger(HttpclientUtil.class);

	/**
	 * Charset used to encode form fields and to decode a response body whose
	 * entity does not declare a charset of its own.
	 */
	private static final String DEFAULT_CHARSET = "utf-8";

	/** Socket (read) timeout, in milliseconds, applied to GET requests. */
	private static final int SOCKET_TIMEOUT_MS = 12000;

	/**
	 * Entry point kept for manual experiments; it currently performs no work.
	 *
	 * @param args ignored
	 * @throws Exception never thrown by the current (empty) body
	 */
	public static void main(String[] args) throws Exception {
		// Example: new HttpclientUtil().requestByGet("http://example.com/");
	}

	/**
	 * Fetches a URL with GET.
	 *
	 * @param url absolute URL to fetch
	 * @return the response body; an empty string when the response has no
	 *         body, or when a {@link HttpHostConnectException} or
	 *         {@link SocketTimeoutException} occurred (both are logged and
	 *         not rethrown)
	 * @throws ClientProtocolException on an HTTP protocol error
	 * @throws IOException on any other I/O failure
	 */
	public String requestByGet(String url) throws ClientProtocolException,
			IOException {
		HttpGet request = new HttpGet(url);
		// No proxy on this path; only the read timeout is configured.
		RequestConfig config = RequestConfig.custom()
				.setSocketTimeout(SOCKET_TIMEOUT_MS).build();
		applyBrowserHeaders(
				request,
				"Mozilla/5.0 (Windows NT 6.3; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/34.0.1847.137 Safari/537.36 LBBROWSER");
		request.setConfig(config);
		System.out.println("Executing request " + request.getRequestLine());
		// A null target lets HttpClient derive the host from the request URI.
		return executeGet(null, request, "statuscode is ok 200!!!!!");
	}

	/**
	 * Submits the default sample form ({@code username=vip},
	 * {@code password=secret}) with POST.
	 *
	 * @param url absolute URL to post to
	 * @return the response body, or an empty string when the response has no body
	 * @throws ClientProtocolException on an HTTP protocol error
	 * @throws IOException on any other I/O failure
	 */
	public String requestByPost(String url) throws ClientProtocolException,
			IOException {
		List<NameValuePair> nvps = new ArrayList<NameValuePair>();
		nvps.add(new BasicNameValuePair("username", "vip"));
		nvps.add(new BasicNameValuePair("password", "secret"));
		return requestByPost(url, nvps);
	}

	/**
	 * Submits the given form fields with POST, URL-encoded as UTF-8.
	 *
	 * @param url    absolute URL to post to
	 * @param params form fields to send; {@code null} is treated as an empty form
	 * @return the response body, or an empty string when the response has no body
	 * @throws ClientProtocolException on an HTTP protocol error
	 * @throws IOException on any other I/O failure
	 */
	public String requestByPost(String url, List<NameValuePair> params)
			throws ClientProtocolException, IOException {
		List<NameValuePair> fields = params;
		if (fields == null) {
			fields = new ArrayList<NameValuePair>();
		}
		HttpPost httpPost = new HttpPost(url);
		// Explicit charset so non-ASCII field values are not mangled by the
		// library's ISO-8859-1 default.
		httpPost.setEntity(new UrlEncodedFormEntity(fields, DEFAULT_CHARSET));

		CloseableHttpClient httpclient = HttpClients.createDefault();
		try {
			CloseableHttpResponse response = httpclient.execute(httpPost);
			try {
				return readBody(response);
			} finally {
				response.close();
			}
		} finally {
			httpclient.close();
		}
	}

	/**
	 * Fetches a URL with GET through an HTTP proxy.
	 *
	 * @param proxyIp   host name or IP address of the proxy
	 * @param proxyPort port of the proxy
	 * @param url       absolute URL to fetch
	 * @return the response body; an empty string when the response has no
	 *         body, or when a {@link HttpHostConnectException} or
	 *         {@link SocketTimeoutException} occurred (both are logged and
	 *         not rethrown)
	 * @throws ClientProtocolException on an HTTP protocol error, or when
	 *         {@code url} does not contain a host
	 * @throws IOException on any other I/O failure
	 */
	public String requestByGetViaProxy(String proxyIp, int proxyPort, String url)
			throws ClientProtocolException, IOException {
		HttpGet request = new HttpGet(url);

		// The target must be built from the URL's components: passing the
		// whole URL as the host name yields a bogus target and Host header.
		String targetHost = request.getURI().getHost();
		if (targetHost == null) {
			throw new ClientProtocolException(
					"URL does not specify a valid host name: " + url);
		}
		// A port of -1 (none in the URL) makes HttpHost use the scheme default.
		HttpHost target = new HttpHost(targetHost, request.getURI().getPort(),
				request.getURI().getScheme());
		HttpHost proxy = new HttpHost(proxyIp, proxyPort, "http");

		RequestConfig config = RequestConfig.custom().setProxy(proxy)
				.setSocketTimeout(SOCKET_TIMEOUT_MS).build();
		applyBrowserHeaders(request,
				"Mozilla/5.0 (Windows NT 6.3; WOW64; rv:33.0) Gecko/20100101 Firefox/33.0");
		request.setConfig(config);
		System.out.println("Executing request " + request.getRequestLine()
				+ " to " + target + " via " + proxy);
		return executeGet(target, request,
				"statuscode is ok 200...................");
	}

	/** Sets the browser-like headers shared by both GET variants. */
	private static void applyBrowserHeaders(HttpGet request, String userAgent) {
		request.setHeader("Connection", "keep-alive");
		request.setHeader("User-Agent", userAgent);
		request.setHeader("Accept-Language",
				"zh-cn,zh;q=0.8,en-us;q=0.5,en;q=0.3");
	}

	/**
	 * Executes a prepared GET on a fresh client, logs status and body, and
	 * returns the body; connection and read-timeout failures are logged and
	 * turned into an empty result.
	 */
	private String executeGet(HttpHost target, HttpGet request,
			String okLogMessage) throws IOException {
		String html = "";
		CloseableHttpClient httpclient = HttpClients.createDefault();
		try {
			CloseableHttpResponse response;
			if (target == null) {
				response = httpclient.execute(request);
			} else {
				response = httpclient.execute(target, request);
			}
			// Everything that touches the response stays inside this try so
			// the response is closed even when reading the body fails.
			try {
				html = readBody(response);
				int statuscode = response.getStatusLine().getStatusCode();
				if (statuscode == HttpStatus.SC_OK) {
					logger.info(okLogMessage);
				}
				logger.info(html);
				System.out.println("----------------------------------------");
				logger.info(response.getStatusLine());
			} finally {
				response.close();
			}
		} catch (HttpHostConnectException e) {
			// Best effort: report the failure and fall through to return "".
			logger.warn("连接代理超时", e);
		} catch (SocketTimeoutException e) {
			logger.warn("socket连接超时", e);
		} finally {
			httpclient.close();
		}
		return html;
	}

	/** Reads the response body as text; a response without an entity yields "". */
	private static String readBody(CloseableHttpResponse response)
			throws IOException {
		HttpEntity entity = response.getEntity();
		if (entity == null) {
			return "";
		}
		return EntityUtils.toString(entity, DEFAULT_CHARSET);
	}

}

猜你喜欢

转载自blog.csdn.net/u010181847/article/details/42641385