Java 抓取网页 HTML 源代码的两种写法



package com.hyq.src;

import java.io.InputStream;
import java.net.URL;


/**
 * Small demo that downloads the raw bytes of a page and prints them to
 * standard output as text.
 */
public class Test {

	/** Page fetched by the no-argument {@link #testNetStream()}. */
	private static final String DEFAULT_URL = "http://www.imust.cn/";

	/** Size in bytes of the scratch buffer used while draining the stream. */
	private static final int CHUNK_SIZE = 8192;

	/**
	 * Runs the demo against the default page. Any failure is reported by
	 * printing its stack trace.
	 *
	 * @param args command-line arguments (unused)
	 */
	public static void main(String[] args) {
		try {
			Test.testNetStream();
		} catch (Exception e) {
			e.printStackTrace();
		}
	}

	/**
	 * Downloads the default page and prints its source to standard output.
	 *
	 * @throws Exception if the page cannot be opened or read
	 */
	public static void testNetStream() throws Exception {
		testNetStream(DEFAULT_URL);
	}

	/**
	 * Downloads the resource at {@code address} and prints its content to
	 * standard output, followed by a line break.
	 *
	 * <p>The bytes are decoded with the platform default charset.
	 *
	 * @param address URL of the resource to fetch
	 * @throws Exception if the URL is malformed or the resource cannot be
	 *                   opened or read
	 */
	public static void testNetStream(String address) throws Exception {
		// Fully-qualified so the block needs no additional file-level import.
		java.io.ByteArrayOutputStream collected = new java.io.ByteArrayOutputStream();

		// try-with-resources closes the stream even when a read fails.
		try (InputStream in = new URL(address).openStream()) {
			byte[] chunk = new byte[CHUNK_SIZE];
			int count;
			// A single read() may return fewer bytes than are available, so
			// keep reading until end of stream (-1) and store only the bytes
			// actually received.
			while ((count = in.read(chunk)) != -1) {
				collected.write(chunk, 0, count);
			}
		}

		// Decode exactly the received bytes; no zero padding is included.
		System.out.println(new String(collected.toByteArray()));
	}
}













package com.hyq.src;

import java.io.BufferedReader;
import java.io.InputStreamReader;
import java.net.URL;
import java.net.URLConnection;



/**
 * Small demo that downloads a page as text, line by line, and prints it to
 * standard output.
 */
public class Test {

	/** Charset used by the single-argument {@link #getHtmlSource(String)}. */
	private static final String DEFAULT_CHARSET = "gb2312";

	/**
	 * Prints the source of a sample page to standard output.
	 *
	 * @param args command-line arguments (unused)
	 */
	public static void main(String[] args)
	{
		System.out.println(Test.getHtmlSource("http://sports.163.com/zc/"));
	}

	/**
	 * Fetches the text at {@code url}, decoding it as gb2312.
	 *
	 * @param url address of the resource to fetch
	 * @return the text read, each line terminated by {@code "\n"}; empty or
	 *         partial if the fetch failed
	 */
	public static String getHtmlSource(String url){
		return getHtmlSource(url, DEFAULT_CHARSET);
	}

	/**
	 * Fetches the text at {@code url}, decoding it with the named charset.
	 *
	 * <p>This method is best-effort: an I/O failure (including a malformed
	 * URL or an unsupported charset name) is reported by printing its stack
	 * trace, and whatever was read before the failure is returned.
	 *
	 * @param url         address of the resource to fetch
	 * @param charsetName name of the charset used to decode the response;
	 *                    must not be {@code null}
	 * @return the text read, each line terminated by {@code "\n"} regardless
	 *         of the line terminator used by the resource; empty or partial
	 *         if the fetch failed
	 */
	public static String getHtmlSource(String url, String charsetName){
		// Method-local accumulator: no synchronisation needed.
		StringBuilder source = new StringBuilder();
		try {
			URLConnection connection = new URL(url).openConnection();
			// try-with-resources closes the reader and the underlying stream
			// even when reading fails part-way. The stream is declared as its
			// own resource so it is closed even if the reader cannot be built.
			// java.io.InputStream is fully-qualified so the block needs no
			// additional file-level import.
			try (java.io.InputStream in = connection.getInputStream();
					BufferedReader reader = new BufferedReader(new InputStreamReader(in, charsetName))) {
				String line;
				while ((line = reader.readLine()) != null) {
					source.append(line).append('\n');
				}
			}
		} catch (java.io.IOException e) {
			// Deliberately best-effort: report and return what was read.
			e.printStackTrace();
		}
		return source.toString();
	}
}



猜你喜欢

转载自stevenjohn.iteye.com/blog/1109851
今日推荐