Java 爬虫:Jsoup

版权声明:本文为博主原创文章,未经博主允许不得转载。 https://blog.csdn.net/weixin_38959210/article/details/84133864

里面的命名很烂,但能跑出来效果。

package cn.temptation.web;

import java.io.IOException;
import java.util.ArrayList;
import java.util.Collections;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Map.Entry;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.select.Elements;

/**
 * Small command-line scraper: downloads one listing page, pulls the numbers
 * out of the elements matching the {@code .price} selector, and prints their
 * integer average.
 *
 * <p>The numbers are taken from the serialized HTML of the matched elements
 * (not only from their visible text), so digit runs inside attributes are
 * seen as well. Values below {@link #MIN_PLAUSIBLE_PRICE} are discarded;
 * presumably these are non-price numbers picked up this way (NOTE(review):
 * confirm against the live page markup).
 */
public class fangjia {

	/** Listing page that is fetched and analysed. */
	private static final String LISTING_URL = "https://wuz.fang.anjuke.com/";

	/** User-Agent header sent with the request. */
	private static final String USER_AGENT =
			"Mozilla/5.0 (Windows NT 6.1; WOW64; rv:23.0) Gecko/20100101 Firefox/23.0";

	/** Value passed to jsoup's {@code timeout(...)}. */
	private static final int TIMEOUT_MILLIS = 600000;

	/** Numbers smaller than this are not counted as prices. */
	private static final int MIN_PLAUSIBLE_PRICE = 1000;

	/** A maximal run of ASCII digits; compiled once instead of on every use. */
	private static final Pattern DIGIT_RUN = Pattern.compile("\\d+");

	/**
	 * Fetches the listing page and prints the average of the extracted prices.
	 * If no usable price is found, a diagnostic is written to standard error
	 * and nothing is printed to standard output.
	 *
	 * @param args ignored
	 * @throws IOException if the page cannot be downloaded
	 */
	public static void main(String[] args) throws IOException {
		Document doc = Jsoup.connect(LISTING_URL)
				.header("Accept-Encoding", "gzip, deflate")
				.userAgent(USER_AGENT)
				.maxBodySize(0)
				.timeout(TIMEOUT_MILLIS)
				.get();

		Elements priceElements = doc.select(".price");
		List<Integer> prices = extractPrices(priceElements.toString());

		// Guard the division: an empty result (e.g. the selector no longer
		// matches anything) used to end in an ArithmeticException.
		if (prices.isEmpty()) {
			System.err.println("No price of at least " + MIN_PLAUSIBLE_PRICE
					+ " found at " + LISTING_URL + "; cannot compute an average.");
			return;
		}

		System.out.println("吴忠市平均房价是:" + average(prices));
	}

	/**
	 * Collects every digit run in {@code html} whose value is at least
	 * {@link #MIN_PLAUSIBLE_PRICE}.
	 *
	 * <p>Filtering happens while collecting. The earlier approach removed
	 * entries by index from the list it was iterating over, which skipped the
	 * element following each removal and let small values slip through.
	 *
	 * @param html serialized HTML of the price elements
	 * @return the accepted values in document order; never {@code null}
	 */
	private static List<Integer> extractPrices(String html) {
		List<Integer> prices = new ArrayList<Integer>();
		Matcher matcher = DIGIT_RUN.matcher(html);
		while (matcher.find()) {
			int value;
			try {
				value = Integer.parseInt(matcher.group());
			} catch (NumberFormatException tooLarge) {
				// A digit run that does not fit in an int is skipped rather
				// than allowed to abort the whole program.
				continue;
			}
			if (value >= MIN_PLAUSIBLE_PRICE) {
				prices.add(value);
			}
		}
		return prices;
	}

	/**
	 * Returns the average of {@code values}, truncated toward zero.
	 *
	 * @param values non-empty list of values
	 * @return sum of the values divided by their count, using integer division
	 */
	private static long average(List<Integer> values) {
		// Sum in a long so that many large values cannot overflow an int.
		long total = 0;
		for (int value : values) {
			total += value;
		}
		return total / values.size();
	}
}
	

猜你喜欢

转载自blog.csdn.net/weixin_38959210/article/details/84133864