java爬虫实现百度地图数据爬取

本次项目主要实现百度地图地点检索功能的数据爬取,可以获得检索结果的相关信息(名称、地址、电话、经纬度等)。数据通过百度地图API接口获取,Web层采用servlet,持久层采用MyBatis(数据库为MySQL)。话不多说,上代码。

1.DAO层数据

package dao;
import java.util.List;
import dto.CityCodeDTO;
import dto.PageInfoDTO;
/**
 * MyBatis mapper interface for the crawled place records and the
 * city-code lookup table. Each method is bound to the SQL statement with
 * the same id in the mapper XML whose namespace is {@code dao.PageInfoDAO}.
 *
 * @author Administrator
 */
public interface PageInfoDAO {

    /**
     * Inserts one crawled record.
     *
     * @param pageInfo the record to store
     */
    void save(PageInfoDTO pageInfo);

    /**
     * Reads one page of stored records.
     *
     * @param param carries the optional {@code wordKey} filter together with
     *              the {@code startIndex} and {@code pageSize} paging values
     * @return the records of the requested page
     */
    List<PageInfoDTO> findPageData(PageInfoDTO param);

    /**
     * Counts the stored records.
     *
     * @return the number of rows in the record table
     */
    Integer getPageCount();

    /**
     * Looks up the city code for an area.
     *
     * @param cityCode carries the area code and/or the city name to match
     * @return the matching city code
     */
    Integer getCityCode(CityCodeDTO cityCode);
}

2.百度地图的城市编码与电话区号不同,因此封装一个DTO,用来保存区号/城市名称,并据此查询对应的百度城市编码

package dto;
import java.io.Serializable;
/**
 * Lookup criteria for the city-code query: callers fill in either the
 * numeric area code or the city name.
 */
public class CityCodeDTO implements Serializable {

    private static final long serialVersionUID = -7429099700161706593L;

    /** Area code typed by the user (set when the input is all digits). */
    private String code;

    /** City name typed by the user (set when the input is not all digits). */
    private String city;

    public String getCode() {
        return this.code;
    }

    public void setCode(String code) {
        this.code = code;
    }

    public String getCity() {
        return this.city;
    }

    public void setCity(String city) {
        this.city = city;
    }

    /** Renders both fields for debugging output. */
    @Override
    public String toString() {
        StringBuilder text = new StringBuilder("CityCodeDTO [code=");
        text.append(this.code);
        text.append(", city=").append(this.city);
        text.append("]");
        return text.toString();
    }
}

3.实体类文件(内部含有数据库调取参数)

package dto;
import java.io.Serializable;
import java.util.Date;
/**
 * One crawled place record. The same object also transports the paging
 * and filter values ({@code startIndex}, {@code pageSize}, {@code wordKey})
 * that the list query reads.
 *
 * @author Administrator
 */
public class PageInfoDTO implements Serializable {

    private static final long serialVersionUID = -8667380964768477281L;

    // ---- record data -------------------------------------------------

    /** Record id. */
    private int id;
    /** Place name. */
    private String name;
    /** Place address. */
    private String address;
    /** Place telephone number. */
    private String telephone;
    /** Street-view image id. */
    private String street_id;
    /** Latitude. */
    private Double lat;
    /** Longitude. */
    private Double lng;
    /** Validity flag of the record. */
    private Integer valid;

    // ---- audit columns -----------------------------------------------

    private Date createdTime;
    private String createdName;
    private Date modifiedTime;
    private String modifiedName;

    // ---- query parameters --------------------------------------------

    /** Offset of the first row of the requested page. */
    private Integer startIndex;
    /** Number of rows per page. */
    private Integer pageSize;
    /** Search key the record was stored under. */
    private String wordKey;

    /** Creates an empty record. */
    public PageInfoDTO() {
        super();
    }

    /** Creates a record with its basic descriptive fields filled in. */
    public PageInfoDTO(int id, String name, String address, String telephone) {
        super();
        this.id = id;
        this.name = name;
        this.address = address;
        this.telephone = telephone;
    }

    public int getId() {
        return this.id;
    }

    public void setId(int id) {
        this.id = id;
    }

    public String getName() {
        return this.name;
    }

    public void setName(String name) {
        this.name = name;
    }

    public String getAddress() {
        return this.address;
    }

    public void setAddress(String address) {
        this.address = address;
    }

    public String getTelephone() {
        return this.telephone;
    }

    public void setTelephone(String telephone) {
        this.telephone = telephone;
    }

    public String getStreet_id() {
        return this.street_id;
    }

    public void setStreet_id(String street_id) {
        this.street_id = street_id;
    }

    public Double getLat() {
        return this.lat;
    }

    public void setLat(Double lat) {
        this.lat = lat;
    }

    public Double getLng() {
        return this.lng;
    }

    public void setLng(Double lng) {
        this.lng = lng;
    }

    public Integer getValid() {
        return this.valid;
    }

    public void setValid(Integer valid) {
        this.valid = valid;
    }

    public Date getCreatedTime() {
        return this.createdTime;
    }

    public void setCreatedTime(Date createdTime) {
        this.createdTime = createdTime;
    }

    public String getCreatedName() {
        return this.createdName;
    }

    public void setCreatedName(String createdName) {
        this.createdName = createdName;
    }

    public Date getModifiedTime() {
        return this.modifiedTime;
    }

    public void setModifiedTime(Date modifiedTime) {
        this.modifiedTime = modifiedTime;
    }

    public String getModifiedName() {
        return this.modifiedName;
    }

    public void setModifiedName(String modifiedName) {
        this.modifiedName = modifiedName;
    }

    public Integer getStartIndex() {
        return this.startIndex;
    }

    public void setStartIndex(Integer startIndex) {
        this.startIndex = startIndex;
    }

    public Integer getPageSize() {
        return this.pageSize;
    }

    public void setPageSize(Integer pageSize) {
        this.pageSize = pageSize;
    }

    public String getWordKey() {
        return this.wordKey;
    }

    public void setWordKey(String wordKey) {
        this.wordKey = wordKey;
    }

    /** Renders every field for debugging output. */
    @Override
    public String toString() {
        StringBuilder text = new StringBuilder("PageInfoDTO [id=");
        text.append(this.id);
        text.append(", name=").append(this.name);
        text.append(", address=").append(this.address);
        text.append(", telephone=").append(this.telephone);
        text.append(", street_id=").append(this.street_id);
        text.append(", lat=").append(this.lat);
        text.append(", lng=").append(this.lng);
        text.append(", valid=").append(this.valid);
        text.append(", createdTime=").append(this.createdTime);
        text.append(", modifiedTime=").append(this.modifiedTime);
        text.append(", createdName=").append(this.createdName);
        text.append(", startIndex=").append(this.startIndex);
        text.append(", pageSize=").append(this.pageSize);
        text.append(", wordKey=").append(this.wordKey);
        text.append(", modifiedName=").append(this.modifiedName);
        text.append("]");
        return text.toString();
    }
}

4.mapper文件

<?xml version="1.0" encoding="UTF-8" ?>
<!DOCTYPE mapper PUBLIC "-//ibatis.apache.org//DTD Mapper 3.0//EN"
 "http://ibatis.apache.org/dtd/ibatis-3-mapper.dtd">

<!-- SQL statements bound to the methods of dao.PageInfoDAO. -->
<mapper namespace="dao.PageInfoDAO">

    <!--
        Stores one crawled record. "insert ignore" silently skips rows that
        violate a unique key. The table name is written in lower case so
        that it matches the DDL on case-sensitive MySQL installations.
    -->
    <insert id="save" parameterType="dto.PageInfoDTO">
        insert ignore into pageinf
        (
            wordKey,
            name,
            address,
            telephone,
            street_id,
            lat,
            lng,
            valid,
            createdTime,
            createdName,
            modifiedTime,
            modifiedName
        )
        values
        (
            #{wordKey},
            #{name},
            #{address},
            #{telephone},
            #{street_id},
            #{lat},
            #{lng},
            0,
            now(),
            'zt',
            now(),
            'zt'
        )
    </insert>

    <!-- Returns one page of records, optionally restricted to one search key. -->
    <select id="findPageData" parameterType="dto.PageInfoDTO" resultType="dto.PageInfoDTO">
        select *
        from pageinf
        <where>
            <if test='wordKey!=null and wordKey!=""'>
                and wordKey = #{wordKey}
            </if>
        </where>
        limit #{startIndex},#{pageSize}
    </select>

    <!--
        Counts every stored record.
        NOTE(review): this count is not restricted by wordKey, so it can be
        larger than the number of rows findPageData can return for a search.
    -->
    <select id="getPageCount" resultType="int">
        select count(id) from pageinf
    </select>

    <!--
        Resolves a city code from a city name (substring match) and/or an
        area code (exact match). With neither criterion set, no filter is
        applied at all.
    -->
    <select id="getCityCode" parameterType="dto.CityCodeDTO" resultType="Integer">
        select city_code from area_code
        <where>
            <if test='city!=null and city!=""'>
                and city like concat('%',#{city},'%')
            </if>
            <if test='code!=null and code!=""'>
                and code = #{code}
            </if>
        </where>
    </select>
</mapper>

5.servlet文件

package servlet;


import java.io.IOException;
import java.io.PrintWriter;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.regex.Matcher;
import java.util.regex.Pattern;


import javax.servlet.ServletException;
import javax.servlet.http.HttpServlet;
import javax.servlet.http.HttpServletRequest;
import javax.servlet.http.HttpServletResponse;


import org.apache.ibatis.session.SqlSession;
import org.apache.ibatis.session.SqlSessionFactory;
import org.apache.ibatis.session.SqlSessionFactoryBuilder;


import dao.PageInfoDAO;
import dto.CityCodeDTO;
import dto.PageInfoDTO;
import net.sf.json.JSONObject;
import util.LngAndLatUtil;


/**
 * Servlet implementation class MapServlet
 */
public class MapServlet extends HttpServlet {
private static final long serialVersionUID = 1L;

protected void service(HttpServletRequest request, HttpServletResponse response) throws ServletException, IOException {


// 设置编码格式
request.setCharacterEncoding("UTF-8");
response.setContentType("text/html;charset=UTF-8");
PrintWriter out = response.getWriter();


// 请求地址解析
String uri = request.getRequestURI();
String action = uri.substring(uri.lastIndexOf("/"), uri.lastIndexOf("."));


if ("/query".equals(action)) {


// 处理query.do请求实现数据入库
String area = request.getParameter("area");
String word = request.getParameter("word");
LngAndLatUtil util = new LngAndLatUtil();
JSONObject obj = util.getLngAndLat(area, word);
out.println(obj);

} else if ("/list".equals(action)) {


// 处理list请求实现页面分页显示数据
// 获取dao对象
SqlSessionFactoryBuilder ssfd = new SqlSessionFactoryBuilder();
SqlSessionFactory ssf = ssfd
.build(LngAndLatUtil.class.getClassLoader().getResourceAsStream("SqlMapConfig.xml"));
SqlSession session = ssf.openSession();
PageInfoDAO dao = session.getMapper(PageInfoDAO.class);


// 获取页面参数
String area = request.getParameter("area");
String word = request.getParameter("word");
System.out.println("area"+area);
Integer pageCurrent = Integer.valueOf(request.getParameter("pageCurrent"));
if (pageCurrent == null) {
pageCurrent = 1;
}
int pageSize = 10;
int startIndex = (pageCurrent - 1) * 10;
PageInfoDTO param = new PageInfoDTO();
param.setPageSize(pageSize);
param.setStartIndex(startIndex); 

//判断area是区号,还是名称
CityCodeDTO cityCode =new CityCodeDTO();
Pattern p = Pattern.compile("[0-9]*");
Matcher m = p.matcher(area);
if (m.matches()) {
cityCode.setCode(area);
System.out.println(666);
}else{
cityCode.setCity(area);
}
Integer code = getCityCode(cityCode);

param.setWordKey(code+word);
System.out.println(code+word);

// 获取查询数据
List<PageInfoDTO> data = dao.findPageData(param);
Integer count = dao.getPageCount();


// 封装传输数据
Map<String, Object> map = new HashMap<String, Object>();
map.put("count", count);
map.put("data", data);
JSONObject obj = JSONObject.fromObject(map);



// 将数据传给页面
out.println(obj);
}
}
public static Integer getCityCode(CityCodeDTO cityCode){
    SqlSessionFactoryBuilder ssfd = new SqlSessionFactoryBuilder();
SqlSessionFactory ssf = ssfd.build(LngAndLatUtil.class.
getClassLoader().getResourceAsStream("SqlMapConfig.xml"));
SqlSession session = ssf.openSession();
PageInfoDAO dao = session.getMapper(PageInfoDAO.class);
Integer code = dao.getCityCode(cityCode);
return code;
    }
}

6.调用百度地图API获取数据并存入数据库的工具类

package util;


import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStreamReader;
import java.io.UnsupportedEncodingException;
import java.net.MalformedURLException;
import java.net.URL;
import java.net.URLConnection;
import java.net.URLEncoder;
import java.util.List;
import java.util.regex.Matcher;
import java.util.regex.Pattern;


import org.apache.ibatis.session.SqlSession;
import org.apache.ibatis.session.SqlSessionFactory;
import org.apache.ibatis.session.SqlSessionFactoryBuilder;


import dao.PageInfoDAO;
import dto.CityCodeDTO;
import dto.PageInfoDTO;
import net.sf.json.JSONObject;


public class LngAndLatUtil{

/**
* 爬取数据存入数据库
* @param address 查询区域
* @param param查询关键词
* @return json字符串
*/
public static JSONObject getLngAndLat(String area, String word) {
Integer code = getCode(area);
JSONObject obj = null;
for (int i = 0; i < 10; i++) {
String url = "http://api.map.baidu.com/place/v2/search?query=" + word + "&tag=&scope=2&region=" + code
+ "&output=json&page_size=10&page_num=" + i + "&ret_coordtype=gcj02ll&ak=申请百度地图获取AK";
String str = loadJSON(url);
obj = JSONObject.fromObject(str);

//数据解析
if (obj.get("status").toString().equals("0")) {
List<JSONObject> list = obj.getJSONArray("results");

//遍历数据实现存储
for (JSONObject jx : list) {

String name = (String) jx.get("name");
String addre = (String) jx.get("address");
String telephone = (String) jx.get("telephone");
Double lat = (Double) jx.getJSONObject("location").get("lat");
Double lng = (Double) jx.getJSONObject("location").get("lng");
String street_id = (String) jx.get("street_id");
PageInfoDTO pi = new PageInfoDTO();
pi.setName(name);
pi.setAddress(addre);
pi.setStreet_id(street_id);
pi.setTelephone(telephone);
pi.setLat(lat);
pi.setLng(lng);
Integer citycode = getCode(area);

pi.setWordKey(citycode+word);
getDao(pi);
}


}
}
return obj;
}
private static Integer getCode(String area) {
//判断area是区号,还是名称
CityCodeDTO cityCode =new CityCodeDTO();
Pattern p = Pattern.compile("[0-9]*");
Matcher m = p.matcher(area);
if (m.matches()) {
cityCode.setCode(area);
}else{
cityCode.setCity(area);
}
Integer code = getCityCode(cityCode);
return code;
}
public static Integer getCityCode(CityCodeDTO cityCode){
    SqlSessionFactoryBuilder ssfd = new SqlSessionFactoryBuilder();
SqlSessionFactory ssf = ssfd.build(LngAndLatUtil.class.
getClassLoader().getResourceAsStream("SqlMapConfig.xml"));
SqlSession session = ssf.openSession();
PageInfoDAO dao = session.getMapper(PageInfoDAO.class);
Integer code = dao.getCityCode(cityCode);
return code;
    }
/**
* 实现存储数据
* @param pi 需要存储的数据
*/
    public static void getDao(PageInfoDTO pi){
    SqlSessionFactoryBuilder ssfd = new SqlSessionFactoryBuilder();
SqlSessionFactory ssf = ssfd.build(LngAndLatUtil.class.
getClassLoader().getResourceAsStream("SqlMapConfig.xml"));
SqlSession session = ssf.openSession();
PageInfoDAO dao = session.getMapper(PageInfoDAO.class);
dao.save(pi);
session.commit();
session.close();
    }
    
    /**
     * 根据地址(url)获取页面数据
     * @param uri
     * @return
     */
    public static String loadJSON (String uri) {
        StringBuilder json = new StringBuilder();
        try {
            URL url = new URL(uri);
            URLConnection yc = url.openConnection();
            BufferedReader in = new BufferedReader(new InputStreamReader(yc.getInputStream(),"utf-8"));
            String inputLine = null;
            while ( (inputLine = in.readLine()) != null) {
                json.append(inputLine);
            }
            in.close();
        } catch (MalformedURLException e) {
        } catch (IOException e) {
        }
        return json.toString();
    }
}

7.mybatis的配置文件

<?xml version="1.0" encoding="UTF-8" ?>
<!DOCTYPE configuration PUBLIC "-//ibatis.apache.org//DTD Config 3.0//EN"
    "http://ibatis.apache.org/dtd/ibatis-3-config.dtd">
<!-- MyBatis runtime configuration: one JDBC environment plus the mapper file. -->
<configuration>
    <environments default="environment">
        <environment id="environment">
            <transactionManager type="JDBC" />
            <!-- Connection pool settings. -->
            <!-- NOTE(review): the credentials are hard-coded here; confirm they are not used outside a local setup. -->
            <dataSource type="POOLED">
                <property name="driver" value="com.mysql.jdbc.Driver" />
                <property name="url" value="jdbc:mysql:///baiduMap?"/>
                <property name="username" value="root" />
                <property name="password" value="123456" />
            </dataSource>
        </environment>
    </environments>
    <!-- Location of the mapper file on the classpath. -->
    <mappers>
        <mapper resource="dto/PageInfoMapper.xml" />
    </mappers>
</configuration>

8.web.xml文件

<?xml version="1.0" encoding="UTF-8"?>
<web-app xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xmlns="http://java.sun.com/xml/ns/javaee" xsi:schemaLocation="http://java.sun.com/xml/ns/javaee http://java.sun.com/xml/ns/javaee/web-app_2_5.xsd" version="2.5">
    <display-name>baiduMap</display-name>

    <!-- Registers the servlet that serves the page's requests. -->
    <servlet>
        <description></description>
        <servlet-name>MapServlet</servlet-name>
        <servlet-class>servlet.MapServlet</servlet-class>
        <init-param>
            <param-name>character</param-name>
            <param-value>utf-8</param-value>
        </init-param>
    </servlet>

    <!-- Every request ending in .do is routed to MapServlet. -->
    <servlet-mapping>
        <servlet-name>MapServlet</servlet-name>
        <url-pattern>*.do</url-pattern>
    </servlet-mapping>
</web-app>

9.页面js

// Once the DOM is ready, wire the pager buttons (delegated through #body)
// and the search button to their handlers.
$(function () {
    var container = $("#body");
    container.on("click", "#nextPage,#prePage,#firstPage,#lastPage", changePageData);

    var searchButton = $(".query");
    searchButton.on("click", getData);
});
/**
 * Click handler for the four pager buttons. Works out the new page number
 * from the clicked button, keeps it inside [1, last page], stores it on
 * #body and reloads the table.
 */
function changePageData() {
    var PAGE_SIZE = 10;
    var store = $("#body");

    // Before the first search no count is stored; treat that as a single page.
    var rowCount = Number(store.data("pageCount")) || 0;
    var lastPage = Math.max(1, Math.ceil(rowCount / PAGE_SIZE));
    var pageCurrent = Number(store.data("pageCurrent")) || 1;

    switch ($(this).val()) {
    case "首页":
        pageCurrent = 1;
        break;
    case "上一页":
        pageCurrent -= 1;
        break;
    case "下一页":
        pageCurrent += 1;
        break;
    case "尾页":
        pageCurrent = lastPage;
        break;
    }

    // Clamp so that "previous" on page 1 and "next" on the last page are no-ops.
    pageCurrent = Math.min(Math.max(pageCurrent, 1), lastPage);

    store.data("pageCurrent", pageCurrent);
    getPageData();
}
/**
 * Click handler for the search button. Asks the server to crawl and store
 * the results for the entered area and keyword, then shows the first page.
 */
function getData() {
    var param = {
        "area": $(".area").val(),
        "word": $(".word").val()
    };
    $.getJSON("query.do", param, function () {
        // A new search starts at page 1; a page number left over from a
        // previous search may not exist in the new result set.
        $("#body").data("pageCurrent", 1);
        getPageData();
    });
}
/**
 * Requests the current page (page 1 when none is stored) for the entered
 * area and keyword from list.do and renders the returned rows.
 */
function getPageData() {
    var endpoint = "list.do";
    console.log(endpoint);

    var query = {
        "pageCurrent": $("#body").data("pageCurrent") || 1,
        "area": $(".area").val(),
        "word": $(".word").val()
    };
    console.log(345);

    $.getJSON(endpoint, query, function (payload) {
        setTableRows(payload);
        console.log(666);
    });
}
/**
 * Replaces the table body with the rows of one list response and stores the
 * total row count on #body for the pager.
 *
 * @param map response object with `count` (total rows) and `data` (array of rows)
 */
function setTableRows(map) {
    var tbody = $(".tBody");
    var rows = (map && map.data) || [];

    $("#body").data("pageCount", map ? map.count : 0);
    tbody.empty();

    // Builds a cell whose content is inserted as text, never as markup, so
    // that crawled values cannot inject HTML into the page.
    function cell(value) {
        return $("<td></td>").text(value == null ? "" : String(value));
    }

    for (var i = 0; i < rows.length; i++) {
        var item = rows[i];

        // 经度 is the longitude (lng) and 纬度 the latitude (lat).
        var coordinates = $("<td></td>").append(
            document.createTextNode("经度:" + item.lng),
            $("<br/>"),
            document.createTextNode("纬度:" + item.lat)
        );

        var tr = $("<tr class='tr'></tr>");
        tr.append(
            cell(item.name),
            cell(item.address),
            cell(item.telephone),
            cell(item.street_id),
            coordinates,
            cell(item.valid),
            $("<td></td>").append($("<input type='button' value='修改'>"))
        );
        tbody.append(tr);
    }
}
10.页面

<!DOCTYPE html>
<html>
<head>
    <meta charset="UTF-8">
    <title>Insert title here</title>
    <script type="text/javascript" src="js/jquery-3.2.1.min.js"></script>
    <link rel="stylesheet" type="text/css" href="css/map.css">
    <script type="text/javascript" src="js/map.js"></script>
</head>
<body>
    <!-- #body also serves as the store for the pager state kept by map.js -->
    <div id="body">

        <!-- Search form: area, keyword and the search button -->
        <div class="getData">
            <input class="area" type="text" placeholder="请输入区域" autocomplete="on">
            <input class="word" type="text" placeholder="请输入关键词" autocomplete="on">
            <input class="query" type="button" value="搜索">
        </div>

        <!-- Result table; the rows are filled in by map.js -->
        <div class="tableBody">
            <table id="table" border="1" cellpadding="10" cellspacing="0">
                <thead>
                    <tr id="tr">
                        <th class="th">名称</th>
                        <th class="th">地址</th>
                        <th class="th">电话</th>
                        <th class="th">街景图</th>
                        <th class="th">地址经纬度</th>
                        <th class="th">有效性</th>
                        <th class="th">操作</th>
                    </tr>
                </thead>
                <tbody class="tBody">

                </tbody>
            </table>
        </div>

        <!-- Pager buttons -->
        <div class="foot">
            <input id="firstPage" class="page" type="button" value="首页">
            <input id="prePage" class="page" type="button" value="上一页">
            <input id="nextPage" class="page" type="button" value="下一页">
            <input id="lastPage" class="page" type="button" value="尾页">
        </div>
    </div>
</body>
</html>

11.页面css

@charset "utf-8";

/* Reset the default spacing of every element. */
* {
    margin: 0;
    padding: 0;
}

/* Search form, centred above the table. */
.getData {
    width: 500px;
    margin: 20px auto 10px auto;
}

.tableBody {
    width: 1000px !important;
}

/* Result table, centred inside its wrapper. */
#table {
    width: 1000px;
    margin: 20px auto 10px auto;
}

/* Page container. */
#body {
    width: 1000px;
    margin: 0 auto;
}

.th {
    width: 80px;
}

.tr {
    width: 300px;
}

.tr td {
    width: 150px;
    text-align: center;
}

/* Pager, centred below the table. */
.foot {
    width: 300px;
    margin: 0 auto;
}

.page {
    margin-right: 30px;
}

12.项目所涉及的jar包(Maven的pom.xml)

<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
  <modelVersion>4.0.0</modelVersion>
  <groupId>com.3b</groupId>
  <artifactId>baiduMap</artifactId>
  <version>0.0.1-SNAPSHOT</version>
  <packaging>war</packaging>
  <dependencies>
    <!-- JSON handling. json-lib publishes its jars under a JDK classifier only. -->
    <dependency>
      <groupId>net.sf.json-lib</groupId>
      <artifactId>json-lib</artifactId>
      <version>2.2.3</version>
      <classifier>jdk15</classifier>
    </dependency>
    <!-- The servlet API is supplied by the container and must not be packaged into the war. -->
    <dependency>
      <groupId>javax.servlet</groupId>
      <artifactId>javax.servlet-api</artifactId>
      <version>3.1.0</version>
      <scope>provided</scope>
    </dependency>
    <dependency>
      <groupId>org.mybatis</groupId>
      <artifactId>mybatis</artifactId>
      <version>3.2.8</version>
    </dependency>
    <!-- Needed for tests only. -->
    <dependency>
      <groupId>junit</groupId>
      <artifactId>junit</artifactId>
      <version>3.8.1</version>
      <scope>test</scope>
    </dependency>
    <!--
      The com.oracle:ojdbc14 entry was removed: its type "pom.lastUpdated" is
      the marker of a failed download, not a real artifact type, and the
      project connects to MySQL only.
    -->
    <dependency>
      <groupId>mysql</groupId>
      <artifactId>mysql-connector-java</artifactId>
      <version>5.1.40</version>
    </dependency>
  </dependencies>

</project>


13.sql语句

SET FOREIGN_KEY_CHECKS=0;
-- ----------------------------
-- Table structure for pageinf
-- ----------------------------
-- Stores the crawled place records.
-- `id` is generated by the database: the insert statement of the mapper
-- never supplies it, so without AUTO_INCREMENT every row after the first
-- would collide on the primary key and be dropped by "insert ignore".
-- The unique key on `name` makes "insert ignore" skip places already stored.
-- Note: the mapper also reads a table `area_code` (columns city_code, city,
-- code); its definition is not part of this script.
DROP TABLE IF EXISTS `pageinf`;
CREATE TABLE `pageinf` (
  `id` int(11) NOT NULL AUTO_INCREMENT,
  `wordKey` varchar(255) DEFAULT NULL,
  `name` varchar(255) DEFAULT NULL,
  `address` varchar(500) DEFAULT NULL,
  `telephone` varchar(255) DEFAULT NULL,
  `street_id` varchar(500) DEFAULT NULL,
  `lat` varchar(255) DEFAULT NULL,
  `lng` varchar(255) DEFAULT NULL,
  `valid` int(11) DEFAULT NULL,
  `createdTime` datetime DEFAULT NULL,
  `createdName` varchar(255) DEFAULT NULL,
  `modifiedTime` datetime DEFAULT NULL,
  `modifiedName` varchar(255) DEFAULT NULL,
  PRIMARY KEY (`id`),
  UNIQUE KEY `bdname` (`name`) USING BTREE
) ENGINE=InnoDB DEFAULT CHARSET=utf8;
SET FOREIGN_KEY_CHECKS=1;





 

猜你喜欢

转载自blog.csdn.net/qq_40282981/article/details/79025395
今日推荐