[爬虫] 爬取高德地图的面状数据存为shp - 公园数据为例

版权声明:本文为博主原创文章,若有错误之处望大家批评指正!转载需附上原文链接,谢谢! https://blog.csdn.net/summer_dew/article/details/84591719

爬取的数据仅用于科研
爬取所用的接口不再单独讲解,详见下方代码
代码写于2017年9月,今天查找当时爬取的相关数据时顺便找回了这份代码;由于时间较久,代码时效性较差,仅供参考

在这里插入图片描述

# -*- coding:utf-8 -*-
# Author:PasserQi
# Time:2017/9/29
# Function:矢量化厦门市公园范围
import json
import time
import urllib

import arcgisscripting
import arcpy
import coordinate_conversion
import os
from bs4 import BeautifulSoup

# Destination shapefile that receives the park polygons.
outPath = r"G:\workspace\python\arcpy\park_polygon.shp"

# AMap (Gaode) web-service key; "***" is a placeholder.
AMAP_API_KEY = "***"

# Maximum number of result pages.
MAX_PAGE = 100

# Query-string parameters sent with each POI text-search request.
urlParamJson = dict(
    city='厦门',
    output='xml',
    key=AMAP_API_KEY,
    types='公园',
    citylimit='true',  # only return data for the given city
    offset='20'  # records per page
)

def getParkPoiid():
    """Collect the POI ids returned by the AMap place text search.

    Requests the ``place/text`` endpoint page by page, starting at page 1,
    with the module-level ``urlParamJson`` parameters. Stops at the first
    page that contains no ``<poi>`` element, or after page ``MAX_PAGE``.

    Returns:
        list: POI ids as UTF-8 encoded byte strings, in response order.
    """
    poiidList = []

    # range() excludes its stop value, so add 1 to actually reach MAX_PAGE.
    for page in range(1, MAX_PAGE + 1):
        # Per-request copy, so the shared urlParamJson dict is not mutated.
        query = dict(urlParamJson, page=page)
        print("当前 %s 页..." % page)
        url = "http://restapi.amap.com/v3/place/text?%s" % urllib.urlencode(query)

        http = urllib.urlopen(url)
        try:
            # No parser is named here, so BeautifulSoup uses its default one.
            dom = BeautifulSoup(http)
        finally:
            # Release the connection even if parsing fails.
            http.close()

        poiList = dom.findAll("poi")
        if not poiList:  # an empty page means there is no more data
            break
        for poi in poiList:
            poiidList.append(poi.id.get_text().encode("utf8"))

    return poiidList


def getParkInfoList(poiidList):
    """Fetch each POI's detail record and keep the ones that carry a shape.

    Args:
        poiidList: iterable of AMap POI ids.

    Returns:
        list: one dict per POI whose ``spec`` contains ``mining_shape``,
        with keys ``shape`` (the raw shape value), ``name`` and ``type``
        (UTF-8 encoded byte strings taken from ``base.name`` and
        ``base.business``).
    """
    parkInfoList = []

    # Starting at 2 mirrors a counter that begins at 1 and is incremented
    # after every request, which keeps the pause cadence below unchanged.
    for nextCount, poiid in enumerate(poiidList, 2):
        params = urllib.urlencode({'id': poiid})
        url = "http://ditu.amap.com/detail/get/detail?%s" % params
        print("查询url %s" % url)

        http = urllib.urlopen(url)
        try:
            jsonStr = http.read()
        finally:
            # Release the connection even if the read fails.
            http.close()

        data = json.loads(jsonStr)["data"]
        base = data["base"]
        name = base["name"].encode("utf8")
        spec = data["spec"]

        haveShp = "没有"
        if "mining_shape" in spec:  # polygon or line geometry is available
            haveShp = "有"
            parkInfoList.append({
                "shape": spec["mining_shape"]["shape"],
                "name": name,
                "type": base["business"].encode("utf8"),
            })
            if len(parkInfoList) % 11 == 0:
                print("已获取 %s 个公园的矢量信息" % len(parkInfoList))
        print("%s :%s" % (name, haveShp))

        # Throttle: one second between requests, plus a one-minute pause
        # whenever the counter reaches a multiple of 51.
        time.sleep(1)
        if nextCount % 51 == 0:
            time.sleep(60)

    return parkInfoList

def getXYArray(XYsStr):
    """Parse a coordinate string into an arcpy array of converted points.

    Depends on ``arcpy`` and ``coordinate_conversion``.

    Args:
        XYsStr: string of the form "x,y;x,y;x,y...", with coordinates in
            the GCJ-02 system.

    Returns:
        arcpy array holding one point per coordinate pair, each converted
        with ``coordinate_conversion.gcj02towgs84``.
    """
    XYarray = arcpy.CreateObject("array")
    for XYstr in XYsStr.split(';'):
        # Skip empty segments (empty input, trailing or doubled ';') that
        # would otherwise make float() raise.
        if not XYstr.strip():
            continue
        fields = XYstr.split(',')
        lng, lat = float(fields[0]), float(fields[1])
        point = arcpy.CreateObject("point")
        point.X, point.Y = coordinate_conversion.gcj02towgs84(lng, lat)
        XYarray.add(point)
    return XYarray

def saveParkPolygon(parkInfoList):
    """Write the collected parks to a polygon shapefile at ``outPath``.

    Creates the feature class (spatial reference passed as "4326"), adds
    the text fields ``name`` and ``type``, then inserts one feature per
    entry of ``parkInfoList``.

    Args:
        parkInfoList: list of dicts. The ``shape`` entry is an
            "x,y;x,y;..." string that becomes the geometry; every other
            key is written to the field of the same name.
    """
    gp = arcgisscripting.create()

    outWorkspace, outName = os.path.split(outPath)
    spat_ref = "4326"
    gp.CreateFeatureClass_management(outWorkspace, outName, "POLYGON", "", "", "", spat_ref)

    gp.AddField_management(outPath, "name", "TEXT", field_length=250)
    gp.AddField_management(outPath, "type", "TEXT", field_length=250)

    cur = gp.InsertCursor(outPath)
    try:
        for parkInfo in parkInfoList:
            # Fresh row per feature, so a key missing from this record
            # cannot inherit the value written for the previous feature.
            newRow = cur.newRow()
            for attr in parkInfo:
                if attr == "shape":
                    # NOTE: the geometry is built from gp-created objects
                    # here, whereas getXYArray uses arcpy.CreateObject.
                    XYarray = gp.CreateObject("array")
                    for XYstr in parkInfo["shape"].split(';'):
                        # Skip empty segments (e.g. a trailing ';').
                        if not XYstr.strip():
                            continue
                        fields = XYstr.split(',')
                        lng, lat = float(fields[0]), float(fields[1])
                        point = gp.CreateObject("point")
                        point.X, point.Y = coordinate_conversion.gcj02towgs84(lng, lat)
                        XYarray.add(point)
                    newRow.setValue("Shape", XYarray)
                else:
                    newRow.setValue(attr, parkInfo[attr])
            cur.InsertRow(newRow)
            del newRow
    finally:
        # Drop the cursor reference even when an insert fails.
        del cur


if __name__ == '__main__':
    # Stage 1: collect the POI ids of the parks.
    parkIds = getParkPoiid()
    print("已得到 %s 个公园POI ID" % len(parkIds))

    # Stage 2: download the detail record (with shape, if any) of each POI.
    parks = getParkInfoList(parkIds)
    print(parks)

    # Stage 3: write the collected polygons to the shapefile.
    saveParkPolygon(parks)

猜你喜欢

转载自blog.csdn.net/summer_dew/article/details/84591719