[爬虫] 地理编码|根据名称获取POI的经纬度

使用高德地图对数据进行地理编码
高德地图POI服务:http://lbs.amap.com/api/webservice/guide/api/search

输入:上市公司数据,没有经纬度
这里写图片描述

功能:按照name,area查询,获取公司的经纬度

输出:POI信息,具有经纬度,坐标系WGS84
这里写图片描述
若没有搜索到,该条后面的信息是空的
最后一行输出失败的数量
这里写图片描述

coordinate_conversion文件:https://blog.csdn.net/summer_dew/article/details/80723434

# -*- coding:utf-8 -*-
# function: 根据一些信息查找经纬度
import xlwt
import xlrd
import urllib
from bs4 import BeautifulSoup
import coordinate_conversion



NAME = u"地理编码" #输出的名称
input_path = r'D:\Users\PasserQi\Desktop\GetAMapPOI\stock_basic_list.xls'
outPath = r"D:\Users\PasserQi\Desktop\GetAMapPOI\%s.xls" % NAME

saveField = ["查询条件","id","name","type","typecode","address","x","y","tel","pname","cityname","adname","business_area","photos"]
AMAP_API_KEY = "4fac3db866dcc3b8a735651d3a7db1c7" #高德地图密匙

# @Fun:获取查找的条件
# @Param:xls文件全路径
# @Return:[{}, {}, {}, ..] 查找的信息Items
def getItems(input_path):
    items = []
    word = xlrd.open_workbook(input_path)
    table = word.sheets()[0]
    nrows = table.nrows
    for i in range(1,nrows):
        items.append({
            "keywords" : table.cell(i,1).value,
            "name" : table.cell(i,3).value,
            "type" : table.cell(i,2).value
        })

    return items

# 将查找条件输出字符串
def itemToStr(item):
    ret = ""
    for key in item:
        ret += key +":" + item[key] + "\n"
    return ret


# 填写查询条件
# @param:一个查询条件
# @return:POI
def searchLocation(item):
    urlParamJson = {
        'city': item["name"].encode("utf8"),
        'output': 'xml',
        'key': AMAP_API_KEY,
        'keywords': item["keywords"].encode("utf8") ,
        "types": "公司",
        'citylimit': 'true',  # 只返回指定城市数据
        'offset': '20'  # 每页条数
    }
    params = urllib.urlencode(urlParamJson)
    url = "http://restapi.amap.com/v3/place/text?%s" % params
    print url
    http = urllib.urlopen(url)
    dom = BeautifulSoup(http)
    poiList = dom.findAll("poi")
    if len(poiList) == 0:  # 没有
        return None
    else:
        return poiList[0]


if __name__ == '__main__':
    # 获取查找条件
    items = getItems(input_path)

    #创建保存的xls
    w = xlwt.Workbook(encoding="utf-8")
    #create sheet
    sheet = w.add_sheet(NAME)
    for i in range( len(saveField) ) :
        sheet.write(0, i, saveField[i])

    cur = 1
    error = 0
    # 开始查找
    for item in items:
        poi = searchLocation(item)
        index = saveField.index("查询条件")  # 获取下标
        value = itemToStr(item)
        sheet.write(cur, index, value)  # 保存
        # 没有搜索到,退出本次
        if not poi:
            cur += 1
            error +=1 # 记录未搜索到的
            continue

        # 保存
        for tag in poi:
            name = tag.name  # 标签名
            if name == "photos":  # 图片
                index = saveField.index("photos")
                value = ""
                for i in tag:
                    photos_url = tag.url.get_text()
                    value = value + photos_url + ";"
                sheet.write(cur, index, value)
                continue
            if name in saveField:
                index = saveField.index(name)  # 获取下标
                value = tag.get_text()  # 获取值
                sheet.write(cur, index, value)  # 保存
            if name == "location":
                value = tag.get_text()
                x, y = value.split(',')
                x, y = coordinate_conversion.gcj02towgs84(float(x), float(y))
                # save x
                index = saveField.index('x')
                value = x
                sheet.write(cur, index, value)
                # save y
                index = saveField.index('y')
                value = y
                sheet.write(cur, index, value)
        cur += 1
        print cur

    sheet.write(cur, 0, "失败" + str(error) )
    w.save(outPath)

猜你喜欢

转载自blog.csdn.net/summer_dew/article/details/80724795