# ES 上传、更新数据 — upload / update data in Elasticsearch

import json
import hashlib
from pprint import pprint

from elasticsearch import Elasticsearch

# Connect to the Elasticsearch cluster used by the legal-document spider.
es = Elasticsearch(hosts="10.109.24.153", port=9200)
# Make sure the target index exists.  ignore=400 suppresses the
# "resource_already_exists_exception" raised when the index was already
# created on a previous run, so the script is safe to re-run.
es.indices.create(index="newlegalindex", ignore=400)
# NOTE(review): a previous one-off import pass (reading id_num.txt, computing
# an md5 uuid from sourceURL and indexing each record) was removed here as
# dead commented-out code; recover it from version control if needed.
# Bool query: documents whose fbdept is exactly "全国人大常委会" (must/term),
# plus an optional relevance boost when sourcenum matches "1003" (should).
# NOTE(review): because a "must" clause is present, the "should" clause only
# affects scoring — it does not filter results.  Confirm that is the intent.
query_json = {
    "bool": {
        "must": {"term": {"fbdept": "全国人大常委会"}},
        "should": [{"match": {"sourcenum": "1003"}}],
    }
}

# Fields to return per hit.  "_id" is intentionally NOT listed: it is hit
# metadata (always present on each hit), not a _source field, so including
# it in the _source filter was a no-op.
source_arr = ["fbdept", "sourceURL"]
# Fetch matching documents in one page.
# NOTE(review): from/size pagination is capped by index.max_result_window
# (default 10000) and silently truncates at size=3000 — for a genuinely
# complete export use the scroll API (elasticsearch.helpers.scan).
res = es.search(index="newlegalindex", body={"query": query_json, "_source": source_arr, "from": 0, "size": 3000})
# Re-tag every matched document: new source number, new status, clear fgarea.
for hit in res['hits']['hits']:
    _id = hit["_id"]
    es.update(index="newlegalindex", doc_type="mytype", id=_id,
              body={"doc": {"sourcenum": "2001", "status": "2", "fgarea": ""}})
    print(_id)

# 转载自 (reposted from): blog.csdn.net/Rouckie/article/details/88850901