将 Excel 中的数据自动导入 Neo4j

1.DataToNeo4jClass.py

# -*- coding: utf-8 -*-
from py2neo import Node, Graph, Relationship


class DataToNeo4j(object):
    """Store entity/relationship data extracted from Excel in a Neo4j graph.

    Nodes are created under two labels (``名称`` for the first entity and
    ``值`` for the second entity); relationships connect a node of the first
    label to a node of the second label.
    """

    def __init__(self, uri="http://localhost:7474", username="neo4j",
                 password="jhy042369"):
        """Open the connection and clear the existing graph.

        Args:
            uri: HTTP address of the Neo4j server.
            username: Neo4j account name.
            password: Neo4j account password.

        The defaults reproduce the values that used to be hard-coded here so
        existing ``DataToNeo4j()`` callers keep working.
        NOTE(review): a real password should not live in source; pass it in
        from configuration instead of relying on the default.
        """
        self.graph = Graph(uri, username=username, password=password)
        # Labels attached to the two kinds of nodes.
        self.invoice_name = '名称'
        self.invoice_value = '值'
        # WARNING: removes every node and relationship already in the graph.
        self.graph.delete_all()

    def create_node(self, node_list_key, node_list_value):
        """Create one node per entry of each list.

        Args:
            node_list_key: names stored as ``名称``-labelled nodes.
            node_list_value: names stored as ``值``-labelled nodes.
        """
        for name in node_list_key:
            self.graph.create(Node(self.invoice_name, name=name))
        for name in node_list_value:
            self.graph.create(Node(self.invoice_value, name=name))

    def create_relation(self, df_data):
        """Create one relationship per row of ``df_data``.

        Args:
            df_data: DataFrame with the columns ``name`` (first entity),
                ``relation`` (relationship type) and ``name2`` (second
                entity).

        Rows that raise ``AttributeError`` are reported on stdout together
        with their row position and skipped (presumably this happens when a
        node lookup finds nothing -- confirm against the py2neo version in
        use).
        """
        if len(df_data) == 0:
            return

        # Walk the three columns in lockstep instead of indexing by label, so
        # the frame does not need a default 0..n-1 index.
        triples = zip(df_data['name'], df_data['relation'], df_data['name2'])
        for m, (start_name, relation, end_name) in enumerate(triples):
            try:
                start_node = self.graph.find_one(label=self.invoice_name,
                                                 property_key='name',
                                                 property_value=start_name)
                end_node = self.graph.find_one(label=self.invoice_value,
                                               property_key='name',
                                               property_value=end_name)
                # entity -- relation -- entity
                self.graph.create(Relationship(start_node, relation, end_node))
            except AttributeError as e:
                print(e, m)

2.invoice_neo4j.py

# -*- coding: utf-8 -*-
from dataToNeo4jClass.DataToNeo4jClass import DataToNeo4j
import os
import pandas as pd


# Read the Excel sheet into a DataFrame.
# The working directory is switched first so the relative workbook path below
# resolves inside the data folder.
os.chdir('D:\\Example\\neo4j-python-pandas-py2neo-v3-master\\data')

# Row 0 of the sheet supplies the column names.
invoice_data = pd.read_excel(
    './Invoice_data_Demo.xls',
    header=0,
    encoding='utf8',
)
print(invoice_data)

# Collect the first-entity names in one list and the second-entity values in
# another.
def data_extraction():
    """Extract the distinct node names from ``invoice_data``.

    Returns:
        tuple: ``(node_list_key, node_list_value)``.
        ``node_list_key`` holds the distinct entries of the ``题名`` column;
        ``node_list_value`` holds the distinct cell values of every column
        after the first. All entries are strings, in first-seen order.
    """
    # Convert to text *before* de-duplicating. De-duplicating the raw cells
    # first lets values that only become equal as text (several NaN cells,
    # or 1 and '1') through as duplicate node names. Stringifying the keys
    # also keeps them consistent with relation_extraction(), which looks
    # nodes up by their string form.
    node_list_key = list(dict.fromkeys(
        str(title) for title in invoice_data['题名']
    ))

    # Every column except the first contributes value nodes; cells are read
    # by position so the frame's index labels do not matter.
    node_list_value = list(dict.fromkeys(
        str(invoice_data.iloc[row, column])
        for row in range(len(invoice_data))
        for column in range(1, len(invoice_data.columns))
    ))
    return node_list_key, node_list_value

# Build the "entity 1 -- relation -- entity 2" table as a DataFrame.
def relation_extraction():
    """Flatten ``invoice_data`` into (name, relation, name2) rows.

    For every row, the first column's cell is the source entity; each
    remaining column yields one record whose relation is the column label
    and whose target entity is that column's cell. Both entity columns are
    converted to strings.

    Returns:
        pandas.DataFrame with the columns ``name``, ``relation``, ``name2``.
    """
    columns = invoice_data.columns

    sources = []     # first entity of each record
    relations = []   # column label used as the relation
    targets = []     # second entity of each record

    for row in range(len(invoice_data)):
        source = invoice_data[columns[0]][row]
        for column in columns[1:]:
            relations.append(column)
            targets.append(invoice_data[column][row])
            sources.append(source)

    # Entities are stored as text, whatever type the sheet held.
    links_dict = {}
    links_dict['name'] = [str(item) for item in sources]
    links_dict['relation'] = relations
    links_dict['name2'] = [str(item) for item in targets]
    return pd.DataFrame(links_dict)


# Extract the node lists and the relation table once and reuse them; the
# extraction functions re-scan the whole sheet on every call.
node_list_key, node_list_value = data_extraction()
relation_df = relation_extraction()

# Instantiate the loader.
create_data = DataToNeo4j()

create_data.create_node(node_list_key, node_list_value)  # create both entity kinds
create_data.create_relation(relation_df)  # create the relationships
print(relation_df)

requirements.txt

atomicwrites==1.2.1
attrs==18.2.0
backcall==0.1.0
certifi==2016.2.28
Click==7.0
colorama==0.4.0
decorator==4.3.0
ipykernel==5.1.0
ipython==7.1.1
ipython-genutils==0.2.0
jedi==0.13.1
jieba==0.39
jupyter-client==5.2.3
jupyter-console==6.0.0
jupyter-core==4.4.0
more-itertools==4.3.0
neo4j-driver==1.6.2
neobolt==1.7.0
neotime==1.7.1
numpy==1.15.3
pandas==0.23.4
parso==0.3.1
pickleshare==0.7.5
pluggy==0.8.0
prompt-toolkit==1.0.15
py==1.7.0
py2neo==3
Pygments==2.2.0
pytest==3.9.3
python-dateutil==2.7.5
pytz==2018.6
pyzmq==17.1.2
six==1.11.0
tornado==5.1.1
traitlets==4.3.2
urllib3==1.22
wcwidth==0.1.7
wincertstore==0.2
xlrd==1.1.0

发布了81 篇原创文章 · 获赞 10 · 访问量 5万+

猜你喜欢

转载自blog.csdn.net/jiahonhyu0609/article/details/104776276