# 版权声明:本文为博主原创文章,未经博主允许不得转载。 https://blog.csdn.net/sinat_26566137/article/details/82224317
# Column names of the tab-separated file at `feature_extract_judgedoc`,
# listed in the order they are applied to the schema.
judgedoc_info_field = [
    'litigant_name',
    'judgedoc_cnt',
    'litigant_defendant_contract_dispute_cnt',
    'litigant_defendant_bust_cnt',
    'defendant_judgedoc_cnt',
]

# Every column is declared as a nullable string; no casting is done here.
judgedoc_info_schema = StructType(
    [StructField(column, StringType(), True) for column in judgedoc_info_field]
)

sqlContext = SQLContext(sparkContext=sc)

# Read the file as tab-delimited CSV using the explicit schema above.
# (An earlier variant, left disabled in the original, read a path named
# `hdfspath_1` via sqlContext.read.csv(..., header=False) instead.)
dfkk2 = (
    sqlContext.read
    .format("csv")
    .schema(judgedoc_info_schema)
    .option("delimiter", '\t')
    .load(feature_extract_judgedoc)
)

# Expose the data to SQL under the view name 'y3' and print a preview.
dfkk2.createOrReplaceTempView('y3')
dfkk2.show()

# Keep only the rows for one specific litigant and print them.
dfhh1 = sqlContext.sql(
    "select * from y3 where litigant_name ='重庆农村商业银行股份有限公司'\n"
)
dfhh1.show()
############
def test_data_01(spark, sc):
    """Load the tab-separated file at ``sue_by_bank`` and print selected rows.

    The file is read with an all-string, nullable schema, registered as the
    temp view ``y3`` (replacing any view already registered under that
    name), and filtered to rows whose ``bank_litigant_name`` is
    '中国建设银行' and whose ``bank_litigant_type`` is '被告'. The matching
    rows are printed with ``show()``; nothing is returned.

    Args:
        spark: Object whose ``stop()`` method is called once the rows have
            been shown (presumably the active SparkSession -- confirm).
        sc: Passed to ``SQLContext`` as its ``sparkContext``.
    """
    sqlContext = SQLContext(sparkContext=sc)

    # Column layout of the `sue_by_bank` file. The original also assigned an
    # unrelated five-column list to this name just before this one; that
    # value was overwritten without ever being read, so it has been dropped.
    judgedoc_info_field = [
        'doc_id',
        'litigant_name',
        'litigant_type',
        'bank_litigant_name',
        'bank_litigant_type',
    ]
    judgedoc_info_schema = StructType(
        [StructField(field_name, StringType(), True) for field_name in judgedoc_info_field]
    )

    # `sue_by_bank` is not defined in this function; it is expected to be
    # provided by the enclosing module (presumably a file path -- confirm).
    dfkk2 = sqlContext.read.load(
        sue_by_bank, format="csv", schema=judgedoc_info_schema, delimiter='\t'
    )
    dfkk2.createOrReplaceTempView('y3')

    # A previously used, now disabled, variant of the filter additionally
    # required: litigant_name ='国家工商行政管理总局商标评审委员会' and
    # litigant_type = '被告'.
    dfhh11 = sqlContext.sql(
        "select * from y3 where bank_litigant_name = '中国建设银行'and bank_litigant_type ='被告'\n"
    )
    dfhh11.show()

    # NOTE(review): the source listing lost its indentation; this call is
    # kept inside the function because `spark` is otherwise an unused
    # parameter -- confirm it was not meant to run at module level.
    spark.stop()