读取以制表符（\t）分隔的 CSV 文件

版权声明:本文为博主原创文章,未经博主允许不得转载。 https://blog.csdn.net/sinat_26566137/article/details/82224317
 judgedoc_info_field=['litigant_name','judgedoc_cnt','litigant_defendant_contract_dispute_cnt', 'litigant_defendant_bust_cnt',
                             'defendant_judgedoc_cnt']
    judgedoc_info_schema = StructType(
        [StructField(field_name, StringType(), True) for field_name in judgedoc_info_field])
    sqlContext = SQLContext(sparkContext=sc)
    dfkk2=sqlContext.read.load(feature_extract_judgedoc, format="csv", schema=judgedoc_info_schema, delimiter='\t')
    # dfkk2 = sqlContext.read.csv(
    #     hdfspath_1,header = False)#hdfspath_1
    dfkk2.createOrReplaceTempView('y3')
    dfkk2.show()
    dfhh1 = sqlContext.sql(
        """select * from y3 where litigant_name ='重庆农村商业银行股份有限公司'
         """)
    dfhh1.show()


############
def test_data_01(spark, sc):
    """Load the tab-delimited ``sue_by_bank`` CSV and show rows for one bank.

    Reads the file at the module-level path ``sue_by_bank`` with an
    all-string schema, registers it as temp view ``y3``, prints the rows
    where the bank litigant is 中国建设银行 acting as 被告 (defendant),
    then stops the Spark session.

    NOTE(review): relies on names defined elsewhere in this file:
    ``SQLContext``/``StructType``/``StructField``/``StringType`` (pyspark
    imports) and the ``sue_by_bank`` path constant — confirm they are in
    scope at call time.
    """
    sqlContext = SQLContext(sparkContext=sc)
    # Every column is read as a nullable string; no type inference is wanted
    # here because downstream filtering is purely string equality.
    judgedoc_info_field = ['doc_id', 'litigant_name', 'litigant_type',
                           'bank_litigant_name', 'bank_litigant_type']
    judgedoc_info_schema = StructType(
        [StructField(field_name, StringType(), True) for field_name in judgedoc_info_field])

    dfkk2 = sqlContext.read.load(sue_by_bank, format="csv",
                                 schema=judgedoc_info_schema, delimiter='\t')
    dfkk2.createOrReplaceTempView('y3')
    # Filter: bank is China Construction Bank appearing as defendant.
    # (Original query had no space between the quoted literal and `and`.)
    dfhh11 = sqlContext.sql(
        """select * from y3 where bank_litigant_name = '中国建设银行' and bank_litigant_type = '被告'
         """)
    dfhh11.show()
    spark.stop()

猜你喜欢

转载自blog.csdn.net/sinat_26566137/article/details/82224317