python操作mysql,递归读取目录下文件

归档写过的python进行mysql数据库操作的code
import os
import re
import time
import datetime
import MySQLdb

c08imeimap={}

c08online1minuser={}
c08online5minuser={}
c08online10minuser={}


onlinenumperhour={'00':{},'01':{},'02':{},'03':{},'04':{},'05':{},'06':{},'07':{},'08':{},'09':{},'10':{},'11':{},\
                  '12':{},'13':{},'14':{},'15':{},'16':{},'17':{},'18':{},'19':{},'20':{},'21':{},'22':{},'23':{},}

def find_files(dir, wildcard, recursion):
    result = []
    exts = wildcard.split(" ")
    for root, subdirs, files in os.walk(dir):
        for name in files:
            for ext in exts:
                if (name.endswith(ext)):
                    full_name = os.path.join(root, name)
                    result.append((os.path.getmtime(full_name),full_name))
                    break
        if (not recursion):
            break
    return result

def recordandwirtefile(imei, m1times, m5times, m10times, fp):
    if imei is not None:
        fp.write(imei)
        fp.write(",")
    if m1times is not None:
        fp.write(str(m1times))
        fp.write(",")
    if m5times is not None:
        fp.write(str(m5times))
        fp.write(",")
    if m10times is not None:
        fp.write(str(m10times))
    fp.write('\r\n')

def update_sql():
    db=MySQLdb.connect(host='192.168.1.100', user = 'test', passwd='test',db='test',port = 3306)
    cursor = db.cursor()

    sql = "update oss_ana_user_activation set onemins=onemins+%d,fivemins=fivemins+%d,tenmins=tenmins+%d where channelid='%s' and projectid='%s' and cycle=date_sub(curdate(),interval 1 day)" \
        % (len(c08online1minuser), len(c08online5minuser), len(c08online10minuser), "C8", "P2")
    cursor.execute(sql)
    db.commit()
    db.close()

def get_yes():
     now_time = datetime.datetime.now()
     yes_time = now_time + datetime.timedelta(days=-1)
     return yes_time.strftime('%Y-%m-%d')

def exist_yestoday_user_active_perhour_sql():
    db=MySQLdb.connect(host='192.168.1.100', user = 'test', passwd='test',db='test',port = 3306)
    cursor = db.cursor()

    sql = "select * from oss_user_active_per_hour where cycle='%s'" % (get_yes())
    cursor.execute(sql)
    result = cursor.fetchall()
    db.commit()
    db.close()
    return len(result)

def insert_user_active_perhour_sql(ls):
    db=MySQLdb.connect(host='192.168.1.100', user = 'test', passwd='test',db='test',port = 3306)
    cursor = db.cursor()

    sql = "insert into oss_user_active_per_hour (cycle,h0,h1,h2,h3,h4,h5,h6,h7,h8,h9,h10,h11,h12,h13,h14,h15,h16,h17,h18,h19,h20,h21,h22,h23) \
        values ('%s',%d,%d,%d,%d,%d,%d,%d,%d,%d,%d,%d,%d,%d,%d,%d,%d,%d,%d,%d,%d,%d,%d,%d,%d)" \
        % (get_yes(),ls[0],ls[1],ls[2],ls[3],ls[4],ls[5],ls[6],ls[7],ls[8],ls[9],ls[10],ls[11],ls[12],\
        ls[13],ls[14],ls[15],ls[16],ls[17],ls[18],ls[19],ls[20],ls[21],ls[22],ls[23])
    cursor.execute(sql)
    db.commit()
    db.close()

def update_user_active_perhour_sql(ls):
    db=MySQLdb.connect(host='192.168.1.100', user = 'test', passwd='test',db='test',port = 3306)
    cursor = db.cursor()

    sql = "update oss_user_active_per_hour set h0=h0+%d,h1=h1+%d,h2=h2+%d,h3=h3+%d,h4=h4+%d,h5=h5+%d,h6=h6+%d,h7=h7+%d,h8=h8+%d,h9=h9+%d,h10=h10+%d,\
        h11=h11+%d,h12=h12+%d,h13=h13+%d,h14=h14+%d,h15=h15+%d,h16=h16+%d,h17=h17+%d,h18=h18+%d,h19=h19+%d,h20=h20+%d,h21=h21+%d,h22=h22+%d,h23=h23+%d where cycle='%s'" \
        % (ls[0],ls[1],ls[2],ls[3],ls[4],ls[5],ls[6],ls[7],ls[8],ls[9],ls[10],ls[11],ls[12],\
        ls[13],ls[14],ls[15],ls[16],ls[17],ls[18],ls[19],ls[20],ls[21],ls[22],ls[23],get_yes())
    cursor.execute(sql)
    db.commit()
    db.close()

def t2s(t):
    h,m,s = t.strip().split(":")
    return int(h) * 3600 + int(m) * 60 + int(s)

def stat_activenum_everyhour(hour, imei):
    if onlinenumperhour.has_key(hour):
        if onlinenumperhour[hour].has_key(imei):
            return
        else:
            onlinenumperhour[hour][imei]=1
    else:
        onlinenumperhour[hour]={imei : 1}

def get_activenum_everyhour():
    numlist=[]
    keys=sorted(onlinenumperhour.keys())
    for key in keys:
        numlist.append(len(onlinenumperhour[key]))
    return numlist

def process(dirs, fileout, resfile):
    filelist=find_files(dirs, '11 12 64 27', False)
    newlist=sorted(filelist, key=lambda filelist : filelist[0])
    for file in newlist:
        fp=open(file[1], 'rb')
        linelist = fp.readlines()
        fp.close()
        print file[1]
        for line in linelist:
            context = line.split("|")
            if len(context) < 10:
                continue
            if context[4] == " " or context[4] == "null":
                continue
            if context[3] == "null" or context[3] == "":
                continue
            if context[8] == '101' and context[4] == "lf-bjgd":
                continue
            elif context[4] == "C000008":
                if c08imeimap.has_key(context[3]):
                    if context[6] == '0':
                        start=t2s(c08imeimap[context[3]][8:10]+":"+c08imeimap[context[3]][10:12]+":"+c08imeimap[context[3]][12:14])
                        end=t2s(context[0][8:10]+":"+context[0][10:12]+":"+context[0][12:14])
                        #print start,end
                        if end-start > 60 and not c08online1minuser.has_key(context[3]):
                            c08online1minuser[context[3]] = 1
                            recordandwirtefile(context[3], end-start, 0, 0,fileout)
                        if end - start > 300 and not c08online5minuser.has_key(context[3]):
                            c08online5minuser[context[3]] = 1
                        if end - start > 600 and not c08online10minuser.has_key(context[3]):
                            c08online10minuser[context[3]] = 1
                    elif context[6] == '1':
                        c08imeimap[context[3]]=context[0]
                elif context[6] == '1':
                    c08imeimap[context[3]]=context[0]
            #statistic onlineuser num every hour
            if context[6] == '1':
                stat_activenum_everyhour(context[0][8:10], context[3])

    recordandwirtefile("C8", len(c08online1minuser), len(c08online5minuser), len(c08online10minuser), resfile)
    update_sql()

if __name__ == "__main__":
    dirs="/home/stat/"
    outfp = open('middleout.csv', 'wb')
    resfp = open('result.csv', 'wb')
    process(dirs, outfp, resfp)
    ls = get_activenum_everyhour()
    print ls
    res1 = exist_yestoday_user_active_perhour_sql()
    if res1 == 0:
        print "insert"
        insert_user_active_perhour_sql(ls)
    elif res1 == 1:
        print "update"
        update_user_active_perhour_sql(ls)
    else:
        print "sql query user_active_perhour err"
     
    outfp.close()
    resfp.close()

猜你喜欢

转载自blog.csdn.net/pyxllq/article/details/80338339
今日推荐