python学习_5_文件操作

对文件的操作分三步：
1.打开文件获取文件句柄, 句柄可以理解为这个文件
2.通过文件句柄操作文件
3.关闭文件

现有以下文件file.txt：

#感觉到好炽热
#刚好是你经过
#眼神表现洒脱
#手却不自主地

#1.文件基本操作

f=open('file.txt','r', encoding='utf-8')# open the file read-only and get its file handle
print(f.readline())# read one line and return it as a str (not a list); here that is the first line of the file
print(f.read())# read from the current position to end of file, i.e. everything after the first line
f.seek(0)# move the file pointer back to the start of the file
print(f.readlines())# read every line from the current position into a list (one element per line) and return it
f.close()# close the file

#2.文件打开模式

3种基本模式

# r: 读模式, 但凡r, 文件指针都在文件头

# Demonstrates 'r' (read-only) mode. The last two lines each raise on purpose;
# run them one at a time to see each error.
f=open('file.txt','r', encoding='utf-8')# open the file read-only and get its file handle
print(f.read())# read the whole file
# write() needs a string argument; calling it with no argument raises TypeError
# before the mode is even checked, which would hide the error shown here.
f.write('besttest')# 'r' mode is not writable: io.UnsupportedOperation: not writable
f1=open('apple.TXT','r',encoding='utf-8')# 'r' cannot open a missing file: FileNotFoundError: [Errno 2] No such file or directory

# w: 写模式

# Demonstrates 'w' (write-only) mode. The read() line raises on purpose.
f = open('file.txt', 'w', encoding='utf-8')# opening with 'w' empties the existing file
f.write('besttest')# overwrite: the file now contains only this text
print(f.read())# 'w' mode is not readable: io.UnsupportedOperation: not readable
f1=open('apple.TXT','w',encoding='utf-8')# with 'w', a missing file is created (mode 'r' here would raise FileNotFoundError instead)

# a: 追加模式, 但凡a, 文件指针都在文件尾

# Demonstrates 'a' (append-only) mode. The read() line raises on purpose.
f = open('file.txt', 'a', encoding='utf-8')# existing content is kept; the pointer starts at end of file
f.write('besttestbesttestbesttestbesttest')# append: written after the existing content
print(f.read())# 'a' mode is not readable: io.UnsupportedOperation: not readable
f1=open('apple.TXT','a',encoding='utf-8')# with 'a', a missing file is created (opened with 'a' so the line shows this mode, not 'w')

#"+"模式, 表示可以同时读写某个文件

# r+: 读写模式

# Demonstrates 'r+' (read and write) mode. The last line raises on purpose.
f = open('file.txt', 'r+', encoding='utf-8')
print(f.read())# read the whole file
f.seek(0)# move the pointer back to the start of the file
# This overwrites existing data from the start of the file; it does not insert.
# NOTE: the overwrite is byte-based, so on UTF-8 Chinese text it can cut a
# multi-byte character in half and leave the file undecodable.
f.write('BESTTEST')
f1=open('apple.TXT','r+',encoding='utf-8')# 'r+' cannot open a missing file: FileNotFoundError: [Errno 2] No such file or directory

# w+: 写读模式

# Demonstrates 'w+' (write and read) mode.
f = open('file.txt', 'w+', encoding='utf-8')# opening with 'w+' empties the existing file
f.seek(0)
print('1:',f.read())# reads nothing: the file was emptied on open
f.write('SYZ')# overwrite: the file now contains only this text
print('2:',f.read())# still reads nothing: the pointer sits at end of file after the write
f.seek(0)# move the pointer back to the start of the file
print('3:',f.read())# now reads 'SYZ'; 'w+' can only ever read what was written after opening
f1=open('apple.TXT','w+',encoding='utf-8')# with 'w+', a missing file is created

# a+: 追加读写模式

# Demonstrates 'a+' (append and read) mode.
f = open('file.txt', 'a+', encoding='utf-8')
print(f.read())# readable, but returns nothing because 'a+' starts at end of file; call f.seek(0) first to read the content
f.write('manwei')# append: always written at the end of the file
f1=open('apple.TXT','a+',encoding='utf-8')# with 'a+', a missing file is created

#基本模式和"+"模式总结如下：

一般会使用a+这种比较友好的方式

#"U"模式, 表示在读取时, 可以将 \r \n \r\n自动转换成 \n （与 r 或 r+ 模式同使用）。注意: Python 3 的文本模式默认就会做这种换行转换(通用换行), "U"模式早已被弃用, 并在 Python 3.11 中被移除, 新代码不要再使用
#rU模式
#r+U模式

#"b"模式, 表示以二进制方式处理文件（如：FTP发送上传ISO镜像文件、图片等非文本文件）。在 Python 3 中, 二进制模式读写的是 bytes 而不是 str, 且不能指定 encoding, 因此无论 linux 还是 windows, 处理二进制文件时都需要标注
#rb模式
#wb模式
#ab模式

#3.文件指针

f = open('file.txt', encoding='utf-8')
print(f.read())# read the whole file; the pointer is now at end of file
print(f.readline())# nothing is left after the pointer, so this returns an empty string
f.seek(0)# move the pointer back to the start of the file. NOTE: seek also moves the write position, except in append modes ('a'/'a+') where writes always go to the end

#4.常用文件操作方法

f = open('file.txt','r+', encoding='utf-8')
f.readable()# return whether the file can be read
f.writable()# return whether the file can be written
f.encoding# the file's text encoding (an attribute; wrap it in print() to display it)
f.read()# read all content into memory; avoid this on large files
f.readline()# read one line
f.readlines()# read all lines into a list; also avoid this on large files
f.tell()# return the current file pointer position
f.seek(0)# move the file pointer to the start of the file
f.write('乖宝宝')# write content; only str is accepted in text mode
f.flush()# push buffered data out to disk now
f.truncate()# with no argument, cut the file off at the current pointer position; it only empties the file when the pointer is at 0
f.writelines(['a','apple'])# write every string in the list; no line separators are added
f.close()# close the file

#5.大文件的高效操作方法

用上面的read()和readlines()方法操作文件的话，会先把文件所有内容读到内存中

这样的话，内存数据一多，非常卡，高效的操作，就是读一行操作一行，读过的内容就从内存中释放了

# Iterate the file object directly so only one line is held in memory at a time.
# encoding is given explicitly because file.txt is UTF-8 text; the platform
# default (e.g. on Windows) may not be UTF-8.
f = open('file.txt', encoding='utf-8')
for line in f:
    print(line)# line is one line of the file, including its trailing newline
f.close()# close the handle once iteration is finished

#6.with: 避免忘关文件的方法

# The with statement closes the file automatically when the block exits,
# even if an exception is raised inside it.
# encoding is given explicitly because file.txt is UTF-8 text.
with open('file.txt','r',encoding='utf-8') as f:# open the file and bind the handle to f
    for line in f:
        print(line)

# One with statement can manage several files; both are closed on exit.
# Both sides use UTF-8 so the copy does not depend on the platform default encoding.
with open('file.txt',encoding='utf-8') as fr,open('file_bak','w',encoding='utf-8') as fw:
    for line in fr:# loop over every line of fr
        fw.write(line)# copy it into fw

#7.修改文件

第一种方式：把文件的全部内容都读到内存中，然后把原有的文件内容清空，重新写新的内容

# Modify a file in place: read everything, empty the file, write the new text.
with open('file.txt','a+',encoding='utf-8') as fr:
    fr.seek(0)# 'a+' starts at end of file; go to the start so there is something to read
    res=fr.read()# the pointer is at end of file again after this
    new_res=res.replace('你','YOU')
    fr.seek(0)# go back to the start so truncate() removes everything
    fr.truncate()# cut the file at the current position (0), emptying it
    # write() is the right call for a single string; writelines() would iterate
    # it character by character. In 'a+' mode writes go to the end of the file,
    # which is position 0 now that the file is empty.
    fr.write(new_res)
    fr.seek(0)# go back to the start again to read the result
    print(fr.read())

第二种方式：把修改后的文件内容写到一个新的文件中

# Modify a file by streaming it line by line into a new file.
# Both files use UTF-8 explicitly: the text being replaced is Chinese, and the
# platform default encoding may not be UTF-8.
with open('file.txt',encoding='utf-8') as fr,open('file_new','w',encoding='utf-8') as fw:
    for line in fr:# loop over every line of file.txt
        new_line=line.replace('你','YOU')
        fw.write(new_line)# write the modified line to file_new

# 文件操作小练习_自动生成手机号码

需求：手机号以1861235开头, 必须是11位, 屏幕输入几就会生成几条手机号码写入文件

import random

# Fixed 7-digit prefix; 4 random digits are appended to make an 11-digit number.
start = '1861235'
num = input('请输入需要的手机号个数：')
# Convert before opening the file, so invalid input raises ValueError without
# creating or touching phones.txt.
count = int(num)
# The with statement closes the file even if a write fails part-way.
with open('phones.txt', 'a+') as f:
    for _ in range(count):
        # randint() returns an int; zfill(4) left-pads it with zeros to 4 digits
        suffix = str(random.randint(1, 9999)).zfill(4)
        f.write(start + suffix + '\n')

# 文件操作小练习_监控日志封ip

1.每分钟读一次监控日志文件, 如果有攻击服务器的, 就把ip加入黑名单

2.打开日志文件

3.读ip地址

4.判断出现次数大于100次的ip，并加入黑名单

import time
from collections import Counter

# The stated requirement is to blacklist IPs seen more than 100 times in one pass.
BLACKLIST_THRESHOLD = 100

file_point = 0  # offset where the previous pass stopped reading
while True:
    ip_counts = Counter()
    # The with statement closes the log even if a line fails to parse.
    with open(r'D:\access.log', encoding='utf-8') as f:
        f.seek(file_point)  # resume from where the last pass stopped
        for line in f:  # iterating a file object yields one line at a time
            fields = line.split()
            if not fields:  # skip blank lines rather than raising IndexError
                continue
            ip_counts[fields[0]] += 1  # the IP is the first whitespace-separated field
        file_point = f.tell()  # remember the current position for the next pass

    # Counter tallies each IP in one pass, instead of list.count() per unique IP.
    for ip, hits in ip_counts.items():
        if hits > BLACKLIST_THRESHOLD:
            print("应该加入黑名单的ip是%s" % ip)
    time.sleep(60)  # wait 60 seconds before the next pass

# 文件操作小练习_每一行前加字符串'syz_'

方法一

# Prefix every line of 'word' with 'syz_' by rewriting the file in place.
# The with statement closes the file even on error. UTF-8 is given explicitly
# to match how 'word' is opened elsewhere in this article.
with open('word', 'a+', encoding='utf-8') as f:
    f.seek(0)  # 'a+' starts at end of file; go to the start to read
    # Build the new content with a single join instead of repeated concatenation.
    final_str = ''.join('syz_' + line for line in f)
    f.seek(0)
    f.truncate()  # cut the file at position 0, emptying it
    f.write(final_str)  # 'a+' writes at the end, which is position 0 now

方法二

import os
# Prefix every line of file.txt with 'syz_' by writing a temp file and swapping it in.
# The temp file is opened with 'w' so leftovers from an earlier failed run are
# discarded; 'a+' would keep them and append after them.
with open('file.txt','r',encoding='utf-8') as fr,open('syz','w',encoding='utf-8') as fw:
    for line in fr:
        fw.write('syz_'+line)
# os.replace deletes the old file.txt and renames 'syz' to it in one step, so
# file.txt is never missing part-way through (and it works on Windows too).
os.replace('syz','file.txt')

# 文件操作小练习_修改文件内容

1.打开a文件（示例中为word）, 把其中的'花'替换为'flower'后, 把a文件内容写到b文件（示例中为.word.bak）

2.删掉a文件

3.改b文件名字为a文件名字

import os
# Replace '花' with 'flower' in 'word' by writing a backup file and swapping it in.
with open('word', encoding='utf-8') as fr, open('.word.bak', 'w', encoding='utf-8') as fw:
    for line in fr:
        new_line = line.replace('花', 'flower')
        fw.write(new_line)
# os.replace deletes the original and renames the new file in one step, so
# 'word' is never missing if the script is interrupted between the two.
os.replace('.word.bak', 'word')

python学习_5_文件操作

猜你喜欢