Data Reading
The File Reading Mechanism
- Reading files in TensorFlow consists of the following three steps (each step is sketched in code below):
  - Place the files to be read into a filename queue
  - Read the file contents and decode them
  - Batch the decoded records, pulling data out at the specified batch size
Constructing the filename queue
Reading the file
Decoding the file contents
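Each of these steps maps onto a specific piece of the TensorFlow 1.x queue-based input API. The sketch below shows that mapping in isolation; the file names and the two-string-column record format here are placeholders, not part of the original example, and the full runnable versions follow in the next two sections.

import tensorflow as tf

# Step 1: place the files to be read into a filename queue
file_queue = tf.train.string_input_producer(['a.csv', 'b.csv'])  # placeholder file names

# Step 2: read one record from the queue and decode it
reader = tf.TextLineReader()                # line-oriented reader: one record per text line
key, value = reader.read(file_queue)
record_defaults = [['None'], ['None']]      # placeholder format: two string columns per line
col1, col2 = tf.decode_csv(value, record_defaults=record_defaults)

# Step 3: batch the decoded records at the requested batch size
batch = tf.train.batch([col1, col2], batch_size=8, num_threads=1, capacity=32)

These ops only build the graph; actually pulling data out requires a session with tf.train.start_queue_runners running, as the complete examples below demonstrate.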
Reading CSV Files
import tensorflow as tf
import os

def csv_read(filelist):
    # Build the filename queue
    file_queue = tf.train.string_input_producer(filelist)
    # Build a line-oriented reader for the CSV files
    reader = tf.TextLineReader()
    k, v = reader.read(file_queue)
    # Decode each line; record_defaults specifies the format/default of every column
    records = [['None'], ['None']]  # two string columns per line
    data, title = tf.decode_csv(v, record_defaults=records)
    # Batch the decoded records
    data_batch, label_batch = tf.train.batch([data, title],
                                             batch_size=2,    # batch size
                                             num_threads=1,   # number of reader threads
                                             capacity=32)     # queue capacity (must be >= batch_size)
    return data_batch, label_batch

def file_read(dirname):
    file_list = []
    for file in os.listdir(dirname):
        file_list.append(os.path.join(dirname, file))
    return file_list

if __name__ == '__main__':
    dirname = './csv_file'
    file_list = file_read(dirname)
    data_batch, label_batch = csv_read(file_list)
    # Run the graph
    with tf.Session() as sess:
        coord = tf.train.Coordinator()                             # thread coordinator
        threads = tf.train.start_queue_runners(sess, coord=coord)  # start the file-reading queue threads
        # Each loop iteration reads one batch
        for idx in range(5):
            print(sess.run([data_batch, label_batch]))
        coord.request_stop()  # ask the reader threads to stop
        coord.join(threads)   # wait for the threads to finish
"""
[array([b'pineapple', b'pear'], dtype=object), array([b'3', b'4'], dtype=object)]
[array([b'apple', b'banana'], dtype=object), array([b'0', b'1'], dtype=object)]
[array([b'grape', b'apple'], dtype=object), array([b'2', b'0'], dtype=object)]
[array([b'banana', b'grape'], dtype=object), array([b'1', b'2'], dtype=object)]
[array([b'pineapple', b'pear'], dtype=object), array([b'3', b'4'], dtype=object)]
"""
Reading Image Data
import tensorflow as tf
import os
import numpy as np

def img_read(filelist):
    # Build the filename queue
    file_queue = tf.train.string_input_producer(filelist)
    # Build a whole-file reader (one image file per record)
    reader = tf.WholeFileReader()
    k, v = reader.read(file_queue)
    # Decode the JPEG bytes into an image tensor
    img = tf.image.decode_jpeg(v)
    # Resize every image to a uniform size so they can be batched
    img_resized = tf.image.resize_images(img, [200, 200])
    img_resized.set_shape([200, 200, 3])
    img_batch = tf.train.batch([img_resized],
                               batch_size=2,    # batch size
                               num_threads=1,   # number of reader threads
                               capacity=32)     # queue capacity (must be >= batch_size)
    return img_batch

def file_read(dirname):
    file_list = []
    for file in os.listdir(dirname):
        file_list.append(os.path.join(dirname, file))
    return file_list

if __name__ == '__main__':
    dirname = './img_file/apple'
    file_list = file_read(dirname)
    img_batch = img_read(file_list)
    batches = []
    # Run the graph
    with tf.Session() as sess:
        coord = tf.train.Coordinator()                             # thread coordinator
        threads = tf.train.start_queue_runners(sess, coord=coord)  # start the file-reading queue threads
        # Each loop iteration reads one batch
        for idx in range(5):
            batches.append(sess.run(img_batch))
        print(np.array(batches).shape)
        coord.request_stop()  # ask the reader threads to stop
        coord.join(threads)   # wait for the threads to finish
"""
(5, 2, 200, 200, 3) => [batches, 批次大小, w, h, c]
"""