由于在读取网络流时出现了问题，不得不去修改 LoadStreams 类。在阅读源码的过程中真的受益良多，这里粗略记录一下，以供大致参考。
# 以接收网络视频流为例
class LoadStreams:
    """Iterable loader that reads one or more video streams in background threads.

    Every source gets its own daemon thread (see ``update``) which keeps
    ``self.imgs[i]`` pointing at the most recently decoded frame. Iterating
    the loader letterboxes the latest frame of every stream and returns them
    stacked as one batch.
    """

    def __init__(self, sources='streams.txt', img_size=640, stride=32, auto=True):
        """Open every source, read its first frame and start its reader thread.

        Args:
            sources: Path of a text file listing one source per line, or a
                single source string (e.g. a stream URL or a webcam index
                such as ``'0'``).
            img_size: Target size forwarded to ``letterbox``.
            stride: Stride forwarded to ``letterbox``.
            auto: ``auto`` flag forwarded to ``letterbox``.

        Raises:
            AssertionError: If a source cannot be opened, or if webcam ``0``
                is requested on Colab/Kaggle.
            ConnectionError: If the first frame of a source cannot be read.
        """
        # Basic inference parameters
        self.mode = 'stream'
        self.img_size = img_size
        self.stride = stride

        # A file path means "one source per line"; a network stream URL is
        # not a file, so it is wrapped into a one-element list instead.
        if os.path.isfile(sources):
            with open(sources) as f:
                sources = [x.strip() for x in f.read().strip().splitlines() if len(x.strip())]
        else:
            sources = [sources]

        n = len(sources)
        # Per-stream state:
        #   self.imgs:    latest frame of each stream
        #   self.fps:     frames per second of each stream
        #   self.frames:  total frame count of each stream
        #   self.threads: one reader thread per stream
        self.imgs, self.fps, self.frames, self.threads = [None] * n, [0] * n, [0] * n, [None] * n
        self.sources = [clean_str(x) for x in sources]  # clean source names for later
        self.auto = auto

        for i, s in enumerate(sources):  # index, source
            # Progress prefix used in the log and error messages below
            st = f'{i + 1}/{n}: {s}... '

            if urlparse(s).hostname in ('www.youtube.com', 'youtube.com', 'youtu.be'):  # if source is YouTube video
                check_requirements(('pafy', 'youtube_dl==2020.12.2'))
                import pafy
                s = pafy.new(s).getbest(preftype="mp4").url  # YouTube URL

            # A purely decimal source is a local device index (i.e. '0' -> 0).
            # int() is used instead of eval() so the string is never executed
            # as code; every other source is passed through unchanged.
            s = int(s) if s.isdecimal() else s  # i.e. s = '0' local webcam
            if s == 0:
                assert not is_colab(), '--source 0 webcam unsupported on Colab. Rerun command in a local environment.'
                assert not is_kaggle(), '--source 0 webcam unsupported on Kaggle. Rerun command in a local environment.'

            # Open the capture and make sure it is usable before querying it
            cap = cv2.VideoCapture(s)
            assert cap.isOpened(), f'{st}Failed to open {s}'

            # Query stream properties
            w = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
            h = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
            fps = cap.get(cv2.CAP_PROP_FPS)  # warning: may return 0 or nan
            # A frame count of 0 falls back to float('inf') (endless stream)
            self.frames[i] = max(int(cap.get(cv2.CAP_PROP_FRAME_COUNT)), 0) or float('inf')  # infinite stream fallback
            self.fps[i] = max((fps if math.isfinite(fps) else 0) % 100, 0) or 30  # 30 FPS fallback

            # Read the first frame so self.imgs[i] is populated before the
            # shape check below; fail early with a clear error otherwise.
            success, self.imgs[i] = cap.read()  # guarantee first frame
            if not success or self.imgs[i] is None:
                cap.release()
                raise ConnectionError(f'{st}Failed to read images from {s}')

            # One daemon thread per stream keeps reading frames in the background
            self.threads[i] = Thread(target=self.update, args=(i, cap, s), daemon=True)
            LOGGER.info(f"{st} Success ({self.frames[i]} frames {w}x{h} at {self.fps[i]:.2f} FPS)")
            self.threads[i].start()
        LOGGER.info('')  # newline

        # check for common shapes
        s = np.stack([letterbox(x, self.img_size, stride=self.stride, auto=self.auto)[0].shape for x in self.imgs])
        # np.unique(..., axis=0) collapses duplicate shape rows; one remaining
        # row means every stream letterboxes to the same shape.
        self.rect = np.unique(s, axis=0).shape[0] == 1  # rect inference if all shapes equal
        if not self.rect:
            LOGGER.warning('WARNING: Stream shapes differ. For optimal performance supply similarly-shaped streams.')

    def update(self, i, cap, stream):
        """Read frames of stream ``i`` until it closes or runs out of frames.

        Intended to run in the daemon thread created by ``__init__``.

        Args:
            i: Index of the stream inside ``self.imgs`` / ``self.frames``.
            cap: Opened capture object providing ``isOpened``, ``grab``,
                ``retrieve``, ``open`` and ``release``.
            stream: Source passed to ``cap.open`` when the signal is lost.
        """
        n, f, read = 0, self.frames[i], 1  # frame number, frame count, inference every 'read' frame
        try:
            while cap.isOpened() and n < f:
                n += 1
                # grab() advances to the next frame without decoding it
                cap.grab()
                if n % read == 0:
                    # retrieve() decodes the grabbed frame and returns (ok, image)
                    success, im = cap.retrieve()
                    if success:
                        self.imgs[i] = im
                    else:
                        LOGGER.warning('WARNING: Video stream unresponsive, please check your IP camera connection.')
                        # Substitute a black frame of the same shape as the last one
                        self.imgs[i] = np.zeros_like(self.imgs[i])
                        cap.open(stream)  # re-open stream if signal was lost
                time.sleep(0.0)  # wait time
        finally:
            # Free the capture once this reader stops, whatever the reason
            cap.release()

    def __iter__(self):
        """Reset the iteration counter and return the loader itself."""
        self.count = -1
        return self

    def __next__(self):
        """Return the latest frame of every stream as a letterboxed batch.

        Returns:
            Tuple ``(sources, img, img0, None, '')`` where ``img`` is the
            stacked, letterboxed batch and ``img0`` is the list of raw frames.

        Raises:
            StopIteration: If any reader thread has stopped or 'q' was pressed.
        """
        self.count += 1
        if not all(x.is_alive() for x in self.threads) or cv2.waitKey(1) == ord('q'):  # q to quit
            cv2.destroyAllWindows()
            raise StopIteration

        # Letterbox
        img0 = self.imgs.copy()
        img = [letterbox(x, self.img_size, stride=self.stride, auto=self.rect and self.auto)[0] for x in img0]

        # Stack
        img = np.stack(img, 0)

        # Convert
        img = img[..., ::-1].transpose((0, 3, 1, 2))  # BGR to RGB, BHWC to BCHW
        img = np.ascontiguousarray(img)

        return self.sources, img, img0, None, ''

    def __len__(self):
        """Return the number of configured sources."""
        return len(self.sources)  # 1E12 frames = 32 streams at 30 FPS for 30 years