import numpy as np
import math
import matplotlib.pyplot as plt
import pandas as pd
import datetime
from scipy import interpolate
from pandas import DataFrame,Series
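# num_pi: how many multiples of π of sine data to generate; num_ex: number of anomalous points; num_gap: number of contiguous missing-data segments; num_bk: number of single missing values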
def test_data_gen(num_pi,num_ex,num_gap,num_bk):
if (num_pi>0) :
num_point=72*num_pi
x=np.linspace(0,3.14*num_pi,num_point)
signal1=[(math.sin(i)+1) for i in x] #产生测试用的num_pi个sin数据
noise=0.2*(np.random.rand(num_point)-0.5)
signal1=signal1+noise#在sin数据上添加噪声
else:
print("Please input valid num_pi")
return
if (num_ex>0) :
#随机添加异常值
point_ex=[]
for i in range(num_ex):
point_ex.append(np.random.randint(0,len(signal1))) #异常值的位置
for _ in point_ex:
signal1[_]=signal1[_]*1.8
else:
pass
if (num_gap>0) :
#随机添加段数据缺失
longth_gap=np.random.randint(15)+5 #缺口大小5~20
point_gap=[] #缺口的位置
for i in range(num_gap):
point_gap.append(np.random.randint(num_point-20))
for i in point_gap:
for j in range(longth_gap):
signal1[i+j]=None
else:
pass
if (num_bk>0) :
#随机添加单点缺失值
point_break=[]
for i in range(num_bk):
point_break.append(np.random.randint(num_point))
for _ in point_break:
signal1[_]=None
else:
pass
#产生时间序列,每隔5分钟一个点
date_need=[]
start_dt = datetime.datetime(2017, 1, 1)
interval = datetime.timedelta(seconds=300)
for i in range(num_point):
date_need.append(start_dt + interval * i)
df = DataFrame(signal1,index = date_need[0:num_point])
df.to_excel('data_test.xlsx')
plt.figure(figsize=(10,5))
plt.plot(signal1)
plt.show()
return signal1
# Demo run: 4π of sine data with 2 anomalies, 6 missing segments and no
# single missing values.
test_data_gen(num_pi=4, num_ex=2, num_gap=6, num_bk=0)
array([ 9.16153402e-01, 9.54379407e-01, 1.09731508e+00,
1.08006151e+00, 1.23044901e+00, 1.27691499e+00,
1.32374816e+00, 1.27310324e+00, 1.28585976e+00,
1.30158311e+00, 1.41862637e+00, 1.51293483e+00,
1.46542362e+00, 1.50866021e+00, 1.57576178e+00,
1.59855783e+00, 1.65355219e+00, 1.69197995e+00,
1.68356642e+00, 1.79228898e+00, 1.70167661e+00,
1.74825474e+00, 1.89430181e+00, 1.77041511e+00,
1.80329786e+00, 1.85901009e+00, 1.87932731e+00,
1.94802512e+00, 1.98010691e+00, 1.99723195e+00,
2.04331069e+00, 2.00862185e+00, 1.91811615e+00,
1.94157410e+00, 2.04888692e+00, 2.05231327e+00,
1.95910786e+00, 2.08828786e+00, 2.06663645e+00,
3.43317550e+00, 2.00197626e+00, 1.97676869e+00,
1.86914580e+00, nan, nan,
nan, nan, nan,
nan, nan, nan,
nan, 1.70425963e+00, 1.76188529e+00,
1.76557559e+00, 1.63184934e+00, 1.66148539e+00,
1.62250085e+00, nan, nan,
nan, nan, nan,
nan, nan, nan,
nan, 1.14210725e+00, 1.23029710e+00,
1.22109954e+00, 1.15263675e+00, 1.09320005e+00,
9.56507128e-01, 1.02984472e+00, 8.34937581e-01,
7.90033074e-01, 7.35435567e-01, 7.10364653e-01,
7.39270870e-01, 7.00604767e-01, 5.86747134e-01,
6.97862181e-01, 6.63542957e-01, 5.81515513e-01,
5.80450727e-01, 3.85541118e-01, 4.44475765e-01,
2.88093921e-01, 3.69923346e-01, 2.70215771e-01,
2.05105006e-01, 1.85783290e-01, 2.46754791e-01,
2.53761177e-01, 8.71627632e-02, 1.77697662e-01,
6.02048174e-02, 8.31957566e-02, 3.99724990e-02,
3.28240462e-03, 6.16072322e-02, 1.16086419e-02,
3.27460625e-03, 1.19376608e-01, -1.40544492e-02,
3.83701427e-04, 7.86154263e-02, -2.98355455e-02,
6.38220160e-02, 9.06618096e-02, -3.62089132e-02,
-5.62450271e-02, nan, nan,
nan, nan, nan,
nan, nan, nan,
nan, 2.17241333e-01, 2.06728630e-01,
2.17462397e-01, 1.59507418e-01, 2.78363880e-01,
2.73258695e-01, 3.06463501e-01, 3.94220579e-01,
5.00902489e-01, 3.91612197e-01, 4.55070436e-01,
4.39161563e-01, 6.00936734e-01, 5.51967858e-01,
5.97536536e-01, 6.62903418e-01, 7.76501626e-01,
8.55124750e-01, 8.89612156e-01, 7.81178853e-01,
9.77184582e-01, 9.16382328e-01, 9.82456695e-01,
9.20410834e-01, 9.64181887e-01, 1.06073301e+00,
1.15095579e+00, 1.18814609e+00, 1.21158390e+00,
1.24563322e+00, 1.27416712e+00, 1.29402865e+00,
1.34641975e+00, 1.43252962e+00, 1.51013044e+00,
1.61631219e+00, 1.60735152e+00, 1.53355385e+00,
1.66311479e+00, 1.68018342e+00, 1.65007061e+00,
1.78013818e+00, 1.79670488e+00, 1.81007076e+00,
1.74685062e+00, 1.82627683e+00, 1.93129591e+00,
1.86592967e+00, 1.87453634e+00, 1.91195971e+00,
1.86368228e+00, 1.99174036e+00, 1.99325218e+00,
1.90830361e+00, 2.04982831e+00, 1.96508067e+00,
2.07271133e+00, 1.89832375e+00, 2.07355193e+00,
1.96777179e+00, 1.98768210e+00, 2.01406509e+00,
2.04982411e+00, 2.00725271e+00, 1.89402920e+00,
2.04896310e+00, 1.92510920e+00, 1.97991570e+00,
1.91984596e+00, 1.88616008e+00, 1.82408361e+00,
1.80510005e+00, 1.92727163e+00, 1.79424571e+00,
1.69321125e+00, 1.79971060e+00, 1.77475016e+00,
1.68809736e+00, 1.60391652e+00, 1.54975865e+00,
1.56552817e+00, 1.48431861e+00, 1.52390521e+00,
1.43952244e+00, 1.45504009e+00, 1.45715980e+00,
1.26044021e+00, 1.34336588e+00, 1.24744027e+00,
1.16712633e+00, 1.22905592e+00, 1.06003829e+00,
nan, nan, nan,
nan, nan, nan,
nan, nan, nan,
nan, nan, nan,
nan, nan, 4.26128437e-01,
5.33140231e-01, 4.59125201e-01, 4.97971158e-01,
4.41713135e-01, 4.03272371e-01, 2.87368789e-01,
nan, nan, nan,
nan, nan, nan,
nan, nan, nan,
-1.70563867e-02, 1.42980337e-01, 6.21597065e-02,
1.73589890e-02, -1.82530766e-02, 2.95011364e-02,
7.15305927e-02, -1.49406228e-02, -9.65255582e-02,
1.08531246e-02, 3.59284949e-02, 7.22006398e-02,
1.02626912e-01, 5.39881980e-02, 3.51501113e-02,
9.47410685e-02, 1.21593352e-01, 6.33962286e-02,
1.07925225e-01, 8.90447881e-02, 7.69046578e-02,
5.99909834e-02, 7.59334045e-02, 2.12289125e-01,
3.21005203e-01, 1.82030964e-01, 1.91950850e-01,
2.54582516e-01, 4.04332967e-01, 4.36101435e-01,
4.91973908e-01, 5.14079045e-01, 5.44343191e-01,
4.69244104e-01, 4.80449321e-01, 6.97150590e-01,
5.97848906e-01, 7.75669998e-01, 7.21173591e-01,
7.98999899e-01, 8.45761521e-01, 9.00988314e-01,
8.45971092e-01, 9.01918952e-01, 1.02322215e+00])
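'''
To generate a series with a different start time, append the desired hour, minute and second:
start_dt = datetime.datetime(2017, 1, 1, hour, minute, second)
To simulate a different sampling interval, change the timedelta value:
interval = datetime.timedelta(seconds=300)
'''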