数据分析07

基于傅里叶变换的频域滤波
    ___________________IFFT___________________
   |                                          |
   v                                          |
高能信号\          FFT           频域滤波     |
         >含噪信号----->含噪频谱---------->高能频谱
低能噪声/
代码:

# -*- coding: utf-8 -*-
from __future__ import unicode_literals
import numpy as np
import numpy.fft as nf
import scipy.io.wavfile as wf
import matplotlib.pyplot as mp
# Frequency-domain denoising: isolate the strongest spectral component of
# a noisy recording, rebuild the signal from it, save it and plot both.

# Load the recording and scale the samples by 1 / 2**15
# (assumes 16-bit PCM input -- TODO confirm against the data file).
sample_rate, noised_sigs = wf.read(
    '../../data/noised.wav')
noised_sigs = noised_sigs / 2 ** 15
times = np.arange(len(noised_sigs)) / sample_rate

# Spectrum of the noisy signal; the magnitudes are what the plots label
# as "Power".
freqs = nf.fftfreq(times.size, 1 / sample_rate)
noised_ffts = nf.fft(noised_sigs)
noised_pows = np.abs(noised_ffts)

# The bin with the largest magnitude is taken as the fundamental.
fund_freq = np.abs(freqs[noised_pows.argmax()])
print(fund_freq)

# Zero every bin whose |frequency| differs from the fundamental, then
# transform back; only the real part of the inverse transform is kept.
noised_indices = np.where(np.abs(freqs) != fund_freq)
filter_ffts = noised_ffts.copy()
filter_ffts[noised_indices] = 0
filter_pows = np.abs(filter_ffts)
filter_sigs = nf.ifft(filter_ffts).real

# Rescale to the int16 range. Clip first: casting an out-of-range float
# to int16 does not saturate, so a sample at or beyond full scale would
# otherwise be written as a wrong value.
int16_limits = np.iinfo(np.int16)
filter_pcm = np.clip(filter_sigs * 2 ** 15,
                     int16_limits.min, int16_limits.max)
wf.write('../../data/filter.wav', sample_rate,
         filter_pcm.astype(np.int16))

# Only the non-negative half of the spectrum is plotted.
non_negative = freqs >= 0

mp.figure('Filter', facecolor='lightgray')
# Top-left: the first 178 samples of the noisy signal.
mp.subplot(221)
mp.title('Time Domain', fontsize=16)
mp.ylabel('Signal', fontsize=12)
mp.tick_params(labelsize=10)
mp.grid(linestyle=':')
mp.plot(times[:178], noised_sigs[:178],
        c='orangered', label='Noised')
mp.legend()
# Top-right: noisy spectrum on a logarithmic vertical axis.
mp.subplot(222)
mp.title('Frequency Domain', fontsize=16)
mp.ylabel('Power', fontsize=12)
mp.tick_params(labelsize=10)
mp.grid(linestyle=':')
mp.semilogy(freqs[non_negative],
            noised_pows[non_negative], c='limegreen',
            label='Noised')
mp.legend()
# Bottom-left: the first 178 samples of the filtered signal.
mp.subplot(223)
mp.xlabel('Time', fontsize=12)
mp.ylabel('Signal', fontsize=12)
mp.tick_params(labelsize=10)
mp.grid(linestyle=':')
mp.plot(times[:178], filter_sigs[:178],
        c='hotpink', label='Filter')
mp.legend()
# Bottom-right: filtered spectrum on a linear axis (most bins are
# exactly zero after filtering).
mp.subplot(224)
mp.xlabel('Frequency', fontsize=12)
mp.ylabel('Power', fontsize=12)
mp.tick_params(labelsize=10)
mp.grid(linestyle=':')
mp.plot(freqs[non_negative], filter_pows[non_negative],
        c='dodgerblue', label='Filter')
mp.legend()
mp.tight_layout()
mp.show()

3.随机数模块(random)

  • 生成服从特定统计规律的随机数序列
  1. 二项分布:np.random.binomial(n, p, size)
    产生size个随机数,每个随机数来自n次尝试中的成功次数,其中每次尝试成功的概率为p。
    猜硬币游戏:初始筹码1000,每轮猜9次,猜对5次及5次以上为赢,筹码加1,否则为输,筹码减1,求10000轮的过程中手中筹码的变化。
    代码:bi.py
    # -*- coding: utf-8 -*-
    from __future__ import unicode_literals
    import numpy as np
    import matplotlib.pyplot as mp
    # Coin-guessing game: 10000 rounds, each outcome being the number of
    # correct guesses out of 9 with a success probability of 0.5.
    outcomes = np.random.binomial(9, 0.5, 10000)
    # A round with 5 or more correct guesses wins one chip, any other
    # round loses one; the running total starts from 1000 chips.
    deltas = np.where(outcomes >= 5, 1, -1)
    chips = np.concatenate(([1000], 1000 + np.cumsum(deltas)))
    # Indices of the opening, highest, lowest and closing values.
    first, last = 0, chips.size - 1
    highest, lowest = chips.argmax(), chips.argmin()
    # The curve colour tells whether the game ended above, below or
    # level with the starting stake.
    if chips[last] > chips[first]:
        curve_color = 'orangered'
    elif chips[last] < chips[first]:
        curve_color = 'limegreen'
    else:
        curve_color = 'dodgerblue'
    mp.figure('Binomial Distribution',
              facecolor='lightgray')
    mp.title('Binomial Distribution', fontsize=20)
    mp.xlabel('Round', fontsize=14)
    mp.ylabel('Chip', fontsize=14)
    mp.tick_params(labelsize=12)
    mp.grid(linestyle=':')
    mp.plot(chips, c=curve_color, label='Chip')
    # Dashed reference lines at the four key values.
    reference_lines = (
        (first, 'deepskyblue'),
        (highest, 'crimson'),
        (lowest, 'seagreen'),
        (last, 'orange'),
    )
    for index, line_color in reference_lines:
        mp.axhline(y=chips[index], linestyle='--',
                   color=line_color, linewidth=1)
    mp.legend()
    mp.show()

    1 2 3 4 5 6
    5 1 2 0 0 2
    0 2 0 6 1 1
    ...
  2. 超几何分布:np.random.hypergeometric(ngood,
                                 nbad, nsample, size)
    产生size个随机数,每个随机数来自随机抽取nsample个样本中
    好样本的个数,总样本由ngood个好样本和nbad个坏样本组成
    摸球游戏:将25个好球和1个坏球放在一起,每次摸3个球,全为好球加1分,只要摸到了坏球减6分,求100轮的过程中分值的变化。
    代码:hyper.py
    # -*- coding: utf-8 -*-
    from __future__ import unicode_literals
    import numpy as np
    import matplotlib.pyplot as mp
    # Ball-drawing game: 100 rounds, each outcome being the number of
    # good balls among 3 drawn from 25 good balls and 1 bad ball.
    outcomes = np.random.hypergeometric(25, 1, 3, 100)
    # Three good balls score +1, any other draw scores -6; the running
    # total starts from 0.
    deltas = np.where(outcomes == 3, 1, -6)
    scores = np.concatenate(([0], np.cumsum(deltas)))
    # Indices of the opening, highest, lowest and closing values.
    first, last = 0, scores.size - 1
    highest, lowest = scores.argmax(), scores.argmin()
    # The curve colour tells whether the game ended above, below or
    # level with the starting score.
    if scores[last] > scores[first]:
        curve_color = 'orangered'
    elif scores[last] < scores[first]:
        curve_color = 'limegreen'
    else:
        curve_color = 'dodgerblue'
    mp.figure('Hypergeometric Distribution',
              facecolor='lightgray')
    mp.title('Hypergeometric Distribution', fontsize=20)
    mp.xlabel('Round', fontsize=14)
    mp.ylabel('Score', fontsize=14)
    mp.tick_params(labelsize=12)
    mp.grid(linestyle=':')
    mp.plot(scores, c=curve_color, label='Score')
    # Dashed reference lines at the four key values.
    reference_lines = (
        (first, 'deepskyblue'),
        (highest, 'crimson'),
        (lowest, 'seagreen'),
        (last, 'orange'),
    )
    for index, line_color in reference_lines:
        mp.axhline(y=scores[index], linestyle='--',
                   color=line_color, linewidth=1)
    mp.legend()
    mp.show()
  3. 标准正态分布:np.random.normal(size=size)
    产生size个随机数,服从标准正态(平均值=0, 标准差=1)分布。
    标准正态分布概率密度 = $\frac{e^{-\frac{x^2}{2}}}{\sqrt{2\pi}}$
    代码:norm.py
    # -*- coding: utf-8 -*-
    from __future__ import unicode_literals
    import numpy as np
    import matplotlib.pyplot as mp
    # Draw 10000 samples with the default parameters of
    # np.random.normal and compare their histogram with the standard
    # normal probability density.
    samples = np.random.normal(size=10000)
    mp.figure('Normal Distribution',
              facecolor='lightgray')
    mp.title('Normal Distribution', fontsize=20)
    mp.xlabel('Sample', fontsize=14)
    mp.ylabel('Occurrence', fontsize=14)
    mp.tick_params(labelsize=12)
    mp.grid(axis='y', linestyle=':')
    # density=True scales the bars so the histogram integrates to 1,
    # which makes it comparable with the density curve below. It
    # replaces the `normed` keyword, which current Matplotlib releases
    # no longer accept. Element [1] of the return value holds the bin
    # edges.
    bins = mp.hist(samples, 100, density=True,
                   edgecolor='steelblue',
                   facecolor='deepskyblue',
                   label='Normal')[1]
    # Standard normal density exp(-x^2 / 2) / sqrt(2 * pi), evaluated
    # at the bin edges.
    probs = np.exp(-bins ** 2 / 2) / np.sqrt(2 * np.pi)
    mp.plot(bins, probs, 'o-', c='orangered',
            label='Probability')
    mp.legend()
    mp.show()

八.杂项功能

1.排序

  • 联合间接排序:numpy.lexsort((参考序列, 待排序列))
                                 ->有序索引
    [张三 李四 王五 赵六]
    [70   60    80    70]<-[30 20 30 20]
    numpy.sort_complex(复数数组)
    按照实部的升序排列,对于实部相同的元素,参考虚部的升序,直接返回排序后的结果数组。
    numpy.searchsorted(有序序列, 待插序列)
    ->位置数组,表示将待插序列中的元素插入到有序序列中的哪些位置处,结果依然有序
    numpy.insert(被插序列, 位置序列, 待插序列)
    ->将待插序列中的元素,按照位置序列中的位置,插入到被插序列中,返回插入后的结果
    代码:sort.py
    # -*- coding: utf-8 -*-
    from __future__ import unicode_literals
    import numpy as np
    # Parallel per-person records: age, score and name share an index.
    ages = np.array([30, 20, 30, 20])
    scores = np.array([70, 60, 80, 70])
    names = np.array(['zhangsan', 'lisi',
                      'wangwu', 'zhaoliu'])
    # Print the names by ascending score, breaking ties by ascending
    # age (lexsort uses the last key of the tuple as the primary key).
    order = np.lexsort((ages, scores))
    print(names[order])
    # Pack each record into one complex number (score as the real part,
    # age as the imaginary part) and sort those directly.
    complexes = scores + ages * 1j
    print(complexes)
    sorted_complexes = np.sort_complex(complexes)
    print(sorted_complexes)
    # Find where the elements of b belong in the sorted array a, then
    # insert them at those positions.
    # index:      0  1  2  3  4  5  6
    a = np.array([1, 2, 4, 5, 6, 8, 9])
    b = np.array([7, 3])
    c = np.searchsorted(a, b)
    print(c)
    d = np.insert(a, c, b)
    print(d)

2.插值

  • import scipy.interpolate as si
    si.interp1d(离散水平坐标, 离散垂直坐标,
        kind=插值算法(缺省为线性插值))->插值器
    插值器(水平坐标)->垂直坐标
    代码:inter.py
    # -*- coding: utf-8 -*-
    from __future__ import unicode_literals
    import numpy as np
    import scipy.interpolate as si
    import matplotlib.pyplot as mp
    # Sample sinc(x) densely (reference curve) and at 11 points (the
    # discrete data that the interpolators are built from).
    lower, upper = -2.5, 2.5
    dense_x = np.linspace(lower, upper, 1001)
    dense_y = np.sinc(dense_x)
    knots_x = np.linspace(lower, upper, 11)
    knots_y = np.sinc(knots_x)
    # Both interpolators are evaluated on the same 51-point grid.
    query_x = np.linspace(lower, upper, 51)
    # Linear interpolation (interp1d's default kind).
    linear_y = si.interp1d(knots_x, knots_y)(query_x)
    # Cubic spline interpolation.
    cubic_y = si.interp1d(knots_x, knots_y, kind='cubic')(query_x)


    def begin_panel(position, title, xlabel=None, ylabel=None):
        """Select a subplot and apply the styling shared by all panels."""
        mp.subplot(position)
        mp.title(title, fontsize=16)
        if xlabel is not None:
            mp.xlabel(xlabel, fontsize=12)
        if ylabel is not None:
            mp.ylabel(ylabel, fontsize=12)
        mp.tick_params(labelsize=10)
        mp.grid(linestyle=':')


    mp.figure('Interpolation', facecolor='lightgray')
    # Top row: the dense reference curve and the discrete samples.
    begin_panel(221, 'Continuous', ylabel='y')
    mp.plot(dense_x, dense_y, c='hotpink',
            label='Continuous')
    mp.legend()
    begin_panel(222, 'Discrete')
    mp.scatter(knots_x, knots_y, c='orangered', s=80,
               label='Discrete')
    mp.legend()
    # Bottom row: each interpolated curve with the samples drawn on top.
    bottom_panels = (
        (223, 'Linear', 'y', linear_y, 'limegreen'),
        (224, 'Cubic', None, cubic_y, 'dodgerblue'),
    )
    for position, title, ylabel, curve_y, color in bottom_panels:
        begin_panel(position, title, xlabel='x', ylabel=ylabel)
        mp.plot(query_x, curve_y, 'o-', c=color,
                label=title)
        mp.scatter(knots_x, knots_y, c='orangered', s=80,
                   zorder=3)
        mp.legend()
    mp.tight_layout()
    mp.show()

3.积分

  • import scipy.integrate as si
    si.quad(积分函数, 积分下限, 积分上限)->积分值, 最大误差
    代码:integ.py
    # -*- coding: utf-8 -*-
    from __future__ import unicode_literals
    import numpy as np
    import scipy.integrate as si
    import matplotlib.pyplot as mp
    import matplotlib.patches as mc
    
    
    def f(x):
        """Evaluate the quadratic 2x^2 + 3x + 4 at x (scalar or array)."""
        quadratic_term = 2 * x ** 2
        linear_term = 3 * x
        return quadratic_term + linear_term + 4
    
    
    # Integrate f over [a, b] twice: once with scipy's quad and once
    # with a hand-written trapezoid rule, then draw the trapezoids.
    a, b = -5, 5
    curve_x = np.linspace(a, b, 1001)
    curve_y = f(curve_x)
    # quad returns (integral value, error estimate); only the value is
    # printed.
    quad_area, _ = si.quad(f, a, b)
    print(quad_area)
    # Split [a, b] into n strips; each strip is described by its left
    # and right end points on the curve.
    n = 50
    nodes_x = np.linspace(a, b, n + 1)
    nodes_y = f(nodes_x)
    strips = list(zip(nodes_x[:-1], nodes_y[:-1],
                      nodes_x[1:], nodes_y[1:]))
    # Trapezoid area: (sum of the parallel sides) * width / 2.
    trapezoid_area = 0
    for left_x, left_y, right_x, right_y in strips:
        trapezoid_area += (left_y + right_y) * (right_x - left_x) / 2
    print(trapezoid_area)
    mp.figure('Integral', facecolor='lightgray')
    mp.title('Integral', fontsize=20)
    mp.xlabel('x', fontsize=14)
    mp.ylabel('y', fontsize=14)
    mp.tick_params(labelsize=10)
    mp.grid(linestyle=':')
    mp.plot(curve_x, curve_y, c='orangered', linewidth=6,
            label=r'$y=2x^2+3x+4$', zorder=0)
    # One translucent polygon per strip, from the x axis up to the curve.
    axes = mp.gca()
    for left_x, left_y, right_x, right_y in strips:
        corners = [
            [left_x, 0], [left_x, left_y],
            [right_x, right_y], [right_x, 0]]
        axes.add_patch(mc.Polygon(
            corners,
            fc='deepskyblue', ec='dodgerblue',
            alpha=0.5))
    mp.legend()
    mp.show()

4.图像

  • scipy.ndimage中提供了一些简单的图像处理,如高斯模糊、任意角度旋转、边缘识别等功能。
    代码:image.py
    # -*- coding: utf-8 -*-
    from __future__ import unicode_literals
    import numpy as np
    import scipy.misc as sm
    import scipy.ndimage as sn
    import matplotlib.pyplot as mp
    # Load the picture as a greyscale float32 array. scipy.misc.imread
    # is no longer available in current SciPy releases, so the file is
    # read with pyplot's imread and flattened here instead.
    image = mp.imread('../../data/lily.jpg')
    if image.ndim == 3:
        # Weighted sum of the R, G and B channels (ITU-R 601 luma
        # weights); an alpha channel, if present, is ignored.
        original = np.dot(image[..., :3], [0.299, 0.587, 0.114])
    else:
        # Already a single-channel image.
        original = image
    # Work in floating point so the filters below do not wrap around
    # in an unsigned integer dtype.
    original = original.astype(np.float32)
    # Median filter over a 31x31 window.
    median = sn.median_filter(original, (31, 31))
    # Rotation by 45 degrees.
    rotate = sn.rotate(original, 45)
    # Prewitt filter (edge detection).
    prewitt = sn.prewitt(original)
    mp.figure('Image', facecolor='lightgray')
    mp.subplot(221)
    mp.title('Original', fontsize=16)
    mp.axis('off')
    mp.imshow(original, cmap='gray')
    mp.subplot(222)
    mp.title('Median', fontsize=16)
    mp.axis('off')
    mp.imshow(median, cmap='gray')
    mp.subplot(223)
    mp.title('Rotate', fontsize=16)
    mp.axis('off')
    mp.imshow(rotate, cmap='gray')
    mp.subplot(224)
    mp.title('Prewitt', fontsize=16)
    mp.axis('off')
    mp.imshow(prewitt, cmap='gray')
    mp.tight_layout()
    mp.show()

5.金融
代码:fin.py

# -*- coding: utf-8 -*-
from __future__ import unicode_literals
import numpy as np
# NOTE(review): the np.fv / np.pv / np.npv / np.irr / np.pmt / np.nper /
# np.rate helpers used below were deprecated (NEP 32) and are absent from
# recent NumPy releases -- confirm the NumPy version this script targets.

# Inputs shared by several examples: a 1% annual rate over 5 periods.
annual_rate = 0.01
periods = 5

# Future value = fv(rate, periods, payment per period, present value)
# Deposit 1000 at 1% annual interest for 5 years and add 100 every year:
# how much are principal and interest worth at maturity?
future_value = np.fv(annual_rate, periods, -100, -1000)
print(round(future_value, 2))

# Present value = pv(rate, periods, payment per period, future value)
# How much must be deposited at 1% annual interest for 5 years, adding
# 100 every year, to end up with future_value at maturity?
present_value = np.pv(annual_rate, periods, -100, future_value)
print(present_value)

# Net present value = npv(rate, cash flows)
# Depositing 1000 at 1% annual interest for 5 years and adding 100 every
# year is equivalent to depositing how much in a single payment?
deposit_flows = [-1000] + [-100] * 5
net_present_value = np.npv(annual_rate, deposit_flows)
print(round(net_present_value, 2))
lump_sum_future_value = np.fv(annual_rate, periods, 0, net_present_value)
print(round(lump_sum_future_value, 2))

# Internal rate of return = irr(cash flows)
# Deposit 1000 for 5 years and withdraw 100, 200, 300, 400 and 500 in
# successive years: what interest rate settles principal and interest
# exactly with the last withdrawal, i.e. gives a net present value of 0?
withdrawal_flows = [-1000, 100, 200, 300, 400, 500]
internal_rate = np.irr(withdrawal_flows)
print(round(internal_rate, 2))
residual_value = np.npv(internal_rate, withdrawal_flows)
print(residual_value)

# Payment per period = pmt(rate, periods, present value)
# Borrow 1000 at 1% annual interest and repay it over 5 years:
# how much is repaid per year on average?
loan = 1000
payment = np.pmt(annual_rate, periods, loan)
print(round(payment, 2))

# Number of periods = nper(rate, payment per period, present value)
# Borrow 1000 at 1% annual interest and repay `payment` every year:
# how many years until the loan is cleared?
repayment_periods = np.nper(annual_rate, payment, loan)
print(int(repayment_periods))

# Rate = rate(periods, payment per period, present value, future value)
# Borrow 1000, repay `payment` every year and clear the loan after
# `repayment_periods` years: what is the annual interest rate?
implied_rate = np.rate(repayment_periods, payment, loan, 0)
print(round(implied_rate, 2))

猜你喜欢

转载自blog.csdn.net/qq_42584444/article/details/83987589