CUDA测试程序错误

CUDA测试程序运行错误

Code

代码

 
 1 import numpy as np 
 2 from timeit import default_timer as timer
 3 from numba import vectorize 
 4 
 5 @vectorize(["float32(float32, float32)"], target='cuda') 
 6 def vectorAdd(a, b): 
 7     return a + b 
 8 
 9 def main(): 
10     N = 320000 
11 
12     A = np.ones(N, dtype=np.float32 ) 
13     B = np.ones(N, dtype=np.float32 ) 
14     C = np.zeros(N, dtype=np.float32 ) 
15 
16     start = timer() 
17     C = vectorAdd(A, B) 
18     vectorAdd_time = timer() - start 
19 
20     print("c[:5] = " + str(C[:5])) 
21     print("c[-5:] = " + str(C[-5:])) 
22 
23     print("vectorAdd took %f seconds " % vectorAdd_time) 
24 
25 if __name__ == '__main__': 
26     main()

Question

相关BUG查询网址

Some Methods

以下参考网址

打开对应文件libs.py、nvvm.py:

def get_libdevice(arch):
    """Locate the libdevice bitcode file for the given architecture name.

    The search directory is read from the ``NUMBAPRO_LIBDEVICE`` environment
    variable; when that is unset or empty, ``NUMBAPRO_CUDALIB`` is used
    instead (and may itself be missing, in which case ``None`` is passed on).

    ``arch`` is interpolated verbatim into a regular expression of the form
    ``libdevice.<arch>[.<digits>...].bc``.

    Returns the ``max()`` of whatever ``find_file`` yields for that pattern,
    or ``None`` when it yields nothing.
    """
    # NOTE(review): find_file is defined outside this snippet; presumably it
    # lists entries in the directory matching the compiled pattern -- confirm.
    search_dir = os.environ.get('NUMBAPRO_LIBDEVICE')
    if not search_dir:
        search_dir = os.environ.get('NUMBAPRO_CUDALIB')

    name_regex = re.compile(r'libdevice\.%s(\.\d+)*\.bc$' % arch)
    matches = find_file(name_regex, search_dir)
    if not matches:
        return None
    return max(matches)

def get_cudalib(lib, platform=None):
    """Resolve the location of a CUDA library by name.

    When ``lib`` is ``'nvvm'`` and the ``NUMBAPRO_NVVM`` environment variable
    holds a non-empty value, that value is returned directly. Otherwise the
    directory from ``NUMBAPRO_CUDALIB`` (possibly ``None``) is handed to
    ``find_lib`` together with ``lib`` and ``platform``.

    Returns the ``max()`` of the candidates ``find_lib`` yields, or ``None``
    when it yields nothing.
    """
    if lib == 'nvvm':
        nvvm_override = os.environ.get('NUMBAPRO_NVVM')
        if nvvm_override:
            return nvvm_override

    # NOTE(review): find_lib is defined outside this snippet; presumably it
    # searches the directory for platform-specific library names -- confirm.
    search_dir = os.environ.get('NUMBAPRO_CUDALIB')
    matches = find_lib(lib, search_dir, platform)
    if matches:
        return max(matches)
    return None

加入对应环境变量后,将D:\Anaconda3\pkgs\cudatoolkit-9.0-1\DLLs目录(当前电脑CUDA的安装路径)下的libdevice.10.bc文件复制一份,并重命名为libdevice.computeXX.10.bc(当前为libdevice.compute50.10.bc),放在当前文件夹下。配置结束后,运行结果如下:

告警相关:

def _compute_thread_per_block(kernel, tpb):
    """Pick the threads-per-block value to use for ``kernel``.

    A non-zero ``tpb`` is returned unchanged. When ``tpb`` is zero the value
    comes from ``kernel.autotune.best()``; if that call raises ``ValueError``
    a warning is issued and the default of 128 is returned.
    """
    # An explicit request from the caller always wins.
    if tpb != 0:
        return tpb

    try:
        return kernel.autotune.best()
    except ValueError:
        # Autotuning failed: fall back to the fixed default and warn.
        warnings.warn('Could not autotune, using default tpb of 128')
        return 128

猜你喜欢

转载自www.cnblogs.com/HuangJiaoZhu/p/9279013.html