遥感影像切分切片

遥感影像切片

实际工作中,我们经常需要处理很大的遥感影像,例如做一些逐像元运算;整幅影像过大时不便于并行处理。因此制作了将影像切分成多个小片的程序,切分后保持原有的数值、数据类型、波段数和投影。

切片

用法为
python 此文件的路径 栅格路径 输出文件夹 并行度 输出图像的行列数
例如:
python ./splitImage.py aa.tif ./output 8 1024

splitImage.py内容如下

import numpy as np
import gdal
import os
import sys
from  multiprocessing import Pool

# Lookup table: NumPy dtype name -> integer GDAL data type code.
NP2GDAL_CONVERSION = dict(
    uint8=1,        # GDT_Byte
    # NOTE(review): int8 shares the unsigned Byte code, so negative values
    # may not round-trip -- confirm against the GDAL version in use.
    int8=1,
    uint16=2,       # GDT_UInt16
    int16=3,        # GDT_Int16
    uint32=4,       # GDT_UInt32
    int32=5,        # GDT_Int32
    float32=6,      # GDT_Float32
    float64=7,      # GDT_Float64
    complex64=10,   # GDT_CFloat32
    complex128=11,  # GDT_CFloat64
)
def split(rasterpath,outpath,dex,i, j,size,cols,rows,gdaltype,datatype,gt,proj,nodata):
    """Cut one ``size`` x ``size`` tile out of a raster and save it as a GeoTIFF.

    The tile starts at pixel column ``i * size`` and pixel row ``j * size``.
    Tiles that run past the right or bottom edge of the raster are read
    clipped and then padded up to the full tile size with ``nodata`` (or 0
    when ``nodata`` is None).  When band 1 of the tile holds one constant
    value the tile is treated as empty and no file is written.

    Args:
        rasterpath: Path of the source raster, opened with ``gdal.Open``.
        outpath: Output directory prefix.  It is concatenated directly with
            the file name, so it must already end with a path separator.
        dex: Sequence of index labels; ``dex[i]`` and ``dex[j]`` form the
            file-name suffix.
        i: Zero-based tile column index.
        j: Zero-based tile row index.
        size: Edge length of the square output tile, in pixels.
        cols: Width of the source raster, in pixels.
        rows: Height of the source raster, in pixels.
        gdaltype: GDAL data type code passed to ``driver.Create``.
        datatype: NumPy dtype name used for the padding buffer.
        gt: Six-element geotransform of the source raster.
        proj: Projection string copied onto the tile.
        nodata: Nodata value set on every output band, or None.

    Raises:
        IOError: If the source raster cannot be opened or the output tile
            cannot be created.
    """
    dataset = gdal.Open(rasterpath)
    if dataset is None:
        raise IOError("cannot open raster: %s" % rasterpath)

    xoff, yoff = i * size, j * size
    # Clip the read window to the raster extent; this one expression covers
    # the x-overflow, y-overflow and both-overflow cases.
    win_xsize = min(size, cols - xoff)
    win_ysize = min(size, rows - yoff)

    first_band = dataset.GetRasterBand(1).ReadAsArray(xoff, yoff, win_xsize, win_ysize)
    # A constant first band is taken to mean "no valid data": skip the tile.
    if first_band.max() == first_band.min():
        dataset = None
        return

    driver = gdal.GetDriverByName("GTiff")
    bandsNum = dataset.RasterCount
    # splitext keeps the whole name when the file has no extension.
    stem = os.path.splitext(os.path.basename(rasterpath))[0]
    tile_path = outpath + stem + '_%s%s.tif' % (dex[i], dex[j])
    dst_ds = driver.Create(tile_path, size, size, bandsNum, gdaltype)
    if dst_ds is None:
        raise IOError("cannot create tile: %s" % tile_path)

    # Move the origin to the tile's top-left pixel.  The gt[2]/gt[4] terms
    # are zero for north-up rasters and keep rotated rasters aligned.
    gtnew = (gt[0] + xoff * gt[1] + yoff * gt[2], gt[1], gt[2],
             gt[3] + xoff * gt[4] + yoff * gt[5], gt[4], gt[5])
    dst_ds.SetGeoTransform(gtnew)
    dst_ds.SetProjection(proj)

    needs_padding = win_xsize < size or win_ysize < size
    fill_value = 0 if nodata is None else nodata
    for k in range(1, bandsNum + 1):
        # Band 1 was already read for the emptiness check; reuse it.
        if k == 1:
            block = first_band
        else:
            block = dataset.GetRasterBand(k).ReadAsArray(xoff, yoff, win_xsize, win_ysize)
        if needs_padding:
            # np.full keeps the buffer in the raster's own dtype.
            padded = np.full((size, size), fill_value, dtype=datatype)
            padded[0:win_ysize, 0:win_xsize] = block
            block = padded

        out_band = dst_ds.GetRasterBand(k)
        out_band.WriteArray(block)
        if nodata is not None:
            out_band.SetNoDataValue(nodata)
    # Dropping the references closes the datasets and flushes the tile.
    dataset = None
    dst_ds = None

if __name__=="__main__":
    # Command line: <raster path> <output folder> <parallelism> <tile size>
    args=sys.argv
    try:
        rasterpath = args[1]
        outpath = args[2]
        workers = int(args[3])
        # Edge length in pixels of the square output tiles.
        size = int(args[4])
    except (IndexError, ValueError):
        print("""Usage:python 此文件的路径 栅格路径 输出文件夹 并行度 输出图像的行列数\nUsage:python ./splitImage.py  aa.tif ./output 8 1024""")
        raise
    if workers < 1 or size < 1:
        raise ValueError("parallelism and tile size must both be positive integers")

    if not os.path.exists(outpath):
        os.makedirs(outpath)
    # split() concatenates outpath and the file name directly.
    if not outpath.endswith(os.path.sep):
        outpath = outpath + os.path.sep

    dataset=gdal.Open(rasterpath)
    if dataset is None:
        raise IOError("cannot open raster: %s" % rasterpath)
    proj=dataset.GetProjection()
    gt=dataset.GetGeoTransform()
    # Read a single pixel just to learn the NumPy dtype of the raster.
    datatype=dataset.GetRasterBand(1).ReadAsArray(0,0,1,1).dtype.name
    # All bands are assumed to share band 1's nodata value.
    nodata=dataset.GetRasterBand(1).GetNoDataValue()
    # Translate the NumPy dtype name into the GDAL data type code.
    gdaltype=NP2GDAL_CONVERSION[datatype]
    # Total raster width and height in pixels.
    cols,rows=dataset.RasterXSize,dataset.RasterYSize
    dataset=None

    # Number of tile columns and rows (integer ceiling division).
    numx = (cols + size - 1) // size
    numy = (rows + size - 1) // size
    # Zero-padded position labels; at least 000-999, extended when the grid
    # is larger so that no worker fails on a missing label.
    dex = ["%.3d" % n for n in range(max(1000, numx, numy))]

    pool = Pool(workers)
    pending = [
        pool.apply_async(split, args=(rasterpath,outpath,dex,i, j,size,cols,rows,gdaltype,datatype,gt,proj,nodata))
        for i in range(numx)
        for j in range(numy)
    ]
    pool.close()
    pool.join()
    # get() re-raises any exception raised inside a worker, so failures are
    # reported instead of being silently dropped.
    for task in pending:
        task.get()

切分后的文件列表如下

.
├── myimage_000000.tif
├── myimage_000001.tif
├── myimage_000002.tif
├── myimage_000003.tif
├── myimage_000004.tif
├── myimage_000005.tif
├── myimage_001000.tif
├── myimage_001001.tif
├── myimage_001002.tif
├── myimage_001003.tif
├── myimage_001004.tif
├── myimage_001005.tif
├── myimage_002000.tif
├── myimage_002001.tif
├── myimage_002002.tif
├── myimage_002003.tif
├── myimage_002004.tif
├── myimage_002005.tif
├── myimage_003000.tif
├── myimage_003001.tif
└── myimage_003002.tif

多进程版

import numpy as np
import gdal
import os
import sys
from  multiprocessing import Pool

# Lookup table: NumPy dtype name -> integer GDAL data type code.
NP2GDAL_CONVERSION = dict(
    uint8=1,        # GDT_Byte
    # NOTE(review): int8 shares the unsigned Byte code, so negative values
    # may not round-trip -- confirm against the GDAL version in use.
    int8=1,
    uint16=2,       # GDT_UInt16
    int16=3,        # GDT_Int16
    uint32=4,       # GDT_UInt32
    int32=5,        # GDT_Int32
    float32=6,      # GDT_Float32
    float64=7,      # GDT_Float64
    complex64=10,   # GDT_CFloat32
    complex128=11,  # GDT_CFloat64
)
def split(rasterpath,outpath,dex,i, j,size,cols,rows,gdaltype,datatype,gt,proj,nodata):
    """Cut one ``size`` x ``size`` tile out of a raster and save it as a GeoTIFF.

    The tile starts at pixel column ``i * size`` and pixel row ``j * size``.
    Tiles that run past the right or bottom edge of the raster are read
    clipped and then padded up to the full tile size with ``nodata`` (or 0
    when ``nodata`` is None).  When band 1 of the tile holds one constant
    value the tile is treated as empty and no file is written.

    Args:
        rasterpath: Path of the source raster, opened with ``gdal.Open``.
        outpath: Output directory prefix.  It is concatenated directly with
            the file name, so it must already end with a path separator.
        dex: Sequence of index labels; ``dex[i]`` and ``dex[j]`` form the
            file-name suffix.
        i: Zero-based tile column index.
        j: Zero-based tile row index.
        size: Edge length of the square output tile, in pixels.
        cols: Width of the source raster, in pixels.
        rows: Height of the source raster, in pixels.
        gdaltype: GDAL data type code passed to ``driver.Create``.
        datatype: NumPy dtype name used for the padding buffer.
        gt: Six-element geotransform of the source raster.
        proj: Projection string copied onto the tile.
        nodata: Nodata value set on every output band, or None.

    Raises:
        IOError: If the source raster cannot be opened or the output tile
            cannot be created.
    """
    dataset = gdal.Open(rasterpath)
    if dataset is None:
        raise IOError("cannot open raster: %s" % rasterpath)

    xoff, yoff = i * size, j * size
    # Clip the read window to the raster extent; this one expression covers
    # the x-overflow, y-overflow and both-overflow cases.
    win_xsize = min(size, cols - xoff)
    win_ysize = min(size, rows - yoff)

    first_band = dataset.GetRasterBand(1).ReadAsArray(xoff, yoff, win_xsize, win_ysize)
    # A constant first band is taken to mean "no valid data": skip the tile.
    if first_band.max() == first_band.min():
        dataset = None
        return

    driver = gdal.GetDriverByName("GTiff")
    bandsNum = dataset.RasterCount
    # splitext keeps the whole name when the file has no extension.
    stem = os.path.splitext(os.path.basename(rasterpath))[0]
    tile_path = outpath + stem + '_%s%s.tif' % (dex[i], dex[j])
    dst_ds = driver.Create(tile_path, size, size, bandsNum, gdaltype)
    if dst_ds is None:
        raise IOError("cannot create tile: %s" % tile_path)

    # Move the origin to the tile's top-left pixel.  The gt[2]/gt[4] terms
    # are zero for north-up rasters and keep rotated rasters aligned.
    gtnew = (gt[0] + xoff * gt[1] + yoff * gt[2], gt[1], gt[2],
             gt[3] + xoff * gt[4] + yoff * gt[5], gt[4], gt[5])
    dst_ds.SetGeoTransform(gtnew)
    dst_ds.SetProjection(proj)

    needs_padding = win_xsize < size or win_ysize < size
    fill_value = 0 if nodata is None else nodata
    for k in range(1, bandsNum + 1):
        # Band 1 was already read for the emptiness check; reuse it.
        if k == 1:
            block = first_band
        else:
            block = dataset.GetRasterBand(k).ReadAsArray(xoff, yoff, win_xsize, win_ysize)
        if needs_padding:
            # np.full keeps the buffer in the raster's own dtype.
            padded = np.full((size, size), fill_value, dtype=datatype)
            padded[0:win_ysize, 0:win_xsize] = block
            block = padded

        out_band = dst_ds.GetRasterBand(k)
        out_band.WriteArray(block)
        if nodata is not None:
            out_band.SetNoDataValue(nodata)
    # Dropping the references closes the datasets and flushes the tile.
    dataset = None
    dst_ds = None

if __name__=="__main__":
    # Command line: <raster path> <output folder> <parallelism> <tile size>
    args=sys.argv
    try:
        rasterpath = args[1]
        outpath = args[2]
        workers = int(args[3])
        # Edge length in pixels of the square output tiles.
        size = int(args[4])
    except (IndexError, ValueError):
        print("""Usage:python 此文件的路径 栅格路径 输出文件夹 并行度 输出图像的行列数\nUsage:python ./splitImage.py  aa.tif ./output 8 1024""")
        raise
    if workers < 1 or size < 1:
        raise ValueError("parallelism and tile size must both be positive integers")

    if not os.path.exists(outpath):
        os.makedirs(outpath)
    # split() concatenates outpath and the file name directly.
    if not outpath.endswith(os.path.sep):
        outpath = outpath + os.path.sep

    dataset=gdal.Open(rasterpath)
    if dataset is None:
        raise IOError("cannot open raster: %s" % rasterpath)
    proj=dataset.GetProjection()
    gt=dataset.GetGeoTransform()
    # Read a single pixel just to learn the NumPy dtype of the raster.
    datatype=dataset.GetRasterBand(1).ReadAsArray(0,0,1,1).dtype.name
    # All bands are assumed to share band 1's nodata value.
    nodata=dataset.GetRasterBand(1).GetNoDataValue()
    # Translate the NumPy dtype name into the GDAL data type code.
    gdaltype=NP2GDAL_CONVERSION[datatype]
    # Total raster width and height in pixels.
    cols,rows=dataset.RasterXSize,dataset.RasterYSize
    dataset=None

    # Number of tile columns and rows (integer ceiling division).
    numx = (cols + size - 1) // size
    numy = (rows + size - 1) // size
    # Zero-padded position labels; at least 000-999, extended when the grid
    # is larger so that no worker fails on a missing label.
    dex = ["%.3d" % n for n in range(max(1000, numx, numy))]

    pool = Pool(workers)
    pending = [
        pool.apply_async(split, args=(rasterpath,outpath,dex,i, j,size,cols,rows,gdaltype,datatype,gt,proj,nodata))
        for i in range(numx)
        for j in range(numy)
    ]
    pool.close()
    pool.join()
    # get() re-raises any exception raised inside a worker, so failures are
    # reported instead of being silently dropped.
    for task in pending:
        task.get()

带一定的重合度(多进程版)

  • 相邻的切图之间,包括上下和左右,重合一定数量的像元,防止拼接时候出现一些裂缝等现象
  • 此方法更加通用
import numpy as np
import gdal
import os
import sys
from  multiprocessing import Pool

# Lookup table: NumPy dtype name -> integer GDAL data type code.
NP2GDAL_CONVERSION = dict(
    uint8=1,        # GDT_Byte
    # NOTE(review): int8 shares the unsigned Byte code, so negative values
    # may not round-trip -- confirm against the GDAL version in use.
    int8=1,
    uint16=2,       # GDT_UInt16
    int16=3,        # GDT_Int16
    uint32=4,       # GDT_UInt32
    int32=5,        # GDT_Int32
    float32=6,      # GDT_Float32
    float64=7,      # GDT_Float64
    complex64=10,   # GDT_CFloat32
    complex128=11,  # GDT_CFloat64
)
def split(rasterpath,outpath,dex,i, j,size,lap,cols,rows,gdaltype,datatype,gt,proj,nodata):
    """Cut one overlapping ``size`` x ``size`` tile out of a raster as a GeoTIFF.

    Unlike a plain grid split, ``i`` and ``j`` here are the pixel offsets of
    the tile's top-left corner, spaced ``size - lap`` pixels apart so that
    neighbouring tiles share ``lap`` pixels.  Tiles that run past the right
    or bottom edge of the raster are read clipped and then padded up to the
    full tile size with ``nodata`` (or 0 when ``nodata`` is None).  When
    band 1 of the tile holds one constant value the tile is treated as empty
    and no file is written.

    Args:
        rasterpath: Path of the source raster, opened with ``gdal.Open``.
        outpath: Output directory prefix.  It is concatenated directly with
            the file name, so it must already end with a path separator.
        dex: Sequence of index labels, indexed by ``i // (size - lap)`` and
            ``j // (size - lap)`` to form the file-name suffix.
        i: Pixel column of the tile's top-left corner.
        j: Pixel row of the tile's top-left corner.
        size: Edge length of the square output tile, in pixels.
        lap: Number of pixels shared by adjacent tiles; must be < ``size``.
        cols: Width of the source raster, in pixels.
        rows: Height of the source raster, in pixels.
        gdaltype: GDAL data type code passed to ``driver.Create``.
        datatype: NumPy dtype name used for the padding buffer.
        gt: Six-element geotransform of the source raster.
        proj: Projection string copied onto the tile.
        nodata: Nodata value set on every output band, or None.

    Raises:
        IOError: If the source raster cannot be opened or the output tile
            cannot be created.
    """
    dataset = gdal.Open(rasterpath)
    if dataset is None:
        raise IOError("cannot open raster: %s" % rasterpath)

    # Clip the read window to the raster extent; this one expression covers
    # the x-overflow, y-overflow and both-overflow cases.
    win_xsize = min(size, cols - i)
    win_ysize = min(size, rows - j)

    first_band = dataset.GetRasterBand(1).ReadAsArray(i, j, win_xsize, win_ysize)
    # A constant first band is taken to mean "no valid data": skip the tile.
    if first_band.max() == first_band.min():
        dataset = None
        return

    driver = gdal.GetDriverByName("GTiff")
    bandsNum = dataset.RasterCount
    # splitext keeps the whole name when the file has no extension.
    stem = os.path.splitext(os.path.basename(rasterpath))[0]
    # Offsets are multiples of the stride, so integer division recovers the
    # tile's grid position without going through floats.
    stride = size - lap
    tile_path = outpath + stem + '_%s%s.tif' % (dex[i // stride], dex[j // stride])
    dst_ds = driver.Create(tile_path, size, size, bandsNum, gdaltype)
    if dst_ds is None:
        raise IOError("cannot create tile: %s" % tile_path)

    # Move the origin to the tile's top-left pixel.  The gt[2]/gt[4] terms
    # are zero for north-up rasters and keep rotated rasters aligned.
    gtnew = (gt[0] + i * gt[1] + j * gt[2], gt[1], gt[2],
             gt[3] + i * gt[4] + j * gt[5], gt[4], gt[5])
    dst_ds.SetGeoTransform(gtnew)
    dst_ds.SetProjection(proj)

    needs_padding = win_xsize < size or win_ysize < size
    fill_value = 0 if nodata is None else nodata
    for k in range(1, bandsNum + 1):
        # Band 1 was already read for the emptiness check; reuse it.
        if k == 1:
            block = first_band
        else:
            block = dataset.GetRasterBand(k).ReadAsArray(i, j, win_xsize, win_ysize)
        if needs_padding:
            # np.full keeps the buffer in the raster's own dtype.
            padded = np.full((size, size), fill_value, dtype=datatype)
            padded[0:win_ysize, 0:win_xsize] = block
            block = padded

        out_band = dst_ds.GetRasterBand(k)
        out_band.WriteArray(block)
        if nodata is not None:
            out_band.SetNoDataValue(nodata)
    # Dropping the references closes the datasets and flushes the tile.
    dataset = None
    dst_ds = None

if __name__=="__main__":
    # Command line:
    #   <raster path> <output folder> <parallelism> <tile size> <overlap>
    args=sys.argv
    try:
        rasterpath = args[1]
        outpath = args[2]
        workers = int(args[3])
        # Edge length in pixels of the square output tiles.
        size = int(args[4])
        # Number of pixels shared by neighbouring tiles.
        lap=int(args[5])
    except (IndexError, ValueError):
        print("""Usage:python 此文件的路径 栅格路径 输出文件夹 并行度 输出图像的行列数 重叠像元数\nUsage:python ./splitImage.py  aa.tif ./output 8 1024 10""")
        raise
    if workers < 1 or size < 1:
        raise ValueError("parallelism and tile size must both be positive integers")
    # The stride size - lap must be positive, otherwise the tile grid is
    # undefined (division by zero or a negative step).
    if lap < 0 or lap >= size:
        raise ValueError("overlap must satisfy 0 <= overlap < tile size")

    if not os.path.exists(outpath):
        os.makedirs(outpath)
    # split() concatenates outpath and the file name directly.
    if not outpath.endswith(os.path.sep):
        outpath = outpath + os.path.sep

    dataset=gdal.Open(rasterpath)
    if dataset is None:
        raise IOError("cannot open raster: %s" % rasterpath)
    proj=dataset.GetProjection()
    gt=dataset.GetGeoTransform()
    # Read a single pixel just to learn the NumPy dtype of the raster.
    datatype=dataset.GetRasterBand(1).ReadAsArray(0,0,1,1).dtype.name
    # All bands are assumed to share band 1's nodata value.
    nodata=dataset.GetRasterBand(1).GetNoDataValue()
    # Translate the NumPy dtype name into the GDAL data type code.
    gdaltype=NP2GDAL_CONVERSION[datatype]
    # Total raster width and height in pixels.
    cols,rows=dataset.RasterXSize,dataset.RasterYSize
    dataset=None

    stride = size - lap
    # Tile counts per axis: ceil((extent - size) / stride) + 1, but never
    # fewer than one so a raster smaller than a tile still yields a tile.
    numx = 1 if cols <= size else (cols - size + stride - 1) // stride + 1
    numy = 1 if rows <= size else (rows - size + stride - 1) // stride + 1
    # Zero-padded position labels; at least 000-999, extended when the grid
    # is larger so that no worker fails on a missing label.
    dex = ["%.3d" % n for n in range(max(1000, numx, numy))]

    # Top-left pixel offsets of every tile column / row; split() receives
    # these offsets directly rather than grid indices.
    numxs = [stride * n for n in range(numx)]
    numys = [stride * n for n in range(numy)]

    pool = Pool(workers)
    pending = [
        pool.apply_async(split, args=(rasterpath,outpath,dex,i, j,size,lap,cols,rows,gdaltype,datatype,gt,proj,nodata))
        for i in numxs
        for j in numys
    ]
    pool.close()
    pool.join()
    # get() re-raises any exception raised inside a worker, so failures are
    # reported instead of being silently dropped.
    for task in pending:
        task.get()

拼接

gdal_merge.py拼接

上一步拆分完了,做一些处理,最后可能还需拼接成一个大的影像。这里比较简单,直接调用gdal自带的gdal_merge.py拼接程序。如果电脑安装了python的gdal库的话就会有这个文件,可以通过搜索查找。
使用前首先将gdal_merge.py复制到当前目录下。
这里将一个文件夹下的所有tif文件拼接成一个文件。 具体可以修改tif变量。
用法为
python 此文件的路径 输入文件夹 输出tif nodata值
例如:
python ./mergeImage.py ./output ./out.tif 0
mergeImage.py文件内容

import os
import subprocess
import sys


def main(args):
    """Merge every ``.tif`` file in a folder into one raster with gdal_merge.py.

    gdal_merge.py must have been copied into the current working directory.

    Args:
        args: Argument list laid out like ``sys.argv``:
            ``[script, folder_path, outtif, nodata]``.

    Returns:
        1 on a usage error or when the folder holds no input tiles,
        otherwise the exit status of the gdal_merge.py child process.
    """
    if len(args)!=4:
        print("usage:python thispy folder_path outtif nodata")
        return 1
    path, outtif, nodata = args[1], args[2], args[3]

    # This argument used to go through eval(), which would execute arbitrary
    # code from the command line.  It only needs to be numeric, so validate
    # it as a number and forward the text unchanged.
    try:
        float(nodata)
    except ValueError:
        print("usage:python thispy folder_path outtif nodata")
        return 1

    # Sort for a reproducible merge order, and leave out a pre-existing
    # output file so that it is not merged into itself.
    out_abs = os.path.abspath(outtif)
    tifs = sorted(
        os.path.join(path, name)
        for name in os.listdir(path)
        if name.endswith(".tif")
        and os.path.abspath(os.path.join(path, name)) != out_abs
    )
    if not tifs:
        print("no .tif files found in %s" % path)
        return 1

    # An argument list (no shell) keeps paths with spaces or shell
    # metacharacters intact; sys.executable reuses the running interpreter.
    cmd = [sys.executable, "gdal_merge.py",
           "-init", nodata, "-n", nodata, "-a_nodata", nodata,
           "-o", outtif]
    return subprocess.call(cmd + tifs)


if __name__ == "__main__":
    sys.exit(main(sys.argv))

文件数量过多导致的命令太长的问题

下面是 gdal_merge.py 的修改版。原版 gdal_merge.py 在输入文件很多、文件名加起来的总长度过长时容易报错,因为 cmd 的命令行长度有限制;修改版解决了这个问题。
gdal_merge_my.py的地址

python gdal_merge_my.py -init 0 -n 0 -a_nodata 0 -o 输出影像名称  输入路径\*.tif
发布了57 篇原创文章 · 获赞 73 · 访问量 7万+

猜你喜欢

转载自blog.csdn.net/weixin_40450867/article/details/103140839