遥感影像切片
在实际工作中,我们经常需要处理体量很大的遥感影像,例如对整幅影像做逐像元运算,此时直接处理不便于并行。因此编写了将影像切分成多个小片的程序,切分后保持原有的数值、数据类型、波段数和投影。
切片
用法为
python 此文件的路径 栅格路径 输出文件夹 并行度 输出图像的行列数
例如:
python ./splitImage.py aa.tif ./output 8 1024
splitImage.py内容如下
import numpy as np
import gdal
import os
import sys
from multiprocessing import Pool
# Lookup table: NumPy dtype name -> integer GDAL data type code.
# NOTE(review): "int8" shares code 1 with "uint8" -- presumably the GDAL byte
# type; confirm that signed 8-bit rasters survive the round trip.
NP2GDAL_CONVERSION = dict(
    uint8=1,
    int8=1,
    uint16=2,
    int16=3,
    uint32=4,
    int32=5,
    float32=6,
    float64=7,
    complex64=10,
    complex128=11,
)
def split(rasterpath,outpath,dex,i, j,size,cols,rows,gdaltype,datatype,gt,proj,nodata):
    """Cut tile (i, j) out of a raster and save it as a ``size`` x ``size`` GeoTIFF.

    The tile keeps the source band count and projection.  Tiles on the right
    or bottom edge are padded with ``nodata`` (or 0 when ``nodata`` is None).
    Nothing is written when band 1 of the tile is constant (max == min),
    which is treated as "no valid data".

    Args:
        rasterpath: Path of the source raster.
        outpath: Output directory prefix.  It is concatenated directly with
            the file name, so it must already end with a path separator.
        dex: Sequence of index labels; ``dex[i]`` and ``dex[j]`` form the
            file-name suffix.
        i: Zero-based tile column index.
        j: Zero-based tile row index.
        size: Edge length of the square output tile, in pixels.
        cols: Source raster width in pixels.
        rows: Source raster height in pixels.
        gdaltype: GDAL data type code used for the output bands.
        datatype: NumPy dtype name used for the padding buffer of edge tiles.
        gt: Six-element geotransform of the source raster.
        proj: Projection string of the source raster.
        nodata: Nodata value applied to every output band, or None.

    Raises:
        RuntimeError: If the source cannot be opened or the tile cannot be
            created.
    """
    dataset = gdal.Open(rasterpath)
    if dataset is None:
        raise RuntimeError("cannot open raster: %s" % rasterpath)

    xoff, yoff = i * size, j * size
    # Clip the read window to the raster extent; edge tiles come out smaller.
    xsize = min(size, cols - xoff)
    ysize = min(size, rows - yoff)
    is_partial = xsize < size or ysize < size

    first_band = dataset.GetRasterBand(1).ReadAsArray(xoff, yoff, xsize, ysize)
    # A constant first band is treated as an empty tile: skip it.
    if first_band.max() == first_band.min():
        return

    driver = gdal.GetDriverByName("GTiff")
    bandsNum = dataset.RasterCount
    # splitext also copes with file names that have no extension.
    stem = os.path.splitext(os.path.basename(rasterpath))[0]
    dst_path = outpath + stem + '_%s%s.tif' % (dex[i], dex[j])
    dst_ds = driver.Create(dst_path, size, size, bandsNum, gdaltype)
    if dst_ds is None:
        raise RuntimeError("cannot create tile: %s" % dst_path)

    # Full affine mapping of the tile's upper-left pixel, including the
    # rotation terms gt[2] / gt[4] (both zero for north-up rasters).
    gtnew = (
        gt[0] + xoff * gt[1] + yoff * gt[2],
        gt[1],
        gt[2],
        gt[3] + xoff * gt[4] + yoff * gt[5],
        gt[4],
        gt[5],
    )
    dst_ds.SetGeoTransform(gtnew)
    dst_ds.SetProjection(proj)

    for k in range(1, bandsNum + 1):
        # Band 1 was already read for the emptiness check above.
        if k == 1:
            tile = first_band
        else:
            tile = dataset.GetRasterBand(k).ReadAsArray(xoff, yoff, xsize, ysize)
        if is_partial:
            if nodata is None:
                padded = np.zeros((size, size), dtype=datatype)
            else:
                padded = np.ones((size, size), dtype=datatype) * nodata
            height, width = tile.shape
            padded[0:height, 0:width] = tile
            tile = padded
        out_band = dst_ds.GetRasterBand(k)
        out_band.WriteArray(tile)
        if nodata is not None:
            out_band.SetNoDataValue(nodata)
        out_band = None

    # Dropping the references closes the datasets and flushes the tile to disk.
    dataset = None
    dst_ds = None
if __name__=="__main__":
    # Command line: <raster path> <output folder> <worker processes> <tile edge in pixels>
    args = sys.argv
    try:
        rasterpath = args[1]
        outpath = args[2]
        parelle = int(args[3])
        # Edge length, in pixels, of the square output tiles.
        size = int(args[4])
    except (IndexError, ValueError):
        print("""Usage:python 此文件的路径 栅格路径 输出文件夹 并行度 输出图像的行列数\nUsage:python ./splitImage.py aa.tif ./output 8 1024""")
        raise
    if parelle < 1 or size < 1:
        raise ValueError("parallelism and tile size must be positive integers")

    if not os.path.exists(outpath):
        os.makedirs(outpath)
    # split() concatenates outpath and the file name directly.
    if not outpath.endswith(os.path.sep):
        outpath = outpath + os.path.sep

    dataset = gdal.Open(rasterpath)
    if dataset is None:
        raise RuntimeError("cannot open raster: %s" % rasterpath)
    proj = dataset.GetProjection()
    gt = dataset.GetGeoTransform()
    # Read a single pixel only to learn the NumPy dtype of the data.
    datatype = dataset.GetRasterBand(1).ReadAsArray(0, 0, 1, 1).dtype.name
    # The nodata value of band 1 is assumed to apply to every band.
    nodata = dataset.GetRasterBand(1).GetNoDataValue()
    # Translate the NumPy dtype name into the GDAL data type code.
    gdaltype = NP2GDAL_CONVERSION[datatype]
    # Total raster width and height in pixels.
    cols, rows = dataset.RasterXSize, dataset.RasterYSize
    dataset = None

    # Number of tiles along x and y (integer ceiling division).
    numx = -(-cols // size)
    numy = -(-rows // size)
    # Position labels for the file names: at least 000..999, extended when the
    # grid has more than 1000 tiles along an axis.
    dex = ["%.3d" % n for n in range(max(1000, numx, numy))]

    pool = Pool(parelle)
    pending = [
        pool.apply_async(split, args=(rasterpath,outpath,dex,i, j,size,cols,rows,gdaltype,datatype,gt,proj,nodata))
        for i in range(numx)
        for j in range(numy)
    ]
    pool.close()
    pool.join()
    # apply_async swallows worker exceptions unless the result is fetched.
    for task in pending:
        task.get()
切分后的文件列表如下
.
├── myimage_000000.tif
├── myimage_000001.tif
├── myimage_000002.tif
├── myimage_000003.tif
├── myimage_000004.tif
├── myimage_000005.tif
├── myimage_001000.tif
├── myimage_001001.tif
├── myimage_001002.tif
├── myimage_001003.tif
├── myimage_001004.tif
├── myimage_001005.tif
├── myimage_002000.tif
├── myimage_002001.tif
├── myimage_002002.tif
├── myimage_002003.tif
├── myimage_002004.tif
├── myimage_002005.tif
├── myimage_003000.tif
├── myimage_003001.tif
└── myimage_003002.tif
多进程版
import numpy as np
import gdal
import os
import sys
from multiprocessing import Pool
# Lookup table: NumPy dtype name -> integer GDAL data type code.
# NOTE(review): "int8" shares code 1 with "uint8" -- presumably the GDAL byte
# type; confirm that signed 8-bit rasters survive the round trip.
NP2GDAL_CONVERSION = dict(
    uint8=1,
    int8=1,
    uint16=2,
    int16=3,
    uint32=4,
    int32=5,
    float32=6,
    float64=7,
    complex64=10,
    complex128=11,
)
def split(rasterpath,outpath,dex,i, j,size,cols,rows,gdaltype,datatype,gt,proj,nodata):
    """Cut tile (i, j) out of a raster and save it as a ``size`` x ``size`` GeoTIFF.

    The tile keeps the source band count and projection.  Tiles on the right
    or bottom edge are padded with ``nodata`` (or 0 when ``nodata`` is None).
    Nothing is written when band 1 of the tile is constant (max == min),
    which is treated as "no valid data".

    Args:
        rasterpath: Path of the source raster.
        outpath: Output directory prefix.  It is concatenated directly with
            the file name, so it must already end with a path separator.
        dex: Sequence of index labels; ``dex[i]`` and ``dex[j]`` form the
            file-name suffix.
        i: Zero-based tile column index.
        j: Zero-based tile row index.
        size: Edge length of the square output tile, in pixels.
        cols: Source raster width in pixels.
        rows: Source raster height in pixels.
        gdaltype: GDAL data type code used for the output bands.
        datatype: NumPy dtype name used for the padding buffer of edge tiles.
        gt: Six-element geotransform of the source raster.
        proj: Projection string of the source raster.
        nodata: Nodata value applied to every output band, or None.

    Raises:
        RuntimeError: If the source cannot be opened or the tile cannot be
            created.
    """
    dataset = gdal.Open(rasterpath)
    if dataset is None:
        raise RuntimeError("cannot open raster: %s" % rasterpath)

    xoff, yoff = i * size, j * size
    # Clip the read window to the raster extent; edge tiles come out smaller.
    xsize = min(size, cols - xoff)
    ysize = min(size, rows - yoff)
    is_partial = xsize < size or ysize < size

    first_band = dataset.GetRasterBand(1).ReadAsArray(xoff, yoff, xsize, ysize)
    # A constant first band is treated as an empty tile: skip it.
    if first_band.max() == first_band.min():
        return

    driver = gdal.GetDriverByName("GTiff")
    bandsNum = dataset.RasterCount
    # splitext also copes with file names that have no extension.
    stem = os.path.splitext(os.path.basename(rasterpath))[0]
    dst_path = outpath + stem + '_%s%s.tif' % (dex[i], dex[j])
    dst_ds = driver.Create(dst_path, size, size, bandsNum, gdaltype)
    if dst_ds is None:
        raise RuntimeError("cannot create tile: %s" % dst_path)

    # Full affine mapping of the tile's upper-left pixel, including the
    # rotation terms gt[2] / gt[4] (both zero for north-up rasters).
    gtnew = (
        gt[0] + xoff * gt[1] + yoff * gt[2],
        gt[1],
        gt[2],
        gt[3] + xoff * gt[4] + yoff * gt[5],
        gt[4],
        gt[5],
    )
    dst_ds.SetGeoTransform(gtnew)
    dst_ds.SetProjection(proj)

    for k in range(1, bandsNum + 1):
        # Band 1 was already read for the emptiness check above.
        if k == 1:
            tile = first_band
        else:
            tile = dataset.GetRasterBand(k).ReadAsArray(xoff, yoff, xsize, ysize)
        if is_partial:
            if nodata is None:
                padded = np.zeros((size, size), dtype=datatype)
            else:
                padded = np.ones((size, size), dtype=datatype) * nodata
            height, width = tile.shape
            padded[0:height, 0:width] = tile
            tile = padded
        out_band = dst_ds.GetRasterBand(k)
        out_band.WriteArray(tile)
        if nodata is not None:
            out_band.SetNoDataValue(nodata)
        out_band = None

    # Dropping the references closes the datasets and flushes the tile to disk.
    dataset = None
    dst_ds = None
if __name__=="__main__":
    # Command line: <raster path> <output folder> <worker processes> <tile edge in pixels>
    args = sys.argv
    try:
        rasterpath = args[1]
        outpath = args[2]
        parelle = int(args[3])
        # Edge length, in pixels, of the square output tiles.
        size = int(args[4])
    except (IndexError, ValueError):
        print("""Usage:python 此文件的路径 栅格路径 输出文件夹 并行度 输出图像的行列数\nUsage:python ./splitImage.py aa.tif ./output 8 1024""")
        raise
    if parelle < 1 or size < 1:
        raise ValueError("parallelism and tile size must be positive integers")

    if not os.path.exists(outpath):
        os.makedirs(outpath)
    # split() concatenates outpath and the file name directly.
    if not outpath.endswith(os.path.sep):
        outpath = outpath + os.path.sep

    dataset = gdal.Open(rasterpath)
    if dataset is None:
        raise RuntimeError("cannot open raster: %s" % rasterpath)
    proj = dataset.GetProjection()
    gt = dataset.GetGeoTransform()
    # Read a single pixel only to learn the NumPy dtype of the data.
    datatype = dataset.GetRasterBand(1).ReadAsArray(0, 0, 1, 1).dtype.name
    # The nodata value of band 1 is assumed to apply to every band.
    nodata = dataset.GetRasterBand(1).GetNoDataValue()
    # Translate the NumPy dtype name into the GDAL data type code.
    gdaltype = NP2GDAL_CONVERSION[datatype]
    # Total raster width and height in pixels.
    cols, rows = dataset.RasterXSize, dataset.RasterYSize
    dataset = None

    # Number of tiles along x and y (integer ceiling division).
    numx = -(-cols // size)
    numy = -(-rows // size)
    # Position labels for the file names: at least 000..999, extended when the
    # grid has more than 1000 tiles along an axis.
    dex = ["%.3d" % n for n in range(max(1000, numx, numy))]

    pool = Pool(parelle)
    pending = [
        pool.apply_async(split, args=(rasterpath,outpath,dex,i, j,size,cols,rows,gdaltype,datatype,gt,proj,nodata))
        for i in range(numx)
        for j in range(numy)
    ]
    pool.close()
    pool.join()
    # apply_async swallows worker exceptions unless the result is fetched.
    for task in pending:
        task.get()
带一定的重合度(多进程版)
- 相邻的切片之间(包括上下和左右)重叠一定数量的像元,防止拼接时出现裂缝等现象
- 此方法更加通用
import numpy as np
import gdal
import os
import sys
from multiprocessing import Pool
# Lookup table: NumPy dtype name -> integer GDAL data type code.
# NOTE(review): "int8" shares code 1 with "uint8" -- presumably the GDAL byte
# type; confirm that signed 8-bit rasters survive the round trip.
NP2GDAL_CONVERSION = dict(
    uint8=1,
    int8=1,
    uint16=2,
    int16=3,
    uint32=4,
    int32=5,
    float32=6,
    float64=7,
    complex64=10,
    complex128=11,
)
def split(rasterpath,outpath,dex,i, j,size,lap,cols,rows,gdaltype,datatype,gt,proj,nodata):
    """Save the ``size`` x ``size`` window whose upper-left pixel is (i, j) as a GeoTIFF.

    The tile keeps the source band count and projection.  Windows that run
    past the right or bottom edge are padded with ``nodata`` (or 0 when
    ``nodata`` is None).  Nothing is written when band 1 of the window is
    constant (max == min), which is treated as "no valid data".

    Args:
        rasterpath: Path of the source raster.
        outpath: Output directory prefix.  It is concatenated directly with
            the file name, so it must already end with a path separator.
        dex: Sequence of index labels used to build the file-name suffix.
        i: Column (x) pixel offset of the window's upper-left corner.
        j: Row (y) pixel offset of the window's upper-left corner.
        size: Edge length of the square output tile, in pixels.
        lap: Overlap between neighbouring tiles, in pixels.  ``size - lap``
            is the grid step used to turn (i, j) into label indices.
        cols: Source raster width in pixels.
        rows: Source raster height in pixels.
        gdaltype: GDAL data type code used for the output bands.
        datatype: NumPy dtype name used for the padding buffer of edge tiles.
        gt: Six-element geotransform of the source raster.
        proj: Projection string of the source raster.
        nodata: Nodata value applied to every output band, or None.

    Raises:
        RuntimeError: If the source cannot be opened or the tile cannot be
            created.
    """
    dataset = gdal.Open(rasterpath)
    if dataset is None:
        raise RuntimeError("cannot open raster: %s" % rasterpath)

    # Clip the read window to the raster extent; edge tiles come out smaller.
    xsize = min(size, cols - i)
    ysize = min(size, rows - j)
    is_partial = xsize < size or ysize < size

    first_band = dataset.GetRasterBand(1).ReadAsArray(i, j, xsize, ysize)
    # A constant first band is treated as an empty tile: skip it.
    if first_band.max() == first_band.min():
        return

    driver = gdal.GetDriverByName("GTiff")
    bandsNum = dataset.RasterCount
    # splitext also copes with file names that have no extension.
    stem = os.path.splitext(os.path.basename(rasterpath))[0]
    # Integer division keeps the label indices exact for any raster size.
    step = size - lap
    dst_path = outpath + stem + '_%s%s.tif' % (dex[i // step], dex[j // step])
    dst_ds = driver.Create(dst_path, size, size, bandsNum, gdaltype)
    if dst_ds is None:
        raise RuntimeError("cannot create tile: %s" % dst_path)

    # Full affine mapping of the window's upper-left pixel, including the
    # rotation terms gt[2] / gt[4] (both zero for north-up rasters).
    gtnew = (
        gt[0] + i * gt[1] + j * gt[2],
        gt[1],
        gt[2],
        gt[3] + i * gt[4] + j * gt[5],
        gt[4],
        gt[5],
    )
    dst_ds.SetGeoTransform(gtnew)
    dst_ds.SetProjection(proj)

    for k in range(1, bandsNum + 1):
        # Band 1 was already read for the emptiness check above.
        if k == 1:
            tile = first_band
        else:
            tile = dataset.GetRasterBand(k).ReadAsArray(i, j, xsize, ysize)
        if is_partial:
            if nodata is None:
                padded = np.zeros((size, size), dtype=datatype)
            else:
                padded = np.ones((size, size), dtype=datatype) * nodata
            height, width = tile.shape
            padded[0:height, 0:width] = tile
            tile = padded
        out_band = dst_ds.GetRasterBand(k)
        out_band.WriteArray(tile)
        if nodata is not None:
            out_band.SetNoDataValue(nodata)
        out_band = None

    # Dropping the references closes the datasets and flushes the tile to disk.
    dataset = None
    dst_ds = None
if __name__=="__main__":
    # Command line: <raster path> <output folder> <worker processes>
    #               <tile edge in pixels> <overlap in pixels>
    args = sys.argv
    try:
        rasterpath = args[1]
        outpath = args[2]
        parelle = int(args[3])
        # Edge length, in pixels, of the square output tiles.
        size = int(args[4])
        lap = int(args[5])
    except (IndexError, ValueError):
        print("""Usage:python 此文件的路径 栅格路径 输出文件夹 并行度 输出图像的行列数 重叠像元数\nUsage:python ./splitImage.py aa.tif ./output 8 1024 10""")
        raise
    if parelle < 1 or size < 1:
        raise ValueError("parallelism and tile size must be positive integers")
    # The grid step is size - lap, so the overlap must stay below the tile size.
    if lap < 0 or lap >= size:
        raise ValueError("overlap must satisfy 0 <= overlap < tile size")

    if not os.path.exists(outpath):
        os.makedirs(outpath)
    # split() concatenates outpath and the file name directly.
    if not outpath.endswith(os.path.sep):
        outpath = outpath + os.path.sep

    dataset = gdal.Open(rasterpath)
    if dataset is None:
        raise RuntimeError("cannot open raster: %s" % rasterpath)
    proj = dataset.GetProjection()
    gt = dataset.GetGeoTransform()
    # Read a single pixel only to learn the NumPy dtype of the data.
    datatype = dataset.GetRasterBand(1).ReadAsArray(0, 0, 1, 1).dtype.name
    # The nodata value of band 1 is assumed to apply to every band.
    nodata = dataset.GetRasterBand(1).GetNoDataValue()
    # Translate the NumPy dtype name into the GDAL data type code.
    gdaltype = NP2GDAL_CONVERSION[datatype]
    # Total raster width and height in pixels.
    cols, rows = dataset.RasterXSize, dataset.RasterYSize
    dataset = None

    # Tiles per axis: ceil((extent - size) / step) + 1, with at least one tile
    # even when the raster is smaller than a single tile.
    step = size - lap
    numx = max(1, -((size - cols) // step) + 1)
    numy = max(1, -((size - rows) // step) + 1)
    # Upper-left pixel offsets of every tile along each axis.
    numxs = [step * n for n in range(numx)]
    numys = [step * n for n in range(numy)]
    # Position labels for the file names: at least 000..999, extended when the
    # grid has more than 1000 tiles along an axis.
    dex = ["%.3d" % n for n in range(max(1000, numx, numy))]

    pool = Pool(parelle)
    # The workers receive the upper-left pixel offsets directly.
    pending = [
        pool.apply_async(split, args=(rasterpath,outpath,dex,i, j,size,lap,cols,rows,gdaltype,datatype,gt,proj,nodata))
        for i in numxs
        for j in numys
    ]
    pool.close()
    pool.join()
    # apply_async swallows worker exceptions unless the result is fetched.
    for task in pending:
        task.get()
拼接
gdal_merge.py拼接
上一步拆分完成并做完相应处理后,最后可能还需要把各个小片拼接成一幅大的影像。这一步比较简单,直接调用gdal自带的gdal_merge.py拼接程序即可。如果电脑上安装了python的gdal库,就会有这个文件,可以通过搜索找到。
使用前首先将gdal_merge.py复制到当前目录下。
这里将一个文件夹下的所有tif文件拼接成一个文件。 具体可以修改tif变量。
用法为
python 此文件的路径 输入文件夹 输出tif nodata值
例如:
python ./mergeImage.py ./output ./out.tif 0
mergeImage.py文件内容
import os
import subprocess
import sys

# Merge every .tif file in a folder into one raster by running gdal_merge.py,
# which must have been copied into the current working directory.
# Command line: <folder with the tiles> <output tif> <nodata value>
args = sys.argv
if len(args) != 4:
    print("usage:python thispy folder_path outtif nodata")
    sys.exit(1)
path = args[1]
outtif = args[2]
# Check that the nodata argument is a number instead of eval()-ing arbitrary
# command-line text; the text itself is forwarded unchanged.
try:
    float(args[3])
except ValueError:
    print("usage:python thispy folder_path outtif nodata")
    sys.exit(1)
nodata = args[3]
# Sorted so the merge order does not depend on the directory listing order.
tifs = sorted(os.path.join(path, name) for name in os.listdir(path) if name.endswith(".tif"))
if not tifs:
    print("no .tif files found in %s" % path)
    sys.exit(1)
# An argument list (no shell) keeps paths with spaces or shell metacharacters
# intact; sys.executable runs gdal_merge.py with the current interpreter.
cmd = [
    sys.executable, "gdal_merge.py",
    "-init", nodata,
    "-n", nodata,
    "-a_nodata", nodata,
    "-o", outtif,
] + tifs
# Propagate gdal_merge.py's exit status to the caller.
sys.exit(subprocess.call(cmd))
文件数量过多导致的命令太长的问题
这是gdal拼接代码的修改版。原有自带的gdal_merge.py在输入文件很多时,所有文件名加起来会使命令过长,而cmd对命令长度有限制,因此容易报错;修改后的版本解决了这个问题。
gdal_merge_my.py的地址
python gdal_merge_my.py -init 0 -n 0 -a_nodata 0 -o 输出影像名称 切片所在文件夹\*.tif