高分辨率图像建筑物提取数据集制作

目录结构


/dataset/

	xxxx.tif  # 原始图像

	image-3000

		0.tif # 切割后的tif

		1.tif

		........

		xxx_0.json # 生成的json文件也放在该文件夹下

		xxx_1.json

		.......

		xxx_0_json # 调用 labelme_json_to_dataset xxx_0.json 生成的 xxx_0_json 文件夹

			img.png # 原始图片的png格式

			info.yaml

			label.png # 标签图片

			label_names.txt 

			label_viz.png

		xxx_1_json

		.......

		0.png  # 将json文件夹中的label.png 提取出来

		1.png 

		.......

		label_0.tif # 将上边的png标签文件转换为tif格式

收集数据，高分辨率图像
1. 无人机数据，航空数据等

图像切割，像素大小该为多少？

本数据集平均像素大小（40000*50000） tif格式，LZW压缩方式压缩

要考虑计算机显卡显存、目标建筑物尽量不被切割等问题，本例使用3000*3000


# date:2020-01-04

# user:dean

# desc:图像切割脚本

import tifffile as tiff  # 也可使用pillow或opencv 但若图片过大时可能会出问题

import os

# Cut one large GeoTIFF into fixed-size tiles and write each tile as its own
# .tif file under <image_dir>/image-<width>/.

width = 1500 * 2   # tile width in pixels
height = 1500 * 2  # tile height in pixels

home = "/media/dean/Document/AI_dataset/DOM/"
file_name = "裴庄村51-dom"

image_dir = os.path.join(home, file_name)
image = os.path.join(image_dir, file_name + ".tif")

# Output directory for the tiles.
target_dir = os.path.join(image_dir, "image-" + str(width))
# makedirs(exist_ok=True) replaces the exists()/mkdir() pair: no
# check-then-create race, and missing parents are created as well.
os.makedirs(target_dir, exist_ok=True)

img = tiff.imread(image)
print("导入图片完成", img.shape)

# The indexing below treats axis 0 as height and axis 1 as width.
pic_height = img.shape[0]
pic_width = img.shape[1]

# Floor division: only whole tiles are produced, so pixels past the last full
# tile on the right and bottom edges are not written out.
tiles_per_row = pic_width // width
tile_rows = pic_height // height
print("开始进行切割，可切割总数为{}".format(tile_rows * tiles_per_row))

for tile_row in range(tile_rows):
    top = tile_row * height
    bottom = top + height
    for tile_col in range(tiles_per_row):
        # Row-major running index; it becomes the numeric suffix of the file.
        num = tile_row * tiles_per_row + tile_col
        print("正在进行第{}张切割".format(num + 1))

        left = tile_col * width
        right = left + width
        cropped = img[top:bottom, left:right]

        name = "{}_{}.tif".format(file_name, num)
        image_path = os.path.join(target_dir, name)
        # imwrite is the current tifffile writer; imsave was only a
        # deprecated alias for it.
        tiff.imwrite(image_path, cropped)

标注工具 labelme

使用labelme标注每张图片
```
pip install labelme  # 安装labelme
```

每张图片标注后会生成对应name.json文件


labelme_json_to_dataset xxx.json


# date:2020-01-04

# user:dean

# desc:批量将json文件转为 label

import os
import subprocess

# Run `labelme_json_to_dataset` on every .json file in one directory.

# Directory that holds the labelme annotation files.
json_dir = r"I:\人工智能数据\DOM\裴庄村51-dom\image-3000"

json_files = [
    os.path.join(json_dir, name)
    for name in os.listdir(json_dir)
    if name.endswith(".json")
]

for json_file in json_files:
    # The path is passed as its own argv entry instead of being pasted into a
    # shell command line, so spaces or shell metacharacters in the path
    # cannot split or alter the command.
    cmd = ["labelme_json_to_dataset", json_file]
    print(" ".join(cmd))

    # NOTE: raises FileNotFoundError if labelme_json_to_dataset is not on PATH.
    result = subprocess.run(cmd)
    if result.returncode != 0:
        # Report the failure and keep going with the remaining files.
        print(
            "labelme_json_to_dataset failed (exit code {}): {}".format(
                result.returncode, json_file
            )
        )

将所有的json/label.png 提取到统一文件夹


# date:2020-01-04

# user:dean

# desc:将label文件夹中的label提取出来

import tifffile as tiff

from PIL import Image

import os

# Copy the label.png found inside each "<name>_<index>_json" folder into
# target_dir as "<index>.png".

# Directory that contains the *_json folders; the extracted PNGs are written
# here as well.
target_dir = r"/media/dean/Document/AI_dataset/DOM/裴庄村51-dom/image-3000"

json_suffix = "_json"

for entry in os.listdir(target_dir):
    folder = os.path.join(target_dir, entry)
    image_path = os.path.join(folder, "label.png")

    # Only folders named "*_json" that actually contain a label.png are
    # processed; everything else (tiles, .json files, other folders) is skipped.
    if not entry.endswith(json_suffix) or not os.path.isfile(image_path):
        continue

    # Take the token right before "_json" as the index. Unlike
    # split("_")[1], this stays correct when the base name has no underscore
    # ("0_json") or contains additional underscores ("a_b_0_json").
    stem = entry[:-len(json_suffix)]
    index = stem.rsplit("_", 1)[-1]
    new_name = "{}.png".format(index)

    # The context manager closes the source file once the copy is saved.
    with Image.open(image_path) as label_image:
        label_image.save(os.path.join(target_dir, new_name))

    print("第{}个文件夹".format(folder))

将所有的label.png转换为tif格式并转换为单通道黑白照片


# coding:utf-8

# file: change_format.py

# author: Dean

# contact: 1028968939@qq.com

# time: 2020/1/4 20:41

# desc: 将png 标签转化为单通道 黑白标签 并转化为tif

import os

from PIL import Image

# 256-entry lookup table used to binarise the label images: values above
# `threshold` map to 255, all others map to 0.
threshold = 0

table = [255 if level > threshold else 0 for level in range(256)]

# Convert every .png in target_dir to a 1-bit .tif with the same base name,
# mapping pixel values through `table`.
target_dir = r"/media/dean/Document/AI_dataset/DOM/裴庄村51-dom/image-3000"

png_files = [
    os.path.join(target_dir, name)
    for name in os.listdir(target_dir)
    if name.endswith(".png")
]

for png_path in png_files:
    # splitext strips only the final extension, so base names that contain
    # dots are kept intact.
    stem = os.path.splitext(os.path.basename(png_path))[0]
    new_file = os.path.join(target_dir, "{}.tif".format(stem))

    # The context manager closes the source PNG after the converted copy has
    # been written.
    with Image.open(png_path) as label_png:
        # NOTE(review): a 256-entry table only fits single-band images;
        # presumably label.png is palette/grayscale - confirm.
        binary = label_png.point(table, '1')
        binary.save(new_file)

    print(new_file)

结束（根据需要提取相应数据即可）

菜单

高分辨率图像建筑物提取数据集制作

目录结构

收集数据，高分辨率图像

图像切割，像素大小该为多少？

标注工具 labelme

评论

7层协议

ajax_json

30系显卡发售价

C动态字符串

河南省2026年高考全部录取方式汇总表

curl

ARM架构

centos7安装zabbix

docker 推送镜像到阿里云

15种排序算法