更新数据分块
This commit is contained in:
parent
30eeff4b1d
commit
cc8f070aaa
@ -1,3 +1,4 @@
|
|||||||
|
from concurrent.futures import ThreadPoolExecutor
|
||||||
import random
|
import random
|
||||||
import cv2
|
import cv2
|
||||||
import numpy as np
|
import numpy as np
|
||||||
@ -106,15 +107,15 @@ def process_images(input_folder, background_image_path, output_base):
|
|||||||
递归处理所有子文件夹并保持目录结构
|
递归处理所有子文件夹并保持目录结构
|
||||||
"""
|
"""
|
||||||
# 预处理背景路径(只需执行一次)
|
# 预处理背景路径(只需执行一次)
|
||||||
if os.path.isfile(background_image_path):
|
# if os.path.isfile(background_image_path):
|
||||||
background_paths = [background_image_path]
|
# background_paths = [background_image_path]
|
||||||
else:
|
# else:
|
||||||
valid_ext = ['.jpg', '.jpeg', '.png', '.bmp', '.webp']
|
# valid_ext = ['.jpg', '.jpeg', '.png', '.bmp', '.webp']
|
||||||
background_paths = [
|
# background_paths = [
|
||||||
os.path.join(background_image_path, f)
|
# os.path.join(background_image_path, f)
|
||||||
for f in os.listdir(background_image_path)
|
# for f in os.listdir(background_image_path)
|
||||||
if os.path.splitext(f)[1].lower() in valid_ext
|
# if os.path.splitext(f)[1].lower() in valid_ext
|
||||||
]
|
# ]
|
||||||
|
|
||||||
# 递归遍历输入目录
|
# 递归遍历输入目录
|
||||||
for root, dirs, files in os.walk(input_folder):
|
for root, dirs, files in os.walk(input_folder):
|
||||||
@ -136,10 +137,10 @@ def process_images(input_folder, background_image_path, output_base):
|
|||||||
|
|
||||||
try:
|
try:
|
||||||
# 去背景处理
|
# 去背景处理
|
||||||
foreground = remove_background(input_path)
|
result = remove_background(input_path)
|
||||||
|
|
||||||
|
|
||||||
result = edge_fill2(foreground)
|
# result = edge_fill2(result)
|
||||||
|
|
||||||
# 保存结果
|
# 保存结果
|
||||||
cv2.imwrite(output_path, result)
|
cv2.imwrite(output_path, result)
|
||||||
@ -148,10 +149,53 @@ def process_images(input_folder, background_image_path, output_base):
|
|||||||
except Exception as e:
|
except Exception as e:
|
||||||
print(f"Error processing {input_path}: {str(e)}")
|
print(f"Error processing {input_path}: {str(e)}")
|
||||||
|
|
||||||
|
def process_single_file(input_path, output_path):
    """Remove the background from one image and save the result.

    Designed to run as a thread-pool task: every failure is reported on
    stdout instead of being raised, so a single bad file cannot abort the
    whole batch.

    Args:
        input_path: Path of the source image passed to ``remove_background``.
        output_path: Destination path passed to ``cv2.imwrite``.

    Returns:
        None. The outcome is only reported through printed messages.
    """
    try:
        # NOTE(review): presumably returns an image array that cv2.imwrite
        # accepts -- confirm against remove_background's definition.
        result = remove_background(input_path)
        # result = edge_fill2(result)  # enable when edge filling is wanted

        # cv2.imwrite signals most failures (bad directory, unsupported
        # extension, ...) by returning False rather than raising, so the
        # return value has to be checked before claiming success.
        if cv2.imwrite(output_path, result):
            print(f"Processed: {input_path} -> {output_path}")
        else:
            print(f"Error processing {input_path}: could not write {output_path}")
    except Exception as e:
        print(f"Error processing {input_path}: {str(e)}")
|
||||||
|
|
||||||
|
def process_imageswithpool(input_folder, background_image_path, output_base):
    """Process every image under ``input_folder`` using a thread pool.

    The directory tree below ``input_folder`` is mirrored under
    ``output_base``; each image file is handed to ``process_single_file``
    as a separate task of a ``ThreadPoolExecutor``.

    Args:
        input_folder: Root directory that is walked recursively.
        background_image_path: Not used by this function; kept so the
            signature stays identical to ``process_images``.
        output_base: Root directory that receives the processed images.

    Returns:
        None.
    """
    image_suffixes = ('.png', '.jpg', '.jpeg', '.bmp')
    # os.cpu_count() returns None when the CPU count cannot be determined;
    # fall back to a single CPU instead of failing on ``None * 2``.
    worker_count = (os.cpu_count() or 1) * 2

    with ThreadPoolExecutor(max_workers=worker_count) as executor:
        futures = []
        for root, _dirs, files in os.walk(input_folder):
            # Create the output directory in the main thread, before any
            # task that writes into it is submitted.
            relative_path = os.path.relpath(root, input_folder)
            output_dir = os.path.join(output_base, relative_path)
            os.makedirs(output_dir, exist_ok=True)

            # Submit one task per image file.
            for filename in files:
                if not filename.lower().endswith(image_suffixes):
                    continue

                input_path = os.path.join(root, filename)
                output_path = os.path.join(output_dir, filename)
                futures.append(
                    executor.submit(process_single_file, input_path, output_path)
                )

        # Wait for every task and report anything a task raised itself.
        for future in futures:
            try:
                future.result()
            except Exception as e:
                print(f"Unhandled error in thread: {str(e)}")
|
||||||
|
|
||||||
|
|
||||||
# 使用示例
|
# 使用示例
|
||||||
input_directory = 'L:/Tobacco/2023_JY/20230821/SOURCE'
|
input_directory = 'L:/Grade_datasets/JY_A'
|
||||||
background_image_path = 'F:/dataset/02.TA_EC/rundata/BACKGROUND/ZY_B'
|
background_image_path = 'F:/dataset/02.TA_EC/rundata/BACKGROUND/ZY_B'
|
||||||
output_directory = 'L:/Test'
|
output_directory = 'L:/Grade_datasets/MOVE_BACKGROUND'
|
||||||
|
|
||||||
process_images(input_directory, background_image_path, output_directory)
|
process_imageswithpool(input_directory, background_image_path, output_directory)
|
42
dataset/splitdataset.py
Normal file
42
dataset/splitdataset.py
Normal file
@ -0,0 +1,42 @@
|
|||||||
|
import os
|
||||||
|
import random
|
||||||
|
import shutil
|
||||||
|
|
||||||
|
def _list_category_images(category_folder):
    """Return the paths of the image files located directly in *category_folder*."""
    image_suffixes = ('.png', '.jpg', '.jpeg', '.bmp', '.tiff')
    return [
        os.path.join(category_folder, name)
        for name in os.listdir(category_folder)
        # Compare case-insensitively so that e.g. "IMG_1.JPG" is not skipped.
        if name.lower().endswith(image_suffixes)
    ]


def _claim_unique_name(filename, used_names):
    """Return *filename*, or a ``_dupN`` variant not yet in *used_names*, and record it."""
    candidate = filename
    stem, ext = os.path.splitext(filename)
    counter = 0
    while candidate in used_names:
        counter += 1
        candidate = f'{stem}_dup{counter}{ext}'
    used_names.add(candidate)
    return candidate


def split_dataset(input_folder, output_folder, num_subsets=3, images_per_subset=400):
    """Split every category folder into equally sized sub-datasets.

    Each directory found while walking ``input_folder`` is treated as a
    category. Its images are shuffled and copied into
    ``output_folder/subdataset_<k>/<category>`` so that every sub-dataset
    receives ``images_per_subset`` files of that category.

    * Categories with fewer than ``num_subsets * images_per_subset`` images
      are padded by random re-sampling; a re-sampled copy that lands in the
      same sub-dataset as its source is stored under a ``_dupN`` name so it
      does not overwrite the earlier copy.
    * Categories with more images contribute a random selection of exactly
      ``num_subsets * images_per_subset`` images.
    * Directories that contain no image files are skipped.

    Args:
        input_folder: Root directory containing the category folders.
        output_folder: Directory in which the sub-datasets are created.
        num_subsets: Number of sub-datasets to create (default 3).
        images_per_subset: Images per category in each sub-dataset
            (default 400).

    Raises:
        ValueError: If ``num_subsets`` or ``images_per_subset`` is < 1.
    """
    if num_subsets < 1 or images_per_subset < 1:
        raise ValueError('num_subsets and images_per_subset must be at least 1')

    # Create the output sub-dataset directories.
    subdatasets = [
        os.path.join(output_folder, f'subdataset_{i+1}') for i in range(num_subsets)
    ]
    for subdataset in subdatasets:
        os.makedirs(subdataset, exist_ok=True)

    total_needed = num_subsets * images_per_subset

    # Visit every category folder.
    for root, dirs, _ in os.walk(input_folder):
        for category in dirs:
            images = _list_category_images(os.path.join(root, category))
            if not images:
                # Nothing to distribute (e.g. a folder that only holds other
                # folders); random.choices would raise on an empty list.
                continue

            # Pad small categories up to the required number of images.
            if len(images) < total_needed:
                images.extend(random.choices(images, k=total_needed - len(images)))

            # Shuffle, then keep exactly the number of images that fit so
            # that large categories cannot index past the last sub-dataset.
            random.shuffle(images)
            del images[total_needed:]

            # Hand each sub-dataset its own consecutive slice of the list.
            for index, subdataset_path in enumerate(subdatasets):
                category_subfolder = os.path.join(subdataset_path, category)
                os.makedirs(category_subfolder, exist_ok=True)

                used_names = set()
                start = index * images_per_subset
                for image_path in images[start:start + images_per_subset]:
                    target_name = _claim_unique_name(
                        os.path.basename(image_path), used_names
                    )
                    shutil.copy(
                        image_path, os.path.join(category_subfolder, target_name)
                    )

    print(f'Dataset split into {num_subsets} subdatasets with {images_per_subset} images per category at {output_folder}')
|
||||||
|
|
||||||
|
if __name__ == "__main__":
    # Script entry point: split the background-removed images into the
    # sub-dataset directories below.
    input_folder = 'L:/Grade_datasets/MOVE_BACKGROUND'   # source category folders
    output_folder = 'L:/Grade_datasets/SPLIT'            # destination root
    split_dataset(input_folder, output_folder)
|
Loading…
Reference in New Issue
Block a user