Compare commits

..

2 Commits

Author SHA1 Message Date
yoiannis
4cb9790dee Merge branch 'master' of http://git.yoiannis.top/paper/TA_EC 2025-03-12 09:38:48 +08:00
yoiannis
cc8f070aaa 更新数据分块 2025-03-12 09:38:47 +08:00
2 changed files with 87 additions and 1 deletions

View File

@@ -1,3 +1,4 @@
from concurrent.futures import ThreadPoolExecutor
import random
import cv2
import numpy as np
@@ -148,10 +149,53 @@ def process_images(input_folder, background_image_path, output_base):
except Exception as e:
print(f"Error processing {input_path}: {str(e)}")
def process_single_file(input_path, output_path):
    """Process one image file: remove its background and write the result.

    Standalone function so that it can be submitted as an individual task
    (for example to a thread pool).

    Args:
        input_path: Path handed to ``remove_background``.
        output_path: Destination path handed to ``cv2.imwrite``.

    Returns:
        None. Failures are reported with ``print`` instead of being raised,
        so one bad file does not abort a batch.
    """
    try:
        result = remove_background(input_path)
        # result = edge_fill2(result)  # enable as needed
        # cv2.imwrite reports failure through its boolean return value rather
        # than an exception; turn a False result into an error so the file is
        # not reported as processed when nothing was written.
        if not cv2.imwrite(output_path, result):
            raise IOError(f"cv2.imwrite could not write {output_path}")
        print(f"Processed: {input_path} -> {output_path}")
    except Exception as e:
        print(f"Error processing {input_path}: {str(e)}")
def process_imageswithpool(input_folder, background_image_path, output_base):
    """
    Multithreaded version of the batch processing function.

    Walks ``input_folder``, mirrors its directory tree under ``output_base``
    and submits every image file to a ThreadPoolExecutor, one
    ``process_single_file`` task per file.

    Args:
        input_folder: Root directory that is walked recursively.
        background_image_path: Not used by this function; kept so the
            signature matches ``process_images``.
        output_base: Root directory under which the mirrored tree is created.
    """
    image_exts = ('.png', '.jpg', '.jpeg', '.bmp')
    # os.cpu_count() returns None when the CPU count cannot be determined;
    # fall back to 1 so the multiplication cannot raise TypeError.
    max_workers = (os.cpu_count() or 1) * 2

    with ThreadPoolExecutor(max_workers=max_workers) as executor:
        futures = []

        for root, dirs, files in os.walk(input_folder):
            # Create the output directory in the main thread so it exists
            # before any worker tries to write into it.
            relative_path = os.path.relpath(root, input_folder)
            output_dir = os.path.join(output_base, relative_path)
            os.makedirs(output_dir, exist_ok=True)

            # Submit one task per image file to the pool.
            for filename in files:
                if not filename.lower().endswith(image_exts):
                    continue

                input_path = os.path.join(root, filename)
                output_path = os.path.join(output_dir, filename)
                futures.append(executor.submit(
                    process_single_file,
                    input_path,
                    output_path
                ))

        # Wait for every task and report anything a worker let escape.
        for future in futures:
            try:
                future.result()
            except Exception as e:
                print(f"Unhandled error in thread: {str(e)}")
# Usage example
# NOTE(review): both the serial process_images and the pooled
# process_imageswithpool are called below with the same arguments, so as
# written every input is processed twice. Presumably only one of the two
# calls is intended -- confirm.
input_directory = 'L:/Grade_datasets/JY_A'
background_image_path = 'F:/dataset/02.TA_EC/rundata/BACKGROUND/ZY_B'
output_directory = 'L:/Grade_datasets/MOVE_BACKGROUND'
process_images(input_directory, background_image_path, output_directory)
process_imageswithpool(input_directory, background_image_path, output_directory)

42
dataset/splitdataset.py Normal file
View File

@@ -0,0 +1,42 @@
import os
import random
import shutil
def split_dataset(input_folder, output_folder, *, num_subsets=3, images_per_subset=400):
    """Split every category folder into equally sized sub-datasets.

    Each directory found while walking ``input_folder`` is treated as a
    category. Its images are padded by random re-sampling (or truncated after
    shuffling) to exactly ``num_subsets * images_per_subset`` entries and then
    copied into ``output_folder/subdataset_<k>/<category>/``.

    Args:
        input_folder: Root directory containing the category folders.
        output_folder: Root directory that receives the ``subdataset_<k>``
            folders.
        num_subsets: Number of sub-datasets to create (default 3).
        images_per_subset: Images per category in each sub-dataset
            (default 400).

    Raises:
        ValueError: If ``num_subsets`` or ``images_per_subset`` is < 1.
    """
    if num_subsets < 1 or images_per_subset < 1:
        raise ValueError('num_subsets and images_per_subset must be >= 1')

    image_exts = ('.png', '.jpg', '.jpeg', '.bmp', '.tiff')
    total_needed = num_subsets * images_per_subset

    # Create the output sub-dataset directories.
    subdatasets = [os.path.join(output_folder, f'subdataset_{i+1}') for i in range(num_subsets)]
    for subdataset in subdatasets:
        os.makedirs(subdataset, exist_ok=True)

    # File names already written in this run, keyed by destination folder, so
    # re-sampled duplicates never overwrite an earlier copy.
    used_names = {}

    # Visit every category folder.
    for root, dirs, _ in os.walk(input_folder):
        for category in dirs:
            category_folder = os.path.join(root, category)
            images = [os.path.join(category_folder, f) for f in os.listdir(category_folder)
                      if f.lower().endswith(image_exts)]

            # random.choices raises IndexError on an empty sequence, so a
            # folder without images is skipped instead of crashing the run.
            if not images:
                print(f'Skipping {category_folder}: no images found')
                continue

            # Pad by re-sampling when there are fewer images than required.
            if len(images) < total_needed:
                images.extend(random.choices(images, k=total_needed - len(images)))

            # Shuffle, then drop any surplus so the subset index computed
            # below can never run past the last sub-dataset.
            random.shuffle(images)
            del images[total_needed:]

            # Create the category sub-folder in every sub-dataset.
            for subdataset_path in subdatasets:
                category_subfolder = os.path.join(subdataset_path, category)
                os.makedirs(category_subfolder, exist_ok=True)

            # Distribute the images, images_per_subset per sub-dataset.
            for i, image_path in enumerate(images):
                subdataset_index = i // images_per_subset
                subdataset_path = subdatasets[subdataset_index]
                category_subfolder = os.path.join(subdataset_path, category)

                # Give a repeated source file a unique destination name.
                taken = used_names.setdefault(category_subfolder, set())
                name = os.path.basename(image_path)
                stem, ext = os.path.splitext(name)
                suffix = 0
                while name in taken:
                    suffix += 1
                    name = f'{stem}_dup{suffix}{ext}'
                taken.add(name)

                shutil.copy(image_path, os.path.join(category_subfolder, name))

    print(f'Dataset split into {num_subsets} subdatasets with {images_per_subset} images per category at {output_folder}')
if __name__ == "__main__":
    # Script entry point: split the background-removed images into
    # sub-datasets under the SPLIT directory.
    input_folder, output_folder = (
        'L:/Grade_datasets/MOVE_BACKGROUND',
        'L:/Grade_datasets/SPLIT',
    )
    split_dataset(input_folder=input_folder, output_folder=output_folder)