import argparse
import os
import random
import shutil
import xml.etree.ElementTree as ET

import cv2
import numpy as np
import tqdm

from config import names

# Looked up at import time: a missing PROJECT_NAME variable raises KeyError
# before anything else runs.
PROJECT_NAME = os.environ['PROJECT_NAME']


def convert_annotation(xmlpath, xmlname, opt):
    """Tell whether an annotation file contains a box within the area limits.

    The XML file is parsed, the matching image is decoded to obtain its
    height and width, and every ``object`` element whose ``name`` is in
    ``config.names`` is checked.

    Args:
        xmlpath: Path of the annotation XML file to read.
        xmlname: File name of the annotation; its last four characters
            (the ``.xml`` suffix) are dropped to build the image file name.
        opt: Options object providing ``path`` (image directory), ``postfix``
            (image extension without the dot), ``min_area_ratio`` and
            ``max_area_ratio``.

    Returns:
        True as soon as one accepted object has
        ``min_area_ratio < box_area / image_area < max_area_ratio``
        (both bounds exclusive), otherwise False.

    Raises:
        ValueError: If the image bytes could be read but not decoded.
        OSError: If the XML or image file cannot be opened.
    """
    with open(xmlpath, "r", encoding='utf-8') as in_file:
        root = ET.parse(in_file).getroot()

    image_file = '{}/{}.{}'.format(opt.path, xmlname[:-4], opt.postfix)
    # np.fromfile + imdecode instead of cv2.imread: the bytes are read by
    # numpy, so the path itself is never handed to OpenCV.
    img = cv2.imdecode(np.fromfile(image_file, np.uint8), cv2.IMREAD_COLOR)
    if img is None:
        # imdecode signals failure by returning None; fail with a message
        # that names the file instead of an AttributeError on `.shape`.
        raise ValueError(f'cannot decode image: {image_file}')
    h, w = img.shape[:2]
    image_area = h * w

    for obj in root.iter('object'):
        if obj.find('name').text not in names:
            continue
        xmlbox = obj.find('bndbox')
        xmin = float(xmlbox.find('xmin').text)
        xmax = float(xmlbox.find('xmax').text)
        ymin = float(xmlbox.find('ymin').text)
        ymax = float(xmlbox.find('ymax').text)
        area_ratio = ((xmax - xmin) * (ymax - ymin)) / image_area
        if opt.min_area_ratio < area_ratio < opt.max_area_ratio:
            return True
    return False


def select_pic(opt):
    """Pick up to ``opt.num`` images whose annotations pass the area filter.

    The entries of ``opt.xml_path`` are visited in random order. Each entry
    whose name ends in ``.xml`` (case-insensitive) is passed to
    ``convert_annotation``; on success the corresponding image file name
    (same stem, ``opt.postfix`` extension) is collected.

    Files that raise while being processed are skipped; their names and the
    reasons are printed once the scan is finished.

    Args:
        opt: Options object providing ``xml_path``, ``postfix``, ``num`` and
            the fields read by ``convert_annotation``.

    Returns:
        List of image file names (not full paths), at most ``opt.num`` long.
    """
    postfix = opt.postfix
    xmlpath = opt.xml_path
    need_copy = []
    error_file_list = []
    error_reasons = []

    entries = os.listdir(xmlpath)
    random.shuffle(entries)

    with tqdm.tqdm(entries, desc=f'selecting... 0/{opt.num}') as progress:
        for entry in progress:
            # Checked before doing any work so that num <= 0 selects nothing.
            if len(need_copy) >= opt.num:
                break

            # Test the suffix of the entry name only; a substring test on the
            # joined path would accept every file in a directory whose own
            # name contains ".xml".
            stem, ext = os.path.splitext(entry)
            if ext.lower() != '.xml':
                continue

            try:
                if convert_annotation(os.path.join(xmlpath, entry), entry, opt):
                    need_copy.append(f'{stem}.{postfix}')
                    # Refresh the counter; a description built once before
                    # the loop would stay at 0 forever.
                    progress.set_description(
                        f'selecting... {len(need_copy)}/{opt.num}')
            except Exception as e:
                # Best effort: one broken annotation must not stop the scan,
                # but keep the reason so it can be reported afterwards.
                error_file_list.append(entry)
                error_reasons.append(f'{entry}: {e!r}')

    print(f'this file convert failure\n{error_file_list}')
    for reason in error_reasons:
        print(f'  {reason}')
    return need_copy


def parse_opt():
    """Parse the command-line options; unrecognised arguments are ignored.

    Returns:
        argparse.Namespace with ``path``, ``xml_path``, ``num``, ``postfix``,
        ``min_area_ratio`` and ``max_area_ratio``.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('--path', type=str, required=True, help='base_image_path')
    parser.add_argument('--xml_path', type=str, required=True, help='base_xml_path')
    parser.add_argument('--num', type=int, required=True, help='image num')
    parser.add_argument('--postfix', type=str, default='jpg', help='image postfix')
    parser.add_argument('--min_area_ratio', type=float, default=0.0, help='min_area_ratio')
    parser.add_argument('--max_area_ratio', type=float, default=0.1, help='max_area_ratio')
    # parse_known_args returns (namespace, leftover_args); only the namespace
    # is used, so unknown flags do not abort the script.
    opt = parser.parse_known_args()[0]
    return opt


def main():
    """Rebuild ``<cwd>/<PROJECT_NAME>/img_train`` from the selected images."""
    opt = parse_opt()
    pwd = os.getcwd()
    dest_image_path = f'{pwd}/{PROJECT_NAME}/img_train'

    # Start from an empty destination: any previous content is deleted.
    if os.path.exists(dest_image_path):
        shutil.rmtree(dest_image_path)
    os.makedirs(dest_image_path, exist_ok=True)

    need_copy = select_pic(opt)
    for path in tqdm.tqdm(need_copy, desc=f'from {opt.path} copy to {dest_image_path}'):
        shutil.copy(f'{opt.path}/{path}', f'{dest_image_path}/{path}')


if __name__ == '__main__':
    main()