import os
import time
from multiprocessing import Pool
def getFile(path):
    # collect the .txt / .10w files under a directory and return them as a list
    fileList = []
    for root, dirs, files in os.walk(path):
        for i in files:
            if i.endswith('.txt') or i.endswith('.10w'):
                fileList.append(os.path.join(root, i))
    return fileList
def operFile(filePath):
    # count the number of lines and characters in one file and return them
    with open(filePath) as fp:
        content = fp.readlines()
    lines = len(content)
    alphaNum = 0
    for i in content:
        alphaNum += len(i.strip('\n'))
    return lines, alphaNum, filePath
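# The two helpers above are only defined here, while the Pool and time imports go
# unused; below is a minimal driver sketch that fans operFile out over a process
# Pool and aggregates the per-file (lines, chars, path) tuples. The function name
# countAllFiles and the default worker count are illustrative assumptions, not
# part of the original script.
def countAllFiles(path, workers=4):
    startTime = time.time()
    fileList = getFile(path)
    pool = Pool(workers)
    results = pool.map(operFile, fileList)  # one (lines, chars, path) tuple per file
    pool.close()
    pool.join()
    totalLines = sum(r[0] for r in results)
    totalChars = sum(r[1] for r in results)
    print('files: %d, lines: %d, chars: %d' % (len(fileList), totalLines, totalChars))
    print('elapsed: %.2f s' % (time.time() - startTime))
    return totalLines, totalChars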
import glob
import cv2
import numpy as np

def gen_patches(file_name):
    # Reconstructed patch-extraction routine; the original fragment starts at the
    # scale loop, so the grayscale image loading below and the definitions of
    # scales, patch_size, stride, aug_times and data_aug are assumed to come from
    # the surrounding script.
    img = cv2.imread(file_name, 0)  # assumed: load image as grayscale
    h, w = img.shape
    patches = []
    for s in scales:
        h_scaled, w_scaled = int(h*s), int(w*s)
        # cv2.resize expects dsize as (width, height)
        img_scaled = cv2.resize(img, (w_scaled, h_scaled), interpolation=cv2.INTER_CUBIC)
        # extract patches
        for i in range(0, h_scaled-patch_size+1, stride):
            for j in range(0, w_scaled-patch_size+1, stride):
                x = img_scaled[i:i+patch_size, j:j+patch_size]
                # data augmentation
                for k in range(0, aug_times):
                    #x_aug = data_aug(x, mode=np.random.randint(0,8))
                    x_aug = data_aug(x, mode=0)
                    patches.append(x_aug)
    return patches
if __name__ == '__main__':
    # parameters
    src_dir = './data/Train400/'
    save_dir = './data/npy_data/'
    file_list = glob.glob(src_dir+'*.png')  # get name list of all .png files
    num_threads = 16
    print('Start...')
    # initialize
    res = []
    # generate patches
    for i in range(0, len(file_list), num_threads):
        # use multiple processes to speed up patch generation
        p = Pool(num_threads)
        patch = p.map(gen_patches, file_list[i:min(i+num_threads, len(file_list))])
        #patch = p.map(gen_patches, file_list[i:i+num_threads])
        p.close()
        p.join()
        for x in patch:
            res += x
        print('Picture '+str(i)+' to '+str(i+num_threads)+' are finished...')
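    # save_dir is defined above but never used in this fragment; a minimal sketch
    # of persisting the collected patches follows. Stacking res into one array and
    # the file name 'clean_patches.npy' are assumptions, not part of the original.
    res = np.array(res)
    if not os.path.exists(save_dir):
        os.makedirs(save_dir)
    np.save(save_dir + 'clean_patches.npy', res)
    print('Done. Saved ' + str(len(res)) + ' patches to ' + save_dir)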