从目录或特定文件(CSV、npy、h5)读取数据及预处理(scale、generator)的相关代码。

文件夹图片读取

代码如下:
# ref: https://www.kaggle.com/uysimty/keras-cnn-dog-or-cat-classification

# Load data: list the training directory and derive a binary label from each
# file name. The text before the first '.' is the label; 'dog' -> 1, else 0.
filenames = os.listdir("../input/train/train")
categories = []
for filename in filenames:
    category = filename.split('.')[0]
    categories.append(1 if category == 'dog' else 0)

df = pd.DataFrame({'filename': filenames, 'category': categories})
df.head()
df.tail()

# If you hit 'TypeError: If class_mode="binary", y_col="category" column
# values must be strings.', run the commented-out statement below.
# df['category'] = df['category'].astype('str');

# Bar chart of how many rows fall into each category.
df['category'].value_counts().plot(kind='bar')

# Show one randomly chosen training image.
sample = random.choice(filenames)
image = load_img("../input/train/train/" + sample)
plt.imshow(image)


# Split the table 80/20 into train and validation parts (fixed seed), and
# renumber each part's index from 0.
train_df, validate_df = (
    part.reset_index(drop=True)
    for part in train_test_split(df, test_size=0.20, random_state=42)
)

# Training-time generator: rescale pixel values by 1/255 and apply random
# geometric augmentation.
train_datagen = ImageDataGenerator(
    rescale=1./255,
    rotation_range=15,
    shear_range=0.1,
    zoom_range=0.2,
    width_shift_range=0.1,
    height_shift_range=0.1,
    horizontal_flip=True,
)

# Stream batches from disk, using the dataframe's filename/category columns.
train_generator = train_datagen.flow_from_dataframe(
    dataframe=train_df,
    directory="../input/train/train/",
    x_col='filename',
    y_col='category',
    target_size=IMAGE_SIZE,
    class_mode='binary',
    batch_size=batch_size,
)

# Preview the generator: feed it a single randomly sampled training row and
# draw 15 batches from it into a 5x3 grid.
example_df = train_df.sample(n=1).reset_index(drop=True)
example_generator = train_datagen.flow_from_dataframe(
    dataframe=example_df,
    directory="../input/train/train/",
    x_col='filename',
    y_col='category',
    target_size=IMAGE_SIZE,
    class_mode='binary',
)

plt.figure(figsize=(12, 12))
for i in range(15):
    plt.subplot(5, 3, i + 1)
    # Take exactly one batch per grid cell and show its first image.
    X_batch, Y_batch = next(example_generator)
    image = X_batch[0]
    plt.imshow(image)
plt.tight_layout()
plt.show()

.h5数据集读取

代码如下:
from keras.utils.io_utils import HDF5Matrix
# Locate the train and test HDF5 files under ../input.
base_path = os.path.join('..', 'input')
train_h5_path = os.path.join(base_path, 'food_c101_n10099_r32x32x1.h5')
test_h5_path = os.path.join(base_path, 'food_test_c101_n1000_r32x32x1.h5')

# Read the 'images' and 'category' datasets of the training file; [:] slices
# out the full contents of each HDF5Matrix.
X_train = HDF5Matrix(train_h5_path, 'images')[:]
# Labels must come from the same file as the images so that rows line up
# (the original read them from the test file).
Y_train = HDF5Matrix(train_h5_path, 'category')[:]

图片generator

例1

本例中,在for循环中索引数据集,并通过调用keras库中函数train_on_batch对网络进行训练。

代码如下:
def train_for_n(nb_epoch=5000, plt_frq=25, BATCH_SIZE=32):
    """Alternate discriminator and generator updates for ``nb_epoch`` steps.

    Uses the module-level ``X_train``, ``n_cat``, ``generator``,
    ``discriminator``, ``GAN``, ``losses``, ``make_trainable``, ``plot_loss``
    and ``plot_gen``. Per-step losses are appended to ``losses["d"]`` and
    ``losses["g"]``; nothing is returned.

    Args:
        nb_epoch: Number of training iterations (one batch per iteration).
        plt_frq: The plots are refreshed on every ``plt_frq``-th iteration.
        BATCH_SIZE: Number of real images, and of noise vectors, per step.
    """
    for e in tqdm(range(nb_epoch)):
        # Draw a random batch of real images and generate an equal number of
        # fake ones from uniform noise.
        picked = np.random.randint(0, X_train.shape[0], size=BATCH_SIZE)
        real_images = X_train[picked, :, :, :]
        gen_noise = np.random.uniform(0, 1, size=[BATCH_SIZE, n_cat])
        fake_images = generator.predict(gen_noise)

        # Discriminator step: real rows are labelled in column 1, generated
        # rows in column 0.
        disc_inputs = np.concatenate((real_images, fake_images))
        disc_targets = np.zeros([2 * BATCH_SIZE, 2])
        disc_targets[:BATCH_SIZE, 1] = 1
        disc_targets[BATCH_SIZE:, 0] = 1

        make_trainable(discriminator, True)
        d_loss = discriminator.train_on_batch(disc_inputs, disc_targets)
        losses["d"].append(d_loss)

        # Generator step: train the stacked GAN on fresh noise with every
        # target set to the "real" column (column 1).
        stack_noise = np.random.uniform(0, 1, size=[BATCH_SIZE, n_cat])
        stack_targets = np.zeros([BATCH_SIZE, 2])
        stack_targets[:, 1] = 1

        make_trainable(discriminator, False)
        g_loss = GAN.train_on_batch(stack_noise, stack_targets)
        losses["g"].append(g_loss)

        # Refresh the plots on the last iteration of each plt_frq-sized window.
        if e % plt_frq == plt_frq - 1:
            plot_loss(losses)
            plot_gen()

例2

代码如下:
# Augmentation pipeline: small rotation/zoom/shift ranges are set, while all
# centering, std-normalization, whitening and flip options are disabled.
datagen = ImageDataGenerator(
    # --- random geometric augmentation ---
    rotation_range=10,  # randomly rotate images (degrees, 0 to 180)
    zoom_range=0.1,  # randomly zoom images
    width_shift_range=0.1,  # random horizontal shift (fraction of total width)
    height_shift_range=0.1,  # random vertical shift (fraction of total height)
    horizontal_flip=False,  # no random horizontal flips
    vertical_flip=False,  # no random vertical flips
    # --- normalization (all disabled) ---
    featurewise_center=False,  # set input mean to 0 over the dataset
    samplewise_center=False,  # set each sample mean to 0
    featurewise_std_normalization=False,  # divide inputs by dataset std
    samplewise_std_normalization=False,  # divide each input by its own std
    zca_whitening=False,  # apply ZCA whitening
)

# Generator method that computes the statistics needed for data-dependent
# transformations, using X_train.
datagen.fit(X_train)