顔写真から年齢を推定/CNN - Python×医療事務(×トロント）

↓UTKファイルをダウンロード

https://drive.google.com/drive/folders/0BxYys69jI14kU0I1YUQyY1ZDRUE

ダウンロードしたファイルを右クリックしてzip形式に変換するか、または名前を UTKFace.zip に変更する

Googlecolabにアップロード

f:id:chocolate22554:20210228162218p:plain

アップロードには結構時間がかかる

アップロード後、ファイル右クリック　パスをコピー　○○の部分に貼り付け

z = zipfile.ZipFile('○○')

f:id:chocolate22554:20210228162608p:plain

エラー発生

ValueError: With n_samples=0, test_size=0.2 and train_size=None, the resulting train set will be empty. Adjust any of the aforementioned parameters.

f:id:chocolate22554:20210228162816p:plain

UTKFaceをFinder内で一旦解凍し改めてzip化し直した

名前はUTKFace 2.zip に変わった

f:id:chocolate22554:20210228170322p:plain

するとなんかのダウンロードが始まった

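エラーメッセージにある train set とは？ train関係の何かがそもそもダウンロードされていなかったのだろうか？（補足：`n_samples=0` は読み込めた画像が0件だったという意味なので、最初のzipでは正規表現 `^UTKFace.*jpg$` に一致するファイル名が1つも見つからなかったと考えられる）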

f:id:chocolate22554:20210228170303p:plain

そして出来ました😂

f:id:chocolate22554:20210228174001p:plain

f:id:chocolate22554:20210228174025p:plain

些細なことで出来ないこともあるけど些細なことで出来るからそれが醍醐味ですね＾＾

とりあえずコツコツ作ってどこかでアイデアを収束させたい

↓全コード

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import os, zipfile, io, re
from PIL import Image
from sklearn.model_selection import train_test_split
import keras
from keras.models import Sequential, Model, load_model
from keras.layers.convolutional import Conv2D, MaxPooling2D
from keras.layers.core import Dense, Dropout, Activation, Flatten
from keras.layers.pooling import GlobalAveragePooling2D
from keras.callbacks import EarlyStopping, ReduceLROnPlateau
from keras.preprocessing.image import ImageDataGenerator

image_size = 100  # side length, in pixels, that every input image is resized to

# Read the images straight out of the uploaded .zip archive.
# X collects the image arrays, Y the ages parsed from the file names.
X = []
Y = []

# The context manager closes the archive even if decoding an image fails.
with zipfile.ZipFile('/content/UTKFace 2.zip') as z:
    imgfiles = [x for x in z.namelist() if re.search(r"^UTKFace.*jpg$", x)]
    if not imgfiles:
        # Fail here with a clear message instead of the later
        # "With n_samples=0 ..." error raised by train_test_split.
        raise ValueError(
            "no archive member matched r'^UTKFace.*jpg$'; "
            "check the folder layout inside the zip file"
        )

    for imgfile in imgfiles:
        image = Image.open(io.BytesIO(z.read(imgfile)))
        image = image.convert('RGB')
        image = image.resize((image_size, image_size))
        data = np.asarray(image)

        # The first "_"-separated field of the file name is used as the age.
        file = os.path.basename(imgfile)
        file_split = file.split('_')

        X.append(data)
        Y.append(int(file_split[0]))

X = np.array(X)
Y = np.array(Y)

# Both splits below use the same seed and the same 20% hold-out ratio.
split_options = {'random_state': 0, 'test_size': 0.2}

# Split the full data set into train and test parts.
X_train, X_test, y_train, y_test = train_test_split(X, Y, **split_options)

# Convert the image arrays to float32 and divide by 255.
X_train = X_train.astype('float32') / 255
X_test = X_test.astype('float32') / 255

# Split the train part once more to obtain the validation set.
X_train, X_val, y_train, y_val = train_test_split(
    X_train, y_train, **split_options
)

# CNN
def create_model(X, y):
    """Build and compile the CNN used for age regression.

    Args:
        X: Array of input images; only ``X.shape[1:]`` is read, to set the
            input shape of the first convolution layer.
        y: Unused. Kept so existing ``create_model(X, y)`` calls keep working.

    Returns:
        The compiled ``Sequential`` model, ending in a single-unit dense
        layer and compiled with MSE loss, the Adam optimizer and an MAE
        metric.
    """
    model = Sequential()
    model.add(Conv2D(32, (3, 3), padding='same', input_shape=X.shape[1:], activation='relu'))
    model.add(Conv2D(32, (3, 3), activation='relu'))
    model.add(MaxPooling2D(pool_size=(2, 2)))
    model.add(Conv2D(64, (3, 3), padding='same', activation='relu'))
    model.add(Flatten())  # convert to a 1-D vector
    model.add(Dense(64, activation='relu'))
    model.add(Dense(1))  # single output: the predicted age
    model.compile(loss='mse', optimizer='Adam', metrics=['mae'])
    # The return must sit inside the function body; at column 0 it is a
    # syntax error.
    return model

# Helper for visualizing the training process
def plot_history(history):
    """Plot the training and validation loss curves and show the figure.

    Args:
        history: Object whose ``history`` mapping holds the ``'loss'`` and
            ``'val_loss'`` series (as returned by the training call).
    """
    # Draw both series with the same marker/line style, labelled by key.
    for series_name in ('loss', 'val_loss'):
        plt.plot(history.history[series_name], "o-", label=series_name)

    plt.title('model loss')
    plt.xlabel('epoch')
    plt.ylabel('loss')
    plt.legend(loc='upper right')
    plt.show()
 
# Data augmentation settings: shifts of up to 0.1 in width and height plus
# horizontal flips; every other listed option is explicitly switched off.
augmentation_options = {
    'featurewise_center': False,
    'samplewise_center': False,
    'featurewise_std_normalization': False,
    'samplewise_std_normalization': False,
    'zca_whitening': False,
    'rotation_range': 0,
    'width_shift_range': 0.1,
    'height_shift_range': 0.1,
    'horizontal_flip': True,
    'vertical_flip': False,
}
datagen = ImageDataGenerator(**augmentation_options)

# Early-stopping callback: monitors val_loss with a patience of 10.
early_stopping = EarlyStopping(monitor='val_loss', patience=10, verbose=1)

# Learning-rate reduction callback: monitors val_loss with a factor of 0.1
# and a patience of 3.
reduce_lr = ReduceLROnPlateau(monitor='val_loss', factor=0.1, patience=3, verbose=1)

# Training
batch_size = 128  # used for both the augmented batches and steps_per_epoch

model = create_model(X_train, y_train)
train_batches = datagen.flow(X_train, y_train, batch_size=batch_size)

# NOTE(review): fit_generator appears to be deprecated in newer Keras
# releases in favor of fit() - confirm against the installed version.
history = model.fit_generator(
    train_batches,
    steps_per_epoch=X_train.shape[0] // batch_size,
    epochs=50,
    validation_data=(X_val, y_val),
    callbacks=[early_stopping, reduce_lr],
    shuffle=True,
    verbose=1,
)

# Save the trained model, then plot the loss curves.
model.save('age_prediction.hdf5')
plot_history(history)

# Load the saved model
model = load_model('age_prediction.hdf5')

# Predictions for the first 30 test samples (fewer if the test set is smaller)
preds = model.predict(X_test[0:30])

# Show each image with its true value and predicted value as the title.
plt.figure(figsize=(16, 6))
for i in range(len(preds)):
    plt.subplot(3, 10, i + 1)
    plt.axis("off")
    pred = round(preds[i][0], 1)
    true = y_test[i]
    # The title check and the image draw must run once per sample, so they
    # belong inside the loop. Predictions off by 3.0 or more are titled red.
    if abs(pred - true) < 3.0:
        plt.title(str(true) + '\n' + str(pred))
    else:
        plt.title(str(true) + '\n' + str(pred), color="red")
    plt.imshow(X_test[i])
plt.show()

*1: (image_size, image_size) — 本来はコード中の `image.resize((image_size, image_size))` の引数。はてな記法の脚注 `((...))` として切り出されたもの