|
|
@@ -8,20 +8,27 @@ import torch.optim as optim
|
|
|
from torch.optim import lr_scheduler
|
|
|
import torch.backends.cudnn as cudnn
|
|
|
import numpy as np
|
|
|
-import torchvision
|
|
|
from torchvision import datasets, models, transforms
|
|
|
-from fastapi import FastAPI, File, UploadFile
|
|
|
-import uvicorn
|
|
|
-import json
|
|
|
import time
|
|
|
import os
|
|
|
+import zipfile
|
|
|
import copy
|
|
|
import platform
|
|
|
from torch.utils.tensorboard import SummaryWriter
|
|
|
|
|
|
cudnn.benchmark = True
|
|
|
|
|
|
-app = FastAPI()
|
|
|
+data_phase = ['train', 'val']
|
|
|
+database_dir = './data'
|
|
|
+output_dir = './runs' # root directory for saved models and log backups
|
|
|
+newest_log = './newest_log' # directory that holds the newest log
|
|
|
+log_port = 6667 # TensorBoard log port
|
|
|
+writer: SummaryWriter
|
|
|
+
|
|
|
+# Used for checks when requests are received asynchronously
|
|
|
+task_list = ['train', 'data_process']
|
|
|
+running_task = set()
|
|
|
+exit_flag = False
|
|
|
|
|
|
|
|
|
def print_progress_bar(iteration, total, prefix='', suffix='', decimals=1, length=100, fill='█', print_end="\r"):
|
|
|
@@ -39,14 +46,10 @@ def print_progress_bar(iteration, total, prefix='', suffix='', decimals=1, lengt
|
|
|
|
|
|
|
|
|
# 备份log
|
|
|
def move_log(model_save_dir, log_dir=None):
    """Back up the first entry of the newest-log directory into ``model_save_dir``.

    Args:
        model_save_dir: Existing directory that receives the copied log file.
        log_dir: Directory to read the log from. Defaults to the module-level
            ``newest_log`` when omitted, which is what existing callers get.

    Returns:
        None. When ``log_dir`` is missing or empty nothing is copied and a
        message is printed instead of raising, so a finished training run is
        not aborted by the backup step.
    """
    if log_dir is None:
        # Resolved at call time so the module-level setting is honoured.
        log_dir = newest_log

    if not os.path.isdir(log_dir):
        print('缺失路径: ', log_dir)
        return

    log_names = os.listdir(log_dir)
    if not log_names:
        # Indexing [0] on an empty listing would raise IndexError.
        print('log 目录为空, 跳过备份: ', log_dir)
        return

    log_name = log_names[0]
    log_path = os.path.join(log_dir, log_name)
    save_path = os.path.join(model_save_dir, log_name)

    shutil.copy(log_path, save_path)
    print('log 已备份')
|
|
|
@@ -105,7 +108,7 @@ def train_model(model, criterion, optimizer, scheduler, num_epochs=25):
|
|
|
epoch_acc = running_corrects.double() / dataset_sizes[phase]
|
|
|
|
|
|
writer.add_scalar(phase + " loss", epoch_loss, epoch + 1)
|
|
|
- writer.add_scalar(phase + " accuracy", epoch_loss, epoch + 1)
|
|
|
+ writer.add_scalar(phase + " accuracy", epoch_acc, epoch + 1)
|
|
|
|
|
|
print(f'{phase} Loss: {epoch_loss:.6f} Acc: {epoch_acc:.6f}')
|
|
|
|
|
|
@@ -127,65 +130,58 @@ def train_model(model, criterion, optimizer, scheduler, num_epochs=25):
|
|
|
return model
|
|
|
|
|
|
|
|
|
def train(epoch=30, save_path='resnet.pt', model_path=None,
          freeze=7, learn_rate=0.01, momentum=0.9, decay=0.7):
    """Fine-tune a ResNet-50 classifier and save the whole model object.

    Reads the module-level globals ``class_num`` and ``device`` and delegates
    the epoch loop to ``train_model``.

    Args:
        epoch: Number of epochs, forwarded to ``train_model`` as ``num_epochs``.
        save_path: Destination handed to ``torch.save``; the full model object
            is saved, not a state dict.
        model_path: Path of a previously saved model object. ``None`` or an
            empty string starts from torchvision's pretrained ResNet-50.
        freeze: Number of leading child modules (in ``model.children()`` order)
            whose parameters get ``requires_grad = False``. A value that is
            never reached by the enumeration freezes every child.
        learn_rate: Learning rate passed to the SGD optimizer.
        momentum: Momentum passed to the SGD optimizer.
        decay: Accepted but not applied.
            NOTE(review): presumably intended as the StepLR ``gamma`` (still
            fixed at 0.1 below) — confirm with the author before wiring it in,
            because the default 0.7 would change the current schedule.
    """
    if not model_path:
        # No checkpoint given: start from the pretrained weights and replace
        # the final layer so it outputs ``class_num`` classes.
        model = models.resnet50(pretrained=True)
        model.fc = nn.Linear(model.fc.in_features, class_num)
    else:
        # torch.load unpickles an arbitrary Python object: only load files
        # from a trusted source.
        # NOTE(review): recent PyTorch releases default to weights_only=True,
        # which rejects whole-model pickles — confirm the installed version.
        model = torch.load(model_path)
        old_cls_num = model.fc.out_features
        if class_num == old_cls_num:
            print('分类头适合, 进行训练')
        else:
            # The loaded head has a different class count: swap the last layer.
            model.fc = nn.Linear(model.fc.in_features, class_num)
            print(f"修改分类头: {old_cls_num} --> {class_num}")

    model = model.to(device)

    criterion = nn.CrossEntropyLoss()
    # Use the caller-supplied hyperparameters; the defaults equal the values
    # that were previously hard-coded here.
    optimizer_ft = optim.SGD(model.parameters(), lr=learn_rate, momentum=momentum)
    exp_lr_scheduler = lr_scheduler.StepLR(optimizer_ft, step_size=7, gamma=0.1)

    # Freeze the first ``freeze`` child modules.
    for i, child in enumerate(model.children()):
        if i == freeze:
            break
        for param in child.parameters():
            param.requires_grad = False

    # Print which parameters remain trainable.
    for param in model.parameters():
        print(param.requires_grad)

    model = train_model(model, criterion, optimizer_ft,
                        exp_lr_scheduler, num_epochs=epoch)

    torch.save(model, save_path)
|
|
|
+
|
|
|
|
|
|
+'''
|
|
|
+ epoch: 训练次数
|
|
|
+ save_path: 模型保存路径
|
|
|
|
|
|
-@app.post("/train/params_json")
|
|
|
-async def upload_json(file: UploadFile = File(...)):
|
|
|
- contents = await file.read()
|
|
|
- json_data = contents.decode("utf-8")
|
|
|
- # 处理JSON数据
|
|
|
- print(json_data)
|
|
|
- json_data = json.loads(json_data)
|
|
|
+ model_path: 加载的模型路径
|
|
|
+ freeze: number of leading child modules (layers) to freeze
|
|
|
|
|
|
- global data_dir, device, class_num, dataloaders, dataset_sizes, output_dir, writer
|
|
|
+ '''
|
|
|
+def load_param(epoch, data_dir, model_path, freeze, learn_rate, momentum, decay):
|
|
|
+ global device, class_num, dataloaders, dataset_sizes, output_dir, writer
|
|
|
|
|
|
- data_dir = json_data['data_dir_path']
|
|
|
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
|
|
|
|
|
|
data_transforms = {
|
|
|
@@ -216,30 +212,12 @@ async def upload_json(file: UploadFile = File(...)):
|
|
|
class_num = len(class_names)
|
|
|
print('class:', class_num)
|
|
|
|
|
|
- '''
|
|
|
- epoch: 训练次数
|
|
|
- save_path: 模型保存路径
|
|
|
- load_my_model: 是否加载训练过的模型
|
|
|
- model_path: 加载的模型路径
|
|
|
- is_freeze: 是否冻结模型部分层数
|
|
|
- freeze_num: 冻结层数
|
|
|
- is_transfer_learn: 是否迁移学习
|
|
|
- transfer_cls: 迁移学习旧模型分类头数量
|
|
|
- '''
|
|
|
- # 数据集路径在本文件上面
|
|
|
- epoch = json_data['params']['epoch']
|
|
|
- load_my_model = json_data['load_my_model']
|
|
|
- model_path = json_data['model_path']
|
|
|
- is_transfer_learn = json_data['is_transfer_learn']
|
|
|
- transfer_cls = json_data['transfer_cls']
|
|
|
-
|
|
|
- if not os.path.exists(output_dir):
|
|
|
- os.makedirs(output_dir)
|
|
|
+ print(f"输入参数: 训练次数: {epoch}, 模型路径: {model_path}")
|
|
|
|
|
|
model_id = len(os.listdir(output_dir)) + 1
|
|
|
- output_dir = os.path.join(output_dir, str(model_id))
|
|
|
- if not os.path.exists(output_dir):
|
|
|
- os.mkdir(output_dir)
|
|
|
+ model_save_dir = os.path.join(output_dir, str(model_id))
|
|
|
+ if not os.path.exists(model_save_dir):
|
|
|
+ os.mkdir(model_save_dir)
|
|
|
|
|
|
# 删除旧的log
|
|
|
if len(os.listdir(newest_log)) > 0:
|
|
|
@@ -247,30 +225,16 @@ async def upload_json(file: UploadFile = File(...)):
|
|
|
writer = SummaryWriter(newest_log)
|
|
|
writer.add_text('model', "model id: " + str(model_id))
|
|
|
|
|
|
- save_path = os.path.join(output_dir, str(json_data['model_name']) + '_out' + str(class_num) + '.pth')
|
|
|
+ save_path = os.path.join(model_save_dir, 'resnet50_out' + str(class_num) + '.pt')
|
|
|
|
|
|
- train(epoch=epoch, save_path=save_path,
|
|
|
- load_my_model=load_my_model,
|
|
|
+ train(epoch=epoch,
|
|
|
+ save_path=save_path,
|
|
|
model_path=model_path,
|
|
|
- is_transfer_learn=is_transfer_learn, transfer_cls=transfer_cls
|
|
|
- )
|
|
|
+ freeze=freeze,
|
|
|
+ learn_rate=learn_rate,
|
|
|
+ momentum=momentum,
|
|
|
+ decay=decay)
|
|
|
+
|
|
|
writer.flush()
|
|
|
writer.close()
|
|
|
- move_log(model_id)
|
|
|
-
|
|
|
- return {"train end"}
|
|
|
-
|
|
|
-
|
|
|
-if __name__ == '__main__':
|
|
|
- data_dir: str
|
|
|
- device = None
|
|
|
- class_num: int
|
|
|
- dataloaders = None
|
|
|
- dataset_sizes = None
|
|
|
-
|
|
|
- data_phase = ['train', 'val']
|
|
|
- output_dir = './runs'
|
|
|
- newest_log = './newest_log'
|
|
|
- writer: SummaryWriter
|
|
|
-
|
|
|
- uvicorn.run(app, host='0.0.0.0', port=6661)
|
|
|
+ move_log(model_save_dir)
|