欢迎您访问 最编程 本站为您分享编程语言代码,编程技术文章!
您现在的位置是: 首页

[PyTorch 基础教程 27] DeepFM 推荐算法

最编程 2024-04-29 07:06:36
...
import argparse

import numpy as np
import pandas as pd
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader
from torch.utils.data import sampler

from data.dataset import build_dataset
from model.DeepFM import DeepFM


def train(epoch):
    """Train the module-level ``model`` for one epoch over ``loader_train``.

    Uses the module-level ``optimizer``, ``args`` and ``device`` globals set
    up in the ``__main__`` section.

    Args:
        epoch: Current epoch index; used only for progress logging.
    """
    model.train()
    criterion = nn.BCELoss()  # instantiate once, not once per batch
    for batch_idx, (xi, xv, y) in enumerate(loader_train):
        # Per the original author's notes: xi is [B, 7] continuous features,
        # xv is [B, 10] categorical indices, y is [B] binary labels
        # — TODO confirm against build_dataset.
        xi = torch.squeeze(xi).to(torch.float32)
        xv = torch.squeeze(xv)
        y = torch.squeeze(y).to(torch.float32)
        if args.gpu:
            # Move the batch to the same device as the model.
            xi, xv, y = xi.to(device), xv.to(device), y.to(device)
        optimizer.zero_grad()
        # Forward pass and loss; model output is squeezed from [B, 1] to [B].
        out = model(xi, xv)
        loss = criterion(torch.squeeze(out, dim=1), y)
        loss.backward()
        optimizer.step()
        if batch_idx % 200 == 0:
            print("epoch {}, batch_idx {}, loss {}".format(epoch, batch_idx, loss))


def test(epoch, best_acc=0):
    """Evaluate ``model`` on ``loader_test``; checkpoint on a new best accuracy.

    Args:
        epoch: Current epoch index; used only for logging.
        best_acc: Best per-sample accuracy observed so far.

    Returns:
        The (possibly updated) best accuracy.
    """
    model.eval()
    criterion = nn.BCELoss()
    test_loss = 0.0  # cost function error
    correct = 0.0
    with torch.no_grad():  # BUG FIX: no gradients needed during evaluation
        for batch_idx, (xi, xv, y) in enumerate(loader_test):
            xi = torch.squeeze(xi).to(torch.float32)
            xv = torch.squeeze(xv)
            y = torch.squeeze(y).to(torch.float32)
            if args.gpu:
                xi, xv, y = xi.to(device), xv.to(device), y.to(device)
            out = model(xi, xv)
            test_loss += criterion(torch.squeeze(out, dim=1), y).item()
            # Threshold the sigmoid output at 0.5 to get hard predictions.
            correct += ((torch.squeeze(out, dim=1) > 0.5) == y).sum().item()
    # BUG FIX: accuracy is per-sample — divide by the dataset size, not by
    # len(loader_test) (the number of batches), which inflated it ~bs times.
    acc = correct / len(loader_test.dataset)
    if acc > best_acc:
        best_acc = acc
        # NOTE(review): saves the whole module object; consider
        # model.state_dict() for portability.
        torch.save(model, args.save_path)
    # Average the summed per-batch losses over the number of batches.
    print("epoch {}, test loss {}, test acc {}".format(
        epoch, test_loss / len(loader_test), acc))
    return best_acc


if __name__ == "__main__":
    parser = argparse.ArgumentParser()
    parser.add_argument('-gpu', action='store_true', default=True,
                        help='use gpu or not ')
    parser.add_argument('-bs', type=int, default=128,
                        help='batch size for dataloader')
    # BUG FIX: help text was a copy-paste of the batch-size help.
    parser.add_argument('-epoches', type=int, default=15,
                        help='number of training epochs')
    parser.add_argument('-warm', type=int, default=1,
                        help='warm up training phase')
    parser.add_argument('-lr', type=float, default=1e-3,
                        help='initial learning rate')
    parser.add_argument('-resume', action='store_true', default=False,
                        help='resume training')
    # BUG FIX: the three path options were declared with action='store_true',
    # so passing them on the command line replaced the path string with the
    # boolean True. They are string-valued options.
    parser.add_argument('-train_path', type=str,
                        default='data/raw/trainingSamples.csv',
                        help='train data path')
    parser.add_argument('-test_path', type=str,
                        default='data/raw/testSamples.csv',
                        help='test data path')
    parser.add_argument('-save_path', type=str,
                        default='checkpoint/DeepFM/DeepFm_best.pth',
                        help='save model path')
    args = parser.parse_args()

    # 7 continuous (dense) features.
    continous_feature_names = ['releaseYear', 'movieRatingCount',
                               'movieAvgRating', 'movieRatingStddev',
                               'userRatingCount', 'userAvgRating',
                               'userRatingStddev']
    # 10 categorical features: 8 genre slots plus 2 id columns — id columns
    # are categorical features as well.
    categorial_feature_names = ['userGenre1', 'userGenre2', 'userGenre3',
                                'userGenre4', 'userGenre5',
                                'movieGenre1', 'movieGenre2', 'movieGenre3',
                                'userId', 'movieId']
    # Vocabulary sizes: 20 per genre slot, then 30001 for userId and
    # 1001 for movieId.
    categorial_feature_vocabsize = [20] * 8 + [30001] + [1001]

    # Build the train/test datasets and dataloaders.
    batch_size = args.bs
    train_data = build_dataset(args.train_path)
    loader_train = DataLoader(train_data, batch_size=batch_size,
                              num_workers=8, shuffle=True, pin_memory=True)
    test_data = build_dataset(args.test_path)
    loader_test = DataLoader(test_data, batch_size=batch_size, num_workers=8)

    # Autograd anomaly detection — a debugging aid; it slows training down.
    torch.autograd.set_detect_anomaly(True)

    device = torch.device("cuda" if args.gpu else "cpu")

    # Build and train the model.
    model = DeepFM(categorial_feature_vocabsize, continous_feature_names,
                   categorial_feature_names, embed_dim=64)
    model = model.to(device)
    # BUG FIX: honor the -lr command-line option instead of hard-coding 1e-3.
    optimizer = optim.Adam(model.parameters(), lr=args.lr, weight_decay=1e-3)

    best_acc = 0
    for ep in range(args.epoches):  # ep is the training epoch index
        train(ep)
        best_acc = test(ep, best_acc)