diff --git a/.gitignore b/.gitignore index 6121cab..f9bdb82 100644 --- a/.gitignore +++ b/.gitignore @@ -2,3 +2,4 @@ /runs/ .vim/ .ipynb_checkpoints +/models/ diff --git a/CIFAR-10.ipynb b/CIFAR-10.ipynb index c1e7125..407fed7 100644 --- a/CIFAR-10.ipynb +++ b/CIFAR-10.ipynb @@ -36,13 +36,14 @@ }, { "cell_type": "code", - "execution_count": 5, + "execution_count": 78, "metadata": {}, "outputs": [], "source": [ "import torch\n", "import torchvision\n", - "from torchvision import datasets, transforms" + "from torchvision import datasets, transforms\n", + "from time import time" ] }, { @@ -211,7 +212,7 @@ }, { "cell_type": "code", - "execution_count": 51, + "execution_count": 80, "metadata": {}, "outputs": [], "source": [ @@ -253,7 +254,7 @@ }, { "cell_type": "code", - "execution_count": 54, + "execution_count": 93, "metadata": {}, "outputs": [], "source": [ @@ -275,49 +276,311 @@ }, { "cell_type": "code", - "execution_count": 57, + "execution_count": 91, + "metadata": {}, + "outputs": [], + "source": [ + "def train(net, use_gpu=False):\n", + " if use_gpu:\n", + " net = net.cuda()\n", + "\n", + " start = time()\n", + " for epoch in range(2):\n", + " running_loss = 0.0\n", + " for i, data in enumerate(trainloader, 0):\n", + " # 取得数据\n", + " inputs, labels = data\n", + " if use_gpu:\n", + " inputs = inputs.cuda()\n", + " labels = labels.cuda()\n", + " # 清空参数的梯度\n", + " optimizer.zero_grad()\n", + " # 前向传递 + 反向传递 + 优化\n", + " outputs = net(inputs)\n", + " if use_gpu:\n", + " outputs = outputs.cuda()\n", + " loss = criterion(outputs, labels)\n", + " loss.backward()\n", + " optimizer.step()\n", + " # 输出统计数据\n", + " running_loss += loss.item()\n", + " if i % 2000 == 1999: # 每两千个 min-batch 输出 一次\n", + " print('[%d, %d] loss: %.3f' % (epoch + 1, i + 1, running_loss / 2000))\n", + " running_loss = 0.0\n", + " end = time()\n", + " print(f'Finished Training gpu: {use_gpu} elapsed: {end - start}')" + ] + }, + { + "cell_type": "code", + "execution_count": 94, "metadata": {}, 
"outputs": [ { "name": "stdout", "output_type": "stream", "text": [ - "[%d, %d] loss: %.3f (1, 2000, 2.2382535316944123)\n", - "[%d, %d] loss: %.3f (1, 4000, 1.8497418144643307)\n", - "[%d, %d] loss: %.3f (1, 6000, 1.6522739474773407)\n", - "[%d, %d] loss: %.3f (1, 8000, 1.5884874777793885)\n", - "[%d, %d] loss: %.3f (1, 10000, 1.4998263758718968)\n", - "[%d, %d] loss: %.3f (1, 12000, 1.4663247963935138)\n", - "[%d, %d] loss: %.3f (2, 2000, 1.3832163302749396)\n", - "[%d, %d] loss: %.3f (2, 4000, 1.3675593894198537)\n", - "[%d, %d] loss: %.3f (2, 6000, 1.343863714851439)\n", - "[%d, %d] loss: %.3f (2, 8000, 1.3134881352633239)\n", - "[%d, %d] loss: %.3f (2, 10000, 1.3093935765009372)\n", - "[%d, %d] loss: %.3f (2, 12000, 1.2925918441154063)\n", - "Finished Training\n" + "[1, 2000] loss: 2.185\n", + "[1, 4000] loss: 1.899\n", + "[1, 6000] loss: 1.716\n", + "[1, 8000] loss: 1.603\n", + "[1, 10000] loss: 1.552\n", + "[1, 12000] loss: 1.478\n", + "[2, 2000] loss: 1.414\n", + "[2, 4000] loss: 1.404\n", + "[2, 6000] loss: 1.367\n", + "[2, 8000] loss: 1.363\n", + "[2, 10000] loss: 1.317\n", + "[2, 12000] loss: 1.321\n", + "Finished Training gpu: True elapsed: 119.58787202835083\n" ] } ], "source": [ - "for epoch in range(2):\n", - " running_loss = 0.0\n", - " for i, data in enumerate(trainloader, 0):\n", - " # 取得数据\n", - " inputs, labels = data\n", - " # 清空参数的梯度\n", - " optimizer.zero_grad()\n", - " # 前向传递 + 反向传递 + 优化\n", - " outputs = net(inputs)\n", - " loss = criterion(outputs, labels)\n", - " loss.backward()\n", - " optimizer.step()\n", - " # 输出统计数据\n", - " running_loss += loss.item()\n", - " if i % 2000 == 1999: # 每两千个 min-batch 输出 一次\n", - " print('[%d, %d] loss: %.3f' % (epoch + 1, i + 1, running_loss / 2000))\n", - " running_loss = 0.0\n", - " \n", - "print('Finished Training')" + "train(net, True) # train with gpu" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "快速保存一下训练好的模型" + ] + }, + { + "cell_type": "code", + "execution_count": 99, 
+ "metadata": {}, + "outputs": [], + "source": [ + "MODEL_PATH = './models/cifar_10_net.pth'\n", + "torch.save(net.state_dict(), MODEL_PATH)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### 5. 测试网络\n", + "\n", + "现在,我们已经使用训练数据集对网络进行了两个周期的训练。是时候检验一下成果了。\n", + "\n", + "通过将测试数据集放进网络进行预测,再将预测结果与真实值进行对比,就可以衡量模型的效果。\n", + "\n", + "好,第一步,我们先看下测试集里面的图片长什么样子。" + ] + }, + { + "cell_type": "code", + "execution_count": 62, + "metadata": {}, + "outputs": [ + { + "data": { + "image/png": "\n", + "text/plain": [ + "
" + ] + }, + "metadata": { + "needs_background": "light" + }, + "output_type": "display_data" + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Ground truth: ship ship frog dog\n" + ] + } + ], + "source": [ + "images, labels = next(iter(testloader))\n", + "\n", + "imshow(torchvision.utils.make_grid(images))\n", + "print('Ground truth:', ' '.join('%5s' % classes[labels[j]] for j in range(4)))" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "接下来,把之前保存的模型加载进来(这里不是必须,主要是演示一下实际情况下应该怎么保存和加载模型)" + ] + }, + { + "cell_type": "code", + "execution_count": 100, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "" + ] + }, + "execution_count": 100, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "net_cpu = Net()\n", + "net_cpu.load_state_dict(torch.load(MODEL_PATH))" + ] + }, + { + "cell_type": "code", + "execution_count": 101, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "tensor([[ 2.7197, 4.4017, -2.5704, -2.7393, -2.8257, -3.1298, -3.3686, -3.0573,\n", + " 5.1788, 1.8149],\n", + " [ 0.1971, -2.3470, 1.6536, 0.7511, 0.4000, 0.1193, 0.6587, -1.4892,\n", + " 0.6543, -1.6453],\n", + " [-0.9781, -0.9720, -0.0265, 0.8705, 0.5040, 0.6212, 1.5730, 0.1373,\n", + " -0.9968, 0.0374],\n", + " [-1.0276, -0.6625, 0.1193, 0.9498, 1.0150, 0.7573, 1.2253, 1.2796,\n", + " -2.8082, 0.3094]], grad_fn=)" + ] + }, + "execution_count": 101, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "outputs = net_cpu(images)\n", + "outputs" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "模型的输出是一个二维数据 ,第一维代表着不同的图像,第二维是图像对于10个不同类别的应激能量,能量越高,代表着神经网络认为这个图片\n", + "属于对应类别的可能性越高。 因此,我们只要取得最高能量的元素的索引就知道它属于哪个类别了。" + ] + }, + { + "cell_type": "code", + "execution_count": 103, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Predicted: ship bird frog horse\n" + ] + }, + { + "data": { + 
"text/plain": [ + "(tensor([8, 2, 6, 7]), 0.0014629364013671875)" + ] + }, + "execution_count": 103, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "start = time()\n", + "with torch.no_grad():\n", + " _, predicted = torch.max(outputs, dim=1)\n", + " print('Predicted: ', ' '.join('%5s' % classes[predicted[j]]\n", + " for j in range(4)))\n", + "end = time()\n", + "predicted, end - start" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "命中率貌似有一半的样子,用整个测试集验证一下:" + ] + }, + { + "cell_type": "code", + "execution_count": 105, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Accuracy of the network on the 10000 test images: 55 %\n" + ] + } + ], + "source": [ + "correct = 0\n", + "total = 0\n", + "with torch.no_grad():\n", + " for data in testloader:\n", + " images, labels = data\n", + " outputs = net_cpu(images)\n", + " _, predicted = torch.max(outputs.data, 1)\n", + " total += labels.size(0)\n", + " correct += (predicted == labels).sum().item()\n", + "print('Accuracy of the network on the 10000 test images: %d %%' % (\n", + " 100 * correct / total))" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "瞎猜的命中率是10%,而网络的预测结果显然好得多,看起来网络确实学习到了一些东西。\n", + "我们来看看模型在不同类别上的表现如何。" + ] + }, + { + "cell_type": "code", + "execution_count": 106, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Accuracy of plane : 71 %\n", + "Accuracy of car : 70 %\n", + "Accuracy of bird : 38 %\n", + "Accuracy of cat : 27 %\n", + "Accuracy of deer : 49 %\n", + "Accuracy of dog : 36 %\n", + "Accuracy of frog : 64 %\n", + "Accuracy of horse : 70 %\n", + "Accuracy of ship : 64 %\n", + "Accuracy of truck : 58 %\n" + ] + } + ], + "source": [ + "class_correct = list(0. for i in range(10))\n", + "class_total = list(0. 
for i in range(10))\n", + "with torch.no_grad():\n", + " for data in testloader:\n", + " images, labels = data\n", + " outputs = net_cpu(images)\n", + " _, predicted = torch.max(outputs, 1)\n", + " c = (predicted == labels).squeeze()\n", + " for i in range(4):\n", + " label = labels[i]\n", + " class_correct[label] += c[i].item()\n", + " class_total[label] += 1\n", + "\n", + "\n", + "for i in range(10):\n", + " print('Accuracy of %5s : %2d %%' % (\n", + " classes[i], 100 * class_correct[i] / class_total[i]))" ] } ], diff --git a/TorchObjectDetection.ipynb b/TorchObjectDetection.ipynb new file mode 100644 index 0000000..c3ffe1c --- /dev/null +++ b/TorchObjectDetection.ipynb @@ -0,0 +1,52 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "本教程将会在预训练模型 [Mask R-CNN](https://arxiv.org/abs/1703.06870) 上针对 [Penn-Fudan Database for Pedestrian Detection and Segmentation](https://www.cis.upenn.edu/~jshi/ped_html/) 数据集进行调优。这个数据集有170张图片,345个行人,通过本教程可学习到如何使用 torchvision 的新特性来训练针对特定数据集的分割模型。\n", + "\n", + "\n", + "# 定义数据集\n", + "\n", + "按照训练物体检测、分割和人体关键点模型的参考脚本,可以很方便地支持添加新的自定义数据集。新的数据集必须继承 `torch.utils.data.Dataset` 类,同时实现 `__len__` 和 `__getitem__` 方法。\n", + "\n", + "唯一需要注意的是,我们要求 `__getitem__` 返回的格式如下:\n", + "\n", + "* image: 一个 `PILImage` 图像对象,其尺寸为 `(H,W)`\n", + "* target: 一个 `dict` 对象,含有以下的键:\n", + " * `boxes (FloatTensor[N, 4])`: 含有 `N` 个 bounding box 的数组,每个 bounding box 有4个元素,格式为`[x0, y0, x1, y1]`\n", + " * `labels (Int64Tensor[N])`: 每个 bounding box 的标签。 `0` 表示背景\n", + " * `image_id (Int64Tensor[1])`: 图像id,必须在整个数据集中唯一。\n", + " * `area (Tensor[N])`: bounding box 的面积。在使用 COCO metric 评估时,用于区分大小不同的 boxes 的得分\n", + " * `iscrowd (UInt8Tensor[N])`: 该值为True的实例在评估时会被忽略\n", + " * 可选 `masks (UInt8Tensor[N, H, W])`: 每个物体的分割蒙板\n", + " * 可选 `keypoints (FloatTensor[N, K, 3])`: 对于 `N` 个物体,含有 `K` 个关键点。关键点的格式为`[x, y, visibility]`。`visibility=0` 表示关键点不可见。\n", + "\n", + "\n", + "data source: https://www.cis.upenn.edu/~jshi/ped_html/PennFudanPed.zip" + ] + } + ], + "metadata": { + 
"kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.8.4" + } + }, + "nbformat": 4, + "nbformat_minor": 4 +}