Skip to content

Commit c2cb2aa

Browse files
update backend for PyTorch Update (#130)
* update backend * version fixes #123
1 parent a0fe622 commit c2cb2aa

File tree

16 files changed: +38 -27 lines changed

LICENSE

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
MIT License
22

3-
Copyright (c) 2017 Hang Zhang. All rights reserved.
4-
Copyright (c) 2018 Amazon.com, Inc. or its affiliates. All rights reserved.
3+
Copyright (c) 2017- Hang Zhang. All rights reserved.
4+
Copyright (c) 2018- Amazon.com, Inc. or its affiliates. All rights reserved.
55

66
Permission is hereby granted, free of charge, to any person obtaining a copy
77
of this software and associated documentation files (the "Software"), to deal

docs/source/experiments/segmentation.rst

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -83,15 +83,15 @@ Test Pre-trained Model
8383

8484

8585
<code xml:space="preserve" id="cmd_enc101_ade" style="display: none; text-align: left; white-space: pre-wrap">
86-
CUDA_VISIBLE_DEVICES=0,1,2,3 python train.py --dataset ADE20K --model EncNet --aux --se-loss --backbone resnet101
86+
CUDA_VISIBLE_DEVICES=0,1,2,3 python train.py --dataset ADE20K --model EncNet --aux --se-loss --backbone resnet101 --base-size 640 --crop-size 576
8787
</code>
8888

8989
<code xml:space="preserve" id="cmd_enc101_voc" style="display: none; text-align: left; white-space: pre-wrap">
9090
# First finetuning COCO dataset pretrained model on augmented set
9191
# You can also train from scratch on COCO by yourself
92-
CUDA_VISIBLE_DEVICES=0,1,2,3 python train.py --dataset Pascal_aug --model-zoo EncNet_Resnet101_COCO --aux --se-loss --lr 0.001 --syncbn --ngpus 4 --checkname res101
92+
CUDA_VISIBLE_DEVICES=0,1,2,3 python train.py --dataset Pascal_aug --model-zoo EncNet_Resnet101_COCO --aux --se-loss --lr 0.001 --syncbn --ngpus 4 --checkname res101 --ft
9393
# Finetuning on original set
94-
CUDA_VISIBLE_DEVICES=0,1,2,3 python train.py --dataset Pascal_voc --model encnet --aux --se-loss --backbone resnet101 --lr 0.0001 --syncbn --ngpus 4 --checkname res101 --resume runs/Pascal_aug/encnet/res101/checkpoint.params
94+
CUDA_VISIBLE_DEVICES=0,1,2,3 python train.py --dataset Pascal_voc --model encnet --aux --se-loss --backbone resnet101 --lr 0.0001 --syncbn --ngpus 4 --checkname res101 --resume runs/Pascal_aug/encnet/res101/checkpoint.params --ft
9595
</code>
9696

9797
Quick Demo

docs/source/experiments/texture.rst

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -22,7 +22,7 @@ Test Pre-trained Model
2222
cd PyTorch-Encoding/
2323
python scripts/prepare_minc.py
2424

25-
- Download pre-trained model (pre-trained on train-1 split using single training size of 224, with an error rate of :math:`19.70\%` using single crop on test-1 set)::
25+
- Download pre-trained model (pre-trained on train-1 split using single training size of 224, with an error rate of :math:`18.96\%` using single crop on test-1 set)::
2626

2727
cd experiments/recognition
2828
python model/download_models.py

encoding/__init__.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -10,4 +10,4 @@
1010

1111
"""An optimized PyTorch package with CUDA backend."""
1212
from .version import __version__
13-
from . import nn, functions, dilated, parallel, utils, models, datasets, optimizer
13+
from . import nn, functions, dilated, parallel, utils, models, datasets

encoding/datasets/cityscapes.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@
66

77
import os
88
import sys
9+
import random
910
import numpy as np
1011
from tqdm import tqdm, trange
1112
from PIL import Image, ImageOps, ImageFilter
@@ -93,7 +94,7 @@ def _sync_transform(self, img, mask):
9394
mask = mask.transpose(Image.FLIP_LEFT_RIGHT)
9495
crop_size = self.crop_size
9596
# random scale (short edge sampled as a multiple of self.base_size)
96-
short_size = random.randint(int(self.base_size*0.5), int(self.base_size*2.5))
97+
short_size = random.randint(int(self.base_size*0.5), int(self.base_size*2.0))
9798
w, h = img.size
9899
if h > w:
99100
ow = short_size

encoding/lib/cpu/nms_cpu.cpp

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,4 @@
1+
#include <torch/tensor.h>
12
#include <ATen/ATen.h>
23
#include <ATen/NativeFunctions.h>
34

@@ -42,7 +43,8 @@ std::vector<at::Tensor> Non_Max_Suppression_CPU(
4243

4344
auto num_boxes = input.size(1);
4445
auto batch_size = input.size(0);
45-
auto mask = input.type().toScalarType(at::kByte).tensor({batch_size, num_boxes});
46+
auto mask = torch::zeros({batch_size, num_boxes}, input.type().toScalarType(at::kByte));
47+
//auto mask = input.type().toScalarType(at::kByte).tensor({batch_size, num_boxes});
4648
mask.fill_(1);
4749
auto *rawMask = mask.data<unsigned char>();
4850
auto *rawIdx = sorted_inds.data<int64_t>();

encoding/lib/cpu/roi_align_cpu.cpp

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,4 @@
1+
#include <torch/tensor.h>
12
#include <ATen/ATen.h>
23
//#include <omp.h>
34

@@ -404,7 +405,7 @@ at::Tensor ROIAlign_Forward_CPU(
404405
AT_ASSERT(roi_cols == 4 || roi_cols == 5);
405406

406407
// Output at::Tensor is (num_rois, C, pooled_height, pooled_width)
407-
auto output = input.type().tensor({num_rois, channels, pooled_height, pooled_width});
408+
auto output = torch::zeros({num_rois, channels, pooled_height, pooled_width}, input.options());
408409

409410
AT_ASSERT(input.is_contiguous());
410411
AT_ASSERT(bottom_rois.is_contiguous());
@@ -451,7 +452,7 @@ at::Tensor ROIAlign_Backward_CPU(
451452
AT_ASSERT(roi_cols == 4 || roi_cols == 5);
452453

453454
// Output at::Tensor is (num_rois, C, pooled_height, pooled_width)
454-
auto grad_in = bottom_rois.type().tensor({b_size, channels, height, width}).zero_();
455+
auto grad_in = torch::zeros({b_size, channels, height, width}, bottom_rois.options());
455456

456457
AT_ASSERT(bottom_rois.is_contiguous());
457458

encoding/lib/cpu/syncbn_cpu.cpp

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,4 @@
1+
#include <torch/tensor.h>
12
#include <ATen/ATen.h>
23
#include <vector>
34

@@ -45,8 +46,8 @@ std::vector<at::Tensor> BatchNorm_Backward_CPU(
4546
std::vector<at::Tensor> Sum_Square_Forward_CPU(
4647
const at::Tensor input) {
4748
/* outputs */
48-
at::Tensor sum = input.type().tensor({input.size(1)}).zero_();
49-
at::Tensor square = input.type().tensor({input.size(1)}).zero_();
49+
at::Tensor sum = torch::zeros({input.size(1)}, input.options());
50+
at::Tensor square = torch::zeros({input.size(1)}, input.options());
5051
return {sum, square};
5152
}
5253

encoding/lib/gpu/encoding_kernel.cu

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
#include <vector>
2+
#include <torch/tensor.h>
23
#include <ATen/ATen.h>
34
#include <ATen/cuda/CUDAContext.h>
45

@@ -165,7 +166,7 @@ at::Tensor Aggregate_Forward_CUDA(
165166
const at::Tensor X_,
166167
const at::Tensor C_) {
167168
/* Device tensors */
168-
auto E_ = A_.type().tensor({A_.size(0), C_.size(0), C_.size(1)}).zero_();
169+
auto E_ = torch::zeros({A_.size(0), C_.size(0), C_.size(1)}, A_.options());
169170
cudaStream_t stream = at::cuda::getCurrentCUDAStream();
170171
// B, K, D
171172
dim3 blocks(C_.size(1), C_.size(0), X_.size(0));
@@ -214,7 +215,7 @@ at::Tensor ScaledL2_Forward_CUDA(
214215
const at::Tensor X_,
215216
const at::Tensor C_,
216217
const at::Tensor S_) {
217-
auto SL_ = X_.type().tensor({X_.size(0), X_.size(1), C_.size(0)}).zero_();
218+
auto SL_ = torch::zeros({X_.size(0), X_.size(1), C_.size(0)}, X_.options());
218219
cudaStream_t stream = at::cuda::getCurrentCUDAStream();
219220
dim3 blocks(C_.size(0), X_.size(1), X_.size(0));
220221
dim3 threads(getNumThreads(C_.size(1)));

encoding/lib/gpu/encodingv2_kernel.cu

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
#include <vector>
2+
#include <torch/tensor.h>
23
#include <ATen/ATen.h>
34
#include <ATen/Functions.h>
45
#include <ATen/cuda/CUDAContext.h>
@@ -239,7 +240,7 @@ at::Tensor Encoding_Dist_Inference_Forward_CUDA(
239240
const at::Tensor STD_) {
240241
// const at::Tensor S_,
241242
// X \in R^{B, N, D}, C \in R^{K, D}, S \in R^K
242-
auto KD_ = X_.type().tensor({X_.size(0), X_.size(1), C_.size(0)}).zero_();
243+
auto KD_ = torch::zeros({X_.size(0), X_.size(1), C_.size(0)}, X_.options());
243244
// E(x), E(x^2)
244245
int N = X_.size(0) * X_.size(1);
245246
cudaStream_t stream = at::cuda::getCurrentCUDAStream();
@@ -301,7 +302,7 @@ std::vector<at::Tensor> Encoding_Dist_Forward_CUDA(
301302
double eps) {
302303
// const at::Tensor S_,
303304
// X \in R^{B, N, D}, C \in R^{K, D}, S \in R^K
304-
auto KD_ = X_.type().tensor({X_.size(0), X_.size(1), C_.size(0)}).zero_();
305+
auto KD_ = torch::zeros({X_.size(0), X_.size(1), C_.size(0)}, X_.options());
305306
// E(x), E(x^2)
306307
int N = X_.size(0) * X_.size(1);
307308
auto SVar_ = (X_.pow(2).sum(0).sum(0).view({1, X_.size(2)}) -
@@ -373,7 +374,7 @@ at::Tensor AggregateV2_Forward_CUDA(
373374
const at::Tensor C_,
374375
const at::Tensor STD_) {
375376
/* Device tensors */
376-
auto E_ = A_.type().tensor({A_.size(0), C_.size(0), C_.size(1)}).zero_();
377+
auto E_ = torch::zeros({A_.size(0), C_.size(0), C_.size(1)}, A_.options());
377378
// auto IS_ = 1.0f / (S_ + eps).sqrt();
378379
cudaStream_t stream = at::cuda::getCurrentCUDAStream();
379380
// B, K, D

0 commit comments

Comments
 (0)