Commit cf1735c

Author: zhangming8
Commit message: change ema; change train pipeline
1 parent c465c14 commit cf1735c

File tree: 9 files changed (+168, -104 lines)


README.md

Lines changed: 35 additions & 20 deletions
@@ -1,10 +1,12 @@
 ## A pytorch easy re-implement of "YOLOX: Exceeding YOLO Series in 2021"

 ## 1. Notes
+
 This is a pytorch easy re-implement of "YOLOX: Exceeding YOLO Series in 2021" [https://arxiv.org/abs/2107.08430]
 The repo is still under development

 ## 2. Environment
+
 pytorch>=1.7.0, python>=3.6, Ubuntu/Windows, see more in 'requirements.txt'

 cd /path/to/your/work
@@ -16,7 +18,9 @@

 #### Model Zoo

-All weights can be downloaded from [GoogleDrive](https://drive.google.com/drive/folders/1qEMLzikH5JwRNRoHpeCa6BJBeSQ6xXCH?usp=sharing) or [BaiduDrive](https://pan.baidu.com/s/1UsbdnyVwRJhr9Vy1tmJLeQ) (code:bc72)
+All weights can be downloaded
+from [GoogleDrive](https://drive.google.com/drive/folders/1qEMLzikH5JwRNRoHpeCa6BJBeSQ6xXCH?usp=sharing)
+or [BaiduDrive](https://pan.baidu.com/s/1UsbdnyVwRJhr9Vy1tmJLeQ) (code:bc72)

 |Model |test size |mAP<sup>val<br>0.5:0.95 |mAP<sup>test<br>0.5:0.95 | Params<br>(M) |
 | ------ |:---: |:---: | :---: |:---: |
@@ -28,9 +32,11 @@ All weights can be downloaded from [GoogleDrive](https://drive.google.com/drive/
 |yolox-x |640 |50.5 |51.1 |99.1 |
 |yolox-x |800 |51.2 |51.9 |99.1 |

-mAP was reevaluated on COCO val2017 and test2017, and some results are slightly better than the official implement [YOLOX](https://github.com/Megvii-BaseDetection/YOLOX). You can reproduce them by scripts in 'evaluate.sh'
+mAP was reevaluated on COCO val2017 and test2017, and some results are slightly better than the official
+implement [YOLOX](https://github.com/Megvii-BaseDetection/YOLOX). You can reproduce them by scripts in 'evaluate.sh'

 #### Dataset
+
 download COCO:
 http://images.cocodataset.org/zips/train2017.zip
 http://images.cocodataset.org/zips/val2017.zip
@@ -45,34 +51,37 @@ mAP was reevaluated on COCO val2017 and test2017, and some results are slightly
 change opt.dataset_path = "/path/to/dataset" in 'config.py'

 #### Train
+
 See more example in 'train.sh'
-a. Train from scratch:(backbone="CSPDarknet-s" means using yolox-s, and you can change it to any other backbone, eg: CSPDarknet-nano, tiny, s, m, l, x)
-python train.py gpus='0' backbone="CSPDarknet-s" num_epochs=300 exp_id="coco_CSPDarknet-s_640x640" use_amp=True val_intervals=2 data_num_workers=6 metric="ap" batch_size=48
+a. Train from scratch:(backbone="CSPDarknet-s" means using yolox-s, and you can change it, eg: CSPDarknet-nano, tiny, s, m, l, x)
+python train.py gpus='0' backbone="CSPDarknet-s" num_epochs=300 exp_id="coco_CSPDarknet-s_640x640" use_amp=True val_intervals=2 data_num_workers=6 batch_size=48

 b. Finetune, download pre-trained weight on COCO and finetune on customer dataset:
-python train.py gpus='0' backbone="CSPDarknet-s" num_epochs=300 exp_id="coco_CSPDarknet-s_640x640" use_amp=True val_intervals=2 data_num_workers=6 metric="ap" batch_size=48 load_model="../weights/yolox-s.pth" resume=False
+python train.py gpus='0' backbone="CSPDarknet-s" num_epochs=300 exp_id="coco_CSPDarknet-s_640x640" use_amp=True val_intervals=2 data_num_workers=6 batch_size=48 load_model="../weights/yolox-s.pth"

 c. Resume, you can use 'resume=True' when your training is accidentally stopped:
-python train.py gpus='0' backbone="CSPDarknet-s" num_epochs=300 exp_id="coco_CSPDarknet-s_640x640" use_amp=True val_intervals=2 data_num_workers=6 metric="ap" batch_size=48 load_model="exp/coco_CSPDarknet-s_640x640/model_last.pth" resume=True
-
-d. Some tips:
-Ⅰ You can also change params in 'train.sh'(these params will replace opt.xxx in config.py) and use 'nohup sh train.sh &' to train
-Ⅱ If you want to close mulit-size training, change opt.random_size = None in 'config.py' or set random_size=None in 'train.sh'
-Ⅲ Mulit-gpu train: change opt.gpus = "3,5,6,7"
-Ⅳ Visualized log by tensorboard: tensorboard --logdir exp/your_exp_id/logs_2021-08-xx-xx-xx and visit http://localhost:6006
+python train.py gpus='0' backbone="CSPDarknet-s" num_epochs=300 exp_id="coco_CSPDarknet-s_640x640" use_amp=True val_intervals=2 data_num_workers=6 batch_size=48 load_model="exp/coco_CSPDarknet-s_640x640/model_last.pth" resume=True
+
+#### Some Tips:
+
+a. You can also change params in 'train.sh'(these params will replace opt.xxx in config.py) and use 'nohup sh train.sh &' to train
+b. Multi-gpu train: set opt.gpus = "3,5,6,7" in 'config.py' or set gpus="3,5,6,7" in 'train.sh'
+c. If you want to close multi-size training, change opt.random_size = None in 'config.py' or set random_size=None in 'train.sh'
+d. random_size = (14, 26) means: Randomly select an integer from interval (14,26) and multiply by 32 as the input size
+e. Visualized log by tensorboard:
+tensorboard --logdir exp/your_exp_id/logs_2021-08-xx-xx-xx and visit http://localhost:6006
 Your can also use the following shell scripts:
-grep 'train epoch' exp/your_exp_id/logs_2021-08-xx-xx-xx/log.txt
-grep 'val epoch' exp/your_exp_id/logs_2021-08-xx-xx-xx/log.txt
-grep 'AP' exp/your_exp_id/logs_2021-08-xx-xx-xx/log.txt |grep 0.95
-
+(1) grep 'train epoch' exp/your_exp_id/logs_2021-08-xx-xx-xx/log.txt
+(2) grep 'val epoch' exp/your_exp_id/logs_2021-08-xx-xx-xx/log.txt
+
 #### Evaluate

-The weights will be saved in './exp/your_exp_id/model_xx.pth'
+Module weights will be saved in './exp/your_exp_id/model_xx.pth'
 change 'load_model'='weight/path/to/evaluate.pth' and backbone='backbone-type' in 'evaluate.sh'
 sh evaluate.sh
-
+
 #### Predict/Inference/Demo
-
+
 a. Predict images, change img_dir and load_model
 python predict.py gpus='0' backbone="CSPDarknet-s" vis_thresh=0.3 load_model="exp/coco_CSPDarknet-s_640x640/model_best.pth" img_dir='/path/to/dataset/images/val2017'

@@ -82,7 +91,7 @@ mAP was reevaluated on COCO val2017 and test2017, and some results are slightly
 You can also change params in 'predict.sh', and use 'sh predict.sh'

 #### Train Customer Dataset(VOC format)
-
+
 1. put your annotations(.xml) and images(.jpg) into:
 /path/to/voc_data/images/train2017/*.jpg # train images
 /path/to/voc_data/images/train2017/*.xml # train xml annotations
@@ -106,21 +115,27 @@ mAP was reevaluated on COCO val2017 and test2017, and some results are slightly
 ## 4. Multi/One-class Multi-object Tracking(MOT)

 #### one-class/single-class MOT Dataset
+
 DOING

 #### Multi-class MOT Dataset
+
 DOING

 #### Train
+
 DOING

 #### Evaluate
+
 DOING

 #### Predict/Inference/Demo
+
 DOING

 ## 5. Acknowledgement
+
 https://github.com/Megvii-BaseDetection/YOLOX
 https://github.com/PaddlePaddle/PaddleDetection
 https://github.com/open-mmlab/mmdetection
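Tip a in the Train section above says that parameters given on the command line or in 'train.sh' replace the matching opt.xxx defaults from 'config.py'. The repo's actual parsing code is not part of this commit, so the following is only an illustrative sketch of that kind of key=value override mechanism; the class and function names here are made up:

```python
import ast


class Opt:
    """Stand-in for the opt object from config.py (illustrative only)."""
    gpus = "0"
    backbone = "CSPDarknet-s"
    num_epochs = 300
    batch_size = 48
    use_amp = False


def apply_overrides(opt, argv):
    """Overwrite opt attributes from 'key=value' style arguments."""
    for arg in argv:
        if "=" not in arg:
            continue
        key, value = arg.split("=", 1)
        assert hasattr(opt, key), f"unknown option: {key}"
        try:
            value = ast.literal_eval(value)  # "48" -> 48, "True" -> True
        except (ValueError, SyntaxError):
            pass  # keep plain strings such as CSPDarknet-m unchanged
        setattr(opt, key, value)
    return opt


if __name__ == "__main__":
    opt = apply_overrides(Opt(), ["backbone=CSPDarknet-m", "batch_size=24", "use_amp=True"])
    print(opt.backbone, opt.batch_size, opt.use_amp)  # CSPDarknet-m 24 True
```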

config.py

Lines changed: 6 additions & 6 deletions
@@ -64,7 +64,6 @@ def update_nano_tiny(cfg, inp_params):
 opt.basic_lr_per_img = 0.01 / 64.0
 opt.scheduler = "yoloxwarmcos"
 opt.no_aug_epochs = 15 # close mixup and mosaic augments in the last 15 epochs
-opt.accumulate = 1 # real batch size = accumulate * batch_size
 opt.min_lr_ratio = 0.05
 opt.weight_decay = 5e-4
 opt.warmup_epochs = 5
@@ -87,13 +86,12 @@ def update_nano_tiny(cfg, inp_params):
 opt.ema = True # False, Exponential Moving Average
 opt.grad_clip = dict(max_norm=35, norm_type=2) # None, clip gradient makes training more stable
 opt.print_iter = 1 # print loss every 1 iteration
-opt.metric = "loss" # 'Ap' 'loss', used to save 'model_best.pth'
-opt.val_intervals = 1 # evaluate(when metric='Ap') and save best ckpt every 1 epoch
+opt.val_intervals = 2 # evaluate val dataset and save best ckpt every 2 epoch
 opt.save_epoch = 1 # save check point every 1 epoch
 opt.resume = False # resume from 'model_last.pth' when set True
-opt.use_amp = False # True
+opt.use_amp = False # True, Automatic mixed precision
 opt.cuda_benchmark = True
-opt.nms_thresh = 0.65
+opt.nms_thresh = 0.65 # nms IOU threshold in post process
 opt.occupy_mem = False # pre-allocate gpu memory for training to avoid memory Fragmentation.

 opt.rgb_means = [0.485, 0.456, 0.406]
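opt.ema = True in the hunk above enables an exponential moving average of the model weights, which is the feature the commit message says was changed. The repo's actual EMA class is not shown in this diff; below is only a generic sketch of how such a weight EMA is commonly maintained in YOLOX-style training loops, with illustrative names:

```python
import copy
import math

import torch
import torch.nn as nn


class WeightEMA:
    """Keep an exponential moving average of a model's weights.
    Generic sketch only; not the repo's actual EMA implementation."""

    def __init__(self, model, decay=0.9998):
        self.ema = copy.deepcopy(model).eval()  # shadow model used for eval / best checkpoint
        self.decay = decay
        self.updates = 0
        for p in self.ema.parameters():
            p.requires_grad_(False)

    @torch.no_grad()
    def update(self, model):
        self.updates += 1
        # ramp the decay up so the first updates track the live weights closely
        d = self.decay * (1 - math.exp(-self.updates / 2000))
        ema_state = self.ema.state_dict()
        for k, v in model.state_dict().items():
            if v.dtype.is_floating_point:
                ema_state[k].mul_(d).add_(v.detach(), alpha=1 - d)


if __name__ == "__main__":
    net = nn.Linear(4, 2)
    ema = WeightEMA(net)
    optim = torch.optim.SGD(net.parameters(), lr=0.1)
    for _ in range(3):
        loss = net(torch.randn(8, 4)).pow(2).mean()
        optim.zero_grad()
        loss.backward()
        optim.step()
        ema.update(net)  # call once after every optimizer step
    print(ema.ema.weight.mean().item(), net.weight.mean().item())
```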
@@ -113,7 +111,6 @@ def update_nano_tiny(cfg, inp_params):
 opt.label_name = new_label
 opt.num_classes = len(opt.label_name)
 opt.gpus_str = opt.gpus
-opt.metric = opt.metric.lower()
 opt.gpus = [int(i) for i in opt.gpus.split(',')]
 opt.gpus = [i for i in range(len(opt.gpus))] if opt.gpus[0] >= 0 else [-1]
 if opt.master_batch_size == -1:
@@ -131,6 +128,9 @@ def update_nano_tiny(cfg, inp_params):
 opt.load_model = os.path.join(opt.save_dir, 'model_last.pth')
 if opt.random_size is not None and (opt.random_size[1] - opt.random_size[0] > 1):
     opt.cuda_benchmark = False
+    # TODO, will stuck after evaluating when multi-size training
+    opt.val_intervals = 10000
+    print("[Warning] disable evaluate when multi-size training")
 if opt.reid_dim > 0:
     assert opt.tracking_id_nums is not None
 if opt.random_size is None:
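The hunk above special-cases multi-size training (opt.random_size is not None), and tip d in the README explains the convention: random_size = (14, 26) means an integer factor is drawn from that interval and multiplied by 32 to get the input resolution. A tiny illustration of that size selection follows; it is a sketch, not the repo's dataloader code, and whether both endpoints are included depends on the actual implementation:

```python
import random


def pick_input_size(random_size=(14, 26), stride=32):
    """Draw a factor from random_size and scale by the network stride (32)."""
    factor = random.randint(random_size[0], random_size[1])  # here: endpoints included
    return factor * stride  # a multiple of 32, e.g. 448..832 for (14, 26)


if __name__ == "__main__":
    random.seed(0)
    print(sorted({pick_input_size() for _ in range(20)}))  # all values are multiples of 32
```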

data/coco_dataset.py

Lines changed: 1 addition & 1 deletion
@@ -56,7 +56,7 @@ def get_dataloader(opt, no_aug=False):
 augment=False))
 val_sampler = torch.utils.data.SequentialSampler(val_dataset)
 val_kwargs = {"num_workers": opt.data_num_workers, "pin_memory": True, "sampler": val_sampler,
-"batch_size": opt.batch_size, "drop_last": True}
+"batch_size": opt.batch_size, "drop_last": False}
 val_loader = torch.utils.data.DataLoader(val_dataset, **val_kwargs)

 return train_loader, val_loader
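The only change here flips drop_last for the validation loader from True to False, so the final partial batch is no longer thrown away and every validation image gets evaluated. A self-contained illustration of the difference, using plain torch.utils.data rather than the repo's dataset classes:

```python
import torch
from torch.utils.data import DataLoader, TensorDataset

dataset = TensorDataset(torch.arange(10).float())  # pretend these are 10 val samples

for drop_last in (True, False):
    loader = DataLoader(dataset, batch_size=4, drop_last=drop_last)
    seen = sum(batch[0].numel() for batch in loader)
    print(f"drop_last={drop_last}: {len(loader)} batches, {seen} of {len(dataset)} samples seen")

# drop_last=True  -> 2 batches, 8 of 10 samples seen (2 images silently skipped)
# drop_last=False -> 3 batches, 10 of 10 samples seen
```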

data/datasets/coco.py

Lines changed: 10 additions & 3 deletions
@@ -2,9 +2,11 @@
 # -*- coding:utf-8 -*-
 # Copyright (c) Megvii, Inc. and its affiliates.

+import io
 import os
 import cv2
 import json
+import contextlib
 import numpy as np
 from pycocotools.coco import COCO
 from pycocotools.cocoeval import COCOeval
@@ -81,9 +83,14 @@ def run_coco_eval(self, results, save_dir):
 coco_eval = COCOeval(self.coco, coco_det, "bbox")
 coco_eval.evaluate()
 coco_eval.accumulate()
-coco_eval.summarize()
-ap, ap_0_5 = coco_eval.stats[0], coco_eval.stats[1]
-return ap, ap_0_5
+
+redirect_string = io.StringIO()
+with contextlib.redirect_stdout(redirect_string):
+    coco_eval.summarize()
+str_result = redirect_string.getvalue()
+ap, ap_0_5, ap_7_5, ap_small, ap_medium, ap_large = coco_eval.stats[:6]
+print(str_result)
+return ap, ap_0_5, ap_7_5, ap_small, ap_medium, ap_large, str_result

 def _load_coco_annotations(self):
     return [self.load_anno_from_ids(_ids) for _ids in self.ids]
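The rewritten run_coco_eval captures what COCOeval.summarize() prints so the summary can be returned as str_result (and logged later) alongside the first six entries of coco_eval.stats. The capture itself is plain standard library; a stripped-down demonstration with an ordinary print standing in for the pycocotools call:

```python
import contextlib
import io


def summarize():
    # stands in for coco_eval.summarize(), which prints its AP/AR table to stdout
    print(" Average Precision  (AP) @[ IoU=0.50:0.95 | area=   all ] = 0.404")


redirect_string = io.StringIO()
with contextlib.redirect_stdout(redirect_string):
    summarize()  # output goes into the buffer instead of the console
str_result = redirect_string.getvalue()

print("captured %d characters" % len(str_result))
print(str_result)  # the summary text is now reusable as a plain string
```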

models/yolox.py

Lines changed: 2 additions & 2 deletions
@@ -64,7 +64,7 @@ def forward(self, inputs, targets=None, show_time=False):
 body_feats = self.backbone(inputs)
 neck_feats = self.neck(body_feats)
 yolo_outputs = self.head(neck_feats)
-# print('yolo_outputs:', [[i.shape, i.dtype] for i in yolo_outputs]) # float16 when use_amp=True
+# print('yolo_outputs:', [[i.shape, i.dtype, i.device] for i in yolo_outputs]) # float16 when use_amp=True

 if show_time:
     s2 = sync_time(inputs)
@@ -73,7 +73,7 @@ def forward(self, inputs, targets=None, show_time=False):
 if targets is not None:
     loss = self.loss(yolo_outputs, targets)
     # for k, v in loss.items():
-    # print(k, v, v.dtype) # always float32
+    # print(k, v, v.dtype, v.device) # always float32

 if targets is not None:
     return yolo_outputs, loss
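The updated debug comments note that the head outputs become float16 when use_amp=True while the loss values stay float32. Below is a minimal sketch of that mixed-precision pattern combined with the gradient clipping configured by opt.grad_clip (max_norm=35, norm_type=2) in config.py; it is an illustrative training step, not the repo's actual loop, and it simply falls back to full precision on machines without CUDA:

```python
import torch
import torch.nn as nn

device = "cuda" if torch.cuda.is_available() else "cpu"
use_amp = device == "cuda"

model = nn.Linear(16, 4).to(device)
optimizer = torch.optim.SGD(model.parameters(), lr=0.01)
scaler = torch.cuda.amp.GradScaler(enabled=use_amp)

x = torch.randn(8, 16, device=device)
with torch.cuda.amp.autocast(enabled=use_amp):
    out = model(x)            # float16 under CUDA autocast, like the yolo head outputs
    loss = out.pow(2).mean()  # loss-style reductions come back as float32
print(out.dtype, loss.dtype, out.device)

scaler.scale(loss).backward()
scaler.unscale_(optimizer)    # unscale first so clipping sees real gradient magnitudes
torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=35, norm_type=2)
scaler.step(optimizer)
scaler.update()
```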
