Weihong-Liu
diff --git a/‎anchor-cluster.py
+115 b/‎anchor-cluster.py
+115
diff --git a/‎check_voc.py
+35 b/‎check_voc.py
+35
diff --git a/‎gen_tfrecord.py
+100 b/‎gen_tfrecord.py
+100
diff --git a/‎gen_yolo_train_test.py
+110 b/‎gen_yolo_train_test.py
+110
@@ -0,0 +1,115 @@
+#coding=utf-8
+import xml.etree.ElementTree as ET
+import numpy as np
+import glob
+ 
+def iou(box, clusters):
+    """
+    Compute the IoU between one ground-truth box and each of k anchor boxes.
+
+    Only widths and heights are compared: the intersection is taken as
+    min(width) * min(height) for every anchor.
+
+    Parameters:
+        box: tuple or array whose first two items are the ground-truth
+            width and height.
+        clusters: numpy array of shape (k, 2) holding one (width, height)
+            pair per anchor.
+
+    Returns:
+        numpy array with the IoU of the box against every anchor.
+
+    Raises:
+        ValueError: if any overlap width or overlap height is exactly zero.
+    """
+    box_w, box_h = box[0], box[1]
+    overlap_w = np.minimum(clusters[:, 0], box_w)
+    overlap_h = np.minimum(clusters[:, 1], box_h)
+    # A zero overlap means the box or an anchor is degenerate.
+    if np.any(overlap_w == 0) or np.any(overlap_h == 0):
+        raise ValueError("Box has no area")
+    intersection = overlap_w * overlap_h
+    union = box_w * box_h + clusters[:, 0] * clusters[:, 1] - intersection
+    return intersection / union
+
+
+def avg_iou(boxes, clusters):
+    """
+    Return the mean, over all ground-truth boxes, of each box's best IoU
+    against the given anchors.
+    """
+    best_per_box = [np.max(iou(box, clusters)) for box in boxes]
+    return np.mean(best_per_box)
+
+def kmeans(boxes, k, dist=np.median):
+    """
+    Run k-means on box sizes using 1 - IoU as the distance.
+
+    Parameters:
+        boxes: numpy array of shape (r, 2) with the (width, height) of the
+            r ground-truth boxes.
+        k: number of anchors (clusters) to produce.
+        dist: reduction used to recompute a cluster centre from its members;
+            it is called as dist(members, axis=0).
+
+    Returns:
+        numpy array of shape (k, 2) holding the k anchor sizes.
+
+    Raises:
+        ValueError: propagated from iou() for zero-sized boxes, and from
+            np.random.choice when k is larger than the number of boxes.
+    """
+    rows = boxes.shape[0]
+    # distances[i, j] is the 1 - IoU distance from box i to anchor j.
+    distances = np.empty((rows, k))
+    # Start from an assignment that argmin can never produce. With zeros, a
+    # first pass that puts every box in cluster 0 would be mistaken for
+    # convergence and the random initial anchors returned unchanged.
+    last_clusters = np.full((rows,), -1)
+
+    # The global RNG is deliberately not re-seeded here, so a seed set by
+    # the caller (np.random.seed(n)) yields reproducible anchors.
+    # Fancy indexing copies, so updating clusters never modifies boxes.
+    clusters = boxes[np.random.choice(rows, k, replace=False)]
+
+    while True:
+        for row in range(rows):
+            distances[row] = 1 - iou(boxes[row], clusters)
+        # Index of the closest anchor for every box.
+        nearest_clusters = np.argmin(distances, axis=1)
+        # Converged once no box changes its anchor.
+        if (last_clusters == nearest_clusters).all():
+            break
+        for cluster in range(k):
+            members = boxes[nearest_clusters == cluster]
+            # Keep the previous centre for an empty cluster: reducing an
+            # empty array gives NaN, which would poison every later IoU.
+            if len(members) > 0:
+                clusters[cluster] = dist(members, axis=0)
+        last_clusters = nearest_clusters
+
+    return clusters
+
+# 加载自己的数据集，只需要所有labelimg标注出来的xml文件即可
+def load_dataset(path):
+    """
+    Collect normalised box sizes from the labelimg/VOC xml files in a folder.
+
+    Every *.xml file directly inside path is parsed; each object's bounding
+    box is divided by the image width/height read from the same file.
+
+    Parameters:
+        path: directory that contains the annotation xml files.
+
+    Returns:
+        numpy float64 array of shape (n, 2) with one (width, height) row per
+        usable box; shape (0, 2) when nothing usable was found.
+
+    The name of a file is printed whenever it declares a non-positive image
+    size or contains a box with non-positive width or height. Such entries
+    are skipped, because a zero-sized box makes iou() raise later on.
+    """
+    dataset = []
+    for xml_file in glob.glob("{}/*.xml".format(path)):
+        tree = ET.parse(xml_file)
+        # float() accepts values written as "375.0" and guarantees true
+        # division regardless of the interpreter's integer semantics.
+        height = float(tree.findtext("./size/height"))
+        width = float(tree.findtext("./size/width"))
+        if width <= 0 or height <= 0:
+            print(xml_file)
+            continue
+
+        for obj in tree.iter("object"):
+            xmin = float(obj.findtext("bndbox/xmin")) / width
+            ymin = float(obj.findtext("bndbox/ymin")) / height
+            xmax = float(obj.findtext("bndbox/xmax")) / width
+            ymax = float(obj.findtext("bndbox/ymax")) / height
+            box_w = xmax - xmin
+            box_h = ymax - ymin
+            if box_w <= 0 or box_h <= 0:
+                # Report the offending file and leave the box out.
+                print(xml_file)
+                continue
+            dataset.append([box_w, box_h])
+    # reshape keeps the (n, 2) layout even when the list is empty.
+    return np.array(dataset, dtype=np.float64).reshape(-1, 2)
+ 
+if __name__ == '__main__':
+    # Command-line entry point: cluster the box sizes of a VOC dataset into
+    # anchors and print them together with their aspect ratios.
+    import argparse
+    import os
+
+    parser = argparse.ArgumentParser()
+    # Required: os.path.join() below cannot work with a missing (None) root.
+    parser.add_argument('--voc-root', help="VOC格式数据集路径", type=str, required=True)
+    parser.add_argument('--clusters', help="anchor数量", type=int, default=9)
+    # Parsed as int: a str value cannot be multiplied with the anchor array.
+    parser.add_argument('--input-size', help="输入网络大小", type=int, default=416)
+
+    args = parser.parse_args()
+
+    ANNOTATIONS_PATH = os.path.join(args.voc_root, 'Annotations')  # folder holding the xml files
+    CLUSTERS = args.clusters    # number of clusters, i.e. number of anchors
+    INPUTDIM = args.input_size  # network input size used to scale the anchors
+
+    data = load_dataset(ANNOTATIONS_PATH)
+    out = kmeans(data, k=CLUSTERS)
+    print('Boxes:')
+    # Anchors are normalised to [0, 1]; scale them to input pixels.
+    print(np.array(out)*INPUTDIM)
+    print("Accuracy: {:.2f}%".format(avg_iou(data, out) * 100))
+    # Width / height ratio of every anchor.
+    final_anchors = np.around(out[:, 0] / out[:, 1], decimals=2).tolist()
+    print("Before Sort Ratios:\n {}".format(final_anchors))
+    print("After Sort Ratios:\n {}".format(sorted(final_anchors)))
+    
@@ -0,0 +1,35 @@
+from pathlib import Path
+import os
+import argparse
+
+def check_files(ann_root, img_root):
+    """
+    Check that the annotation folder and the image folder hold the same
+    set of file stems (file names without their final extension).
+
+    Parameters:
+        ann_root: path of the annotation directory.
+        img_root: path of the image directory.
+
+    Returns:
+        True when both directories contain exactly the same stems, False
+        otherwise. The result is also printed.
+
+    Raises:
+        Exception: if either path does not exist.
+    """
+    if not os.path.exists(ann_root):
+        raise Exception("标注文件路径错误")
+    if not os.path.exists(img_root):
+        raise Exception("图像文件路径错误")
+
+    # List each directory on its own. Zipping the two listings stops at the
+    # shorter one, so surplus files in the longer directory would be ignored
+    # and a mismatch could be reported as a match.
+    ann_stems = {entry.stem for entry in Path(ann_root).iterdir()}
+    img_stems = {entry.stem for entry in Path(img_root).iterdir()}
+
+    matched = ann_stems == img_stems
+    if matched:
+        print('标注文件和图像文件匹配')
+    else:
+        print('标注文件和图像文件不匹配')
+    return matched
+
+if __name__ == "__main__":
+    # Read the dataset root from the command line and compare its
+    # Annotations and JPEGImages sub-folders.
+    cli = argparse.ArgumentParser()
+    cli.add_argument(
+        '--voc-root',
+        type=str,
+        required=True,
+        help='VOC格式数据集根目录，该目录下必须包含JPEGImages和Annotations这两个文件夹',
+    )
+    opt = cli.parse_args()
+
+    XML_DIR = os.path.join(opt.voc_root, "Annotations")
+    IMG_DIR = os.path.join(opt.voc_root, "JPEGImages")
+
+    check_files(XML_DIR, IMG_DIR)
@@ -0,0 +1,100 @@
+from __future__ import division  
+from __future__ import print_function  
+from __future__ import absolute_import  
+  
+import os  
+import io  
+import pandas as pd  
+import tensorflow as tf  
+  
+from PIL import Image  
+from object_detection.utils import dataset_util  
+from collections import namedtuple, OrderedDict  
+import tqdm
+import argparse
+
+# flags = tf.app.flags  
+# flags.DEFINE_string('csv_input', '', 'Path to the CSV input')  
+# flags.DEFINE_string('output_path', '', 'Path to output TFRecord')  
+# FLAGS = flags.FLAGS  
+# TO-DO replace this with label map  
+# The 20 class names. Order matters when this list is given to
+# class_text_to_int, which maps a name to its 1-based position.
+labels = [
+    'cow', 'tvmonitor', 'car', 'aeroplane', 'sheep',
+    'motorbike', 'train', 'chair', 'person', 'sofa',
+    'pottedplant', 'diningtable', 'horse', 'bottle',
+    'boat', 'bus', 'bird', 'bicycle', 'cat', 'dog',
+]
+
+def class_text_to_int(row_label, labels):
+    """Return the 1-based position of row_label inside labels.
+
+    A ValueError from index() propagates when the label is not found.
+    """
+    zero_based = labels.index(row_label)
+    return zero_based + 1
+  
+def split(df, group):
+    """Group df by the given column and return one record per group.
+
+    Each record is a namedtuple 'data' with two fields: filename (the group
+    key) and object (the rows of df that belong to that key).
+    """
+    data = namedtuple('data', ['filename', 'object'])
+    grouped = df.groupby(group)
+    records = []
+    for key in grouped.groups:
+        records.append(data(key, grouped.get_group(key)))
+    return records
+  
+  
+def create_tf_example(group, path):
+    """Build a tf.train.Example for one image and all of its boxes.
+
+    Parameters:
+        group: record produced by split(); group.filename is the image file
+            name and group.object holds that image's rows with the columns
+            'xmin', 'xmax', 'ymin', 'ymax' and 'class'.
+        path: directory in which the image file is looked up.
+
+    Returns:
+        tf.train.Example containing the encoded image bytes, the image size,
+        the box corners divided by the image size, and the class names and
+        integer ids.
+
+    Raises:
+        ValueError: if a row's class is not listed in the module-level
+            labels list.
+    """
+    with tf.io.gfile.GFile(os.path.join(path, '{}'.format(group.filename)), 'rb') as fid:
+        encoded_jpg = fid.read()
+    # Decode once only to obtain the image size; the raw bytes are stored.
+    encoded_jpg_io = io.BytesIO(encoded_jpg)
+    image = Image.open(encoded_jpg_io)
+    width, height = image.size
+
+    filename = group.filename.encode('utf8')
+    image_format = b'jpg'
+    xmins = []
+    xmaxs = []
+    ymins = []
+    ymaxs = []
+    classes_text = []
+    classes = []
+
+    for index, row in group.object.iterrows():
+        # Corners are stored relative to the image size.
+        xmins.append(row['xmin'] / width)
+        xmaxs.append(row['xmax'] / width)
+        ymins.append(row['ymin'] / height)
+        ymaxs.append(row['ymax'] / height)
+        classes_text.append(row['class'].encode('utf8'))
+        # Look the class up in the label list. Passing the file name here
+        # would search for the class name inside the file-name string.
+        classes.append(class_text_to_int(row['class'], labels))
+
+    tf_example = tf.train.Example(features=tf.train.Features(feature={
+        'image/height': dataset_util.int64_feature(height),
+        'image/width': dataset_util.int64_feature(width),
+        'image/filename': dataset_util.bytes_feature(filename),
+        'image/source_id': dataset_util.bytes_feature(filename),
+        'image/encoded': dataset_util.bytes_feature(encoded_jpg),
+        'image/format': dataset_util.bytes_feature(image_format),
+        'image/object/bbox/xmin': dataset_util.float_list_feature(xmins),
+        'image/object/bbox/xmax': dataset_util.float_list_feature(xmaxs),
+        'image/object/bbox/ymin': dataset_util.float_list_feature(ymins),
+        'image/object/bbox/ymax': dataset_util.float_list_feature(ymaxs),
+        'image/object/class/text': dataset_util.bytes_list_feature(classes_text),
+        'image/object/class/label': dataset_util.int64_list_feature(classes),
+    }))
+    return tf_example
+  
+  
+def main(csv_input, output_path, image_dir=None):
+    """Convert the boxes listed in a CSV file into a single TFRecord file.
+
+    Parameters:
+        csv_input: path of the CSV file; rows are grouped by their
+            'filename' column, one example per image.
+        output_path: path of the TFRecord file to write.
+        image_dir: directory that contains the images; defaults to the
+            'images' folder inside the current working directory.
+    """
+    if image_dir is None:
+        image_dir = os.path.join(os.getcwd(), 'images')
+    # Read the CSV before opening the writer so that an unreadable CSV does
+    # not leave an empty output file behind.
+    examples = pd.read_csv(csv_input)
+    grouped = split(examples, 'filename')
+    # The context manager closes the writer even if a conversion fails.
+    with tf.io.TFRecordWriter(output_path) as writer:
+        for num, group in enumerate(grouped, start=1):
+            tf_example = create_tf_example(group, image_dir)
+            writer.write(tf_example.SerializeToString())
+            if num % 100 == 0:  # progress report every 100 images
+                print(num)
+
+    output_path = os.path.join(os.getcwd(), output_path)
+    print('Successfully created the TFRecords: {}'.format(output_path))
+  
+  
+if __name__ == '__main__':
+    # Command-line entry point: read the CSV path and the output path, then
+    # run the conversion.
+    cli = argparse.ArgumentParser()
+    cli.add_argument(
+        "--csv_input",
+        type=str,
+        required=True,
+        help="csv文件路径",
+    )
+    cli.add_argument(
+        "--output_path",
+        type=str,
+        default="pascal_voc2007.tfrecord",
+        help="tfrecord文件数据路径,默认保存在当前路径",
+    )
+    opt = cli.parse_args()
+    main(opt.csv_input, opt.output_path)
@@ -0,0 +1,110 @@
+'''
+Pascal VOC格式数据集生成ImageSets/Main/train.txt,val.txt,trainval.txt和test.txt
+'''
+from pathlib import Path
+import os
+import sys
+# from voc2coco import voc_root
+import xml.etree.ElementTree as ET
+import random
+import argparse
+from sklearn.model_selection import train_test_split
+from sklearn.utils import shuffle
+import shutil
+
+def mkdir(path):
+    """Create the directory path (including parents) unless it exists.
+
+    Surrounding whitespace and trailing backslashes are removed from path
+    first. Returns True when the directory was created and False when the
+    path already existed; a message is printed in both cases.
+    """
+    cleaned = path.strip().rstrip("\\")
+    if os.path.exists(cleaned):
+        # Nothing to create: report and signal that no work was done.
+        print(cleaned + ' 目录已存在')
+        return False
+    os.makedirs(cleaned)
+    print(cleaned + ' 创建成功')
+    return True
+
+def write_txt(txt_path, data):
+    '''Write each item of data to txt_path as str(item), one per line.'''
+    with open(txt_path, 'w') as handle:
+        handle.writelines(str(item) + '\n' for item in data)
+            
+if __name__ == '__main__':
+    # Command-line entry point. Writes image-path lists into the YOLO dataset
+    # root, either copied from a VOC ImageSets/Main folder or produced by a
+    # random split of the images folder.
+
+    def _str2bool(value):
+        """Parse a command-line boolean.
+
+        argparse's type=bool calls bool() on the raw string, so every
+        non-empty value, including "False", would be treated as True.
+        """
+        if isinstance(value, bool):
+            return value
+        text = value.strip().lower()
+        if text in ('true', 't', 'yes', 'y', '1'):
+            return True
+        if text in ('false', 'f', 'no', 'n', '0', ''):
+            return False
+        raise argparse.ArgumentTypeError('invalid boolean value: {!r}'.format(value))
+
+    parser = argparse.ArgumentParser()
+    parser.add_argument('--yolo-root', type=str, required=True,
+        help='YOLO格式数据集根目录，该目录下必须包含images和labels这两个文件夹')
+    # nargs='?' with const=True lets a bare "--from_voc" work as a switch
+    # while "--from_voc True" / "--from_voc False" keep working.
+    parser.add_argument('--from_voc', type=_str2bool, nargs='?', const=True, default=False,
+        help='从VOC数据集中的ImageSets/Main文件夹下提取')
+    parser.add_argument('--voc-root', type=str,
+        help='VOC数据集路径，需要包含ImageSets/Main文件夹')
+    # This value is passed as test_size below, so it is the test-set ratio.
+    parser.add_argument('--test-ratio', type=float, default=0.2,
+        help='测试集比例，默认为0.2')
+    parser.add_argument('--ext', type=str, default='.png',
+        help='YOLO图像数据后缀，注意带"." ')
+    opt = parser.parse_args()
+
+    yolo_root = opt.yolo_root
+    print('YOLO格式数据集路径：', yolo_root)
+
+    ANNO = os.path.join(yolo_root, 'labels')
+    JPEG = os.path.join(yolo_root, 'images')
+    # Accept the extension with or without its leading dot.
+    ext = opt.ext if opt.ext.startswith('.') else '.' + opt.ext
+
+    if opt.from_voc:
+        print('从VOC数据集中分割数据集')
+        if not opt.voc_root:
+            raise Exception('需要提供VOC格式路径')
+        voc_root = opt.voc_root
+        voc_sets = os.path.join(voc_root, 'ImageSets/Main')
+        if not os.path.exists(voc_sets):
+            raise Exception('VOC数据集不存在ImageSets/Main路径')
+        for file in Path(voc_sets).iterdir():
+            if not file.is_file():
+                continue
+            # Close the set file promptly and ignore blank lines, which
+            # would otherwise become paths without an image id.
+            with open(str(file), 'r') as set_file:
+                img_ids = [line.strip() for line in set_file if line.strip()]
+            img_full_path = [os.path.join(JPEG, img_id + ext) for img_id in img_ids]
+            file_to_write = os.path.join(yolo_root, file.name)
+            write_txt(file_to_write, img_full_path)
+    else:
+        print('从YOLO数据集中按比例随机分割数据集')
+        # Compare the real suffix: splitting the name on '.' fails for names
+        # without a dot and picks the wrong part for names with several.
+        files = [str(file) for file in Path(JPEG).iterdir() if file.suffix == ext]
+        print('数据集长度:', len(files))
+        if not files:
+            raise Exception('未找到后缀为 {} 的图像文件'.format(ext))
+        files = shuffle(files)
+        ratio = opt.test_ratio
+        trainval, test = train_test_split(files, test_size=ratio)
+        # 20% of the remaining train+val images become the validation set.
+        train, val = train_test_split(trainval, test_size=0.2)
+        print('训练集数量: ', len(train))
+        print('验证集数量: ', len(val))
+        print('测试集数量: ', len(test))
+
+        # Write one list file per subset into the YOLO root.
+        trainval_txt = os.path.join(yolo_root, 'trainval.txt')
+        write_txt(trainval_txt, trainval)
+
+        train_txt = os.path.join(yolo_root, 'train.txt')
+        write_txt(train_txt, train)
+
+        val_txt = os.path.join(yolo_root, 'val.txt')
+        write_txt(val_txt, val)
+
+        test_txt = os.path.join(yolo_root, 'test.txt')
+        write_txt(test_txt, test)
+