Prepare to add a Jinja/automatic way to generate correct YAML files

aelbouchti · aelbouchti · commit e50e64351aa9 · 2019-01-09T14:47:16.000+01:00
diff --git a/estimator.py b/estimator.py
@@ -13,7 +13,8 @@
 slim = tf.contrib.slim
 
 #Open and read the yaml file:
-stream = open(os.path.join(os.getcwd(), "yaml","config","config_multiclass.yaml"))
+cwd = os.getcwd()
+stream = open(os.path.join(cwd, "yaml","config","config_multiclass.yaml"))
 data = load(stream)
 stream.close()
 #==================================#
@@ -26,7 +27,7 @@
 checkpoint_dir= data["checkpoint_dir"]
 checkpoint_pattern  = data["checkpoint_pattern"]
 checkpoint_file = os.path.join(checkpoint_dir, checkpoint_pattern)
-train_dir = os.path.join(os.getcwd(), "train_"+model_name)
+train_dir = os.path.join(cwd, "train_"+ model_name)
 #Define the checkpoint state to determine initialization: from pre-trained weigths or recovery
 ckpt_state = tf.train.get_checkpoint_state(train_dir)
 #TODO: Place image_size on yaml/cnn/model_name.yaml
@@ -57,17 +58,19 @@
 num_batches_per_epoch = int(num_samples / batch_size)
 #num_batches = num_steps for one epcoh
 decay_steps = int(num_epochs_before_decay * num_batches_per_epoch)
-#==================================#
+#==================================#.
 #==================================#
 
 #==================================#
 #=======Network Informations=======#
 #==================================#
-network_file = open(os.path.join(os.getcwd(), "yaml", "cnn", model_name+".yaml"))
+network_file = open(os.path.join(cwd, "yaml", "cnn", model_name+".yaml"))
 network_config = load(network_file)
 network_file.close()
 variables_to_exclude = network_config.pop("variables_to_exclude")
+print(variables_to_exclude)
 argscope_config = network_config.pop("argscope")
+print(argscope_config)
 if "prediction_fn" in network_config.keys():
     network_config["prediction_fn"] = getattr(tf.contrib.layers, network_config["prediction_fn"])
 if "activation_fn" in network_config.keys():
@@ -79,7 +82,7 @@
 
 #Create log_dir:argscope_config
 if not os.path.exists(train_dir):
-    os.mkdir(os.path.join(os.getcwd(),train_dir))
+    os.mkdir(train_dir)
 #===================================================================== Training ===========================================================================#
 #Adding the graph:
 #Set the verbosity to INFO level
diff --git a/utils/images/visu_spark.py b/utils/images/visu_spark.py
@@ -1,4 +1,5 @@
 import pyspark
+import pyspark.sql.functions as F
 
 
 spark = pyspark.sql.SparkSession \
@@ -19,8 +20,9 @@ def load_images(filenames_pattern, train_size=1.):
     (use Dataframe.split([train_size, 1 - train_size]))
     """
     df = spark.read.load(filenames_pattern, format="image")
-    df.select("image.origin", "image.width", "image.height").show()
-    
+    a = df.withColumn("image.data", F.decode(df.image.data,'UTF-8'))\
+        .drop("image.data").withColumnRenamed("image.data", "image.data")
+    a["image.data"].show(1)     
     return df
 
 def per_pixel_mean(dataframe):
diff --git a/yaml/config/config.yaml b/yaml/config/config.yaml
@@ -2,17 +2,16 @@
 dataset_dir : "D:/MURA-v1.1"
 #Portion of GPU to attribute for training
 gpu_p : 1.
-#Model name to call automatically:
-model_name : "mobilenet_v2_140"
+#Model name; it is used to target the correct yaml file in the "cnn" folder:
+model_name : "mobilenet_v2"
 #Checkpoint directory (For transfer learning)
 checkpoint_dir : "D:/mobilenet"
 checkpoint_pattern : "mobilenet_v2_1.4_224.ckpt"
 #File pattern to recognize
 file_pattern : "mura_*.tfrecord"
 file_pattern_for_counting : "mura"
 #Num samples in the training dataset
-#Chest-X ray num_samples
-
+#MURA num_samples
 num_samples : 36807
 #Mapping from class to id
 names_to_labels : {
diff --git a/yaml/config/config_multilabel.yaml b/yaml/config/config_multilabel.yaml
@@ -2,8 +2,8 @@
 dataset_dir : "D:/chest"
 #Portion of GPU to attribute for training
 gpu_p : 1.
-#Model name to call automatically:
-model_name : "mobilenet_v2_140"
+#Model name; it is used to target the correct yaml file in the "cnn" folder:
+model_name : "mobilenet_v2"
 #Checkpoint directory (For transfer learning)
 checkpoint_dir : "D:/mobilenet"
 checkpoint_pattern : "mobilenet_v2_1.4_224.ckpt"
diff --git a/yaml/config/config_multitask.yaml b/yaml/config/config_multitask.yaml
@@ -2,6 +2,8 @@
 dataset_dir : "D:/chest"
 #Portion of GPU to attribute for training
 gpu_p : 1.
+#Model name; it is used to target the correct yaml file in the "cnn" folder:
+model_name : "mobilenet_v2"
 #Checkpoint directory (For transfer learning)
 checkpoint_dir : "D:/mobilenet"
 checkpoint_pattern : "mobilenet_v2_1.4_224.ckpt"