# Output directories for each CONET pipeline variant, all rooted at the
# configured top-level output path.
CONET_GINKGO_OUTPUT = os.path.join(config["output"], "conet_ginkgo")
CONET_TREE_OUTPUT = os.path.join(config["output"], "conet_tree_distances")
CONET_GINKGO_TREE_OUTPUT = os.path.join(config["output"], "conet_ginkgo_tree_distances")
CONET_CBS_OUTPUT = os.path.join(config["output"], "conet_cbs")
CONET_CBS_TREE_OUTPUT = os.path.join(config["output"], "conet_cbs_tree_distances")
try :
69
71
other_cnv_methods = config ["other_methods" ]
@@ -557,6 +559,79 @@ rule conet_ginkgo_inference:
557
559
shell :
558
560
"python {params.script} --bin_path {params.bin_path} --counts {input.d_mat} --cnvs {input.init_cnvs} --intermediate_dir {params.intermediate_dir} --seed {wildcards.rep_id} --out_cnvs {output.conet_inferred_cnvs} --out_tree {output.conet_inferred_tree} --out_attachments {output.conet_inferred_attachments}"
559
561
562
+ rule conet_cbs_inference :
563
+ params :
564
+ script = config ["conet" ]["script" ],
565
+ n_nodes = n_nodes ,
566
+ scratch = config ["conet" ]["scratch" ],
567
+ mem = config ["conet" ]["mem" ],
568
+ time = config ["conet" ]["time" ],
569
+ script_inp = str (n_nodes )+ "nodes_{rep_id}" ,
570
+ intermediate_dir = CONET_CBS_OUTPUT + '/' + str (n_nodes ) + 'nodes' + '/' + '{rep_id}' + '/' ,
571
+ bin_path = config ["conet" ]["bin_path" ],
572
+ em_iters = config ["conet" ]["em_iters" ],
573
+ pt_iters = config ["conet" ]["pt_iters" ],
574
+ input :
575
+ d_mat = SIM_OUTPUT + '_' + sim_prefix + '/' + str (n_nodes ) + 'nodes' + '/' + '{rep_id}' + '_d_mat.txt' ,
576
+ output :
577
+ conet_inferred_cnvs = CONET_CBS_OUTPUT + '/' + str (n_nodes ) + 'nodes' + '/' + '{rep_id}' + '_conet_cbs_inferred.txt' ,
578
+ conet_inferred_tree = CONET_CBS_OUTPUT + '/' + str (n_nodes ) + 'nodes' + '/' + '{rep_id}' + '_conet_cbs_tree.txt' ,
579
+ conet_inferred_attachments = CONET_CBS_OUTPUT + '/' + str (n_nodes ) + 'nodes' + '/' + '{rep_id}' + '_conet_cbsattachments.txt'
580
+ threads : 10
581
+ # shell:
582
+ # "python {params.script} --bin_path {params.bin_path} --counts {input.d_mat} --cnvs {input.init_cnvs} --intermediate_dir {params.intermediate_dir} --seed {wildcards.rep_id} --em_iters {params.em_iters} --pt_iters {params.pt_iters} --out_cnvs {output.conet_inferred_cnvs} --out_tree {output.conet_inferred_tree} --out_attachments {output.conet_inferred_attachments}"
583
+ run :
584
+ real_ind = list (map (lambda x : x .start , list (tree .nodes )))
585
+ real_ind .extend (list (map (lambda x : x .end , list (tree .nodes ))))
586
+ real_ind = list (set (real_ind ))
587
+ real_ind .sort ()
588
+ if wildcards .known_breakpoints :
589
+ # Extract real breakpoints indices
590
+ candidates = real_ind
591
+ save_counts_in_CONET_format (dirpath + "counts_synthetic" , corr_reads , candidates )
592
+ else :
593
+ save_counts_in_CONET_format (dirpath + "counts_synthetic" , corr_reads , [])
594
+ subprocess .run (["Rscript" , "CBS_MergeLevels.R" , "--mincells=5" ,
595
+ f"--output={ dirpath } cbs_out" , f"--dataset={ dirpath } counts_synthetic" ])
596
+ X = np .loadtxt (dirpath + "cbs_out" , delimiter = "," )
597
+ candidates = []
598
+ for i in range (X .shape [0 ]):
599
+ if X [i , 4 ] == 1.0 :
600
+ candidates .append (i )
601
+ save_counts_in_CONET_format (dirpath + "counts_synthetic" , corr_reads , candidates )
602
+ print (f"Found { len (candidates )} breakpoint candidates..." )
603
+
604
+ # Convert model data to CONET format
605
+ data_converter = dc .DataConverter (dirpath + "counts_synthetic" ,
606
+ delimiter = ';' ,
607
+ default_bin_length = 1 ,
608
+ event_length_normalizer = corr_reads .shape [1 ], # number of loci
609
+ add_chromosome_ends = False ,
610
+ neutral_cn = 2.0 )
611
+ data_converter .create_CoNET_input_files (dirpath , add_chr_ends_to_indices = False )
612
+
613
+ # Perform CONET inference
614
+ conet = c .CONET (str (Path (conf .conet_binary_dir ) / Path ("CONET" )))
615
+ params = cp .CONETParameters (tree_structure_prior_k1 = 0.01 ,
616
+ data_dir = dirpath , counts_penalty_s1 = 100000 , counts_penalty_s2 = 100000 ,
617
+ param_inf_iters = 500000 , seed = random .randint (0 , 1000 ), mixture_size = 2 , pt_inf_iters = 1000000 ,
618
+ use_event_lengths_in_attachment = False ,
619
+ event_length_penalty_k0 = 1 )
620
+ conet .infer_tree (params )
621
+ print (f"CONET inference finished on model { i } " )
622
+ result = ir .InferenceResult (conf .conet_binary_dir , corr_reads .T )
623
+
624
+ inferred_cn = result .get_inferred_copy_numbers (2 , conf .bins , conf .cells )
625
+ inferred_brkp = result .bp_matrix .astype (int )
626
+ inferred_nodes = set (result .tree .nodes )
627
+ inferred_edges = set (result .tree .edges )
628
+
629
+ real_cn = data [0 ]
630
+ real_brkp = data [4 ][:, real_ind ].astype (int )
631
+ real_nodes = set ((n .start , n .end ) for n in tree .nodes )
632
+ real_edges = set (((e [0 ].start , e [0 ].end ), (e [1 ].start , e [1 ].end )) for e in tree .edges )
633
+
634
+
560
635
rule run_medalt :
561
636
params :
562
637
script = config ["medalt" ]["script" ],
@@ -610,3 +685,160 @@ rule compute_medalt_ginkgo_tree_distances:
610
685
medalt_tree_distance = f'{ MEDALT_TREE_OUTPUT } /{ str (n_nodes )} nodes_' + '{regions}regions_{reads}reads/{rep_id}_medalt_ginkgo_tree_distance.txt'
611
686
shell :
612
687
"python {params.script} {input.medalt_tree} {input.cnvs} {output.medalt_tree_distance}"
688
+
689
+ rule compute_deltas :
690
+ input :
691
+ simulations = expand (f'{ SIM_OUTPUT } _{ sim_prefix } /{ str (n_nodes )} nodes_' + '{regions}regions_{reads}reads/{rep_id}_d_mat.txt' ,
692
+ regions = [x for x in n_regions ], reads = [x for x in n_reads ], rep_id = [x for x in range (0 ,all_n_tps )]),
693
+ best_full_tree = expand (f'{ TREES_OUTPUT } _best_full_tree/{ str (n_nodes )} nodes_' + '{regions}regions_{reads}reads/{rep_id}_full_tree.txt' ,
694
+ regions = [x for x in n_regions ], reads = [x for x in n_reads ], rep_id = [x for x in range (0 ,all_n_tps )], tree_rep_id = [x for x in range (0 ,tree_rep )]),
695
+
696
+ best_cluster_tree = expand (f'{ TREES_OUTPUT } _best_cluster_tree/{ str (n_nodes )} nodes_' + '{regions}regions_{reads}reads/{rep_id}_cluster_tree.txt' ,
697
+ regions = [x for x in n_regions ], reads = [x for x in n_reads ], rep_id = [x for x in range (0 ,all_n_tps )]),
698
+
699
+ best_full_tree_sum = expand (f'{ TREES_OUTPUT } _best_full_tree_sum/{ str (n_nodes )} nodes_' + '{regions}regions_{reads}reads/{rep_id}_full_tree.txt' ,
700
+ regions = [x for x in n_regions ], reads = [x for x in n_reads ], rep_id = [x for x in range (0 ,all_n_tps )], tree_rep_id = [x for x in range (0 ,tree_rep )]),
701
+
702
+ best_cluster_tree_sum = expand (f'{ TREES_OUTPUT } _best_cluster_tree_sum/{ str (n_nodes )} nodes_' + '{regions}regions_{reads}reads/{rep_id}_cluster_tree.txt' ,
703
+ regions = [x for x in n_regions ], reads = [x for x in n_reads ], rep_id = [x for x in range (0 ,all_n_tps )]),
704
+
705
+ other_cnvs = expand (os .path .join (config ["output" ], '{method}' ) + '/' + str (n_nodes ) + 'nodes_' + '{regions}' + 'regions_' + '{reads}' + 'reads' + '/' + '{rep_id}' + '_{method}_inferred.txt' ,
706
+ regions = [x for x in n_regions ], reads = [x for x in n_reads ], rep_id = [x for x in range (0 ,all_n_tps )], method = other_cnv_methods ),
707
+ output :
708
+ out_fname = os .path .join (output_path , 'deltas_sims.csv' )
709
+ run :
710
+ rows = []
711
+ rows .append ('index,rep_id,method,delta' )
712
+ i = 0
713
+ for true_cnvs in input .simulations :
714
+ # True
715
+ rep_id = true_cnvs .split ('/' )[- 1 ].split ('_' )[0 ]
716
+ gt = np .loadtxt (true_cnvs , delimiter = ',' )
717
+ if gt .shape [0 ] == n_bins : # should be cells by bins
718
+ gt = np .transpose (gt )
719
+
720
+ # Diploid
721
+ i += 1
722
+ method = 'diploid'
723
+ inf = np .ones (gt .shape ) * 2
724
+ error = np .sqrt (np .mean ((gt - inf )** 2 ))
725
+ rows .append (f'{ i } ,{ rep_id } ,{ method } ,{ error } ' )
726
+
727
+ # i += 1
728
+ # method = 'ginkgo'
729
+ # inf_cnvs = f'{GINKGO_OUTPUT}/{n_nodes}nodes/{rep_id}_ginkgo_inferred.txt'
730
+ # inf = np.loadtxt(inf_cnvs, delimiter=' ')
731
+ # error = np.sqrt(np.mean((gt-inf)**2))
732
+ # rows.append(f'{i},{rep_id},{method},{error}')
733
+ #
734
+ # i += 1
735
+ # method = 'conet_ginkgo'
736
+ # inf_cnvs = f'{CONET_GINKGO_OUTPUT}/{n_nodes}nodes/{rep_id}_conet_ginkgo_inferred.txt'
737
+ # inf = np.loadtxt(inf_cnvs, delimiter=',')
738
+ # error = np.sqrt(np.mean((gt-inf)**2))
739
+ # rows.append(f'{i},{rep_id},{method},{error}')
740
+
741
+ # i += 1
742
+ # method = 'conet_knownbp'
743
+ # inf_cnvs = f'{CONET_KNOWNBP_OUTPUT}/{n_nodes}nodes/{rep_id}_conet_knownbp_inferred.txt'
744
+ # inf = np.loadtxt(inf_cnvs, delimiter=';').T
745
+ # error = np.sqrt(np.mean((gt-inf)**2))
746
+ # rows.append(f'{i},{rep_id},{method},{error}')
747
+
748
+ for other_cnvs_file in other_cnvs :
749
+ i += 1
750
+ method = other_cnvs_file .split ('' )
751
+ inf = np .loadtxt (other_cnvs_file , delimiter = ',' )
752
+ if inf .shape != gt .shape :
753
+ inf = inf .T
754
+ error = np .sqrt (np .mean ((gt - inf )** 2 ))
755
+ rows .append (f'{ i } ,{ rep_id } ,{ method } ,{ error } ' )
756
+
757
+ for fairness in ['unfair' , 'fair' ]:
758
+ i += 1
759
+ method = f'cluster_tree_{ fairness } '
760
+ inf_cnvs = f'{ TREES_OUTPUT } _best_cluster_tree/{ fairness } /{ n_nodes } nodes/{ rep_id } _cluster_tree_cnvs.csv'
761
+ inf = np .loadtxt (inf_cnvs , delimiter = ',' )
762
+ error = np .sqrt (np .mean ((gt - inf )** 2 ))
763
+ rows .append (f'{ i } ,{ rep_id } ,{ method } ,{ error } ' )
764
+
765
+ i += 1
766
+ method = f'cluster_tree_sum_{ fairness } '
767
+ inf_cnvs = f'{ TREES_OUTPUT } _best_cluster_tree_sum/{ fairness } /{ n_nodes } nodes/{ rep_id } _cluster_tree_cnvs.csv'
768
+ inf = np .loadtxt (inf_cnvs , delimiter = ',' )
769
+ error = np .sqrt (np .mean ((gt - inf )** 2 ))
770
+ rows .append (f'{ i } ,{ rep_id } ,{ method } ,{ error } ' )
771
+
772
+ i += 1
773
+ method = f'full_tree_{ fairness } '
774
+ inf_cnvs = f'{ TREES_OUTPUT } _best_full_tree/{ fairness } /{ n_nodes } nodes/{ rep_id } _full_tree_cnvs.csv'
775
+ inf = np .loadtxt (inf_cnvs , delimiter = ',' )
776
+ error = np .sqrt (np .mean ((gt - inf )** 2 ))
777
+ rows .append (f'{ i } ,{ rep_id } ,{ method } ,{ error } ' )
778
+
779
+ i += 1
780
+ method = f'full_tree_sum_{ fairness } '
781
+ inf_cnvs = f'{ TREES_OUTPUT } _best_full_tree_sum/{ fairness } /{ n_nodes } nodes/{ rep_id } _full_tree_cnvs.csv'
782
+ inf = np .loadtxt (inf_cnvs , delimiter = ',' )
783
+ error = np .sqrt (np .mean ((gt - inf )** 2 ))
784
+ rows .append (f'{ i } ,{ rep_id } ,{ method } ,{ error } ' )
785
+
786
+ # i += 1
787
+ # method = f'cluster_tree_{fairness}_knownbps'
788
+ # inf_cnvs = f'{TREES_OUTPUT}_best_cluster_tree/{fairness}/{n_nodes}nodes/{rep_id}_cluster_tree_cnvs_knownbps.csv'
789
+ # inf = np.loadtxt(inf_cnvs, delimiter=',')
790
+ # error = np.sqrt(np.mean((gt-inf)**2))
791
+ # rows.append(f'{i},{rep_id},{method},{error}')
792
+ #
793
+ # i += 1
794
+ # method = f'cluster_tree_sum_{fairness}_knownbps'
795
+ # inf_cnvs = f'{TREES_OUTPUT}_best_cluster_tree_sum/{fairness}/{n_nodes}nodes/{rep_id}_cluster_tree_cnvs_knownbps.csv'
796
+ # inf = np.loadtxt(inf_cnvs, delimiter=',')
797
+ # error = np.sqrt(np.mean((gt-inf)**2))
798
+ # rows.append(f'{i},{rep_id},{method},{error}')
799
+ #
800
+ # i += 1
801
+ # method = f'full_tree_{fairness}_knownbps'
802
+ # inf_cnvs = f'{TREES_OUTPUT}_best_full_tree/{fairness}/{n_nodes}nodes/{rep_id}_full_tree_cnvs_knownbps.csv'
803
+ # inf = np.loadtxt(inf_cnvs, delimiter=',')
804
+ # error = np.sqrt(np.mean((gt-inf)**2))
805
+ # rows.append(f'{i},{rep_id},{method},{error}')
806
+ #
807
+ # i += 1
808
+ # method = f'full_tree_sum_{fairness}_knownbps'
809
+ # inf_cnvs = f'{TREES_OUTPUT}_best_full_tree_sum/{fairness}/{n_nodes}nodes/{rep_id}_full_tree_cnvs_knownbps.csv'
810
+ # inf = np.loadtxt(inf_cnvs, delimiter=',')
811
+ # error = np.sqrt(np.mean((gt-inf)**2))
812
+ # rows.append(f'{i},{rep_id},{method},{error}')
813
+
814
+ i += 1
815
+ method = f'cluster_tree_{ fairness } _adapted'
816
+ inf_cnvs = f'{ TREES_OUTPUT } _best_cluster_tree/{ fairness } /{ n_nodes } nodes/{ rep_id } _cluster_tree_cnvs_adapted.csv'
817
+ inf = np .loadtxt (inf_cnvs , delimiter = ',' )
818
+ error = np .sqrt (np .mean ((gt - inf )** 2 ))
819
+ rows .append (f'{ i } ,{ rep_id } ,{ method } ,{ error } ' )
820
+
821
+ i += 1
822
+ method = f'cluster_tree_sum_{ fairness } _adapted'
823
+ inf_cnvs = f'{ TREES_OUTPUT } _best_cluster_tree_sum/{ fairness } /{ n_nodes } nodes/{ rep_id } _cluster_tree_cnvs_adapted.csv'
824
+ inf = np .loadtxt (inf_cnvs , delimiter = ',' )
825
+ error = np .sqrt (np .mean ((gt - inf )** 2 ))
826
+ rows .append (f'{ i } ,{ rep_id } ,{ method } ,{ error } ' )
827
+
828
+ i += 1
829
+ method = f'full_tree_{ fairness } _adapted'
830
+ inf_cnvs = f'{ TREES_OUTPUT } _best_full_tree/{ fairness } /{ n_nodes } nodes/{ rep_id } _full_tree_cnvs_adapted.csv'
831
+ inf = np .loadtxt (inf_cnvs , delimiter = ',' )
832
+ error = np .sqrt (np .mean ((gt - inf )** 2 ))
833
+ rows .append (f'{ i } ,{ rep_id } ,{ method } ,{ error } ' )
834
+
835
+ i += 1
836
+ method = f'full_tree_sum_{ fairness } _adapted'
837
+ inf_cnvs = f'{ TREES_OUTPUT } _best_full_tree_sum/{ fairness } /{ n_nodes } nodes/{ rep_id } _full_tree_cnvs_adapted.csv'
838
+ inf = np .loadtxt (inf_cnvs , delimiter = ',' )
839
+ error = np .sqrt (np .mean ((gt - inf )** 2 ))
840
+ rows .append (f'{ i } ,{ rep_id } ,{ method } ,{ error } ' )
841
+
842
+
843
+ with open (output .out_fname , 'w' ) as f :
844
+ f .write ('\n ' .join (rows ))
0 commit comments