diff --git a/runcards/runcard.yml b/runcards/runcard.yml index d4c9a0f..4f9105f 100644 --- a/runcards/runcard.yml +++ b/runcards/runcard.yml @@ -2,13 +2,13 @@ # PDF Set # ################################################### pdfsetting: - pdf: NNPDF40_nnlo_as_0118_1000 + pdf: 210219-02-rs-nnpdf40-1000 existing_enhanced: False ################################################### # Size of compressed PDF replicas # ################################################### -compressed: 500 +compressed: 100 ################################################### # Choice of Minimizer # diff --git a/src/pycompressor/compressing.py b/src/pycompressor/compressing.py index 2fc33b8..a462a1e 100644 --- a/src/pycompressor/compressing.py +++ b/src/pycompressor/compressing.py @@ -15,14 +15,18 @@ from pycompressor.pdfgrid import XGrid from pycompressor.pdfgrid import PdfSet from pycompressor.compressor import Compress +from pycompressor.utils import map_index from pycompressor.utils import extract_index +from pycompressor.utils import preprocess_enhanced +from pycompressor.utils import restore_permutation from pycompressor.estimators import ALLOWED_ESTIMATORS + console = Console() log = logging.getLogger(__name__) # Initial scale (in GeV) -Q0 = 1 +Q0 = 1.65 # Total number of flavour to 2nf+1=7 NF = 4 @@ -37,11 +41,11 @@ def splash(): style = Style(color="blue") logo = Table(show_header=True, header_style="bold blue", style=style) - logo.add_column("𝖕𝖞𝕮𝖔𝖒𝖕𝖗𝖊𝖘𝖘𝖔𝖗", justify="center", width=60) + logo.add_column("𝖕𝖞𝕮𝖔𝖒𝖕𝖗𝖊𝖘𝖘𝖔𝖗", justify="center", width=76) logo.add_row("[bold blue]Fast python compressor for PDF replicas.") logo.add_row("[bold blue]https://n3pdf.github.io/pycompressor/") logo.add_row("[bold blue]© N3PDF 2021") - logo.add_row("[bold blue]Authors: Stefano Carrazza, Juan E. Cruz-Martinez, Tanjona R. Rabemananjara") + logo.add_row("[bold blue]Authors: Stefano Carrazza, Juan M. Cruz-Martinez, Tanjona R. 
Rabemananjara") console.print(logo) @@ -67,7 +71,7 @@ def check_validity(pdfsetting, compressed, gans, est_dic): def check_adiabaticity(pdfsetting, gans, compressed): """ Check whether we are in an adiabatic optimization and if so if it can be performed """ pdf_name = pdfsetting["pdf"] - if pdfsetting.get("existing_enhanced") and not gans.get("enhanced"): + if pdfsetting.get("existing_enhanced") and not gans.get("enhanced"): adiabatic_result = f"{pdf_name}/compress_{pdf_name}_{compressed}_output.dat" if not pathlib.Path(adiabatic_result).exists(): raise CheckError( @@ -121,7 +125,6 @@ def compressing(pdfsetting, compressed, minimizer, est_dic, gans): postgans(str(pdf), outfolder, nbgen) splash() - # Set seed rndgen = Generator(PCG64(seed=0)) console.print("\n• Load PDF sets & Printing Summary:", style="bold blue") @@ -134,15 +137,29 @@ def compressing(pdfsetting, compressed, minimizer, est_dic, gans): try: postgan = pdf + "_enhanced" final_result = {"pdfset_name": postgan} - enhanced = PdfSet(postgan, xgrid, Q0, NF).build_pdf() + enhcd_grid = PdfSet(postgan, xgrid, Q0, NF).build_pdf() + processed, pindex, counts = preprocess_enhanced(enhcd_grid) + # Shuffle the enhanced PDF grid and save the shuffling + # index in order to restore it later. 
+ shuffled_index = rndgen.choice( + processed.shape[0], + processed.shape[0], + replace=False + ) + enhanced = processed[shuffled_index] except RuntimeError as excp: raise LoadingEnhancedError(f"{excp}") nb_iter, ref_estimators = 100000, None - init_index = np.array(extract_index(pdf, compressed)) + extr_index = np.array(extract_index(pdf, compressed)) + map_pindex = map_index(pindex, extr_index) + init_index = map_index(shuffled_index, map_pindex) + assert extr_index.shape[0] == init_index.shape[0] else: final_result = {"pdfset_name": pdf} nb_iter, ref_estimators = 15000, None init_index, enhanced = rndindex, prior + # reset seeds + rndgen = Generator(PCG64(seed=1)) # Create output folder outrslt = postgan if enhanced_already_exists else pdf @@ -159,7 +176,7 @@ def compressing(pdfsetting, compressed, minimizer, est_dic, gans): table.add_row("PDF set name", f"{pdf}") table.add_row("Size of Prior", f"{prior.shape[0] - 1} replicas") if enhanced_already_exists: - table.add_row("Size of enhanced", f"{enhanced.shape[0] - 1} replicas") + table.add_row("Size of enhanced", f"{enhcd_grid.shape[0] - 1} replicas") table.add_row("Size of compression", f"{compressed} replicas") table.add_row("Input energy Q0", f"{Q0} GeV") table.add_row( @@ -196,6 +213,10 @@ def compressing(pdfsetting, compressed, minimizer, est_dic, gans): erf, index = comp.cma_algorithm(std_dev=0.8) else: raise ValueError(f"{minimizer} is not a valid minimizer.") + # Restore the shuffled index back in case of compression from + # an enhanced set + if enhanced_already_exists: + index = restore_permutation(index, shuffled_index, pindex) # Prepare output file final_result["ERFs"] = erf_list @@ -207,7 +228,8 @@ def compressing(pdfsetting, compressed, minimizer, est_dic, gans): console.print(f"\n• Final ERF: [bold red]{erf}.", style="bold red") # Compute final ERFs for the final choosen replicas - final_err_func = comp.final_erfs(index) + samples = enhcd_grid if enhanced_already_exists else enhanced 
+ final_err_func = comp.final_erfs(samples, index) serfile = open(f"{out_folder}/erf_reduced.dat", "a+") serfile.write(f"{compressed}:") serfile.write(json.dumps(final_err_func)) diff --git a/src/pycompressor/compressor.py b/src/pycompressor/compressor.py index 523cc88..b8f3ba8 100644 --- a/src/pycompressor/compressor.py +++ b/src/pycompressor/compressor.py @@ -82,7 +82,7 @@ def all_error_function(self, index): erf_res = self.err_func.compute_all_erf(reduc_rep) return erf_res - def final_erfs(self, index): + def final_erfs(self, enhanced, index): """Compute the final ERF after minimization. Parameters @@ -96,7 +96,7 @@ def final_erfs(self, index): Dictionary containing the list of estimators and their respective values. """ - selected_replicas = self.enhanced[index] + selected_replicas = enhanced[index] erfs = self.err_func.compute_all_erf(selected_replicas) return erfs diff --git a/src/pycompressor/errfunction.py b/src/pycompressor/errfunction.py index 7f6574d..d2a8fd6 100644 --- a/src/pycompressor/errfunction.py +++ b/src/pycompressor/errfunction.py @@ -276,7 +276,7 @@ class ErfComputation: Number of trials """ - def __init__(self, prior, est_dic, nreduc, folder, rndgen, trials=1000, norm=True): + def __init__(self, prior, est_dic, nreduc, folder, rndgen, trials=10000, norm=True): self.prior = prior self.est_dic = est_dic # Compute estimators for PRIOR replicas diff --git a/src/pycompressor/utils.py b/src/pycompressor/utils.py index 35ef677..64f46b0 100644 --- a/src/pycompressor/utils.py +++ b/src/pycompressor/utils.py @@ -8,6 +8,77 @@ log = logging.getLogger(__name__) +def preprocess_enhanced(enhanced, dec_check=15): + """Pre-process the enhanced set by removing duplicates + in the PDF grid. + + Parameters + ---------- + enhanced: np.array(float) + enhanced PDF grid + + Returns + ------- + tuple(np.array, np.array, np.array) + tuple that returns the pre-processed array, the indices + that are kept and the number of times each array occurred. 
+ """ + + rounded = np.round(enhanced, dec_check) + preprocessed, index, counts = np.unique( + rounded, + axis=0, + return_index=True, + return_counts=True + ) + return preprocessed, index, counts + + +def map_index(refarr, arr): + """Map the elements in `arr` to the index in which + they occur in `refarr`. + + Parameters + ---------- + arr: np.array(int) + one dimensional array of integers with size N + refarr: np.array(int) + one dimensional array of integers with size M + + Returns + ------- + np.array(int) + one dimensional array of integers with size N + """ + + inds = {e:i for i, e in enumerate(refarr)} + return np.vectorize(inds.get)(arr) + + +def restore_permutation(index, shuffle, preprocess): + """Undo the mapping of indices due to the preprocessing + and the shuffling. + + Parameters + ---------- + index: np.array(int) + array containing the final indices + shuffle: np.array(int) + array containing the permutation + preprocess: np.array(int) + array containing the indices of the pre-processing + + Returns + ------- + np.array(int) + array of indices + """ + + undo_shuffle = shuffle[index] + undo_preproc = preprocess[undo_shuffle] + return undo_preproc + + +def remap_index(index, shuffled): new_idx = [] for idx in index: