Time series forecasting using a Transformer model.

See `example.py` in this folder for a minimal end-to-end usage of the model.

## Prepare the data for the model
```Python
# Set paths to the input data and to the results file
data_path = "../data/task-usage_job-ID-3418339_total.csv"
results_path = "..."
results_file = "...csv"
# Prepare the dataset; returns the scaled DataFrame and the fitted scaler
df, scaler = prepare_data(data_path)
```
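
For reference, the sketch below illustrates what a helper like `prepare_data` typically does for this kind of data: load the CSV, scale it, and return both the scaled frame and the fitted scaler. It is an assumption for illustration, not the repository's actual implementation.
```Python
# Hypothetical sketch of a prepare_data-style helper (illustration only;
# the real implementation ships with this repository).
import pandas as pd
from sklearn.preprocessing import MinMaxScaler

def prepare_data_sketch(path):
    df = pd.read_csv(path)                     # load the raw task-usage data
    scaler = MinMaxScaler()                    # scale every column to [0, 1]
    df[df.columns] = scaler.fit_transform(df)  # keep the DataFrame structure
    return df, scaler                          # the scaler is needed later for inverse_transform
```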

## Set up a device
```Python
# Use CUDA if it is available, otherwise fall back to the CPU
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
```

## Load and prepare the model

```Python
# Load the model configuration
with open("models/multistep/config.json") as jfile:
    config = json.load(jfile)
# Initialize the model
model = init_transformer(config, device)
# Load the trained weights (the checkpoint is a tuple; the second element is discarded)
model_state, _ = torch.load("models/model_data", map_location=device)
model.load_state_dict(model_state)
# Set the model to evaluation mode
model.eval()
```
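
Note that the checkpoint is unpacked as a tuple, which suggests it was saved together with a second object (for example, an optimizer state) that is not needed at inference time. A hedged sketch of how such a checkpoint could have been produced:
```Python
# Assumption: the checkpoint bundles the model state with a second object
# (e.g. the optimizer state), which the loading code above discards as `_`.
torch.save((model.state_dict(), optimizer.state_dict()), "models/model_data")
```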

## Select the loss function
```Python
# The model was trained with MSE; other losses can also be evaluated.
loss_f = torch.nn.MSELoss()
```
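
Any element-wise PyTorch loss can be swapped in if a different error measure suits your use case, for example:
```Python
# Alternative losses from torch.nn (examples only; not used in the original training)
loss_f = torch.nn.L1Loss()        # mean absolute error
loss_f = torch.nn.SmoothL1Loss()  # Huber-style loss, less sensitive to outliers
```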

## Wrap the dataset in a PyTorch DataLoader
```Python
# The first argument selects the data split. "train" or "validation" expose
# other parts of the data, but they feed the model with different structures,
# not the sliding window used by the "test" split.
test_dataset = LoadGoogleDataset("test", seq_len=config["seq_len"],
                                 prediction_step=config["prediction_step"], data_frame=df)
test_loader = DataLoader(test_dataset, batch_size=1, shuffle=False)
```
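
To make the sliding-window idea concrete, a minimal dataset of this kind could look like the sketch below. The class name, shapes, and indexing are illustrative assumptions, not the actual `LoadGoogleDataset` code:
```Python
# Hypothetical minimal sliding-window dataset, for illustration only
import torch
from torch.utils.data import Dataset

class SlidingWindowDataset(Dataset):
    def __init__(self, series, seq_len, prediction_step):
        self.series = torch.as_tensor(series)
        self.seq_len = seq_len
        self.prediction_step = prediction_step

    def __len__(self):
        # Number of complete (input window, forecast horizon) pairs
        return len(self.series) - self.seq_len - self.prediction_step + 1

    def __getitem__(self, idx):
        x_enc = self.series[idx: idx + self.seq_len]                     # encoder input window
        x_dec = self.series[idx + self.seq_len - 1: idx + self.seq_len]  # last known value for the decoder
        target = self.series[idx + self.seq_len:
                             idx + self.seq_len + self.prediction_step]  # values to forecast
        return x_enc, x_dec, target
```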

## Run the test/forecast loop
```Python
loss_progress = list()
# For multi-step forecasts, keep one list of outputs/targets per horizon step;
# for single-step forecasts, flat lists are enough.
if config["prediction_step"] > 1:
    outputs = dict()
    targets = dict()
    for ii in range(config["prediction_step"]):
        outputs[str(ii)] = list()
        targets[str(ii)] = list()
else:
    outputs = list()
    targets = list()
for x_enc, x_dec, target in test_loader:
    with torch.no_grad():
        # Send data to the device and prepare dimensions
        x_enc, x_dec, target = x_enc.to(device), x_dec.to(device), target.to(device)
        x_dec = x_dec.unsqueeze(-1)
        # Forecast
        out = model(x_enc.float(), x_dec.float(), training=False)
        # Compute the loss
        loss = loss_f(out.double(), target.double())
        # Store predictions and target values
        if config["prediction_step"] > 1:
            for ii in range(config["prediction_step"]):
                outputs[str(ii)].append(out.squeeze().cpu().detach().tolist()[ii])
                targets[str(ii)].append(target.squeeze().cpu().detach().tolist()[ii])
        else:
            outputs.append(out.squeeze().cpu().detach().tolist())
            targets.append(target.squeeze().cpu().detach().tolist())
        # Keep the loss values in a list
        loss_progress.append(loss.cpu().detach().tolist())
```
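
After the loop, `loss_progress` holds one MSE value per test window, which can be summarized directly:
```Python
# Summarize the per-window losses collected above
mean_loss = sum(loss_progress) / len(loss_progress)
print(f"Mean test MSE over {len(loss_progress)} windows: {mean_loss:.6f}")
```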

## Re-scale data
```Python
# Re-scale outputs back to the original units using the fitted scaler
l_df = len(df["Efficiency"])
df_computed = df.copy()  # work on a copy so the original DataFrame is not mutated

values = dict()

if config["prediction_step"] > 1:
    eff_out = dict()
    tgt_out = dict()
    for ii in range(config["prediction_step"]):
        # Place the step-ii predictions in the (zero-padded) Efficiency column,
        # invert the scaling on the whole frame, and read the column back
        real_eff = np.zeros(len(df["Efficiency"]))
        real_eff[l_df - len(outputs[str(ii)]):] = outputs[str(ii)]
        df_computed["Efficiency"] = real_eff
        df_unscaled = scaler.inverse_transform(df_computed)
        eff_out[str(ii)] = df_unscaled[l_df - len(outputs[str(ii)]):, -1]

        # Repeat the same procedure for the step-ii target values
        real_eff = np.zeros(len(df["Efficiency"]))
        real_eff[l_df - len(outputs[str(ii)]):] = targets[str(ii)]
        df_computed["Efficiency"] = real_eff
        df_unscaled = scaler.inverse_transform(df_computed)
        tgt_out[str(ii)] = df_unscaled[l_df - len(outputs[str(ii)]):, -1]

        values["eff_" + str(ii)] = eff_out[str(ii)].tolist()
        values["tgt_" + str(ii)] = tgt_out[str(ii)].tolist()

else:
    # Single-step case: the same inverse-scaling procedure with flat lists
    real_eff = np.zeros(len(df["Efficiency"]))
    real_eff[l_df - len(outputs):] = outputs
    df_computed["Efficiency"] = real_eff
    df_unscaled = scaler.inverse_transform(df_computed)
    eff_out = df_unscaled[l_df - len(outputs):, -1]

    real_eff = np.zeros(len(df["Efficiency"]))
    real_eff[l_df - len(outputs):] = targets
    df_computed["Efficiency"] = real_eff
    df_unscaled = scaler.inverse_transform(df_computed)
    tgt_out = df_unscaled[l_df - len(outputs):, -1]

    values["eff"] = eff_out.tolist()
    values["tgt"] = tgt_out.tolist()
```
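
With predictions and targets back in their original units, error metrics can be reported on the physical scale. For example, in the single-step case (keys `"eff"` and `"tgt"`):
```Python
# Mean absolute error on the re-scaled (original-unit) values,
# assuming the single-step case populated "eff" and "tgt" above
eff = np.asarray(values["eff"])
tgt = np.asarray(values["tgt"])
print("MAE on the original scale:", np.mean(np.abs(eff - tgt)))
```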

## Save data
```Python
# Save predictions and targets as CSV columns ("writer" is csv.writer from the standard library)
from csv import writer

with open(results_path + results_file, "w", newline="") as f:
    dict_writer = writer(f)
    dict_writer.writerow(values.keys())           # header: one column per stored series
    dict_writer.writerows(zip(*values.values()))  # rows: the series aligned element-wise
```
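
The resulting CSV has one column per key in `values`, so it can be read back, for instance with pandas, for plotting or further analysis:
```Python
# Read the saved forecasts back for inspection
import pandas as pd
results = pd.read_csv(results_path + results_file)
print(results.head())
```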

Transformer model adapted from:

Wu, N., Green, B., Ben, X., & O’Banion, S. (2020). Deep Transformer Models for Time Series Forecasting: The Influenza Prevalence Case. arXiv. http://arxiv.org/abs/2001.08317
| 139 | + |