diff --git a/pvlib/clearsky.py b/pvlib/clearsky.py index 62318942da..5e8897a208 100644 --- a/pvlib/clearsky.py +++ b/pvlib/clearsky.py @@ -579,9 +579,10 @@ def _calc_stats(data, samples_per_window, sample_interval, H): """ data_mean = data.values[H].mean(axis=0) - data_mean = _to_centered_series(data_mean, data.index, samples_per_window) + data_mean = _to_centered_series(data_mean, data.index, samples_per_window, + H) data_max = data.values[H].max(axis=0) - data_max = _to_centered_series(data_max, data.index, samples_per_window) + data_max = _to_centered_series(data_max, data.index, samples_per_window, H) # shift to get forward difference, .diff() is backward difference instead data_diff = data.diff().shift(-1) data_slope = data_diff / sample_interval @@ -594,30 +595,48 @@ def _slope_nstd_windowed(slopes, data, H, samples_per_window, sample_interval): with np.errstate(divide='ignore', invalid='ignore'): nstd = slopes[H[:-1, ]].std(ddof=1, axis=0) \ / data.values[H].mean(axis=0) - return _to_centered_series(nstd, data.index, samples_per_window) + return _to_centered_series(nstd, data.index, samples_per_window, H) def _max_diff_windowed(data, H, samples_per_window): raw = np.diff(data) raw = np.abs(raw[H[:-1, ]]).max(axis=0) - return _to_centered_series(raw, data.index, samples_per_window) + return _to_centered_series(raw, data.index, samples_per_window, H) def _line_length_windowed(data, H, samples_per_window, sample_interval): raw = np.sqrt(np.diff(data)**2. + sample_interval**2.) raw = np.sum(raw[H[:-1, ]], axis=0) - return _to_centered_series(raw, data.index, samples_per_window) + return _to_centered_series(raw, data.index, samples_per_window, H) -def _to_centered_series(vals, idx, samples_per_window): - vals = np.pad(vals, ((0, len(idx) - len(vals)),), mode='constant', - constant_values=np.nan) - shift = samples_per_window // 2 # align = 'center' only - return pd.Series(index=idx, data=vals).shift(shift) +def _to_centered_series(vals, idx, samples_per_window, H): + # Get center of interval using zero-indexing, round down to nearest + # index if there are an even number of rows + if samples_per_window % 2 == 0: + center_row = samples_per_window//2 - 1 + else: + center_row = samples_per_window//2 + try: + # Maintain tz that is stripped when idx is put in H + if idx.tz is not None: + c = pd.DatetimeIndex(idx.values[H][center_row, :], + tz='UTC').tz_convert(idx.tz) + else: + c = idx.values[H][center_row, :] + # If the index is a range + except AttributeError: + c = idx.values[H][center_row, :] -def _clear_sample_index(clear_windows, samples_per_window, align, H): + # Assign summary values for each interval to the indices of the center row + centered = pd.Series(index=idx, dtype='object') + centered.loc[c] = vals + return centered + + +def _clear_sample_index(clear_windows, samples_per_window, gaps, H, align): """ Returns indices of clear samples in clear windows """ @@ -635,12 +654,22 @@ def _clear_sample_index(clear_windows, samples_per_window, align, H): # shift = - (samples_per_window // 2) # else: # shift = 0 - shift = -(samples_per_window // 2) - idx = clear_windows.shift(shift) + + # Account for the row # on which the interval is centered not actually + # being in row samples_per_window // 2 if samples_per_window is even + if samples_per_window % 2 == 0: + shift = -(samples_per_window // 2 - 1) + else: + shift = -(samples_per_window // 2) + clear_cols = clear_windows.shift(shift) # drop rows at the end corresponding to windows past the end of data - idx = idx.drop(clear_windows.index[1 - samples_per_window:]) - idx = idx.astype(bool) # shift changed type to object - clear_samples = np.unique(H[:, idx]) + clear_cols = clear_cols.drop(clear_windows.index[1 - samples_per_window:]) + clear_cols = clear_cols.astype(bool) # shift changed type to object + # Boolean mask for column indices of intervals with temporal gaps + gap_cols = [True if c not in gaps else False for c in range(0, + len(clear_windows) - (samples_per_window - 1))] + mask = np.logical_and(clear_cols, gap_cols) + clear_samples = np.unique(H[:, mask]) return clear_samples @@ -756,8 +785,6 @@ def detect_clearsky(measured, clearsky, times=None, infer_limits=False, ------ ValueError If measured is not a Series and times is not provided - NotImplementedError - If timestamps are not equally spaced References ---------- @@ -784,6 +811,11 @@ def detect_clearsky(measured, clearsky, times=None, infer_limits=False, * option to return individual test components and clearsky scaling parameter * uses centered windows (Matlab function uses left-aligned windows) + + 2023-03-24 - This algorithm does accept data with skipped or missing + timestamps. The DatetimeIndex (either times or index of measured) + provided still must be regular, i.e. the length of intervals between + points are equal except in the case that data is missing. """ if times is None: @@ -803,6 +835,13 @@ def detect_clearsky(measured, clearsky, times=None, infer_limits=False, if not isinstance(clearsky, pd.Series): clear = pd.Series(clearsky, index=times) + # This clause is designed to address cases where measured has missing time + # steps - if this is the case, clear should be set to have the same + # missing time intervals as measured. Not doing this may cause issues with + # arrays of different lengths when evaluating comparison criteria and + # when indexing the Hankel matrix to construct clear_samples + elif len(clearsky.index) != len(times): + clear = pd.Series(clearsky, index=times) else: clear = clearsky @@ -828,6 +867,19 @@ def detect_clearsky(measured, clearsky, times=None, infer_limits=False, H = hankel(np.arange(samples_per_window), np.arange(samples_per_window-1, len(times))) + # Identify intervals with missing indices + time_h = times.values[H] + # Get maximum time step (in minutes) between consecutive Timestamps + # for each column + time_h_diff_max = np.max(np.diff(time_h, axis=0) / + np.timedelta64(1, '60s'), axis=0) + # Get column indices where max time step > sample_interval + gaps = np.ravel(np.argwhere(time_h_diff_max > sample_interval)) + # Get column indices where at least one of the values is a NaN + gaps = set().union(*[ + gaps, np.ravel(np.argwhere(np.isnan(meas\ + .values[H].mean(axis=0))))]) + # calculate measurement statistics meas_mean, meas_max, meas_slope_nstd, meas_slope = _calc_stats( meas, samples_per_window, sample_interval, H) @@ -853,26 +905,55 @@ def detect_clearsky(measured, clearsky, times=None, infer_limits=False, line_diff = meas_line_length - clear_line_length slope_max_diff = _max_diff_windowed( meas - scaled_clear, H, samples_per_window) + # evaluate comparison criteria - c1 = np.abs(meas_mean - alpha*clear_mean) < mean_diff - c2 = np.abs(meas_max - alpha*clear_max) < max_diff - c3 = (line_diff > lower_line_length) & (line_diff < upper_line_length) + # Condition 1 + c1 = np.abs(meas_mean - alpha*clear_mean) + c1_where_nan = c1[c1.isna()].index + c1 = c1 < mean_diff + # Condition 2 + c2 = np.abs(meas_max - alpha*clear_max) + c2_where_nan = c2[c2.isna()].index + c2 = c2 < max_diff + # Condition 3a & 3b + c3_where_nan = line_diff[line_diff.isna()].index + c3a = line_diff > lower_line_length + c3b = line_diff < upper_line_length + c3 = np.logical_and(c3a, c3b) + # Condition 4 + c4_where_nan = meas_slope_nstd[meas_slope_nstd.isna()].index c4 = meas_slope_nstd < var_diff + # Condition 5 + c5_where_nan = slope_max_diff[slope_max_diff.isna()].index c5 = slope_max_diff < slope_dev - c6 = (clear_mean != 0) & ~np.isnan(clear_mean) - clear_windows = c1 & c2 & c3 & c4 & c5 & c6 + # Condition 6 + c6 = clear_mean != 0 + c6_where_nan = clear_mean[clear_mean.isna()].index + + # np.logical_and() maintains NaNs + clear_windows = pd.Series( + index=times, data=np.logical_and.reduce([c1, c2, c3, c4, c5, c6])) + windows_where_nan = pd.DatetimeIndex(set().union(*[ + c1_where_nan,c2_where_nan, c3_where_nan, c4_where_nan, c5_where_nan, + c6_where_nan])) + clear_windows[windows_where_nan] = np.nan # create array to return - clear_samples = np.full_like(meas, False, dtype='bool') + # dtype='bool' removed because it typecast NaNs to False values + clear_samples = np.full_like(meas, False) # find the samples contained in any window classified as clear - idx = _clear_sample_index(clear_windows, samples_per_window, 'center', - H) + idx = _clear_sample_index(clear_windows, samples_per_window, gaps, H, + 'center') clear_samples[idx] = True + # Assign NaN to datapoints that were originally NaNs + where_nan = np.argwhere(np.isnan(meas.values)) + clear_samples[where_nan] = np.nan + # find a new alpha previous_alpha = alpha - clear_meas = meas[clear_samples] - clear_clear = clear[clear_samples] + clear_meas = meas[idx] + clear_clear = clear[idx] def rmse(alpha): return np.sqrt(np.mean((clear_meas - alpha*clear_clear)**2)) diff --git a/pvlib/data/detect_clearsky_data_missing1.csv b/pvlib/data/detect_clearsky_data_missing1.csv new file mode 100644 index 0000000000..333b3b2a90 --- /dev/null +++ b/pvlib/data/detect_clearsky_data_missing1.csv @@ -0,0 +1,64 @@ +# latitude: 35.04 +# longitude: -106.62 +# elevation: 1619.0 +,CS,GHI,Clear or not +2020-01-01 11:00:00-07:00,536.302172709558,509.4870640740801,1.0 +2020-01-01 11:01:00-07:00,537.591516239233,510.7119404272713,1.0 +2020-01-01 11:02:00-07:00,538.8629773699926,511.91982850149293,1.0 +2020-01-01 11:03:00-07:00,540.1165267379411,513.110700401044,1.0 +2020-01-01 11:04:00-07:00,541.3521354397624,514.2845286677742,1.0 +2020-01-01 11:05:00-07:00,542.5697750193947,515.441286268425,1.0 +2020-01-01 11:06:00-07:00,543.7694182656351,516.5809473523533,1.0 +2020-01-01 11:07:00-07:00,544.9510360161104,517.7034842153048,1.0 +2020-01-01 11:08:00-07:00,546.1146019716558,518.808871873073,1.0 +2020-01-01 11:09:00-07:00,547.2600894760479,519.8970850022455,1.0 +2020-01-01 11:10:00-07:00,548.3874723122931,520.9680986966785,1.0 +2020-01-01 11:11:00-07:00,549.4967247102148,522.021888474704,1.0 +2020-01-01 11:12:00-07:00,550.5878213431366,523.0584302759797,1.0 +2020-01-01 11:13:00-07:00,551.6607373248604,524.0777004586174,1.0 +2020-01-01 11:14:00-07:00,552.7154482165139,525.0796758056882,1.0 +2020-01-01 11:15:00-07:00,553.7519300136697,526.0643335129862,1.0 +2020-01-01 11:16:00-07:00,554.7701591532945,527.0316511956297,1.0 +2020-01-01 11:17:00-07:00,555.7701131753,527.981607516535,1.0 +2020-01-01 11:18:00-07:00,556.7517680497913,528.9141796473017,1.0 +2020-01-01 11:19:00-07:00,557.7151022068979,529.829347096553,1.0 +2020-01-01 11:20:00-07:00,558.6600938303893,530.7270891388698,1.0 +2020-01-01 11:21:00-07:00,559.5867215409612,531.6073854639131,1.0 +2020-01-01 11:22:00-07:00,560.4949643935382,532.4702161738613,1.0 +2020-01-01 11:23:00-07:00,561.3848018765772,533.3155617827483,1.0 +2020-01-01 11:24:00-07:00,562.2562139096694,534.1434032141859,1.0 +2020-01-01 11:25:00-07:00,563.1091808493264,534.9537218068601,1.0 +2020-01-01 11:26:00-07:00,563.9436834785143,535.7464993045886,1.0 +2020-01-01 11:27:00-07:00,564.7597030125947,536.5217178619649,1.0 +2020-01-01 11:28:00-07:00,565.5572210969589,537.2793600421109,1.0 +2020-01-01 11:29:00-07:00,566.336220321224,538.0194093051628,1.0 +2020-01-01 11:31:00-07:00,567.8385900341821,539.446660532473,1.0 +2020-01-01 11:32:00-07:00,568.5619273418005,540.1338309747105,1.0 +2020-01-01 11:33:00-07:00,569.2666778570792,540.8033439642253,1.0 +2020-01-01 11:34:00-07:00,569.9528257924741,541.4551845028503,1.0 +2020-01-01 11:35:00-07:00,570.6203557904192,542.0893380008982,1.0 +2020-01-01 11:36:00-07:00,571.2692529152785,542.7057902695145,1.0 +2020-01-01 11:37:00-07:00,571.8995026579759,543.304527525077,1.0 +2020-01-01 11:38:00-07:00,572.5110909343163,543.8855363876004,1.0 +2020-01-01 11:39:00-07:00,573.1040040834109,544.4488038792404,1.0 +2020-01-01 11:40:00-07:00,573.6782292505169,544.994317787991,1.0 +2020-01-01 11:41:00-07:00,574.2337528522768,545.522065209663,1.0 +2020-01-01 11:42:00-07:00,574.7705628911892,546.0320347466297,1.0 +2020-01-01 11:43:00-07:00,575.2886474013371,546.5242150312703,1.0 +2020-01-01 11:44:00-07:00,575.7879948384899,546.9985950965654,1.0 +2020-01-01 11:45:00-07:00,576.2685940834351,547.4551643792634,1.0 +2020-01-01 11:46:00-07:00,576.7304344361013,547.8939127142962,1.0 +2020-01-01 11:47:00-07:00,577.1735056190306,548.3148303380791,1.0 +2020-01-01 11:48:00-07:00,577.5977977761287,548.7179078873222,1.0 +2020-01-01 11:49:00-07:00,578.0033014716739,549.1031363980902,1.0 +2020-01-01 11:50:00-07:00,578.3900076930416,549.4705073083895,1.0 +2020-01-01 11:51:00-07:00,578.7579080863377,549.8200126820208,1.0 +2020-01-01 11:52:00-07:00,579.1069939850056,550.1516442857553,1.0 +2020-01-01 11:53:00-07:00,579.4372578890877,550.4653949946334,1.0 +2020-01-01 11:54:00-07:00,579.7486924650192,550.7612578417682,1.0 +2020-01-01 11:55:00-07:00,580.0412908007218,551.0392262606856,1.0 +2020-01-01 11:56:00-07:00,580.3150464019636,551.2992940818655,1.0 +2020-01-01 11:57:00-07:00,580.569953194532,551.5414555348053,1.0 +2020-01-01 11:58:00-07:00,580.8060055234938,551.7657052473191,1.0 +2020-01-01 11:59:00-07:00,581.0231981526458,551.9720382450136,1.0 +2020-01-01 12:00:00-07:00,581.2215262659929,552.1604499526933,1.0 diff --git a/pvlib/data/detect_clearsky_data_missing2.csv b/pvlib/data/detect_clearsky_data_missing2.csv new file mode 100644 index 0000000000..88e9367af0 --- /dev/null +++ b/pvlib/data/detect_clearsky_data_missing2.csv @@ -0,0 +1,64 @@ +# latitude: 35.04 +# longitude: -106.62 +# elevation: 1619.0 +,CS,GHI,Clear or not +2020-01-01 11:00:00-07:00,536.302172709558,509.4870640740801,1.0 +2020-01-01 11:01:00-07:00,537.591516239233,510.7119404272713,1.0 +2020-01-01 11:02:00-07:00,538.8629773699926,511.91982850149293,1.0 +2020-01-01 11:03:00-07:00,540.1165267379411,513.110700401044,1.0 +2020-01-01 11:04:00-07:00,541.3521354397624,514.2845286677742,1.0 +2020-01-01 11:05:00-07:00,542.5697750193947,515.441286268425,1.0 +2020-01-01 11:06:00-07:00,543.7694182656351,516.5809473523533,1.0 +2020-01-01 11:07:00-07:00,544.9510360161104,517.7034842153048,1.0 +2020-01-01 11:08:00-07:00,546.1146019716558,518.808871873073,1.0 +2020-01-01 11:09:00-07:00,547.2600894760479,519.8970850022455,1.0 +2020-01-01 11:10:00-07:00,548.3874723122931,520.9680986966785,1.0 +2020-01-01 11:11:00-07:00,549.4967247102148,522.021888474704,1.0 +2020-01-01 11:12:00-07:00,550.5878213431366,523.0584302759797,1.0 +2020-01-01 11:13:00-07:00,551.6607373248604,524.0777004586174,1.0 +2020-01-01 11:14:00-07:00,552.7154482165139,525.0796758056882,1.0 +2020-01-01 11:15:00-07:00,553.7519300136697,526.0643335129862,1.0 +2020-01-01 11:16:00-07:00,554.7701591532945,527.0316511956297,1.0 +2020-01-01 11:17:00-07:00,555.7701131753,527.981607516535,1.0 +2020-01-01 11:18:00-07:00,556.7517680497913,528.9141796473017,1.0 +2020-01-01 11:19:00-07:00,557.7151022068979,529.829347096553,1.0 +2020-01-01 11:20:00-07:00,558.6600938303893,530.7270891388698,1.0 +2020-01-01 11:21:00-07:00,559.5867215409612,531.6073854639131,1.0 +2020-01-01 11:22:00-07:00,560.4949643935382,532.4702161738613,1.0 +2020-01-01 11:23:00-07:00,561.3848018765772,533.3155617827483,1.0 +2020-01-01 11:24:00-07:00,562.2562139096694,534.1434032141859,1.0 +2020-01-01 11:25:00-07:00,563.1091808493264,534.9537218068601,1.0 +2020-01-01 11:26:00-07:00,563.9436834785143,535.7464993045886,1.0 +2020-01-01 11:27:00-07:00,564.7597030125947,536.5217178619649,1.0 +2020-01-01 11:28:00-07:00,565.5572210969589,537.2793600421109,1.0 +2020-01-01 11:29:00-07:00,566.336220321224,538.0194093051628,1.0 +2020-01-01 11:31:00-07:00,567.8385900341821,539.446660532473,0.0 +2020-01-01 11:32:00-07:00,568.5619273418005,540.1338309747105,0.0 +2020-01-01 11:33:00-07:00,569.2666778570792,540.8033439642253,0.0 +2020-01-01 11:34:00-07:00,569.9528257924741,541.4551845028503,0.0 +2020-01-01 11:35:00-07:00,570.6203557904192,300.0,0.0 +2020-01-01 11:36:00-07:00,571.2692529152785,200.0,0.0 +2020-01-01 11:37:00-07:00,571.8995026579759,250.0,0.0 +2020-01-01 11:38:00-07:00,572.5110909343163,310.0,0.0 +2020-01-01 11:39:00-07:00,573.1040040834109,330.0,0.0 +2020-01-01 11:40:00-07:00,573.6782292505169,544.994317787991,1.0 +2020-01-01 11:41:00-07:00,574.2337528522768,545.522065209663,1.0 +2020-01-01 11:42:00-07:00,574.7705628911892,546.0320347466297,1.0 +2020-01-01 11:43:00-07:00,575.2886474013371,546.5242150312703,1.0 +2020-01-01 11:44:00-07:00,575.7879948384899,546.9985950965654,1.0 +2020-01-01 11:45:00-07:00,576.2685940834351,547.4551643792634,1.0 +2020-01-01 11:46:00-07:00,576.7304344361013,547.8939127142962,1.0 +2020-01-01 11:47:00-07:00,577.1735056190306,548.3148303380791,1.0 +2020-01-01 11:48:00-07:00,577.5977977761287,548.7179078873222,1.0 +2020-01-01 11:49:00-07:00,578.0033014716739,549.1031363980902,1.0 +2020-01-01 11:50:00-07:00,578.3900076930416,549.4705073083895,1.0 +2020-01-01 11:51:00-07:00,578.7579080863377,549.8200126820208,1.0 +2020-01-01 11:52:00-07:00,579.1069939850056,550.1516442857553,1.0 +2020-01-01 11:53:00-07:00,579.4372578890877,550.4653949946334,1.0 +2020-01-01 11:54:00-07:00,579.7486924650192,550.7612578417682,1.0 +2020-01-01 11:55:00-07:00,580.0412908007218,551.0392262606856,1.0 +2020-01-01 11:56:00-07:00,580.3150464019636,551.2992940818655,1.0 +2020-01-01 11:57:00-07:00,580.569953194532,551.5414555348053,1.0 +2020-01-01 11:58:00-07:00,580.8060055234938,551.7657052473191,1.0 +2020-01-01 11:59:00-07:00,581.0231981526458,551.9720382450136,1.0 +2020-01-01 12:00:00-07:00,581.2215262659929,552.1604499526933,1.0 diff --git a/pvlib/data/detect_clearsky_data_missing3.csv b/pvlib/data/detect_clearsky_data_missing3.csv new file mode 100644 index 0000000000..7b703cb6b2 --- /dev/null +++ b/pvlib/data/detect_clearsky_data_missing3.csv @@ -0,0 +1,50 @@ +# latitude: 35.04 +# longitude: -106.62 +# elevation: 1619.0 +,CS,GHI,Clear or not +2020-01-01 11:00:00-07:00,536.302172709558,509.4870640740801,1.0 +2020-01-01 11:01:00-07:00,537.591516239233,510.7119404272713,1.0 +2020-01-01 11:02:00-07:00,538.8629773699926,511.91982850149293,1.0 +2020-01-01 11:03:00-07:00,540.1165267379411,513.110700401044,1.0 +2020-01-01 11:04:00-07:00,541.3521354397624,514.2845286677742,1.0 +2020-01-01 11:05:00-07:00,542.5697750193947,515.441286268425,1.0 +2020-01-01 11:06:00-07:00,543.7694182656351,516.5809473523533,1.0 +2020-01-01 11:07:00-07:00,544.9510360161104,517.7034842153048,1.0 +2020-01-01 11:08:00-07:00,546.1146019716558,518.808871873073,1.0 +2020-01-01 11:09:00-07:00,547.2600894760479,519.8970850022455,1.0 +2020-01-01 11:10:00-07:00,548.3874723122931,520.9680986966785,1.0 +2020-01-01 11:11:00-07:00,549.4967247102148,522.021888474704,1.0 +2020-01-01 11:12:00-07:00,550.5878213431366,523.0584302759797,1.0 +2020-01-01 11:13:00-07:00,551.6607373248604,524.0777004586174,1.0 +2020-01-01 11:14:00-07:00,552.7154482165139,525.0796758056882,1.0 +2020-01-01 11:15:00-07:00,553.7519300136697,526.0643335129862,1.0 +2020-01-01 11:16:00-07:00,554.7701591532945,527.0316511956297,1.0 +2020-01-01 11:17:00-07:00,555.7701131753,527.981607516535,1.0 +2020-01-01 11:18:00-07:00,556.7517680497913,528.9141796473017,1.0 +2020-01-01 11:19:00-07:00,557.7151022068979,529.829347096553,1.0 +2020-01-01 11:20:00-07:00,558.6600938303893,530.7270891388698,1.0 +2020-01-01 11:21:00-07:00,559.5867215409612,531.6073854639131,1.0 +2020-01-01 11:22:00-07:00,560.4949643935382,532.4702161738613,1.0 +2020-01-01 11:23:00-07:00,561.3848018765772,533.3155617827483,1.0 +2020-01-01 11:24:00-07:00,562.2562139096694,534.1434032141859,1.0 +2020-01-01 11:25:00-07:00,563.1091808493264,534.9537218068601,1.0 +2020-01-01 11:26:00-07:00,563.9436834785143,535.7464993045886,1.0 +2020-01-01 11:27:00-07:00,564.7597030125947,536.5217178619649,1.0 +2020-01-01 11:28:00-07:00,565.5572210969589,537.2793600421109,1.0 +2020-01-01 11:29:00-07:00,566.336220321224,538.0194093051628,1.0 +2020-01-01 11:45:00-07:00,576.2685940834351,547.4551643792634,1.0 +2020-01-01 11:46:00-07:00,576.7304344361013,547.8939127142962,1.0 +2020-01-01 11:47:00-07:00,577.1735056190306,548.3148303380791,1.0 +2020-01-01 11:48:00-07:00,577.5977977761287,548.7179078873222,1.0 +2020-01-01 11:49:00-07:00,578.0033014716739,549.1031363980902,1.0 +2020-01-01 11:50:00-07:00,578.3900076930416,549.4705073083895,1.0 +2020-01-01 11:51:00-07:00,578.7579080863377,549.8200126820208,1.0 +2020-01-01 11:52:00-07:00,579.1069939850056,550.1516442857553,1.0 +2020-01-01 11:53:00-07:00,579.4372578890877,550.4653949946334,1.0 +2020-01-01 11:54:00-07:00,579.7486924650192,550.7612578417682,1.0 +2020-01-01 11:55:00-07:00,580.0412908007218,551.0392262606856,1.0 +2020-01-01 11:56:00-07:00,580.3150464019636,551.2992940818655,1.0 +2020-01-01 11:57:00-07:00,580.569953194532,551.5414555348053,1.0 +2020-01-01 11:58:00-07:00,580.8060055234938,551.7657052473191,1.0 +2020-01-01 11:59:00-07:00,581.0231981526458,551.9720382450136,1.0 +2020-01-01 12:00:00-07:00,581.2215262659929,552.1604499526933,1.0 diff --git a/pvlib/data/detect_clearsky_data_nans1.csv b/pvlib/data/detect_clearsky_data_nans1.csv new file mode 100644 index 0000000000..d4aabff89e --- /dev/null +++ b/pvlib/data/detect_clearsky_data_nans1.csv @@ -0,0 +1,65 @@ +# latitude: 35.04 +# longitude: -106.62 +# elevation: 1619.0 +,CS,GHI,Clear or not +2020-01-01 11:00:00-07:00,536.302172709558,509.4870640740801,1.0 +2020-01-01 11:01:00-07:00,537.591516239233,510.7119404272713,1.0 +2020-01-01 11:02:00-07:00,538.8629773699926,511.91982850149293,1.0 +2020-01-01 11:03:00-07:00,540.1165267379411,513.110700401044,1.0 +2020-01-01 11:04:00-07:00,541.3521354397624,514.2845286677742,1.0 +2020-01-01 11:05:00-07:00,542.5697750193947,515.441286268425,1.0 +2020-01-01 11:06:00-07:00,543.7694182656351,516.5809473523533,1.0 +2020-01-01 11:07:00-07:00,544.9510360161104,517.7034842153048,1.0 +2020-01-01 11:08:00-07:00,546.1146019716558,518.808871873073,1.0 +2020-01-01 11:09:00-07:00,547.2600894760479,519.8970850022455,1.0 +2020-01-01 11:10:00-07:00,548.3874723122931,520.9680986966785,1.0 +2020-01-01 11:11:00-07:00,549.4967247102148,522.021888474704,1.0 +2020-01-01 11:12:00-07:00,550.5878213431366,523.0584302759797,1.0 +2020-01-01 11:13:00-07:00,551.6607373248604,524.0777004586174,1.0 +2020-01-01 11:14:00-07:00,552.7154482165139,525.0796758056882,1.0 +2020-01-01 11:15:00-07:00,553.7519300136697,526.0643335129862,1.0 +2020-01-01 11:16:00-07:00,554.7701591532945,527.0316511956297,1.0 +2020-01-01 11:17:00-07:00,555.7701131753,527.981607516535,1.0 +2020-01-01 11:18:00-07:00,556.7517680497913,528.9141796473017,1.0 +2020-01-01 11:19:00-07:00,557.7151022068979,529.829347096553,1.0 +2020-01-01 11:20:00-07:00,558.6600938303893,530.7270891388698,1.0 +2020-01-01 11:21:00-07:00,559.5867215409612,531.6073854639131,1.0 +2020-01-01 11:22:00-07:00,560.4949643935382,532.4702161738613,1.0 +2020-01-01 11:23:00-07:00,561.3848018765772,533.3155617827483,1.0 +2020-01-01 11:24:00-07:00,562.2562139096694,534.1434032141859,1.0 +2020-01-01 11:25:00-07:00,563.1091808493264,534.9537218068601,1.0 +2020-01-01 11:26:00-07:00,563.9436834785143,535.7464993045886,1.0 +2020-01-01 11:27:00-07:00,564.7597030125947,536.5217178619649,1.0 +2020-01-01 11:28:00-07:00,565.5572210969589,537.2793600421109,1.0 +2020-01-01 11:29:00-07:00,566.336220321224,538.0194093051628,1.0 +2020-01-01 11:30:00-07:00,567.0966821472473,, +2020-01-01 11:31:00-07:00,567.8385900341821,539.446660532473,1.0 +2020-01-01 11:32:00-07:00,568.5619273418005,540.1338309747105,1.0 +2020-01-01 11:33:00-07:00,569.2666778570792,540.8033439642253,1.0 +2020-01-01 11:34:00-07:00,569.9528257924741,541.4551845028503,1.0 +2020-01-01 11:35:00-07:00,570.6203557904192,542.0893380008982,1.0 +2020-01-01 11:36:00-07:00,571.2692529152785,542.7057902695145,1.0 +2020-01-01 11:37:00-07:00,571.8995026579759,543.304527525077,1.0 +2020-01-01 11:38:00-07:00,572.5110909343163,543.8855363876004,1.0 +2020-01-01 11:39:00-07:00,573.1040040834109,544.4488038792404,1.0 +2020-01-01 11:40:00-07:00,573.6782292505169,544.994317787991,1.0 +2020-01-01 11:41:00-07:00,574.2337528522768,545.522065209663,1.0 +2020-01-01 11:42:00-07:00,574.7705628911892,546.0320347466297,1.0 +2020-01-01 11:43:00-07:00,575.2886474013371,546.5242150312703,1.0 +2020-01-01 11:44:00-07:00,575.7879948384899,546.9985950965654,1.0 +2020-01-01 11:45:00-07:00,576.2685940834351,547.4551643792634,1.0 +2020-01-01 11:46:00-07:00,576.7304344361013,547.8939127142962,1.0 +2020-01-01 11:47:00-07:00,577.1735056190306,548.3148303380791,1.0 +2020-01-01 11:48:00-07:00,577.5977977761287,548.7179078873222,1.0 +2020-01-01 11:49:00-07:00,578.0033014716739,549.1031363980902,1.0 +2020-01-01 11:50:00-07:00,578.3900076930416,549.4705073083895,1.0 +2020-01-01 11:51:00-07:00,578.7579080863377,549.8200126820208,1.0 +2020-01-01 11:52:00-07:00,579.1069939850056,550.1516442857553,1.0 +2020-01-01 11:53:00-07:00,579.4372578890877,550.4653949946334,1.0 +2020-01-01 11:54:00-07:00,579.7486924650192,550.7612578417682,1.0 +2020-01-01 11:55:00-07:00,580.0412908007218,551.0392262606856,1.0 +2020-01-01 11:56:00-07:00,580.3150464019636,551.2992940818655,1.0 +2020-01-01 11:57:00-07:00,580.569953194532,551.5414555348053,1.0 +2020-01-01 11:58:00-07:00,580.8060055234938,551.7657052473191,1.0 +2020-01-01 11:59:00-07:00,581.0231981526458,551.9720382450136,1.0 +2020-01-01 12:00:00-07:00,581.2215262659929,552.1604499526933,1.0 diff --git a/pvlib/data/detect_clearsky_data_nans2.csv b/pvlib/data/detect_clearsky_data_nans2.csv new file mode 100644 index 0000000000..5f91388b73 --- /dev/null +++ b/pvlib/data/detect_clearsky_data_nans2.csv @@ -0,0 +1,65 @@ +# latitude: 35.04 +# longitude: -106.62 +# elevation: 1619.0 +,CS,GHI,Clear or not +2020-01-01 11:00:00-07:00,536.302172709558,509.4870640740801,1.0 +2020-01-01 11:01:00-07:00,537.591516239233,510.7119404272713,1.0 +2020-01-01 11:02:00-07:00,538.8629773699926,511.91982850149293,1.0 +2020-01-01 11:03:00-07:00,540.1165267379411,513.110700401044,1.0 +2020-01-01 11:04:00-07:00,541.3521354397624,514.2845286677742,1.0 +2020-01-01 11:05:00-07:00,542.5697750193947,515.441286268425,1.0 +2020-01-01 11:06:00-07:00,543.7694182656351,516.5809473523533,1.0 +2020-01-01 11:07:00-07:00,544.9510360161104,517.7034842153048,1.0 +2020-01-01 11:08:00-07:00,546.1146019716558,518.808871873073,1.0 +2020-01-01 11:09:00-07:00,547.2600894760479,519.8970850022455,1.0 +2020-01-01 11:10:00-07:00,548.3874723122931,520.9680986966785,1.0 +2020-01-01 11:11:00-07:00,549.4967247102148,522.021888474704,1.0 +2020-01-01 11:12:00-07:00,550.5878213431366,523.0584302759797,1.0 +2020-01-01 11:13:00-07:00,551.6607373248604,524.0777004586174,1.0 +2020-01-01 11:14:00-07:00,552.7154482165139,525.0796758056882,1.0 +2020-01-01 11:15:00-07:00,553.7519300136697,526.0643335129862,1.0 +2020-01-01 11:16:00-07:00,554.7701591532945,527.0316511956297,1.0 +2020-01-01 11:17:00-07:00,555.7701131753,527.981607516535,1.0 +2020-01-01 11:18:00-07:00,556.7517680497913,528.9141796473017,1.0 +2020-01-01 11:19:00-07:00,557.7151022068979,529.829347096553,1.0 +2020-01-01 11:20:00-07:00,558.6600938303893,530.7270891388698,1.0 +2020-01-01 11:21:00-07:00,559.5867215409612,531.6073854639131,1.0 +2020-01-01 11:22:00-07:00,560.4949643935382,532.4702161738613,1.0 +2020-01-01 11:23:00-07:00,561.3848018765772,533.3155617827483,1.0 +2020-01-01 11:24:00-07:00,562.2562139096694,534.1434032141859,1.0 +2020-01-01 11:25:00-07:00,563.1091808493264,534.9537218068601,1.0 +2020-01-01 11:26:00-07:00,563.9436834785143,535.7464993045886,1.0 +2020-01-01 11:27:00-07:00,564.7597030125947,536.5217178619649,1.0 +2020-01-01 11:28:00-07:00,565.5572210969589,, +2020-01-01 11:29:00-07:00,566.336220321224,, +2020-01-01 11:30:00-07:00,567.0966821472473,538.7418480398849,0.0 +2020-01-01 11:31:00-07:00,567.8385900341821,539.446660532473,0.0 +2020-01-01 11:32:00-07:00,568.5619273418005,540.1338309747105,0.0 +2020-01-01 11:33:00-07:00,569.2666778570792,540.8033439642253,0.0 +2020-01-01 11:34:00-07:00,569.9528257924741,300.0,0.0 +2020-01-01 11:35:00-07:00,570.6203557904192,200.0,0.0 +2020-01-01 11:36:00-07:00,571.2692529152785,250.0,0.0 +2020-01-01 11:37:00-07:00,571.8995026579759,310.0,0.0 +2020-01-01 11:38:00-07:00,572.5110909343163,330.0,0.0 +2020-01-01 11:39:00-07:00,573.1040040834109,544.4488038792404,1.0 +2020-01-01 11:40:00-07:00,573.6782292505169,544.994317787991,1.0 +2020-01-01 11:41:00-07:00,574.2337528522768,545.522065209663,1.0 +2020-01-01 11:42:00-07:00,574.7705628911892,546.0320347466297,1.0 +2020-01-01 11:43:00-07:00,575.2886474013371,546.5242150312703,1.0 +2020-01-01 11:44:00-07:00,575.7879948384899,546.9985950965654,1.0 +2020-01-01 11:45:00-07:00,576.2685940834351,547.4551643792634,1.0 +2020-01-01 11:46:00-07:00,576.7304344361013,547.8939127142962,1.0 +2020-01-01 11:47:00-07:00,577.1735056190306,548.3148303380791,1.0 +2020-01-01 11:48:00-07:00,577.5977977761287,548.7179078873222,1.0 +2020-01-01 11:49:00-07:00,578.0033014716739,549.1031363980902,1.0 +2020-01-01 11:50:00-07:00,578.3900076930416,549.4705073083895,1.0 +2020-01-01 11:51:00-07:00,578.7579080863377,549.8200126820208,1.0 +2020-01-01 11:52:00-07:00,579.1069939850056,550.1516442857553,1.0 +2020-01-01 11:53:00-07:00,579.4372578890877,550.4653949946334,1.0 +2020-01-01 11:54:00-07:00,579.7486924650192,550.7612578417682,1.0 +2020-01-01 11:55:00-07:00,580.0412908007218,551.0392262606856,1.0 +2020-01-01 11:56:00-07:00,580.3150464019636,551.2992940818655,1.0 +2020-01-01 11:57:00-07:00,580.569953194532,551.5414555348053,1.0 +2020-01-01 11:58:00-07:00,580.8060055234938,551.7657052473191,1.0 +2020-01-01 11:59:00-07:00,581.0231981526458,551.9720382450136,1.0 +2020-01-01 12:00:00-07:00,581.2215262659929,552.1604499526933,1.0 diff --git a/pvlib/tests/test_clearsky.py b/pvlib/tests/test_clearsky.py index c2ef607f0f..3fc3dbf58d 100644 --- a/pvlib/tests/test_clearsky.py +++ b/pvlib/tests/test_clearsky.py @@ -8,7 +8,7 @@ import pytest from numpy.testing import assert_allclose -from .conftest import assert_frame_equal, assert_series_equal +from .conftest import assert_frame_equal, assert_series_equal, DATA_DIR from pvlib.location import Location from pvlib import clearsky @@ -16,9 +16,6 @@ from pvlib import atmosphere from pvlib import irradiance -from .conftest import DATA_DIR - - def test_ineichen_series(): times = pd.date_range(start='2014-06-24', end='2014-06-25', freq='3h', tz='America/Phoenix') @@ -656,21 +653,89 @@ def test_detect_clearsky_arrays(detect_clearsky_data): assert (clear_samples == expected['Clear or not'].values).all() -def test_detect_clearsky_irregular_times(detect_clearsky_data): - expected, cs = detect_clearsky_data - times = cs.index.values.copy() - times[0] += 10**9 - times = pd.DatetimeIndex(times) - with pytest.raises(NotImplementedError): - clearsky.detect_clearsky(expected['GHI'].values, cs['ghi'].values, - times, 10) +def test_detect_clearsky_missing_index1(): + # Test for an isolated missing index + data_file = DATA_DIR / 'detect_clearsky_data_missing1.csv' + data = pd.read_csv( + data_file, index_col=0, parse_dates=True, comment='#') + meas, cs, expected = data['GHI'], data['CS'], data['Clear or not'] + clear_samples = clearsky.detect_clearsky( + meas, cs) + assert_series_equal(expected, clear_samples, check_dtype=False, + check_names=False) -def test_detect_clearsky_missing_index(detect_clearsky_data): - expected, cs = detect_clearsky_data - with pytest.raises(ValueError): - clearsky.detect_clearsky(expected['GHI'].values, cs['ghi'].values) +def test_detect_clearsky_missing_index2(): + # Test for a missing index followed by an overcast period + data_file = DATA_DIR / 'detect_clearsky_data_missing2.csv' + data = pd.read_csv( + data_file, index_col=0, parse_dates=True, comment='#') + meas, cs, expected = data['GHI'], data['CS'], data['Clear or not'] + clear_samples = clearsky.detect_clearsky( + meas, cs) + assert_series_equal(expected, clear_samples, check_dtype=False, + check_names=False) + +def test_detect_clearsky_missing_index3(): + # Test for 15 consecutive missing indices + data_file = DATA_DIR / 'detect_clearsky_data_missing3.csv' + data = pd.read_csv( + data_file, index_col=0, parse_dates=True, comment='#') + meas, cs, expected = data['GHI'], data['CS'], data['Clear or not'] + clear_samples = clearsky.detect_clearsky( + meas, cs) + assert_series_equal(expected, clear_samples, check_dtype=False, + check_names=False) + +def test_detect_clearsky_nans1(): + # Test for 1 NaN value - should mark as NaN + data_file = DATA_DIR / 'detect_clearsky_data_nans1.csv' + data = pd.read_csv( + data_file, index_col=0, parse_dates=True, comment='#') + meas, cs, expected = data['GHI'], data['CS'], data['Clear or not'] + clear_samples = clearsky.detect_clearsky( + meas, cs) + assert_series_equal(expected, clear_samples, check_dtype=False, + check_names=False) + + +def test_detect_clearsky_nans2(): + # Test for 1 NaN value - should mark as NaN + data_file = DATA_DIR / 'detect_clearsky_data_nans2.csv' + data = pd.read_csv( + data_file, index_col=0, parse_dates=True, comment='#') + meas, cs, expected = data['GHI'], data['CS'], data['Clear or not'] + clear_samples = clearsky.detect_clearsky( + meas, cs) + assert_series_equal(expected, clear_samples, check_dtype=False, + check_names=False) + +def test_detect_clearsky_diff_index_lengths(detect_clearsky_data): + ''' + Intended to test the following if/else clauses + + if not isinstance(clear_sky, pd.Series): + clear = pd.Series(clear_sky, index=times) + # This clause is designed to address cases where measured has missing time + # steps - if this is the case, clear should be set to have the same + # missing time intervals as measured. Not doing this may cause issues with + # arrays of different lengths when evaluating comparison criteria and + # when indexing the Hankel matrix to construct clear_samples + elif len(clear_sky.index) != len(times): + clear = pd.Series(clear_sky, index=times) + else: + clear = clear_sky + ''' + expected, cs = detect_clearsky_data + expected.drop(index=expected.index[10], inplace=True) + clear_samples = clearsky.detect_clearsky( + expected['GHI'], cs['ghi'], times=expected.index, + window_length=10) + new_expected = np.array([1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 0., + 0., 0., 0., 1., 1., 1., 1., 1., 1., 1., 1., + 1., 1., 1., 0., 0., 0., 0.]) + assert (clear_samples.values == new_expected).all() def test_detect_clearsky_not_enough_data(detect_clearsky_data): expected, cs = detect_clearsky_data @@ -705,7 +770,7 @@ def test__line_length_windowed(detect_clearsky_helper_data): expected['line_length'] = sqt + sqt.shift(-1) result = clearsky._line_length_windowed( x, H, samples_per_window, sample_interval) - assert_series_equal(result, expected['line_length']) + assert_series_equal(result, expected['line_length'], check_dtype=False) def test__max_diff_windowed(detect_clearsky_helper_data): @@ -714,7 +779,7 @@ def test__max_diff_windowed(detect_clearsky_helper_data): expected['max_diff'] = pd.Series( data=[np.nan, 3., 5., 7., 9., 11., np.nan], index=x.index) result = clearsky._max_diff_windowed(x, H, samples_per_window) - assert_series_equal(result, expected['max_diff']) + assert_series_equal(result, expected['max_diff'], check_dtype=False) def test__calc_stats(detect_clearsky_helper_data): @@ -736,10 +801,11 @@ def test__calc_stats(detect_clearsky_helper_data): result = clearsky._calc_stats( x, samples_per_window, sample_interval, H) res_mean, res_max, res_slope_nstd, res_slope = result - assert_series_equal(res_mean, expected['mean']) - assert_series_equal(res_max, expected['max']) - assert_series_equal(res_slope_nstd, expected['slope_nstd']) - assert_series_equal(res_slope, expected['slope']) + assert_series_equal(res_mean, expected['mean'], check_dtype=False) + assert_series_equal(res_max, expected['max'], check_dtype=False) + assert_series_equal(res_slope_nstd, expected['slope_nstd'], + check_dtype=False) + assert_series_equal(res_slope, expected['slope'], check_dtype=False) def test_bird(): diff --git a/pvlib/tests/test_tools.py b/pvlib/tests/test_tools.py index 583141a726..f695cea61c 100644 --- a/pvlib/tests/test_tools.py +++ b/pvlib/tests/test_tools.py @@ -1,9 +1,8 @@ import pytest - from pvlib import tools import numpy as np import pandas as pd - +from .conftest import DATA_DIR @pytest.mark.parametrize('keys, input_dict, expected', [ (['a', 'b'], {'a': 1, 'b': 2, 'c': 3}, {'a': 1, 'b': 2}), @@ -91,6 +90,15 @@ def test__golden_sect_DataFrame_nans(): assert np.allclose(x, expected, atol=1e-8, equal_nan=True) +def test_get_sample_intervals(): + data_file = DATA_DIR / 'detect_clearsky_data_missing3.csv' + data = pd.read_csv( + data_file, index_col=0, parse_dates=True, comment='#') + sample_interval, samples_per_window = tools._get_sample_intervals( + data.index, 10) + assert np.allclose(sample_interval, 1) + assert np.allclose(samples_per_window, 10) + def test_degrees_to_index_1(): """Test that _degrees_to_index raises an error when something other than 'latitude' or 'longitude' is passed.""" diff --git a/pvlib/tools.py b/pvlib/tools.py index fe1b79a5f1..9d2775703b 100644 --- a/pvlib/tools.py +++ b/pvlib/tools.py @@ -389,20 +389,10 @@ def _get_sample_intervals(times, win_length): sky detection functions """ deltas = np.diff(times.values) / np.timedelta64(1, '60s') - - # determine if we can proceed - if times.inferred_freq and len(np.unique(deltas)) == 1: - sample_interval = times[1] - times[0] - sample_interval = sample_interval.seconds / 60 # in minutes - samples_per_window = int(win_length / sample_interval) - return sample_interval, samples_per_window - else: - message = ( - 'algorithm does not yet support unequal time intervals. consider ' - 'resampling your data and checking for gaps from missing ' - 'periods, leap days, etc.' - ) - raise NotImplementedError(message) + vals, counts = np.unique(deltas, return_counts=True) + sample_interval = vals[np.argmax(counts)] + samples_per_window = int(win_length / sample_interval) + return sample_interval, samples_per_window def _degrees_to_index(degrees, coordinate):