@@ -153,9 +153,9 @@ class Encoder(nn.Module):
         channels: sequence of block output channels.
         out_channels: number of channels in the bottom layer (latent space) of the autoencoder.
         num_res_blocks: number of residual blocks (see _ResBlock) per level.
-        norm_num_groups: number of groups for the GroupNorm layers, num_channels must be divisible by this number.
+        norm_num_groups: number of groups for the GroupNorm layers, channels must be divisible by this number.
         norm_eps: epsilon for the normalization.
-        attention_levels: indicate which level from num_channels contain an attention block.
+        attention_levels: indicate which level from channels contain an attention block.
         with_nonlocal_attn: if True use non-local attention block.
         include_fc: whether to include the final linear layer. Default to True.
         use_combined_linear: whether to use a single linear layer for qkv projection, default to False.
@@ -299,9 +299,9 @@ class Decoder(nn.Module):
         in_channels: number of channels in the bottom layer (latent space) of the autoencoder.
         out_channels: number of output channels.
         num_res_blocks: number of residual blocks (see _ResBlock) per level.
-        norm_num_groups: number of groups for the GroupNorm layers, num_channels must be divisible by this number.
+        norm_num_groups: number of groups for the GroupNorm layers, channels must be divisible by this number.
         norm_eps: epsilon for the normalization.
-        attention_levels: indicate which level from num_channels contain an attention block.
+        attention_levels: indicate which level from channels contain an attention block.
         with_nonlocal_attn: if True use non-local attention block.
         use_convtranspose: if True, use ConvTranspose to upsample feature maps in decoder.
         include_fc: whether to include the final linear layer. Default to True.
@@ -483,7 +483,7 @@ class AutoencoderKL(nn.Module):
         channels: number of output channels for each block.
         attention_levels: sequence of levels to add attention.
         latent_channels: latent embedding dimension.
-        norm_num_groups: number of groups for the GroupNorm layers, num_channels must be divisible by this number.
+        norm_num_groups: number of groups for the GroupNorm layers, channels must be divisible by this number.
         norm_eps: epsilon for the normalization.
         with_encoder_nonlocal_attn: if True use non-local attention block in the encoder.
         with_decoder_nonlocal_attn: if True use non-local attention block in the decoder.
@@ -518,18 +518,18 @@ def __init__(
 
         # All number of channels should be multiple of num_groups
         if any((out_channel % norm_num_groups) != 0 for out_channel in channels):
-            raise ValueError("AutoencoderKL expects all num_channels being multiple of norm_num_groups")
+            raise ValueError("AutoencoderKL expects all channels being multiple of norm_num_groups")
 
         if len(channels) != len(attention_levels):
-            raise ValueError("AutoencoderKL expects num_channels being same size of attention_levels")
+            raise ValueError("AutoencoderKL expects channels being same size of attention_levels")
 
         if isinstance(num_res_blocks, int):
             num_res_blocks = ensure_tuple_rep(num_res_blocks, len(channels))
 
         if len(num_res_blocks) != len(channels):
             raise ValueError(
                 "`num_res_blocks` should be a single integer or a tuple of integers with the same length as "
-                "`num_channels`."
+                "`channels`."
             )
 
         self.encoder: nn.Module = Encoder(
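
The validation shown in the last hunk pins down how the renamed argument is used: every entry of channels must be divisible by norm_num_groups, attention_levels must have the same length as channels, and an integer num_res_blocks is expanded to one value per level via ensure_tuple_rep. A minimal usage sketch under those constraints (assuming the class is importable as monai.networks.nets.AutoencoderKL and takes the usual spatial_dims/in_channels/out_channels arguments, which do not appear in this diff):

    # Hypothetical example, not part of the patch: keyword names follow the docstrings
    # shown above; the import path and spatial_dims/in_channels/out_channels are assumed.
    from monai.networks.nets import AutoencoderKL

    model = AutoencoderKL(
        spatial_dims=2,
        in_channels=1,
        out_channels=1,
        channels=(64, 128, 256),                # each entry divisible by norm_num_groups
        latent_channels=8,
        num_res_blocks=2,                       # an int is expanded to (2, 2, 2)
        norm_num_groups=32,
        attention_levels=(False, False, True),  # same length as channels
    )

    # channels=(48, 96) with norm_num_groups=32 would instead raise:
    # "AutoencoderKL expects all channels being multiple of norm_num_groups"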