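# Latent-diffusion (Stable Diffusion 2.x-style) model config. Each `target`
# names a class by import path and is instantiated with its `params` mapping.
# Keys such as use_fp16, enable_flash_attention and tokenizer_name suggest a
# MindSpore-style port of the CompVis ldm codebase.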
model:
  #base_learning_rate: 1.0e-05
  target: ldm.models.diffusion.ddpm.LatentDiffusion
  params:
    linear_start: 0.00085
    linear_end: 0.0120
    num_timesteps_cond: 1
    log_every_t: 200
    timesteps: 1000
    first_stage_key: "image"
    cond_stage_key: "caption"
    cond_stage_trainable: False # TODO: make configurable
    image_size: 64
    channels: 4
    conditioning_key: crossattn
    monitor: val/loss_simple_ema
    scale_factor: 0.18215
    #use_ema: False # TODO: set in args
    use_fp16: True
    parameterization: "eps" # default, original ldm
    #parameterization: "velocity"
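    # Noise schedule and training target (descriptive notes; assumes the
    # stock CompVis ldm conventions carry over to this port):
    #   - with the "linear" schedule, betas are spaced in sqrt-space:
    #       betas = linspace(linear_start**0.5, linear_end**0.5, timesteps)**2
    #   - parameterization "eps" trains the UNet to predict the added noise;
    #     the commented "velocity" variant would instead predict
    #       v = sqrt(alphas_cumprod_t) * eps - sqrt(1 - alphas_cumprod_t) * x0
    #   - scale_factor 0.18215 is the usual SD rescaling applied to VAE
    #     latents before diffusion.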

    unet_config:
      target: ldm.modules.diffusionmodules.openaimodel.UNetModel
      params:
        image_size: 32 # unused
        in_channels: 4
        out_channels: 4
        model_channels: 320
        attention_resolutions: [ 4, 2, 1 ]
        num_res_blocks: 1
        channel_mult: [ 1, 1, 1, 1 ]
        #num_heads: 8  # Wukong
        num_head_channels: 64  # SD_VERSION v2.0
        use_spatial_transformer: True
        enable_flash_attention: True
        use_linear_in_transformer: True # SD_VERSION v2.0
        transformer_depth: 1
        #context_dim: 768
        context_dim: 1024 # SD_VERSION v2.0
        use_checkpoint: True
        legacy: False
        use_fp16: True
        dropout: 0.1
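        # Notes on the block above (hedged; based on the reference CompVis
        # UNetModel semantics):
        #   - attention_resolutions [4, 2, 1] enables (cross-)attention at
        #     downsample factors 4x, 2x and 1x.
        #   - num_head_channels: 64 fixes the per-head width, so head count
        #     scales with channel count (the SD 2.x convention).
        #   - context_dim: 1024 must match output_dim of the text encoder below.
        #   - channel_mult [1, 1, 1, 1] with num_res_blocks: 1 is lighter than
        #     stock SD 2.x ([1, 2, 4, 4] with 2 res blocks), presumably a
        #     deliberate capacity/speed trade-off in this config.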

    first_stage_config:
      target: ldm.models.autoencoder.AutoencoderKL
      params:
        embed_dim: 4
        monitor: val/rec_loss
        use_fp16: True
        ddconfig:
          double_z: True
          z_channels: 4
          resolution: 256
          in_channels: 3
          out_ch: 3
          ch: 128
          ch_mult: [ 1, 2, 4, 4 ]
          num_res_blocks: 2
          attn_resolutions: []
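          # Notes (hedged, per the CompVis autoencoder semantics): with four
          # ch_mult entries the encoder downsamples 2^3 = 8x, so 512px inputs
          # give the 64x64x4 latents assumed by image_size/channels above.
          # resolution: 256 only positions attn_resolutions, which is empty
          # here, so the network itself is resolution-agnostic.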

    cond_stage_config:
      target: ldm.modules.encoders.modules.FrozenCLIPEmbedder
      params:
        use_fp16: True
        tokenizer_name: "BpeTokenizer"
        context_length: 77
        vocab_size: 49408
        output_dim: 1024
        width: 1024
        layers: 23
        heads: 16
        epsilon: 1e-5
        use_quick_gelu: False
80