@@ -3138,6 +3138,17 @@ zio_write_gang_block(zio_t *pio, metaslab_class_t *mc)
 	 * encryption, which uses DVA[2] for the IV+salt.
 	 */
 	int gbh_copies = gio->io_prop.zp_gang_copies;
+	if (gbh_copies == 0) {
+		/*
+		 * This should only happen in the case where we're filling in
+		 * DDT entries for a parent that wants more copies than the DDT
+		 * has. In that case, we cannot gang without creating a mixed
+		 * blkptr, which is illegal.
+		 */
+		ASSERT3U(gio->io_child_type, ==, ZIO_CHILD_DDT);
+		pio->io_error = EAGAIN;
+		return (pio);
+	}
 	ASSERT3S(gbh_copies, >, 0);
 	ASSERT3S(gbh_copies, <=, SPA_DVAS_PER_BP);
 
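The bailout above protects an on-disk invariant: all DVAs of a single block pointer must agree on whether they are gang blocks. The following free-standing sketch illustrates that property; dva_sketch_t and bp_dvas_consistent() are hypothetical stand-ins for illustration, not part of the ZFS API.

#include <stdbool.h>
#include <stddef.h>

typedef struct {
	bool	is_gang;	/* does this DVA point at a gang header? */
} dva_sketch_t;

/* A BP is valid only if its DVAs are uniformly gang or uniformly not. */
static bool
bp_dvas_consistent(const dva_sketch_t *dvas, size_t ndvas)
{
	for (size_t i = 1; i < ndvas; i++) {
		if (dvas[i].is_gang != dvas[0].is_gang)
			return (false);	/* mixed gang/non-gang: illegal */
	}
	return (true);
}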
@@ -3149,7 +3160,7 @@ zio_write_gang_block(zio_t *pio, metaslab_class_t *mc)
 
 	flags |= METASLAB_ASYNC_ALLOC;
 	VERIFY(zfs_refcount_held(&mc->mc_allocator[pio->io_allocator].
-	    mca_alloc_slots, pio));
+	    mca_alloc_slots, pio) || gio->io_prop.zp_copies == 0);
 
 	/*
 	 * The logical zio has already placed a reservation for
@@ -3229,6 +3240,7 @@ zio_write_gang_block(zio_t *pio, metaslab_class_t *mc)
 		zp.zp_encrypt = gio->io_prop.zp_encrypt;
 		zp.zp_byteorder = gio->io_prop.zp_byteorder;
 		zp.zp_direct_write = B_FALSE;
+		zp.zp_must_gang = B_FALSE;
 		memset(zp.zp_salt, 0, ZIO_DATA_SALT_LEN);
 		memset(zp.zp_iv, 0, ZIO_DATA_IV_LEN);
 		memset(zp.zp_mac, 0, ZIO_DATA_MAC_LEN);
@@ -3784,8 +3796,9 @@ zio_ddt_write(zio_t *zio)
 	 */
 	int have_dvas = ddt_phys_dva_count(ddp, v, BP_IS_ENCRYPTED(bp));
 	IMPLY(have_dvas == 0, ddt_phys_birth(ddp, v) == 0);
+	int gang_dvas = ddt_phys_gang_count(ddp, v, BP_IS_ENCRYPTED(bp));
 
-	/* Number of DVAs requested bya the IO. */
+	/* Number of DVAs requested by the IO. */
 	uint8_t need_dvas = zp->zp_copies;
 
 	/*
@@ -3937,7 +3950,19 @@ zio_ddt_write(zio_t *zio)
 		 * grow the DDT entry by to satisfy the request.
 		 */
 		zio_prop_t czp = *zp;
-		czp.zp_copies = czp.zp_gang_copies = need_dvas;
+		if (gang_dvas > 0 && have_dvas > 0) {
+			czp.zp_gang_copies = need_dvas +
+			    (zp->zp_gang_copies - zp->zp_copies);
+			czp.zp_copies = need_dvas;
+			czp.zp_must_gang = B_TRUE;
+		} else if (gang_dvas == 0 && have_dvas > 0) {
+			czp.zp_copies = need_dvas;
+			czp.zp_gang_copies = 0;
+		} else {
+			czp.zp_copies = need_dvas;
+			czp.zp_gang_copies = need_dvas +
+			    (zp->zp_gang_copies - zp->zp_copies);
+		}
 		zio_t *cio = zio_write(zio, spa, txg, bp, zio->io_orig_abd,
 		    zio->io_orig_size, zio->io_orig_size, &czp,
 		    zio_ddt_child_write_ready, NULL,
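For reference, the same three-way decision as a self-contained function. It mirrors the branch logic in the hunk above; zio_prop_sketch_t and ddt_child_copies_sketch() are hypothetical stand-ins, not the real ZFS types.

#include <stdbool.h>
#include <stdint.h>

/* Hypothetical stand-in for the zio_prop_t fields used here. */
typedef struct {
	uint8_t	zp_copies;	/* DVAs to allocate */
	uint8_t	zp_gang_copies;	/* gang-header copies; 0 = may not gang */
	bool	zp_must_gang;	/* new flag: allocation must gang */
} zio_prop_sketch_t;

/*
 * Derive the child write's copy counts from the existing DDT entry:
 * existing gang DVAs force ganging, existing non-gang DVAs forbid it,
 * and an empty entry behaves like an ordinary write.
 */
static void
ddt_child_copies_sketch(int have_dvas, int gang_dvas, uint8_t need_dvas,
    const zio_prop_sketch_t *zp, zio_prop_sketch_t *czp)
{
	uint8_t extra_gang = zp->zp_gang_copies - zp->zp_copies;

	czp->zp_copies = need_dvas;
	czp->zp_must_gang = false;
	if (gang_dvas > 0 && have_dvas > 0) {
		/* Entry already ganged: new DVAs must gang too. */
		czp->zp_gang_copies = need_dvas + extra_gang;
		czp->zp_must_gang = true;
	} else if (have_dvas > 0) {
		/* Entry has non-gang DVAs: ganging would mix the BP. */
		czp->zp_gang_copies = 0;
	} else {
		/* Empty entry: normal gang policy applies. */
		czp->zp_gang_copies = need_dvas + extra_gang;
	}
}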
@@ -4109,6 +4134,7 @@ zio_dva_allocate(zio_t *zio)
 	ASSERT(BP_IS_HOLE(bp));
 	ASSERT0(BP_GET_NDVAS(bp));
 	ASSERT3U(zio->io_prop.zp_copies, >, 0);
+
 	ASSERT3U(zio->io_prop.zp_copies, <=, spa_max_replication(spa));
 	ASSERT3U(zio->io_size, ==, BP_GET_PSIZE(bp));
 
@@ -4141,14 +4167,15 @@ zio_dva_allocate(zio_t *zio)
 	 * back to spa_sync() which is abysmal for performance.
 	 */
 	ASSERT(ZIO_HAS_ALLOCATOR(zio));
-	error = metaslab_alloc(spa, mc, zio->io_size, bp,
-	    zio->io_prop.zp_copies, zio->io_txg, NULL, flags,
-	    &zio->io_alloc_list, zio, zio->io_allocator);
+	error = zio->io_prop.zp_must_gang ? ENOSPC : metaslab_alloc(spa,
+	    mc, zio->io_size, bp, zio->io_prop.zp_copies, zio->io_txg, NULL,
+	    flags, &zio->io_alloc_list, zio, zio->io_allocator);
 
 	/*
 	 * Fallback to normal class when an alloc class is full
 	 */
-	if (error == ENOSPC && mc != spa_normal_class(spa)) {
+	if (error == ENOSPC && mc != spa_normal_class(spa) &&
+	    !zio->io_prop.zp_must_gang) {
 		/*
 		 * When the dedup or special class is spilling into the normal
 		 * class, there can still be significant space available due
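The ternary above is the heart of the mechanism: a must-gang write never consults the metaslab allocator and reports ENOSPC directly, so control falls into the existing gang-write fallback with the geometry the DDT entry requires. A minimal sketch of that short-circuit, with hypothetical names:

#include <errno.h>
#include <stdbool.h>

/*
 * Hypothetical sketch: when must_gang is set, skip allocation entirely
 * so the caller's ENOSPC path builds a gang block instead.
 */
static int
dva_alloc_sketch(bool must_gang, int (*alloc_fn)(void *arg), void *arg)
{
	return (must_gang ? ENOSPC : alloc_fn(arg));
}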
@@ -4200,7 +4227,8 @@ zio_dva_allocate(zio_t *zio)
 	}
 
 	if (error == ENOSPC && zio->io_size > SPA_MINBLOCKSIZE) {
-		if (zfs_flags & ZFS_DEBUG_METASLAB_ALLOC) {
+		if (zfs_flags & ZFS_DEBUG_METASLAB_ALLOC &&
+		    !zio->io_prop.zp_must_gang) {
 			zfs_dbgmsg("%s: metaslab allocation failure, "
 			    "trying ganging: zio %px, size %llu, error %d",
 			    spa_name(spa), zio, (u_longlong_t)zio->io_size,
@@ -5422,6 +5450,16 @@ zio_done(zio_t *zio)
 	}
 
 	if (zio->io_error && zio == zio->io_logical) {
+
+		/*
+		 * A DDT child tried to create a mixed gang/non-gang BP. We're
+		 * going to have to just retry as a non-dedup IO.
+		 */
+		if (zio->io_error == EAGAIN && IO_IS_ALLOCATING(zio) &&
+		    zio->io_prop.zp_dedup) {
+			zio->io_reexecute |= ZIO_REEXECUTE_NOW;
+			zio->io_prop.zp_dedup = B_FALSE;
+		}
 		/*
 		 * Determine whether zio should be reexecuted. This will
 		 * propagate all the way to the root via zio_notify_parent().
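Putting the pieces together: the EAGAIN raised in zio_write_gang_block() surfaces at the logical zio, which drops dedup and re-executes, allocating fresh, self-consistent DVAs instead of trying to extend the mismatched DDT entry. A self-contained sketch of that reaction; zio_sketch_t, the boolean fields, and the flag value are stand-ins for the real zio_t state, not the ZFS API.

#include <errno.h>
#include <stdbool.h>
#include <stdint.h>

#define	REEXECUTE_NOW_SKETCH	(1 << 0)	/* stand-in for ZIO_REEXECUTE_NOW */

typedef struct {
	int	io_error;
	bool	io_is_logical;		/* stands in for zio == zio->io_logical */
	bool	io_is_allocating;	/* stands in for IO_IS_ALLOCATING(zio) */
	bool	zp_dedup;
	uint8_t	io_reexecute;
} zio_sketch_t;

/* On EAGAIN from the gang bailout, disable dedup and retry immediately. */
static void
maybe_retry_without_dedup(zio_sketch_t *zio)
{
	if (zio->io_error == EAGAIN && zio->io_is_logical &&
	    zio->io_is_allocating && zio->zp_dedup) {
		zio->io_reexecute |= REEXECUTE_NOW_SKETCH;
		zio->zp_dedup = false;
	}
}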