Skip to content

Commit a7e1ac7

Browse files
naota authored and kdave committed
btrfs: zoned: reserve zones for an active metadata/system block group
Ensure a metadata and system block group can be activated at write time by leaving a certain number of active zones unused when trying to activate a data block group. Zones for two metadata block groups (normal and tree-log) and one system block group are reserved according to the profile type: two zones per block group on the DUP profile and one zone per block group otherwise.

The reservation must be released once a non-data block group is allocated. Otherwise, we over-reserve the active zones and data block group activation will suffer. Because the reservation count changes dynamically, it has to be managed per device. The reservation count variable is protected by fs_info->zone_active_bgs_lock.

Signed-off-by: Naohiro Aota <[email protected]>
Signed-off-by: David Sterba <[email protected]>
1 parent c1c3c2b commit a7e1ac7

File tree

3 files changed

+98
-5
lines changed

3 files changed

+98
-5
lines changed

fs/btrfs/disk-io.c

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3467,6 +3467,8 @@ int __cold open_ctree(struct super_block *sb, struct btrfs_fs_devices *fs_device
34673467

34683468
btrfs_free_zone_cache(fs_info);
34693469

3470+
btrfs_check_active_zone_reservation(fs_info);
3471+
34703472
if (!sb_rdonly(sb) && fs_info->fs_devices->missing_devices &&
34713473
!btrfs_check_rw_degradable(fs_info, NULL)) {
34723474
btrfs_warn(fs_info,

fs/btrfs/zoned.c

Lines changed: 88 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1889,6 +1889,7 @@ bool btrfs_zone_activate(struct btrfs_block_group *block_group)
18891889
struct map_lookup *map;
18901890
struct btrfs_device *device;
18911891
u64 physical;
1892+
const bool is_data = (block_group->flags & BTRFS_BLOCK_GROUP_DATA);
18921893
bool ret;
18931894
int i;
18941895

@@ -1910,19 +1911,40 @@ bool btrfs_zone_activate(struct btrfs_block_group *block_group)
19101911
goto out_unlock;
19111912
}
19121913

1914+
spin_lock(&fs_info->zone_active_bgs_lock);
19131915
for (i = 0; i < map->num_stripes; i++) {
1916+
struct btrfs_zoned_device_info *zinfo;
1917+
int reserved = 0;
1918+
19141919
device = map->stripes[i].dev;
19151920
physical = map->stripes[i].physical;
1921+
zinfo = device->zone_info;
19161922

1917-
if (device->zone_info->max_active_zones == 0)
1923+
if (zinfo->max_active_zones == 0)
19181924
continue;
19191925

1926+
if (is_data)
1927+
reserved = zinfo->reserved_active_zones;
1928+
/*
1929+
* For the data block group, leave active zones for one
1930+
* metadata block group and one system block group.
1931+
*/
1932+
if (atomic_read(&zinfo->active_zones_left) <= reserved) {
1933+
ret = false;
1934+
spin_unlock(&fs_info->zone_active_bgs_lock);
1935+
goto out_unlock;
1936+
}
1937+
19201938
if (!btrfs_dev_set_active_zone(device, physical)) {
19211939
/* Cannot activate the zone */
19221940
ret = false;
1941+
spin_unlock(&fs_info->zone_active_bgs_lock);
19231942
goto out_unlock;
19241943
}
1944+
if (!is_data)
1945+
zinfo->reserved_active_zones--;
19251946
}
1947+
spin_unlock(&fs_info->zone_active_bgs_lock);
19261948

19271949
/* Successfully activated all the zones */
19281950
set_bit(BLOCK_GROUP_FLAG_ZONE_IS_ACTIVE, &block_group->runtime_flags);
@@ -2061,18 +2083,21 @@ static int do_zone_finish(struct btrfs_block_group *block_group, bool fully_writ
20612083
for (i = 0; i < map->num_stripes; i++) {
20622084
struct btrfs_device *device = map->stripes[i].dev;
20632085
const u64 physical = map->stripes[i].physical;
2086+
struct btrfs_zoned_device_info *zinfo = device->zone_info;
20642087

2065-
if (device->zone_info->max_active_zones == 0)
2088+
if (zinfo->max_active_zones == 0)
20662089
continue;
20672090

20682091
ret = blkdev_zone_mgmt(device->bdev, REQ_OP_ZONE_FINISH,
20692092
physical >> SECTOR_SHIFT,
2070-
device->zone_info->zone_size >> SECTOR_SHIFT,
2093+
zinfo->zone_size >> SECTOR_SHIFT,
20712094
GFP_NOFS);
20722095

20732096
if (ret)
20742097
return ret;
20752098

2099+
if (!(block_group->flags & BTRFS_BLOCK_GROUP_DATA))
2100+
zinfo->reserved_active_zones++;
20762101
btrfs_dev_clear_active_zone(device, physical);
20772102
}
20782103

@@ -2111,8 +2136,10 @@ bool btrfs_can_activate_zone(struct btrfs_fs_devices *fs_devices, u64 flags)
21112136

21122137
/* Check if there is a device with active zones left */
21132138
mutex_lock(&fs_info->chunk_mutex);
2139+
spin_lock(&fs_info->zone_active_bgs_lock);
21142140
list_for_each_entry(device, &fs_devices->alloc_list, dev_alloc_list) {
21152141
struct btrfs_zoned_device_info *zinfo = device->zone_info;
2142+
int reserved = 0;
21162143

21172144
if (!device->bdev)
21182145
continue;
@@ -2122,17 +2149,21 @@ bool btrfs_can_activate_zone(struct btrfs_fs_devices *fs_devices, u64 flags)
21222149
break;
21232150
}
21242151

2152+
if (flags & BTRFS_BLOCK_GROUP_DATA)
2153+
reserved = zinfo->reserved_active_zones;
2154+
21252155
switch (flags & BTRFS_BLOCK_GROUP_PROFILE_MASK) {
21262156
case 0: /* single */
2127-
ret = (atomic_read(&zinfo->active_zones_left) >= 1);
2157+
ret = (atomic_read(&zinfo->active_zones_left) >= (1 + reserved));
21282158
break;
21292159
case BTRFS_BLOCK_GROUP_DUP:
2130-
ret = (atomic_read(&zinfo->active_zones_left) >= 2);
2160+
ret = (atomic_read(&zinfo->active_zones_left) >= (2 + reserved));
21312161
break;
21322162
}
21332163
if (ret)
21342164
break;
21352165
}
2166+
spin_unlock(&fs_info->zone_active_bgs_lock);
21362167
mutex_unlock(&fs_info->chunk_mutex);
21372168

21382169
if (!ret)
@@ -2374,3 +2405,55 @@ int btrfs_zoned_activate_one_bg(struct btrfs_fs_info *fs_info,
23742405

23752406
return 0;
23762407
}
2408+
2409+
/*
2410+
* Reserve zones for one metadata block group, one tree-log block group, and one
2411+
* system block group.
2412+
*
* Called from open_ctree() during mount. Does nothing unless
* BTRFS_FS_ACTIVE_ZONE_TRACKING is set in fs_info->flags.
*
* The function works in two passes:
*  1. Under fs_devices->device_list_mutex, set reserved_active_zones on every
*     device with a non-NULL bdev to (metadata_reserve + system_reserve). The
*     per-block-group count is 1 zone, or 2 zones when the corresponding
*     avail_*_alloc_bits contain BTRFS_BLOCK_GROUP_DUP.
*  2. Under fs_info->zone_active_bgs_lock, walk fs_info->zone_active_bgs and,
*     for each block group flagged METADATA or SYSTEM, decrement
*     reserved_active_zones once per stripe on that stripe's device.
*
* NOTE(review): device->zone_info is dereferenced without a NULL check in both
* passes - presumably guaranteed non-NULL when active zone tracking is
* enabled; confirm.
*/
2413+
void btrfs_check_active_zone_reservation(struct btrfs_fs_info *fs_info)
2414+
{
2415+
struct btrfs_fs_devices *fs_devices = fs_info->fs_devices;
2416+
struct btrfs_block_group *block_group;
2417+
struct btrfs_device *device;
2418+
/*
* Zones reserved for the normal and the tree-log metadata block groups:
* one zone each on SINGLE (raised to 4 in total below for DUP).
*/
2419+
unsigned int metadata_reserve = 2;
2420+
/* One zone reserved for a SINGLE system block group (raised to 2 below for DUP). */
2421+
unsigned int system_reserve = 1;
2422+
2423+
if (!test_bit(BTRFS_FS_ACTIVE_ZONE_TRACKING, &fs_info->flags))
2424+
return;
2425+
2426+
/*
2427+
* This function is called from the mount context. So, there is no
2428+
* parallel process touching the bits. No need for read_seqretry().
2429+
* DUP needs two zones per block group, so double each reservation.
*/
2430+
if (fs_info->avail_metadata_alloc_bits & BTRFS_BLOCK_GROUP_DUP)
2431+
metadata_reserve = 4;
2432+
if (fs_info->avail_system_alloc_bits & BTRFS_BLOCK_GROUP_DUP)
2433+
system_reserve = 2;
2434+
2435+
/* Apply the full reservation to every device that has a non-NULL bdev. */
2436+
mutex_lock(&fs_devices->device_list_mutex);
2437+
list_for_each_entry(device, &fs_devices->devices, dev_list) {
2438+
if (!device->bdev)
2439+
continue;
2440+
2441+
device->zone_info->reserved_active_zones =
2442+
metadata_reserve + system_reserve;
2443+
}
2444+
mutex_unlock(&fs_devices->device_list_mutex);
2445+
2446+
/*
* Release reservation for currently active block groups: each METADATA or
* SYSTEM block group on the zone_active_bgs list gives back one reserved
* zone per stripe, on the device backing that stripe. Data block groups
* are skipped.
*
* NOTE(review): no lower bound is enforced on the decrement, and
* reserved_active_zones is a signed int, so it can drop below zero if more
* non-data stripes are active than were reserved above - confirm that the
* comparisons against active_zones_left tolerate a negative value.
*/
2447+
spin_lock(&fs_info->zone_active_bgs_lock);
2448+
list_for_each_entry(block_group, &fs_info->zone_active_bgs, active_bg_list) {
2449+
struct map_lookup *map = block_group->physical_map;
2450+
2451+
if (!(block_group->flags &
2452+
(BTRFS_BLOCK_GROUP_METADATA | BTRFS_BLOCK_GROUP_SYSTEM)))
2453+
continue;
2454+
2455+
for (int i = 0; i < map->num_stripes; i++)
2456+
map->stripes[i].dev->zone_info->reserved_active_zones--;
2457+
}
2458+
spin_unlock(&fs_info->zone_active_bgs_lock);
2459+
}

fs/btrfs/zoned.h

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,11 @@ struct btrfs_zoned_device_info {
2222
u8 zone_size_shift;
2323
u32 nr_zones;
2424
unsigned int max_active_zones;
25+
/*
26+
* Reserved active zones for one metadata and one system block group.
27+
* It can vary per-device depending on the allocation status.
28+
*/
29+
int reserved_active_zones;
2530
atomic_t active_zones_left;
2631
unsigned long *seq_zones;
2732
unsigned long *empty_zones;
@@ -78,6 +83,7 @@ void btrfs_zoned_release_data_reloc_bg(struct btrfs_fs_info *fs_info, u64 logica
7883
int btrfs_zone_finish_one_bg(struct btrfs_fs_info *fs_info);
7984
int btrfs_zoned_activate_one_bg(struct btrfs_fs_info *fs_info,
8085
struct btrfs_space_info *space_info, bool do_finish);
86+
void btrfs_check_active_zone_reservation(struct btrfs_fs_info *fs_info);
8187
#else /* CONFIG_BLK_DEV_ZONED */
8288
static inline int btrfs_get_dev_zone(struct btrfs_device *device, u64 pos,
8389
struct blk_zone *zone)
@@ -252,6 +258,8 @@ static inline int btrfs_zoned_activate_one_bg(struct btrfs_fs_info *fs_info,
252258
return 0;
253259
}
254260

261+
/*
* No-op stub with an empty body. It appears in the #else branch of
* CONFIG_BLK_DEV_ZONED, so presumably it is the variant used when zoned block
* device support is compiled out - confirm against the full header.
*/
static inline void btrfs_check_active_zone_reservation(struct btrfs_fs_info *fs_info) { }
262+
255263
#endif
256264

257265
static inline bool btrfs_dev_is_sequential(struct btrfs_device *device, u64 pos)

0 commit comments

Comments
 (0)