Diffstat
-rw-r--r--  mm/zswap.c | 249
1 file changed, 153 insertions, 96 deletions
diff --git a/mm/zswap.c b/mm/zswap.c
index 162013952074..f6316b66fb23 100644
--- a/mm/zswap.c
+++ b/mm/zswap.c
@@ -43,7 +43,7 @@
 * statistics
 **********************************/
 /* The number of compressed pages currently stored in zswap */
-atomic_t zswap_stored_pages = ATOMIC_INIT(0);
+atomic_long_t zswap_stored_pages = ATOMIC_INIT(0);

 /*
  * The statistics below are not protected from concurrent access for
@@ -402,7 +402,7 @@ static void __zswap_pool_empty(struct percpu_ref *ref)
 	spin_unlock_bh(&zswap_pools_lock);
 }

-static int __must_check zswap_pool_get(struct zswap_pool *pool)
+static int __must_check zswap_pool_tryget(struct zswap_pool *pool)
 {
 	if (!pool)
 		return 0;
@@ -410,6 +410,12 @@ static int __must_check zswap_pool_get(struct zswap_pool *pool)
 	return percpu_ref_tryget(&pool->ref);
 }

+/* The caller must already have a reference. */
+static void zswap_pool_get(struct zswap_pool *pool)
+{
+	percpu_ref_get(&pool->ref);
+}
+
 static void zswap_pool_put(struct zswap_pool *pool)
 {
 	percpu_ref_put(&pool->ref);
@@ -440,7 +446,7 @@ static struct zswap_pool *zswap_pool_current_get(void)

 	rcu_read_lock();
 	pool = __zswap_pool_current();
-	if (!zswap_pool_get(pool))
+	if (!zswap_pool_tryget(pool))
 		pool = NULL;
 	rcu_read_unlock();

@@ -461,7 +467,7 @@ static struct zswap_pool *zswap_pool_find_get(char *type, char *compressor)
 		if (strcmp(zpool_get_type(pool->zpool), type))
 			continue;
 		/* if we can't get it, it's about to be destroyed */
-		if (!zswap_pool_get(pool))
+		if (!zswap_pool_tryget(pool))
 			continue;
 		return pool;
 	}
@@ -703,12 +709,11 @@ static void zswap_lru_add(struct list_lru *list_lru, struct zswap_entry *entry)

 	/*
 	 * Note that it is safe to use rcu_read_lock() here, even in the face of
-	 * concurrent memcg offlining. Thanks to the memcg->kmemcg_id indirection
-	 * used in list_lru lookup, only two scenarios are possible:
+	 * concurrent memcg offlining:
 	 *
-	 * 1. list_lru_add() is called before memcg->kmemcg_id is updated. The
+	 * 1. list_lru_add() is called before list_lru_one is dead. The
 	 *    new entry will be reparented to memcg's parent's list_lru.
-	 * 2. list_lru_add() is called after memcg->kmemcg_id is updated. The
+	 * 2. list_lru_add() is called after list_lru_one is dead. The
 	 *    new entry will be added directly to memcg's parent's list_lru.
 	 *
 	 * Similar reasoning holds for list_lru_del().
@@ -802,7 +807,7 @@ static void zswap_entry_free(struct zswap_entry *entry)
 		obj_cgroup_put(entry->objcg);
 	}
 	zswap_entry_cache_free(entry);
-	atomic_dec(&zswap_stored_pages);
+	atomic_long_dec(&zswap_stored_pages);
 }

 /*********************************
@@ -875,7 +880,8 @@ static int zswap_cpu_comp_dead(unsigned int cpu, struct hlist_node *node)
 	return 0;
 }

-static bool zswap_compress(struct folio *folio, struct zswap_entry *entry)
+static bool zswap_compress(struct page *page, struct zswap_entry *entry,
+			   struct zswap_pool *pool)
 {
 	struct crypto_acomp_ctx *acomp_ctx;
 	struct scatterlist input, output;
@@ -887,13 +893,13 @@ static bool zswap_compress(struct folio *folio, struct zswap_entry *entry)
 	gfp_t gfp;
 	u8 *dst;

-	acomp_ctx = raw_cpu_ptr(entry->pool->acomp_ctx);
+	acomp_ctx = raw_cpu_ptr(pool->acomp_ctx);

 	mutex_lock(&acomp_ctx->mutex);

 	dst = acomp_ctx->buffer;
 	sg_init_table(&input, 1);
-	sg_set_folio(&input, folio, PAGE_SIZE, 0);
+	sg_set_page(&input, page, PAGE_SIZE, 0);

 	/*
 	 * We need PAGE_SIZE * 2 here since there maybe over-compression case,
@@ -920,7 +926,7 @@
 	if (comp_ret)
 		goto unlock;

-	zpool = entry->pool->zpool;
+	zpool = pool->zpool;
 	gfp = __GFP_NORETRY | __GFP_NOWARN | __GFP_KSWAPD_RECLAIM;
 	if (zpool_malloc_support_movable(zpool))
 		gfp |= __GFP_HIGHMEM | __GFP_MOVABLE;
@@ -1053,7 +1059,7 @@ static int zswap_writeback_entry(struct zswap_entry *entry,

 	count_vm_event(ZSWPWB);
 	if (entry->objcg)
-		count_objcg_event(entry->objcg, ZSWPWB);
+		count_objcg_events(entry->objcg, ZSWPWB, 1);

 	zswap_entry_free(entry);

@@ -1096,7 +1102,7 @@
  * for reclaim by this ratio.
  */
 static enum lru_status shrink_memcg_cb(struct list_head *item, struct list_lru_one *l,
-				       spinlock_t *lock, void *arg)
+				       void *arg)
 {
 	struct zswap_entry *entry = container_of(item, struct zswap_entry, lru);
 	bool *encountered_page_in_swapcache = (bool *)arg;
@@ -1152,7 +1158,7 @@ static enum lru_status shrink_memcg_cb(struct list_head *item, struct list_lru_o
 	 * It's safe to drop the lock here because we return either
 	 * LRU_REMOVED_RETRY or LRU_RETRY.
 	 */
-	spin_unlock(lock);
+	spin_unlock(&l->lock);

 	writeback_result = zswap_writeback_entry(entry, swpentry);

@@ -1173,7 +1179,6 @@ static enum lru_status shrink_memcg_cb(struct list_head *item, struct list_lru_o
 		zswap_written_back_pages++;
 	}

-	spin_lock(lock);
 	return ret;
 }

@@ -1233,7 +1238,7 @@ static unsigned long zswap_shrinker_count(struct shrinker *shrinker,
 		nr_stored = memcg_page_state(memcg, MEMCG_ZSWAPPED);
 	} else {
 		nr_backing = zswap_total_pages();
-		nr_stored = atomic_read(&zswap_stored_pages);
+		nr_stored = atomic_long_read(&zswap_stored_pages);
 	}

 	if (!nr_stored)
@@ -1403,68 +1408,27 @@ resched:
 /*********************************
 * main API
 **********************************/
-bool zswap_store(struct folio *folio)
+
+static ssize_t zswap_store_page(struct page *page,
+				struct obj_cgroup *objcg,
+				struct zswap_pool *pool)
 {
-	swp_entry_t swp = folio->swap;
-	pgoff_t offset = swp_offset(swp);
-	struct xarray *tree = swap_zswap_tree(swp);
+	swp_entry_t page_swpentry = page_swap_entry(page);
 	struct zswap_entry *entry, *old;
-	struct obj_cgroup *objcg = NULL;
-	struct mem_cgroup *memcg = NULL;
-
-	VM_WARN_ON_ONCE(!folio_test_locked(folio));
-	VM_WARN_ON_ONCE(!folio_test_swapcache(folio));
-
-	/* Large folios aren't supported */
-	if (folio_test_large(folio))
-		return false;
-
-	if (!zswap_enabled)
-		goto check_old;
-
-	/* Check cgroup limits */
-	objcg = get_obj_cgroup_from_folio(folio);
-	if (objcg && !obj_cgroup_may_zswap(objcg)) {
-		memcg = get_mem_cgroup_from_objcg(objcg);
-		if (shrink_memcg(memcg)) {
-			mem_cgroup_put(memcg);
-			goto reject;
-		}
-		mem_cgroup_put(memcg);
-	}
-
-	if (zswap_check_limits())
-		goto reject;

 	/* allocate entry */
-	entry = zswap_entry_cache_alloc(GFP_KERNEL, folio_nid(folio));
+	entry = zswap_entry_cache_alloc(GFP_KERNEL, page_to_nid(page));
 	if (!entry) {
 		zswap_reject_kmemcache_fail++;
-		goto reject;
-	}
-
-	/* if entry is successfully added, it keeps the reference */
-	entry->pool = zswap_pool_current_get();
-	if (!entry->pool)
-		goto freepage;
-
-	if (objcg) {
-		memcg = get_mem_cgroup_from_objcg(objcg);
-		if (memcg_list_lru_alloc(memcg, &zswap_list_lru, GFP_KERNEL)) {
-			mem_cgroup_put(memcg);
-			goto put_pool;
-		}
-		mem_cgroup_put(memcg);
+		return -EINVAL;
 	}

-	if (!zswap_compress(folio, entry))
-		goto put_pool;
-
-	entry->swpentry = swp;
-	entry->objcg = objcg;
-	entry->referenced = true;
+	if (!zswap_compress(page, entry, pool))
+		goto compress_failed;

-	old = xa_store(tree, offset, entry, GFP_KERNEL);
+	old = xa_store(swap_zswap_tree(page_swpentry),
+		       swp_offset(page_swpentry),
+		       entry, GFP_KERNEL);
 	if (xa_is_err(old)) {
 		int err = xa_err(old);

@@ -1481,10 +1445,15 @@ bool zswap_store(struct folio *folio)
 	if (old)
 		zswap_entry_free(old);

-	if (objcg) {
-		obj_cgroup_charge_zswap(objcg, entry->length);
-		count_objcg_event(objcg, ZSWPOUT);
-	}
+	/*
+	 * The entry is successfully compressed and stored in the tree, there is
+	 * no further possibility of failure. Grab refs to the pool and objcg.
+	 * These refs will be dropped by zswap_entry_free() when the entry is
+	 * removed from the tree.
+	 */
+	zswap_pool_get(pool);
+	if (objcg)
+		obj_cgroup_get(objcg);

 	/*
 	 * We finish initializing the entry while it's already in xarray.
@@ -1496,37 +1465,115 @@ bool zswap_store(struct folio *folio)
 	 * The publishing order matters to prevent writeback from seeing
 	 * an incoherent entry.
 	 */
+	entry->pool = pool;
+	entry->swpentry = page_swpentry;
+	entry->objcg = objcg;
+	entry->referenced = true;
 	if (entry->length) {
 		INIT_LIST_HEAD(&entry->lru);
 		zswap_lru_add(&zswap_list_lru, entry);
 	}

-	/* update stats */
-	atomic_inc(&zswap_stored_pages);
-	count_vm_event(ZSWPOUT);
-
-	return true;
+	return entry->length;

 store_failed:
-	zpool_free(entry->pool->zpool, entry->handle);
-put_pool:
-	zswap_pool_put(entry->pool);
-freepage:
+	zpool_free(pool->zpool, entry->handle);
+compress_failed:
 	zswap_entry_cache_free(entry);
-reject:
+	return -EINVAL;
+}
+
+bool zswap_store(struct folio *folio)
+{
+	long nr_pages = folio_nr_pages(folio);
+	swp_entry_t swp = folio->swap;
+	struct obj_cgroup *objcg = NULL;
+	struct mem_cgroup *memcg = NULL;
+	struct zswap_pool *pool;
+	size_t compressed_bytes = 0;
+	bool ret = false;
+	long index;
+
+	VM_WARN_ON_ONCE(!folio_test_locked(folio));
+	VM_WARN_ON_ONCE(!folio_test_swapcache(folio));
+
+	if (!zswap_enabled)
+		goto check_old;
+
+	objcg = get_obj_cgroup_from_folio(folio);
+	if (objcg && !obj_cgroup_may_zswap(objcg)) {
+		memcg = get_mem_cgroup_from_objcg(objcg);
+		if (shrink_memcg(memcg)) {
+			mem_cgroup_put(memcg);
+			goto put_objcg;
+		}
+		mem_cgroup_put(memcg);
+	}
+
+	if (zswap_check_limits())
+		goto put_objcg;
+
+	pool = zswap_pool_current_get();
+	if (!pool)
+		goto put_objcg;
+
+	if (objcg) {
+		memcg = get_mem_cgroup_from_objcg(objcg);
+		if (memcg_list_lru_alloc(memcg, &zswap_list_lru, GFP_KERNEL)) {
+			mem_cgroup_put(memcg);
+			goto put_pool;
+		}
+		mem_cgroup_put(memcg);
+	}
+
+	for (index = 0; index < nr_pages; ++index) {
+		struct page *page = folio_page(folio, index);
+		ssize_t bytes;
+
+		bytes = zswap_store_page(page, objcg, pool);
+		if (bytes < 0)
+			goto put_pool;
+		compressed_bytes += bytes;
+	}
+
+	if (objcg) {
+		obj_cgroup_charge_zswap(objcg, compressed_bytes);
+		count_objcg_events(objcg, ZSWPOUT, nr_pages);
+	}
+
+	atomic_long_add(nr_pages, &zswap_stored_pages);
+	count_vm_events(ZSWPOUT, nr_pages);
+
+	ret = true;
+
+put_pool:
+	zswap_pool_put(pool);
+put_objcg:
 	obj_cgroup_put(objcg);
-	if (zswap_pool_reached_full)
+	if (!ret && zswap_pool_reached_full)
 		queue_work(shrink_wq, &zswap_shrink_work);
 check_old:
 	/*
-	 * If the zswap store fails or zswap is disabled, we must invalidate the
-	 * possibly stale entry which was previously stored at this offset.
-	 * Otherwise, writeback could overwrite the new data in the swapfile.
+	 * If the zswap store fails or zswap is disabled, we must invalidate
+	 * the possibly stale entries which were previously stored at the
+	 * offsets corresponding to each page of the folio. Otherwise,
+	 * writeback could overwrite the new data in the swapfile.
 	 */
-	entry = xa_erase(tree, offset);
-	if (entry)
-		zswap_entry_free(entry);
-	return false;
+	if (!ret) {
+		unsigned type = swp_type(swp);
+		pgoff_t offset = swp_offset(swp);
+		struct zswap_entry *entry;
+		struct xarray *tree;
+
+		for (index = 0; index < nr_pages; ++index) {
+			tree = swap_zswap_tree(swp_entry(type, offset + index));
+			entry = xa_erase(tree, offset + index);
+			if (entry)
+				zswap_entry_free(entry);
+		}
+	}
+
+	return ret;
 }

 bool zswap_load(struct folio *folio)
@@ -1577,7 +1624,7 @@

 	count_vm_event(ZSWPIN);
 	if (entry->objcg)
-		count_objcg_event(entry->objcg, ZSWPIN);
+		count_objcg_events(entry->objcg, ZSWPIN, 1);

 	if (swapcache) {
 		zswap_entry_free(entry);
@@ -1594,6 +1641,9 @@ void zswap_invalidate(swp_entry_t swp)
 	struct xarray *tree = swap_zswap_tree(swp);
 	struct zswap_entry *entry;

+	if (xa_empty(tree))
+		return;
+
 	entry = xa_erase(tree, offset);
 	if (entry)
 		zswap_entry_free(entry);
@@ -1651,6 +1701,13 @@ static int debugfs_get_total_size(void *data, u64 *val)
 }
 DEFINE_DEBUGFS_ATTRIBUTE(total_size_fops, debugfs_get_total_size, NULL, "%llu\n");

+static int debugfs_get_stored_pages(void *data, u64 *val)
+{
+	*val = atomic_long_read(&zswap_stored_pages);
+	return 0;
+}
+DEFINE_DEBUGFS_ATTRIBUTE(stored_pages_fops, debugfs_get_stored_pages, NULL, "%llu\n");
+
 static int zswap_debugfs_init(void)
 {
 	if (!debugfs_initialized())
@@ -1674,8 +1731,8 @@ static int zswap_debugfs_init(void)
 			   zswap_debugfs_root, &zswap_written_back_pages);
 	debugfs_create_file("pool_total_size", 0444,
 			    zswap_debugfs_root, NULL, &total_size_fops);
-	debugfs_create_atomic_t("stored_pages", 0444,
-				zswap_debugfs_root, &zswap_stored_pages);
+	debugfs_create_file("stored_pages", 0444,
+			    zswap_debugfs_root, NULL, &stored_pages_fops);

 	return 0;
 }