Added multi-page contiguous allocation

This commit is contained in:
Zhengyi Chen 2024-03-16 07:48:13 +00:00
parent a2422135fc
commit 682c0d9372
2 changed files with 218 additions and 153 deletions

View file

@ -1,17 +1,6 @@
// [TODO] Clean up headers...
#include <linux/device.h>
#include "asm-generic/errno-base.h"
#include "asm-generic/memory_model.h"
#include "asm/page-def.h"
#include "linux/gfp.h"
#include "linux/mutex.h"
#include "linux/pfn_t.h"
#include "linux/pid.h"
#include <linux/rcupdate.h>
#include <linux/vmalloc.h>
#include <linux/mutex.h>
#include <linux/list.h>
#include <linux/types.h>
#include <linux/errno.h>
#include <linux/mm.h>
#include <linux/mm_types.h>
#include <linux/fs.h>
@ -19,25 +8,33 @@
#include <linux/module.h>
#include <linux/kernel.h>
#include <linux/version.h>
#include <linux/ktime.h>
#include <asm/cacheflush_extra.h>
#include "asm/page-def.h"
#include "linux/compiler_attributes.h"
#include "utils.h"
MODULE_AUTHOR("Zk.");
MODULE_DESCRIPTION("4.2.W1: mmap for point of coherency");
MODULE_DESCRIPTION("Y4S2.W1: mmap for point of coherency");
MODULE_LICENSE("GPL");
struct my_shmem_page {
ulong max_contiguous_alloc_order = 0;
module_param(max_contiguous_alloc_order, ulong, S_IRUGO | S_IWUSR);
/* Handle describing one contiguous page allocation kept on `my_shmem_allocs`. */
struct my_shmem_alloc {
/* Head page for page-aligned allocation */
struct page *page;
/* Order the allocation was made with; it spans `1 << alloc_order` pages */
ulong alloc_order;
/* Link into the `my_shmem_allocs` list */
struct list_head list;
};
static DEFINE_MUTEX(my_shmem_pages_mtx);
static DEFINE_MUTEX(my_shmem_allocs_mtx);
/* [!] READ/WRITE UNDER LOCK */
static LIST_HEAD(my_shmem_pages);
static LIST_HEAD(my_shmem_allocs);
/* [!] READ/WRITE UNDER LOCK */
// static size_t my_shmem_page_count = 0;
static size_t my_shmem_page_count = 0;
static int major;
static struct class* class;
@ -54,23 +51,23 @@ const char* DEV_NAME = "my_shmem";
*/
static void my_shmem_vmops_close(struct vm_area_struct *vma)
{
size_t nr_pages_in_cache = list_count_nodes(&my_shmem_pages);
// size_t nr_pages_in_cache = list_count_nodes(&my_shmem_allocs);
size_t nr_pages_of_vma = (vma->vm_end - vma->vm_start) >> PAGE_SHIFT;
pr_info(
"[%s] Entered. vma size: %ld; cached pages: %ld.\n",
__func__, nr_pages_of_vma, nr_pages_in_cache
__func__, nr_pages_of_vma, my_shmem_page_count
);
size_t nr_pages_offset = vma->vm_pgoff;
struct my_shmem_page *entry;
// u64 clean_time_bgn, clean_time_end;
// u64 runtime;
struct my_shmem_alloc *entry;
mutex_lock(&my_shmem_allocs_mtx);
list_for_each_entry(entry, &my_shmem_allocs, list) {
const ulong entry_pgs = ORDER_TO_PAGE_NR(entry->alloc_order);
mutex_lock(&my_shmem_pages_mtx);
list_for_each_entry(entry, &my_shmem_pages, list) {
/* Loop until first page out of offset */
if (nr_pages_offset) {
nr_pages_offset--;
if (nr_pages_offset > entry_pgs) {
nr_pages_offset -= entry_pgs;
continue;
}
@ -81,102 +78,148 @@ static void my_shmem_vmops_close(struct vm_area_struct *vma)
/* Inside the window of mapped pages -- flush them up */
struct page *pg = entry->page;
ulong kvaddr_bgn = (ulong) page_address(pg);
ulong kvaddr_end = kvaddr_bgn + PAGE_SIZE;
ulong kvaddr_end = kvaddr_bgn + entry_pgs;
pr_info("[%s] Before flush: 0x%px has 0x%lx [+%ld].\n",
pr_info("[%s] Before flush: 0x%px has 0x%lx [+%ld]. Refcount: %d\n",
__func__, (void *) kvaddr_bgn, *(ulong *) kvaddr_bgn,
sizeof(ulong));
sizeof(ulong), atomic_read(&pg->_refcount));
__dcache_clean_poc(kvaddr_bgn, kvaddr_end);
pr_info("[%s] After flush: 0x%px has 0x%lx [+%ld].\n",
__func__, (void *) kvaddr_bgn, *(ulong *) kvaddr_bgn,
sizeof(ulong));
put_page(pg);
pr_info("[%s] After flush: 0x%px has 0x%lx [+%ld]. Refcount: %d\n",
__func__, (void *) kvaddr_bgn, *(ulong *) kvaddr_bgn,
sizeof(ulong), atomic_read(&pg->_refcount));
nr_pages_of_vma--;
if (nr_pages_offset != 0) {
nr_pages_of_vma -= min(entry_pgs - nr_pages_offset, nr_pages_of_vma);
nr_pages_offset = 0;
} else
nr_pages_of_vma -= min(entry_pgs, nr_pages_of_vma);
}
mutex_unlock(&my_shmem_pages_mtx);
mutex_unlock(&my_shmem_allocs_mtx);
pr_info("[%s] Flushed dcache.\n", __func__);
}
static vm_fault_t __my_shmem_fault_remap(struct vm_fault *vmf)
{
pr_info("[%s] Entered...\n", __func__);
// We remap, possibly with offset from composite `page`, so...
vm_fault_t ret = VM_FAULT_NOPAGE;
pgoff_t vma_pgoff = vmf->vma->vm_pgoff;
pgoff_t vmf_pgoff_wrt_vma = vmf->pgoff;
const ulong fault_addr = vmf->address;
ulong remap_addr = fault_addr;
/* Page ID wrt. `my_shmem_allocs` that should be remapped due to `vmf` */
pgoff_t vmf_pgoff_wrt_allocpool = vma_pgoff + vmf_pgoff_wrt_vma;
BUG_ON(vmf_pgoff_wrt_allocpool >= my_shmem_page_count);
/* Current max. nr. of pages still remappable to `vmf->vma` */
ulong remaining_remappable_pgs = min(
my_shmem_page_count - vmf_pgoff_wrt_allocpool,
vma_pgoff + NR_PAGE_OF_VMA(vmf->vma) - vmf_pgoff_wrt_allocpool
);
pr_info("[%s] %ld + %ld = %ld (remap from here); remap %ld pages.\n",
__func__, vma_pgoff, vmf_pgoff_wrt_vma, vmf_pgoff_wrt_allocpool,
remaining_remappable_pgs);
struct my_shmem_alloc *curr;
/* Page ID as pointed to by `curr` */
pgoff_t curr_pg_offset = 0;
/* Page ID as pointed to by next of `curr` */
pgoff_t next_pg_offset;
list_for_each_entry(curr, &my_shmem_allocs, list) {
next_pg_offset = curr_pg_offset + ORDER_TO_PAGE_NR(curr->alloc_order);
pr_info("[%s] curr pfn: 0x%lx, curr_off: %ld, next_off: %ld, remaining %ld remappable.\n",
__func__, page_to_pfn(curr->page), curr_pg_offset, next_pg_offset, remaining_remappable_pgs);
if (next_pg_offset > vmf_pgoff_wrt_allocpool) { // In remappable range
get_page(curr->page);
/* Offset in [`pg`, `pg + (1 << alloc_order)`) */
pgoff_t offset_in_alloc = vmf_pgoff_wrt_allocpool - curr_pg_offset;
ulong remap_range_pgs = min(
next_pg_offset - curr_pg_offset - offset_in_alloc,
remaining_remappable_pgs
);
ulong remap_range_bytes = remap_range_pgs * PAGE_SIZE;
ulong remap_pfn = page_to_pfn(curr->page) + offset_in_alloc;
pr_info("[%s] Remapping PFN 0x%lx (+%ld ~ %ldB) -> %px\n",
__func__, remap_pfn, remap_range_pgs, remap_range_bytes, (void *)remap_addr);
int remap_ret = remap_pfn_range(
vmf->vma, remap_addr, remap_pfn,
remap_range_bytes, vmf->vma->vm_page_prot
);
if (remap_ret) {
pr_info("[%s] Remap failed: %d\n", __func__, remap_ret);
put_page(curr->page);
goto err_remap_failed;
}
vmf_pgoff_wrt_allocpool = next_pg_offset;
remaining_remappable_pgs -= remap_range_pgs;
remap_addr += remap_range_bytes;
pr_info("[%s] Remap successful. Remaining %ld remappable.\n", __func__, remaining_remappable_pgs);
if (remaining_remappable_pgs == 0)
goto ok;
} else // out of remappable range, continue
curr_pg_offset = next_pg_offset;
}
err_remap_failed:
ret |= VM_FAULT_SIGBUS | VM_FAULT_RETRY;
ok:
mutex_unlock(&my_shmem_allocs_mtx);
return ret;
}
/*
 * Page-fault handler for the mapping.
 *
 * NOTE(review): this listing appears to interleave two revisions of the body:
 * `ret` is declared twice, and both the `my_shmem_pages`/`my_shmem_pages_mtx`
 * and the `my_shmem_allocs`/`my_shmem_allocs_mtx` pairs are used. Read it as a
 * change listing rather than as one compilable function.
 *
 * In the `my_shmem_allocs` variant: with `my_shmem_allocs_mtx` held, the
 * handler remaps via __my_shmem_fault_remap() when the faulting page offset
 * is below `my_shmem_page_count`; otherwise it allocates one more block of
 * `1 << max_contiguous_alloc_order` pages, appends it to the list and retries.
 */
static vm_fault_t my_shmem_vmops_fault(struct vm_fault *vmf)
{
pr_info("[%s] vm_fault @ 0x%lx (real address 0x%lx).\n",
__func__, vmf->address, vmf->real_address);
pr_info("[%s] vm_fault @ 0x%lx (vma + %ld pages).\n",
__func__, vmf->address, vmf->pgoff);
vm_fault_t ret = 0;
struct vm_area_struct *vma_of_vmf = vmf->vma;
ulong vma_pg_offset = vma_of_vmf->vm_pgoff;
ulong vmf_pg_offset_from_vma_bgn =
(vmf->address - vma_of_vmf->vm_start) >> PAGE_SHIFT;
ulong pg_offset = vma_pg_offset + vmf_pg_offset_from_vma_bgn;
phys_addr_t _dbg_phys_of_page;
struct page *last_pg;
vm_fault_t ret = VM_FAULT_NOPAGE;
/*
 * NOTE(review): if `vmf->pgoff` already includes `vma->vm_pgoff`, this sum
 * counts the mapping offset twice -- confirm against the address-based
 * computation of `pg_offset` above.
 */
ulong fault_pg_offset = vmf->vma->vm_pgoff + vmf->pgoff;
mutex_lock(&my_shmem_pages_mtx);
size_t my_shmem_page_count = list_count_nodes(&my_shmem_pages);
BUG_ON(pg_offset < my_shmem_page_count);
/* Allocate the new page(s) */
ulong nr_pages_to_alloc = pg_offset - my_shmem_page_count + 1;
pr_info("[%s] Page count %ld, offset %ld -- allocating %ld more...\n",
__func__, my_shmem_page_count, vmf->pgoff, nr_pages_to_alloc);
for (; nr_pages_to_alloc > 0; nr_pages_to_alloc--)
{
// Allocate page handle in kernel
struct my_shmem_page *new_page = kzalloc(
sizeof(struct my_shmem_page), GFP_KERNEL);
if (!new_page) {
mutex_unlock(&my_shmem_pages_mtx);
goto err_ret_no_kmem;
}
// Allocate kernel virtual page
struct page *curr_pg = alloc_page(GFP_USER);
if (!curr_pg) {
mutex_unlock(&my_shmem_pages_mtx);
goto err_ret_no_page;
}
pr_info("[%s] Allocated pfn: %ld, kernel vaddr: %px.\n",
__func__, page_to_pfn(curr_pg), page_to_virt(curr_pg));
get_page(curr_pg); // For base page refcount
// Populate page handle
new_page->page = curr_pg;
// Add page handle to list
list_add(&new_page->list, &my_shmem_pages);
// Fill in last_pg for final return from page fault handler
last_pg = curr_pg;
mutex_lock(&my_shmem_allocs_mtx);
locked_retry:
if (fault_pg_offset < my_shmem_page_count) {
// => Already present, remap
/* The remap helper unlocks `my_shmem_allocs_mtx` itself before returning. */
return __my_shmem_fault_remap(vmf);
}
// Fill in vmf's page for return
get_page(last_pg);
vmf->page = last_pg;
// ret = vmf_insert_page(vma_of_vmf, vmf->adget_pagedress, last_pg);
// [!] YOU DON'T NEED TO CALL REMAP_PFN_RANGE OR FAMILY HERE!!!
// `__do_fault` allocates PTE prior to calling `vm_ops->fault`,
// and at return `finish_fault` inserts PTE for given page.
// [?] I think `vmf_insert_page` etc. are used to insert device pages...
// They require VM_MIXEDMAP, which in this case don't need (and in fact causes a BUG here.)
// I think this is also the reason why `remap_pfn_range` might work here,
// exactly because it does NOT try to add VM_MIXEDMAP, etc.
_dbg_phys_of_page = page_to_phys(last_pg);
// else => allocate `1 << order` pages opportunistically...
struct my_shmem_alloc *new_alloc_handle = kzalloc(
sizeof(struct my_shmem_alloc), GFP_KERNEL
);
if (!new_alloc_handle)
goto err_kzalloc_handle;
mutex_unlock(&my_shmem_pages_mtx);
goto ok_ret_allocated;
/*
 * NOTE(review): when alloc_pages() fails, the lines shown jump to
 * `err_alloc_pages` without a kfree() of `new_alloc_handle`.
 */
struct page *new_alloc_pg = alloc_pages(
GFP_USER, max_contiguous_alloc_order
);
if (!new_alloc_pg)
goto err_alloc_pages;
err_ret_no_kmem:
pr_err("[%s] Cannot allocate `struct my_shmem_page` in kernel memory.\n",
__func__);
return VM_FAULT_OOM;
err_ret_no_page:
pr_err("[%s] Cannot allocate requested page for virtual memory.\n",
__func__);
return VM_FAULT_OOM;
ok_ret_allocated:
// get_page(new_alloc_pg);
/* Record the new block at the list tail and grow the pool page counter. */
new_alloc_handle->page = new_alloc_pg;
new_alloc_handle->alloc_order = max_contiguous_alloc_order;
list_add_tail(&new_alloc_handle->list, &my_shmem_allocs);
my_shmem_page_count += ORDER_TO_PAGE_NR(new_alloc_handle->alloc_order);
pr_info("[%s] Allocated 1 << %ld pages: 0x%lx - 0x%lx. Current page count: %ld\n",
__func__, max_contiguous_alloc_order,
page_to_pfn(new_alloc_pg), page_to_pfn(new_alloc_pg) + (1 << max_contiguous_alloc_order),
my_shmem_page_count);
/* Re-check whether the pool now covers the faulting page. */
goto locked_retry;
err_alloc_pages:
err_kzalloc_handle:
ret |= VM_FAULT_OOM;
/* NOTE(review): no `goto err_generic` is visible in this function. */
err_generic:
mutex_unlock(&my_shmem_allocs_mtx);
return ret;
}
@ -195,63 +238,75 @@ static const struct file_operations my_shmem_fops;
/*
 * mmap handler: installs `my_shmem_vmops` on the vma and eagerly remaps the
 * pages that already exist in the pool; anything left unmapped is handled at
 * fault time.
 *
 * NOTE(review): this listing appears to interleave two revisions of the body
 * (`uvaddr`/`upgoff`/`my_shmem_pages` alongside
 * `vma_vaddr`/`vma_pgoff`/`my_shmem_allocs`). Read it as a change listing
 * rather than as one compilable function.
 *
 * Returns 0 on success, or the remap_pfn_range() error code.
 */
static int my_shmem_fops_mmap(struct file *filp, struct vm_area_struct *vma)
{
int ret = 0;
ulong uvaddr = vma->vm_start;
ulong upgoff = vma->vm_pgoff;
struct my_shmem_page *curr;
ulong vma_vaddr = vma->vm_start;
ulong vma_pgoff = vma->vm_pgoff;
const ulong vma_pg_count = (vma->vm_end - vma->vm_start) >> PAGE_SHIFT;
struct my_shmem_alloc *curr;
struct page *pg;
ulong pfn_to_remap;
ulong remap_pg_count;
ulong vma_pg_to_remap = vma_pg_count;
const unsigned char *fp_name;
vma->vm_ops = &my_shmem_vmops;
/* Remap as much as possible */
mutex_lock(&my_shmem_pages_mtx);
size_t my_shmem_page_count = list_count_nodes(&my_shmem_pages);
if (!my_shmem_page_count) {
mutex_unlock(&my_shmem_pages_mtx);
goto cleanup_ok;
}
list_for_each_entry(curr, &my_shmem_pages, list) {
/* If userspace virt addr >= vm_end, exit. */
if (uvaddr >= vma->vm_end)
mutex_lock(&my_shmem_allocs_mtx);
// size_t my_shmem_page_count =
// list_count_nodes(&my_shmem_allocs) * (1 << max_contiguous_alloc_order);
// if (!my_shmem_page_count) {
// mutex_unlock(&my_shmem_allocs_mtx);
// goto cleanup_ok;
// }
list_for_each_entry(curr, &my_shmem_allocs, list) {
/* Finished mmap, exit loop */
if (vma_vaddr >= vma->vm_end)
break;
/* Wait until the vm_pgoff-th page, if exists. */
if (upgoff != 0) {
upgoff--;
/* Decrement page offset as much as possible*/
/*
 * NOTE(review): with `>` an offset exactly equal to the allocation's page
 * count does not skip the entry, and `remap_pg_count` below evaluates to 0;
 * `>=` looks intended -- confirm.
 */
if (vma_pgoff > ORDER_TO_PAGE_NR(curr->alloc_order)) {
vma_pgoff -= ORDER_TO_PAGE_NR(curr->alloc_order);
continue;
}
/* Exists allocable page, remap */
/* Exists remappable alloc, compute PFN to remap */
pg = curr->page;
get_page(pg);
/*
 * NOTE(review): `vma_pgoff` is not reset after the first remapped
 * allocation in the lines shown, so the same offset is applied to every
 * later entry as well -- confirm.
 */
pfn_to_remap = page_to_pfn(pg) + vma_pgoff;
remap_pg_count = min(
vma_pg_to_remap,
ORDER_TO_PAGE_NR(curr->alloc_order) - vma_pgoff
);
ret = remap_pfn_range(
vma, uvaddr, page_to_pfn(pg), PAGE_SIZE,
vma->vm_page_prot);
vma, vma_vaddr, pfn_to_remap,
remap_pg_count * PAGE_SIZE, vma->vm_page_prot);
if (ret) {
mutex_unlock(&my_shmem_pages_mtx);
mutex_unlock(&my_shmem_allocs_mtx);
goto cleanup_err_remap_pfn_failed;
}
pr_info("[%s] Remapped pfn %ld (kvaddr: 0x%px) -> uvaddr: 0x%px.\n",
__func__, page_to_pfn(pg), page_to_virt(pg), (void *) uvaddr);
uvaddr += PAGE_SIZE;
pr_info("[%s] Remapped pfn 0x%lx (+%ld) -> uvaddr: 0x%px.\n",
__func__, pfn_to_remap, remap_pg_count, (void *) vma_vaddr);
vma_vaddr += (remap_pg_count * PAGE_SIZE);
vma_pg_to_remap -= remap_pg_count;
}
/* May still have unmapped pages, we allocate lazily at fault time. */
mutex_unlock(&my_shmem_pages_mtx);
mutex_unlock(&my_shmem_allocs_mtx);
goto cleanup_ok;
cleanup_err_remap_pfn_failed:
pr_err("[%s] Cannot remap pfn %ld (kvaddr: 0x%px) -> uvaddr: 0x%px: %d.\n",
__func__, page_to_pfn(pg), page_to_virt(pg), (void *) uvaddr, ret);
pr_err("[%s] Cannot remap pfn 0x%lx (+%ld) -> uvaddr: 0x%px: %d.\n",
__func__, pfn_to_remap, remap_pg_count, (void *) vma_vaddr, ret);
/* Undo the get_page() taken for the allocation whose remap failed. */
put_page(pg);
goto exit;
cleanup_ok:
fp_name = file_dentry(filp)->d_name.name;
if (uvaddr == vma->vm_end)
if (vma_vaddr == vma->vm_end)
pr_info("[%s] Device file '%s' mmapped for vma: [0x%lx - 0x%lx).\n",
__func__, fp_name, vma->vm_start, vma->vm_end);
else
pr_info("[%s] Device file '%s' mmapped for vma: [0x%lx - 0x%lx..0x%lx).\n",
__func__, fp_name, vma->vm_start, uvaddr, vma->vm_end);
pr_info("[%s] Device file '%s' mmapped for vma: [0x%lx - 0x%lx) (..0x%lx).\n",
__func__, fp_name, vma->vm_start, vma_vaddr, vma->vm_end);
exit:
return ret;
}
@ -276,21 +331,10 @@ static int my_shmem_fops_release(struct inode *inode, struct file *filp)
return 0;
}
// static int my_shmem_fops_ioctl(struct file *filp, uint cmd, ulong arg);
// static int my_shmem_fops_fsync(
// struct file *filp, loff_t bgn_off, loff_t end_off, int datasync)
// {
// pr_info("[%s] Entered.\n", __func__);
// }
/* File operations table for the device: wires the open, mmap and release handlers. */
static const struct file_operations my_shmem_fops = {
.owner = THIS_MODULE,
.open = my_shmem_fops_open,
.mmap = my_shmem_fops_mmap,
// .unlocked_ioctl = my_shmem_fops_ioctl,
// .compat_ioctl = compat_ptr_ioctl,
.release = my_shmem_fops_release,
};
@ -354,19 +398,18 @@ static void __exit my_shmem_exit(void)
pr_info("[%s] Device destroyed.\n", __func__);
/* Free all pages -- I'm not compacting in runtime!!! */
struct my_shmem_page *page_entry, *tmp;
mutex_lock(&my_shmem_pages_mtx);
list_for_each_entry_safe(page_entry, tmp, &my_shmem_pages, list) {
// put_page(page_entry->page);
BUG_ON(atomic_read(&page_entry->page->_refcount) != 1);
__free_page(page_entry->page); // no put_page since we don't want double-free
// free_page((ulong) page_to_virt(page_entry->page));
struct my_shmem_alloc *page_entry, *tmp;
mutex_lock(&my_shmem_allocs_mtx);
list_for_each_entry_safe(page_entry, tmp, &my_shmem_allocs, list) {
int _dbg_refcount = atomic_read(&page_entry->page->_refcount);
pr_info("[%s] Page 0x%lx has refcount %d.\n", __func__, page_to_pfn(page_entry->page), _dbg_refcount);
BUG_ON(_dbg_refcount != 1);
put_page(page_entry->page); // => free when refcount == 0
// my_shmem_page_count--;
list_del(&page_entry->list);
kfree(page_entry);
}
mutex_unlock(&my_shmem_pages_mtx);
mutex_unlock(&my_shmem_allocs_mtx);
}
module_init(my_shmem_init);

View file

@ -0,0 +1,22 @@
#ifndef _MY_SHMEM_UTILS
#define _MY_SHMEM_UTILS
#include <linux/init.h>
#include <linux/kernel.h>
#include <linux/mm.h>
/*
 * Number of pages in a block of allocation order `order`, i.e. 2^order.
 * The shift is done on an unsigned long so large orders do not overflow an
 * int, and the argument is parenthesized so compound expressions bind as a
 * whole.
 */
#define ORDER_TO_PAGE_NR(order) (1UL << (order))
/* Size in bytes of the address range [vm_start, vm_end) covered by `vma`. */
#define SIZE_OF_VMA(vma) ((vma)->vm_end - (vma)->vm_start)
/* Number of pages covered by `vma`. */
#define NR_PAGE_OF_VMA(vma) (SIZE_OF_VMA(vma) >> PAGE_SHIFT)
static inline bool my_shmem_addr_in_alloc_range(
const ulong addr, const struct page *page, const ulong alloc_order)
{
ulong alloc_start = (ulong)page_to_virt(page);
ulong alloc_end = (PAGE_SIZE << alloc_order) + alloc_start;
return alloc_start <= addr && addr < alloc_end;
}
// static int my_shmem_remap
#endif /* _MY_SHMEM_UTILS*/