From 682c0d9372e56dda686ec1851d260be2ad95b686 Mon Sep 17 00:00:00 2001 From: rubberhead Date: Sat, 16 Mar 2024 07:48:13 +0000 Subject: [PATCH] Added multi-page contiguous allocation --- src/aarch64-linux-flush-dcache/my_shmem.c | 349 ++++++++++++---------- src/aarch64-linux-flush-dcache/utils.h | 22 ++ 2 files changed, 218 insertions(+), 153 deletions(-) create mode 100644 src/aarch64-linux-flush-dcache/utils.h diff --git a/src/aarch64-linux-flush-dcache/my_shmem.c b/src/aarch64-linux-flush-dcache/my_shmem.c index 8bb411f..120ece0 100644 --- a/src/aarch64-linux-flush-dcache/my_shmem.c +++ b/src/aarch64-linux-flush-dcache/my_shmem.c @@ -1,17 +1,6 @@ -// [TODO] Clean up headers... #include -#include "asm-generic/errno-base.h" -#include "asm-generic/memory_model.h" -#include "asm/page-def.h" -#include "linux/gfp.h" -#include "linux/mutex.h" -#include "linux/pfn_t.h" -#include "linux/pid.h" -#include -#include +#include #include -#include -#include #include #include #include @@ -19,25 +8,33 @@ #include #include #include -#include #include +#include "asm/page-def.h" +#include "linux/compiler_attributes.h" +#include "utils.h" + MODULE_AUTHOR("Zk."); -MODULE_DESCRIPTION("4.2.W1: mmap for point of coherency"); +MODULE_DESCRIPTION("Y4S2.W1: mmap for point of coherency"); MODULE_LICENSE("GPL"); -struct my_shmem_page { +ulong max_contiguous_alloc_order = 0; +module_param(max_contiguous_alloc_order, ulong, S_IRUGO | S_IWUSR); + +struct my_shmem_alloc { + /* Head page for page-aligned allocation */ struct page *page; + ulong alloc_order; struct list_head list; }; -static DEFINE_MUTEX(my_shmem_pages_mtx); +static DEFINE_MUTEX(my_shmem_allocs_mtx); /* [!] READ/WRITE UNDER LOCK */ -static LIST_HEAD(my_shmem_pages); +static LIST_HEAD(my_shmem_allocs); /* [!] READ/WRITE UNDER LOCK */ -// static size_t my_shmem_page_count = 0; +static size_t my_shmem_page_count = 0; static int major; static struct class* class; @@ -54,23 +51,23 @@ const char* DEV_NAME = "my_shmem"; */ static void my_shmem_vmops_close(struct vm_area_struct *vma) { - size_t nr_pages_in_cache = list_count_nodes(&my_shmem_pages); + // size_t nr_pages_in_cache = list_count_nodes(&my_shmem_allocs); size_t nr_pages_of_vma = (vma->vm_end - vma->vm_start) >> PAGE_SHIFT; pr_info( "[%s] Entered. vma size: %ld; cached pages: %ld.\n", - __func__, nr_pages_of_vma, nr_pages_in_cache + __func__, nr_pages_of_vma, my_shmem_page_count ); size_t nr_pages_offset = vma->vm_pgoff; - struct my_shmem_page *entry; - // u64 clean_time_bgn, clean_time_end; - // u64 runtime; + struct my_shmem_alloc *entry; + + mutex_lock(&my_shmem_allocs_mtx); + list_for_each_entry(entry, &my_shmem_allocs, list) { + const ulong entry_pgs = ORDER_TO_PAGE_NR(entry->alloc_order); - mutex_lock(&my_shmem_pages_mtx); - list_for_each_entry(entry, &my_shmem_pages, list) { /* Loop until first page out of offset */ - if (nr_pages_offset) { - nr_pages_offset--; + if (nr_pages_offset > entry_pgs) { + nr_pages_offset -= entry_pgs; continue; } @@ -81,102 +78,148 @@ static void my_shmem_vmops_close(struct vm_area_struct *vma) /* Inside the window of mapped pages -- flush them up */ struct page *pg = entry->page; ulong kvaddr_bgn = (ulong) page_address(pg); - ulong kvaddr_end = kvaddr_bgn + PAGE_SIZE; + ulong kvaddr_end = kvaddr_bgn + entry_pgs; - pr_info("[%s] Before flush: 0x%px has 0x%lx [+%ld].\n", + pr_info("[%s] Before flush: 0x%px has 0x%lx [+%ld]. 
Refcount: %d\n", __func__, (void *) kvaddr_bgn, *(ulong *) kvaddr_bgn, - sizeof(ulong)); + sizeof(ulong), atomic_read(&pg->_refcount)); __dcache_clean_poc(kvaddr_bgn, kvaddr_end); - pr_info("[%s] After flush: 0x%px has 0x%lx [+%ld].\n", - __func__, (void *) kvaddr_bgn, *(ulong *) kvaddr_bgn, - sizeof(ulong)); - put_page(pg); + pr_info("[%s] After flush: 0x%px has 0x%lx [+%ld]. Refcount: %d\n", + __func__, (void *) kvaddr_bgn, *(ulong *) kvaddr_bgn, + sizeof(ulong), atomic_read(&pg->_refcount)); - nr_pages_of_vma--; + if (nr_pages_offset != 0) { + nr_pages_of_vma -= min(entry_pgs - nr_pages_offset, nr_pages_of_vma); + nr_pages_offset = 0; + } else + nr_pages_of_vma -= min(entry_pgs, nr_pages_of_vma); } - mutex_unlock(&my_shmem_pages_mtx); + mutex_unlock(&my_shmem_allocs_mtx); pr_info("[%s] Flushed dcache.\n", __func__); } +static vm_fault_t __my_shmem_fault_remap(struct vm_fault *vmf) +{ + pr_info("[%s] Entered...\n", __func__); + // We remap, possibly with offset from composite `page`, so... + vm_fault_t ret = VM_FAULT_NOPAGE; + + pgoff_t vma_pgoff = vmf->vma->vm_pgoff; + pgoff_t vmf_pgoff_wrt_vma = vmf->pgoff; + const ulong fault_addr = vmf->address; + ulong remap_addr = fault_addr; + /* Page ID wrt. `my_shmem_allocs` that should be remapped due to `vmf` */ + pgoff_t vmf_pgoff_wrt_allocpool = vma_pgoff + vmf_pgoff_wrt_vma; + + BUG_ON(vmf_pgoff_wrt_allocpool >= my_shmem_page_count); + /* Current max. nr. of pages still remappable to `vmf->vma` */ + ulong remaining_remappable_pgs = min( + my_shmem_page_count - vmf_pgoff_wrt_allocpool, + vma_pgoff + NR_PAGE_OF_VMA(vmf->vma) - vmf_pgoff_wrt_allocpool + ); + pr_info("[%s] %ld + %ld = %ld (remap from here); remap %ld pages.\n", + __func__, vma_pgoff, vmf_pgoff_wrt_vma, vmf_pgoff_wrt_allocpool, + remaining_remappable_pgs); + + struct my_shmem_alloc *curr; + /* Page ID as pointed to by `curr` */ + pgoff_t curr_pg_offset = 0; + /* Page ID as pointed to by next of `curr` */ + pgoff_t next_pg_offset; + list_for_each_entry(curr, &my_shmem_allocs, list) { + next_pg_offset = curr_pg_offset + ORDER_TO_PAGE_NR(curr->alloc_order); + pr_info("[%s] curr pfn: 0x%lx, curr_off: %ld, next_off: %ld, remaining %ld remappable.\n", + __func__, page_to_pfn(curr->page), curr_pg_offset, next_pg_offset, remaining_remappable_pgs); + + if (next_pg_offset > vmf_pgoff_wrt_allocpool) { // In remappable range + get_page(curr->page); + + /* Offset in [`pg`, `pg + (1 << alloc_order)`) */ + pgoff_t offset_in_alloc = vmf_pgoff_wrt_allocpool - curr_pg_offset; + ulong remap_range_pgs = min( + next_pg_offset - curr_pg_offset - offset_in_alloc, + remaining_remappable_pgs + ); + ulong remap_range_bytes = remap_range_pgs * PAGE_SIZE; + ulong remap_pfn = page_to_pfn(curr->page) + offset_in_alloc; + + pr_info("[%s] Remapping PFN 0x%lx (+%ld ~ %ldB) -> %px\n", + __func__, remap_pfn, remap_range_pgs, remap_range_bytes, (void *)remap_addr); + int remap_ret = remap_pfn_range( + vmf->vma, remap_addr, remap_pfn, + remap_range_bytes, vmf->vma->vm_page_prot + ); + if (remap_ret) { + pr_info("[%s] Remap failed: %d\n", __func__, remap_ret); + put_page(curr->page); + goto err_remap_failed; + } + vmf_pgoff_wrt_allocpool = next_pg_offset; + remaining_remappable_pgs -= remap_range_pgs; + remap_addr += remap_range_bytes; + pr_info("[%s] Remap successful. 
Remaining %ld remappable.\n", __func__, remaining_remappable_pgs); + if (remaining_remappable_pgs == 0) + goto ok; + + } else // out of remappable range, continue + curr_pg_offset = next_pg_offset; + + } + +err_remap_failed: + ret |= VM_FAULT_SIGBUS | VM_FAULT_RETRY; +ok: + mutex_unlock(&my_shmem_allocs_mtx); + return ret; +} + static vm_fault_t my_shmem_vmops_fault(struct vm_fault *vmf) { - pr_info("[%s] vm_fault @ 0x%lx (real address 0x%lx).\n", - __func__, vmf->address, vmf->real_address); + pr_info("[%s] vm_fault @ 0x%lx (vma + %ld pages).\n", + __func__, vmf->address, vmf->pgoff); - vm_fault_t ret = 0; - struct vm_area_struct *vma_of_vmf = vmf->vma; - ulong vma_pg_offset = vma_of_vmf->vm_pgoff; - ulong vmf_pg_offset_from_vma_bgn = - (vmf->address - vma_of_vmf->vm_start) >> PAGE_SHIFT; - ulong pg_offset = vma_pg_offset + vmf_pg_offset_from_vma_bgn; - phys_addr_t _dbg_phys_of_page; - struct page *last_pg; + vm_fault_t ret = VM_FAULT_NOPAGE; + ulong fault_pg_offset = vmf->vma->vm_pgoff + vmf->pgoff; - mutex_lock(&my_shmem_pages_mtx); - size_t my_shmem_page_count = list_count_nodes(&my_shmem_pages); - BUG_ON(pg_offset < my_shmem_page_count); - - /* Allocate the new page(s) */ - ulong nr_pages_to_alloc = pg_offset - my_shmem_page_count + 1; - pr_info("[%s] Page count %ld, offset %ld -- allocating %ld more...\n", - __func__, my_shmem_page_count, vmf->pgoff, nr_pages_to_alloc); - for (; nr_pages_to_alloc > 0; nr_pages_to_alloc--) - { - // Allocate page handle in kernel - struct my_shmem_page *new_page = kzalloc( - sizeof(struct my_shmem_page), GFP_KERNEL); - if (!new_page) { - mutex_unlock(&my_shmem_pages_mtx); - goto err_ret_no_kmem; - } - - // Allocate kernel virtual page - struct page *curr_pg = alloc_page(GFP_USER); - if (!curr_pg) { - mutex_unlock(&my_shmem_pages_mtx); - goto err_ret_no_page; - } - pr_info("[%s] Allocated pfn: %ld, kernel vaddr: %px.\n", - __func__, page_to_pfn(curr_pg), page_to_virt(curr_pg)); - get_page(curr_pg); // For base page refcount - - // Populate page handle - new_page->page = curr_pg; - - // Add page handle to list - list_add(&new_page->list, &my_shmem_pages); - - // Fill in last_pg for final return from page fault handler - last_pg = curr_pg; + mutex_lock(&my_shmem_allocs_mtx); +locked_retry: + if (fault_pg_offset < my_shmem_page_count) { + // => Already present, remap + return __my_shmem_fault_remap(vmf); } - // Fill in vmf's page for return - get_page(last_pg); - vmf->page = last_pg; - // ret = vmf_insert_page(vma_of_vmf, vmf->adget_pagedress, last_pg); - // [!] YOU DON'T NEED TO CALL REMAP_PFN_RANGE OR FAMILY HERE!!! - // `__do_fault` allocates PTE prior to calling `vm_ops->fault`, - // and at return `finish_fault` inserts PTE for given page. - // [?] I think `vmf_insert_page` etc. are used to insert device pages... - // They require VM_MIXEDMAP, which in this case don't need (and in fact causes a BUG here.) - // I think this is also the reason why `remap_pfn_range` might work here, - // exactly because it does NOT try to add VM_MIXEDMAP, etc. - _dbg_phys_of_page = page_to_phys(last_pg); + // else => allocate `1 << order` pages opportunistically... 
+ struct my_shmem_alloc *new_alloc_handle = kzalloc( + sizeof(struct my_shmem_alloc), GFP_KERNEL + ); + if (!new_alloc_handle) + goto err_kzalloc_handle; - mutex_unlock(&my_shmem_pages_mtx); - goto ok_ret_allocated; + struct page *new_alloc_pg = alloc_pages( + GFP_USER, max_contiguous_alloc_order + ); + if (!new_alloc_pg) + goto err_alloc_pages; -err_ret_no_kmem: - pr_err("[%s] Cannot allocate `struct my_shmem_page` in kernel memory.\n", - __func__); - return VM_FAULT_OOM; -err_ret_no_page: - pr_err("[%s] Cannot allocate requested page for virtual memory.\n", - __func__); - return VM_FAULT_OOM; -ok_ret_allocated: + // get_page(new_alloc_pg); + new_alloc_handle->page = new_alloc_pg; + new_alloc_handle->alloc_order = max_contiguous_alloc_order; + list_add_tail(&new_alloc_handle->list, &my_shmem_allocs); + my_shmem_page_count += ORDER_TO_PAGE_NR(new_alloc_handle->alloc_order); + pr_info("[%s] Allocated 1 << %ld pages: 0x%lx - 0x%lx. Current page count: %ld\n", + __func__, max_contiguous_alloc_order, + page_to_pfn(new_alloc_pg), page_to_pfn(new_alloc_pg) + (1 << max_contiguous_alloc_order), + my_shmem_page_count); + + goto locked_retry; + +err_alloc_pages: +err_kzalloc_handle: + ret |= VM_FAULT_OOM; +err_generic: + mutex_unlock(&my_shmem_allocs_mtx); return ret; } @@ -195,63 +238,75 @@ static const struct file_operations my_shmem_fops; static int my_shmem_fops_mmap(struct file *filp, struct vm_area_struct *vma) { int ret = 0; - ulong uvaddr = vma->vm_start; - ulong upgoff = vma->vm_pgoff; - struct my_shmem_page *curr; + ulong vma_vaddr = vma->vm_start; + ulong vma_pgoff = vma->vm_pgoff; + const ulong vma_pg_count = (vma->vm_end - vma->vm_start) >> PAGE_SHIFT; + + struct my_shmem_alloc *curr; struct page *pg; + ulong pfn_to_remap; + ulong remap_pg_count; + ulong vma_pg_to_remap = vma_pg_count; const unsigned char *fp_name; vma->vm_ops = &my_shmem_vmops; /* Remap as much as possible */ - mutex_lock(&my_shmem_pages_mtx); - size_t my_shmem_page_count = list_count_nodes(&my_shmem_pages); - if (!my_shmem_page_count) { - mutex_unlock(&my_shmem_pages_mtx); - goto cleanup_ok; - } - list_for_each_entry(curr, &my_shmem_pages, list) { - /* If userspace virt addr >= vm_end, exit. */ - if (uvaddr >= vma->vm_end) + mutex_lock(&my_shmem_allocs_mtx); + // size_t my_shmem_page_count = + // list_count_nodes(&my_shmem_allocs) * (1 << max_contiguous_alloc_order); + // if (!my_shmem_page_count) { + // mutex_unlock(&my_shmem_allocs_mtx); + // goto cleanup_ok; + // } + list_for_each_entry(curr, &my_shmem_allocs, list) { + /* Finished mmap, exit loop */ + if (vma_vaddr >= vma->vm_end) break; - /* Wait until the vm_pgoff-th page, if exists. 
*/ - if (upgoff != 0) { - upgoff--; + /* Decrement page offset as much as possible*/ + if (vma_pgoff > ORDER_TO_PAGE_NR(curr->alloc_order)) { + vma_pgoff -= ORDER_TO_PAGE_NR(curr->alloc_order); continue; } - /* Exists allocable page, remap */ + /* Exists remappable alloc, compute PFN to remap */ pg = curr->page; get_page(pg); + pfn_to_remap = page_to_pfn(pg) + vma_pgoff; + remap_pg_count = min( + vma_pg_to_remap, + ORDER_TO_PAGE_NR(curr->alloc_order) - vma_pgoff + ); ret = remap_pfn_range( - vma, uvaddr, page_to_pfn(pg), PAGE_SIZE, - vma->vm_page_prot); + vma, vma_vaddr, pfn_to_remap, + remap_pg_count * PAGE_SIZE, vma->vm_page_prot); if (ret) { - mutex_unlock(&my_shmem_pages_mtx); + mutex_unlock(&my_shmem_allocs_mtx); goto cleanup_err_remap_pfn_failed; } - pr_info("[%s] Remapped pfn %ld (kvaddr: 0x%px) -> uvaddr: 0x%px.\n", - __func__, page_to_pfn(pg), page_to_virt(pg), (void *) uvaddr); - uvaddr += PAGE_SIZE; + pr_info("[%s] Remapped pfn 0x%lx (+%ld) -> uvaddr: 0x%px.\n", + __func__, pfn_to_remap, remap_pg_count, (void *) vma_vaddr); + vma_vaddr += (remap_pg_count * PAGE_SIZE); + vma_pg_to_remap -= remap_pg_count; } /* May still have unmapped pages, we allocate lazily at fault time. */ - mutex_unlock(&my_shmem_pages_mtx); + mutex_unlock(&my_shmem_allocs_mtx); goto cleanup_ok; cleanup_err_remap_pfn_failed: - pr_err("[%s] Cannot remap pfn %ld (kvaddr: 0x%px) -> uvaddr: 0x%px: %d.\n", - __func__, page_to_pfn(pg), page_to_virt(pg), (void *) uvaddr, ret); + pr_err("[%s] Cannot remap pfn 0x%lx (+%ld) -> uvaddr: 0x%px: %d.\n", + __func__, pfn_to_remap, remap_pg_count, (void *) vma_vaddr, ret); put_page(pg); goto exit; cleanup_ok: fp_name = file_dentry(filp)->d_name.name; - if (uvaddr == vma->vm_end) + if (vma_vaddr == vma->vm_end) pr_info("[%s] Device file '%s' mmapped for vma: [0x%lx - 0x%lx).\n", __func__, fp_name, vma->vm_start, vma->vm_end); else - pr_info("[%s] Device file '%s' mmapped for vma: [0x%lx - 0x%lx..0x%lx).\n", - __func__, fp_name, vma->vm_start, uvaddr, vma->vm_end); + pr_info("[%s] Device file '%s' mmapped for vma: [0x%lx - 0x%lx) (..0x%lx).\n", + __func__, fp_name, vma->vm_start, vma_vaddr, vma->vm_end); exit: return ret; } @@ -276,21 +331,10 @@ static int my_shmem_fops_release(struct inode *inode, struct file *filp) return 0; } -// static int my_shmem_fops_ioctl(struct file *filp, uint cmd, ulong arg); - -// static int my_shmem_fops_fsync( -// struct file *filp, loff_t bgn_off, loff_t end_off, int datasync) -// { -// pr_info("[%s] Entered.\n", __func__); - -// } - static const struct file_operations my_shmem_fops = { .owner = THIS_MODULE, .open = my_shmem_fops_open, .mmap = my_shmem_fops_mmap, - // .unlocked_ioctl = my_shmem_fops_ioctl, - // .compat_ioctl = compat_ptr_ioctl, .release = my_shmem_fops_release, }; @@ -354,19 +398,18 @@ static void __exit my_shmem_exit(void) pr_info("[%s] Device destroyed.\n", __func__); /* Free all pages -- I'm not compacting in runtime!!! 
*/
-	struct my_shmem_page *page_entry, *tmp;
-	mutex_lock(&my_shmem_pages_mtx);
-	list_for_each_entry_safe(page_entry, tmp, &my_shmem_pages, list) {
-		// put_page(page_entry->page);
-		BUG_ON(atomic_read(&page_entry->page->_refcount) != 1);
-		__free_page(page_entry->page); // no put_page since we don't want double-free
-		// free_page((ulong) page_to_virt(page_entry->page));
+	struct my_shmem_alloc *page_entry, *tmp;
+	mutex_lock(&my_shmem_allocs_mtx);
+	list_for_each_entry_safe(page_entry, tmp, &my_shmem_allocs, list) {
+		int _dbg_refcount = atomic_read(&page_entry->page->_refcount);
+		pr_info("[%s] Page 0x%lx has refcount %d.\n", __func__, page_to_pfn(page_entry->page), _dbg_refcount);
+		BUG_ON(_dbg_refcount != 1);
+		put_page(page_entry->page); // => freed once refcount drops to 0
 
-		// my_shmem_page_count--;
 		list_del(&page_entry->list);
 		kfree(page_entry);
 	}
-	mutex_unlock(&my_shmem_pages_mtx);
+	mutex_unlock(&my_shmem_allocs_mtx);
 }
 
 module_init(my_shmem_init);
diff --git a/src/aarch64-linux-flush-dcache/utils.h b/src/aarch64-linux-flush-dcache/utils.h
new file mode 100644
index 0000000..6a1d7ca
--- /dev/null
+++ b/src/aarch64-linux-flush-dcache/utils.h
@@ -0,0 +1,22 @@
+#ifndef _MY_SHMEM_UTILS
+#define _MY_SHMEM_UTILS
+
+#include
+#include
+#include
+
+#define ORDER_TO_PAGE_NR(order) (1UL << (order))
+#define SIZE_OF_VMA(vma) ((vma)->vm_end - (vma)->vm_start)
+#define NR_PAGE_OF_VMA(vma) (SIZE_OF_VMA(vma) >> PAGE_SHIFT)
+
+static inline bool my_shmem_addr_in_alloc_range(
+	const ulong addr, const struct page *page, const ulong alloc_order)
+{
+	ulong alloc_start = (ulong)page_to_virt(page);
+	ulong alloc_end = (PAGE_SIZE << alloc_order) + alloc_start;
+	return alloc_start <= addr && addr < alloc_end;
+}
+
+// static int my_shmem_remap
+
+#endif /* _MY_SHMEM_UTILS */
\ No newline at end of file
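
A minimal userspace smoke test for the new allocation path might look like the sketch below. It is not part of the patch: it assumes the module exposes its character device at /dev/my_shmem (matching DEV_NAME) and has been loaded with a nonzero order, e.g. `insmod my_shmem.ko max_contiguous_alloc_order=2`; the file name and mapping length are illustrative only.

/* my_shmem_smoke_test.c -- illustrative sketch, not part of this patch. */
#include <fcntl.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/mman.h>
#include <unistd.h>

int main(void)
{
	long page_sz = sysconf(_SC_PAGESIZE);
	size_t map_len = 8 * (size_t)page_sz; /* spans several order-N allocations */

	int fd = open("/dev/my_shmem", O_RDWR); /* assumed device node path */
	if (fd < 0) {
		perror("open /dev/my_shmem");
		return EXIT_FAILURE;
	}

	/* Touching unbacked offsets goes through my_shmem_vmops_fault(), which
	 * allocates 1 << max_contiguous_alloc_order pages per miss and remaps them. */
	unsigned char *buf = mmap(NULL, map_len, PROT_READ | PROT_WRITE,
				  MAP_SHARED, fd, 0);
	if (buf == MAP_FAILED) {
		perror("mmap");
		close(fd);
		return EXIT_FAILURE;
	}

	memset(buf, 0xab, map_len);
	printf("first byte after write: 0x%02x\n", buf[0]);

	/* munmap() invokes my_shmem_vmops_close(), i.e. the clean-to-PoC pass. */
	munmap(buf, map_len);
	close(fd);
	return EXIT_SUCCESS;
}

Running it and then checking dmesg should show the fault-path allocation messages followed by the before/after flush lines printed from my_shmem_vmops_close().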