Added multi-page contiguous allocation

2024-03-16 07:48:13 +00:00 · 2024-03-16 07:48:13 +00:00 · 682c0d9372
commit 682c0d9372
parent a2422135fc
2 changed files with 218 additions and 153 deletions
--- a/src/aarch64-linux-flush-dcache/my_shmem.c
+++ b/src/aarch64-linux-flush-dcache/my_shmem.c
@ -1,17 +1,6 @@
-// [TODO] Clean up headers...
 #include <linux/device.h>
-#include "asm-generic/errno-base.h"
-#include "asm-generic/memory_model.h"
-#include "asm/page-def.h"
-#include "linux/gfp.h"
-#include "linux/mutex.h"
-#include "linux/pfn_t.h"
-#include "linux/pid.h"
-#include <linux/rcupdate.h>
-#include <linux/vmalloc.h>
+#include <linux/mutex.h>
 #include <linux/list.h>
-#include <linux/types.h>
-#include <linux/errno.h>
 #include <linux/mm.h>
 #include <linux/mm_types.h>
 #include <linux/fs.h>
@ -19,25 +8,33 @@
 #include <linux/module.h>
 #include <linux/kernel.h>
 #include <linux/version.h>
-#include <linux/ktime.h>

 #include <asm/cacheflush_extra.h>

+#include "asm/page-def.h"
+#include "linux/compiler_attributes.h"
+#include "utils.h"
+
 MODULE_AUTHOR("Zk.");
-MODULE_DESCRIPTION("4.2.W1: mmap for point of coherency");
+MODULE_DESCRIPTION("Y4S2.W1: mmap for point of coherency");
 MODULE_LICENSE("GPL");

-struct my_shmem_page {
+ulong max_contiguous_alloc_order = 0;
+module_param(max_contiguous_alloc_order, ulong, S_IRUGO | S_IWUSR);
+
+struct my_shmem_alloc {
+	/* Head page of one contiguous, page-aligned allocation */
 	struct page *page;
+	ulong alloc_order; /* the allocation spans ORDER_TO_PAGE_NR(alloc_order) pages */
 	struct list_head list;
 };
-static DEFINE_MUTEX(my_shmem_pages_mtx);
+static DEFINE_MUTEX(my_shmem_allocs_mtx);

 /* [!] READ/WRITE UNDER LOCK */
-static LIST_HEAD(my_shmem_pages);
+static LIST_HEAD(my_shmem_allocs);

 /* [!] READ/WRITE UNDER LOCK */
-// static size_t my_shmem_page_count = 0;
+static size_t my_shmem_page_count = 0;

 static int major;
 static struct class* class;
@ -54,23 +51,23 @@ const char* DEV_NAME = "my_shmem";
 */
 static void my_shmem_vmops_close(struct vm_area_struct *vma)
 {
-	size_t nr_pages_in_cache = list_count_nodes(&my_shmem_pages);
+	// size_t nr_pages_in_cache = list_count_nodes(&my_shmem_allocs);
 	size_t nr_pages_of_vma = (vma->vm_end - vma->vm_start) >> PAGE_SHIFT;
 	pr_info(
 		"[%s] Entered. vma size: %ld; cached pages: %ld.\n",
-		__func__, nr_pages_of_vma, nr_pages_in_cache
+		__func__, nr_pages_of_vma, my_shmem_page_count
 	);

 	size_t nr_pages_offset = vma->vm_pgoff;
-	struct my_shmem_page *entry;
-	// u64 clean_time_bgn, clean_time_end;
-	// u64 runtime;
+	struct my_shmem_alloc *entry;
+
+	mutex_lock(&my_shmem_allocs_mtx);
+	list_for_each_entry(entry, &my_shmem_allocs, list) {
+		const ulong entry_pgs = ORDER_TO_PAGE_NR(entry->alloc_order);

-	mutex_lock(&my_shmem_pages_mtx);
-	list_for_each_entry(entry, &my_shmem_pages, list) {
 		/* Loop until first page out of offset */
-		if (nr_pages_offset) {
-			nr_pages_offset--;
+		if (nr_pages_offset >= entry_pgs) { /* whole alloc lies before the VMA window */
+			nr_pages_offset -= entry_pgs;
 			continue;
 		}

@ -81,102 +78,148 @@ static void my_shmem_vmops_close(struct vm_area_struct *vma)
 		/* Inside the window of mapped pages -- flush them up */
 		struct page *pg = entry->page;
 		ulong kvaddr_bgn = (ulong) page_address(pg);
-		ulong kvaddr_end = kvaddr_bgn + PAGE_SIZE;
+		ulong kvaddr_end = kvaddr_bgn + entry_pgs * PAGE_SIZE; /* end is a byte address, not a page count */


-		pr_info("[%s] Before flush: 0x%px has 0x%lx [+%ld].\n",
+		pr_info("[%s] Before flush: 0x%px has 0x%lx [+%ld]. Refcount: %d\n",
 			__func__, (void *) kvaddr_bgn, *(ulong *) kvaddr_bgn,
-			sizeof(ulong));
+			sizeof(ulong), atomic_read(&pg->_refcount));
 		__dcache_clean_poc(kvaddr_bgn, kvaddr_end);
-		pr_info("[%s] After flush: 0x%px has 0x%lx [+%ld].\n",
-			__func__, (void *) kvaddr_bgn, *(ulong *) kvaddr_bgn,
-			sizeof(ulong));
-
 		put_page(pg);
+		pr_info("[%s] After flush: 0x%px has 0x%lx [+%ld]. Refcount: %d\n",
+			__func__, (void *) kvaddr_bgn, *(ulong *) kvaddr_bgn,
+			sizeof(ulong), atomic_read(&pg->_refcount));

-		nr_pages_of_vma--;
+		if (nr_pages_offset != 0) {
+			nr_pages_of_vma -= min(entry_pgs - nr_pages_offset, nr_pages_of_vma);
+			nr_pages_offset = 0;
+		} else
+			nr_pages_of_vma -= min(entry_pgs, nr_pages_of_vma);
 	}
-	mutex_unlock(&my_shmem_pages_mtx);
+	mutex_unlock(&my_shmem_allocs_mtx);

 	pr_info("[%s] Flushed dcache.\n", __func__);
 }

+/*
+ * Fault-time remap of pages already present in `my_shmem_allocs`, starting at
+ * the faulting page and ending at the pool end or the VMA end, whichever is first.
+ * Caller holds `my_shmem_allocs_mtx`; it is released here on every return path.
+ */
+static vm_fault_t __my_shmem_fault_remap(struct vm_fault *vmf)
+{
+	pr_info("[%s] Entered...\n", __func__);
+	vm_fault_t ret = VM_FAULT_NOPAGE;
+	pgoff_t vma_pgoff = vmf->vma->vm_pgoff;
+	/* `vmf->pgoff` already includes `vm_pgoff`; adding them would count it twice. */
+	pgoff_t vmf_pgoff_wrt_vma = vmf->pgoff - vma_pgoff;
+	ulong remap_addr = vmf->address;
+	/* Page ID wrt. `my_shmem_allocs` that should be remapped due to `vmf` */
+	pgoff_t vmf_pgoff_wrt_allocpool = vmf->pgoff;
+
+	BUG_ON(vmf_pgoff_wrt_allocpool >= my_shmem_page_count);
+	/* Current max. nr. of pages still remappable to `vmf->vma` */
+	ulong remaining_remappable_pgs = min(
+		my_shmem_page_count - vmf_pgoff_wrt_allocpool,
+		vma_pgoff + NR_PAGE_OF_VMA(vmf->vma) - vmf_pgoff_wrt_allocpool
+	);
+	pr_info("[%s] %ld + %ld = %ld (remap from here); remap %ld pages.\n",
+		__func__, vma_pgoff, vmf_pgoff_wrt_vma, vmf_pgoff_wrt_allocpool,
+		remaining_remappable_pgs);
+
+	struct my_shmem_alloc *curr;
+	/* Page ID as pointed to by `curr` */
+	pgoff_t curr_pg_offset = 0;
+	/* Page ID as pointed to by next of `curr` */
+	pgoff_t next_pg_offset;
+	list_for_each_entry(curr, &my_shmem_allocs, list) {
+		next_pg_offset = curr_pg_offset + ORDER_TO_PAGE_NR(curr->alloc_order);
+		pr_info("[%s] curr pfn: 0x%lx, curr_off: %ld, next_off: %ld, remaining %ld remappable.\n",
+			__func__, page_to_pfn(curr->page), curr_pg_offset, next_pg_offset, remaining_remappable_pgs);
+
+		/* Wholly before the remap cursor: skip it, but keep counting pages. */
+		if (next_pg_offset <= vmf_pgoff_wrt_allocpool) {
+			curr_pg_offset = next_pg_offset;
+			continue;
+		}
+		get_page(curr->page);
+		/* Offset in [`pg`, `pg + (1 << alloc_order)`) */
+		pgoff_t offset_in_alloc = vmf_pgoff_wrt_allocpool - curr_pg_offset;
+		ulong remap_range_pgs = min(next_pg_offset - vmf_pgoff_wrt_allocpool,
+					    remaining_remappable_pgs);
+		ulong remap_range_bytes = remap_range_pgs * PAGE_SIZE;
+		ulong remap_pfn = page_to_pfn(curr->page) + offset_in_alloc;
+
+		pr_info("[%s] Remapping PFN 0x%lx (+%ld ~ %ldB) -> %px\n",
+			__func__, remap_pfn, remap_range_pgs, remap_range_bytes, (void *)remap_addr);
+		int remap_ret = remap_pfn_range(vmf->vma, remap_addr, remap_pfn,
+			remap_range_bytes, vmf->vma->vm_page_prot);
+		if (remap_ret) {
+			pr_info("[%s] Remap failed: %d\n", __func__, remap_ret);
+			put_page(curr->page);
+			goto err_remap_failed;
+		}
+		/* Move both cursors past `curr`; the next alloc is entered at offset 0. */
+		vmf_pgoff_wrt_allocpool = curr_pg_offset = next_pg_offset;
+		remaining_remappable_pgs -= remap_range_pgs;
+		remap_addr += remap_range_bytes;
+		pr_info("[%s] Remap successful. Remaining %ld remappable.\n", __func__, remaining_remappable_pgs);
+		if (remaining_remappable_pgs == 0)
+			goto ok;
+	}
+
+err_remap_failed:
+	/* No VM_FAULT_RETRY here: this handler never drops mmap_lock. */
+	ret = VM_FAULT_SIGBUS;
+ok:
+	mutex_unlock(&my_shmem_allocs_mtx);
+	return ret;
+}
+
 static vm_fault_t my_shmem_vmops_fault(struct vm_fault *vmf)
 {
-	pr_info("[%s] vm_fault @ 0x%lx (real address 0x%lx).\n",
-		__func__, vmf->address, vmf->real_address);
+	/* Grow the pool until it covers the faulting page, then remap it into the VMA. */
+	pr_info("[%s] vm_fault @ 0x%lx (vma + %ld pages).\n",
+		__func__, vmf->address, vmf->pgoff);

-	vm_fault_t ret = 0;
-	struct vm_area_struct *vma_of_vmf = vmf->vma;
-	ulong vma_pg_offset = vma_of_vmf->vm_pgoff;
-	ulong vmf_pg_offset_from_vma_bgn =
-		(vmf->address - vma_of_vmf->vm_start) >> PAGE_SHIFT;
-	ulong pg_offset = vma_pg_offset + vmf_pg_offset_from_vma_bgn;
-	phys_addr_t _dbg_phys_of_page;
-	struct page *last_pg;
+	vm_fault_t ret = VM_FAULT_OOM; /* returned only when an allocation below fails */
+	/* NOTE(review): vmf->pgoff appears to include vm_pgoff already, so this over-counts; change only together with __my_shmem_fault_remap(). */
+	ulong fault_pg_offset = vmf->vma->vm_pgoff + vmf->pgoff;

-	mutex_lock(&my_shmem_pages_mtx);
-	size_t my_shmem_page_count = list_count_nodes(&my_shmem_pages);
-	BUG_ON(pg_offset < my_shmem_page_count);
-
-	/* Allocate the new page(s) */
-	ulong nr_pages_to_alloc = pg_offset - my_shmem_page_count + 1;
-	pr_info("[%s] Page count %ld, offset %ld -- allocating %ld more...\n",
-		__func__, my_shmem_page_count, vmf->pgoff, nr_pages_to_alloc);
-	for (; nr_pages_to_alloc > 0; nr_pages_to_alloc--)
-	{
-		// Allocate page handle in kernel
-		struct my_shmem_page *new_page = kzalloc(
-			sizeof(struct my_shmem_page), GFP_KERNEL);
-		if (!new_page) {
-			mutex_unlock(&my_shmem_pages_mtx);
-			goto err_ret_no_kmem;
-		}
-
-		// Allocate kernel virtual page
-		struct page *curr_pg = alloc_page(GFP_USER);
-		if (!curr_pg) {
-			mutex_unlock(&my_shmem_pages_mtx);
-			goto err_ret_no_page;
-		}
-		pr_info("[%s] Allocated pfn: %ld, kernel vaddr: %px.\n",
-			__func__, page_to_pfn(curr_pg), page_to_virt(curr_pg));
-		get_page(curr_pg); // For base page refcount
-
-		// Populate page handle
-		new_page->page = curr_pg;
-
-		// Add page handle to list
-		list_add(&new_page->list, &my_shmem_pages);
-
-		// Fill in last_pg for final return from page fault handler
-		last_pg = curr_pg;
+	mutex_lock(&my_shmem_allocs_mtx);
+locked_retry:
+	if (fault_pg_offset < my_shmem_page_count) {
+		// => Already present, remap (the helper releases the mutex)
+		return __my_shmem_fault_remap(vmf);
 	}
-	// Fill in vmf's page for return
-	get_page(last_pg);
-	vmf->page = last_pg;
-	// ret = vmf_insert_page(vma_of_vmf, vmf->adget_pagedress, last_pg);
-	// [!] YOU DON'T NEED TO CALL REMAP_PFN_RANGE OR FAMILY HERE!!!
-	// `__do_fault` allocates PTE prior to calling `vm_ops->fault`,
-	// and at return `finish_fault` inserts PTE for given page.
-	// [?] I think `vmf_insert_page` etc. are used to insert device pages...
-	// They require VM_MIXEDMAP, which in this case don't need (and in fact causes a BUG here.)
-	// I think this is also the reason why `remap_pfn_range` might work here,
-	// exactly because it does NOT try to add VM_MIXEDMAP, etc.
-	_dbg_phys_of_page = page_to_phys(last_pg);
+	// else => allocate `1 << order` pages opportunistically...
+	/* Snapshot the writable module param once so allocation and bookkeeping agree. */
+	const ulong order = READ_ONCE(max_contiguous_alloc_order);
+	struct my_shmem_alloc *new_alloc_handle =
+		kzalloc(sizeof(*new_alloc_handle), GFP_KERNEL);
+	if (!new_alloc_handle)
+		goto err_kzalloc_handle;

-	mutex_unlock(&my_shmem_pages_mtx);
-	goto ok_ret_allocated;
+	struct page *new_alloc_pg = alloc_pages(GFP_USER, order);
+	if (!new_alloc_pg)
+		goto err_alloc_pages;

-err_ret_no_kmem:
-	pr_err("[%s] Cannot allocate `struct my_shmem_page` in kernel memory.\n",
-		__func__);
-	return VM_FAULT_OOM;
-err_ret_no_page:
-	pr_err("[%s] Cannot allocate requested page for virtual memory.\n",
-		__func__);
-	return VM_FAULT_OOM;
-ok_ret_allocated:
+	/* No extra get_page(): the reference from alloc_pages() is the pool's own. */
+	new_alloc_handle->page = new_alloc_pg;
+	new_alloc_handle->alloc_order = order;
+	list_add_tail(&new_alloc_handle->list, &my_shmem_allocs);
+	my_shmem_page_count += ORDER_TO_PAGE_NR(order);
+	pr_info("[%s] Allocated 1 << %ld pages: 0x%lx - 0x%lx. Current page count: %ld\n",
+		__func__, order,
+		page_to_pfn(new_alloc_pg), page_to_pfn(new_alloc_pg) + ORDER_TO_PAGE_NR(order),
+		my_shmem_page_count);
+
+	goto locked_retry;
+
+err_alloc_pages:
+	kfree(new_alloc_handle); /* handle is not on the list yet, so free it here */
+err_kzalloc_handle:
+	mutex_unlock(&my_shmem_allocs_mtx);
 	return ret;
 }

@ -195,63 +238,75 @@ static const struct file_operations my_shmem_fops;
 static int my_shmem_fops_mmap(struct file *filp, struct vm_area_struct *vma)
 {
 	int ret = 0;
-	ulong uvaddr = vma->vm_start;
-	ulong upgoff = vma->vm_pgoff;
-	struct my_shmem_page *curr;
+	ulong vma_vaddr = vma->vm_start;
+	ulong vma_pgoff = vma->vm_pgoff;
+	const ulong vma_pg_count = (vma->vm_end - vma->vm_start) >> PAGE_SHIFT;
+
+	struct my_shmem_alloc *curr;
 	struct page *pg;
+	ulong pfn_to_remap;
+	ulong remap_pg_count;
+	ulong vma_pg_to_remap = vma_pg_count;
 	const unsigned char *fp_name;

 	vma->vm_ops = &my_shmem_vmops;

 	/* Remap as much as possible */
-	mutex_lock(&my_shmem_pages_mtx);
-	size_t my_shmem_page_count = list_count_nodes(&my_shmem_pages);
-	if (!my_shmem_page_count) {
-		mutex_unlock(&my_shmem_pages_mtx);
-		goto cleanup_ok;
-	}
-	list_for_each_entry(curr, &my_shmem_pages, list) {
-		/* If userspace virt addr >= vm_end, exit. */
-		if (uvaddr >= vma->vm_end)
+	mutex_lock(&my_shmem_allocs_mtx);
+	/*
+	 * Walk the allocation list: skip whole allocations that lie before
+	 * `vm_pgoff`, then map each following allocation (the first one from its
+	 * in-allocation offset) until the VMA is full or the list ends.
+	 * Every mapped allocation gets one extra reference on its head page.
+	 */
+	list_for_each_entry(curr, &my_shmem_allocs, list) {
+		/* Finished mmap, exit loop */
+		if (vma_vaddr >= vma->vm_end)
 			break;

-		/* Wait until the vm_pgoff-th page, if exists. */
-		if (upgoff != 0) {
-			upgoff--;
+		/* Skip allocations wholly before the offset (`>=`: equal means "starts at the next one") */
+		if (vma_pgoff >= ORDER_TO_PAGE_NR(curr->alloc_order)) {
+			vma_pgoff -= ORDER_TO_PAGE_NR(curr->alloc_order);
 			continue;
 		}

-		/* Exists allocable page, remap */
+		/* Exists remappable alloc, compute PFN to remap */
 		pg = curr->page;
 		get_page(pg);
+		pfn_to_remap = page_to_pfn(pg) + vma_pgoff;
+		remap_pg_count = min(vma_pg_to_remap,
+			ORDER_TO_PAGE_NR(curr->alloc_order) - vma_pgoff);
+		/* Only the first mapped allocation is entered at an offset. */
+		vma_pgoff = 0;
 		ret = remap_pfn_range(
-			vma, uvaddr, page_to_pfn(pg), PAGE_SIZE,
-			vma->vm_page_prot);
+			vma, vma_vaddr, pfn_to_remap,
+			remap_pg_count * PAGE_SIZE, vma->vm_page_prot);
 		if (ret) {
-			mutex_unlock(&my_shmem_pages_mtx);
+			mutex_unlock(&my_shmem_allocs_mtx);
 			goto cleanup_err_remap_pfn_failed;
 		}
-		pr_info("[%s] Remapped pfn %ld (kvaddr: 0x%px) -> uvaddr: 0x%px.\n",
-			__func__, page_to_pfn(pg), page_to_virt(pg), (void *) uvaddr);
-		uvaddr += PAGE_SIZE;
+		pr_info("[%s] Remapped pfn 0x%lx (+%ld) -> uvaddr: 0x%px.\n",
+			__func__, pfn_to_remap, remap_pg_count, (void *) vma_vaddr);
+		vma_vaddr += (remap_pg_count * PAGE_SIZE);
+		vma_pg_to_remap -= remap_pg_count;
 	}
 	/* May still have unmapped pages, we allocate lazily at fault time. */
-	mutex_unlock(&my_shmem_pages_mtx);
+	mutex_unlock(&my_shmem_allocs_mtx);
 	goto cleanup_ok;

 cleanup_err_remap_pfn_failed:
-	pr_err("[%s] Cannot remap pfn %ld (kvaddr: 0x%px) -> uvaddr: 0x%px: %d.\n",
-		__func__, page_to_pfn(pg), page_to_virt(pg), (void *) uvaddr, ret);
+	pr_err("[%s] Cannot remap pfn 0x%lx (+%ld) -> uvaddr: 0x%px: %d.\n",
+		__func__, pfn_to_remap, remap_pg_count, (void *) vma_vaddr, ret);
 	put_page(pg);
 	goto exit;
 cleanup_ok:
 	fp_name = file_dentry(filp)->d_name.name;
-	if (uvaddr == vma->vm_end)
+	if (vma_vaddr == vma->vm_end)
 		pr_info("[%s] Device file '%s' mmapped for vma: [0x%lx - 0x%lx).\n",
 			__func__, fp_name, vma->vm_start, vma->vm_end);
 	else
-		pr_info("[%s] Device file '%s' mmapped for vma: [0x%lx - 0x%lx..0x%lx).\n",
-			__func__, fp_name, vma->vm_start, uvaddr, vma->vm_end);
+		pr_info("[%s] Device file '%s' mmapped for vma: [0x%lx - 0x%lx) (..0x%lx).\n",
+			__func__, fp_name, vma->vm_start, vma_vaddr, vma->vm_end);
 exit:
 	return ret;
 }
@ -276,21 +331,10 @@ static int my_shmem_fops_release(struct inode *inode, struct file *filp)
 	return 0;
 }

-// static int my_shmem_fops_ioctl(struct file *filp, uint cmd, ulong arg);
-
-// static int my_shmem_fops_fsync(
-// 	struct file *filp, loff_t bgn_off, loff_t end_off, int datasync)
-// {
-// 	pr_info("[%s] Entered.\n", __func__);
-
-// }
-
 static const struct file_operations my_shmem_fops = {
 	.owner = THIS_MODULE,
 	.open = my_shmem_fops_open,
 	.mmap = my_shmem_fops_mmap,
-	// .unlocked_ioctl = my_shmem_fops_ioctl,
-	// .compat_ioctl = compat_ptr_ioctl,
 	.release = my_shmem_fops_release,
 };

@ -354,19 +398,18 @@ static void __exit my_shmem_exit(void)
 	pr_info("[%s] Device destroyed.\n", __func__);

 	/* Free all pages -- I'm not compacting in runtime!!! */
-	struct my_shmem_page *page_entry, *tmp;
-	mutex_lock(&my_shmem_pages_mtx);
-	list_for_each_entry_safe(page_entry, tmp, &my_shmem_pages, list) {
-		// put_page(page_entry->page);
-		BUG_ON(atomic_read(&page_entry->page->_refcount) != 1);
-		__free_page(page_entry->page); // no put_page since we don't want double-free
-		// free_page((ulong) page_to_virt(page_entry->page));
+	struct my_shmem_alloc *page_entry, *tmp;
+	mutex_lock(&my_shmem_allocs_mtx);
+	list_for_each_entry_safe(page_entry, tmp, &my_shmem_allocs, list) {
+		int _dbg_refcount = atomic_read(&page_entry->page->_refcount);
+		pr_info("[%s] Page 0x%lx has refcount %d.\n", __func__, page_to_pfn(page_entry->page), _dbg_refcount);
+		BUG_ON(_dbg_refcount != 1);
+		__free_pages(page_entry->page, page_entry->alloc_order); /* non-compound alloc: put_page() would free only the head page */

-		// my_shmem_page_count--;
 		list_del(&page_entry->list);
 		kfree(page_entry);
 	}
-	mutex_unlock(&my_shmem_pages_mtx);
+	mutex_unlock(&my_shmem_allocs_mtx);
 }

 module_init(my_shmem_init);
--- a/src/aarch64-linux-flush-dcache/utils.h
+++ b/src/aarch64-linux-flush-dcache/utils.h
@ -0,0 +1,22 @@
+#ifndef _MY_SHMEM_UTILS
+#define _MY_SHMEM_UTILS
+
+#include <linux/init.h>
+#include <linux/kernel.h>
+#include <linux/mm.h>
+
+/* Page/VMA size helpers; arguments are parenthesized and the shift is done in `unsigned long`. */
+#define ORDER_TO_PAGE_NR(order) (1UL << (order))
+#define SIZE_OF_VMA(vma) ((vma)->vm_end - (vma)->vm_start)
+#define NR_PAGE_OF_VMA(vma) (SIZE_OF_VMA(vma) >> PAGE_SHIFT)
+
+/* True iff `addr` lies in [page_to_virt(page), page_to_virt(page) + (PAGE_SIZE << alloc_order)). */
+static inline bool my_shmem_addr_in_alloc_range(
+	const ulong addr, const struct page *page, const ulong alloc_order)
+{
+	ulong alloc_start = (ulong)page_to_virt(page);
+	ulong alloc_end = (PAGE_SIZE << alloc_order) + alloc_start;
+	return alloc_start <= addr && addr < alloc_end;
+}
+
+#endif /* _MY_SHMEM_UTILS */