[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[microblaze-uclinux] tuned free() function



Hi,
I have a patch for you.

User programs with >10000 allocated blocks are very slow when it comes to free one of the blocks (with SIMPLE_ALLOC used in the kernel config (which is default for microblaze)). That is because of a sequential search in a simple-linked memory block management list in mmap.c. I measured a single free() takes till 5 ms in such a situation. It may take seconds when freeing a bunch of memory blocks in a loop.

My patch assumes several successive calls to free() access memory blocks which are neighbours to each other. The last position in the management list is stored, and a next access starts to search the list entry from there in both directions.

Often this trick speeds free() up a lot here. E.g. if I build up a temporary node tree from an XML file, freeing will be 100 times faster! The higher the memory block count is plus the higher you allocate in a row, the higher the effect of the patch will be.

CU, F@lk
Index: mmap.c
===================================================================
RCS file: /repository/uclinux_dist/linux-2.4.x/mmnommu/mmap.c,v
retrieving revision 1.4
retrieving revision 1.6
diff -u -r1.4 -r1.6
--- mmap.c	25 Nov 2005 08:26:01 -0000	1.4
+++ mmap.c	22 May 2007 15:47:00 -0000	1.6
@@ -26,6 +26,8 @@
 #include <asm/uaccess.h>
 #include <asm/pgalloc.h>
 
+//#define DEBUG
+
 #ifndef NO_MM
 /*
  * WARNING: the debugging will use recursive algorithms so never enable this
@@ -1694,6 +1696,9 @@
 	askedalloc += sizeof(*vml);
 
 	vml->next = current->mm->vmlist;
+	if (current->mm->vmlist) {
+		current->mm->vmlist->prev = vml;
+	}
 	current->mm->vmlist = vml;
 
 	up_write(&nommu_vma_sem);
@@ -1777,6 +1782,9 @@
 	struct vm_list_struct *vml, **parent;
 	unsigned long end = addr + len;
 
+	static struct vm_list_struct *s_old_parent = 0L;
+	static struct mm_struct *s_old_mm = 0L;
+
 #if 0 /* def MAGIC_ROM_PTR - we want do want to free it with the new mmap */
 	/* For efficiency's sake, if the pointer is obviously in ROM,
 	   don't bother walking the lists to free it */
@@ -1788,10 +1796,38 @@
 	printk("do_munmap:\n");
 #endif
 
-	for (parent = &mm->vmlist; *parent; parent = &(*parent)->next)
+	if (s_old_parent && s_old_mm == mm) {
+		int i = 0;
+		for (parent = &s_old_parent; *parent && i < 2; parent = &(*parent)->next, i++) {
+			if ((*parent)->vma->vm_start == addr &&
+			(len == 0 || (*parent)->vma->vm_end == end)) {
+				//printk("*1");
+				goto found;
+			}
+			//printk("*2");
+		}
+		i = 0;
+		for (parent = &s_old_parent; *parent && i < 8; parent = &(*parent)->prev, i++) {
+			if ((*parent)->vma->vm_start == addr &&
+			(len == 0 || (*parent)->vma->vm_end == end)) {
+				//printk("*3");
+				goto found;
+			}
+			//printk("*4");
+		}
+	}
+
+	for (parent = &mm->vmlist; *parent; parent = &(*parent)->next) {
 		if ((*parent)->vma->vm_start == addr &&
-		    (len == 0 || (*parent)->vma->vm_end == end))
+		    (len == 0 || (*parent)->vma->vm_end == end)) {
+			s_old_mm = mm;
+			//printk("*5");
 			goto found;
+		}
+		//printk("*6");
+	}
+
+	s_old_parent = 0L; s_old_mm = 0L;
 
 	printk("munmap of non-mmaped memory by process %d (%s): %p\n",
 	       current->pid, current->comm, (void *) addr);
@@ -1800,9 +1836,25 @@
  found:
 	vml = *parent;
 
+	s_old_parent = vml->next;
+	if (vml == s_old_parent) { // actually this shouldn't happen
+		s_old_parent = 0L; s_old_mm = 0L;
+	}
+
 	put_vma(vml->vma);
 
-	*parent = vml->next;
+	if (vml->prev) {
+		//printk("*7");
+		vml->prev->next = vml->next;
+	} else {
+		mm->vmlist = vml->next;
+	}
+	if (vml->next) {
+		//printk("*8");
+		vml->next->prev = vml->prev;
+	}
+	//printk("\n++ 0x%08x\n", vml);
+
 	realalloc -= ksize(vml);
 	askedalloc -= sizeof(*vml);
 	kfree(vml);
Index: sched.h
===================================================================
RCS file: /repository/uclinux_dist/linux-2.4.x/include/linux/sched.h,v
retrieving revision 1.4
diff -r1.4 sched.h
273a274
> 	struct vm_list_struct	*prev;