[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
[microblaze-uclinux] tuned free() function
Hi,
I have a patch for you.
User programs with >10000 allocated blocks are very slow when it comes to free one of the blocks (with SIMPLE_ALLOC used in the kernel config (which is default for microblaze)). That is because of a sequential search in a simple-linked memory block management list in mmap.c. I measured a single free() takes till 5 ms in such a situation. It may take seconds when freeing a bunch of memory blocks in a loop.
My patch assumes several successive calls to free() access memory blocks which are neighbours to each other. The last position in the management list is stored, and a next access starts to search the list entry from there in both directions.
Often this trick speeds free() up a lot here. E.g. if I build up a temporary node tree from an XML file, freeing will be 100 times faster! The higher the memory block count is plus the higher you allocate in a row, the higher the effect of the patch will be.
CU, F@lk
Index: mmap.c
===================================================================
RCS file: /repository/uclinux_dist/linux-2.4.x/mmnommu/mmap.c,v
retrieving revision 1.4
retrieving revision 1.6
diff -u -r1.4 -r1.6
--- mmap.c 25 Nov 2005 08:26:01 -0000 1.4
+++ mmap.c 22 May 2007 15:47:00 -0000 1.6
@@ -26,6 +26,8 @@
#include <asm/uaccess.h>
#include <asm/pgalloc.h>
+//#define DEBUG
+
#ifndef NO_MM
/*
* WARNING: the debugging will use recursive algorithms so never enable this
@@ -1694,6 +1696,9 @@
askedalloc += sizeof(*vml);
vml->next = current->mm->vmlist;
+ if (current->mm->vmlist) {
+ current->mm->vmlist->prev = vml;
+ }
current->mm->vmlist = vml;
up_write(&nommu_vma_sem);
@@ -1777,6 +1782,9 @@
struct vm_list_struct *vml, **parent;
unsigned long end = addr + len;
+ static struct vm_list_struct *s_old_parent = 0L;
+ static struct mm_struct *s_old_mm = 0L;
+
#if 0 /* def MAGIC_ROM_PTR - we want do want to free it with the new mmap */
/* For efficiency's sake, if the pointer is obviously in ROM,
don't bother walking the lists to free it */
@@ -1788,10 +1796,38 @@
printk("do_munmap:\n");
#endif
- for (parent = &mm->vmlist; *parent; parent = &(*parent)->next)
+ if (s_old_parent && s_old_mm == mm) {
+ int i = 0;
+ for (parent = &s_old_parent; *parent && i < 2; parent = &(*parent)->next, i++) {
+ if ((*parent)->vma->vm_start == addr &&
+ (len == 0 || (*parent)->vma->vm_end == end)) {
+ //printk("*1");
+ goto found;
+ }
+ //printk("*2");
+ }
+ i = 0;
+ for (parent = &s_old_parent; *parent && i < 8; parent = &(*parent)->prev, i++) {
+ if ((*parent)->vma->vm_start == addr &&
+ (len == 0 || (*parent)->vma->vm_end == end)) {
+ //printk("*3");
+ goto found;
+ }
+ //printk("*4");
+ }
+ }
+
+ for (parent = &mm->vmlist; *parent; parent = &(*parent)->next) {
if ((*parent)->vma->vm_start == addr &&
- (len == 0 || (*parent)->vma->vm_end == end))
+ (len == 0 || (*parent)->vma->vm_end == end)) {
+ s_old_mm = mm;
+ //printk("*5");
goto found;
+ }
+ //printk("*6");
+ }
+
+ s_old_parent = 0L; s_old_mm = 0L;
printk("munmap of non-mmaped memory by process %d (%s): %p\n",
current->pid, current->comm, (void *) addr);
@@ -1800,9 +1836,25 @@
found:
vml = *parent;
+ s_old_parent = vml->next;
+ if (vml == s_old_parent) { // actually this shouldn't happen
+ s_old_parent = 0L; s_old_mm = 0L;
+ }
+
put_vma(vml->vma);
- *parent = vml->next;
+ if (vml->prev) {
+ //printk("*7");
+ vml->prev->next = vml->next;
+ } else {
+ mm->vmlist = vml->next;
+ }
+ if (vml->next) {
+ //printk("*8");
+ vml->next->prev = vml->prev;
+ }
+ //printk("\n++ 0x%08x\n", vml);
+
realalloc -= ksize(vml);
askedalloc -= sizeof(*vml);
kfree(vml);
Index: sched.h
===================================================================
RCS file: /repository/uclinux_dist/linux-2.4.x/include/linux/sched.h,v
retrieving revision 1.4
diff -r1.4 sched.h
273a274
> struct vm_list_struct *prev;