目录

CSAPP Malloclab

CSAPP-Malloclab

大致框架使用课本上的示例,补充一些宏定义。

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
/* Basic constants and macros */
#define WSIZE 4             /* Word and header/footer size (bytes) */
#define DSIZE 8             /* Double word size (bytes) */
#define CHUNKSIZE (1<<12)   /* Extend heap by this amount (bytes) */
#define MINBLOCKSIZE 16

#define MAX(x, y) ((x) > (y) ? (x) : (y))

/* Pack a size and allocated bit into a word */
#define PACK(size, alloc) ((size) | (alloc)) 

/* Read and write a word at address p */
#define GET(p)      (*(unsigned int *)(p)) /* read a word at address p */
#define PUT(p, val) (*(unsigned int *)(p) = (val)) /* write a word at address p */

#define GET_SIZE(p)     (GET(p) & ~0x7) /* read the size field from address p */
#define GET_ALLOC(p)    (GET(p) & 0x1) /* read the alloc field from address p */

#define HDRP(bp) ((char*)(bp) - WSIZE) /* given block ptr bp, compute address of its header */
#define FTRP(bp) ((char*)(bp) + GET_SIZE(HDRP(bp)) - DSIZE) /* given block ptr bp, compute address of its footer */

#define NEXT_BLKP(bp) ((char*)(bp) + GET_SIZE(HDRP(bp))) /* given block ptr bp, compute address of next blocks */
#define PREV_BLKP(bp) ((char*)(bp) - GET_SIZE((char*)(bp)-DSIZE)) /* given block ptr bp, compute address of prev blocks */

此时,chunk的结构为:

1
2
3
4
5
6
struct chunk{
	int header;			/*header==footer*/
	char data[size];
	char padding[align];
	int footer;
}

书中实现了mm_init, mm_malloc, mm_free, extend_heap, coalesce, 根据书中的代码我们实现: mm_realloc

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
void *mm_realloc(void *ptr, size_t size)
{
    void *new_ptr;

    if(ptr==NULL){
        new_ptr=mm_malloc(size);
        if (new_ptr == NULL)
            return NULL;
        return new_ptr;
    }
    if(size==0){
        mm_free(ptr);
        return NULL;
    }
    if(size==GET_SIZE(HDRP(ptr))){
        return ptr;
    }
    else{  
        new_ptr=mm_malloc(size);
        if (new_ptr == NULL)
            return NULL;
        memcpy(new_ptr, ptr, size-WSIZE);
        mm_free(ptr);
        return new_ptr;
    }
}

place

1
2
3
4
5
6
7
8
static void place(void *bp, size_t asize){
    size_t size=GET_SIZE(HDRP(bp));
    PUT(HDRP(bp),PACK(size, 1));
    PUT(FTRP(bp),PACK(size, 1));
    
    split_block(bp,asize);

}

split_block

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
static void split_block(void *bp, size_t asize){
    size_t size =GET_SIZE(HDRP(bp));
    if((size-asize)>=MINBLOCKSIZE){
        PUT(HDRP(bp), PACK(asize, 1));
        PUT(FTRP(bp), PACK(asize, 1));
        bp = NEXT_BLKP(bp);
        PUT(HDRP(bp), PACK((size-asize),0));
        PUT(FTRP(bp),PACK((size-asize),0));
        coalesce(bp);    
    }

}

next_fit

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
static void *next_fit(size_t asize){
    char* bp;
    for ( bp = prev_listp; GET_SIZE(HDRP(bp)) > 0; bp = NEXT_BLKP(bp))
    {
        if (!GET_ALLOC(HDRP(bp)) && GET_SIZE(HDRP(bp)) >= asize)
        {
            prev_listp = bp;
            return bp;
        }
    }

    for ( bp = heap_listp; bp != prev_listp; bp = NEXT_BLKP(bp))
    {
        if (!GET_ALLOC(HDRP(bp)) && GET_SIZE(HDRP(bp)) >= asize)
        {
            prev_listp = bp;
            return bp;
        }
    }
    return NULL;
}

这里使用的是隐式链表+next fit,得分:

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
Results for mm malloc:
trace  valid  util     ops      secs  Kops
 0       yes   91%    5694  0.001710  3329
 1       yes   92%    5848  0.000948  6166
 2       yes   95%    6648  0.002903  2290
 3       yes   97%    5380  0.003700  1454
 4       yes   66%   14400  0.000099146193
 5       yes   91%    4800  0.004307  1114
 6       yes   89%    4800  0.003500  1371
 7       yes   55%   12000  0.014687   817
 8       yes   51%   24000  0.007490  3204
 9       yes   27%   14401  0.048774   295
10       yes   45%   14401  0.001818  7919
Total          73%  112372  0.089937  1249

Perf index = 44 (util) + 40 (thru) = 84/100

改进:显式链表+first fit

最开始使用的是first_fit只有50+,改为next_fit之后达到了80+,在搜索的时候几乎是在遍历整个堆段,所以想到使用显示链表的方法来管理空闲chunk。 修改free_chunk结构:

1
2
3
4
5
6
struct chunk{
	int header;			/*header==footer*/
	chunk *fd;			/*prev free chunk*/
	chunk *bk;			/*next free chunk*/
	int footer;
}

添加定义:

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
#define FD(bp)  ((char *)(bp))
#define BK(bp)  ((char *)(bp)+WSIZE)

#define SET_PTR(p,ptr)  (*(unsigned int *)(p) = (unsigned int)(ptr))

#define GET_NEXT(bp)    (*(char **)(BK(bp)))
#define GET_PREV(bp)    (*(char **)(bp)) 

static char* free_listp;	//manage all free chunk
static void insert_freelist(void* bp);	
static void remove_freelist(void* bp);

这里的fd,bk与glibc中的malloc是相似的。 insert_freelist

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
static void insert_freelist(void* bp)
{
    if (bp == NULL)
        return;

    if (free_listp == NULL)
    {
        free_listp = bp;
        SET_PTR(FD(bp),NULL);       //at the end of the list fd will be null;
        SET_PTR(BK(bp),NULL);       //at the begin of the list bk will be null;

    }
    else {
        void *old=free_listp;
        free_listp = bp;
        SET_PTR(FD(bp),old);
        SET_PTR(BK(bp),NULL);
        SET_PTR(BK(old),bp);
    }
    return;
}

remove_freelist

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
static void remove_freelist(void* bp)
{
    if(GET_NEXT(bp)!=NULL)
    {
        if(GET_PREV(bp)!=NULL)  /* free_listp-->xxx->bp-->xxx */
        {      
            SET_PTR(FD(GET_NEXT(bp)),GET_PREV(bp));
            SET_PTR(BK(GET_PREV(bp)),GET_NEXT(bp));
        }
        else                    /* free_listp-->xxx->bp */
        {
            SET_PTR(FD(GET_NEXT(bp)),NULL);
        }
    }
    else
    {
        if(GET_PREV(bp)!=NULL)  /* free_listp-->bp-->xxx */
        {
            SET_PTR(BK(GET_PREV(bp)),NULL);
            free_listp=GET_PREV(bp);
        }
        else                    /* free_listp-->bp */
        {
            free_listp=NULL;
        }
    }
}

first_fit

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
static void *find_fit(size_t asize)
{  
    void *bp=free_listp;
    for(;bp!=NULL;bp=GET_PREV(bp)) {
        if(asize<=GET_SIZE(HDRP(bp)))
            return bp;
    }

    return NULL;
}

但是分数没有变化。

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
Results for mm malloc:
trace  valid  util     ops      secs  Kops
 0       yes   89%    5694  0.000159 35789
 1       yes   92%    5848  0.000106 55326
 2       yes   94%    6648  0.000232 28717
 3       yes   96%    5380  0.000177 30361
 4       yes  100%   14400  0.000095151739
 5       yes   88%    4800  0.000379 12668
 6       yes   85%    4800  0.000507  9471
 7       yes   55%   12000  0.004043  2968
 8       yes   51%   24000  0.002807  8549
 9       yes   26%   14401  0.058122   248
10       yes   34%   14401  0.002460  5854
Total          74%  112372  0.069087  1627

Perf index = 44 (util) + 40 (thru) = 84/100

改进:分离链表+first fit

依据2的次幂分为16个组:{1},{2},{3,4},{5~8},…,{1025~2048},{2049~4096}… 修改宏定义:

1
2
3
4
#define MAX_FREE_LIST 16

static void* free_listp[MAX_FREE_LIST];
static void insert_freelist(void* bp,size_t size);

first_fit

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
static void *first_fit(size_t asize)
{
    int index=free_index(asize);
    void *bp=NULL;
    while(index<MAX_FREE_LIST)
    {
        bp=free_listp[index];
        while((bp!=NULL)&&(asize>GET_SIZE(HDRP(bp))))
        {
            bp = GET_PREV(bp);
        }
        if(bp!=NULL)
            return bp;
        index++;
    }
    
    return NULL;
}

free_index

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
static int free_index(size_t size) {
    int index=0;
    while ((index<MAX_FREE_LIST-1))
    {
        if(size>1) {
            size>>=1;
            index++;
        }
        else
            break;
    }
    return index;
}

此版得分:

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
trace  valid  util     ops      secs  Kops
 0       yes   98%    5694  0.000244 23307
 1       yes   97%    5848  0.000362 16168
 2       yes   96%    6648  0.000287 23156
 3       yes   98%    5380  0.000518 10386
 4       yes  100%   14400  0.000306 47105
 5       yes   93%    4800  0.000338 14189
 6       yes   90%    4800  0.000517  9283
 7       yes   55%   12000  0.000311 38598
 8       yes   51%   24000  0.000735 32666
 9       yes   28%   14401  0.058034   248
10       yes   28%   14401  0.002643  5448
Total          76%  112372  0.064295  1748

Perf index = 46 (util) + 40 (thru) = 86/100

改进:realloc

在之前的版本中,realloc都依赖了mallocfree函数,实现很暴力。在改进中,若当前块的后一个块是free且相加后的大小满足需求就可以进行合并。

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
void *mm_realloc(void *ptr, size_t size)
{
    void *new_ptr;
    size_t asize;
    if(ptr==NULL){
        new_ptr=mm_malloc(size);
        if (new_ptr == NULL)
            return NULL;
        return new_ptr;
    }
    if(size==0){
        mm_free(ptr);
        return NULL;
    }
    if(size <= DSIZE)
        asize = 2*DSIZE;
    else
        asize = ALIGN(size + DSIZE);
    size_t oldsize=GET_SIZE(HDRP(ptr));
    if(asize<=oldsize){
        place(ptr,asize);
        return ptr;
    }
    else{
        size_t next_alloc=GET_ALLOC(HDRP(NEXT_BLKP(ptr)));
        size_t new_size;
        new_size=GET_SIZE(HDRP(NEXT_BLKP(ptr)))+oldsize;
        if(!next_alloc&&(asize<=new_size)) //next is free
        {
            remove_freelist(NEXT_BLKP(ptr));
            PUT(HDRP(ptr), PACK(new_size, 1));
            PUT(FTRP(ptr), PACK(new_size, 1));
            return ptr;
        }
        else {
            new_ptr=mm_malloc(size);
            if (new_ptr == NULL)
                return NULL;
            memcpy(new_ptr, ptr, size-WSIZE);
            mm_free(ptr);
            return new_ptr;
        }
    }
}

这里我认为合并后可以再次细化进行分割,但是分割后分数反而低了(黑人???.jpg)

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
Results for mm malloc:
trace  valid  util     ops      secs  Kops
 0       yes   98%    5694  0.000216 26361
 1       yes   97%    5848  0.000214 27340
 2       yes   96%    6648  0.000308 21556
 3       yes   98%    5380  0.000197 27365
 4       yes  100%   14400  0.000250 57554
 5       yes   93%    4800  0.000344 13970
 6       yes   90%    4800  0.000310 15469
 7       yes   55%   12000  0.000334 35907
 8       yes   51%   24000  0.000761 31521
 9       yes   99%   14401  0.000260 55367
10       yes   57%   14401  0.000234 61464
Total          85%  112372  0.003429 32771

Perf index = 51 (util) + 40 (thru) = 91/100

目前分数最高的一版。