目录

CSAPP Cachelab

目录

CSAPP-Cachelab

模拟cache

  1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
#include "cachelab.h"
#include <stdio.h>
#include <string.h>
#include <getopt.h>
#include <stdlib.h>
#include <unistd.h>
#include <limits.h>

#define BUFFER_SIZE 50
char buf[BUFFER_SIZE];

int s;	  
int E;	  
int b;	  
char *t;	

int cache_size;
long long *cache;       

unsigned time_stamp;
unsigned *last_used_time;

int hit_count;
int miss_count;
int eviction_count;

void memoryAccess(long long addr) {
	++time_stamp;

	int set_idx = (addr >> b) & ((1 << s) - 1);
	long long tag = addr >> (b + s);

	long long *set_cache = cache + set_idx * E;
	unsigned *set_used_time = last_used_time + set_idx * E;

	int i;
	unsigned LRU_i = -1;
	unsigned LRU_valid;
	unsigned LRU_time;
	for (i = 0; i < E; ++i) {
		unsigned valid = set_cache[i] & 1;
		long long tag_i = set_cache[i] >> 1;
		if (valid > 0 && tag == tag_i) {
			++hit_count;
			set_used_time[i] = time_stamp;
			return ;
		} else {
			if (LRU_i == -1
			  || valid < LRU_valid
			  || (valid == LRU_valid&&
			      set_used_time[i] < LRU_time)) {
				LRU_i = i;
				LRU_valid = valid;
				LRU_time = set_used_time[i];
			}
		}
	}

	++miss_count;
	eviction_count += LRU_valid;
	set_used_time[LRU_i] = time_stamp;
	set_cache[LRU_i] = tag << 1 | 1;
}

int main(int argc, char *argv[]) {
	char *optString = "s:E:b:t:";
	int opt = getopt(argc, argv, optString);
	while (~opt) {
		switch (opt) {
			case 's': s = atoi(optarg); break;
			case 'E': E = atoi(optarg); break;
			case 'b': b = atoi(optarg); break;
			case 't': t = optarg; break;
		}
		opt = getopt(argc, argv, optString);
	}

	time_stamp = 0;
	hit_count = miss_count = eviction_count = 0;
	cache_size = E << s;
	cache = (long long *) malloc(sizeof(*cache) * cache_size);
	last_used_time = (unsigned *) malloc(sizeof(*last_used_time) * cache_size);
	memset(cache, 0, sizeof(*cache) * cache_size);

	FILE *fp = fopen(t, "r");
	while (fgets(buf, BUFFER_SIZE, fp) != NULL) {
		int len = strlen(buf);
		if (len <= 2 || buf[0] != ' ') continue;
		char op = buf[1];
		if (!(op == 'L' || op == 'S' || op == 'M'))
			continue;
		int i;
		for (i = 0; i < len; ++i)
			if (buf[i] == ',') {	   
				buf[i] = '\0';
				break;
			}
		buf[1] = '0', buf[2] = 'x';
		long long addr;
		sscanf(buf + 1, "%llx", &addr);
		printf("op = %c, addr = %llx, ", op, addr);
		memoryAccess(addr);
		if (op == 'M') ++hit_count;
	}
	printSummary(hit_count, miss_count, eviction_count);

	fclose(fp);
	return 0;
}

要求实现数组转置,并且有限制:

  • 缓存参数为:s = 5, E = 1, b = 5。
  • 最多能够定义 12 个 int 类型的局部变量。
  • 不允许修改矩阵 A,但能任意修改矩阵 B。

由就硬分块,分八块,加上一些变量能最大利用局部变量。32*32的矩阵分八块,将八块全部读入再写。64*64矩阵先分八块,再四块。61*67分16块。

64*64用了分块矩阵转置的知识 $$ \begin{Bmatrix} A_{11} & A_{12} \ A_{21} & A_{22} \end{Bmatrix} = \begin{Bmatrix} A^T_{11} & A^T_{21} \ A^T_{12} & A^T_{22} \end{Bmatrix}

$$ 将其分为32*32的四块,子矩阵分八块转置,再分四块交换对角线子矩阵。

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
void transpose_submit(int M, int N, int A[N][M], int B[M][N]) {
    int i, j, k, h;
    int a1, a2, a3, a4, a5, a6, a7, a8;
    if(N==32) {
        for (i = 0; i < N; i+=8) {
            for (j = 0; j < M; j+=8) {
                for(k=i; k<i+8; ++k) {
                    a1 = A[k][j];
                    a2 = A[k][j+1];
                    a3 = A[k][j+2];
                    a4 = A[k][j+3];
                    a5 = A[k][j+4];
                    a6 = A[k][j+5];
                    a7 = A[k][j+6];
                    a8 = A[k][j+7];

                    B[j][k] = a1;
                    B[j+1][k] = a2;
                    B[j+2][k] = a3;
                    B[j+3][k] = a4;
                    B[j+4][k] = a5;
                    B[j+5][k] = a6;
                    B[j+6][k] = a7;
                    B[j+7][k] = a8;
                }
            }
        }
    } else if(N==64) {
        for(i=0; i<N; i+=8) {
            for(j=0; j<M; j+=8) {
                for(k=j; k<j+4; ++k) {
                    a1=A[k][i];
                    a2=A[k][i+1];
                    a3=A[k][i+2];
                    a4=A[k][i+3];
                    a5=A[k][i+4];
                    a6=A[k][i+5];
                    a7=A[k][i+6];
                    a8=A[k][i+7];

                    B[i][k]=a1;
                    B[i][k+4]=a5;
                    B[i+1][k]=a2;
                    B[i+1][k+4]=a6;
                    B[i+2][k]=a3;
                    B[i+2][k+4]=a7;
                    B[i+3][k]=a4;
                    B[i+3][k+4]=a8;
                }
                for(k=i; k<i+4; ++k) {
                    a1=B[k][j+4];
                    a2=B[k][j+5];
                    a3=B[k][j+6];
                    a4=B[k][j+7];
                    a5=A[j+4][k];
                    a6=A[j+5][k];
                    a7=A[j+6][k];
                    a8=A[j+7][k];

                    B[k][j+4]=a5;
                    B[k][j+5]=a6;
                    B[k][j+6]=a7;
                    B[k][j+7]=a8;
                    B[k+4][j]=a1;
                    B[k+4][j+1]=a2;
                    B[k+4][j+2]=a3;
                    B[k+4][j+3]=a4;
                }
                for(k=i+4; k<i+8; ++k) {
                    a1=A[j+4][k];
                    a2=A[j+5][k];
                    a3=A[j+6][k];
                    a4=A[j+7][k];

                    B[k][j+4]=a1;
                    B[k][j+5]=a2;
                    B[k][j+6]=a3;
                    B[k][j+7]=a4;
                }
            }
        }
    } else if(M==61) {
        for(i=0; i<N; i+=16) {
            for(j=0; j<M; j+=16) {
                for(k=i; k<i+16&&k<N; ++k) {
                    for(h=j; h<j+16&&h<M; ++h) {
                        B[h][k] = A[k][h];
                    }
                }
            }
        }
    }
}