Welcome, guest! Login / Register - Why register?
Psst.. new poll here.
Psst.. new forums here.
Microsoft is blocking us again (TY IP Reputation!) so just use oauth login instead. :)

Paste

Pasted as C by gh ( 16 years ago )
#include <stdio.h>
#include <time.h>
#include <string.h>
#include <xmmintrin.h>

const int N = 0x100;
const long int M = 0xa;
const double BILLION = 1000000000.0;

double accum;
struct timespec start, stop;

void time_start() {
    clock_gettime(CLOCK_REALTIME, &start;);
}

void time_stop() {
    clock_gettime(CLOCK_REALTIME, &stop;);
}

double get_interval() {
    return (stop.tv_sec - start.tv_sec) + (stop.tv_nsec - start.tv_nsec) / BILLION;
}

void print(float a[N + N*N]) {
    int i;
    int j;

    printf("\n");
    for (i = 0; i < N; i++) {
        for (j = 0; j < N; j++) {
            printf(".6f", a[i + N*j]);
        }
        printf("\n");
    }
}

void transpose(float *a, float *b) {
    int i;
    int j;

    for (j = 0; j < N; j++) {
        for (i = 0; i < N; i++) {
            a[i + N*j] = b[j + N*j];
        }
    }
}

void matrix_product(float *a, float *b, float *c) {
    __m128 *d;
    __m128 *e = (__m128 *)b;
    __m128 *g = (__m128 *)c;
    int i;
    int j;
    int k;
    float *f = (float *)_mm_malloc(N * N * sizeof(float), 16);
    d = (__m128 *)f;

    transpose(a, f);
    for (i = 0; i < N; i++) {
        for (k = 0; k < N/4; k++) {
            for (j = 0; j < N/4; j++) {
                g[i + N*k] = _mm_mul_ps(d[i + N*j], e[i + N*j]);
            }
        }
    }
    _mm_free(f);
}

int main() {
    float *a = (float *)_mm_malloc(N * N * sizeof(float), 16);
    float *b = (float *)_mm_malloc(N * N * sizeof(float), 16);
    float *r = (float *)_mm_malloc(N * N * sizeof(float), 16);
    float *c1 = (float *)_mm_malloc(N * N * sizeof(float), 16);
    float *c2 = (float *)_mm_malloc(N * N * sizeof(float), 16);
    float s[N];
    float row;
    float col;
    float *r1;
    float *r2;
    float *r3;
    int i;
    int j;
    int k;

    srand(time(NULL));

    for (i = 0; i < N; i++) {
        for (j = 0; j < N; j++) {
            a[i + N*j] = rand() % 11 - 5;
        }
    }

    row = 0;
    for (j = 0; j < N; j++) {
        row += a[0 + N*j];
    }
    memset(s, 0, sizeof(float) * N);
    for (i = 0; i < N; i++) {
        float sum = 0;
        for (j = 0; j < N; j++) {
            sum += a[i + N*j];
            s[j] += a[i + N*j];
        }
        if (sum > row) {
            row = sum;
        }
    }

    col = s[0];
    for (i = 1; i < N; i++) {
        if (col < s[i]) {
            col = s[i];
        }
    }
    row = 1.0 / row / col;

    for (i = 0; i < N; i++) {
        for (j = 0; j < N; j++) {
            b[j + N*i] = a[i + N*j] * row;
        }
    }
    
    memset(r, 0, sizeof(float) * N * N);
    for (i = 0; i < N; i++) {
        for (j = 0; j < N; j++) {
            for (k = 0; k < N; k++) {
                r[i + N*k] -= b[i + N*j] * a[j + N*k];
            }
        }
        r[i + N*i]++;
    }
        
    memset(c2, 0, sizeof(float) * N * N);
    for (i = 0; i < N; i++) {
        c2[i + N*i] = 1;
    }
    r1 = c1;
    r2 = c2;

    time_start();
    for (i = 0; i < M; i++) {
        memset(r1, 0, sizeof(float) * N * N);
        matrix_product(r2, r, r1); 
        
        for (j = 0; j < N; j++) {
            r1[j + N*j]++; 
        }
        r3 = r1;
        r1 = r2;
        r2 = r3;
    }
    memset(r2, 0, sizeof(float) * N * N);
    matrix_product(r1, b, r2);
    time_stop();

    accum += get_interval() / (M + 1);
    printf("%lfs\n", accum);

    _mm_free(a);
    _mm_free(b);
    _mm_free(r);
    _mm_free(c1);
    _mm_free(c2);
    return 0;
}

 

Revise this Paste

Your Name: Code Language: