#include "cstdlib" #include "iostream" #include "omp.h" #include "mkl.h" using namespace std; int main() { //Matrix dimensions = NxN //int N = 2560; //int N = 5120; //int N = 7680; int N = 10240; //scaling factors float alpha = 1.0, beta = 0.0; //matrices float A[N*N], B[N*N], C[N*N]; //initialize the matrices for (int i = 0; i < N; i++) { for (int j = 0; j < N; j++) { A[i*N+j] = (float) i+j; B[i*N+j] = (float) i-j; C[i*N+j] = 0.0; } } cout << "MIC devices present: " << mkl_mic_get_device_count() << "\n"; cout << "Warm-up..."; cblas_sgemm(CblasRowMajor, CblasNoTrans, CblasNoTrans, N, N, N, alpha, A, N, B, N, beta, C, N); cout << "Done\n"; int nIter = 5, nOmpThr; #pragma omp parallel nOmpThr = omp_get_num_threads(); double aveTime,minTime=1e6,maxTime=0.; for(int i=0; i < nIter; i++) { double startTime = dsecnd(); cout << "Performing multiplication " << i << endl; cblas_sgemm(CblasRowMajor, CblasNoTrans, CblasNoTrans, N, N, N, alpha, A, N, B, N, beta, C, N); double endTime = dsecnd(); double runtime = endTime-startTime; maxTime=(maxTime > runtime)?maxTime:runtime; minTime=(minTime < runtime)?minTime:runtime; aveTime += runtime; } aveTime /= nIter; cout << "matrix size: " << N << endl; cout << "nThreads: " << nOmpThr << endl; cout << "nIter: " << nIter << endl; cout << "maxRT: " << maxTime << endl; cout << "minRT: " << minTime << endl; cout << "aveRT: " << aveTime << endl; cout << "aveGlop/S: " << 2e-9*N*N*N/aveTime << endl; }