インテル® oneAPI マス・カーネル・ライブラリー (インテル® oneMKL) 2021.3 リリース以降、CMake プロジェクトに oneMKL を統合する場合、MKLConfig.cmake がパッケージとインストールの一部として提供されています。MKLConfig.cmake は、oneMKL 自体と同様に、すべての oneMKL 構成、コンパイラー、およびランタイムをサポートします。ヘルプ/使用方法は、MKLConfig.cmake の先頭セクションに記載されています。
my_test/ |_ build/ <-- ソース外のビルド・ディレクトリー |_ CMakeLists.txt <-- ユーザー・プロジェクトの CMakeLists.txt |_ app.c <-- oneMKL API を使用するソースファイル
app.c
#include <stdio.h>
#include "mkl.h"
int main()
{
MKLVersion mkl_version;
mkl_get_version(&mkl_version);
printf("You are using oneMKL %d.%d\n", mkl_version.MajorVersion, mkl_version.UpdateVersion);
return 0;
}
CMakeLists.txt
cmake_minimum_required(VERSION 3.13)
project(oneMKL_Example LANGUAGES C)
find_package(MKL CONFIG REQUIRED PATHS $ENV{MKLROOT})
add_executable(myapp app.c)
target_link_libraries(myapp PUBLIC MKL::MKL)
コマンドライン
# 事前に oneAPI setvars.bat/oneapi-vars.bat スクリプトを呼び出し build>cmake .. -G Ninja -DCMAKE_C_COMPILER=icx build>cmake --build .
アプリケーションでカスタマイズ設定が必要な場合は、次の CMakeLists.txt で、より細かい制御を実現する方法を示します。
CMakeLists.txt
cmake_minimum_required(VERSION 3.13)
project(oneMKL_Example LANGUAGES C)
find_package(MKL CONFIG REQUIRED PATHS $ENV{MKLROOT})
message(STATUS "Imported oneMKL targets: ${MKL_IMPORTED_TARGETS}")
add_executable(myapp app.c)
target_compile_options(myapp PUBLIC $<TARGET_PROPERTY:MKL::MKL,INTERFACE_COMPILE_OPTIONS>)
target_include_directories(myapp PUBLIC $<TARGET_PROPERTY:MKL::MKL,INTERFACE_INCLUDE_DIRECTORIES>)
target_link_libraries(myapp PUBLIC $<LINK_ONLY:MKL::MKL>)
Ninja および NMake Makefile は Windows* でサポートされています。他の CMake ジェネレーターも動作する可能性がありますが、保証されません。
Ninja ビルドシステムを使用する場合、Fortran には Ninja 1.10.2 以降が必要です。
mkl_example.h
/*******************************************************************************
* Copyright 1999-2022 Intel Corporation.
*
* This software and the related documents are Intel copyrighted materials, and
* your use of them is governed by the express license under which they were
* provided to you (License). Unless the License provides otherwise, you may not
* use, modify, copy, publish, distribute, disclose or transmit this software or
* the related documents without Intel's prior written permission.
*
* This software and the related documents are provided as is, with no express
* or implied warranties, other than those that are expressly stated in the
* License.
*******************************************************************************/
/*
! Content:
! Intel® oneAPI Math Kernel Library (oneMKL) examples interface
!******************************************************************************/
#ifndef _MKL_EXAMPLE_H_
#define _MKL_EXAMPLE_H_
#include "mkl_types.h"
#include "mkl_cblas.h"
#include <stdio.h>
#ifdef __cplusplus
extern "C" {
#endif /* __cplusplus */
#define COMMENTS ':'
#define MAX_STRING_LEN 512
#define MIN(a,b) (((a) > (b)) ? (b) : (a))
#define MAX(a,b) (((a) > (b)) ? (a) : (b))
#define ABS(a) (((a) < 0) ? -(a) : (a))
#define GENERAL_MATRIX 0
#define UPPER_MATRIX 1
#define LOWER_MATRIX -1
#define FULLPRINT 0
#define SHORTPRINT 1
#ifdef MKL_ILP64
#define INT_FORMAT "%lld"
#else
#define INT_FORMAT "%d"
#endif
/*
* ===========================================
* Prototypes for example program functions
* ===========================================
*/
int GetIntegerParameters(FILE*, ...);
int GetIntegerParameters8(FILE*, ...);
int GetIntegerParameters16(FILE*, ...);
int GetIntegerParameters32(FILE*, ...);
int GetScalarsH(FILE*, ...);
int GetScalarsD(FILE*, ...);
int GetScalarsS(FILE*, ...);
int GetScalarsC(FILE*, ...);
int GetScalarsZ(FILE*, ...);
int GetCblasCharParameters(FILE *in_file, ...);
MKL_INT MaxValue(MKL_INT, MKL_INT*);
MKL_INT GetVectorI(FILE*, MKL_INT, MKL_INT*);
MKL_INT GetVectorI32(FILE*, MKL_INT, MKL_INT32*);
MKL_INT GetVectorS(FILE*, MKL_INT, float*, MKL_INT);
MKL_INT GetVectorC(FILE*, MKL_INT, MKL_Complex8*, MKL_INT);
MKL_INT GetVectorD(FILE*, MKL_INT, double*, MKL_INT);
MKL_INT GetVectorZ(FILE*, MKL_INT, MKL_Complex16*, MKL_INT);
MKL_INT GetArrayI8(FILE*, CBLAS_LAYOUT*, MKL_INT, MKL_INT*, MKL_INT*, MKL_INT8*, MKL_INT*);
MKL_INT GetArrayI16(FILE*, CBLAS_LAYOUT*, MKL_INT, MKL_INT*, MKL_INT*, MKL_INT16*, MKL_INT*);
MKL_INT GetArrayI32(FILE*, CBLAS_LAYOUT*, MKL_INT, MKL_INT*, MKL_INT*, MKL_INT32*, MKL_INT*);
MKL_INT GetArrayBF16(FILE*, CBLAS_LAYOUT*, MKL_INT, MKL_INT*, MKL_INT*, MKL_BF16*, MKL_INT*);
MKL_INT GetArrayH(FILE*, CBLAS_LAYOUT*, MKL_INT, MKL_INT*, MKL_INT*, MKL_F16*, MKL_INT*);
MKL_INT GetArrayS(FILE*, CBLAS_LAYOUT*, MKL_INT, MKL_INT*, MKL_INT*, float*, MKL_INT*);
MKL_INT GetArrayD(FILE*, CBLAS_LAYOUT*, MKL_INT, MKL_INT*, MKL_INT*, double*, MKL_INT*);
MKL_INT GetArrayC(FILE*, CBLAS_LAYOUT*, MKL_INT, MKL_INT*, MKL_INT*, MKL_Complex8*, MKL_INT*);
MKL_INT GetArrayZ(FILE*, CBLAS_LAYOUT*, MKL_INT, MKL_INT*, MKL_INT*, MKL_Complex16*, MKL_INT*);
MKL_INT GetBandArrayS(FILE*, CBLAS_LAYOUT*, MKL_INT, MKL_INT, MKL_INT, MKL_INT, float*, MKL_INT);
MKL_INT GetBandArrayD(FILE*, CBLAS_LAYOUT*, MKL_INT, MKL_INT, MKL_INT, MKL_INT, double*, MKL_INT);
MKL_INT GetBandArrayC(FILE*, CBLAS_LAYOUT*, MKL_INT, MKL_INT, MKL_INT, MKL_INT, MKL_Complex8*, MKL_INT);
MKL_INT GetBandArrayZ(FILE*, CBLAS_LAYOUT*, MKL_INT, MKL_INT, MKL_INT, MKL_INT, MKL_Complex16*, MKL_INT);
MKL_INT GetValuesI8(FILE*, MKL_INT8*, MKL_INT, MKL_INT, MKL_INT);
MKL_INT GetValuesI16(FILE*, MKL_INT16*, MKL_INT, MKL_INT, MKL_INT);
MKL_INT GetValuesI32(FILE*, MKL_INT32*, MKL_INT, MKL_INT, MKL_INT);
MKL_INT GetValuesI(FILE *, MKL_INT*, MKL_INT, MKL_INT);
MKL_INT GetValuesBF16(FILE *, MKL_BF16*, MKL_INT, MKL_INT, MKL_INT);
MKL_INT GetValuesH(FILE *, MKL_F16*, MKL_INT, MKL_INT, MKL_INT);
MKL_INT GetValuesC(FILE *, MKL_Complex8*, MKL_INT, MKL_INT, MKL_INT);
MKL_INT GetValuesZ(FILE *, MKL_Complex16*, MKL_INT, MKL_INT, MKL_INT);
MKL_INT GetValuesD(FILE *, double*, MKL_INT, MKL_INT, MKL_INT);
MKL_INT GetValuesS(FILE *, float*, MKL_INT, MKL_INT, MKL_INT);
float GetValueH(MKL_F16);
void PrintParameters( char*, ...);
void PrintVectorI(MKL_INT, MKL_INT*, char*);
void PrintVectorI32(MKL_INT, MKL_INT32*, char*);
void PrintVectorS(int, MKL_INT, float*, MKL_INT, char*);
void PrintVectorC(int, MKL_INT, MKL_Complex8*, MKL_INT, char*);
void PrintVectorD(int, MKL_INT, double*, MKL_INT, char*);
void PrintVectorZ(int, MKL_INT, MKL_Complex16*, MKL_INT, char*);
void PrintArrayI8(CBLAS_LAYOUT*, int, int, MKL_INT*, MKL_INT*, MKL_INT8*, MKL_INT*, char*);
void PrintArrayI16(CBLAS_LAYOUT*, int, int, MKL_INT*, MKL_INT*, MKL_INT16*, MKL_INT*, char*);
void PrintArrayI32(CBLAS_LAYOUT*, int, int, MKL_INT*, MKL_INT*, MKL_INT32*, MKL_INT*, char*);
void PrintArrayBF16(CBLAS_LAYOUT*, int, int, MKL_INT*, MKL_INT*, MKL_BF16*, MKL_INT*, char*);
void PrintArrayH(CBLAS_LAYOUT*, int, int, MKL_INT*, MKL_INT*, MKL_F16*, MKL_INT*, char*);
void PrintArrayS(CBLAS_LAYOUT*, int, int, MKL_INT*, MKL_INT*, float*, MKL_INT*, char*);
void PrintArrayD(CBLAS_LAYOUT*, int, int, MKL_INT*, MKL_INT*, double*, MKL_INT*, char*);
void PrintArrayC(CBLAS_LAYOUT*, int, int, MKL_INT*, MKL_INT*, MKL_Complex8*, MKL_INT*, char*);
void PrintArrayZ(CBLAS_LAYOUT*, int, int, MKL_INT*, MKL_INT*, MKL_Complex16*, MKL_INT*, char*);
void PrintBandArrayS(CBLAS_LAYOUT*, int, MKL_INT, MKL_INT, MKL_INT, MKL_INT, float*, MKL_INT, char*);
void PrintBandArrayD(CBLAS_LAYOUT*, int, MKL_INT, MKL_INT, MKL_INT, MKL_INT, double*, MKL_INT, char*);
void PrintBandArrayC(CBLAS_LAYOUT*, int, MKL_INT, MKL_INT, MKL_INT, MKL_INT, MKL_Complex8*, MKL_INT, char*);
void PrintBandArrayZ(CBLAS_LAYOUT*, int, MKL_INT, MKL_INT, MKL_INT, MKL_INT, MKL_Complex16*, MKL_INT, char*);
#ifdef __cplusplus
}
#endif /* __cplusplus */
#endif /* _MKL_EXAMPLE_H_ */
common_func.c
/*******************************************************************************
* Copyright 1999-2022 Intel Corporation.
*
* This software and the related documents are Intel copyrighted materials, and
* your use of them is governed by the express license under which they were
* provided to you (License). Unless the License provides otherwise, you may not
* use, modify, copy, publish, distribute, disclose or transmit this software or
* the related documents without Intel's prior written permission.
*
* This software and the related documents are provided as is, with no express
* or implied warranties, other than those that are expressly stated in the
* License.
*******************************************************************************/
/*
! Content:
!
!******************************************************************************/
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <ctype.h>
#include <stdarg.h>
#include <math.h>
#include "mkl_example.h"
#include "mkl_cblas.h"
MKL_INT MaxValue(MKL_INT n, MKL_INT *x)
{
MKL_INT i, indmax;
indmax = x[0];
for (i = 1; i < n; i++)
if (indmax < x[i]) indmax = x[i];
return indmax;
} /* MaxValue */
typedef union {
float float_part;
MKL_BF16 int_part[2];
} conv_union_bf16;
typedef union {
MKL_F16 raw;
struct {
unsigned int frac : 10;
unsigned int exp : 5;
unsigned int sign : 1;
} bits;
} conv_union_f16;
typedef union {
float raw;
struct {
unsigned int frac : 23;
unsigned int exp : 8;
unsigned int sign : 1;
} bits;
} conv_union_f32;
static float b2f(MKL_BF16 src) {
conv_union_bf16 conv;
conv.int_part[0] = 0;
conv.int_part[1] = src;
return conv.float_part;
}
static MKL_BF16 f2b(float src) {
conv_union_bf16 conv;
conv.float_part = src;
return conv.int_part[1];
}
static float h2f(MKL_F16 x) {
conv_union_f16 src;
conv_union_f32 dst;
src.raw = x;
dst.raw = 0;
dst.bits.sign = src.bits.sign;
if (src.bits.exp == 0x01fU) {
dst.bits.exp = 0xffU;
if (src.bits.frac > 0) {
dst.bits.frac = ((src.bits.frac | 0x200U) << 13);
}
} else if (src.bits.exp > 0x00U) {
dst.bits.exp = src.bits.exp + ((1 << 7) - (1 << 4));
dst.bits.frac = (src.bits.frac << 13);
} else {
unsigned int v = (src.bits.frac << 13);
if (v > 0) {
dst.bits.exp = 0x71;
while ((v & 0x800000UL) == 0) {
dst.bits.exp --;
v <<= 1;
}
dst.bits.frac = v;
}
}
return dst.raw;
}
static MKL_F16 f2h(float x) {
conv_union_f32 src;
conv_union_f16 dst;
src.raw = x;
dst.raw = 0;
dst.bits.sign = src.bits.sign;
if (src.bits.exp == 0x0ffU) {
dst.bits.exp = 0x01fU;
dst.bits.frac = (src.bits.frac >> 13);
if (src.bits.frac > 0) dst.bits.frac |= 0x200U;
} else if (src.bits.exp >= 0x08fU) {
dst.bits.exp = 0x01fU;
dst.bits.frac = 0x000U;
} else if (src.bits.exp >= 0x071U){
dst.bits.exp = src.bits.exp + ((1 << 4) - (1 << 7));
dst.bits.frac = (src.bits.frac >> 13);
} else if (src.bits.exp >= 0x067U){
dst.bits.exp = 0x000;
if (src.bits.frac > 0) {
dst.bits.frac = (((1U << 23) | src.bits.frac) >> 14);
} else {
dst.bits.frac = 1;
}
}
return dst.raw;
}
float GetValueH(MKL_F16 x)
{
return h2f( x );
} /* GetValueH */
MKL_INT GetVectorI(FILE *in_file, MKL_INT n, MKL_INT *x)
{
return GetValuesI( in_file, x, 0, n );
} /* GetVectorI */
MKL_INT GetVectorI16(FILE *in_file, MKL_INT n, MKL_INT16 *x)
{
return GetValuesI16( in_file, x, 1, 0, n );
} /* GetVectorI16 */
MKL_INT GetVectorI32(FILE *in_file, MKL_INT n, MKL_INT32 *x)
{
return GetValuesI32( in_file, x, 1, 0, n );
} /* GetVectorI32 */
MKL_INT GetVectorS(FILE *in_file, MKL_INT n, float *x, MKL_INT incx)
{
return GetValuesS( in_file, x, 1, 0, (1+(n-1)*ABS(incx)) );
} /* GetVectorS */
MKL_INT GetVectorD(FILE *in_file, MKL_INT n, double *x, MKL_INT incx)
{
return GetValuesD( in_file, x, 1, 0, (1+(n-1)*ABS(incx)) );
} /* GetVectorD */
MKL_INT GetVectorC(FILE *in_file, MKL_INT n, MKL_Complex8 *x, MKL_INT incx)
{
return GetValuesC( in_file, x, 1, 0, (1+(n-1)*ABS(incx)) );
} /* GetVectorC */
MKL_INT GetVectorZ(FILE *in_file, MKL_INT n, MKL_Complex16 *x, MKL_INT incx)
{
return GetValuesZ( in_file, x, 1, 0, (1+(n-1)*ABS(incx)) );
} /* GetVectorZ */
MKL_INT GetArrayI8(FILE *in_file, CBLAS_LAYOUT*layout, MKL_INT flag, MKL_INT *m, MKL_INT *n,
MKL_INT8 *a, MKL_INT *lda)
{
MKL_INT i, j , number;
MKL_INT8 *addr;
if( *layout == CblasRowMajor ) {
if( flag == GENERAL_MATRIX ) {
for( i = 0; i < (*m); i++ ) {
addr = a + i*(*lda);
number = GetValuesI8( in_file, addr, 1, 0, *n );
if( number != *n ) return 1;
} /* for */
} else if( flag == UPPER_MATRIX ) {
for (i = 0; i < (*m); i++) {
addr = a + i*(*lda);
number = GetValuesI8( in_file, addr, 1, i, *n-i );
if( number != *n-i ) return 1;
} /* for */
} else if( flag == LOWER_MATRIX ) {
for (i = 0; i < (*m); i++) {
addr = a + i*(*lda);
number = GetValuesI8( in_file, addr, 1, 0, i+1 );
if( number != i+1 ) return 1;
} /* for */
} /* if */
} else if( *layout == CblasColMajor ) {
if( flag == GENERAL_MATRIX ) {
for( j = 0; j < (*n); j++ ) {
addr = a + j*(*lda);
number = GetValuesI8( in_file, addr, 1, 0, *m );
if( number != *m ) return 1;
} /* for */
} else if( flag == UPPER_MATRIX ) {
for( j = 0; j < (*n); j++ ) {
addr = a + j*(*lda);
number = GetValuesI8( in_file, addr, 1, 0, j+1 );
if( number != j+1 ) return 1;
} /* for */
} else if( flag == LOWER_MATRIX ) {
for( j = 0; j < (*n); j++ ) {
addr = a + j*(*lda);
number = GetValuesI8( in_file, addr, 1, j, *m-j );
if( number != *m-j ) return 1;
} /* for */
} /* if */
} /* if */
return 0;
} /* GetArrayI8 */
MKL_INT GetArrayI16(FILE *in_file, CBLAS_LAYOUT*layout, MKL_INT flag, MKL_INT *m, MKL_INT *n,
MKL_INT16 *a, MKL_INT *lda)
{
MKL_INT i, j , number;
MKL_INT16 *addr;
if( *layout == CblasRowMajor ) {
if( flag == GENERAL_MATRIX ) {
for( i = 0; i < (*m); i++ ) {
addr = a + i*(*lda);
number = GetValuesI16( in_file, addr, 1, 0, *n );
if( number != *n ) return 1;
} /* for */
} else if( flag == UPPER_MATRIX ) {
for (i = 0; i < (*m); i++) {
addr = a + i*(*lda);
number = GetValuesI16( in_file, addr, 1, i, *n-i );
if( number != *n-i ) return 1;
} /* for */
} else if( flag == LOWER_MATRIX ) {
for (i = 0; i < (*m); i++) {
addr = a + i*(*lda);
number = GetValuesI16( in_file, addr, 1, 0, i+1 );
if( number != i+1 ) return 1;
} /* for */
} /* if */
} else if( *layout == CblasColMajor ) {
if( flag == GENERAL_MATRIX ) {
for( j = 0; j < (*n); j++ ) {
addr = a + j*(*lda);
number = GetValuesI16( in_file, addr, 1, 0, *m );
if( number != *m ) return 1;
} /* for */
} else if( flag == UPPER_MATRIX ) {
for( j = 0; j < (*n); j++ ) {
addr = a + j*(*lda);
number = GetValuesI16( in_file, addr, 1, 0, j+1 );
if( number != j+1 ) return 1;
} /* for */
} else if( flag == LOWER_MATRIX ) {
for( j = 0; j < (*n); j++ ) {
addr = a + j*(*lda);
number = GetValuesI16( in_file, addr, 1, j, *m-j );
if( number != *m-j ) return 1;
} /* for */
} /* if */
} /* if */
return 0;
} /* GetArrayI16 */
MKL_INT GetArrayI32(FILE *in_file, CBLAS_LAYOUT*layout, MKL_INT flag, MKL_INT *m, MKL_INT *n,
MKL_INT32 *a, MKL_INT *lda)
{
MKL_INT i, j , number;
MKL_INT32 *addr;
if( *layout == CblasRowMajor ) {
if( flag == GENERAL_MATRIX ) {
for( i = 0; i < (*m); i++ ) {
addr = a + i*(*lda);
number = GetValuesI32( in_file, addr, 1, 0, *n );
if( number != *n ) return 1;
} /* for */
} else if( flag == UPPER_MATRIX ) {
for (i = 0; i < (*m); i++) {
addr = a + i*(*lda);
number = GetValuesI32( in_file, addr, 1, i, *n-i );
if( number != *n-i ) return 1;
} /* for */
} else if( flag == LOWER_MATRIX ) {
for (i = 0; i < (*m); i++) {
addr = a + i*(*lda);
number = GetValuesI32( in_file, addr, 1, 0, i+1 );
if( number != i+1 ) return 1;
} /* for */
} /* if */
} else if( *layout == CblasColMajor ) {
if( flag == GENERAL_MATRIX ) {
for( j = 0; j < (*n); j++ ) {
addr = a + j*(*lda);
number = GetValuesI32( in_file, addr, 1, 0, *m );
if( number != *m ) return 1;
} /* for */
} else if( flag == UPPER_MATRIX ) {
for( j = 0; j < (*n); j++ ) {
addr = a + j*(*lda);
number = GetValuesI32( in_file, addr, 1, 0, j+1 );
if( number != j+1 ) return 1;
} /* for */
} else if( flag == LOWER_MATRIX ) {
for( j = 0; j < (*n); j++ ) {
addr = a + j*(*lda);
number = GetValuesI32( in_file, addr, 1, j, *m-j );
if( number != *m-j ) return 1;
} /* for */
} /* if */
} /* if */
return 0;
} /* GetArrayI32 */
MKL_INT GetArrayBF16(FILE *in_file, CBLAS_LAYOUT*layout, MKL_INT flag, MKL_INT *m, MKL_INT *n,
MKL_BF16 *a, MKL_INT *lda)
{
MKL_INT i, j, number;
MKL_BF16 *addr;
if( *layout == CblasRowMajor ) {
if( flag == GENERAL_MATRIX ) {
for( i = 0; i < (*m); i++ ) {
addr = a + i*(*lda);
number = GetValuesBF16( in_file, addr, 1, 0, *n );
if( number != *n ) return 1;
} /* for */
} else if( flag == UPPER_MATRIX ) {
for (i = 0; i < (*m); i++) {
addr = a + i*(*lda);
number = GetValuesBF16( in_file, addr, 1, i, *n-i );
if( number != *n-i ) return 1;
} /* for */
} else if( flag == LOWER_MATRIX ) {
for (i = 0; i < (*m); i++) {
addr = a + i*(*lda);
number = GetValuesBF16( in_file, addr, 1, 0, i+1 );
if( number != i+1 ) return 1;
} /* for */
} /* if */
} else if( *layout == CblasColMajor ) {
if( flag == GENERAL_MATRIX ) {
for( j = 0; j < (*n); j++ ) {
addr = a + j*(*lda);
number = GetValuesBF16( in_file, addr, 1, 0, *m );
if( number != *m ) return 1;
} /* for */
} else if( flag == UPPER_MATRIX ) {
for( j = 0; j < (*n); j++ ) {
addr = a + j*(*lda);
number = GetValuesBF16( in_file, addr, 1, 0, j+1 );
if( number != j+1 ) return 1;
} /* for */
} else if( flag == LOWER_MATRIX ) {
for( j = 0; j < (*n); j++ ) {
addr = a + j*(*lda);
number = GetValuesBF16( in_file, addr, 1, j, *m-j );
if( number != *m-j ) return 1;
} /* for */
} /* if */
} /* if */
return 0;
} /* GetArrayBF16 */
MKL_INT GetArrayH(FILE *in_file, CBLAS_LAYOUT*layout, MKL_INT flag, MKL_INT *m, MKL_INT *n,
MKL_F16 *a, MKL_INT *lda)
{
MKL_INT i, j, number;
MKL_F16 *addr;
if( *layout == CblasRowMajor ) {
if( flag == GENERAL_MATRIX ) {
for( i = 0; i < (*m); i++ ) {
addr = a + i*(*lda);
number = GetValuesH( in_file, addr, 1, 0, *n );
if( number != *n ) return 1;
} /* for */
} else if( flag == UPPER_MATRIX ) {
for (i = 0; i < (*m); i++) {
addr = a + i*(*lda);
number = GetValuesH( in_file, addr, 1, i, *n-i );
if( number != *n-i ) return 1;
} /* for */
} else if( flag == LOWER_MATRIX ) {
for (i = 0; i < (*m); i++) {
addr = a + i*(*lda);
number = GetValuesH( in_file, addr, 1, 0, i+1 );
if( number != i+1 ) return 1;
} /* for */
} /* if */
} else if( *layout == CblasColMajor ) {
if( flag == GENERAL_MATRIX ) {
for( j = 0; j < (*n); j++ ) {
addr = a + j*(*lda);
number = GetValuesH( in_file, addr, 1, 0, *m );
if( number != *m ) return 1;
} /* for */
} else if( flag == UPPER_MATRIX ) {
for( j = 0; j < (*n); j++ ) {
addr = a + j*(*lda);
number = GetValuesH( in_file, addr, 1, 0, j+1 );
if( number != j+1 ) return 1;
} /* for */
} else if( flag == LOWER_MATRIX ) {
for( j = 0; j < (*n); j++ ) {
addr = a + j*(*lda);
number = GetValuesH( in_file, addr, 1, j, *m-j );
if( number != *m-j ) return 1;
} /* for */
} /* if */
} /* if */
return 0;
} /* GetArrayH */
MKL_INT GetArrayS(FILE *in_file, CBLAS_LAYOUT*layout, MKL_INT flag, MKL_INT *m, MKL_INT *n,
float *a, MKL_INT *lda)
{
MKL_INT i, j, number;
float *addr;
if( *layout == CblasRowMajor ) {
if( flag == GENERAL_MATRIX ) {
for( i = 0; i < (*m); i++ ) {
addr = a + i*(*lda);
number = GetValuesS( in_file, addr, 1, 0, *n );
if( number != *n ) return 1;
} /* for */
} else if( flag == UPPER_MATRIX ) {
for (i = 0; i < (*m); i++) {
addr = a + i*(*lda);
number = GetValuesS( in_file, addr, 1, i, *n-i );
if( number != *n-i ) return 1;
} /* for */
} else if( flag == LOWER_MATRIX ) {
for (i = 0; i < (*m); i++) {
addr = a + i*(*lda);
number = GetValuesS( in_file, addr, 1, 0, i+1 );
if( number != i+1 ) return 1;
} /* for */
} /* if */
} else if( *layout == CblasColMajor ) {
if( flag == GENERAL_MATRIX ) {
for( j = 0; j < (*n); j++ ) {
addr = a + j*(*lda);
number = GetValuesS( in_file, addr, 1, 0, *m );
if( number != *m ) return 1;
} /* for */
} else if( flag == UPPER_MATRIX ) {
for( j = 0; j < (*n); j++ ) {
addr = a + j*(*lda);
number = GetValuesS( in_file, addr, 1, 0, j+1 );
if( number != j+1 ) return 1;
} /* for */
} else if( flag == LOWER_MATRIX ) {
for( j = 0; j < (*n); j++ ) {
addr = a + j*(*lda);
number = GetValuesS( in_file, addr, 1, j, *m-j );
if( number != *m-j ) return 1;
} /* for */
} /* if */
} /* if */
return 0;
} /* GetArrayS */
MKL_INT GetArrayD(FILE *in_file, CBLAS_LAYOUT*layout, MKL_INT flag, MKL_INT *m, MKL_INT *n,
double *a, MKL_INT *lda)
{
MKL_INT i, j, number;
double *addr;
if (*layout == CblasRowMajor) {
if (flag == GENERAL_MATRIX) {
for (i = 0; i < (*m); i++) {
addr = a + i*(*lda);
number = GetValuesD( in_file, addr, 1, 0, *n );
if( number != *n ) return 1;
} /* for */
} else if (flag == UPPER_MATRIX) {
for (i = 0; i < (*m); i++) {
addr = a + i*(*lda);
number = GetValuesD( in_file, addr, 1, i, *n-i );
if( number != *n-i ) return 1;
} /* for */
} else if (flag == LOWER_MATRIX) {
for (i = 0; i < (*m); i++) {
addr = a + i*(*lda);
number = GetValuesD( in_file, addr, 1, 0, i+1 );
if( number != i+1 ) return 1;
} /* for */
} /* if */
} else if (*layout == CblasColMajor) {
if (flag == GENERAL_MATRIX) {
for (j = 0; j < (*n); j++) {
addr = a + j*(*lda);
number = GetValuesD( in_file, addr, 1, 0, *m );
if( number != *m ) return 1;
} /* for */
} else if (flag == UPPER_MATRIX) {
for (j = 0; j < (*n); j++) {
addr = a + j*(*lda);
number = GetValuesD( in_file, addr, 1, 0, j+1 );
if( number != j+1 ) return 1;
} /* for */
} else if (flag == LOWER_MATRIX) {
for (j = 0; j < (*n); j++) {
addr = a + j*(*lda);
number = GetValuesD( in_file, addr, 1, j, *m-j );
if( number != *m-j ) return 1;
} /* for */
} /* if */
} /* if */
return 0;
} /* GetArrayD */
MKL_INT GetArrayC(FILE *in_file, CBLAS_LAYOUT*layout, MKL_INT flag, MKL_INT *m, MKL_INT *n,
MKL_Complex8 *a, MKL_INT *lda)
{
MKL_INT i, j, number;
MKL_Complex8 *addr;
if (*layout == CblasRowMajor) {
if (flag == GENERAL_MATRIX) {
for (i = 0; i < (*m); i++) {
addr = a + i*(*lda);
number = GetValuesC( in_file, addr, 1, 0, *n );
if( number != *n ) return 1;
} /* for */
} else if (flag == UPPER_MATRIX) {
for (i = 0; i < (*m); i++) {
addr = a + i*(*lda);
number = GetValuesC( in_file, addr, 1, i, *n-i );
if( number != *n-i ) return 1;
} /* for */
} else if (flag == LOWER_MATRIX) {
for (i = 0; i < (*m); i++) {
addr = a + i*(*lda);
number = GetValuesC( in_file, addr, 1, 0, i+1 );
if( number != i+1 ) return 1;
} /* for */
} /* if */
} else if (*layout == CblasColMajor) {
if (flag == GENERAL_MATRIX) {
for (j = 0; j < (*n); j++) {
addr = a + j*(*lda);
number = GetValuesC( in_file, addr, 1, 0, *m );
if( number != *m ) return 1;
} /* for */
} else if (flag == UPPER_MATRIX) {
for (j = 0; j < (*n); j++) {
addr = a + j*(*lda);
number = GetValuesC( in_file, addr, 1, 0, j+1 );
if( number != j+1 ) return 1;
} /* for */
} else if (flag == LOWER_MATRIX) {
for (j = 0; j < (*n); j++) {
addr = a + j*(*lda);
number = GetValuesC( in_file, addr, 1, j, *m-j );
if( number != *m-j ) return 1;
} /* for */
} /* if */
} /* if */
return 0;
} /* GetArrayC */
MKL_INT GetArrayZ(FILE *in_file, CBLAS_LAYOUT*layout, MKL_INT flag,
MKL_INT *m, MKL_INT *n, MKL_Complex16 *a, MKL_INT *lda)
{
MKL_INT i, j, number;
MKL_Complex16 *addr;
if (*layout == CblasRowMajor) {
if (flag == GENERAL_MATRIX) {
for (i = 0; i < (*m); i++) {
addr = a + i*(*lda);
number = GetValuesZ( in_file, addr, 1, 0, *n );
if( number != *n ) return 1;
} /* for */
} else if (flag == UPPER_MATRIX) {
for (i = 0; i < (*m); i++) {
addr = a + i*(*lda);
number = GetValuesZ( in_file, addr, 1, i, *n-i );
if( number != *n-i ) return 1;
} /* for */
} else if (flag == LOWER_MATRIX) {
for (i = 0; i < (*m); i++) {
addr = a + i*(*lda);
number = GetValuesZ( in_file, addr, 1, 0, i+1 );
if( number != i+1 ) return 1;
} /* for */
} /* if */
} else if(*layout == CblasColMajor) {
if (flag == GENERAL_MATRIX) {
for (j = 0; j < (*n); j++) {
addr = a + j*(*lda);
number = GetValuesZ( in_file, addr, 1, 0, *m );
if( number != *m ) return 1;
} /* for */
} else if (flag == UPPER_MATRIX) {
for (j = 0; j < (*n); j++) {
addr = a + j*(*lda);
number = GetValuesZ( in_file, addr, 1, 0, j+1 );
if( number != j+1 ) return 1;
} /* for */
} else if (flag == LOWER_MATRIX) {
for (j = 0; j < (*n); j++) {
addr = a + j*(*lda);
number = GetValuesZ( in_file, addr, 1, j, *m-j );
if( number != *m-j ) return 1;
} /* for */
} /* if */
} /* if */
return 0;
} /* GetArrayZ */
MKL_INT GetBandArrayS(FILE *in_file, CBLAS_LAYOUT*layout, MKL_INT kl, MKL_INT ku,
MKL_INT m, MKL_INT n, float *a, MKL_INT lda)
{
MKL_INT i, j;
MKL_INT kl1, ku1, i_start, j_start, j_end, ku_rows, kl_rows, number;
float *addr, *addr1;
if (*layout == CblasRowMajor) {
for( i = 0; i < MIN( m, n ); i++ ) {
addr = a + i*lda;
kl1 = ( i - kl <= 0 ) ? i : kl;
ku1 = ( i + ku >= n ) ? MAX(0,n-i-1) : ku;
j_start = kl - kl1;
j_end = j_start + kl1 + ku1;
addr1 = addr + j_start;
number = GetValuesS( in_file, addr1, 1, 0, j_end-j_start+1 );
if( number != j_end-j_start+1 ) return 1;
}
for( i = MIN( m, n ); i < MIN( m, MIN( m, n ) + kl); i++ ) {
addr = a + i*lda;
kl1 = n - i + kl;
j_start = ( kl > n ) ? kl - n : n - kl;
j_end = j_start + kl1 - 1;
addr1 = addr + j_start;
number = GetValuesS( in_file, addr1, 1, 0, j_end-j_start+1 );
if( number != j_end-j_start+1 ) return 1;
}
} else if (*layout == CblasColMajor) {
i_start = (ku > n ) ? ku - n + 1 : 0;
ku_rows = (ku > n ) ? n - 1 : ku;
j_start = ku_rows;
for( i = 0; i < ku_rows; i++ ) {
j = j_start*lda; addr1 = a+i+i_start;
number = GetValuesS( in_file, addr1, lda, j, n-ku_rows );
if( number != n-ku_rows ) return 1;
j_start--;
}
j_end = MIN(m,n);
addr1 = a + ku;
number = GetValuesS( in_file, addr1, lda, 0, j_end );
if( number != j_end ) return 1;
kl_rows = ( kl <= m-1 ) ? kl : m - 1;
for ( i = 1; i < kl_rows+1; i++ ) {
kl1 = ( i+j_end <= m ) ? j_end : m - i;
addr1 = a + ku + i;
number = GetValuesS( in_file, addr1, lda, 0, kl1 );
if( number != kl1 ) return 1;
}
}
return 0;
} /* GetBandArrayS */
MKL_INT GetBandArrayD(FILE *in_file, CBLAS_LAYOUT*layout, MKL_INT kl, MKL_INT ku,
MKL_INT m, MKL_INT n, double *a, MKL_INT lda)
{
MKL_INT i, j;
MKL_INT kl1, ku1, i_start, j_start, j_end, kl_rows, ku_rows, number;
double *addr, *addr1;
if (*layout == CblasRowMajor) {
for( i = 0; i < MIN( m, n ); i++ ) {
addr = a + i*lda;
kl1 = ( i - kl <= 0 ) ? i : kl;
ku1 = ( i + ku >= n ) ? MAX(0,n-i-1) : ku;
j_start = kl - kl1;
j_end = j_start + kl1 + ku1;
addr1 = addr + j_start;
number = GetValuesD( in_file, addr1, 1, 0, j_end-j_start+1 );
if( number != j_end-j_start+1 ) return 1;
}
for( i = MIN( m, n ); i < MIN( m, MIN( m, n ) + kl); i++ ) {
addr = a + i*lda;
kl1 = n - i + kl;
j_start = ( kl > n ) ? kl - n : n - kl;
j_end = j_start + kl1 - 1;
addr1 = addr + j_start;
number = GetValuesD( in_file, addr1, 1, 0, j_end-j_start+1 );
if( number != j_end-j_start+1 ) return 1;
}
} else if (*layout == CblasColMajor) {
i_start = (ku > n ) ? ku - n + 1 : 0;
ku_rows = (ku > n ) ? n - 1 : ku;
j_start = ku_rows;
for( i = 0; i < ku_rows; i++ ) {
j = j_start*lda; addr1 = a+i+i_start;
number = GetValuesD( in_file, addr1, lda, j, n-ku_rows );
if( number != n-ku_rows ) return 1;
j_start--;
}
j_end = MIN(m,n);
addr1 = a+ku;
number = GetValuesD( in_file, addr1, lda, 0, j_end );
if( number != j_end ) return 1;
kl_rows = ( kl <= m-1 ) ? kl : m - 1;
for ( i = 1; i < kl_rows+1; i++ ) {
kl1 = ( i+j_end <= m ) ? j_end : m - i;
addr1 = a+ku+i;
number = GetValuesD( in_file, addr1, lda, 0, kl1 );
if( number != kl1 ) return 1;
}
} /* if */
return 0;
} /* GetBandArrayD */
MKL_INT GetBandArrayC(FILE *in_file, CBLAS_LAYOUT*layout, MKL_INT kl, MKL_INT ku,
MKL_INT m, MKL_INT n, MKL_Complex8 *a, MKL_INT lda)
{
MKL_INT i, j;
MKL_INT kl1, ku1, i_start, j_start, j_end, ku_rows, kl_rows, number;
MKL_Complex8 *addr, *addr1;
if (*layout == CblasRowMajor) {
for( i = 0; i < MIN( m, n ); i++ ) {
addr = a + i*lda;
kl1 = ( i - kl <= 0 ) ? i : kl;
ku1 = ( i + ku >= n ) ? MAX(0,n-i-1) : ku;
j_start = kl - kl1;
j_end = j_start + kl1 + ku1;
addr1 = addr + j_start;
number = GetValuesC( in_file, addr1, 1, 0, j_end-j_start+1 );
if( number != j_end-j_start+1 ) return 1;
}
for( i = MIN( m, n ); i < MIN( m, MIN( m, n ) + kl); i++ ) {
addr = a + i*lda;
kl1 = n - i + kl;
j_start = ( kl > n ) ? kl - n : n - kl;
j_end = j_start + kl1 - 1;
addr1 = addr + j_start;
number = GetValuesC( in_file, addr1, 1, 0, j_end-j_start+1 );
if( number != j_end-j_start+1 ) return 1;
}
} else if (*layout == CblasColMajor) {
i_start = (ku > n ) ? ku - n + 1 : 0;
ku_rows = (ku > n ) ? n - 1 : ku;
j_start = ku_rows;
for( i = 0; i < ku_rows; i++ ) {
j = j_start*lda; addr1 = a+i+i_start;
number = GetValuesC( in_file, addr1, lda, j, n-ku_rows );
if( number != n-ku_rows ) return 1;
j_start--;
}
j_end = MIN(m,n);
addr1 = a+ku;
number = GetValuesC( in_file, addr1, lda, 0, j_end );
if( number != j_end ) return 1;
kl_rows = ( kl <= m-1 ) ? kl : m - 1;
for ( i = 1; i < kl_rows+1; i++ ) {
kl1 = ( i+j_end <= m ) ? j_end : m - i;
addr1 = a+ku+i;
number = GetValuesC( in_file, addr1, lda, 0, kl1 );
if( number != kl1 ) return 1;
}
} /* if */
return 0;
} /* GetBandArrayC */
MKL_INT GetBandArrayZ(FILE *in_file, CBLAS_LAYOUT*layout, MKL_INT kl, MKL_INT ku,
MKL_INT m, MKL_INT n, MKL_Complex16 *a, MKL_INT lda)
{
MKL_INT i, j;
MKL_INT kl1, ku1, i_start, j_start, j_end, ku_rows, kl_rows, number;
MKL_Complex16 *addr, *addr1;
if (*layout == CblasRowMajor) {
for( i = 0; i < MIN( m, n ); i++ ) {
addr = a + i*lda;
kl1 = ( i - kl <= 0 ) ? i : kl;
ku1 = ( i + ku >= n ) ? MAX(0,n-i-1) : ku;
j_start = kl - kl1;
j_end = j_start + kl1 + ku1;
addr1 = addr + j_start;
number = GetValuesZ( in_file, addr1, 1, 0, j_end-j_start+1 );
if( number != j_end-j_start+1 ) return 1;
}
for( i = MIN( m, n ); i < MIN( m, MIN( m, n ) + kl); i++ ) {
addr = a + i*lda;
kl1 = n - i + kl;
j_start = ( kl > n ) ? kl - n : n - kl;
j_end = j_start + kl1 - 1;
addr1 = addr + j_start;
number = GetValuesZ( in_file, addr1, 1, 0, j_end-j_start+1 );
if( number != j_end-j_start+1 ) return 1;
}
} else if (*layout == CblasColMajor) {
i_start = (ku > n ) ? ku - n + 1 : 0;
ku_rows = (ku > n ) ? n - 1 : ku;
j_start = ku_rows;
for( i = 0; i < ku_rows; i++ ) {
j = j_start*lda; addr1 = a+i+i_start;
number = GetValuesZ( in_file, addr1, lda, j, n-ku_rows );
if( number != n-ku_rows ) return 1;
j_start--;
}
j_end = MIN(m,n);
addr1 = a+ku;
number = GetValuesZ( in_file, addr1, lda, 0, j_end );
if( number != j_end ) return 1;
kl_rows = ( kl <= m-1 ) ? kl : m - 1;
for ( i = 1; i < kl_rows+1; i++ ) {
kl1 = ( i+j_end <= m ) ? j_end : m - i;
addr1 = a+ku+i;
number = GetValuesZ( in_file, addr1, lda, 0, kl1 );
if( number != kl1 ) return 1;
}
} /* if */
return 0;
} /* GetBandArrayZ */
MKL_INT GetValuesI( FILE *in_file, MKL_INT *in_array, MKL_INT begin, MKL_INT max_numbers )
{
MKL_INT i, counter=0;
int value;
char buf[MAX_STRING_LEN], *str;
do {
fgets( buf, MAX_STRING_LEN, in_file );
str = strtok( buf, " " );
if( str == NULL ){
printf( "\n File format is inappropriate\n");
return 0;
}
} while ( *str == COMMENTS );
for( i = 0; i < max_numbers; i++ ) {
if ( *str==COMMENTS ) break;
if( sscanf( str, "%d", &value) != 1 ){
printf( "\n File format is inappropriate\n" );
return 0;
}
in_array[begin+i]=(MKL_INT)value;
counter++;
if ( (str = strtok( NULL, " " )) == NULL ) break;
}
return counter;
}
MKL_INT GetValuesI8( FILE *in_file, MKL_INT8 *in_array, MKL_INT ld, MKL_INT begin, MKL_INT max_numbers )
{
MKL_INT i, counter=0;
int value_int;
char buf[MAX_STRING_LEN], *str;
do {
fgets( buf, MAX_STRING_LEN, in_file );
str = strtok( buf, " " );
if( str == NULL ){
printf( "\n File format is inappropriate\n");
return 0;
}
} while ( *str == COMMENTS );
for( i = 0; i < max_numbers; i++ ) {
if ( *str==COMMENTS ) break;
if( sscanf( str, "%d", &value_int) != 1 ){
printf( "\n File format is inappropriate\n" );
return 0;
}
in_array[begin+i*ld]=(MKL_INT8)value_int;
counter++;
if ( (str = strtok( NULL, " " )) == NULL ) break;
}
return counter;
}
MKL_INT GetValuesI16( FILE *in_file, MKL_INT16 *in_array, MKL_INT ld, MKL_INT begin, MKL_INT max_numbers )
{
MKL_INT i, counter=0;
MKL_INT16 value;
char buf[MAX_STRING_LEN], *str;
do {
fgets( buf, MAX_STRING_LEN, in_file );
str = strtok( buf, " " );
if( str == NULL ){
printf( "\n File format is inappropriate\n");
return 0;
}
} while ( *str == COMMENTS );
for( i = 0; i < max_numbers; i++ ) {
if ( *str==COMMENTS ) break;
if( sscanf( str, "%hd", &value) != 1 ){
printf( "\n File format is inappropriate\n" );
return 0;
}
in_array[begin+i*ld]=(MKL_INT16)value;
counter++;
if ( (str = strtok( NULL, " " )) == NULL ) break;
}
return counter;
}
MKL_INT GetValuesI32( FILE *in_file, MKL_INT32 *in_array, MKL_INT ld, MKL_INT begin, MKL_INT max_numbers )
{
MKL_INT i, counter=0;
MKL_INT32 value;
char buf[MAX_STRING_LEN], *str;
do {
fgets( buf, MAX_STRING_LEN, in_file );
str = strtok( buf, " " );
if( str == NULL ){
printf( "\n File format is inappropriate\n");
return 0;
}
} while ( *str == COMMENTS );
for( i = 0; i < max_numbers; i++ ) {
if ( *str==COMMENTS ) break;
if( sscanf( str, "%d", &value) != 1 ){
printf( "\n File format is inappropriate\n" );
return 0;
}
in_array[begin+i*ld]=(MKL_INT32)value;
counter++;
if ( (str = strtok( NULL, " " )) == NULL ) break;
}
return counter;
}
MKL_INT GetValuesBF16( FILE *in_file, MKL_BF16 *in_array, MKL_INT ld, MKL_INT begin, MKL_INT max_numbers )
{
MKL_INT i, counter=0;
float temp;
char buf[MAX_STRING_LEN], *str;
do {
fgets( buf, MAX_STRING_LEN, in_file );
str = strtok( buf, " " );
if( str == NULL ){
printf( "\n File format is inappropriate\n");
return 0;
}
} while ( *str == COMMENTS );
for( i = 0; i < max_numbers; i++ ) {
if ( *str==COMMENTS ) break;
if( sscanf( str, "%f", &temp) != 1 ){
printf( "\n File format is inappropriate\n" );
return 0;
}
in_array[begin+i*ld]=f2b(temp);
counter++;
if ( (str = strtok( NULL, " " )) == NULL ) break;
}
return counter;
} /* GetValuesBF16 */
MKL_INT GetValuesH( FILE *in_file, MKL_F16 *in_array, MKL_INT ld, MKL_INT begin, MKL_INT max_numbers )
{
MKL_INT i, counter=0;
float temp;
char buf[MAX_STRING_LEN], *str;
do {
fgets( buf, MAX_STRING_LEN, in_file );
str = strtok( buf, " " );
if( str == NULL ){
printf( "\n File format is inappropriate\n");
return 0;
}
} while ( *str == COMMENTS );
for( i = 0; i < max_numbers; i++ ) {
if ( *str==COMMENTS ) break;
if( sscanf( str, "%f", &temp) != 1 ){
printf( "\n File format is inappropriate\n" );
return 0;
}
in_array[begin+i*ld]=f2h(temp);
counter++;
if ( (str = strtok( NULL, " " )) == NULL ) break;
}
return counter;
} /* GetValuesH */
MKL_INT GetValuesD( FILE *in_file, double *in_array, MKL_INT ld, MKL_INT begin, MKL_INT max_numbers )
{
MKL_INT i, counter=0;
double temp;
char buf[MAX_STRING_LEN], *str;
do {
fgets( buf, MAX_STRING_LEN, in_file );
str = strtok( buf, " " );
if( str == NULL ){
printf( "\n File format is inappropriate\n");
return 0;
}
} while ( *str == COMMENTS );
for( i = 0; i < max_numbers; i++ ) {
if ( *str==COMMENTS ) break;
if( sscanf( str, "%lf", &temp) != 1 ){
printf( "\n File format is inappropriate\n" );
return 0;
}
in_array[begin+i*ld]=temp;
counter++;
if ( (str = strtok( NULL, " " )) == NULL ) break;
}
return counter;
} /* GetValuesD */
MKL_INT GetValuesS( FILE *in_file, float *in_array, MKL_INT ld, MKL_INT begin, MKL_INT max_numbers )
{
MKL_INT i, counter=0;
float temp;
char buf[MAX_STRING_LEN], *str;
do {
fgets( buf, MAX_STRING_LEN, in_file );
str = strtok( buf, " " );
if( str == NULL ){
printf( "\n File format is inappropriate\n");
return 0;
}
} while ( *str == COMMENTS );
for( i = 0; i < max_numbers; i++ ) {
if ( *str==COMMENTS ) break;
if( sscanf( str, "%f", &temp) != 1 ){
printf( "\n File format is inappropriate\n" );
return 0;
}
in_array[begin+i*ld]=temp;
counter++;
if ( (str = strtok( NULL, " " )) == NULL ) break;
}
return counter;
} /* GetValuesS */
MKL_INT GetValuesC( FILE *in_file, MKL_Complex8 *in_array, MKL_INT ld, MKL_INT begin, MKL_INT max_numbers )
{
MKL_INT i, counter=0;
float re, im;
char seps[]=" ,()\t";
char buf[MAX_STRING_LEN], *str;
do {
fgets( buf, MAX_STRING_LEN, in_file );
str = strtok( buf, seps );
if( str == NULL ){
printf( "\n File format is inappropriate\n");
return 0;
}
} while ( *str == COMMENTS );
for( i = 0; i < max_numbers; i++ ) {
if ( *str==COMMENTS ) break;
if( sscanf( str, "%f", &re) != 1 || (str = strtok( NULL,seps)) == NULL ||
sscanf( str, "%f", &im) != 1 ){
printf( "\n File format is inappropriate\n" );
return 0;
}
in_array[begin+i*ld].real=re;
in_array[begin+i*ld].imag=im;
counter++;
if ( (str = strtok( NULL, seps )) == NULL ) break;
}
return counter;
} /* GetValuesC */
MKL_INT GetValuesZ( FILE *in_file, MKL_Complex16 *in_array, MKL_INT ld, MKL_INT begin, MKL_INT max_numbers )
{
MKL_INT i, counter=0;
double re, im;
char seps[]=" ,()\t";
char buf[MAX_STRING_LEN], *str;
do {
fgets( buf, MAX_STRING_LEN, in_file );
str = strtok( buf, seps );
if( str == NULL ){
printf( "\n File format is inappropriate\n");
return 0;
}
} while ( *str == COMMENTS );
for( i = 0; i < max_numbers; i++ ) {
if ( *str==COMMENTS ) break;
if( sscanf( str, "%lf", &re) != 1 || (str = strtok( NULL,seps))== NULL ||
sscanf( str, "%lf", &im) != 1 ){
printf( "\n File format is inappropriate\n" );
return 0;
}
in_array[begin+i*ld].real=re;
in_array[begin+i*ld].imag=im;
counter++;
if ( (str = strtok( NULL, seps )) == NULL ) break;
}
return counter;
} /* GetValuesZ */
int GetIntegerParameters8(FILE *in_file, ...)
{
int counter=0;
char buf[MAX_STRING_LEN], *str;
MKL_INT8 *param;
va_list ap;
int value;
do {
fgets(buf, MAX_STRING_LEN, in_file);
str = strtok( buf, " " );
if( str == NULL ){
printf("\n File format is inappropriate\n");
return 0;
}
} while ( *str == COMMENTS );
va_start(ap, in_file);
param = va_arg(ap, MKL_INT8 *);
while ( 1 ) {
if( *str == COMMENTS ) break;
if( sscanf(str, "%d", &value ) != 1 ){
printf("\n File format is inappropriate\n");
va_end( ap );
return 0;
}
*param = (MKL_INT8)value;
counter++;
if( (str = strtok( NULL, " " )) == NULL ) break;
param = va_arg( ap, MKL_INT8 * );
}
va_end( ap );
return counter;
} /* GetIntegerParameters16 */
int GetIntegerParameters16(FILE *in_file, ...)
{
int counter=0;
char buf[MAX_STRING_LEN], *str;
MKL_INT16 *param;
va_list ap;
MKL_INT16 value;
do {
fgets(buf, MAX_STRING_LEN, in_file);
str = strtok( buf, " " );
if( str == NULL ){
printf("\n File format is inappropriate\n");
return 0;
}
} while ( *str == COMMENTS );
va_start(ap, in_file);
param = va_arg(ap, MKL_INT16 *);
while ( 1 ) {
if( *str == COMMENTS ) break;
if( sscanf(str, "%hd", &value ) != 1 ){
printf("\n File format is inappropriate\n");
va_end( ap );
return 0;
}
*param = (MKL_INT16)value;
counter++;
if( (str = strtok( NULL, " " )) == NULL ) break;
param = va_arg( ap, MKL_INT16 * );
}
va_end( ap );
return counter;
} /* GetIntegerParameters16 */
int GetIntegerParameters32(FILE *in_file, ...)
{
int counter=0;
char buf[MAX_STRING_LEN], *str;
MKL_INT32 *param;
va_list ap;
MKL_INT32 value;
do {
fgets(buf, MAX_STRING_LEN, in_file);
str = strtok( buf, " " );
if( str == NULL ){
printf("\n File format is inappropriate\n");
return 0;
}
} while ( *str == COMMENTS );
va_start(ap, in_file);
param = va_arg(ap, MKL_INT32 *);
while ( 1 ) {
if( *str == COMMENTS ) break;
if( sscanf(str, "%d", &value ) != 1 ){
printf("\n File format is inappropriate\n");
va_end( ap );
return 0;
}
*param = (MKL_INT32)value;
counter++;
if( (str = strtok( NULL, " " )) == NULL ) break;
param = va_arg( ap, MKL_INT32 * );
}
va_end( ap );
return counter;
} /* GetIntegerParameters32 */
int GetIntegerParameters(FILE *in_file, ...)
{
int counter=0;
char buf[MAX_STRING_LEN], *str;
MKL_INT *param;
va_list ap;
int value;
do {
fgets(buf, MAX_STRING_LEN, in_file);
str = strtok( buf, " " );
if( str == NULL ){
printf("\n File format is inappropriate\n");
return 0;
}
} while ( *str == COMMENTS );
va_start(ap, in_file);
param = va_arg(ap, MKL_INT *);
while ( 1 ) {
if( *str == COMMENTS ) break;
if( sscanf(str, "%d", &value ) != 1 ){
printf("\n File format is inappropriate\n");
va_end( ap );
return 0;
}
*param = (MKL_INT)value;
counter++;
if( (str = strtok( NULL, " " )) == NULL ) break;
param = va_arg( ap, MKL_INT * );
}
va_end( ap );
return counter;
} /* GetIntegerParameters */
int GetCblasCharParameters(FILE *in_file, ...)
{
int counter=0;
char buf[MAX_STRING_LEN], *str;
int *param;
va_list ap;
do {
fgets(buf, MAX_STRING_LEN, in_file);
str = strtok( buf, " " );
if( str == NULL ){
printf("\n File format is inappropriate\n");
return 0;
}
} while ( *str == COMMENTS );
va_start(ap, in_file);
param = va_arg(ap, int *);
while ( 1 ) {
if( *str == COMMENTS ) break;
if( sscanf(str, "%d", param ) != 1 ){
printf("\n File format is inappropriate\n");
va_end( ap );
return 0;
}
counter++;
if( (str = strtok( NULL, " " )) == NULL ) break;
param = va_arg( ap, int * );
}
va_end( ap );
return counter;
} /* GetCblasCharParameters */
int GetScalarsH( FILE *in_file, ...)
{
int counter=0;
char buf[MAX_STRING_LEN], *str;
MKL_F16 *param;
va_list ap;
float t;
do {
fgets( buf, MAX_STRING_LEN, in_file );
str = strtok( buf, " " );
if( str == NULL ){
printf( "\n File format is inappropriate\n");
return 0;
}
} while ( *str == COMMENTS );
va_start( ap, in_file );
param = va_arg( ap, MKL_F16 * );
while ( 1 ) {
if ( *str == COMMENTS ) break;
if( sscanf( str, "%f", &t ) != 1 ){
printf( "\n File format is inappropriate\n" );
va_end( ap );
return 0;
}
*param = f2h(t);
counter++;
if ( (str = strtok( NULL, " " )) == NULL ) break;
param = va_arg( ap, MKL_F16 * );
}
va_end( ap );
return counter;
} /* GetScalarsH */
int GetScalarsD( FILE *in_file, ...)
{
int counter=0;
char buf[MAX_STRING_LEN], *str;
double *param;
va_list ap;
do {
fgets( buf, MAX_STRING_LEN, in_file );
str = strtok( buf, " " );
if( str == NULL ){
printf( "\n File format is inappropriate\n");
return 0;
}
} while ( *str == COMMENTS );
va_start( ap, in_file );
param = va_arg( ap, double * );
while ( 1 ) {
if ( *str == COMMENTS ) break;
if( sscanf( str, "%lf", param ) != 1 ){
printf( "\n File format is inappropriate\n" );
va_end( ap );
return 0;
}
counter++;
if ( (str = strtok( NULL, " " )) == NULL ) break;
param = va_arg( ap, double * );
}
va_end( ap );
return counter;
} /* GetScalarsD */
int GetScalarsS( FILE *in_file, ...)
{
int counter=0;
char buf[MAX_STRING_LEN], *str;
float *param;
va_list ap;
do {
fgets( buf, MAX_STRING_LEN, in_file );
str = strtok( buf, " " );
if( str == NULL ){
printf( "\n File format is inappropriate\n");
return 0;
}
} while ( *str == COMMENTS );
va_start( ap, in_file );
param = va_arg( ap, float * );
while ( 1 ) {
if ( *str == COMMENTS ) break;
if( sscanf( str, "%f", param ) != 1 ){
printf( "\n File format is inappropriate\n" );
va_end( ap );
return 0;
}
counter++;
if ( (str = strtok( NULL, " " )) == NULL ) break;
param = va_arg( ap, float * );
}
va_end( ap );
return counter;
} /* GetScalarsS */
int GetScalarsC( FILE *in_file, ...)
{
int counter=0;
char buf[MAX_STRING_LEN], *str;
char seps[]=" ,()\t";
float re = 0.f, im = 0.f;
MKL_Complex8 *param;
va_list ap;
do {
fgets( buf, MAX_STRING_LEN, in_file );
str = strtok( buf, seps );
if( str == NULL ){
printf( "\n File format is inappropriate\n");
return 0;
}
} while ( *str == COMMENTS );
va_start( ap, in_file );
param = va_arg( ap, MKL_Complex8 * );
while ( 1 ) {
if ( *str == COMMENTS ) break;
if( sscanf( str, "%f", &re) != 1 || (str = strtok( NULL,seps))== NULL ||
sscanf( str, "%f", &im) != 1 ){
printf( "\n File format is inappropriate %f %f\n", re, im );
va_end( ap );
return 0;
}
param->real = re;
param->imag = im;
counter++;
if ( (str = strtok( NULL, seps )) == NULL ) break;
param = va_arg( ap, MKL_Complex8 * );
}
va_end( ap );
return counter;
} /* GetScalarsC */
int GetScalarsZ( FILE *in_file, ...)
{
int counter=0;
char buf[MAX_STRING_LEN], *str;
char seps[]=" ,()\t";
double re = 0., im = 0.;
MKL_Complex16 *param;
va_list ap;
do {
fgets( buf, MAX_STRING_LEN, in_file );
str = strtok( buf, seps );
if( str == NULL ){
printf( "\n File format is inappropriate\n");
return 0;
}
} while ( *str == COMMENTS );
va_start( ap, in_file );
param = va_arg( ap, MKL_Complex16 * );
while ( 1 ) {
if ( *str == COMMENTS ) break;
if( sscanf( str, "%lf", &re) != 1 || (str = strtok( NULL,seps)) == NULL ||
sscanf( str, "%lf", &im) != 1 ){
printf( "\n File format is inappropriate %f %f\n", re, im );
va_end( ap );
return 0;
}
param->real = re;
param->imag = im;
counter++;
if ( (str = strtok( NULL, seps )) == NULL ) break;
param = va_arg( ap, MKL_Complex16 * );
}
va_end( ap );
return counter;
} /* GetScalarsZ */
void PrintParameters( char *names, ...)
{
char *p, *str, str1[MAX_STRING_LEN], buf[MAX_STRING_LEN];
va_list ap;
char seps[]=" ,";
MKL_INT itmp;
MKL_UINT i;
printf("\n ");
va_start( ap, names );
strcpy(buf, names);
str = strtok( buf, seps );
if( str == NULL ){
printf( "\n You must determine the parameters names\n");
return;
}
do {
strcpy(str1, str);
p = str1;
for( i = 0; i < strlen(str1); i++ ) {
*p = (char) tolower(str1[i]);
p++;
}
if ( strcmp( str1, "layout" ) == 0 ) {
itmp = va_arg( ap, CBLAS_LAYOUT );
if ( itmp == CblasRowMajor )
printf("LAYOUT = CblasRowMajor ");
else if ( itmp == CblasColMajor )
printf("LAYOUT = CblasColMajor ");
} else if ( strcmp( str1, "side" ) == 0 ) {
itmp = va_arg( ap, CBLAS_SIDE);
if ( itmp == CblasLeft )
printf("SIDE = CblasLeft ");
else if ( itmp == CblasRight )
printf("SIDE = CblasRight ");
} else if ( strcmp( str1, "uplo" ) == 0 ) {
itmp = va_arg( ap, CBLAS_UPLO );
if ( itmp == CblasUpper )
printf("UPLO = CblasUpper ");
else if ( itmp == CblasLower )
printf("UPLO = CblasLower ");
} else if ( strcmp( str1, "diag" ) == 0 ) {
itmp = va_arg( ap, CBLAS_DIAG );
if ( itmp == CblasUnit )
printf("DIAG=CblasUnit ");
else if ( itmp == CblasNonUnit )
printf("DIAG = CblasNonUnit ");
} else if ( ( strcmp( str1, "trans" ) == 0 ) ||
( strcmp( str1, "transa" ) == 0 ) ||
( strcmp( str1, "transb" ) == 0 ) ) {
itmp = va_arg( ap, CBLAS_TRANSPOSE );
if ( itmp == CblasNoTrans )
printf("%s = CblasNoTrans ", str);
else if ( itmp == CblasTrans )
printf("%s = CblasTrans ", str);
else if ( itmp == CblasConjTrans )
printf("%s = CblasConjTrans ", str);
} else if ( strcmp( str1, "offsetc" ) == 0 ) {
itmp = va_arg( ap, CBLAS_OFFSET );
if ( itmp == CblasFixOffset ) {
printf("OFFSETC = CblasFixOffset ");
} else if ( itmp == CblasRowOffset ) {
printf("OFFSETC = CblasRowOffset ");
} else if ( itmp == CblasColOffset ) {
printf("OFFSETC = CblasColOffset ");
}
}
} while ( (str = strtok( NULL, seps )) != NULL );
va_end( ap );
return;
} /* PrintParameters */
void PrintVectorI(MKL_INT n, MKL_INT *x, char *name)
{
MKL_INT i;
printf("\n VECTOR %s\n ", name);
for (i = 0; i < n; i++)
printf(" "INT_FORMAT, *(x+i));
return;
} /* PrintVectorI */
void PrintVectorI16(MKL_INT n, MKL_INT16 *x, char *name)
{
MKL_INT i;
printf("\n VECTOR %s\n ", name);
for (i = 0; i < n; i++)
printf(" %d", *(x+i));
return;
} /* PrintVectorI16 */
void PrintVectorI32(MKL_INT n, MKL_INT32 *x, char *name)
{
MKL_INT i;
printf("\n VECTOR %s\n ", name);
for (i = 0; i < n; i++)
printf(" %d", *(x+i));
return;
} /* PrintVectorI32 */
void PrintVectorS(int flag, MKL_INT n, float *x, MKL_INT incx, char *name)
{
MKL_INT i;
switch(flag) {
case 0:
printf("\n VECTOR %s INC%s=" INT_FORMAT "\n ",
name, name, incx);
break;
case 1:
printf("\n VECTOR %s\n ", name);
break;
default:
break;
} /* switch */
for (i = 0; i < (1+(n-1)*ABS(incx)); i++)
printf("%6.2f ", *(x+i));
return;
} /* PrintVectorS */
void PrintVectorD(int flag, MKL_INT n, double *x, MKL_INT incx, char *name)
{
MKL_INT i;
switch(flag) {
case 0:
printf("\n VECTOR %s INC%s=" INT_FORMAT "\n ",
name, name, incx);
break;
case 1:
printf("\n VECTOR %s\n ", name);
break;
default:
break;
} /* switch */
for (i = 0; i < (1+(n-1)*ABS(incx)); i++)
printf("%8.3f ", *(x+i));
return;
} /* PrintVectorD */
void PrintVectorC(int flag, MKL_INT n, MKL_Complex8 *x, MKL_INT incx, char *name)
{
MKL_INT i;
switch(flag) {
case 0:
printf("\n VECTOR %s INC%s=" INT_FORMAT "\n ",
name, name, incx);
break;
case 1:
printf("\n VECTOR %s\n ", name);
break;
default:
break;
} /* switch */
for (i = 0; i < (1+(n-1)*ABS(incx)); i++)
printf("(%6.2f,%6.2f) ", (x+i)->real, (x+i)->imag);
return;
} /* PrintVectorC */
void PrintVectorZ(int flag, MKL_INT n, MKL_Complex16 *x, MKL_INT incx, char *name)
{
MKL_INT i;
switch(flag) {
case 0:
printf("\n VECTOR %s INC%s=" INT_FORMAT "\n ",
name, name, incx);
break;
case 1:
printf("\n VECTOR %s\n ", name);
break;
default:
break;
} /* switch */
for (i = 0; i < (1+(n-1)*ABS(incx)); i++)
printf("(%6.2f,%6.2f) ", (x+i)->real, (x+i)->imag);
return;
} /* PrintVectorZ */
void PrintArrayI8(CBLAS_LAYOUT*layout, int flag1, int flag2, MKL_INT *m, MKL_INT *n, MKL_INT8 *a,
MKL_INT *lda, char *name)
{
MKL_INT i, j;
MKL_INT8 *addr;
switch(flag1) {
case 0:
printf("\n ARRAY %s LD%s=" INT_FORMAT, name, name, *lda);
break;
case 1:
printf("\n ARRAY %s", name);
break;
default:
break;
} /* switch */
if (*layout == CblasRowMajor) {
if (flag2 == GENERAL_MATRIX) {
for (i = 0; i < *m; i++) {
printf("\n ");
addr = a + i*(*lda);
for (j = 0; j < *n; j++)
printf("%d ", (int) *(addr+j));
} /* for */
} else if (flag2 == UPPER_MATRIX) {
for (i = 0; i < (*m); i++) {
printf("\n ");
addr = a + i*(*lda);
for (j=0; j < i; j++)
printf(" ");
for (j = i; j < *n; j++)
printf("%d ", (int) *(addr+j));
} /* for */
} else if (flag2 == LOWER_MATRIX) {
for (i = 0; i < *m; i++) {
printf("\n ");
addr = a + i*(*lda);
for (j = 0; j <= i; j++)
printf("%d ", (int) *(addr+j));
} /* for */
} /* if */
} else if (*layout == CblasColMajor) {
if (flag2 == GENERAL_MATRIX) {
for (i = 0; i < *m; i++) {
printf("\n ");
addr = a + i;
for (j = 0; j < *n; j++)
printf("%d ", (int) *(addr+j*(*lda)));
} /* for */
} else if (flag2 == UPPER_MATRIX) {
for (i = 0; i < (*m); i++) {
printf("\n ");
addr = a + i;
for (j=0; j < i; j++)
printf(" ");
for (j = i; j < *n; j++)
printf("%d ", (int)*(addr+j*(*lda)));
} /* for */
} else if (flag2 == LOWER_MATRIX) {
for (i = 0; i < (*m); i++) {
printf("\n ");
addr = a + i;
for (j = 0; j <= i; j++)
printf("%d ", (int) *(addr+j*(*lda)));
} /* for */
} /* if */
} /* if */
return;
} /* PrintArrayI8 */
void PrintArrayI16(CBLAS_LAYOUT*layout, int flag1, int flag2, MKL_INT *m, MKL_INT *n, MKL_INT16 *a,
MKL_INT *lda, char *name)
{
MKL_INT i, j;
MKL_INT16 *addr;
switch(flag1) {
case 0:
printf("\n ARRAY %s LD%s=" INT_FORMAT, name, name, *lda);
break;
case 1:
printf("\n ARRAY %s", name);
break;
default:
break;
} /* switch */
if (*layout == CblasRowMajor) {
if (flag2 == GENERAL_MATRIX) {
for (i = 0; i < *m; i++) {
printf("\n ");
addr = a + i*(*lda);
for (j = 0; j < *n; j++)
printf("%hd ", *(addr+j));
} /* for */
} else if (flag2 == UPPER_MATRIX) {
for (i = 0; i < (*m); i++) {
printf("\n ");
addr = a + i*(*lda);
for (j=0; j < i; j++)
printf(" ");
for (j = i; j < *n; j++)
printf("%hd ", *(addr+j));
} /* for */
} else if (flag2 == LOWER_MATRIX) {
for (i = 0; i < *m; i++) {
printf("\n ");
addr = a + i*(*lda);
for (j = 0; j <= i; j++)
printf("%hd ", *(addr+j));
} /* for */
} /* if */
} else if (*layout == CblasColMajor) {
if (flag2 == GENERAL_MATRIX) {
for (i = 0; i < *m; i++) {
printf("\n ");
addr = a + i;
for (j = 0; j < *n; j++)
printf("%hd ", *(addr+j*(*lda)));
} /* for */
} else if (flag2 == UPPER_MATRIX) {
for (i = 0; i < (*m); i++) {
printf("\n ");
addr = a + i;
for (j=0; j < i; j++)
printf(" ");
for (j = i; j < *n; j++)
printf("%hd ", *(addr+j*(*lda)));
} /* for */
} else if (flag2 == LOWER_MATRIX) {
for (i = 0; i < (*m); i++) {
printf("\n ");
addr = a + i;
for (j = 0; j <= i; j++)
printf("%hd ", *(addr+j*(*lda)));
} /* for */
} /* if */
} /* if */
return;
} /* PrintArrayI16 */
void PrintArrayI32(CBLAS_LAYOUT*layout, int flag1, int flag2, MKL_INT *m, MKL_INT *n, MKL_INT32 *a,
MKL_INT *lda, char *name)
{
MKL_INT i, j;
MKL_INT32 *addr;
switch(flag1) {
case 0:
printf("\n ARRAY %s LD%s=" INT_FORMAT, name, name, *lda);
break;
case 1:
printf("\n ARRAY %s", name);
break;
default:
break;
} /* switch */
if (*layout == CblasRowMajor) {
if (flag2 == GENERAL_MATRIX) {
for (i = 0; i < *m; i++) {
printf("\n ");
addr = a + i*(*lda);
for (j = 0; j < *n; j++)
printf("%d ", *(addr+j));
} /* for */
} else if (flag2 == UPPER_MATRIX) {
for (i = 0; i < (*m); i++) {
printf("\n ");
addr = a + i*(*lda);
for (j=0; j < i; j++)
printf(" ");
for (j = i; j < *n; j++)
printf("%d ", *(addr+j));
} /* for */
} else if (flag2 == LOWER_MATRIX) {
for (i = 0; i < *m; i++) {
printf("\n ");
addr = a + i*(*lda);
for (j = 0; j <= i; j++)
printf("%d ", *(addr+j));
} /* for */
} /* if */
} else if (*layout == CblasColMajor) {
if (flag2 == GENERAL_MATRIX) {
for (i = 0; i < *m; i++) {
printf("\n ");
addr = a + i;
for (j = 0; j < *n; j++)
printf("%d ", *(addr+j*(*lda)));
} /* for */
} else if (flag2 == UPPER_MATRIX) {
for (i = 0; i < (*m); i++) {
printf("\n ");
addr = a + i;
for (j=0; j < i; j++)
printf(" ");
for (j = i; j < *n; j++)
printf("%d ", *(addr+j*(*lda)));
} /* for */
} else if (flag2 == LOWER_MATRIX) {
for (i = 0; i < (*m); i++) {
printf("\n ");
addr = a + i;
for (j = 0; j <= i; j++)
printf("%d ", *(addr+j*(*lda)));
} /* for */
} /* if */
} /* if */
return;
} /* PrintArrayI32 */
void PrintArrayBF16(CBLAS_LAYOUT*layout, int flag1, int flag2, MKL_INT *m, MKL_INT *n, MKL_BF16 *a,
MKL_INT *lda, char *name)
{
MKL_INT i, j;
MKL_BF16 *addr;
switch(flag1) {
case 0:
printf("\n ARRAY %s LD%s=" INT_FORMAT, name, name, *lda);
break;
case 1:
printf("\n ARRAY %s", name);
break;
default:
break;
} /* switch */
if (*layout == CblasRowMajor) {
if (flag2 == GENERAL_MATRIX) {
for (i = 0; i < *m; i++) {
printf("\n ");
addr = a + i*(*lda);
for (j = 0; j < *n; j++)
printf("%6.2f ", b2f(*(addr+j)));
} /* for */
} else if (flag2 == UPPER_MATRIX) {
for (i = 0; i < (*m); i++) {
printf("\n ");
addr = a + i*(*lda);
for (j=0; j < i; j++)
printf(" ");
for (j = i; j < *n; j++)
printf("%6.2f ", b2f(*(addr+j)));
} /* for */
} else if (flag2 == LOWER_MATRIX) {
for (i = 0; i < *m; i++) {
printf("\n ");
addr = a + i*(*lda);
for (j = 0; j <= i; j++)
printf("%6.2f ", b2f(*(addr+j)));
} /* for */
} /* if */
} else if (*layout == CblasColMajor) {
if (flag2 == GENERAL_MATRIX) {
for (i = 0; i < *m; i++) {
printf("\n ");
addr = a + i;
for (j = 0; j < *n; j++)
printf("%6.2f ", b2f(*(addr+j*(*lda))));
} /* for */
} else if (flag2 == UPPER_MATRIX) {
for (i = 0; i < (*m); i++) {
printf("\n ");
addr = a + i;
for (j=0; j < i; j++)
printf(" ");
for (j = i; j < *n; j++)
printf("%6.2f ", b2f(*(addr+j*(*lda))));
} /* for */
} else if (flag2 == LOWER_MATRIX) {
for (i = 0; i < (*m); i++) {
printf("\n ");
addr = a + i;
for (j = 0; j <= i; j++)
printf("%6.2f ", b2f(*(addr+j*(*lda))));
} /* for */
} /* if */
} /* if */
return;
} /* PrintArrayBF16 */
void PrintArrayH(CBLAS_LAYOUT*layout, int flag1, int flag2, MKL_INT *m, MKL_INT *n, MKL_F16 *a,
MKL_INT *lda, char *name)
{
MKL_INT i, j;
MKL_F16 *addr;
switch(flag1) {
case 0:
printf("\n ARRAY %s LD%s=" INT_FORMAT, name, name, *lda);
break;
case 1:
printf("\n ARRAY %s", name);
break;
default:
break;
} /* switch */
if (*layout == CblasRowMajor) {
if (flag2 == GENERAL_MATRIX) {
for (i = 0; i < *m; i++) {
printf("\n ");
addr = a + i*(*lda);
for (j = 0; j < *n; j++)
printf("%6.2f ", h2f(*(addr+j)));
} /* for */
} else if (flag2 == UPPER_MATRIX) {
for (i = 0; i < (*m); i++) {
printf("\n ");
addr = a + i*(*lda);
for (j=0; j < i; j++)
printf(" ");
for (j = i; j < *n; j++)
printf("%6.2f ", h2f(*(addr+j)));
} /* for */
} else if (flag2 == LOWER_MATRIX) {
for (i = 0; i < *m; i++) {
printf("\n ");
addr = a + i*(*lda);
for (j = 0; j <= i; j++)
printf("%6.2f ", h2f(*(addr+j)));
} /* for */
} /* if */
} else if (*layout == CblasColMajor) {
if (flag2 == GENERAL_MATRIX) {
for (i = 0; i < *m; i++) {
printf("\n ");
addr = a + i;
for (j = 0; j < *n; j++)
printf("%6.2f ", h2f(*(addr+j*(*lda))));
} /* for */
} else if (flag2 == UPPER_MATRIX) {
for (i = 0; i < (*m); i++) {
printf("\n ");
addr = a + i;
for (j=0; j < i; j++)
printf(" ");
for (j = i; j < *n; j++)
printf("%6.2f ", h2f(*(addr+j*(*lda))));
} /* for */
} else if (flag2 == LOWER_MATRIX) {
for (i = 0; i < (*m); i++) {
printf("\n ");
addr = a + i;
for (j = 0; j <= i; j++)
printf("%6.2f ", h2f(*(addr+j*(*lda))));
} /* for */
} /* if */
} /* if */
return;
} /* PrintArrayH */
void PrintArrayS(CBLAS_LAYOUT*layout, int flag1, int flag2, MKL_INT *m, MKL_INT *n, float *a,
MKL_INT *lda, char *name)
{
MKL_INT i, j;
float *addr;
switch(flag1) {
case 0:
printf("\n ARRAY %s LD%s=" INT_FORMAT, name, name, *lda);
break;
case 1:
printf("\n ARRAY %s", name);
break;
default:
break;
} /* switch */
if (*layout == CblasRowMajor) {
if (flag2 == GENERAL_MATRIX) {
for (i = 0; i < *m; i++) {
printf("\n ");
addr = a + i*(*lda);
for (j = 0; j < *n; j++)
printf("%6.2f ", *(addr+j));
} /* for */
} else if (flag2 == UPPER_MATRIX) {
for (i = 0; i < (*m); i++) {
printf("\n ");
addr = a + i*(*lda);
for (j=0; j < i; j++)
printf(" ");
for (j = i; j < *n; j++)
printf("%6.2f ", *(addr+j));
} /* for */
} else if (flag2 == LOWER_MATRIX) {
for (i = 0; i < *m; i++) {
printf("\n ");
addr = a + i*(*lda);
for (j = 0; j <= i; j++)
printf("%6.2f ", *(addr+j));
} /* for */
} /* if */
} else if (*layout == CblasColMajor) {
if (flag2 == GENERAL_MATRIX) {
for (i = 0; i < *m; i++) {
printf("\n ");
addr = a + i;
for (j = 0; j < *n; j++)
printf("%6.2f ", *(addr+j*(*lda)));
} /* for */
} else if (flag2 == UPPER_MATRIX) {
for (i = 0; i < (*m); i++) {
printf("\n ");
addr = a + i;
for (j=0; j < i; j++)
printf(" ");
for (j = i; j < *n; j++)
printf("%6.2f ", *(addr+j*(*lda)));
} /* for */
} else if (flag2 == LOWER_MATRIX) {
for (i = 0; i < (*m); i++) {
printf("\n ");
addr = a + i;
for (j = 0; j <= i; j++)
printf("%6.2f ", *(addr+j*(*lda)));
} /* for */
} /* if */
} /* if */
return;
} /* PrintArrayS */
void PrintArrayD(CBLAS_LAYOUT*layout, int flag1, int flag2, MKL_INT *m, MKL_INT *n, double *a,
MKL_INT *lda, char *name)
{
MKL_INT i, j;
double *addr;
switch(flag1) {
case 0:
printf("\n ARRAY %s LD%s=" INT_FORMAT,
name, name, *lda);
break;
case 1:
printf("\n ARRAY %s", name);
break;
default:
break;
} /* switch */
if (*layout == CblasRowMajor) {
if (flag2 == GENERAL_MATRIX) {
for (i = 0; i < *m; i++) {
printf("\n ");
addr = a + i*(*lda);
for (j = 0; j < *n; j++)
printf("%8.3f ", *(addr+j));
} /* for */
} else if (flag2 == UPPER_MATRIX) {
for (i = 0; i < *m; i++) {
printf("\n ");
addr = a + i*(*lda);
for (j=0; j < i; j++)
printf(" ");
for (j = i; j < *n; j++)
printf("%8.3f ", *(addr+j));
} /* for */
} else if (flag2 == LOWER_MATRIX) {
for (i = 0; i < *m; i++) {
printf("\n ");
addr = a + i*(*lda);
for (j = 0; j <= i; j++)
printf("%8.3f ", *(addr+j));
} /* for */
} /* if */
} else if (*layout == CblasColMajor) {
if (flag2 == GENERAL_MATRIX) {
for (i = 0; i < *m; i++) {
printf("\n ");
addr = a + i;
for (j = 0; j < *n; j++)
printf("%8.3f ", *(addr+j*(*lda)));
} /* for */
} else if (flag2 == UPPER_MATRIX) {
for (i = 0; i < *m; i++) {
printf("\n ");
addr = a + i;
for (j=0; j < i; j++)
printf(" ");
for (j = i; j < *n; j++)
printf("%8.3f ", *(addr+j*(*lda)));
} /* for */
} else if (flag2 == LOWER_MATRIX) {
for (i = 0; i < *m; i++) {
printf("\n ");
addr = a + i;
for (j = 0; j <= i; j++)
printf("%8.3f ", *(addr+j*(*lda)));
} /* for */
} /* if */
} /* if */
return;
} /* PrintArrayD */
void PrintArrayC(CBLAS_LAYOUT*layout, int flag1, int flag2, MKL_INT *m, MKL_INT *n, MKL_Complex8 *a,
MKL_INT *lda, char *name)
{
MKL_INT i, j;
MKL_Complex8 *addr;
switch(flag1) {
case 0:
printf("\n ARRAY %s LD%s=" INT_FORMAT,
name, name, *lda);
break;
case 1:
printf("\n ARRAY %s", name);
break;
default:
break;
} /* switch */
if (*layout == CblasRowMajor) {
if (flag2 == GENERAL_MATRIX) {
for (i = 0; i < *m; i++) {
printf("\n ");
addr = a + i*(*lda);
for (j = 0; j < *n; j++)
printf("(%6.2f,%6.2f) ", (addr+j)->real, (addr+j)->imag);
} /* for */
} else if (flag2 == UPPER_MATRIX) {
for (i = 0; i < *m; i++) {
printf("\n ");
addr = a + i*(*lda);
for (j=0; j < i; j++)
printf(" ");
for (j = i; j < *n; j++)
printf("(%6.2f,%6.2f) ", (addr+j)->real, (addr+j)->imag);
} /* for */
} else if (flag2 == LOWER_MATRIX) {
for (i = 0; i < *m; i++) {
printf("\n ");
addr = a + i*(*lda);
for (j = 0; j <= i; j++)
printf("(%6.2f,%6.2f) ", (addr+j)->real, (addr+j)->imag);
} /* for */
} /* if */
} else if(*layout == CblasColMajor) {
if (flag2 == GENERAL_MATRIX) {
for (i = 0; i < *m; i++) {
printf("\n ");
addr = a + i;
for (j = 0; j < *n; j++)
printf("(%6.2f,%6.2f) ", (addr+j*(*lda))->real,
(addr+j*(*lda))->imag);
} /* for */
} else if (flag2 == UPPER_MATRIX) {
for (i = 0; i < *m; i++) {
printf("\n ");
addr = a + i;
for (j=0; j < i; j++)
printf(" ");
for (j = i; j < *n; j++)
printf("(%6.2f,%6.2f) ", (addr+j*(*lda))->real,
(addr+j*(*lda))->imag);
} /* for */
} else if (flag2 == LOWER_MATRIX) {
for (i = 0; i < *m; i++) {
printf("\n ");
addr = a + i;
for (j = 0; j <= i; j++)
printf("(%6.2f,%6.2f) ", (addr+j*(*lda))->real,
(addr+j*(*lda))->imag);
} /* for */
} /* if */
} /* if */
return;
} /* PrintArrayC */
void PrintArrayZ(CBLAS_LAYOUT*layout, int flag1, int flag2,
MKL_INT *m, MKL_INT *n, MKL_Complex16 *a, MKL_INT *lda, char *name)
{
MKL_INT i, j;
MKL_Complex16 *addr;
if (flag1 == 0)
printf("\n ARRAY %s LD%s=" INT_FORMAT, name, name, *lda);
else
printf("\n ARRAY %s", name);
if (*layout == CblasRowMajor) {
if (flag2 == GENERAL_MATRIX) {
for (i = 0; i < *m; i++) {
printf("\n ");
addr = a + i*(*lda);
for (j = 0; j < *n; j++)
printf("(%6.2f,%6.2f) ", (addr+j)->real,
(addr+j)->imag);
} /* for */
} else if (flag2 == UPPER_MATRIX) {
for (i = 0; i < *m; i++) {
printf("\n ");
addr = a + i*(*lda);
for (j=0; j < i; j++)
printf(" ");
for (j = i; j < *n; j++)
printf("(%6.2f,%6.2f) ", (addr+j)->real,
(addr+j)->imag);
} /* for */
} else if (flag2 == LOWER_MATRIX) {
for (i = 0; i < *m; i++) {
printf("\n ");
addr = a + i*(*lda);
for (j = 0; j <= i; j++)
printf("(%6.2f,%6.2f) ", (addr+j)->real,
(addr+j)->imag);
} /* for */
} /* if */
} else if (*layout == CblasColMajor) {
if (flag2 == GENERAL_MATRIX) {
for (i = 0; i < *m; i++) {
printf("\n ");
addr = a + i;
for (j = 0; j < *n; j++)
printf("(%6.2f,%6.2f) ", (addr+j*(*lda))->real,
(addr+j*(*lda))->imag);
} /* for */
} else if (flag2 == UPPER_MATRIX) {
for (i = 0; i < *m; i++) {
printf("\n ");
addr = a + i;
for (j=0; j < i; j++)
printf(" ");
for (j = i; j < *n; j++)
printf("(%6.2f,%6.2f) ", (addr+j*(*lda))->real,
(addr+j*(*lda))->imag);
} /* for */
} else if (flag2 == LOWER_MATRIX) {
for (i = 0; i < *m; i++) {
printf("\n ");
addr = a + i;
for (j = 0; j <= i; j++)
printf("(%6.2f,%6.2f) ", (addr+j*(*lda))->real,
(addr+j*(*lda))->imag);
} /* for */
} /* if */
} /* if */
return;
} /* PrintArrayZ */
void PrintBandArrayS(CBLAS_LAYOUT*layout, int flag,
MKL_INT kl, MKL_INT ku, MKL_INT m, MKL_INT n,
float *a, MKL_INT lda, char *name)
{
MKL_INT i, j, l;
float *addr, *addr1;
MKL_INT kl1, ku1, i_start, j_start, j_end, kl_rows, ku_rows;
if (flag == 0)
printf("\n ARRAY %s LD%s=" INT_FORMAT " KL=" INT_FORMAT " KU=" INT_FORMAT,
name, name, lda, kl, ku);
else
printf("\n ARRAY %s LD%s=" INT_FORMAT, name, name, lda);
if (*layout == CblasRowMajor) {
for( i = 0; i < MIN( m, n ); i++ ) {
printf("\n ");
addr = a + i*lda;
kl1 = ( i - kl <= 0 ) ? i : kl;
ku1 = ( i + ku >= n ) ? MAX(0,n-i-1) : ku;
j_start = kl - kl1;
j_end = j_start + kl1 + ku1;
addr1 = addr + j_start;
for( l=0; l < j_start; l++ )
printf(" ");
for( l=0; l < j_end-j_start+1; l++ )
printf("%6.2f ", *(addr1+l));
}
for( i = MIN( m, n ); i < MIN( m, MIN( m, n ) + kl); i++ ) {
printf("\n ");
addr = a + i*lda;
kl1 = n - i + kl;
j_start = ( kl > n ) ? kl - n : n - kl;
j_end = j_start + kl1 - 1;
addr1 = addr + j_start;
for( l=0; l < j_start; l++ )
printf(" ");
for( l=0; l < j_end-j_start+1; l++ )
printf("%6.2f ", *(addr1+l));
}
} else if (*layout == CblasColMajor) {
i_start = (ku > n ) ? ku - n + 1 : 0;
ku_rows = (ku > n ) ? n - 1 : ku;
j_start = ku_rows;
for( i = 0; i < ku_rows; i++ ) {
printf("\n ");
j = j_start*lda; addr1 = a + i + i_start;
for( l=0; l < j_start; l++ )
printf(" ");
for( l=0; l < n-ku_rows; l++ )
printf("%6.2f ", *(addr1+j+l*lda) );
j_start--;
}
j_end = MIN(m,n);
addr1 = a + ku;
printf("\n ");
for( l=0; l < j_end; l++ )
printf("%6.2f ", *(addr1+l*lda) );
kl_rows = ( kl <= m-1 ) ? kl : m - 1;
for ( i = 1; i < kl_rows+1; i++ ) {
printf("\n ");
kl1 = ( i+j_end <= m ) ? j_end : m - i;
addr1 = a + ku + i;
for( l=0; l < kl1; l++ )
printf("%6.2f ", *(addr1+l*lda) );
}
} /* if */
return;
} /* PrintBandArrayS */
void PrintBandArrayD(CBLAS_LAYOUT*layout, int flag,
MKL_INT kl, MKL_INT ku, MKL_INT m, MKL_INT n,
double *a, MKL_INT lda, char *name)
{
MKL_INT i, j, l;
double *addr, *addr1;
MKL_INT kl1, ku1, i_start, j_start, j_end, kl_rows, ku_rows;
if (flag == 0)
printf("\n ARRAY %s LD%s=" INT_FORMAT " KL=" INT_FORMAT " KU=" INT_FORMAT,
name, name, lda, kl, ku);
else
printf("\n ARRAY %s LD%s=" INT_FORMAT, name, name, lda);
if (*layout == CblasRowMajor) {
for( i = 0; i < MIN( m, n ); i++ ) {
printf("\n ");
addr = a + i*lda;
kl1 = ( i - kl <= 0 ) ? i : kl;
ku1 = ( i + ku >= n ) ? MAX(0,n-i-1) : ku;
j_start = kl - kl1;
j_end = j_start + kl1 + ku1;
addr1 = addr + j_start;
for( l=0; l < j_start; l++ )
printf(" ");
for( l=0; l < j_end-j_start+1; l++ )
printf("%6.2f ", *(addr1+l));
}
for( i = MIN( m, n ); i < MIN( m, MIN( m, n ) + kl); i++ ) {
printf("\n ");
addr = a + i*lda;
kl1 = n - i + kl;
j_start = ( kl > n ) ? kl - n : n - kl;
j_end = j_start + kl1 - 1;
addr1 = addr + j_start;
for( l=0; l < j_start; l++ )
printf(" ");
for( l=0; l < j_end-j_start+1; l++ )
printf("%6.2f ", *(addr1+l));
}
} else if (*layout == CblasColMajor) {
i_start = (ku > n ) ? ku - n + 1 : 0;
ku_rows = (ku > n ) ? n - 1 : ku;
j_start = ku_rows;
for( i = 0; i < ku_rows; i++ ) {
printf("\n ");
j = j_start*lda; addr1 = a + i + i_start;
for( l=0; l < j_start; l++ )
printf(" ");
for( l=0; l < n-ku_rows; l++ )
printf("%6.2f ", *(addr1+j+l*lda) );
j_start--;
}
j_end = MIN(m,n);
addr1 = a + ku;
printf("\n ");
for( l=0; l < j_end; l++ )
printf("%6.2f ", *(addr1+l*lda) );
kl_rows = ( kl <= m-1 ) ? kl : m - 1;
for ( i = 1; i < kl_rows+1; i++ ) {
printf("\n ");
kl1 = ( i+j_end <= m ) ? j_end : m - i;
addr1 = a + ku + i;
for( l=0; l < kl1; l++ )
printf("%6.2f ", *(addr1+l*lda) );
}
} /* if */
return;
} /* PrintBandArrayD */
void PrintBandArrayC(CBLAS_LAYOUT*layout, int flag,
MKL_INT kl, MKL_INT ku, MKL_INT m, MKL_INT n,
MKL_Complex8 *a, MKL_INT lda, char *name)
{
MKL_INT i, j, l;
MKL_Complex8 *addr, *addr1;
MKL_INT kl1, ku1, j_start, j_end, i_start, kl_rows, ku_rows;
if (flag == 0)
printf("\n ARRAY %s LD%s=" INT_FORMAT " KL=" INT_FORMAT " KU=" INT_FORMAT,
name, name, lda, kl, ku);
else
printf("\n ARRAY %s LD%s=" INT_FORMAT, name, name, lda);
if (*layout == CblasRowMajor) {
for( i = 0; i < MIN( m, n ); i++ ) {
printf("\n ");
addr = a + i*lda;
kl1 = ( i - kl <= 0 ) ? i : kl;
ku1 = ( i + ku >= n ) ? MAX(0,n-i-1) : ku;
j_start = kl - kl1;
j_end = j_start + kl1 + ku1;
addr1 = addr + j_start;
for( l=0; l < j_start; l++ )
printf(" ");
for( l=0; l < j_end-j_start+1; l++ )
printf("(%6.2f,%6.2f) ", (addr1+l)->real, (addr1+l)->imag);
}
for( i = MIN( m, n ); i < MIN( m, MIN( m, n ) + kl); i++ ) {
printf("\n ");
addr = a + i*lda;
kl1 = n - i + kl;
j_start = ( kl > n ) ? kl - n : n - kl;
j_end = j_start + kl1 - 1;
addr1 = addr + j_start;
for( l=0; l < j_start; l++ )
printf(" ");
for( l=0; l < j_end-j_start+1; l++ )
printf("(%6.2f,%6.2f) ", (addr1+l)->real, (addr1+l)->imag);
}
} else if (*layout == CblasColMajor) {
i_start = (ku > n ) ? ku - n + 1 : 0;
ku_rows = (ku > n ) ? n - 1 : ku;
j_start = ku_rows;
for( i = 0; i < ku_rows; i++ ) {
printf("\n ");
j = j_start*lda; addr1 = a + i + i_start;
for( l=0; l < j_start; l++ )
printf(" ");
for( l=0; l < n-ku_rows; l++ )
printf("(%6.2f,%6.2f) ",
(addr1+j+l*lda)->real, (addr1+j+l*lda)->imag );
j_start--;
}
j_end = MIN(m,n);
addr1 = a + ku;
printf("\n ");
for( l=0; l < j_end; l++ )
printf("(%6.2f,%6.2f) ",
(addr1+l*lda)->real, (addr1+l*lda)->imag );
kl_rows = ( kl <= m-1 ) ? kl : m - 1;
for ( i = 1; i < kl_rows+1; i++ ) {
printf("\n ");
kl1 = ( i+j_end <= m ) ? j_end : m - i;
addr1 = a + ku + i;
for( l=0; l < kl1; l++ )
printf("(%6.2f,%6.2f) ",
(addr1+l*lda)->real, (addr1+l*lda)->imag );
}
} /* if */
return;
} /* PrintBandArrayC */
void PrintBandArrayZ(CBLAS_LAYOUT*layout, int flag,
MKL_INT kl, MKL_INT ku, MKL_INT m, MKL_INT n,
MKL_Complex16 *a, MKL_INT lda, char *name)
{
MKL_INT i, j, l;
MKL_Complex16 *addr, *addr1;
MKL_INT kl1, ku1, j_start, j_end, i_start, kl_rows, ku_rows;
if (flag == 0)
printf("\n ARRAY %s LD%s=" INT_FORMAT " KL=" INT_FORMAT " KU=" INT_FORMAT,
name, name, lda, kl, ku);
else
printf("\n ARRAY %s LD%s=" INT_FORMAT, name, name, lda);
if (*layout == CblasRowMajor) {
for( i = 0; i < MIN( m, n ); i++ ) {
printf("\n ");
addr = a + i*lda;
kl1 = ( i - kl <= 0 ) ? i : kl;
ku1 = ( i + ku >= n ) ? MAX(0,n-i-1) : ku;
j_start = kl - kl1;
j_end = j_start + kl1 + ku1;
addr1 = addr + j_start;
for( l=0; l < j_start; l++ )
printf(" ");
for( l=0; l < j_end-j_start+1; l++ )
printf("(%6.2f,%6.2f) ", (addr1+l)->real, (addr1+l)->imag);
}
for( i = MIN( m, n ); i < MIN( m, MIN( m, n ) + kl); i++ ) {
printf("\n ");
addr = a + i*lda;
kl1 = n - i + kl;
j_start = ( kl > n ) ? kl - n : n - kl;
j_end = j_start + kl1 - 1;
addr1 = addr + j_start;
for( l=0; l < j_start; l++ )
printf(" ");
for( l=0; l < j_end-j_start+1; l++ )
printf("(%6.2f,%6.2f) ", (addr1+l)->real, (addr1+l)->imag);
}
} else if (*layout == CblasColMajor) {
i_start = (ku > n ) ? ku - n + 1 : 0;
ku_rows = (ku > n ) ? n - 1 : ku;
j_start = ku_rows;
for( i = 0; i < ku_rows; i++ ) {
printf("\n ");
j = j_start*lda; addr1 = a + i + i_start;
for( l=0; l < j_start; l++ )
printf(" ");
for( l=0; l < n-ku_rows; l++ )
printf("(%6.2f,%6.2f) ",
(addr1+j+l*lda)->real, (addr1+j+l*lda)->imag );
j_start--;
}
j_end = MIN(m,n);
addr1 = a + ku;
printf("\n ");
for( l=0; l < j_end; l++ )
printf("(%6.2f,%6.2f) ",
(addr1+l*lda)->real, (addr1+l*lda)->imag );
kl_rows = ( kl <= m-1 ) ? kl : m - 1;
for ( i = 1; i < kl_rows+1; i++ ) {
printf("\n ");
kl1 = ( i+j_end <= m ) ? j_end : m - i;
addr1 = a + ku + i;
for( l=0; l < kl1; l++ )
printf("(%6.2f,%6.2f) ",
(addr1+l*lda)->real, (addr1+l*lda)->imag );
}
} /* if */
return;
} /* PrintBandArrayZ */
cblas_sgemmx.c
/*******************************************************************************
* Copyright 1999-2022 Intel Corporation.
*
* This software and the related documents are Intel copyrighted materials, and
* your use of them is governed by the express license under which they were
* provided to you (License). Unless the License provides otherwise, you may not
* use, modify, copy, publish, distribute, disclose or transmit this software or
* the related documents without Intel's prior written permission.
*
* This software and the related documents are provided as is, with no express
* or implied warranties, other than those that are expressly stated in the
* License.
*******************************************************************************/
/*
! Content:
! C B L A S _ S G E M M Example Program Text ( C Interface )
!******************************************************************************/
#include <stdio.h>
#include <stdlib.h>
#include "mkl.h"
#include "mkl_example.h"
int main(int argc, char *argv[])
{
FILE *in_file;
char *in_file_name;
MKL_INT m, n, k;
MKL_INT lda, ldb, ldc;
MKL_INT rmaxa, cmaxa, rmaxb, cmaxb, rmaxc, cmaxc;
float alpha, beta;
float *a, *b, *c;
CBLAS_LAYOUT layout;
CBLAS_TRANSPOSE transA, transB;
MKL_INT ma, na, mb, nb;
printf("\n C B L A S _ S G E M M EXAMPLE PROGRAM\n");
/* Get input parameters */
if( argc == 1 ) {
printf("\n You must specify in_file data file as 1-st parameter");
return 1;
}
in_file_name = argv[1];
/* Get input data */
if( (in_file = fopen( in_file_name, "r" )) == NULL ) {
printf("\n ERROR on OPEN '%s' with mode=\"r\"\n", in_file_name);
return 1;
}
if( GetIntegerParameters(in_file, &m, &n, &k) != 3 ) {
printf("\n ERROR of m, n, k reading\n");
fclose(in_file);
return 1;
}
if( GetScalarsS(in_file, &alpha, &beta) != 2 ) {
printf("\n ERROR of alpha, beta reading\n");
fclose(in_file);
return 1;
}
if( GetCblasCharParameters(in_file, &transA, &transB, &layout) != 3 ) {
printf("\n ERROR of transA, transB, layout reading\n");
fclose(in_file);
return 1;
}
if( transA == CblasNoTrans ) {
rmaxa = m + 1;
cmaxa = k;
ma = m;
na = k;
} else {
rmaxa = k + 1;
cmaxa = m;
ma = k;
na = m;
}
if( transB == CblasNoTrans ) {
rmaxb = k + 1;
cmaxb = n;
mb = k;
nb = n;
} else {
rmaxb = n + 1;
cmaxb = k;
mb = n;
nb = k;
}
rmaxc = m + 1;
cmaxc = n;
a = (float *)mkl_calloc(rmaxa*cmaxa, sizeof( float ), 64);
b = (float *)mkl_calloc(rmaxb*cmaxb, sizeof( float ), 64);
c = (float *)mkl_calloc(rmaxc*cmaxc, sizeof( float ), 64);
if( a == NULL || b == NULL || c == NULL ) {
printf( "\n Can't allocate memory for arrays\n");
mkl_free(a);
mkl_free(b);
mkl_free(c);
fclose(in_file);
return 1;
}
if (layout == CblasRowMajor) {
lda=cmaxa;
ldb=cmaxb;
ldc=cmaxc;
} else {
lda=rmaxa;
ldb=rmaxb;
ldc=rmaxc;
}
if( GetArrayS(in_file, &layout, GENERAL_MATRIX, &ma, &na, a, &lda) != 0 ) {
printf("\n ERROR of array A reading\n");
mkl_free(a);
mkl_free(b);
mkl_free(c);
fclose(in_file);
return 1;
}
if( GetArrayS(in_file, &layout, GENERAL_MATRIX, &mb, &nb, b, &ldb) != 0 ) {
printf("\n ERROR of array B reading\n");
mkl_free(a);
mkl_free(b);
mkl_free(c);
fclose(in_file);
return 1;
}
if( GetArrayS(in_file, &layout, GENERAL_MATRIX, &m, &n, c, &ldc) != 0 ) {
printf("\n ERROR of array C reading\n");
mkl_free(a);
mkl_free(b);
mkl_free(c);
fclose(in_file);
return 1;
}
fclose(in_file);
/* Print input data */
printf("\n INPUT DATA");
printf("\n M="INT_FORMAT" N="INT_FORMAT" K="INT_FORMAT, m, n, k);
printf("\n ALPHA=%5.1f BETA=%5.1f", alpha, beta);
PrintParameters("TRANSA, TRANSB", transA, transB);
PrintParameters("LAYOUT", layout);
PrintArrayS(&layout, FULLPRINT, GENERAL_MATRIX, &ma, &na, a, &lda, "A");
PrintArrayS(&layout, FULLPRINT, GENERAL_MATRIX, &mb, &nb, b, &ldb, "B");
PrintArrayS(&layout, FULLPRINT, GENERAL_MATRIX, &m, &n, c, &ldc, "C");
/* Call SGEMM subroutine ( C Interface ) */
cblas_sgemm(layout, transA, transB, m, n, k, alpha,
a, lda, b, ldb, beta, c, ldc);
/* Print output data */
printf("\n\n OUTPUT DATA");
PrintArrayS(&layout, FULLPRINT, GENERAL_MATRIX, &m, &n, c, &ldc, "C");
mkl_free(a);
mkl_free(b);
mkl_free(c);
return 0;
}
cblas_sgemmx.d
: C B L A S _ S G E M M EXAMPLE PROGRAM DATA
3 5 4 :Values of m, n, k
0.5 1.2 :Values of alpha, beta
111 111 101 :Values of transA, transB, layout
1.0 2.0 3.0 4.0
3.2 2.7 8.1 5.0
0.0 4.0 5.0 3.0 :Values of array A
1.0 2.0 3.0 4.0 5.0
1.0 2.0 3.0 4.0 5.0
1.0 2.0 3.0 4.0 5.0
1.0 2.0 3.0 4.0 5.0 :Values of array B
0.0 0.0 1.0 1.0 1.0
0.0 0.0 1.0 1.0 1.0
0.0 0.0 1.0 1.0 1.0 :Values of array C
CMakeLists.txt
#===============================================================================
# Copyright 2023 Intel Corporation.
#
# This software and the related documents are Intel copyrighted materials, and
# your use of them is governed by the express license under which they were
# provided to you (License). Unless the License provides otherwise, you may not
# use, modify, copy, publish, distribute, disclose or transmit this software or
# the related documents without Intel's prior written permission.
#
# This software and the related documents are provided as is, with no express
# or implied warranties, other than those that are expressly stated in the
# License.
#===============================================================================
cmake_minimum_required(VERSION 3.13)
enable_testing()
# Define language and file extensions
set(TEST_LANG C)
set(TEST_EXT c)
set(DATA_EXT d)
project(MKL_Example LANGUAGES ${TEST_LANG})
# Include MKLConfig.cmake
find_package(MKL CONFIG REQUIRED)
# Build extra files
add_library(blas_common common_func.c)
if(WIN32)
set(TEST_COPT "-D_CRT_SECURE_NO_WARNINGS")
endif()
# Add some parts of MKL target: headers and compiler options
target_include_directories(blas_common PUBLIC $<TARGET_PROPERTY:MKL::MKL,INTERFACE_INCLUDE_DIRECTORIES>)
target_compile_options(blas_common PUBLIC ${TEST_COPT} $<TARGET_PROPERTY:MKL::MKL,INTERFACE_COMPILE_OPTIONS>)
set(TEST_LOPT blas_common)
# Build target
set(func "example")
set(func_name "cblas_sgemmx")
set(${func}_SRC ${func_name}.${TEST_EXT})
add_executable(${func} ${${func}_SRC})
set_property(TARGET ${func} PROPERTY C_STANDARD 99)
# Add MKL target: headers, compiler options and libraries to link
target_link_libraries(${func} PUBLIC ${TEST_LOPT} MKL::MKL)
# Register example as ctest
set(${func_name}_DATA ${PROJECT_SOURCE_DIR}/${func_name}.${DATA_EXT})
add_test(NAME ${func} COMMAND ${func} ${${func_name}_DATA})
# Add oneMKL environment variables if any defined
if(MKL_ENV)
set_tests_properties(${func} PROPERTIES ENVIRONMENT "${MKL_ENV}")
endif()
matrix_generator.h
/*******************************************************************************
* Copyright 2022 Intel Corporation.
*
* This software and the related documents are Intel copyrighted materials, and
* your use of them is governed by the express license under which they were
* provided to you (License). Unless the License provides otherwise, you may not
* use, modify, copy, publish, distribute, disclose or transmit this software or
* the related documents without Intel's prior written permission.
*
* This software and the related documents are provided as is, with no express
* or implied warranties, other than those that are expressly stated in the
* License.
*******************************************************************************/
/*
! Content:
! Intel® oneAPI Math Kernel Library ScaLAPACK C example source file
!
!*******************************************************************************
!===============================================================================
! Matrix generator routines for ScaLAPACK p?getrf and p?getrs example program
!===============================================================================
! List of routines are given in the file:
!
! pdmatgen_random
! psmatgen_random
! pzmatgen_random
! psmatgen_random
!
! Definitions for element index numbers of ScaLAPACK array descriptors
*/
#include "mkl.h"
#include "mkl_blacs.h"
#include "mkl_pblas.h"
#include "mkl_scalapack.h"
#define DTYPE_ 1
#define CTXT_ 2
#define M_ 3
#define N_ 4
#define MB_ 5
#define NB_ 6
#define RSRC_ 7
#define CSRC_ 8
#define LLD_ 9
/* Additional definitions */
#ifndef min
#define min(a, b) ((a) < (b) ? (a) : (b))
#endif
#ifndef max
#define max(a, b) ((a) > (b) ? (a) : (b))
#endif
/* Local real and complex constants */
#define RZERO 0.0
#define RONE 1.0
#define RNEGONE -1.0
#define CZERO {0.0f, 0.0f}
#define CONE {1.0f, 0.0f}
#define CNEGONE {-1.0f, 0.0f}
const MKL_INT izero = 0, ione = 1, isix = 6, inegone = -1, dist=1;
void pdmatgen_random(MKL_INT type, MKL_INT* desca, double* a, MKL_INT* seed, MKL_INT nprow, MKL_INT npcol)
/*
* Purpose
* =======
*
* pdmatgen_random: parallel real double precision matrix generator.
* Generate a sub-matrix of A.
*
* Arguments
* =========
*
* type (global input)
* Type of global matrix to be generated.
* If type==0, local matrices are parts of the matrix
* which possesses 1 as all elements along its leading (main) diagonal;
* while all other elements in the matrix are 0.
* A random matrix is generated if type != 0.
*
* desca (global and local input) an array of (at least) length 9.
* The array descriptor for the distributed matrix A.
* See the notes below for the decription of each element.
*
* a (local output), pointer into the local
* memory to an array containing the
* local pieces of the distributed matrix.
*
* seed (local input and output) integer array, dimension (4)
* On entry, the seed of the random number generator.
* On exit, the seed is updated.
*
* nprow (global input)
* The number of process rows in the grid.
*
* npcol (global input)
* The number of process columns in the grid.
*
*
* Notes:
* =====
*
* Each global data object is described by an associated description
* vector. This vector stores the information required to establish
* the mapping between an object element and its corresponding process
* and memory location.
*
*
* In the following comments, the character _ should be read as
* "of the distributed matrix". Let A be a generic term for any 2D
* block cyclicly distributed matrix. Its description vector is DESC_A:
*
* NOTATION STORED IN EXPLANATION
* ---------------- --------------- ------------------------------------
* DTYPE_A (global) DESCA[ DTYPE_ ] The descriptor type.
* CTXT_A (global) DESCA[ CTXT_ ] The BLACS context handle, indicating
* the NPROW x NPCOL BLACS process grid
* A is distributed over.The context
* itself is global, but the handle
* (the integer value) may vary.
* M_A (global) DESCA[ M_ ] The number of rows in the distribu-
* ted matrix A, M_A >= 0.
* N_A (global) DESCA[ N_ ] The number of columns in the distri-
* buted matrix A, N_A >= 0.
* IMB_A (global) DESCA[ IMB_ ] The number of rows of the upper left
* block of the matrix A, IMB_A > 0.
* INB_A (global) DESCA[ INB_ ] The number of columns of the upper
* left block of the matrix A,
* INB_A > 0.
* MB_A (global) DESCA[ MB_ ] The blocking factor used to distri-
* bute the last M_A-IMB_A rows of A,
* MB_A > 0.
* NB_A (global) DESCA[ NB_ ] The blocking factor used to distri-
* bute the last N_A-INB_A columns of
* A, NB_A > 0.
* RSRC_A (global) DESCA[ RSRC_ ] The process row over which the first
* row of the matrix A is distributed,
* NPROW > RSRC_A >= 0.
* CSRC_A (global) DESCA[ CSRC_ ] The process column over which the
* first column of A is distributed.
* NPCOL > CSRC_A >= 0.
* LLD_A (local) DESCA[ LLD_ ] The leading dimension of the local
* array storing the local blocks of
* the distributed matrix A,
* IF( Lc( 1, N_A ) > 0 )
* LLD_A >= MAX( 1, Lr( 1, M_A ) )
* ELSE
* LLD_A >= 1.
*/
{
MKL_INT myrow, mycol;
MKL_INT iarow = 0, iacol = 0;
MKL_INT ictxt = desca[CTXT_-1];
MKL_INT m = desca[M_-1];
MKL_INT n = desca[N_-1];
MKL_INT mb = desca[MB_-1];
MKL_INT nb = desca[NB_-1];
MKL_INT lld = desca[LLD_-1];
MKL_INT i, j, ii, jj, il = 0, jl = 0;
blacs_gridinfo(&ictxt, &nprow, &npcol, &myrow, &mycol);
for (j = 0; j < n; j += nb) {
for (i = 0; i < m; i += mb) {
i++; j++;
infog2l(&i, &j, desca, &nprow, &npcol, &myrow, &mycol, &il, &jl, &iarow, &iacol);
i--; j--; il--; jl--;
if (iarow==myrow && iacol==mycol) {
for (jj = 0; jj < min(nb, n - j); jj++) {
for (ii = 0; ii < min(mb, m - i); ii++) {
if(type) {
a[il+ii+(jl+jj)*lld] = (double) dlarnd( &dist, seed );
} else {
if(i==j) a[il+ii+(jl+jj)*lld] = RONE;
else a[il+ii+(jl+jj)*lld] = RZERO;
}
}
}
}
}
}
}
void psmatgen_random(MKL_INT type, MKL_INT* desca, float* a, MKL_INT* seed, MKL_INT nprow, MKL_INT npcol)
{
/*
* Purpose
* =======
*
* psmatgen_random: parallel real float precision matrix generator.
* Generate a sub-matrix of A.
*
* Arguments
* =========
*
* type (global input)
* Type of global matrix to be generated.
* If type==0, local matrices are parts of the matrix
* which possesses 1 as all elements along its leading (main) diagonal;
* while all other elements in the matrix are 0.
* A random matrix is generated if type != 0.
*
* desca (global and local input) an array of (at least) length 9.
* The array descriptor for the distributed matrix A.
* See the notes below for the decription of each element.
*
* a (local output), pointer into the local
* memory to a float array containing the
* local pieces of the distributed matrix.
*
* seed (local input and output) integer array, dimension (4)
* On entry, the seed of the random number generator.
* On exit, the seed is updated.
*
* nprow (global input)
* The number of process rows in the grid.
*
* npcol (global input)
* The number of process columns in the grid.
*
*
* Notes:
* =====
*
* Each global data object is described by an associated description
* vector. This vector stores the information required to establish
* the mapping between an object element and its corresponding process
* and memory location.
*
*
* In the following comments, the character _ should be read as
* "of the distributed matrix". Let A be a generic term for any 2D
* block cyclicly distributed matrix. Its description vector is DESC_A:
*
* NOTATION STORED IN EXPLANATION
* ---------------- --------------- ------------------------------------
* DTYPE_A (global) DESCA[ DTYPE_ ] The descriptor type.
* CTXT_A (global) DESCA[ CTXT_ ] The BLACS context handle, indicating
* the NPROW x NPCOL BLACS process grid
* A is distributed over.The context
* itself is global, but the handle
* (the integer value) may vary.
* M_A (global) DESCA[ M_ ] The number of rows in the distribu-
* ted matrix A, M_A >= 0.
* N_A (global) DESCA[ N_ ] The number of columns in the distri-
* buted matrix A, N_A >= 0.
* IMB_A (global) DESCA[ IMB_ ] The number of rows of the upper left
* block of the matrix A, IMB_A > 0.
* INB_A (global) DESCA[ INB_ ] The number of columns of the upper
* left block of the matrix A,
* INB_A > 0.
* MB_A (global) DESCA[ MB_ ] The blocking factor used to distri-
* bute the last M_A-IMB_A rows of A,
* MB_A > 0.
* NB_A (global) DESCA[ NB_ ] The blocking factor used to distri-
* bute the last N_A-INB_A columns of
* A, NB_A > 0.
* RSRC_A (global) DESCA[ RSRC_ ] The process row over which the first
* row of the matrix A is distributed,
* NPROW > RSRC_A >= 0.
* CSRC_A (global) DESCA[ CSRC_ ] The process column over which the
* first column of A is distributed.
* NPCOL > CSRC_A >= 0.
* LLD_A (local) DESCA[ LLD_ ] The leading dimension of the local
* array storing the local blocks of
* the distributed matrix A,
* IF( Lc( 1, N_A ) > 0 )
* LLD_A >= MAX( 1, Lr( 1, M_A ) )
* ELSE
* LLD_A >= 1.
*/
MKL_INT myrow, mycol;
MKL_INT iarow = 0, iacol = 0;
MKL_INT ictxt = desca[CTXT_-1];
MKL_INT m = desca[M_-1];
MKL_INT n = desca[N_-1];
MKL_INT mb = desca[MB_-1];
MKL_INT nb = desca[NB_-1];
MKL_INT lld = desca[LLD_-1];
MKL_INT i, j, ii, jj, il = 0, jl = 0;
blacs_gridinfo(&ictxt, &nprow, &npcol, &myrow, &mycol);
for (j = 0; j < n; j += nb) {
for (i = 0; i < m; i += mb) {
i++; j++;
infog2l(&i, &j, desca, &nprow, &npcol, &myrow, &mycol, &il, &jl, &iarow, &iacol);
i--; j--; il--; jl--;
if ( iarow==myrow && iacol==mycol ) {
for (jj = 0; jj < min(nb, n - j); jj++) {
for (ii = 0; ii < min(mb, m - i); ii++) {
if( type ) {
a[il+ii+(jl+jj)*lld] = (float) dlarnd( &dist, seed );
} else {
if(i==j) a[il+ii+(jl+jj)*lld] = RONE;
else a[il+ii+(jl+jj)*lld] = RZERO;
}
}
}
}
}
}
}
void pzmatgen_random(MKL_INT type, MKL_INT* desca, MKL_Complex16* a, MKL_INT* seed, MKL_INT nprow, MKL_INT npcol)
{
/*
* Purpose
* =======
*
* pzmatgen_random: parallel double complex precision matrix generator.
* Generate a sub-matrix of A.
*
* Arguments
* =========
*
* type (global input)
* Type of global matrix to be generated.
* If type==0, local matrices are parts of the matrix
* which possesses 1 as all elements along its leading (main) diagonal;
* while all other elements in the matrix are 0.
* Otherwise a random matrix is generated.
* type (global input)
* Type of global matrix to be generated.
* If type==0, local matrices are parts of the identity matrix.
* Otherwise a random matrix is generated.
*
* desca (global and local input) an array of (at least) length 9.
* The array descriptor for the distributed matrix A.
* See the notes below for the decription of each element.
*
* a (local output), pointer into the local
* memory to a MKL_Complex16 array containing the
* local pieces of the distributed matrix.
*
* seed (local input and output) integer array, dimension (4)
* On entry, the seed of the random number generator.
* On exit, the seed is updated.
*
* nprow (global input)
* The number of process rows in the grid.
*
* npcol (global input)
* The number of process columns in the grid.
*
* Notes:
* =====
*
* Each global data object is described by an associated description
* vector. This vector stores the information required to establish
* the mapping between an object element and its corresponding process
* and memory location.
*
* In the following comments, the character _ should be read as
* "of the distributed matrix". Let A be a generic term for any 2D
* block cyclicly distributed matrix. Its description vector is DESC_A:
*
* NOTATION STORED IN EXPLANATION
* ---------------- --------------- ------------------------------------
* DTYPE_A (global) DESCA[ DTYPE_ ] The descriptor type.
* CTXT_A (global) DESCA[ CTXT_ ] The BLACS context handle, indicating
* the NPROW x NPCOL BLACS process grid
* A is distributed over.The context
* itself is global, but the handle
* (the integer value) may vary.
* M_A (global) DESCA[ M_ ] The number of rows in the distribu-
* ted matrix A, M_A >= 0.
* N_A (global) DESCA[ N_ ] The number of columns in the distri-
* buted matrix A, N_A >= 0.
* IMB_A (global) DESCA[ IMB_ ] The number of rows of the upper left
* block of the matrix A, IMB_A > 0.
* INB_A (global) DESCA[ INB_ ] The number of columns of the upper
* left block of the matrix A,
* INB_A > 0.
* MB_A (global) DESCA[ MB_ ] The blocking factor used to distri-
* bute the last M_A-IMB_A rows of A,
* MB_A > 0.
* NB_A (global) DESCA[ NB_ ] The blocking factor used to distri-
* bute the last N_A-INB_A columns of
* A, NB_A > 0.
* RSRC_A (global) DESCA[ RSRC_ ] The process row over which the first
* row of the matrix A is distributed,
* NPROW > RSRC_A >= 0.
* CSRC_A (global) DESCA[ CSRC_ ] The process column over which the
* first column of A is distributed.
* NPCOL > CSRC_A >= 0.
* LLD_A (local) DESCA[ LLD_ ] The leading dimension of the local
* array storing the local blocks of
* the distributed matrix A,
* IF( Lc( 1, N_A ) > 0 )
* LLD_A >= MAX( 1, Lr( 1, M_A ) )
* ELSE
* LLD_A >= 1.
*/
MKL_INT myrow, mycol;
MKL_INT iarow = 0, iacol = 0;
MKL_INT ictxt = desca[CTXT_-1];
MKL_INT m = desca[M_-1];
MKL_INT n = desca[N_-1];
MKL_INT mb = desca[MB_-1];
MKL_INT nb = desca[NB_-1];
MKL_INT lld = desca[LLD_-1];
MKL_INT i, j, ii, jj, il = 0, jl = 0;
blacs_gridinfo(&ictxt, &nprow, &npcol, &myrow, &mycol);
for (j = 0; j < n; j += nb) {
for (i = 0; i < m; i += mb) {
i++; j++;
infog2l(&i, &j, desca, &nprow, &npcol, &myrow, &mycol, &il, &jl, &iarow, &iacol);
i--; j--; il--; jl--;
if (iarow==myrow && iacol==mycol) {
for (jj = 0; jj < min(nb, n - j); jj++) {
for (ii = 0; ii < min(mb, m - i); ii++) {
if( type ) {
a[il+ii+(jl+jj)*lld].real = (double) dlarnd( &dist, seed );
a[il+ii+(jl+jj)*lld].imag = (double) dlarnd( &dist, seed );
} else {
if( i==j ) {
a[il+ii+(jl+jj)*lld].real = RONE;
a[il+ii+(jl+jj)*lld].imag = RZERO;
} else {
a[il+ii+(jl+jj)*lld].real = RZERO;
a[il+ii+(jl+jj)*lld].imag = RZERO;
}
}
}
}
}
}
}
}
void pcmatgen_random(MKL_INT type, MKL_INT* desca, MKL_Complex8* a, MKL_INT* seed, MKL_INT nprow, MKL_INT npcol)
{
/*
* Purpose
* =======
*
* pcmatgen_random: parallel single complex precision matrix generator.
* Generate a sub-matrix of A.
*
* Arguments
* =========
*
* type (global input)
* Type of global matrix to be generated.
* If type==0, local matrices are parts of the matrix
* which possesses 1 as all elements along its leading (main) diagonal;
* while all other elements in the matrix are 0.
* Otherwise a random matrix is generated.
*
* desca (global and local input) an array of (at least) length 9.
* The array descriptor for the distributed matrix A.
* See the notes below for the decription of each element.
*
* a (local output), pointer into the local
* memory to a MKL_Complex8 array containing the
* local pieces of the distributed matrix.
*
* seed (local input and output) integer array, dimension (4)
* On entry, the seed of the random number generator.
* On exit, the seed is updated.
*
* nprow (global input)
* The number of process rows in the grid.
*
* npcol (global input)
* The number of process columns in the grid.
*
*
* Notes:
* =====
*
* Each global data object is described by an associated description
* vector. This vector stores the information required to establish
* the mapping between an object element and its corresponding process
* and memory location.
*
*
* In the following comments, the character _ should be read as
* "of the distributed matrix". Let A be a generic term for any 2D
* block cyclicly distributed matrix. Its description vector is DESC_A:
*
* NOTATION STORED IN EXPLANATION
* ---------------- --------------- ------------------------------------
* DTYPE_A (global) DESCA[ DTYPE_ ] The descriptor type.
* CTXT_A (global) DESCA[ CTXT_ ] The BLACS context handle, indicating
* the NPROW x NPCOL BLACS process grid
* A is distributed over.The context
* itself is global, but the handle
* (the integer value) may vary.
* M_A (global) DESCA[ M_ ] The number of rows in the distribu-
* ted matrix A, M_A >= 0.
* N_A (global) DESCA[ N_ ] The number of columns in the distri-
* buted matrix A, N_A >= 0.
* IMB_A (global) DESCA[ IMB_ ] The number of rows of the upper left
* block of the matrix A, IMB_A > 0.
* INB_A (global) DESCA[ INB_ ] The number of columns of the upper
* left block of the matrix A,
* INB_A > 0.
* MB_A (global) DESCA[ MB_ ] The blocking factor used to distri-
* bute the last M_A-IMB_A rows of A,
* MB_A > 0.
* NB_A (global) DESCA[ NB_ ] The blocking factor used to distri-
* bute the last N_A-INB_A columns of
* A, NB_A > 0.
* RSRC_A (global) DESCA[ RSRC_ ] The process row over which the first
* row of the matrix A is distributed,
* NPROW > RSRC_A >= 0.
* CSRC_A (global) DESCA[ CSRC_ ] The process column over which the
* first column of A is distributed.
* NPCOL > CSRC_A >= 0.
* LLD_A (local) DESCA[ LLD_ ] The leading dimension of the local
* array storing the local blocks of
* the distributed matrix A,
* IF( Lc( 1, N_A ) > 0 )
* LLD_A >= MAX( 1, Lr( 1, M_A ) )
* ELSE
* LLD_A >= 1.
*/
MKL_INT myrow, mycol;
MKL_INT iarow = 0, iacol = 0;
MKL_INT ictxt = desca[CTXT_-1];
MKL_INT m = desca[M_-1];
MKL_INT n = desca[N_-1];
MKL_INT mb = desca[MB_-1];
MKL_INT nb = desca[NB_-1];
MKL_INT lld = desca[LLD_-1];
MKL_INT i, j, ii, jj, il = 0, jl = 0;
blacs_gridinfo(&ictxt, &nprow, &npcol, &myrow, &mycol);
for (j = 0; j < n; j += nb) {
for (i = 0; i < m; i += mb) {
i++; j++;
infog2l(&i, &j, desca, &nprow, &npcol, &myrow, &mycol, &il, &jl, &iarow, &iacol);
i--; j--; il--; jl--;
if (iarow==myrow && iacol==mycol) {
for (jj = 0; jj < min(nb, n - j); jj++) {
for (ii = 0; ii < min(mb, m - i); ii++) {
if( type ) {
a[il+ii+(jl+jj)*lld].real = (float) dlarnd( &dist, seed );
a[il+ii+(jl+jj)*lld].imag = (float) dlarnd( &dist, seed );
} else {
if( i==j ) {
a[il+ii+(jl+jj)*lld].real = RONE;
a[il+ii+(jl+jj)*lld].imag = RZERO;
} else {
a[il+ii+(jl+jj)*lld].real = RZERO;
a[il+ii+(jl+jj)*lld].imag = RZERO;
}
}
}
}
}
}
}
}
psgetrf_example.c
/*******************************************************************************
* Copyright 2022 Intel Corporation.
*
* This software and the related documents are Intel copyrighted materials, and
* your use of them is governed by the express license under which they were
* provided to you (License). Unless the License provides otherwise, you may not
* use, modify, copy, publish, distribute, disclose or transmit this software or
* the related documents without Intel's prior written permission.
*
* This software and the related documents are provided as is, with no express
* or implied warranties, other than those that are expressly stated in the
* License.
*******************************************************************************
! Content:
! Intel® oneAPI Math Kernel Library ScaLAPACK C example source file
!
!*****************************************************************************
!===============================================================================
!==== ScaLAPACK psgetrf and psgetrs example program ============================
!===============================================================================
!
! Example solves a system of distributed linear equations
!
! A * X = Y or A * X = I
!
! with a general random n-by-n distributed matrix A using the LU
! factorization computed by psgetrf.
! Y is a random n-by-nrhs distributed matrix,
! I denotes n-by-nrhs matrix which possesses 1 as all
! elements along its leading (main) diagonal;
! while all other elements in the matrix are 0.
! If n==nrhs, I is the identity matrix and X is the matrix inverse.
!
! Example also demonstrates BLACS routines usage.
!
! It works with block-cyclic distribution on 2D process grid
!
! List of routines demonstrated in the example:
!
! psgetrf
! psgetrs
! pslamch
! pslange
! psgemr2d
! psgemm
! blacs_get
! blacs_gridinit
! blacs_gridinfo
! blacs_gridexit
! blacs_exit
! descinit
! igamx2d
! igebr2d
! igebs2d
! sgebr2d
! sgebs2d
! numroc
! indxg2p
!
! The program must be driven by a short data file.
!
! 0 type, problem type, if type=0 the system A*X = I is solved, otherwise A*X = Y
! 2000 n, dimension of matrix, 0 < n
! 1000 nrhs,number of right hand sides
! 16 nb, size of blocks, must be > 0
! 2 p, number of rows in the process grid, must be > 0
! 2 q, number of columns in the process grid, must be > 0, p*q = number of processes
! 20.0 threshold for residual check
!
!*******************************************************************************/
#include <stdio.h>
#include <stdlib.h>
#include "matrix_generator.h"
MKL_INT test_solve(MKL_INT ictxt, MKL_INT type, MKL_INT n, MKL_INT nrhs, MKL_INT nb, MKL_INT nprow, MKL_INT npcol, MKL_INT* seed, float thresh)
{
MKL_INT info = 0;
MKL_INT err = 0;
float *A = NULL;
float *A_INI = NULL;
float *X = NULL;
float *Y = NULL;
MKL_INT *IPIV = NULL;
/* Matrix descriptors */
MKL_INT desca[9];
MKL_INT descx[9];
MKL_INT descy[9];
/* Local variables */
MKL_INT myrow;
MKL_INT mycol;
MKL_INT iam, nprocs;
float eps;
float normA, normRhs, normSol, normRes, residual;
float fzero = RZERO;
float fone = RONE;
float fnegone = RNEGONE;
/* Compute machine epsilon */
eps = pslamch( &ictxt, "e" );
/* Get information about how many processes are used for program execution
and number of current process */
blacs_pinfo( &iam, &nprocs );
blacs_gridinfo(&ictxt, &nprow, &npcol, &myrow, &mycol);
/* Compute precise length of local pieces of distributed matrices */
MKL_INT loc_ra = numroc(&n, &nb, &myrow, &izero, &nprow);
MKL_INT loc_ca = numroc(&n, &nb, &mycol, &izero, &npcol);
MKL_INT loc_lda = max(1, loc_ra);
MKL_INT loc_sizeA =loc_ra*loc_ca;
MKL_INT loc_rx = numroc(&n, &nb, &myrow, &izero, &nprow);
MKL_INT loc_cx = numroc(&nrhs, &nb, &mycol, &izero, &npcol);
MKL_INT loc_ldx = max(1, loc_rx);
MKL_INT loc_ry = numroc(&n, &nb, &myrow, &izero, &nprow);
MKL_INT loc_cy = numroc(&nrhs, &nb, &mycol, &izero, &npcol);
MKL_INT loc_ldy = max(1, loc_ry);
/* Initialize descriptors for distributed arrays */
descinit(desca, &n, &n, &nb, &nb, &izero, &izero, &ictxt, &loc_lda, &info);
descinit(descx, &n, &nrhs, &nb, &nb, &izero, &izero, &ictxt, &loc_ldx, &info);
descinit(descy, &n, &nrhs, &nb, &nb, &izero, &izero, &ictxt, &loc_ldy, &info);
/* Allocate matrix, right-hand-side, vector solution x and the IPIV array
containing the pivoting information.The array A_INI is used to store
original matrix */
if( loc_sizeA ) {
A = (float*)mkl_malloc( loc_sizeA * sizeof(float), 64 );
A_INI=(float*)mkl_malloc( loc_sizeA * sizeof(float), 64 );
}
if( loc_rx * loc_cx ) X = (float*)mkl_malloc( loc_rx * loc_cx * sizeof(float), 64 );
if( loc_ry * loc_cy ) Y = (float*)mkl_malloc( loc_ry * loc_cy * sizeof(float), 64 );
IPIV = (MKL_INT*)mkl_malloc( (loc_ra + nb)*sizeof(MKL_INT), 64 );
if( A == NULL || X == NULL || Y == NULL || A_INI == NULL || IPIV == NULL ) info = 1;
igamx2d(&ictxt, "ALL", " ", &ione, &ione, &info, &ione, NULL, NULL, &inegone, &inegone, &inegone);
if( info ) {
err = 1;
if ( iam == 0 ) printf( "\n Can't allocate memory for arrays\n" );
goto early_exit;
}
/* Generate matrix and solution.Copy the initial matrix.*/
seed[0]+= myrow + mycol;
psmatgen_random(1, desca, A, seed, nprow, npcol);
scopy(&loc_sizeA, A, &ione, A_INI, &ione);
psmatgen_random(type, descx, X, seed, nprow, npcol);
psmatgen_random(type, descy, Y, seed, nprow, npcol);
/* Compute the right hand side */
if( type ) {
psgemm("N", "N", &n, &nrhs, &n, &fone, A, &ione, &ione, desca,
X, &ione, &ione, descx,
&fzero, Y, &ione, &ione, descy);
/* Copy the right hand side Y to X */
psgemr2d(&n, &nrhs, Y, &ione, &ione, descy, X, &ione, &ione, descx, &ictxt);
}
/* Compute norm of the initial matrix and right hand side */
normA = pslange("M", &n, &n, A, &ione, &ione, desca, NULL);
normRhs = pslange("M", &n, &nrhs, Y, &ione, &ione, descy, NULL);
/* Compute the LU factorization of the initial matrix */
psgetrf(&n, &n, A, &ione, &ione, desca, IPIV, &info);
igamx2d(&ictxt, "ALL", " ", &ione, &ione, &info, &ione, NULL, NULL, &inegone, &inegone, &inegone);
/* If psgetrf fails, deallocate all arays and report an error */
if( info) {
err = 1;
if ( iam == 0 ) printf( "\n psgetrf fails to compute LU decomposition \n" );
goto early_exit;
}
psgetrs("N", &n, &nrhs, A, &ione, &ione, desca, IPIV, X, &ione, &ione, descx, &info);
igamx2d(&ictxt, "ALL", " ", &ione, &ione, &info, &ione, NULL, NULL, &inegone, &inegone, &inegone);
/* If psgetrs fails, deallocate all arays and report an error */
if( info ) {
err = 1;
if ( iam == 0 ) printf( "\n psgetrs fails to solve \n" );
goto early_exit;
}
/* Normsol is the norm of the solution matrix: normsol = || X|| */
normSol = pslange("M", &n, &nrhs, X, &ione, &ione, descx, NULL);
/* Compute the residual matrix || A*X - Y || */
psgemm("N", "N", &n, &nrhs, &n, &fone, A_INI, &ione, &ione, desca,
X, &ione, &ione, descx,
&fnegone, Y, &ione, &ione, descy);
/* Compute the residual as:
residual = ||A*X-Y||/(n*eps*(||A||*||X||+||Y||)) */
normRes = pslange("M", &n, &nrhs, Y, &ione, &ione, descy, NULL);
residual = normRes/( ( (float)n ) * eps *( normA * normSol + normRhs ) );
/* Check if residual passed or failed the threshold */
if( iam == 0 ) {
if ( ( thresh >= RZERO ) && !( residual <= thresh ) ){
printf( "FAILED.Residual = %05.11f\n", residual );
err = 1;
} else {
printf( "PASSED.Residual = %05.11f\n", residual );
}
printf( "=== END OF EXAMPLE =====================\n" );
}
early_exit :
/* Destroy arrays */
if( A ) mkl_free(A);
if( A_INI ) mkl_free(A_INI);
if( X ) mkl_free(X);
if( Y ) mkl_free(Y);
if( IPIV ) mkl_free(IPIV);
return err;
}
int main(int argc, char *argv[])
{
/* ==== Declarations =================================================== */
/* File variables */
FILE *fin;
/* Local scalars */
MKL_INT iam, nprocs, ictxt=-1, nprow, npcol;
MKL_INT n, nb, nrhs;
MKL_INT err;
MKL_INT type=1;
int n_int, nb_int, nprow_int, npcol_int, nrhs_int, type_int;
float thresh;
MKL_INT seed[4] = { 105, 1410, 1860, 4085 };
/* Local arrays */
MKL_INT iw[ 6 ];
/* ==== Executable statements ========================================== */
/* Get information about how many processes are used for program execution
and number of current process */
blacs_pinfo( &iam, &nprocs );
/* Init temporary 1D process grid */
blacs_get( &inegone, &izero, &ictxt );
blacs_gridinit( &ictxt, "C", &nprocs, &ione );
/* Open input file */
if ( iam == 0 ) {
fin = fopen( "psgetrf_example.in", "r" );
if ( fin == NULL ) {
printf( "Error while open input file.");
return 1;
}
}
/* Read data and send it to all processes */
if ( iam == 0 ) {
/* Read parameters */
fscanf( fin, "%d type, problem type, if type=0 the system A*X = I is solved, otherwise A*X = Y", &type_int );
fscanf( fin, "%d n, dimension of matrix, 0 < n", &n_int );
fscanf( fin, "%d nrhs,number of right hand sides, 0 < nrhs", &nrhs_int );
fscanf( fin, "%d nb, size of blocks, must be > 0", &nb_int );
fscanf( fin, "%d p, number of rows in the process grid, must be > 0", &nprow_int );
fscanf( fin, "%d q, number of columns in the process grid, must be > 0, p*q = number of processes", &npcol_int );
fscanf( fin, "%f threshold for residual check", &thresh );
fclose( fin );
type = (MKL_INT) type_int;
n = (MKL_INT) n_int;
nrhs = (MKL_INT) nrhs_int;
nb = (MKL_INT) nb_int;
nprow = (MKL_INT) nprow_int;
npcol = (MKL_INT) npcol_int;
/* Check if all parameters are correct */
if( ( n<=0 )||( nrhs<=0 )||( nb<=0 )||( nprow<=0 )||
( npcol<=0 )||( nprow*npcol != nprocs ) ) {
printf( "One or several input parameters has incorrect value.Limitations:\n" );
printf( "n > 0, nrhs > 0, nb > 0, p > 0, q > 0 - integer\n" );
printf( "p*q = number of processes\n" );
printf( "threshold - float \n");
return 1;
}
/* Pack data into array and send it to other processes */
iw[ 0 ] = type;
iw[ 1 ] = n;
iw[ 2 ] = nrhs;
iw[ 3 ] = nb;
iw[ 4 ] = nprow;
iw[ 5 ] = npcol;
igebs2d( &ictxt, "All", " ", &isix, &ione, iw, &isix );
sgebs2d( &ictxt, "All", " ", &ione, &ione, &thresh, &ione );
} else {
/* Recieve and unpack data */
igebr2d( &ictxt, "All", " ", &isix, &ione, iw, &isix, &izero, &izero );
sgebr2d( &ictxt, "All", " ", &ione, &ione, &thresh, &ione, &izero, &izero );
type = iw[ 0 ];
n = iw[ 1 ];
nrhs = iw[ 2 ];
nb = iw[ 3 ];
nprow = iw[ 4 ];
npcol = iw[ 5 ];
}
/* Destroy temporary process grid */
blacs_gridexit( &ictxt );
/* Init workind 2D process grid */
blacs_get(&inegone, &izero, &ictxt);
blacs_gridinit(&ictxt, "R", &nprow, &npcol);
if ( iam == 0 ) {
/* Print information of task */
printf( "=== START OF EXAMPLE ===================\n" );
if( type ) {
printf( "Solve matrix equation: A*X = Y\n" );
printf( "with a general random n-by-n distributed matrix A \n" );
printf( "Y is a random n-by-nrhs distributed matrix\n\n" );
} else {
printf( "Solve matrix equation: A*X = I\n" );
printf( "with a general random n-by-n distributed matrix A \n" );
printf( "I denotes a n-by-nrhs matrix which possesses 1 \n" );
printf( "as all elements along its leading (main) diagonal \n" );
printf( "while all other elements in the matrix are 0.\n" );
printf( "If n==nrhs, I is the identity matrix and X is the matrix inverse of A.\n\n" );
}
printf( "n = %d, nrhs = %d, nb = %d; %dx%d - process grid\n\n", n_int, nrhs_int, nb_int, nprow_int, npcol_int);
printf( "Threshold for residual check = %05.11f\n", thresh );
}
err = test_solve(ictxt, type, n, nrhs, nb, nprow, npcol, seed, thresh);
/* Destroy process grid */
blacs_gridexit(&ictxt);
blacs_exit(&izero);
return err;
}
psgetrf_example.in
0 type, problem type, if type=0 the system A*X = I is solved, otherwise A*X = Y
2000 n, dimension of matrix, 0 < n
1000 nrhs,number of right hand sides
16 nb, size of blocks, must be > 0
2 p, number of rows in the process grid, must be > 0
2 q, number of columns in the process grid, must be > 0, p*q = number of processes
20.0 threshold for residual check
CMakeLists.txt
#===============================================================================
# Copyright 2023 Intel Corporation.
#
# This software and the related documents are Intel copyrighted materials, and
# your use of them is governed by the express license under which they were
# provided to you (License). Unless the License provides otherwise, you may not
# use, modify, copy, publish, distribute, disclose or transmit this software or
# the related documents without Intel's prior written permission.
#
# This software and the related documents are provided as is, with no express
# or implied warranties, other than those that are expressly stated in the
# License.
#===============================================================================
cmake_minimum_required(VERSION 3.13)
enable_testing()
# Enable SCALAPACK in oneMKL
# See MKLConfig.cmake top section for all cluster libraries
set(ENABLE_SCALAPACK ON)
# Define language and compiler
set(TEST_LANG C)
set(TEST_EXT c)
project(MKL_Example LANGUAGES ${TEST_LANG})
# Include oneMKL config file
find_package(MKL CONFIG REQUIRED)
# W/A for known problem in Intel® MPI 2021.1
if(MKL_MPI STREQUAL "intelmpi" AND DEFINED ENV{I_MPI_ROOT})
if(UNIX AND NOT APPLE AND $ENV{I_MPI_ROOT} MATCHES "2021.1")
set(MPI_C_ADDITIONAL_INCLUDE_DIRS $ENV{I_MPI_ROOT}/include)
endif()
endif()
# Force mshpc to be the first in mpi find package
if(MKL_MPI STREQUAL "msmpi" OR MKL_MPI STREQUAL "mshpc")
set(MPI_GUESS_LIBRARY_NAME MSMPI)
endif()
find_package(MPI REQUIRED)
# Override default compile/link lines for mpi script
if(USE_MPI_SCRIPT AND WIN32)
set(CMAKE_C_COMPILE_OBJECT "<CMAKE_C_COMPILER> <DEFINES> <INCLUDES> <FLAGS> /Fo<OBJECT> -c <SOURCE>")
set(CMAKE_C_LINK_EXECUTABLE "<CMAKE_C_COMPILER> <OBJECTS> -o <TARGET> <LINK_LIBRARIES>")
endif()
# Try to identify MPI for correct BLACS if MKL_MPI is not defined
if(MPI_${TEST_LANG}_FOUND AND NOT MKL_MPI)
if(UNIX AND NOT APPLE AND MPI_${TEST_LANG}_COMPILER MATCHES "openmpi")
set(CURRENT_MPI "openmpi")
elseif(WIN32 AND MPI_${TEST_LANG}_COMPILER MATCHES "msmpi" OR MPI_${TEST_LANG}_COMPILER MATCHES "Microsoft MPI")
set(CURRENT_MPI "msmpi")
elseif(MPI_${TEST_LANG}_COMPILER MATCHES "mpich")
set(CURRENT_MPI "mpich")
elseif(NOT APPLE AND MPI_${TEST_LANG}_COMPILER MATCHES ".*intel.*mpi.*")
set(CURRENT_MPI "intelmpi")
if(WIN32 AND MPI_LOCALONLY)
set(MPI_LOCAL_OPT -localonly)
endif()
endif()
set(MKL_MPI "${CURRENT_MPI}")
endif()
# Define target
set(TEST_INCLUDE "${MPI_${TEST_LANG}_INCLUDE_DIRS}")
# Build example
set(func_name "psgetrf_example")
set(func "${func_name}")
file(GLOB_RECURSE ${func}_SRC ${PROJECT_SOURCE_DIR}/${func_name}.${TEST_EXT})
add_executable(${func} ${${func}_SRC})
target_include_directories(${func} PUBLIC ${TEST_INCLUDE})
if(WIN32)
set(TEST_COPT "-D_CRT_SECURE_NO_WARNINGS")
target_compile_options(${func} PUBLIC ${TEST_COPT})
endif()
target_link_libraries(${func} PUBLIC MKL::MKL MPI::MPI_C)
# Register example as ctest
configure_file(${PROJECT_SOURCE_DIR}/${func_name}.in ${CMAKE_CURRENT_BINARY_DIR} COPYONLY)
add_test(NAME ${func} COMMAND ${MPIEXEC_EXECUTABLE} ${MPIEXEC_NUMPROC_FLAG} 4 ${MPI_LOCAL_OPT} ${PROJECT_BINARY_DIR}/${func})
# Add oneMKL environment variables
if(MKL_ENV)
set_tests_properties(${func} PROPERTIES ENVIRONMENT "${MKL_ENV}")
endif()