2 * Copyright 1993-2014 NVIDIA Corporation. All rights reserved.
6 * This source code and/or documentation ("Licensed Deliverables") are
7 * subject to NVIDIA intellectual property rights under U.S. and
8 * international Copyright laws.
10 * These Licensed Deliverables contained herein is PROPRIETARY and
11 * CONFIDENTIAL to NVIDIA and is being provided under the terms and
12 * conditions of a form of NVIDIA software license agreement by and
13 * between NVIDIA and Licensee ("License Agreement") or electronically
14 * accepted by Licensee. Notwithstanding any terms or conditions to
15 * the contrary in the License Agreement, reproduction or disclosure
16 * of the Licensed Deliverables to any third party without the express
17 * written consent of NVIDIA is prohibited.
19 * NOTWITHSTANDING ANY TERMS OR CONDITIONS TO THE CONTRARY IN THE
20 * LICENSE AGREEMENT, NVIDIA MAKES NO REPRESENTATION ABOUT THE
21 * SUITABILITY OF THESE LICENSED DELIVERABLES FOR ANY PURPOSE. IT IS
22 * PROVIDED "AS IS" WITHOUT EXPRESS OR IMPLIED WARRANTY OF ANY KIND.
23 * NVIDIA DISCLAIMS ALL WARRANTIES WITH REGARD TO THESE LICENSED
24 * DELIVERABLES, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY,
25 * NONINFRINGEMENT, AND FITNESS FOR A PARTICULAR PURPOSE.
26 * NOTWITHSTANDING ANY TERMS OR CONDITIONS TO THE CONTRARY IN THE
27 * LICENSE AGREEMENT, IN NO EVENT SHALL NVIDIA BE LIABLE FOR ANY
28 * SPECIAL, INDIRECT, INCIDENTAL, OR CONSEQUENTIAL DAMAGES, OR ANY
29 * DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS,
30 * WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS
31 * ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE
32 * OF THESE LICENSED DELIVERABLES.
34 * U.S. Government End Users. These Licensed Deliverables are a
35 * "commercial item" as that term is defined at 48 C.F.R. 2.101 (OCT
36 * 1995), consisting of "commercial computer software" and "commercial
37 * computer software documentation" as such terms are used in 48
38 * C.F.R. 12.212 (SEPT 1995) and is provided to the U.S. Government
39 * only as a commercial end item. Consistent with 48 C.F.R.12.212 and
40 * 48 C.F.R. 227.7202-1 through 227.7202-4 (JUNE 1995), all
41 * U.S. Government End Users acquire the Licensed Deliverables with
42 * only those rights set forth herein.
44 * Any use of the Licensed Deliverables in individual and commercial
45 * software must include, in the user documentation and internal
46 * comments to the code, the above Disclaimer and U.S. Government End
51 * This is the public header file for the CUBLAS library, defining the API
53 * CUBLAS is an implementation of BLAS (Basic Linear Algebra Subroutines)
54 * on top of the CUDA runtime.
57 #if !defined(CUBLAS_API_H_)
62 #define CUBLASWINAPI __stdcall
69 #error "This file should not be included without defining CUBLASAPI"
72 #include "driver_types.h"
73 #include "cuComplex.h" /* import complex data type */
75 #if defined(__cplusplus)
77 #endif /* __cplusplus */
79 /* CUBLAS status type returns */
81 CUBLAS_STATUS_SUCCESS =0,
82 CUBLAS_STATUS_NOT_INITIALIZED =1,
83 CUBLAS_STATUS_ALLOC_FAILED =3,
84 CUBLAS_STATUS_INVALID_VALUE =7,
85 CUBLAS_STATUS_ARCH_MISMATCH =8,
86 CUBLAS_STATUS_MAPPING_ERROR =11,
87 CUBLAS_STATUS_EXECUTION_FAILED=13,
88 CUBLAS_STATUS_INTERNAL_ERROR =14,
89 CUBLAS_STATUS_NOT_SUPPORTED =15,
90 CUBLAS_STATUS_LICENSE_ERROR =16
95 CUBLAS_FILL_MODE_LOWER=0,
96 CUBLAS_FILL_MODE_UPPER=1
100 CUBLAS_DIAG_NON_UNIT=0,
118 CUBLAS_POINTER_MODE_HOST = 0,
119 CUBLAS_POINTER_MODE_DEVICE = 1
120 } cublasPointerMode_t;
123 CUBLAS_ATOMICS_NOT_ALLOWED = 0,
124 CUBLAS_ATOMICS_ALLOWED = 1
125 } cublasAtomicsMode_t;
127 /* Opaque structure holding CUBLAS library context. Only a forward declaration appears here, so code that includes this header cannot access its members. */
128 struct cublasContext;
129 typedef struct cublasContext *cublasHandle_t; /* a handle is a pointer to the opaque context */
131 CUBLASAPI cublasStatus_t CUBLASWINAPI cublasCreate_v2 (cublasHandle_t *handle); /* handle is passed by pointer; presumably receives the new context - confirm */
132 CUBLASAPI cublasStatus_t CUBLASWINAPI cublasDestroy_v2 (cublasHandle_t handle); /* handle is passed by value */
133 CUBLASAPI cublasStatus_t CUBLASWINAPI cublasGetVersion_v2(cublasHandle_t handle, int *version); /* version: non-const int pointer, presumably the output - confirm */
134 CUBLASAPI cublasStatus_t CUBLASWINAPI cublasSetStream_v2 (cublasHandle_t handle, cudaStream_t streamId); /* stream is passed by value */
135 CUBLASAPI cublasStatus_t CUBLASWINAPI cublasGetStream_v2 (cublasHandle_t handle, cudaStream_t *streamId); /* stream is returned through a pointer */
137 CUBLASAPI cublasStatus_t CUBLASWINAPI cublasGetPointerMode_v2 (cublasHandle_t handle, cublasPointerMode_t *mode); /* mode is returned through a pointer */
138 CUBLASAPI cublasStatus_t CUBLASWINAPI cublasSetPointerMode_v2 (cublasHandle_t handle, cublasPointerMode_t mode); /* mode: CUBLAS_POINTER_MODE_HOST or CUBLAS_POINTER_MODE_DEVICE */
140 CUBLASAPI cublasStatus_t CUBLASWINAPI cublasGetAtomicsMode(cublasHandle_t handle, cublasAtomicsMode_t *mode); /* mode is returned through a pointer; note this name has no _v2 suffix */
141 CUBLASAPI cublasStatus_t CUBLASWINAPI cublasSetAtomicsMode(cublasHandle_t handle, cublasAtomicsMode_t mode); /* mode: CUBLAS_ATOMICS_NOT_ALLOWED or CUBLAS_ATOMICS_ALLOWED */
145 * cublasSetVector (int n, int elemSize, const void *x, int incx,
148 * copies n elements from a vector x in CPU memory space to a vector y
149 * in GPU memory space. Elements in both vectors are assumed to have a
150 * size of elemSize bytes. Storage spacing between consecutive elements
151 * is incx for the source vector x and incy for the destination vector
152 * y. In general, y points to an object, or part of an object, allocated
153 * via cublasAlloc(). Column major format for two-dimensional matrices
154 * is assumed throughout CUBLAS. Therefore, if the increment for a vector
155 * is equal to 1, this accesses a column vector while using an increment
156 * equal to the leading dimension of the respective matrix accesses a
161 * CUBLAS_STATUS_NOT_INITIALIZED if CUBLAS library has not been initialized
162 * CUBLAS_STATUS_INVALID_VALUE if incx, incy, or elemSize <= 0
163 * CUBLAS_STATUS_MAPPING_ERROR if an error occurred accessing GPU memory
164 * CUBLAS_STATUS_SUCCESS if the operation completed successfully
166 cublasStatus_t CUBLASWINAPI cublasSetVector (int n, int elemSize, const void *x, /* x: source vector in CPU memory, read-only */
167 int incx, void *devicePtr, int incy); /* devicePtr: destination vector in GPU memory (called y in the description); incy is its element spacing */
171 * cublasGetVector (int n, int elemSize, const void *x, int incx,
174 * copies n elements from a vector x in GPU memory space to a vector y
175 * in CPU memory space. Elements in both vectors are assumed to have a
176 * size of elemSize bytes. Storage spacing between consecutive elements
177 * is incx for the source vector x and incy for the destination vector
178 * y. In general, x points to an object, or part of an object, allocated
179 * via cublasAlloc(). Column major format for two-dimensional matrices
180 * is assumed throughout CUBLAS. Therefore, if the increment for a vector
181 * is equal to 1, this accesses a column vector while using an increment
182 * equal to the leading dimension of the respective matrix accesses a
187 * CUBLAS_STATUS_NOT_INITIALIZED if CUBLAS library has not been initialized
188 * CUBLAS_STATUS_INVALID_VALUE if incx, incy, or elemSize <= 0
189 * CUBLAS_STATUS_MAPPING_ERROR if an error occurred accessing GPU memory
190 * CUBLAS_STATUS_SUCCESS if the operation completed successfully
192 cublasStatus_t CUBLASWINAPI cublasGetVector (int n, int elemSize, const void *x, /* x: source vector in GPU memory, read-only */
193 int incx, void *y, int incy); /* y: destination vector in CPU memory; incx and incy are the element spacings of x and y */
197 * cublasSetMatrix (int rows, int cols, int elemSize, const void *A,
198 * int lda, void *B, int ldb)
200 * copies a tile of rows x cols elements from a matrix A in CPU memory
201 * space to a matrix B in GPU memory space. Each element requires storage
202 * of elemSize bytes. Both matrices are assumed to be stored in column
203 * major format, with the leading dimension (i.e. number of rows) of
204 * source matrix A provided in lda, and the leading dimension of matrix B
205 * provided in ldb. In general, B points to an object, or part of an
206 * object, that was allocated via cublasAlloc().
210 * CUBLAS_STATUS_NOT_INITIALIZED if CUBLAS library has not been initialized
211 * CUBLAS_STATUS_INVALID_VALUE if rows or cols < 0, or elemSize, lda, or
213 * CUBLAS_STATUS_MAPPING_ERROR if error occurred accessing GPU memory
214 * CUBLAS_STATUS_SUCCESS if the operation completed successfully
216 cublasStatus_t CUBLASWINAPI cublasSetMatrix (int rows, int cols, int elemSize,
217 const void *A, int lda, void *B,
222 * cublasGetMatrix (int rows, int cols, int elemSize, const void *A,
223 * int lda, void *B, int ldb)
225 * copies a tile of rows x cols elements from a matrix A in GPU memory
226 * space to a matrix B in CPU memory space. Each element requires storage
227 * of elemSize bytes. Both matrices are assumed to be stored in column
228 * major format, with the leading dimension (i.e. number of rows) of
229 * source matrix A provided in lda, and the leading dimension of matrix B
230 * provided in ldb. In general, A points to an object, or part of an
231 * object, that was allocated via cublasAlloc().
235 * CUBLAS_STATUS_NOT_INITIALIZED if CUBLAS library has not been initialized
236 * CUBLAS_STATUS_INVALID_VALUE if rows, cols, elemSize, lda, or ldb <= 0
237 * CUBLAS_STATUS_MAPPING_ERROR if error occurred accessing GPU memory
238 * CUBLAS_STATUS_SUCCESS if the operation completed successfully
240 cublasStatus_t CUBLASWINAPI cublasGetMatrix (int rows, int cols, int elemSize,
241 const void *A, int lda, void *B,
246 * cublasSetVectorAsync ( int n, int elemSize, const void *x, int incx,
247 * void *y, int incy, cudaStream_t stream );
249 * cublasSetVectorAsync has the same functionality as cublasSetVector
250 * but the transfer is done asynchronously within the CUDA stream passed
255 * CUBLAS_STATUS_NOT_INITIALIZED if CUBLAS library has not been initialized
256 * CUBLAS_STATUS_INVALID_VALUE if incx, incy, or elemSize <= 0
257 * CUBLAS_STATUS_MAPPING_ERROR if an error occurred accessing GPU memory
258 * CUBLAS_STATUS_SUCCESS if the operation completed successfully
260 cublasStatus_t CUBLASWINAPI cublasSetVectorAsync (int n, int elemSize,
261 const void *hostPtr, int incx, /* hostPtr: source vector (x in the description), read-only */
262 void *devicePtr, int incy, /* devicePtr: destination vector (y in the description) */
263 cudaStream_t stream); /* CUDA stream in which the asynchronous transfer is done */
266 * cublasGetVectorAsync( int n, int elemSize, const void *x, int incx,
267 * void *y, int incy, cudaStream_t stream)
269 * cublasGetVectorAsync has the same functionality as cublasGetVector
270 * but the transfer is done asynchronously within the CUDA stream passed
275 * CUBLAS_STATUS_NOT_INITIALIZED if CUBLAS library has not been initialized
276 * CUBLAS_STATUS_INVALID_VALUE if incx, incy, or elemSize <= 0
277 * CUBLAS_STATUS_MAPPING_ERROR if an error occurred accessing GPU memory
278 * CUBLAS_STATUS_SUCCESS if the operation completed successfully
280 cublasStatus_t CUBLASWINAPI cublasGetVectorAsync (int n, int elemSize,
281 const void *devicePtr, int incx, /* devicePtr: source vector (x in the description), read-only */
282 void *hostPtr, int incy, /* hostPtr: destination vector (y in the description) */
283 cudaStream_t stream); /* CUDA stream in which the asynchronous transfer is done */
287 * cublasSetMatrixAsync (int rows, int cols, int elemSize, const void *A,
288 * int lda, void *B, int ldb, cudaStream_t stream)
290 * cublasSetMatrixAsync has the same functionality as cublasSetMatrix
291 * but the transfer is done asynchronously within the CUDA stream passed
296 * CUBLAS_STATUS_NOT_INITIALIZED if CUBLAS library has not been initialized
297 * CUBLAS_STATUS_INVALID_VALUE if rows or cols < 0, or elemSize, lda, or
299 * CUBLAS_STATUS_MAPPING_ERROR if error occurred accessing GPU memory
300 * CUBLAS_STATUS_SUCCESS if the operation completed successfully
302 cublasStatus_t CUBLASWINAPI cublasSetMatrixAsync (int rows, int cols, int elemSize, /* tile of rows x cols elements, elemSize bytes each */
303 const void *A, int lda, void *B, /* A: source matrix (CPU memory, as for cublasSetMatrix); B: destination matrix (GPU memory) */
304 int ldb, cudaStream_t stream); /* lda and ldb are the leading dimensions of A and B; stream carries the asynchronous transfer */
308 * cublasGetMatrixAsync (int rows, int cols, int elemSize, const void *A,
309 * int lda, void *B, int ldb, cudaStream_t stream)
311 * cublasGetMatrixAsync has the same functionality as cublasGetMatrix
312 * but the transfer is done asynchronously within the CUDA stream passed
317 * CUBLAS_STATUS_NOT_INITIALIZED if CUBLAS library has not been initialized
318 * CUBLAS_STATUS_INVALID_VALUE if rows, cols, elemSize, lda, or ldb <= 0
319 * CUBLAS_STATUS_MAPPING_ERROR if error occurred accessing GPU memory
320 * CUBLAS_STATUS_SUCCESS if the operation completed successfully
322 cublasStatus_t CUBLASWINAPI cublasGetMatrixAsync (int rows, int cols, int elemSize, /* tile of rows x cols elements, elemSize bytes each */
323 const void *A, int lda, void *B, /* A: source matrix (GPU memory, as for cublasGetMatrix); B: destination matrix (CPU memory) */
324 int ldb, cudaStream_t stream); /* lda and ldb are the leading dimensions of A and B; stream carries the asynchronous transfer */
327 CUBLASAPI void CUBLASWINAPI cublasXerbla (const char *srName, int info); /* returns void and takes no handle, unlike the status-returning routines around it; NOTE(review): presumably the BLAS-style error reporter (routine name plus argument index) - confirm */
328 /* ---------------- CUBLAS BLAS1 functions ---------------- */
329 CUBLASAPI cublasStatus_t CUBLASWINAPI cublasSnrm2_v2(cublasHandle_t handle,
333 float *result); /* host or device pointer */
335 CUBLASAPI cublasStatus_t CUBLASWINAPI cublasDnrm2_v2(cublasHandle_t handle,
339 double *result); /* host or device pointer */
341 CUBLASAPI cublasStatus_t CUBLASWINAPI cublasScnrm2_v2(cublasHandle_t handle,
345 float *result); /* host or device pointer */
347 CUBLASAPI cublasStatus_t CUBLASWINAPI cublasDznrm2_v2(cublasHandle_t handle,
349 const cuDoubleComplex *x,
351 double *result); /* host or device pointer */
353 CUBLASAPI cublasStatus_t CUBLASWINAPI cublasSdot_v2 (cublasHandle_t handle,
359 float *result); /* host or device pointer */
361 CUBLASAPI cublasStatus_t CUBLASWINAPI cublasDdot_v2 (cublasHandle_t handle,
367 double *result); /* host or device pointer */
369 CUBLASAPI cublasStatus_t CUBLASWINAPI cublasCdotu_v2 (cublasHandle_t handle,
375 cuComplex *result); /* host or device pointer */
377 CUBLASAPI cublasStatus_t CUBLASWINAPI cublasCdotc_v2 (cublasHandle_t handle,
383 cuComplex *result); /* host or device pointer */
385 CUBLASAPI cublasStatus_t CUBLASWINAPI cublasZdotu_v2 (cublasHandle_t handle,
387 const cuDoubleComplex *x,
389 const cuDoubleComplex *y,
391 cuDoubleComplex *result); /* host or device pointer */
393 CUBLASAPI cublasStatus_t CUBLASWINAPI cublasZdotc_v2 (cublasHandle_t handle,
395 const cuDoubleComplex *x,
397 const cuDoubleComplex *y,
399 cuDoubleComplex *result); /* host or device pointer */
401 CUBLASAPI cublasStatus_t CUBLASWINAPI cublasSscal_v2(cublasHandle_t handle,
403 const float *alpha, /* host or device pointer */
407 CUBLASAPI cublasStatus_t CUBLASWINAPI cublasDscal_v2(cublasHandle_t handle,
409 const double *alpha, /* host or device pointer */
413 CUBLASAPI cublasStatus_t CUBLASWINAPI cublasCscal_v2(cublasHandle_t handle,
415 const cuComplex *alpha, /* host or device pointer */
419 CUBLASAPI cublasStatus_t CUBLASWINAPI cublasCsscal_v2(cublasHandle_t handle,
421 const float *alpha, /* host or device pointer */
425 CUBLASAPI cublasStatus_t CUBLASWINAPI cublasZscal_v2(cublasHandle_t handle,
427 const cuDoubleComplex *alpha, /* host or device pointer */
431 CUBLASAPI cublasStatus_t CUBLASWINAPI cublasZdscal_v2(cublasHandle_t handle,
433 const double *alpha, /* host or device pointer */
437 CUBLASAPI cublasStatus_t CUBLASWINAPI cublasSaxpy_v2 (cublasHandle_t handle,
439 const float *alpha, /* host or device pointer */
445 CUBLASAPI cublasStatus_t CUBLASWINAPI cublasDaxpy_v2 (cublasHandle_t handle,
447 const double *alpha, /* host or device pointer */
453 CUBLASAPI cublasStatus_t CUBLASWINAPI cublasCaxpy_v2 (cublasHandle_t handle,
455 const cuComplex *alpha, /* host or device pointer */
461 CUBLASAPI cublasStatus_t CUBLASWINAPI cublasZaxpy_v2 (cublasHandle_t handle,
463 const cuDoubleComplex *alpha, /* host or device pointer */
464 const cuDoubleComplex *x,
469 CUBLASAPI cublasStatus_t CUBLASWINAPI cublasScopy_v2 (cublasHandle_t handle,
476 CUBLASAPI cublasStatus_t CUBLASWINAPI cublasDcopy_v2 (cublasHandle_t handle,
483 CUBLASAPI cublasStatus_t CUBLASWINAPI cublasCcopy_v2 (cublasHandle_t handle,
490 CUBLASAPI cublasStatus_t CUBLASWINAPI cublasZcopy_v2 (cublasHandle_t handle,
492 const cuDoubleComplex *x,
497 CUBLASAPI cublasStatus_t CUBLASWINAPI cublasSswap_v2 (cublasHandle_t handle,
504 CUBLASAPI cublasStatus_t CUBLASWINAPI cublasDswap_v2 (cublasHandle_t handle,
511 CUBLASAPI cublasStatus_t CUBLASWINAPI cublasCswap_v2 (cublasHandle_t handle,
518 CUBLASAPI cublasStatus_t CUBLASWINAPI cublasZswap_v2 (cublasHandle_t handle,
525 CUBLASAPI cublasStatus_t CUBLASWINAPI cublasIsamax_v2(cublasHandle_t handle,
529 int *result); /* host or device pointer */
531 CUBLASAPI cublasStatus_t CUBLASWINAPI cublasIdamax_v2(cublasHandle_t handle,
535 int *result); /* host or device pointer */
537 CUBLASAPI cublasStatus_t CUBLASWINAPI cublasIcamax_v2(cublasHandle_t handle,
541 int *result); /* host or device pointer */
543 CUBLASAPI cublasStatus_t CUBLASWINAPI cublasIzamax_v2(cublasHandle_t handle,
545 const cuDoubleComplex *x,
547 int *result); /* host or device pointer */
549 CUBLASAPI cublasStatus_t CUBLASWINAPI cublasIsamin_v2(cublasHandle_t handle,
553 int *result); /* host or device pointer */
555 CUBLASAPI cublasStatus_t CUBLASWINAPI cublasIdamin_v2(cublasHandle_t handle,
559 int *result); /* host or device pointer */
561 CUBLASAPI cublasStatus_t CUBLASWINAPI cublasIcamin_v2(cublasHandle_t handle,
565 int *result); /* host or device pointer */
567 CUBLASAPI cublasStatus_t CUBLASWINAPI cublasIzamin_v2(cublasHandle_t handle,
569 const cuDoubleComplex *x,
571 int *result); /* host or device pointer */
573 CUBLASAPI cublasStatus_t CUBLASWINAPI cublasSasum_v2(cublasHandle_t handle,
577 float *result); /* host or device pointer */
579 CUBLASAPI cublasStatus_t CUBLASWINAPI cublasDasum_v2(cublasHandle_t handle,
583 double *result); /* host or device pointer */
585 CUBLASAPI cublasStatus_t CUBLASWINAPI cublasScasum_v2(cublasHandle_t handle,
589 float *result); /* host or device pointer */
591 CUBLASAPI cublasStatus_t CUBLASWINAPI cublasDzasum_v2(cublasHandle_t handle,
593 const cuDoubleComplex *x,
595 double *result); /* host or device pointer */
597 CUBLASAPI cublasStatus_t CUBLASWINAPI cublasSrot_v2 (cublasHandle_t handle,
603 const float *c, /* host or device pointer */
604 const float *s); /* host or device pointer */
606 CUBLASAPI cublasStatus_t CUBLASWINAPI cublasDrot_v2 (cublasHandle_t handle,
612 const double *c, /* host or device pointer */
613 const double *s); /* host or device pointer */
615 CUBLASAPI cublasStatus_t CUBLASWINAPI cublasCrot_v2 (cublasHandle_t handle,
621 const float *c, /* host or device pointer */
622 const cuComplex *s); /* host or device pointer */
624 CUBLASAPI cublasStatus_t CUBLASWINAPI cublasCsrot_v2(cublasHandle_t handle,
630 const float *c, /* host or device pointer */
631 const float *s); /* host or device pointer */
633 CUBLASAPI cublasStatus_t CUBLASWINAPI cublasZrot_v2 (cublasHandle_t handle,
639 const double *c, /* host or device pointer */
640 const cuDoubleComplex *s); /* host or device pointer */
642 CUBLASAPI cublasStatus_t CUBLASWINAPI cublasZdrot_v2(cublasHandle_t handle,
648 const double *c, /* host or device pointer */
649 const double *s); /* host or device pointer */
651 CUBLASAPI cublasStatus_t CUBLASWINAPI cublasSrotg_v2(cublasHandle_t handle, /* float variant; no operand is const, so presumably all four may be written - confirm */
652 float *a, /* host or device pointer */
653 float *b, /* host or device pointer */
654 float *c, /* host or device pointer */
655 float *s); /* host or device pointer */
657 CUBLASAPI cublasStatus_t CUBLASWINAPI cublasDrotg_v2(cublasHandle_t handle, /* double variant of the same signature */
658 double *a, /* host or device pointer */
659 double *b, /* host or device pointer */
660 double *c, /* host or device pointer */
661 double *s); /* host or device pointer */
663 CUBLASAPI cublasStatus_t CUBLASWINAPI cublasCrotg_v2(cublasHandle_t handle, /* complex variant: a, b and s are cuComplex while c is a real float */
664 cuComplex *a, /* host or device pointer */
665 cuComplex *b, /* host or device pointer */
666 float *c, /* host or device pointer */
667 cuComplex *s); /* host or device pointer */
669 CUBLASAPI cublasStatus_t CUBLASWINAPI cublasZrotg_v2(cublasHandle_t handle, /* double-complex variant: a, b and s are cuDoubleComplex while c is a real double */
670 cuDoubleComplex *a, /* host or device pointer */
671 cuDoubleComplex *b, /* host or device pointer */
672 double *c, /* host or device pointer */
673 cuDoubleComplex *s); /* host or device pointer */
675 CUBLASAPI cublasStatus_t CUBLASWINAPI cublasSrotm_v2(cublasHandle_t handle,
681 const float* param); /* host or device pointer */
683 CUBLASAPI cublasStatus_t CUBLASWINAPI cublasDrotm_v2(cublasHandle_t handle,
689 const double* param); /* host or device pointer */
691 CUBLASAPI cublasStatus_t CUBLASWINAPI cublasSrotmg_v2(cublasHandle_t handle, /* float variant; y1 is the only const operand, so the others may presumably be written - confirm */
692 float *d1, /* host or device pointer */
693 float *d2, /* host or device pointer */
694 float *x1, /* host or device pointer */
695 const float *y1, /* host or device pointer */
696 float *param); /* host or device pointer */
698 CUBLASAPI cublasStatus_t CUBLASWINAPI cublasDrotmg_v2(cublasHandle_t handle, /* double variant of the same signature; y1 is again the only const operand */
699 double *d1, /* host or device pointer */
700 double *d2, /* host or device pointer */
701 double *x1, /* host or device pointer */
702 const double *y1, /* host or device pointer */
703 double *param); /* host or device pointer */
705 /* --------------- CUBLAS BLAS2 functions ---------------- */
708 CUBLASAPI cublasStatus_t CUBLASWINAPI cublasSgemv_v2 (cublasHandle_t handle,
709 cublasOperation_t trans,
712 const float *alpha, /* host or device pointer */
717 const float *beta, /* host or device pointer */
721 CUBLASAPI cublasStatus_t CUBLASWINAPI cublasDgemv_v2 (cublasHandle_t handle,
722 cublasOperation_t trans,
725 const double *alpha, /* host or device pointer */
730 const double *beta, /* host or device pointer */
734 CUBLASAPI cublasStatus_t CUBLASWINAPI cublasCgemv_v2 (cublasHandle_t handle,
735 cublasOperation_t trans,
738 const cuComplex *alpha, /* host or device pointer */
743 const cuComplex *beta, /* host or device pointer */
747 CUBLASAPI cublasStatus_t CUBLASWINAPI cublasZgemv_v2 (cublasHandle_t handle,
748 cublasOperation_t trans,
751 const cuDoubleComplex *alpha, /* host or device pointer */
752 const cuDoubleComplex *A,
754 const cuDoubleComplex *x,
756 const cuDoubleComplex *beta, /* host or device pointer */
760 CUBLASAPI cublasStatus_t CUBLASWINAPI cublasSgbmv_v2 (cublasHandle_t handle,
761 cublasOperation_t trans,
766 const float *alpha, /* host or device pointer */
771 const float *beta, /* host or device pointer */
775 CUBLASAPI cublasStatus_t CUBLASWINAPI cublasDgbmv_v2 (cublasHandle_t handle,
776 cublasOperation_t trans,
781 const double *alpha, /* host or device pointer */
786 const double *beta, /* host or device pointer */
790 CUBLASAPI cublasStatus_t CUBLASWINAPI cublasCgbmv_v2 (cublasHandle_t handle,
791 cublasOperation_t trans,
796 const cuComplex *alpha, /* host or device pointer */
801 const cuComplex *beta, /* host or device pointer */
805 CUBLASAPI cublasStatus_t CUBLASWINAPI cublasZgbmv_v2 (cublasHandle_t handle,
806 cublasOperation_t trans,
811 const cuDoubleComplex *alpha, /* host or device pointer */
812 const cuDoubleComplex *A,
814 const cuDoubleComplex *x,
816 const cuDoubleComplex *beta, /* host or device pointer */
821 CUBLASAPI cublasStatus_t CUBLASWINAPI cublasStrmv_v2 (cublasHandle_t handle,
822 cublasFillMode_t uplo,
823 cublasOperation_t trans,
824 cublasDiagType_t diag,
831 CUBLASAPI cublasStatus_t CUBLASWINAPI cublasDtrmv_v2 (cublasHandle_t handle,
832 cublasFillMode_t uplo,
833 cublasOperation_t trans,
834 cublasDiagType_t diag,
841 CUBLASAPI cublasStatus_t CUBLASWINAPI cublasCtrmv_v2 (cublasHandle_t handle,
842 cublasFillMode_t uplo,
843 cublasOperation_t trans,
844 cublasDiagType_t diag,
851 CUBLASAPI cublasStatus_t CUBLASWINAPI cublasZtrmv_v2 (cublasHandle_t handle,
852 cublasFillMode_t uplo,
853 cublasOperation_t trans,
854 cublasDiagType_t diag,
856 const cuDoubleComplex *A,
862 CUBLASAPI cublasStatus_t CUBLASWINAPI cublasStbmv_v2 (cublasHandle_t handle,
863 cublasFillMode_t uplo,
864 cublasOperation_t trans,
865 cublasDiagType_t diag,
873 CUBLASAPI cublasStatus_t CUBLASWINAPI cublasDtbmv_v2 (cublasHandle_t handle,
874 cublasFillMode_t uplo,
875 cublasOperation_t trans,
876 cublasDiagType_t diag,
884 CUBLASAPI cublasStatus_t CUBLASWINAPI cublasCtbmv_v2 (cublasHandle_t handle,
885 cublasFillMode_t uplo,
886 cublasOperation_t trans,
887 cublasDiagType_t diag,
895 CUBLASAPI cublasStatus_t CUBLASWINAPI cublasZtbmv_v2 (cublasHandle_t handle,
896 cublasFillMode_t uplo,
897 cublasOperation_t trans,
898 cublasDiagType_t diag,
901 const cuDoubleComplex *A,
907 CUBLASAPI cublasStatus_t CUBLASWINAPI cublasStpmv_v2 (cublasHandle_t handle,
908 cublasFillMode_t uplo,
909 cublasOperation_t trans,
910 cublasDiagType_t diag,
916 CUBLASAPI cublasStatus_t CUBLASWINAPI cublasDtpmv_v2 (cublasHandle_t handle,
917 cublasFillMode_t uplo,
918 cublasOperation_t trans,
919 cublasDiagType_t diag,
925 CUBLASAPI cublasStatus_t CUBLASWINAPI cublasCtpmv_v2 (cublasHandle_t handle,
926 cublasFillMode_t uplo,
927 cublasOperation_t trans,
928 cublasDiagType_t diag,
934 CUBLASAPI cublasStatus_t CUBLASWINAPI cublasZtpmv_v2 (cublasHandle_t handle,
935 cublasFillMode_t uplo,
936 cublasOperation_t trans,
937 cublasDiagType_t diag,
939 const cuDoubleComplex *AP,
944 CUBLASAPI cublasStatus_t CUBLASWINAPI cublasStrsv_v2 (cublasHandle_t handle,
945 cublasFillMode_t uplo,
946 cublasOperation_t trans,
947 cublasDiagType_t diag,
954 CUBLASAPI cublasStatus_t CUBLASWINAPI cublasDtrsv_v2 (cublasHandle_t handle,
955 cublasFillMode_t uplo,
956 cublasOperation_t trans,
957 cublasDiagType_t diag,
964 CUBLASAPI cublasStatus_t CUBLASWINAPI cublasCtrsv_v2 (cublasHandle_t handle,
965 cublasFillMode_t uplo,
966 cublasOperation_t trans,
967 cublasDiagType_t diag,
974 CUBLASAPI cublasStatus_t CUBLASWINAPI cublasZtrsv_v2 (cublasHandle_t handle,
975 cublasFillMode_t uplo,
976 cublasOperation_t trans,
977 cublasDiagType_t diag,
979 const cuDoubleComplex *A,
985 CUBLASAPI cublasStatus_t CUBLASWINAPI cublasStpsv_v2 (cublasHandle_t handle,
986 cublasFillMode_t uplo,
987 cublasOperation_t trans,
988 cublasDiagType_t diag,
994 CUBLASAPI cublasStatus_t CUBLASWINAPI cublasDtpsv_v2 (cublasHandle_t handle,
995 cublasFillMode_t uplo,
996 cublasOperation_t trans,
997 cublasDiagType_t diag,
1003 CUBLASAPI cublasStatus_t CUBLASWINAPI cublasCtpsv_v2 (cublasHandle_t handle,
1004 cublasFillMode_t uplo,
1005 cublasOperation_t trans,
1006 cublasDiagType_t diag,
1008 const cuComplex *AP,
1012 CUBLASAPI cublasStatus_t CUBLASWINAPI cublasZtpsv_v2 (cublasHandle_t handle,
1013 cublasFillMode_t uplo,
1014 cublasOperation_t trans,
1015 cublasDiagType_t diag,
1017 const cuDoubleComplex *AP,
1021 CUBLASAPI cublasStatus_t CUBLASWINAPI cublasStbsv_v2 (cublasHandle_t handle,
1022 cublasFillMode_t uplo,
1023 cublasOperation_t trans,
1024 cublasDiagType_t diag,
1032 CUBLASAPI cublasStatus_t CUBLASWINAPI cublasDtbsv_v2 (cublasHandle_t handle,
1033 cublasFillMode_t uplo,
1034 cublasOperation_t trans,
1035 cublasDiagType_t diag,
1043 CUBLASAPI cublasStatus_t CUBLASWINAPI cublasCtbsv_v2 (cublasHandle_t handle,
1044 cublasFillMode_t uplo,
1045 cublasOperation_t trans,
1046 cublasDiagType_t diag,
1054 CUBLASAPI cublasStatus_t CUBLASWINAPI cublasZtbsv_v2 (cublasHandle_t handle,
1055 cublasFillMode_t uplo,
1056 cublasOperation_t trans,
1057 cublasDiagType_t diag,
1060 const cuDoubleComplex *A,
1066 CUBLASAPI cublasStatus_t CUBLASWINAPI cublasSsymv_v2 (cublasHandle_t handle,
1067 cublasFillMode_t uplo,
1069 const float *alpha, /* host or device pointer */
1074 const float *beta, /* host or device pointer */
1078 CUBLASAPI cublasStatus_t CUBLASWINAPI cublasDsymv_v2 (cublasHandle_t handle,
1079 cublasFillMode_t uplo,
1081 const double *alpha, /* host or device pointer */
1086 const double *beta, /* host or device pointer */
1090 CUBLASAPI cublasStatus_t CUBLASWINAPI cublasCsymv_v2 (cublasHandle_t handle,
1091 cublasFillMode_t uplo,
1093 const cuComplex *alpha, /* host or device pointer */
1098 const cuComplex *beta, /* host or device pointer */
1102 CUBLASAPI cublasStatus_t CUBLASWINAPI cublasZsymv_v2 (cublasHandle_t handle,
1103 cublasFillMode_t uplo,
1105 const cuDoubleComplex *alpha, /* host or device pointer */
1106 const cuDoubleComplex *A,
1108 const cuDoubleComplex *x,
1110 const cuDoubleComplex *beta, /* host or device pointer */
1114 CUBLASAPI cublasStatus_t CUBLASWINAPI cublasChemv_v2 (cublasHandle_t handle,
1115 cublasFillMode_t uplo,
1117 const cuComplex *alpha, /* host or device pointer */
1122 const cuComplex *beta, /* host or device pointer */
1126 CUBLASAPI cublasStatus_t CUBLASWINAPI cublasZhemv_v2 (cublasHandle_t handle,
1127 cublasFillMode_t uplo,
1129 const cuDoubleComplex *alpha, /* host or device pointer */
1130 const cuDoubleComplex *A,
1132 const cuDoubleComplex *x,
1134 const cuDoubleComplex *beta, /* host or device pointer */
1139 CUBLASAPI cublasStatus_t CUBLASWINAPI cublasSsbmv_v2 (cublasHandle_t handle,
1140 cublasFillMode_t uplo,
1143 const float *alpha, /* host or device pointer */
1148 const float *beta, /* host or device pointer */
1152 CUBLASAPI cublasStatus_t CUBLASWINAPI cublasDsbmv_v2 (cublasHandle_t handle,
1153 cublasFillMode_t uplo,
1156 const double *alpha, /* host or device pointer */
1161 const double *beta, /* host or device pointer */
1165 CUBLASAPI cublasStatus_t CUBLASWINAPI cublasChbmv_v2 (cublasHandle_t handle,
1166 cublasFillMode_t uplo,
1169 const cuComplex *alpha, /* host or device pointer */
1174 const cuComplex *beta, /* host or device pointer */
1178 CUBLASAPI cublasStatus_t CUBLASWINAPI cublasZhbmv_v2 (cublasHandle_t handle,
1179 cublasFillMode_t uplo,
1182 const cuDoubleComplex *alpha, /* host or device pointer */
1183 const cuDoubleComplex *A,
1185 const cuDoubleComplex *x,
1187 const cuDoubleComplex *beta, /* host or device pointer */
1192 CUBLASAPI cublasStatus_t CUBLASWINAPI cublasSspmv_v2 (cublasHandle_t handle,
1193 cublasFillMode_t uplo,
1195 const float *alpha, /* host or device pointer */
1199 const float *beta, /* host or device pointer */
1203 CUBLASAPI cublasStatus_t CUBLASWINAPI cublasDspmv_v2 (cublasHandle_t handle,
1204 cublasFillMode_t uplo,
1206 const double *alpha, /* host or device pointer */
1210 const double *beta, /* host or device pointer */
1214 CUBLASAPI cublasStatus_t CUBLASWINAPI cublasChpmv_v2 (cublasHandle_t handle,
1215 cublasFillMode_t uplo,
1217 const cuComplex *alpha, /* host or device pointer */
1218 const cuComplex *AP,
1221 const cuComplex *beta, /* host or device pointer */
1225 CUBLASAPI cublasStatus_t CUBLASWINAPI cublasZhpmv_v2 (cublasHandle_t handle,
1226 cublasFillMode_t uplo,
1228 const cuDoubleComplex *alpha, /* host or device pointer */
1229 const cuDoubleComplex *AP,
1230 const cuDoubleComplex *x,
1232 const cuDoubleComplex *beta, /* host or device pointer */
1237 CUBLASAPI cublasStatus_t CUBLASWINAPI cublasSger_v2 (cublasHandle_t handle,
1240 const float *alpha, /* host or device pointer */
1248 CUBLASAPI cublasStatus_t CUBLASWINAPI cublasDger_v2 (cublasHandle_t handle,
1251 const double *alpha, /* host or device pointer */
1259 CUBLASAPI cublasStatus_t CUBLASWINAPI cublasCgeru_v2 (cublasHandle_t handle,
1262 const cuComplex *alpha, /* host or device pointer */
1270 CUBLASAPI cublasStatus_t CUBLASWINAPI cublasCgerc_v2 (cublasHandle_t handle,
1273 const cuComplex *alpha, /* host or device pointer */
1281 CUBLASAPI cublasStatus_t CUBLASWINAPI cublasZgeru_v2 (cublasHandle_t handle,
1284 const cuDoubleComplex *alpha, /* host or device pointer */
1285 const cuDoubleComplex *x,
1287 const cuDoubleComplex *y,
1292 CUBLASAPI cublasStatus_t CUBLASWINAPI cublasZgerc_v2 (cublasHandle_t handle,
1295 const cuDoubleComplex *alpha, /* host or device pointer */
1296 const cuDoubleComplex *x,
1298 const cuDoubleComplex *y,
1304 CUBLASAPI cublasStatus_t CUBLASWINAPI cublasSsyr_v2 (cublasHandle_t handle,
1305 cublasFillMode_t uplo,
1307 const float *alpha, /* host or device pointer */
1313 CUBLASAPI cublasStatus_t CUBLASWINAPI cublasDsyr_v2 (cublasHandle_t handle,
1314 cublasFillMode_t uplo,
1316 const double *alpha, /* host or device pointer */
1322 CUBLASAPI cublasStatus_t CUBLASWINAPI cublasCsyr_v2 (cublasHandle_t handle,
1323 cublasFillMode_t uplo,
1325 const cuComplex *alpha, /* host or device pointer */
1331 CUBLASAPI cublasStatus_t CUBLASWINAPI cublasZsyr_v2 (cublasHandle_t handle,
1332 cublasFillMode_t uplo,
1334 const cuDoubleComplex *alpha, /* host or device pointer */
1335 const cuDoubleComplex *x,
1340 CUBLASAPI cublasStatus_t CUBLASWINAPI cublasCher_v2 (cublasHandle_t handle,
1341 cublasFillMode_t uplo,
1343 const float *alpha, /* host or device pointer */
1349 CUBLASAPI cublasStatus_t CUBLASWINAPI cublasZher_v2 (cublasHandle_t handle,
1350 cublasFillMode_t uplo,
1352 const double *alpha, /* host or device pointer */
1353 const cuDoubleComplex *x,
/* cublasSspr_v2: returns a cublasStatus_t; the cuBLAS handle is the first argument.
 * uplo is a cublasFillMode_t selector.
 * alpha points to a float scalar that may live in host or device memory. */
1359 CUBLASAPI cublasStatus_t CUBLASWINAPI cublasSspr_v2 (cublasHandle_t handle,
1360 cublasFillMode_t uplo,
1362 const float *alpha, /* host or device pointer */
/* cublasDspr_v2: returns a cublasStatus_t; the cuBLAS handle is the first argument.
 * uplo is a cublasFillMode_t selector.
 * alpha points to a double scalar that may live in host or device memory. */
1367 CUBLASAPI cublasStatus_t CUBLASWINAPI cublasDspr_v2 (cublasHandle_t handle,
1368 cublasFillMode_t uplo,
1370 const double *alpha, /* host or device pointer */
/* cublasChpr_v2: returns a cublasStatus_t; the cuBLAS handle is the first argument.
 * uplo is a cublasFillMode_t selector.
 * alpha points to a float (not cuComplex) scalar in host or device memory. */
1375 CUBLASAPI cublasStatus_t CUBLASWINAPI cublasChpr_v2 (cublasHandle_t handle,
1376 cublasFillMode_t uplo,
1378 const float *alpha, /* host or device pointer */
/* cublasZhpr_v2: returns a cublasStatus_t; the cuBLAS handle is the first argument.
 * uplo is a cublasFillMode_t selector.
 * alpha points to a double scalar in host or device memory.
 * x is a read-only cuDoubleComplex pointer; AP is a writable
 * cuDoubleComplex pointer and the final parameter. */
1383 CUBLASAPI cublasStatus_t CUBLASWINAPI cublasZhpr_v2 (cublasHandle_t handle,
1384 cublasFillMode_t uplo,
1386 const double *alpha, /* host or device pointer */
1387 const cuDoubleComplex *x,
1389 cuDoubleComplex *AP);
/* cublasSsyr2_v2: returns a cublasStatus_t; the cuBLAS handle is the first argument.
 * uplo is a cublasFillMode_t selector.
 * alpha points to a float scalar that may live in host or device memory. */
1392 CUBLASAPI cublasStatus_t CUBLASWINAPI cublasSsyr2_v2 (cublasHandle_t handle,
1393 cublasFillMode_t uplo,
1395 const float *alpha, /* host or device pointer */
/* cublasDsyr2_v2: returns a cublasStatus_t; the cuBLAS handle is the first argument.
 * uplo is a cublasFillMode_t selector.
 * alpha points to a double scalar that may live in host or device memory. */
1403 CUBLASAPI cublasStatus_t CUBLASWINAPI cublasDsyr2_v2 (cublasHandle_t handle,
1404 cublasFillMode_t uplo,
1406 const double *alpha, /* host or device pointer */
/* cublasCsyr2_v2: returns a cublasStatus_t; the cuBLAS handle is the first argument.
 * uplo (cublasFillMode_t selector) and the int n are declared on one line.
 * alpha points to a cuComplex scalar that may live in host or device memory. */
1414 CUBLASAPI cublasStatus_t CUBLASWINAPI cublasCsyr2_v2 (cublasHandle_t handle,
1415 cublasFillMode_t uplo, int n,
1416 const cuComplex *alpha, /* host or device pointer */
/* cublasZsyr2_v2: returns a cublasStatus_t; the cuBLAS handle is the first argument.
 * uplo is a cublasFillMode_t selector.
 * alpha points to a cuDoubleComplex scalar in host or device memory.
 * x and y are const (read-only) cuDoubleComplex pointers. */
1424 CUBLASAPI cublasStatus_t CUBLASWINAPI cublasZsyr2_v2 (cublasHandle_t handle,
1425 cublasFillMode_t uplo,
1427 const cuDoubleComplex *alpha, /* host or device pointer */
1428 const cuDoubleComplex *x,
1430 const cuDoubleComplex *y,
/* cublasCher2_v2: returns a cublasStatus_t; the cuBLAS handle is the first argument.
 * uplo (cublasFillMode_t selector) and the int n are declared on one line.
 * alpha points to a cuComplex scalar that may live in host or device memory. */
1436 CUBLASAPI cublasStatus_t CUBLASWINAPI cublasCher2_v2 (cublasHandle_t handle,
1437 cublasFillMode_t uplo, int n,
1438 const cuComplex *alpha, /* host or device pointer */
/* cublasZher2_v2: returns a cublasStatus_t; the cuBLAS handle is the first argument.
 * uplo is a cublasFillMode_t selector.
 * alpha points to a cuDoubleComplex scalar in host or device memory.
 * x and y are const (read-only) cuDoubleComplex pointers. */
1446 CUBLASAPI cublasStatus_t CUBLASWINAPI cublasZher2_v2 (cublasHandle_t handle,
1447 cublasFillMode_t uplo,
1449 const cuDoubleComplex *alpha, /* host or device pointer */
1450 const cuDoubleComplex *x,
1452 const cuDoubleComplex *y,
/* cublasSspr2_v2: returns a cublasStatus_t; the cuBLAS handle is the first argument.
 * uplo is a cublasFillMode_t selector.
 * alpha points to a float scalar that may live in host or device memory. */
1458 CUBLASAPI cublasStatus_t CUBLASWINAPI cublasSspr2_v2 (cublasHandle_t handle,
1459 cublasFillMode_t uplo,
1461 const float *alpha, /* host or device pointer */
/* cublasDspr2_v2: returns a cublasStatus_t; the cuBLAS handle is the first argument.
 * uplo is a cublasFillMode_t selector.
 * alpha points to a double scalar that may live in host or device memory. */
1468 CUBLASAPI cublasStatus_t CUBLASWINAPI cublasDspr2_v2 (cublasHandle_t handle,
1469 cublasFillMode_t uplo,
1471 const double *alpha, /* host or device pointer */
/* cublasChpr2_v2: returns a cublasStatus_t; the cuBLAS handle is the first argument.
 * uplo is a cublasFillMode_t selector.
 * alpha points to a cuComplex scalar that may live in host or device memory. */
1479 CUBLASAPI cublasStatus_t CUBLASWINAPI cublasChpr2_v2 (cublasHandle_t handle,
1480 cublasFillMode_t uplo,
1482 const cuComplex *alpha, /* host or device pointer */
/* cublasZhpr2_v2: returns a cublasStatus_t; the cuBLAS handle is the first argument.
 * uplo is a cublasFillMode_t selector.
 * alpha points to a cuDoubleComplex scalar in host or device memory.
 * x and y are read-only cuDoubleComplex pointers; AP is a writable
 * cuDoubleComplex pointer and the final parameter. */
1489 CUBLASAPI cublasStatus_t CUBLASWINAPI cublasZhpr2_v2 (cublasHandle_t handle,
1490 cublasFillMode_t uplo,
1492 const cuDoubleComplex *alpha, /* host or device pointer */
1493 const cuDoubleComplex *x,
1495 const cuDoubleComplex *y,
1497 cuDoubleComplex *AP);
1499 /* ---------------- CUBLAS BLAS3 functions ---------------- */
/* cublasSgemm_v2: returns a cublasStatus_t; the cuBLAS handle is the first argument.
 * transa and transb are cublasOperation_t selectors.
 * alpha and beta point to float scalars in host or device memory. */
1502 CUBLASAPI cublasStatus_t CUBLASWINAPI cublasSgemm_v2 (cublasHandle_t handle,
1503 cublasOperation_t transa,
1504 cublasOperation_t transb,
1508 const float *alpha, /* host or device pointer */
1513 const float *beta, /* host or device pointer */
/* cublasDgemm_v2: returns a cublasStatus_t; the cuBLAS handle is the first argument.
 * transa and transb are cublasOperation_t selectors.
 * alpha and beta point to double scalars in host or device memory. */
1517 CUBLASAPI cublasStatus_t CUBLASWINAPI cublasDgemm_v2 (cublasHandle_t handle,
1518 cublasOperation_t transa,
1519 cublasOperation_t transb,
1523 const double *alpha, /* host or device pointer */
1528 const double *beta, /* host or device pointer */
/* cublasCgemm_v2: returns a cublasStatus_t; the cuBLAS handle is the first argument.
 * transa and transb are cublasOperation_t selectors.
 * alpha and beta point to cuComplex scalars in host or device memory. */
1532 CUBLASAPI cublasStatus_t CUBLASWINAPI cublasCgemm_v2 (cublasHandle_t handle,
1533 cublasOperation_t transa,
1534 cublasOperation_t transb,
1538 const cuComplex *alpha, /* host or device pointer */
1543 const cuComplex *beta, /* host or device pointer */
/* cublasZgemm_v2: returns a cublasStatus_t; the cuBLAS handle is the first argument.
 * transa and transb are cublasOperation_t selectors.
 * alpha and beta point to cuDoubleComplex scalars in host or device memory.
 * A and B are const (read-only) cuDoubleComplex pointers. */
1547 CUBLASAPI cublasStatus_t CUBLASWINAPI cublasZgemm_v2 (cublasHandle_t handle,
1548 cublasOperation_t transa,
1549 cublasOperation_t transb,
1553 const cuDoubleComplex *alpha, /* host or device pointer */
1554 const cuDoubleComplex *A,
1556 const cuDoubleComplex *B,
1558 const cuDoubleComplex *beta, /* host or device pointer */
/* cublasSsyrk_v2: returns a cublasStatus_t; the cuBLAS handle is the first argument.
 * uplo is a cublasFillMode_t selector; trans is a cublasOperation_t selector.
 * alpha and beta point to float scalars in host or device memory. */
1563 CUBLASAPI cublasStatus_t CUBLASWINAPI cublasSsyrk_v2 (cublasHandle_t handle,
1564 cublasFillMode_t uplo,
1565 cublasOperation_t trans,
1568 const float *alpha, /* host or device pointer */
1571 const float *beta, /* host or device pointer */
/* cublasDsyrk_v2: returns a cublasStatus_t; the cuBLAS handle is the first argument.
 * uplo is a cublasFillMode_t selector; trans is a cublasOperation_t selector.
 * alpha and beta point to double scalars in host or device memory. */
1575 CUBLASAPI cublasStatus_t CUBLASWINAPI cublasDsyrk_v2 (cublasHandle_t handle,
1576 cublasFillMode_t uplo,
1577 cublasOperation_t trans,
1580 const double *alpha, /* host or device pointer */
1583 const double *beta, /* host or device pointer */
/* cublasCsyrk_v2: returns a cublasStatus_t; the cuBLAS handle is the first argument.
 * uplo is a cublasFillMode_t selector; trans is a cublasOperation_t selector.
 * alpha and beta point to cuComplex scalars in host or device memory. */
1587 CUBLASAPI cublasStatus_t CUBLASWINAPI cublasCsyrk_v2 (cublasHandle_t handle,
1588 cublasFillMode_t uplo,
1589 cublasOperation_t trans,
1592 const cuComplex *alpha, /* host or device pointer */
1595 const cuComplex *beta, /* host or device pointer */
/* cublasZsyrk_v2: returns a cublasStatus_t; the cuBLAS handle is the first argument.
 * uplo is a cublasFillMode_t selector; trans is a cublasOperation_t selector.
 * alpha and beta point to cuDoubleComplex scalars in host or device memory.
 * A is a const (read-only) cuDoubleComplex pointer. */
1599 CUBLASAPI cublasStatus_t CUBLASWINAPI cublasZsyrk_v2 (cublasHandle_t handle,
1600 cublasFillMode_t uplo,
1601 cublasOperation_t trans,
1604 const cuDoubleComplex *alpha, /* host or device pointer */
1605 const cuDoubleComplex *A,
1607 const cuDoubleComplex *beta, /* host or device pointer */
/* cublasCherk_v2: returns a cublasStatus_t; the cuBLAS handle is the first argument.
 * uplo is a cublasFillMode_t selector; trans is a cublasOperation_t selector.
 * Both alpha and beta point to float (not cuComplex) scalars in host or
 * device memory. */
1611 CUBLASAPI cublasStatus_t CUBLASWINAPI cublasCherk_v2 (cublasHandle_t handle,
1612 cublasFillMode_t uplo,
1613 cublasOperation_t trans,
1616 const float *alpha, /* host or device pointer */
1619 const float *beta, /* host or device pointer */
/* cublasZherk_v2: returns a cublasStatus_t; the cuBLAS handle is the first argument.
 * uplo is a cublasFillMode_t selector; trans is a cublasOperation_t selector.
 * alpha and beta point to double scalars in host or device memory, whereas
 * the read-only matrix pointer A is cuDoubleComplex. */
1623 CUBLASAPI cublasStatus_t CUBLASWINAPI cublasZherk_v2 (cublasHandle_t handle,
1624 cublasFillMode_t uplo,
1625 cublasOperation_t trans,
1628 const double *alpha, /* host or device pointer */
1629 const cuDoubleComplex *A,
1631 const double *beta, /* host or device pointer */
/* cublasSsyr2k_v2: returns a cublasStatus_t; the cuBLAS handle is the first argument.
 * uplo is a cublasFillMode_t selector; trans is a cublasOperation_t selector.
 * alpha and beta point to float scalars in host or device memory. */
1636 CUBLASAPI cublasStatus_t CUBLASWINAPI cublasSsyr2k_v2 (cublasHandle_t handle,
1637 cublasFillMode_t uplo,
1638 cublasOperation_t trans,
1641 const float *alpha, /* host or device pointer */
1646 const float *beta, /* host or device pointer */
/* cublasDsyr2k_v2: returns a cublasStatus_t; the cuBLAS handle is the first argument.
 * uplo is a cublasFillMode_t selector; trans is a cublasOperation_t selector.
 * alpha and beta point to double scalars in host or device memory. */
1650 CUBLASAPI cublasStatus_t CUBLASWINAPI cublasDsyr2k_v2 (cublasHandle_t handle,
1651 cublasFillMode_t uplo,
1652 cublasOperation_t trans,
1655 const double *alpha, /* host or device pointer */
1660 const double *beta, /* host or device pointer */
/* cublasCsyr2k_v2: returns a cublasStatus_t; the cuBLAS handle is the first argument.
 * uplo is a cublasFillMode_t selector; trans is a cublasOperation_t selector.
 * alpha and beta point to cuComplex scalars in host or device memory. */
1664 CUBLASAPI cublasStatus_t CUBLASWINAPI cublasCsyr2k_v2 (cublasHandle_t handle,
1665 cublasFillMode_t uplo,
1666 cublasOperation_t trans,
1669 const cuComplex *alpha, /* host or device pointer */
1674 const cuComplex *beta, /* host or device pointer */
/* cublasZsyr2k_v2: returns a cublasStatus_t; the cuBLAS handle is the first argument.
 * uplo is a cublasFillMode_t selector; trans is a cublasOperation_t selector.
 * alpha and beta point to cuDoubleComplex scalars in host or device memory.
 * A and B are const (read-only) cuDoubleComplex pointers. */
1678 CUBLASAPI cublasStatus_t CUBLASWINAPI cublasZsyr2k_v2 (cublasHandle_t handle,
1679 cublasFillMode_t uplo,
1680 cublasOperation_t trans,
1683 const cuDoubleComplex *alpha, /* host or device pointer */
1684 const cuDoubleComplex *A,
1686 const cuDoubleComplex *B,
1688 const cuDoubleComplex *beta, /* host or device pointer */
/* cublasCher2k_v2: returns a cublasStatus_t; the cuBLAS handle is the first argument.
 * uplo is a cublasFillMode_t selector; trans is a cublasOperation_t selector.
 * The two scalars differ in type: alpha points to a cuComplex, beta to a
 * float. Both may live in host or device memory. */
1692 CUBLASAPI cublasStatus_t CUBLASWINAPI cublasCher2k_v2 (cublasHandle_t handle,
1693 cublasFillMode_t uplo,
1694 cublasOperation_t trans,
1697 const cuComplex *alpha, /* host or device pointer */
1702 const float *beta, /* host or device pointer */
/* cublasZher2k_v2: returns a cublasStatus_t; the cuBLAS handle is the first argument.
 * uplo is a cublasFillMode_t selector; trans is a cublasOperation_t selector.
 * The two scalars differ in type: alpha points to a cuDoubleComplex, beta to
 * a double. Both may live in host or device memory.
 * A and B are const (read-only) cuDoubleComplex pointers. */
1706 CUBLASAPI cublasStatus_t CUBLASWINAPI cublasZher2k_v2 (cublasHandle_t handle,
1707 cublasFillMode_t uplo,
1708 cublasOperation_t trans,
1711 const cuDoubleComplex *alpha, /* host or device pointer */
1712 const cuDoubleComplex *A,
1714 const cuDoubleComplex *B,
1716 const double *beta, /* host or device pointer */
1719 /* SYRKX: eXtended SYRK */
/* cublasSsyrkx: returns a cublasStatus_t; the cuBLAS handle is the first argument.
 * uplo is a cublasFillMode_t selector; trans is a cublasOperation_t selector.
 * alpha and beta point to float scalars in host or device memory. */
1720 CUBLASAPI cublasStatus_t CUBLASWINAPI cublasSsyrkx (cublasHandle_t handle,
1721 cublasFillMode_t uplo,
1722 cublasOperation_t trans,
1725 const float *alpha, /* host or device pointer */
1730 const float *beta, /* host or device pointer */
/* cublasDsyrkx: returns a cublasStatus_t; the cuBLAS handle is the first argument.
 * uplo is a cublasFillMode_t selector; trans is a cublasOperation_t selector.
 * alpha and beta point to double scalars in host or device memory. */
1734 CUBLASAPI cublasStatus_t CUBLASWINAPI cublasDsyrkx (cublasHandle_t handle,
1735 cublasFillMode_t uplo,
1736 cublasOperation_t trans,
1739 const double *alpha, /* host or device pointer */
1744 const double *beta, /* host or device pointer */
/* cublasCsyrkx: returns a cublasStatus_t; the cuBLAS handle is the first argument.
 * uplo is a cublasFillMode_t selector; trans is a cublasOperation_t selector.
 * alpha and beta point to cuComplex scalars in host or device memory. */
1748 CUBLASAPI cublasStatus_t CUBLASWINAPI cublasCsyrkx (cublasHandle_t handle,
1749 cublasFillMode_t uplo,
1750 cublasOperation_t trans,
1753 const cuComplex *alpha, /* host or device pointer */
1758 const cuComplex *beta, /* host or device pointer */
/* cublasZsyrkx: returns a cublasStatus_t; the cuBLAS handle is the first argument.
 * uplo is a cublasFillMode_t selector; trans is a cublasOperation_t selector.
 * alpha and beta point to cuDoubleComplex scalars in host or device memory.
 * A and B are const (read-only) cuDoubleComplex pointers. */
1762 CUBLASAPI cublasStatus_t CUBLASWINAPI cublasZsyrkx (cublasHandle_t handle,
1763 cublasFillMode_t uplo,
1764 cublasOperation_t trans,
1767 const cuDoubleComplex *alpha, /* host or device pointer */
1768 const cuDoubleComplex *A,
1770 const cuDoubleComplex *B,
1772 const cuDoubleComplex *beta, /* host or device pointer */
1775 /* HERKX : eXtended HERK */
/* cublasCherkx: returns a cublasStatus_t; the cuBLAS handle is the first argument.
 * uplo is a cublasFillMode_t selector; trans is a cublasOperation_t selector.
 * The two scalars differ in type: alpha points to a cuComplex, beta to a
 * float. Both may live in host or device memory. */
1776 CUBLASAPI cublasStatus_t CUBLASWINAPI cublasCherkx (cublasHandle_t handle,
1777 cublasFillMode_t uplo,
1778 cublasOperation_t trans,
1781 const cuComplex *alpha, /* host or device pointer */
1786 const float *beta, /* host or device pointer */
/* cublasZherkx: returns a cublasStatus_t; the cuBLAS handle is the first argument.
 * uplo is a cublasFillMode_t selector; trans is a cublasOperation_t selector.
 * The two scalars differ in type: alpha points to a cuDoubleComplex, beta to
 * a double. Both may live in host or device memory.
 * A and B are const (read-only) cuDoubleComplex pointers. */
1790 CUBLASAPI cublasStatus_t CUBLASWINAPI cublasZherkx (cublasHandle_t handle,
1791 cublasFillMode_t uplo,
1792 cublasOperation_t trans,
1795 const cuDoubleComplex *alpha, /* host or device pointer */
1796 const cuDoubleComplex *A,
1798 const cuDoubleComplex *B,
1800 const double *beta, /* host or device pointer */
/* cublasSsymm_v2: returns a cublasStatus_t; the cuBLAS handle is the first argument.
 * side is a cublasSideMode_t selector; uplo is a cublasFillMode_t selector.
 * alpha and beta point to float scalars in host or device memory. */
1804 CUBLASAPI cublasStatus_t CUBLASWINAPI cublasSsymm_v2 (cublasHandle_t handle,
1805 cublasSideMode_t side,
1806 cublasFillMode_t uplo,
1809 const float *alpha, /* host or device pointer */
1814 const float *beta, /* host or device pointer */
/* cublasDsymm_v2: returns a cublasStatus_t; the cuBLAS handle is the first argument.
 * side is a cublasSideMode_t selector; uplo is a cublasFillMode_t selector.
 * alpha and beta point to double scalars in host or device memory. */
1818 CUBLASAPI cublasStatus_t CUBLASWINAPI cublasDsymm_v2 (cublasHandle_t handle,
1819 cublasSideMode_t side,
1820 cublasFillMode_t uplo,
1823 const double *alpha, /* host or device pointer */
1828 const double *beta, /* host or device pointer */
/* cublasCsymm_v2: returns a cublasStatus_t; the cuBLAS handle is the first argument.
 * side is a cublasSideMode_t selector; uplo is a cublasFillMode_t selector.
 * alpha and beta point to cuComplex scalars in host or device memory. */
1832 CUBLASAPI cublasStatus_t CUBLASWINAPI cublasCsymm_v2 (cublasHandle_t handle,
1833 cublasSideMode_t side,
1834 cublasFillMode_t uplo,
1837 const cuComplex *alpha, /* host or device pointer */
1842 const cuComplex *beta, /* host or device pointer */
/* cublasZsymm_v2: returns a cublasStatus_t; the cuBLAS handle is the first argument.
 * side is a cublasSideMode_t selector; uplo is a cublasFillMode_t selector.
 * alpha and beta point to cuDoubleComplex scalars in host or device memory.
 * A and B are const (read-only) cuDoubleComplex pointers. */
1846 CUBLASAPI cublasStatus_t CUBLASWINAPI cublasZsymm_v2 (cublasHandle_t handle,
1847 cublasSideMode_t side,
1848 cublasFillMode_t uplo,
1851 const cuDoubleComplex *alpha, /* host or device pointer */
1852 const cuDoubleComplex *A,
1854 const cuDoubleComplex *B,
1856 const cuDoubleComplex *beta, /* host or device pointer */
/* cublasChemm_v2: returns a cublasStatus_t; the cuBLAS handle is the first argument.
 * side is a cublasSideMode_t selector; uplo is a cublasFillMode_t selector.
 * alpha and beta point to cuComplex scalars in host or device memory. */
1861 CUBLASAPI cublasStatus_t CUBLASWINAPI cublasChemm_v2 (cublasHandle_t handle,
1862 cublasSideMode_t side,
1863 cublasFillMode_t uplo,
1866 const cuComplex *alpha, /* host or device pointer */
1871 const cuComplex *beta, /* host or device pointer */
/* cublasZhemm_v2: returns a cublasStatus_t; the cuBLAS handle is the first argument.
 * side is a cublasSideMode_t selector; uplo is a cublasFillMode_t selector.
 * alpha and beta point to cuDoubleComplex scalars in host or device memory.
 * A and B are const (read-only) cuDoubleComplex pointers. */
1875 CUBLASAPI cublasStatus_t CUBLASWINAPI cublasZhemm_v2 (cublasHandle_t handle,
1876 cublasSideMode_t side,
1877 cublasFillMode_t uplo,
1880 const cuDoubleComplex *alpha, /* host or device pointer */
1881 const cuDoubleComplex *A,
1883 const cuDoubleComplex *B,
1885 const cuDoubleComplex *beta, /* host or device pointer */
/* cublasStrsm_v2: returns a cublasStatus_t; the cuBLAS handle is the first argument.
 * side, uplo, trans and diag are enum selectors (cublasSideMode_t,
 * cublasFillMode_t, cublasOperation_t, cublasDiagType_t).
 * alpha points to a float scalar in host or device memory. */
1890 CUBLASAPI cublasStatus_t CUBLASWINAPI cublasStrsm_v2 (cublasHandle_t handle,
1891 cublasSideMode_t side,
1892 cublasFillMode_t uplo,
1893 cublasOperation_t trans,
1894 cublasDiagType_t diag,
1897 const float *alpha, /* host or device pointer */
/* cublasDtrsm_v2: returns a cublasStatus_t; the cuBLAS handle is the first argument.
 * side, uplo, trans and diag are enum selectors (cublasSideMode_t,
 * cublasFillMode_t, cublasOperation_t, cublasDiagType_t).
 * alpha points to a double scalar in host or device memory. */
1904 CUBLASAPI cublasStatus_t CUBLASWINAPI cublasDtrsm_v2 (cublasHandle_t handle,
1905 cublasSideMode_t side,
1906 cublasFillMode_t uplo,
1907 cublasOperation_t trans,
1908 cublasDiagType_t diag,
1911 const double *alpha, /* host or device pointer */
/* cublasCtrsm_v2: returns a cublasStatus_t; the cuBLAS handle is the first argument.
 * side, uplo, trans and diag are enum selectors (cublasSideMode_t,
 * cublasFillMode_t, cublasOperation_t, cublasDiagType_t).
 * alpha points to a cuComplex scalar in host or device memory. */
1917 CUBLASAPI cublasStatus_t CUBLASWINAPI cublasCtrsm_v2(cublasHandle_t handle,
1918 cublasSideMode_t side,
1919 cublasFillMode_t uplo,
1920 cublasOperation_t trans,
1921 cublasDiagType_t diag,
1924 const cuComplex *alpha, /* host or device pointer */
/* cublasZtrsm_v2: returns a cublasStatus_t; the cuBLAS handle is the first argument.
 * side, uplo, trans and diag are enum selectors (cublasSideMode_t,
 * cublasFillMode_t, cublasOperation_t, cublasDiagType_t).
 * alpha points to a cuDoubleComplex scalar in host or device memory.
 * A is a const (read-only) cuDoubleComplex pointer. */
1930 CUBLASAPI cublasStatus_t CUBLASWINAPI cublasZtrsm_v2(cublasHandle_t handle,
1931 cublasSideMode_t side,
1932 cublasFillMode_t uplo,
1933 cublasOperation_t trans,
1934 cublasDiagType_t diag,
1937 const cuDoubleComplex *alpha, /* host or device pointer */
1938 const cuDoubleComplex *A,
/* cublasStrmm_v2: returns a cublasStatus_t; the cuBLAS handle is the first argument.
 * side, uplo, trans and diag are enum selectors (cublasSideMode_t,
 * cublasFillMode_t, cublasOperation_t, cublasDiagType_t).
 * alpha points to a float scalar in host or device memory. */
1944 CUBLASAPI cublasStatus_t CUBLASWINAPI cublasStrmm_v2 (cublasHandle_t handle,
1945 cublasSideMode_t side,
1946 cublasFillMode_t uplo,
1947 cublasOperation_t trans,
1948 cublasDiagType_t diag,
1951 const float *alpha, /* host or device pointer */
/* cublasDtrmm_v2: returns a cublasStatus_t; the cuBLAS handle is the first argument.
 * side, uplo, trans and diag are enum selectors (cublasSideMode_t,
 * cublasFillMode_t, cublasOperation_t, cublasDiagType_t).
 * alpha points to a double scalar in host or device memory. */
1959 CUBLASAPI cublasStatus_t CUBLASWINAPI cublasDtrmm_v2 (cublasHandle_t handle,
1960 cublasSideMode_t side,
1961 cublasFillMode_t uplo,
1962 cublasOperation_t trans,
1963 cublasDiagType_t diag,
1966 const double *alpha, /* host or device pointer */
/* cublasCtrmm_v2: returns a cublasStatus_t; the cuBLAS handle is the first argument.
 * side, uplo, trans and diag are enum selectors (cublasSideMode_t,
 * cublasFillMode_t, cublasOperation_t, cublasDiagType_t).
 * alpha points to a cuComplex scalar in host or device memory. */
1974 CUBLASAPI cublasStatus_t CUBLASWINAPI cublasCtrmm_v2(cublasHandle_t handle,
1975 cublasSideMode_t side,
1976 cublasFillMode_t uplo,
1977 cublasOperation_t trans,
1978 cublasDiagType_t diag,
1981 const cuComplex *alpha, /* host or device pointer */
/* cublasZtrmm_v2: returns a cublasStatus_t; the cuBLAS handle is the first argument
 * (side shares its line). side, uplo, trans and diag are enum selectors
 * (cublasSideMode_t, cublasFillMode_t, cublasOperation_t, cublasDiagType_t).
 * alpha points to a cuDoubleComplex scalar in host or device memory.
 * A and B are const (read-only) cuDoubleComplex pointers. */
1989 CUBLASAPI cublasStatus_t CUBLASWINAPI cublasZtrmm_v2(cublasHandle_t handle, cublasSideMode_t side,
1990 cublasFillMode_t uplo,
1991 cublasOperation_t trans,
1992 cublasDiagType_t diag,
1995 const cuDoubleComplex *alpha, /* host or device pointer */
1996 const cuDoubleComplex *A,
1998 const cuDoubleComplex *B,
/* cublasSgemmBatched: returns a cublasStatus_t; the cuBLAS handle is the first argument.
 * transa and transb are cublasOperation_t selectors.
 * alpha and beta point to float scalars in host or device memory.
 * Aarray and Barray are arrays of pointers to const float. */
2003 CUBLASAPI cublasStatus_t CUBLASWINAPI cublasSgemmBatched (cublasHandle_t handle,
2004 cublasOperation_t transa,
2005 cublasOperation_t transb,
2009 const float *alpha, /* host or device pointer */
2010 const float *Aarray[],
2012 const float *Barray[],
2014 const float *beta, /* host or device pointer */
/* cublasDgemmBatched: returns a cublasStatus_t; the cuBLAS handle is the first argument.
 * transa and transb are cublasOperation_t selectors.
 * alpha and beta point to double scalars in host or device memory.
 * Aarray and Barray are arrays of pointers to const double. */
2019 CUBLASAPI cublasStatus_t CUBLASWINAPI cublasDgemmBatched (cublasHandle_t handle,
2020 cublasOperation_t transa,
2021 cublasOperation_t transb,
2025 const double *alpha, /* host or device pointer */
2026 const double *Aarray[],
2028 const double *Barray[],
2030 const double *beta, /* host or device pointer */
/* cublasCgemmBatched: returns a cublasStatus_t; the cuBLAS handle is the first argument.
 * transa and transb are cublasOperation_t selectors.
 * alpha and beta point to cuComplex scalars in host or device memory.
 * Aarray and Barray are arrays of pointers to const cuComplex; Carray is an
 * array of pointers to non-const cuComplex. */
2035 CUBLASAPI cublasStatus_t CUBLASWINAPI cublasCgemmBatched (cublasHandle_t handle,
2036 cublasOperation_t transa,
2037 cublasOperation_t transb,
2041 const cuComplex *alpha, /* host or device pointer */
2042 const cuComplex *Aarray[],
2044 const cuComplex *Barray[],
2046 const cuComplex *beta, /* host or device pointer */
2047 cuComplex *Carray[],
/* cublasZgemmBatched: returns a cublasStatus_t; the cuBLAS handle is the first argument.
 * transa and transb are cublasOperation_t selectors.
 * alpha and beta point to cuDoubleComplex scalars in host or device memory.
 * Aarray and Barray are arrays of pointers to const cuDoubleComplex; Carray
 * is an array of pointers to non-const cuDoubleComplex. */
2051 CUBLASAPI cublasStatus_t CUBLASWINAPI cublasZgemmBatched (cublasHandle_t handle,
2052 cublasOperation_t transa,
2053 cublasOperation_t transb,
2057 const cuDoubleComplex *alpha, /* host or device pointer */
2058 const cuDoubleComplex *Aarray[],
2060 const cuDoubleComplex *Barray[],
2062 const cuDoubleComplex *beta, /* host or device pointer */
2063 cuDoubleComplex *Carray[],
2067 /* ---------------- CUBLAS BLAS-like extension ---------------- */
/* cublasSgeam: returns a cublasStatus_t; the cuBLAS handle is the first argument.
 * transa and transb are cublasOperation_t selectors.
 * alpha and beta point to float scalars in host or device memory. */
2069 CUBLASAPI cublasStatus_t CUBLASWINAPI cublasSgeam(cublasHandle_t handle,
2070 cublasOperation_t transa,
2071 cublasOperation_t transb,
2074 const float *alpha, /* host or device pointer */
2077 const float *beta , /* host or device pointer */
/* cublasDgeam: returns a cublasStatus_t; the cuBLAS handle is the first argument.
 * transa and transb are cublasOperation_t selectors.
 * alpha and beta point to double scalars in host or device memory. */
2083 CUBLASAPI cublasStatus_t CUBLASWINAPI cublasDgeam(cublasHandle_t handle,
2084 cublasOperation_t transa,
2085 cublasOperation_t transb,
2088 const double *alpha, /* host or device pointer */
2091 const double *beta, /* host or device pointer */
/* cublasCgeam: returns a cublasStatus_t; the cuBLAS handle is the first argument.
 * transa and transb are cublasOperation_t selectors.
 * alpha and beta point to cuComplex scalars in host or device memory. */
2097 CUBLASAPI cublasStatus_t CUBLASWINAPI cublasCgeam(cublasHandle_t handle,
2098 cublasOperation_t transa,
2099 cublasOperation_t transb,
2102 const cuComplex *alpha, /* host or device pointer */
2105 const cuComplex *beta, /* host or device pointer */
/* cublasZgeam: returns a cublasStatus_t; the cuBLAS handle is the first argument.
 * transa and transb are cublasOperation_t selectors.
 * alpha and beta point to cuDoubleComplex scalars in host or device memory.
 * Parameter order here is alpha, A, beta, B. A and B are const (read-only)
 * cuDoubleComplex pointers. */
2111 CUBLASAPI cublasStatus_t CUBLASWINAPI cublasZgeam(cublasHandle_t handle,
2112 cublasOperation_t transa,
2113 cublasOperation_t transb,
2116 const cuDoubleComplex *alpha, /* host or device pointer */
2117 const cuDoubleComplex *A,
2119 const cuDoubleComplex *beta, /* host or device pointer */
2120 const cuDoubleComplex *B,
2125 /* Batched LU factorization - GETRF */
/* cublasSgetrfBatched: returns a cublasStatus_t; the cuBLAS handle is the first argument.
 * A is an array of pointers to non-const float; P and info are int pointers.
 * All three are marked as device pointers. */
2126 CUBLASAPI cublasStatus_t CUBLASWINAPI cublasSgetrfBatched(cublasHandle_t handle,
2128 float *A[], /*Device pointer*/
2130 int *P, /*Device Pointer*/
2131 int *info, /*Device Pointer*/
/* cublasDgetrfBatched: returns a cublasStatus_t; the cuBLAS handle is the first argument.
 * A is an array of pointers to non-const double; P and info are int pointers.
 * All three are marked as device pointers. */
2134 CUBLASAPI cublasStatus_t CUBLASWINAPI cublasDgetrfBatched(cublasHandle_t handle,
2136 double *A[], /*Device pointer*/
2138 int *P, /*Device Pointer*/
2139 int *info, /*Device Pointer*/
/* cublasCgetrfBatched: returns a cublasStatus_t; the cuBLAS handle is the first argument.
 * A is an array of pointers to non-const cuComplex; P and info are int
 * pointers. All three are marked as device pointers. */
2142 CUBLASAPI cublasStatus_t CUBLASWINAPI cublasCgetrfBatched(cublasHandle_t handle,
2144 cuComplex *A[], /*Device pointer*/
2146 int *P, /*Device Pointer*/
2147 int *info, /*Device Pointer*/
/* cublasZgetrfBatched: returns a cublasStatus_t; the cuBLAS handle is the first argument.
 * A is an array of pointers to non-const cuDoubleComplex; P and info are int
 * pointers. All three are marked as device pointers. */
2150 CUBLASAPI cublasStatus_t CUBLASWINAPI cublasZgetrfBatched(cublasHandle_t handle,
2152 cuDoubleComplex *A[], /*Device pointer*/
2154 int *P, /*Device Pointer*/
2155 int *info, /*Device Pointer*/
2158 /* Batched inversion based on LU factorization from getrf */
/* cublasSgetriBatched: returns a cublasStatus_t; the cuBLAS handle is the first argument.
 * A (array of pointers to const float) and P (const int pointer) are
 * read-only; C is an array of pointers to non-const float.
 * All three are marked as device pointers. */
2159 CUBLASAPI cublasStatus_t CUBLASWINAPI cublasSgetriBatched(cublasHandle_t handle,
2161 const float *A[], /*Device pointer*/
2163 const int *P, /*Device pointer*/
2164 float *C[], /*Device pointer*/
/* cublasDgetriBatched: returns a cublasStatus_t; the cuBLAS handle is the first argument.
 * A (array of pointers to const double) and P (const int pointer) are
 * read-only; C is an array of pointers to non-const double.
 * All three are marked as device pointers. */
2169 CUBLASAPI cublasStatus_t CUBLASWINAPI cublasDgetriBatched(cublasHandle_t handle,
2171 const double *A[], /*Device pointer*/
2173 const int *P, /*Device pointer*/
2174 double *C[], /*Device pointer*/
/* cublasCgetriBatched: returns a cublasStatus_t; the cuBLAS handle is the first argument.
 * A (array of pointers to const cuComplex) and P (const int pointer) are
 * read-only; C is an array of pointers to non-const cuComplex.
 * All three are marked as device pointers. */
2179 CUBLASAPI cublasStatus_t CUBLASWINAPI cublasCgetriBatched(cublasHandle_t handle,
2181 const cuComplex *A[], /*Device pointer*/
2183 const int *P, /*Device pointer*/
2184 cuComplex *C[], /*Device pointer*/
/* cublasZgetriBatched: returns a cublasStatus_t; the cuBLAS handle is the first argument.
 * A (array of pointers to const cuDoubleComplex) and P (const int pointer)
 * are read-only; C is an array of pointers to non-const cuDoubleComplex.
 * All three are marked as device pointers. */
2189 CUBLASAPI cublasStatus_t CUBLASWINAPI cublasZgetriBatched(cublasHandle_t handle,
2191 const cuDoubleComplex *A[], /*Device pointer*/
2193 const int *P, /*Device pointer*/
2194 cuDoubleComplex *C[], /*Device pointer*/
2201 /* TRSM - Batched Triangular Solver */
/* cublasStrsmBatched: returns a cublasStatus_t; the cuBLAS handle is the first argument.
 * side, uplo, trans and diag are enum selectors (cublasSideMode_t,
 * cublasFillMode_t, cublasOperation_t, cublasDiagType_t).
 * alpha points to a float scalar in host or device memory. */
2202 CUBLASAPI cublasStatus_t CUBLASWINAPI cublasStrsmBatched( cublasHandle_t handle,
2203 cublasSideMode_t side,
2204 cublasFillMode_t uplo,
2205 cublasOperation_t trans,
2206 cublasDiagType_t diag,
2209 const float *alpha, /*Host or Device Pointer*/
/* cublasDtrsmBatched: returns a cublasStatus_t; the cuBLAS handle is the first argument.
 * side, uplo, trans and diag are enum selectors (cublasSideMode_t,
 * cublasFillMode_t, cublasOperation_t, cublasDiagType_t).
 * alpha points to a double scalar in host or device memory. */
2216 CUBLASAPI cublasStatus_t CUBLASWINAPI cublasDtrsmBatched( cublasHandle_t handle,
2217 cublasSideMode_t side,
2218 cublasFillMode_t uplo,
2219 cublasOperation_t trans,
2220 cublasDiagType_t diag,
2223 const double *alpha, /*Host or Device Pointer*/
/* cublasCtrsmBatched: returns a cublasStatus_t; the cuBLAS handle is the first argument.
 * side, uplo, trans and diag are enum selectors (cublasSideMode_t,
 * cublasFillMode_t, cublasOperation_t, cublasDiagType_t).
 * alpha points to a cuComplex scalar in host or device memory.
 * A is an array of pointers to const cuComplex. */
2230 CUBLASAPI cublasStatus_t CUBLASWINAPI cublasCtrsmBatched( cublasHandle_t handle,
2231 cublasSideMode_t side,
2232 cublasFillMode_t uplo,
2233 cublasOperation_t trans,
2234 cublasDiagType_t diag,
2237 const cuComplex *alpha, /*Host or Device Pointer*/
2238 const cuComplex *A[],
/* cublasZtrsmBatched: returns a cublasStatus_t; the cuBLAS handle is the first argument.
 * side, uplo, trans and diag are enum selectors (cublasSideMode_t,
 * cublasFillMode_t, cublasOperation_t, cublasDiagType_t).
 * alpha points to a cuDoubleComplex scalar in host or device memory.
 * A is an array of pointers to const cuDoubleComplex; B is an array of
 * pointers to non-const cuDoubleComplex. */
2244 CUBLASAPI cublasStatus_t CUBLASWINAPI cublasZtrsmBatched( cublasHandle_t handle,
2245 cublasSideMode_t side,
2246 cublasFillMode_t uplo,
2247 cublasOperation_t trans,
2248 cublasDiagType_t diag,
2251 const cuDoubleComplex *alpha, /*Host or Device Pointer*/
2252 const cuDoubleComplex *A[],
2254 cuDoubleComplex *B[],
2258 /* Batched matrix inversion - MATINV */
/* cublasSmatinvBatched: returns a cublasStatus_t; the cuBLAS handle is the first argument.
 * A is an array of pointers to const float; Ainv is an array of pointers to
 * non-const float; info is an int pointer.
 * All three are marked as device pointers. */
2259 CUBLASAPI cublasStatus_t CUBLASWINAPI cublasSmatinvBatched(cublasHandle_t handle,
2261 const float *A[], /*Device pointer*/
2263 float *Ainv[], /*Device pointer*/
2265 int *info, /*Device Pointer*/
/* cublasDmatinvBatched: returns a cublasStatus_t; the cuBLAS handle is the first argument.
 * A is an array of pointers to const double; Ainv is an array of pointers to
 * non-const double; info is an int pointer.
 * All three are marked as device pointers. */
2268 CUBLASAPI cublasStatus_t CUBLASWINAPI cublasDmatinvBatched(cublasHandle_t handle,
2270 const double *A[], /*Device pointer*/
2272 double *Ainv[], /*Device pointer*/
2274 int *info, /*Device Pointer*/
/* cublasCmatinvBatched: returns a cublasStatus_t; the cuBLAS handle is the first argument.
 * A is an array of pointers to const cuComplex; Ainv is an array of pointers
 * to non-const cuComplex; info is an int pointer.
 * All three are marked as device pointers. */
2277 CUBLASAPI cublasStatus_t CUBLASWINAPI cublasCmatinvBatched(cublasHandle_t handle,
2279 const cuComplex *A[], /*Device pointer*/
2281 cuComplex *Ainv[], /*Device pointer*/
2283 int *info, /*Device Pointer*/
/* cublasZmatinvBatched: returns a cublasStatus_t; the cuBLAS handle is the first argument.
 * A is an array of pointers to const cuDoubleComplex; Ainv is an array of
 * pointers to non-const cuDoubleComplex; info is an int pointer.
 * All three are marked as device pointers. */
2286 CUBLASAPI cublasStatus_t CUBLASWINAPI cublasZmatinvBatched(cublasHandle_t handle,
2288 const cuDoubleComplex *A[], /*Device pointer*/
2290 cuDoubleComplex *Ainv[], /*Device pointer*/
2292 int *info, /*Device Pointer*/
2295 /* Batched QR factorization */
/* cublasSgeqrfBatched: returns a cublasStatus_t; the cuBLAS handle is the first argument.
 * Aarray and TauArray are arrays of pointers to non-const float, both marked
 * as device pointers. */
2296 CUBLASAPI cublasStatus_t CUBLASWINAPI cublasSgeqrfBatched( cublasHandle_t handle,
2299 float *Aarray[], /*Device pointer*/
2301 float *TauArray[], /* Device pointer*/
/* cublasDgeqrfBatched: returns a cublasStatus_t; the cuBLAS handle is the first argument.
 * Aarray and TauArray are arrays of pointers to non-const double, both marked
 * as device pointers. */
2305 CUBLASAPI cublasStatus_t CUBLASWINAPI cublasDgeqrfBatched( cublasHandle_t handle,
2308 double *Aarray[], /*Device pointer*/
2310 double *TauArray[], /* Device pointer*/
/* cublasCgeqrfBatched: returns a cublasStatus_t; the cuBLAS handle is the first argument.
 * Aarray and TauArray are arrays of pointers to non-const cuComplex, both
 * marked as device pointers. */
2314 CUBLASAPI cublasStatus_t CUBLASWINAPI cublasCgeqrfBatched( cublasHandle_t handle,
2317 cuComplex *Aarray[], /*Device pointer*/
2319 cuComplex *TauArray[], /* Device pointer*/
/* cublasZgeqrfBatched: returns a cublasStatus_t; the cuBLAS handle is the first argument.
 * Aarray and TauArray are arrays of pointers to non-const cuDoubleComplex,
 * both marked as device pointers. */
2323 CUBLASAPI cublasStatus_t CUBLASWINAPI cublasZgeqrfBatched( cublasHandle_t handle,
2326 cuDoubleComplex *Aarray[], /*Device pointer*/
2328 cuDoubleComplex *TauArray[], /* Device pointer*/
2331 /* Least-squares minimization: only m >= n and the non-transposed case are supported */
/* cublasSgelsBatched: returns a cublasStatus_t; the cuBLAS handle is the first argument.
 * trans is a cublasOperation_t selector.
 * Aarray and Carray are arrays of pointers to non-const float; devInfoArray
 * is an int pointer. All three are marked as device pointers. */
2332 CUBLASAPI cublasStatus_t CUBLASWINAPI cublasSgelsBatched( cublasHandle_t handle,
2333 cublasOperation_t trans,
2337 float *Aarray[], /*Device pointer*/
2339 float *Carray[], /* Device pointer*/
2342 int *devInfoArray, /* Device pointer*/
/* cublasDgelsBatched: returns a cublasStatus_t; the cuBLAS handle is the first argument.
 * trans is a cublasOperation_t selector.
 * Aarray and Carray are arrays of pointers to non-const double; devInfoArray
 * is an int pointer. All three are marked as device pointers. */
2345 CUBLASAPI cublasStatus_t CUBLASWINAPI cublasDgelsBatched( cublasHandle_t handle,
2346 cublasOperation_t trans,
2350 double *Aarray[], /*Device pointer*/
2352 double *Carray[], /* Device pointer*/
2355 int *devInfoArray, /* Device pointer*/
/* cublasCgelsBatched: returns a cublasStatus_t; the cuBLAS handle is the first argument.
 * trans is a cublasOperation_t selector.
 * Aarray and Carray are arrays of pointers to non-const cuComplex, both
 * marked as device pointers. */
2358 CUBLASAPI cublasStatus_t CUBLASWINAPI cublasCgelsBatched( cublasHandle_t handle,
2359 cublasOperation_t trans,
2363 cuComplex *Aarray[], /*Device pointer*/
2365 cuComplex *Carray[], /* Device pointer*/
/* cublasZgelsBatched: returns a cublasStatus_t; the cuBLAS handle is the first argument.
 * trans is a cublasOperation_t selector.
 * Aarray and Carray are arrays of pointers to non-const cuDoubleComplex,
 * both marked as device pointers. */
2371 CUBLASAPI cublasStatus_t CUBLASWINAPI cublasZgelsBatched( cublasHandle_t handle,
2372 cublasOperation_t trans,
2376 cuDoubleComplex *Aarray[], /*Device pointer*/
2378 cuDoubleComplex *Carray[], /* Device pointer*/
/* cublasSdgmm: returns a cublasStatus_t; the cuBLAS handle is the first argument.
 * mode is a cublasSideMode_t selector. */
2384 CUBLASAPI cublasStatus_t CUBLASWINAPI cublasSdgmm(cublasHandle_t handle,
2385 cublasSideMode_t mode,
/* cublasDdgmm: returns a cublasStatus_t; the cuBLAS handle is the first argument.
 * mode is a cublasSideMode_t selector. */
2395 CUBLASAPI cublasStatus_t CUBLASWINAPI cublasDdgmm(cublasHandle_t handle,
2396 cublasSideMode_t mode,
/* cublasCdgmm: returns a cublasStatus_t; the cuBLAS handle is the first argument.
 * mode is a cublasSideMode_t selector. */
2406 CUBLASAPI cublasStatus_t CUBLASWINAPI cublasCdgmm(cublasHandle_t handle,
2407 cublasSideMode_t mode,
/* cublasZdgmm: returns a cublasStatus_t; the cuBLAS handle is the first argument.
 * mode is a cublasSideMode_t selector.
 * A and x are const (read-only) cuDoubleComplex pointers. */
2417 CUBLASAPI cublasStatus_t CUBLASWINAPI cublasZdgmm(cublasHandle_t handle,
2418 cublasSideMode_t mode,
2421 const cuDoubleComplex *A,
2423 const cuDoubleComplex *x,
2428 /* TPTTR: triangular packed format to triangular format */
/* cublasStpttr: returns a cublasStatus_t; the cuBLAS handle is the first argument.
 * uplo is a cublasFillMode_t selector. */
2429 CUBLASAPI cublasStatus_t CUBLASWINAPI cublasStpttr ( cublasHandle_t handle,
2430 cublasFillMode_t uplo,
/* cublasDtpttr: returns a cublasStatus_t; the cuBLAS handle is the first argument.
 * uplo is a cublasFillMode_t selector. */
2436 CUBLASAPI cublasStatus_t CUBLASWINAPI cublasDtpttr ( cublasHandle_t handle,
2437 cublasFillMode_t uplo,
/* cublasCtpttr: returns a cublasStatus_t; the cuBLAS handle is the first argument.
 * uplo is a cublasFillMode_t selector.
 * AP is a const (read-only) cuComplex pointer. */
2443 CUBLASAPI cublasStatus_t CUBLASWINAPI cublasCtpttr ( cublasHandle_t handle,
2444 cublasFillMode_t uplo,
2446 const cuComplex *AP,
/* cublasZtpttr: returns a cublasStatus_t; the cuBLAS handle is the first argument.
 * uplo is a cublasFillMode_t selector.
 * AP is a const (read-only) cuDoubleComplex pointer. */
2450 CUBLASAPI cublasStatus_t CUBLASWINAPI cublasZtpttr ( cublasHandle_t handle,
2451 cublasFillMode_t uplo,
2453 const cuDoubleComplex *AP,
2456 /* TRTTP: triangular format to triangular packed format */
/* cublasStrttp: returns a cublasStatus_t; the cuBLAS handle is the first argument.
 * uplo is a cublasFillMode_t selector. */
2457 CUBLASAPI cublasStatus_t CUBLASWINAPI cublasStrttp ( cublasHandle_t handle,
2458 cublasFillMode_t uplo,
/* cublasDtrttp: returns a cublasStatus_t; the cuBLAS handle is the first argument.
 * uplo is a cublasFillMode_t selector. */
2464 CUBLASAPI cublasStatus_t CUBLASWINAPI cublasDtrttp ( cublasHandle_t handle,
2465 cublasFillMode_t uplo,
/* cublasCtrttp: returns a cublasStatus_t; the cuBLAS handle is the first argument.
 * uplo is a cublasFillMode_t selector. */
2471 CUBLASAPI cublasStatus_t CUBLASWINAPI cublasCtrttp ( cublasHandle_t handle,
2472 cublasFillMode_t uplo,
/* cublasZtrttp: returns a cublasStatus_t; the cuBLAS handle is the first argument.
 * uplo is a cublasFillMode_t selector.
 * A is a const (read-only) cuDoubleComplex pointer; AP is a writable
 * cuDoubleComplex pointer and the final parameter. */
2478 CUBLASAPI cublasStatus_t CUBLASWINAPI cublasZtrttp ( cublasHandle_t handle,
2479 cublasFillMode_t uplo,
2481 const cuDoubleComplex *A,
2483 cuDoubleComplex *AP );
2485 #if defined(__cplusplus)
2487 #endif /* __cplusplus */
2489 #endif /* !defined(CUBLAS_API_H_) */