Just for fun: an integration of an NVIDIA CUDA-capable graphics card into PHP. Well, mainly it is a CUBLAS (linear algebra) integration patch.
Of course, you need large vectors or matrices to benefit, since every call pays extra overhead for copying the data to and from the GPU.
Also, older graphics cards do not allow concurrent kernel execution, so using this inside a webserver is questionable. Newer CUDA drivers and cards (as far as I remember, starting with the Fermi architecture) can execute GPU kernels concurrently.
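From the PHP side the patch adds a single function, cu_scal(array, scale), which scales every element of a numeric array on the GPU via cublasSscal(). A minimal usage sketch, assuming a PHP build with this patch applied and a CUDA-capable card (the timings are of course hardware dependent):

<?php
// cu_scal() comes from the patch below; everything else is plain PHP 5.3.
$n = 1 << 20;                  // ~1M elements; small arrays are dominated by the copy overhead
$vector = range(1, $n);

$start = microtime(true);
$scaled = cu_scal($vector, 3); // runs cublasSscal on the device, returns an array of floats
printf("GPU: %.4f s, first element: %f\n", microtime(true) - $start, $scaled[0]);

$start = microtime(true);
$cpu = array_map(function ($x) { return $x * 3; }, $vector);
printf("CPU: %.4f s, first element: %f\n", microtime(true) - $start, $cpu[0]);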
Source: diff patch
diff -urN php-5.3.3/ext/standard//array.c php-5.3.3_new/ext/standard//array.c
--- php-5.3.3/ext/standard//array.c 2010-06-11 10:53:31.000000000 +0200
+++ php-5.3.3_new/ext/standard//array.c 2010-10-12 18:34:38.000000000 +0200
@@ -50,6 +50,10 @@
#include "ext/spl/spl_array.h"
#endif
+// pkirsch:
+#include "ExcelCUDA_wrapper.h"
+
+
/* {{{ defines */
#define EXTR_OVERWRITE 0
#define EXTR_SKIP 1
@@ -4524,3 +4528,68 @@
* vim600: noet sw=4 ts=4 fdm=marker
* vim<600: noet sw=4 ts=4
*/
+
+/* pkirsch CUDA integration:
+ * build:
+ * /usr/local/cuda/bin/nvcc -gencode=arch=compute_10,code=\"sm_10,compute_10\" -gencode=arch=compute_20,code=\"sm_20,compute_20\" -m64 --compiler-options -fno-strict-aliasing -I. -I/usr/local/cuda/include -I../../common/inc -I../../../shared//inc -DUNIX -O2 -o ExcelCUDA_wrapper.cu.o -c ExcelCUDA_wrapper.cu
+ * - patch the global Makefile:
+ * - PHP_GLOBAL_OBJS += ext/standard/ExcelCUDA_wrapper.cu.o
+ * - EXTRA_LIBS += -lcudart -lcutil_x86_64 -lcublas -L/usr/local/cuda/lib64
+ *
+ * */
+
+/* orig: cublasSscal
+ scaling vectors */
+PHP_FUNCTION(cu_scal)
+{
+ zval *input,
+ **entry,
+ entry_n;
+ int error = 0;
+ HashPosition pos;
+ long scale;
+
+ if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "al", &input, &scale) == FAILURE) {
+ return;
+ }
+
+ int array_len = zend_hash_num_elements(Z_ARRVAL_P(input));
+ float *floats_input = malloc(sizeof(float) * array_len);
+ int index = 0;
+
+ for (zend_hash_internal_pointer_reset_ex(Z_ARRVAL_P(input), &pos);
+ zend_hash_get_current_data_ex(Z_ARRVAL_P(input), (void **)&entry, &pos) == SUCCESS;
+ zend_hash_move_forward_ex(Z_ARRVAL_P(input), &pos)
+ ) {
+ if (Z_TYPE_PP(entry) == IS_ARRAY || Z_TYPE_PP(entry) == IS_OBJECT) {
+ continue;
+ }
+ entry_n = **entry;
+ zval_copy_ctor(&entry_n);
+ convert_to_double(&entry_n); /* works for integers, floats and numeric strings */
+ floats_input[index++] = (float)Z_DVAL(entry_n);
+ }
+
+ /* CUDA */
+ long time1, time2;
+ time1 = clock();
+ ExcelCUDA_CUBLAS_scal(index,
+ (float)scale,
+ floats_input,
+ &error);
+ time2 = clock();
+ printf("cudadiff Timediff: %d\n", time2-time1);
+ time1 = clock();
+
+ /* returns an array */
+ array_init_size(return_value, array_len);
+ int i;
+ for (i = 0; i < index; i++) { /* keep the original element order */
+ zval *ret;
+ MAKE_STD_ZVAL(ret); /* allocate a fresh zval (refcount = 1) */
+ ZVAL_DOUBLE(ret, (double)floats_input[i]); /* set type and value */
+ /* the hash table takes over the single reference */
+ zend_hash_next_index_insert(Z_ARRVAL_P(return_value), &ret, sizeof(zval *), NULL);
+ }
+ free(floats_input); /* release the temporary float buffer */
+ }
\ No newline at end of file
diff -urN php-5.3.3/ext/standard//basic_functions.c php-5.3.3_new/ext/standard//basic_functions.c
--- php-5.3.3/ext/standard//basic_functions.c 2010-05-13 04:13:30.000000000 +0200
+++ php-5.3.3_new/ext/standard//basic_functions.c 2010-10-12 09:15:21.000000000 +0200
@@ -609,6 +609,13 @@
ZEND_ARG_INFO(0, keys) /* ARRAY_INFO(0, keys, 0) */
ZEND_ARG_INFO(0, values) /* ARRAY_INFO(0, values, 0) */
ZEND_END_ARG_INFO()
+
+/* cuda */
+ZEND_BEGIN_ARG_INFO_EX(arginfo_cu_scal, 0, 0, 2)
+ ZEND_ARG_INFO(0, input)
+ ZEND_ARG_INFO(0, scale)
+ZEND_END_ARG_INFO()
+
/* }}} */
/* {{{ basic_functions.c */
ZEND_BEGIN_ARG_INFO(arginfo_get_magic_quotes_gpc, 0)
@@ -3326,7 +3333,9 @@
PHP_FE(array_chunk, arginfo_array_chunk)
PHP_FE(array_combine, arginfo_array_combine)
PHP_FE(array_key_exists, arginfo_array_key_exists)
-
+ /* cuda */
+ PHP_FE(cu_scal, arginfo_cu_scal)
+
/* aliases from array.c */
PHP_FALIAS(pos, current, arginfo_current)
PHP_FALIAS(sizeof, count, arginfo_count)
diff -urN php-5.3.3/ext/standard//ExcelCUDA_wrapper.cu php-5.3.3_new/ext/standard//ExcelCUDA_wrapper.cu
--- php-5.3.3/ext/standard//ExcelCUDA_wrapper.cu 1970-01-01 01:00:00.000000000 +0100
+++ php-5.3.3_new/ext/standard//ExcelCUDA_wrapper.cu 2010-10-12 10:04:51.000000000 +0200
@@ -0,0 +1,345 @@
+/*
+ * Copyright 1993-2010 NVIDIA Corporation. All rights reserved.
+ *
+ * Please refer to the NVIDIA end user license agreement (EULA) associated
+ * with this source code for terms and conditions that govern your use of
+ * this software. Any use, reproduction, disclosure, or distribution of
+ * this software and related documentation outside the terms of the EULA
+ * is strictly prohibited.
+ *
+ */
+
+/*
+* Cublas additions: Patrick Kirsch
+*/
+#include <stdio.h>
+#include <stdlib.h>
+
+#include "ExcelCUDA_wrapper.h"
+
+/* #include "reduction.h" */
+#include "cublas.h"
+
+inline int success(cudaError_t result)
+{
+ return result == cudaSuccess;
+}
+
+/* CUBLAS */
+/* orig: cublasSdot
+ dot product of two vectors
+*/
+void ExcelCUDA_CUBLAS_sdot(int dimension,
+ float *result,
+ float* h_A,
+ float* h_B,
+ int *error)
+{
+ cublasStatus status;
+ float* d_A = 0;
+ float* d_B = 0;
+
+ status = cublasInit();
+ if (status != CUBLAS_STATUS_SUCCESS) {
+ fprintf (stderr, "!!!! CUBLAS initialization error\n");
+ *error = 113;
+ }
+
+ /* Allocate device memory for the matrices */
+ status = cublasAlloc(dimension, sizeof(d_A[0]), (void**)&d_A);
+ if (status != CUBLAS_STATUS_SUCCESS) {
+ fprintf (stderr, "!!!! device memory allocation error (A)\n");
+ *error = 114;
+ }
+ status = cublasAlloc(dimension, sizeof(d_B[0]), (void**)&d_B);
+ if (status != CUBLAS_STATUS_SUCCESS) {
+ fprintf (stderr, "!!!! device memory allocation error (B)\n");
+ *error = 115;
+ }
+
+ /* Initialize the device matrices with the host matrices */
+ status = cublasSetVector(dimension, sizeof(h_A[0]), h_A, 1, d_A, 1);
+ if (status != CUBLAS_STATUS_SUCCESS) {
+ fprintf (stderr, "!!!! device access error (write A)\n");
+ *error = 117;
+ }
+
+ status = cublasSetVector(dimension, sizeof(h_B[0]), h_B, 1, d_B, 1);
+ if (status != CUBLAS_STATUS_SUCCESS) {
+ fprintf (stderr, "!!!! device access error (write B)\n");
+ *error = 118;
+ }
+
+ /* Clear last error */
+ cublasGetError();
+
+ /* Performs operation using cublas */
+ *result = cublasSdot(dimension, d_A, 1, d_B, 1);
+
+ status = cublasGetError();
+ if (status != CUBLAS_STATUS_SUCCESS) {
+ fprintf (stderr, "!!!! kernel execution error.\n");
+ *error = -119;
+ }
+
+ /* Reading d_B back is not needed: cublasSdot returns the scalar result directly,
+ so this check stays disabled:
+ status = cublasGetVector(dimension, sizeof(float), d_B, 1, h_B, 1);
+ if (status != CUBLAS_STATUS_SUCCESS) {
+ fprintf (stderr, "!!!! device access error (read B)\n"); *error = 121;
+ } */
+
+
+ /* Memory clean up (device only; the host buffers belong to the caller) */
+ cublasFree(d_A);
+ cublasFree(d_B);
+}
+/* orig: cublasSscal
+ scaling vectors
+*/
+void ExcelCUDA_CUBLAS_scal(int dimension,
+ float alpha,
+ float* h_A,
+ int *error)
+{
+ cublasStatus status;
+ float* d_A = 0;
+
+ status = cublasInit();
+ if (status != CUBLAS_STATUS_SUCCESS) {
+ fprintf (stderr, "!!!! CUBLAS initialization error\n");
+ *error = 113;
+ }
+
+ /* Allocate device memory for the matrices */
+ status = cublasAlloc(dimension, sizeof(d_A[0]), (void**)&d_A);
+ if (status != CUBLAS_STATUS_SUCCESS) {
+ fprintf (stderr, "!!!! device memory allocation error (A)\n");
+ *error = 114;
+ }
+
+ /* Initialize the device matrices with the host matrices */
+ status = cublasSetVector(dimension, sizeof(h_A[0]), h_A, 1, d_A, 1);
+ if (status != CUBLAS_STATUS_SUCCESS) {
+ fprintf (stderr, "!!!! device access error (write A)\n");
+ *error = 117;
+ }
+
+ /* Clear last error */
+ cublasGetError();
+
+ /* Performs operation using cublas */
+ cublasSscal(dimension, alpha, d_A, 1);
+
+ status = cublasGetError();
+ if (status != CUBLAS_STATUS_SUCCESS) {
+ fprintf (stderr, "!!!! kernel execution error.\n");
+ *error = -119;
+ }
+
+ /* Read the result back */
+ status = cublasGetVector(dimension, sizeof(float), d_A, 1, h_A, 1);
+ cublasFree(d_A); /* release device memory */
+ if (status != CUBLAS_STATUS_SUCCESS) {
+ fprintf (stderr, "!!!! device access error (read A)\n");
+ *error = 121;
+ }
+}
+
+/* orig: cublasSaxpy
+ Y = alpha * X + Y
+*/
+void ExcelCUDA_CUBLAS_saxpy(int dimension,
+ float alpha,
+ float* h_A,
+ float* h_B,
+ int *error)
+{
+ cublasStatus status;
+ float* d_A = 0;
+ float* d_B = 0;
+
+ status = cublasInit();
+ if (status != CUBLAS_STATUS_SUCCESS) {
+ fprintf (stderr, "!!!! CUBLAS initialization error\n");
+ *error = 113;
+ }
+
+ /* Allocate device memory for the matrices */
+ status = cublasAlloc(dimension, sizeof(d_A[0]), (void**)&d_A);
+ if (status != CUBLAS_STATUS_SUCCESS) {
+ fprintf (stderr, "!!!! device memory allocation error (A)\n");
+ *error = 114;
+ }
+ status = cublasAlloc(dimension, sizeof(d_B[0]), (void**)&d_B);
+ if (status != CUBLAS_STATUS_SUCCESS) {
+ fprintf (stderr, "!!!! device memory allocation error (B)\n");
+ *error = 115;
+ }
+
+ /* Initialize the device matrices with the host matrices */
+ status = cublasSetVector(dimension, sizeof(h_A[0]), h_A, 1, d_A, 1);
+ if (status != CUBLAS_STATUS_SUCCESS) {
+ fprintf (stderr, "!!!! device access error (write A)\n");
+ *error = 117;
+ }
+
+ status = cublasSetVector(dimension, sizeof(h_B[0]), h_B, 1, d_B, 1);
+ if (status != CUBLAS_STATUS_SUCCESS) {
+ fprintf (stderr, "!!!! device access error (write B)\n");
+ *error = 118;
+ }
+
+ /* Clear last error */
+ cublasGetError();
+
+ /* Performs operation using cublas */
+ cublasSaxpy(dimension, alpha, d_A, 1, d_B, 1);
+
+ status = cublasGetError();
+ if (status != CUBLAS_STATUS_SUCCESS) {
+ fprintf (stderr, "!!!! kernel execution error.\n");
+ *error = -119;
+ }
+
+ /* Read the result back */
+ status = cublasGetVector(dimension, sizeof(float), d_B, 1, h_B, 1);
+ if (status != CUBLAS_STATUS_SUCCESS) {
+ fprintf (stderr, "!!!! device access error (read B)\n");
+ *error = 121;
+ }
+
+ /* Memory clean up (device only; the host buffers belong to the caller) */
+ cublasFree(d_A);
+ cublasFree(d_B);
+}
+
+/* C = alpha * op(A) * op(B) + beta * C; op(X) = X or op(X) = transpose(X)
+Hint:
+ h_A, h_B and h_C must each hold n2 = dimension * dimension floats, e.g. (float*)malloc(n2 * sizeof(float)); the matrices are assumed to be square.
+*/
+void ExcelCUDA_CUBLAS_sgemm(int n2,
+ float alpha,
+ float beta,
+ float* h_A,
+ float* h_B,
+ float* h_C,
+ int *error)
+{
+ cublasStatus status;
+ float* d_A = 0;
+ float* d_B = 0;
+ float* d_C = 0;
+ int dimension = (int)sqrt((float)n2);
+
+ status = cublasInit();
+ if (status != CUBLAS_STATUS_SUCCESS) {
+ fprintf (stderr, "!!!! CUBLAS initialization error\n");
+ *error = 113;
+ }
+
+ /* Allocate host memory for the matrices
+ h_A = (float*)malloc(n2 * sizeof(h_A[0]));
+ if (h_A == 0) {
+ fprintf (stderr, "!!!! host memory allocation error (A)\n");
+ }
+ h_B = (float*)malloc(n2 * sizeof(h_B[0]));
+ if (h_B == 0) {
+ fprintf (stderr, "!!!! host memory allocation error (B)\n");
+ }
+ h_C = (float*)malloc(n2 * sizeof(h_C[0]));
+ if (h_C == 0) {
+ fprintf (stderr, "!!!! host memory allocation error (C)\n");
+ } */
+
+ /* Fill the matrices with test data
+ for (i = 0; i < n2; i++) {
+ h_A[i] = rand() / (float)RAND_MAX;
+ h_B[i] = rand() / (float)RAND_MAX;
+ h_C[i] = rand() / (float)RAND_MAX;
+ }*/
+
+ /* Allocate device memory for the matrices */
+ status = cublasAlloc(n2, sizeof(d_A[0]), (void**)&d_A);
+ if (status != CUBLAS_STATUS_SUCCESS) {
+ fprintf (stderr, "!!!! device memory allocation error (A)\n");
+ *error = 114;
+ }
+ status = cublasAlloc(n2, sizeof(d_B[0]), (void**)&d_B);
+ if (status != CUBLAS_STATUS_SUCCESS) {
+ fprintf (stderr, "!!!! device memory allocation error (B)\n");
+ *error = 115;
+ }
+ status = cublasAlloc(n2, sizeof(d_C[0]), (void**)&d_C);
+ if (status != CUBLAS_STATUS_SUCCESS) {
+ fprintf (stderr, "!!!! device memory allocation error (C)\n");
+ *error = 116;
+ }
+
+ /* Initialize the device matrices with the host matrices */
+ status = cublasSetVector(n2, sizeof(h_A[0]), h_A, 1, d_A, 1);
+ if (status != CUBLAS_STATUS_SUCCESS) {
+ fprintf (stderr, "!!!! device access error (write A)\n");
+ *error = 117;
+ }
+
+ status = cublasSetVector(n2, sizeof(h_B[0]), h_B, 1, d_B, 1);
+ if (status != CUBLAS_STATUS_SUCCESS) {
+ fprintf (stderr, "!!!! device access error (write B)\n");
+ *error = 118;
+ }
+ status = cublasSetVector(n2, sizeof(h_C[0]), h_C, 1, d_C, 1);
+ if (status != CUBLAS_STATUS_SUCCESS) {
+ fprintf (stderr, "!!!! device access error (write C)\n");
+ *error = 119;
+ }
+
+ /* Performs operation using plain C code */
+ // not needed: simple_sgemm(N, alpha, h_A, h_B, beta, h_C);
+ // h_C_ref = h_C;
+
+ /* Clear last error */
+ cublasGetError();
+
+ /* Performs operation using cublas */
+ cublasSgemm('n', 'n', dimension, dimension, dimension, alpha, d_A, dimension, d_B, dimension, beta, d_C, dimension);
+
+ status = cublasGetError();
+ if (status != CUBLAS_STATUS_SUCCESS) {
+ fprintf (stderr, "!!!! kernel execution error.\n");
+ *error = -119;
+ }
+
+ /* Allocate host memory for reading back the result from device memory
+ result = (float*)malloc(n2 * sizeof(h_C[0]));
+ if (*h_C == 0) {
+ fprintf (stderr, "!!!! host memory allocation error (C)\n");
+ *error = 120;
+ }*/
+
+ /* Read the result back */
+ status = cublasGetVector(n2, sizeof(float), d_C, 1, h_C, 1);
+ if (status != CUBLAS_STATUS_SUCCESS) {
+ fprintf (stderr, "!!!! device access error (read C)\n");
+ *error = 121;
+ }
+
+ /* Check result against reference
+ error_norm = 0;
+ ref_norm = 0;
+ for (i = 0; i < n2; ++i) {
+ diff = h_C_ref[i] - h_C[i];
+ error_norm += diff * diff;
+ ref_norm += h_C_ref[i] * h_C_ref[i];
+ }
+ error_norm = (float)sqrt((double)error_norm);
+ ref_norm = (float)sqrt((double)ref_norm);
+ if (fabs(ref_norm) < 1e-7) {
+ fprintf (stderr, "!!!! reference norm is 0\n");
+
+ }*/
+
+ /* Memory clean up (device only; the host buffers belong to the caller) */
+ cublasFree(d_A); cublasFree(d_B);
+ cublasFree(d_C);
+}
diff -urN php-5.3.3/ext/standard//ExcelCUDA_wrapper.h php-5.3.3_new/ext/standard//ExcelCUDA_wrapper.h
--- php-5.3.3/ext/standard//ExcelCUDA_wrapper.h 1970-01-01 01:00:00.000000000 +0100
+++ php-5.3.3_new/ext/standard//ExcelCUDA_wrapper.h 2010-10-11 18:32:10.000000000 +0200
@@ -0,0 +1,25 @@
+/*
+ * Copyright 1993-2010 NVIDIA Corporation. All rights reserved.
+ *
+ * Please refer to the NVIDIA end user license agreement (EULA) associated
+ * with this source code for terms and conditions that govern your use of
+ * this software. Any use, reproduction, disclosure, or distribution of
+ * this software and related documentation outside the terms of the EULA
+ * is strictly prohibited.
+ *
+ */
+
+#pragma once
+
+#ifdef __cplusplus
+extern "C"
+{
+#endif
+
+ void ExcelCUDA_CUBLAS_sgemm(int n2, float alpha, float beta, float* h_A, float* h_B, float* h_C, int *error);
+ void ExcelCUDA_CUBLAS_saxpy(int dimension, float alpha, float* h_A, float* h_B, int *error);
+ void ExcelCUDA_CUBLAS_sdot(int dimension, float *result, float* h_A, float* h_B, int *error);
+ void ExcelCUDA_CUBLAS_scal(int dimension, float alpha, float* h_A, int *error);
+#ifdef __cplusplus
+}
+#endif
diff -urN php-5.3.3/ext/standard//php_array.h php-5.3.3_new/ext/standard//php_array.h
--- php-5.3.3/ext/standard//php_array.h 2010-01-03 10:23:27.000000000 +0100
+++ php-5.3.3_new/ext/standard//php_array.h 2010-10-12 09:34:58.000000000 +0200
@@ -102,6 +102,9 @@
PHP_FUNCTION(array_chunk);
PHP_FUNCTION(array_combine);
+/* cuda */
+PHP_FUNCTION(cu_scal);
+
PHPAPI HashTable* php_splice(HashTable *, int, int, zval ***, int, HashTable **);
PHPAPI int php_array_merge(HashTable *dest, HashTable *src, int recursive TSRMLS_DC);
PHPAPI int php_array_replace_recursive(HashTable *dest, HashTable *src TSRMLS_DC);
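A word of caution: the wrapper converts all values to single-precision floats on their way to the card, so results will not be bit-identical to a double-precision PHP computation. A small sanity check could look like this (check_cu_scal() is just an illustrative helper; only cu_scal() itself is provided by the patch):

<?php
// Compare cu_scal() against a double-precision PHP loop within a float tolerance.
function check_cu_scal(array $input, $scale, $epsilon = 1e-4) {
    $gpu = cu_scal($input, $scale);
    foreach (array_values($input) as $i => $value) {
        $expected = $value * $scale;
        // allow a relative error in the order of single-precision rounding
        if (abs($gpu[$i] - $expected) > $epsilon * max(1.0, abs($expected))) {
            printf("mismatch at %d: gpu=%f php=%f\n", $i, $gpu[$i], $expected);
            return false;
        }
    }
    return true;
}

var_dump(check_cu_scal(range(1, 100000), 7));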