Anyway, there is a simple way of keeping your NVIDIA display driver up to date: add the following PPA to your repositories and install the driver from it:
# Add the x-updates PPA, refresh the package lists, then install the driver.
# Each command must be run on its own line (or joined with &&); run as one
# line, everything after the PPA name is passed to apt-add-repository.
sudo apt-add-repository ppa:ubuntu-x-swat/x-updates
sudo apt-get update
sudo apt-get install nvidia-current
# Add the x-updates PPA, refresh the package lists, then install the driver.
# Each command must be run on its own line (or joined with &&); run as one
# line, everything after the PPA name is passed to apt-add-repository.
sudo apt-add-repository ppa:ubuntu-x-swat/x-updates
sudo apt-get update
sudo apt-get install nvidia-current
# Run ImageMagick's mogrify with -quality 75 on every .jpg in the current directory.
# NOTE(review): mogrify presumably rewrites the files in place -- keep copies of the originals if you need them.
mogrify -quality 75 *.jpg
I'm sure many of you are on tenterhooks for my next blog post. To be brief, I have just graduated and am taking a (well-earned?) break. I have been travelling around Eastern Europe by train and am currently in the south of Spain.
Therefore, I haven't had much of a chance to keep up with the news, though someone did send me this, which is definitely pretty big news and an interesting comparison. It compares video transcoding on the two services, Amazon's and Google's. Amazon wins, but I think it's pretty likely the gap will narrow shortly. Google competing with Amazon's EC2 is big! See my previous post for a look at the Amazon GPU infrastructure. When I get the time, I will perform a comparison with the Google GPU infrastructure, if it ever becomes available.
# Add the r5u87x-loader PPA, refresh the package lists, install the loader
# package, then run its firmware download script. Each command must be run on
# its own line (or joined with &&), not as a single command line.
sudo add-apt-repository ppa:r5u87x-loader/ppa
sudo apt-get update
sudo apt-get install r5u87x
sudo /usr/share/r5u87x/r5u87x-download-firmware.sh
/*
 * Nine single-precision "speed" values stored contiguously.
 * NOTE(review): presumably one value per D2Q9 lattice direction (the PTX
 * listings reference d2q9.cu) -- confirm against the kernel source.
 */
typedef struct {
    float speeds[9];
} t_speed;
// Nine 32-bit float loads from global memory, at byte offsets 32 down to 0 from the address held in %r14.
ld.global.f32 %f190, [%r14+32]; ld.global.f32 %f8, [%r14+28]; ld.global.f32 %f7, [%r14+24]; ld.global.f32 %f187, [%r14+20]; ld.global.f32 %f5, [%r14+16]; ld.global.f32 %f4, [%r14+12]; ld.global.f32 %f194, [%r14+8]; ld.global.f32 %f195, [%r14+4]; ld.global.f32 %f200, [%r14];
FILE *fIn = fopen(srcFile, "r"); // Error check the fIn here // get the size fseek(fIn, 0L, SEEK_END); size_t sz = ftell(fIn); rewind(fIn); char *file = (char*)malloc(sizeof(char)*sz+1); fread(file, sizeof(char), sz, fIn); const char* cfile = (const char*)file; *m_cpProgram = clCreateProgramWithSource(*m_ctx, 1, &cfile, &sz, &ciErrNum); ciErrNum = clBuildProgram(*m_cpProgram, 1, (const cl_device_id*)m_cldDevices, compilerFlags, NULL, NULL); // Calculate how big the binary is ciErrNum = clGetProgramInfo(*m_cpProgram, CL_PROGRAM_BINARY_SIZES, sizeof(size_t), &kernel_length, NULL); unsigned char* bin; bin = (char*)malloc(sizeof(char)*kernel_length); ciErrNum = clGetProgramInfo(*m_cpProgram, CL_PROGRAM_BINARIES, kernel_length, &bin, NULL); // Print the binary out to the output file fp = fopen(strcat(srcFile,".bin"), "wb"); fwrite(bin, 1, kernel_length, fp); fclose(fp);
// Based on the example given in the opencl programming guide FILE *fp = fopen("custom_file.ptx", "rb"); if (fp == NULL) return -1; fseek(fp, 0, SEEK_END); int kernel_length = ftell(fp); rewind(fp); unsigned char *binary = (unsigned char*)malloc(sizeof(unsigned char)*kernel_length+10); fclose(fp); cl_int clStat; *m_cpProgram = clCreateProgramWithBinary(*m_ctx, 1, (const cl_device_id*)m_cldDevices, &kernel_length, (const unsigned char**)&binary, &clStat, &ciErrNum); // Put an error check for ciErrNum here ciErrNum = clBuildProgram(*m_cpProgram, 1, (const cl_device_id*)m_cldDevices, NULL, NULL, NULL);
add.f32 %f201, %f164, 0f00000000;
This seems a little odd: why not use a mov? I don't know enough about the low-level workings of GPUs, but this seems bizarre. Surely it is faster and simpler to move one value to another register using the following...
mov.f32 %f201, %f164;
This needs only a simple read and a write, rather than two reads and a floating-point addition followed by a write. Very bizarre. This accounts for the differences in the chart above.
NVIDIA Tesla M2050 |
.version 1.4 .target sm_10, map_f64_to_f32 // compiled with /usr/local/gpu/cuda-toolkit-4.1.28/cuda/open64/lib//be // nvopencc 4.1 built on 2012-01-12 //----------------------------------------------------------- // Compiling /tmp/tmpxft_00003e20_00000000-9_d2q9.cpp3.i (/tmp/ccBI#.FdyONv) //----------------------------------------------------------- //----------------------------------------------------------- // Options: //----------------------------------------------------------- // Target:ptx, ISA:sm_10, Endian:little, Pointer Size:64 // -O3 (Optimization level) // -g0 (Debug level) // -m2 (Report advisories) //-----------------------------------------------------------
// // Generated by NVIDIA NVVM Compiler // Compiler built on Thu Jan 12 22:46:01 2012 (1326408361) // Cuda compilation tools, release 4.1, V0.2.1221 // .version 3.0 .target sm_20 .address_size 64 .file 1 "/tmp/tmpxft_000021b9_00000000-9_d2q9.cpp3.i" .file 2 "d2q9.cu" .file 3 "/usr/local/gpu/cuda-toolkit-4.1.28/cuda/bin/../include/device_functions.h" .file 4 "/usr/local/gpu/cuda-toolkit-4.1.28/cuda/nvvm/ci_include.h" // __cuda_local_var_17168_35_non_const_sdata has been demoted
// Nine 32-bit float loads from global memory, at byte offsets 0 to 32 from the address held in %rd4; each loaded value is then copied into a second register with mov.f32.
ld.global.f32 %f1, [%rd4+0]; mov.f32 %f2, %f1; ld.global.f32 %f3, [%rd4+4]; mov.f32 %f4, %f3; ld.global.f32 %f5, [%rd4+8]; mov.f32 %f6, %f5; ld.global.f32 %f7, [%rd4+12]; mov.f32 %f8, %f7; ld.global.f32 %f9, [%rd4+16]; mov.f32 %f10, %f9; ld.global.f32 %f11, [%rd4+20]; mov.f32 %f12, %f11; ld.global.f32 %f13, [%rd4+24]; mov.f32 %f14, %f13; ld.global.f32 %f15, [%rd4+28]; mov.f32 %f16, %f15; ld.global.f32 %f17, [%rd4+32]; mov.f32 %f18, %f17;
#!/bin/bash
# Convert every .m4a file in the current directory to MP3:
# mplayer decodes each file to a temporary WAV, lame encodes that WAV to MP3,
# and the temporary WAV is removed afterwards.

# With no .m4a files present, let the glob expand to nothing instead of the
# literal string "*.m4a".
shopt -s nullglob

for file in *.m4a; do
    # Strip only the trailing extension; ${file/m4a/wav} would instead replace
    # the first "m4a" found anywhere in the name.
    base="${file%.m4a}"
    wav="${base}.wav"

    # Only encode when decoding succeeded.
    mplayer -ao pcm:file="$wav" "$file" &&
        lame --alt-preset 160 "$wav" "${base}.mp3"

    # Remove just the intermediate file, not every .wav in the directory.
    rm -f -- "$wav"
done
;; Add the directory "~/emacs.d/" to `load-path' so libraries placed there can be loaded.
;; NOTE(review): the conventional Emacs user directory is "~/.emacs.d/" (with a
;; leading dot) -- confirm that the dot-less path here is intentional.
(add-to-list 'load-path "~/emacs.d/")
;; Load the linum library and turn on its global mode (line numbers).
;; NOTE(review): newer Emacs releases mark linum obsolete in favour of
;; display-line-numbers-mode -- verify against your Emacs version.
(require 'linum)
(global-linum-mode +1)
;; Bind C-x followed by an arrow key to the windmove command for that direction.
(dolist (binding '(("C-x <up>"    . windmove-up)
                   ("C-x <down>"  . windmove-down)
                   ("C-x <right>" . windmove-right)
                   ("C-x <left>"  . windmove-left)))
  (global-set-key (kbd (car binding)) (cdr binding)))
;; F9 runs `compile'; F10 runs `recompile'.
(global-set-key (kbd "<f9>") #'compile)
(global-set-key (kbd "<f10>") #'recompile)
(defun smooth-scroll (increment)
  "Scroll by INCREMENT lines six times, pausing briefly between steps.
Each step calls `scroll-up' with INCREMENT, so a negative INCREMENT
scrolls the other way.  The pauses between steps are 0.05, 0.02,
0.02, 0.05 and 0.06 seconds."
  (scroll-up increment)
  (dolist (pause '(0.05 0.02 0.02 0.05 0.06))
    (sit-for pause)
    (scroll-up increment)))

;; Mouse wheel: button 5 scrolls one way, button 4 the other.
;; The lambdas are left unquoted so they are real function objects that the
;; byte compiler can compile, rather than quoted list data.
(global-set-key [mouse-5] (lambda () (interactive) (smooth-scroll 1)))
(global-set-key [mouse-4] (lambda () (interactive) (smooth-scroll -1)))
;; Bind C-l to `goto-line'.
;; NOTE(review): C-l normally has a default binding for recentering the
;; window; this replaces it globally -- confirm that is intended.
(global-set-key (kbd "C-l") #'goto-line)
;; Default to 4-column tabs and a 4-column "bsd"-style indent for CC Mode.
;; `tab-width' is set directly: `default-tab-width' was only an obsolete alias
;; for its default value and no longer exists in recent Emacs releases, where
;; setting it silently does nothing.
(setq-default tab-width 4)
(setq-default c-basic-offset 4)
(setq-default c-default-style "bsd")
;; Show "<buffer name> - [Your name here]'s Emacs" in the frame title
;; (%b expands to the buffer name).
;; `setq' takes SYMBOL VALUE pairs; the stray trailing `buffer-file-name' in the
;; original form made the argument count odd, which either signals an error or
;; resets `buffer-file-name' to nil depending on the Emacs version.
(setq frame-title-format "%b - [Your name here]'s Emacs")
Visualising with MPE |
#include "mpe.h"
#include "mpe_graphics.h"

//...the start of your program here and initialise your MPI env. as per usual

MPE_XGraph graph;

// Open the MPE graphics window of size 400x400 at 600,-1
int ierr = MPE_Open_graphics(&graph, MPI_COMM_WORLD, NULL, 600, -1, 400, 400, 0);
// Alternatively you can set the capture file...
//ierr = MPE_CaptureFile(graph, "outputimage", 1);
if (ierr != MPE_SUCCESS) {
    printf("Error Launching X world\n");
    MPI_Abort(MPI_COMM_WORLD, 1);
    exit(1);
}

//...skip out some code here and we enter the main body of our MPI code.

// Each non-blocking send needs its own request handle: reusing `request` for
// both would overwrite the first handle, so that send could never be
// completed. The second send keeps using the existing `request` variable.
MPI_Request topRequest;
MPI_Isend(&cells[topload], NX, structTypeTop, (rank + 1) % size, 1, MPI_COMM_WORLD, &topRequest);
MPI_Isend(&cells[bottomload], NX, structTypeBot, (rank + size - 1) % size, 2, MPI_COMM_WORLD, &request);

// This forces our graph to update after having seen the most recent comms
ierr = MPE_Update(graph);

MPI_Recv(&cells[bottomloadR], NX, structTypeTop, (rank + size - 1) % size, 1, MPI_COMM_WORLD, &status);
MPI_Recv(&cells[toploadR], NX, structTypeBot, (rank + 1) % size, 2, MPI_COMM_WORLD, &status);

// Complete both sends before the send buffers are touched again. Waiting
// leaves each handle as MPI_REQUEST_NULL, so a later wait on `request`
// elsewhere in the program returns immediately.
MPI_Wait(&topRequest, MPI_STATUS_IGNORE);
MPI_Wait(&request, MPI_STATUS_IGNORE);

// Again, after receiving let's update our graph
ierr = MPE_Update(graph);

// ... continue looping round. When we are finished with MPI let's sync our clocks..
MPE_Log_sync_clocks();
MPE_Finish_log(argv[0]);

// And also close our graphics
MPE_Close_graphics(&graph);
C-x C-f | Open a buffer |
C-x C-s | Save the file |
C-x C-w | Save the file with a new name |
C-home | Move to the start of the buffer |
C-end | Go to the end of the buffer |
C-space | Begin highlighting |
M-w | Copy |
C-w | Cut |
C-y | Paste |
M-x replace-string | Replace string (find & replace) use the minibuffer to guide you through a string replacement (yes you literally type in replace-string) |
M-x query-replace | Replace string with prompts for each find |
C-s | Search for a given string using the minibuffer |
M-x reverse-region | Reverse the lines in a region |
C-x C-c | Close emacs |
C-x 3 | Split the window into two side by side (vertical divider) |
C-x 2 | Split the window into two, one above the other (horizontal divider) |
C-x 1 | Make this buffer fill the window |
C-x 0 | Close the active window |
C-x C-b | List the open buffers (use C-x b to select a buffer by name) |
M-x c++-mode | Switch the buffer's highlighting to c++-mode (substitute the mode for your own language, e.g. python-mode). Emacs generally does this automatically based on the file extension. |
Coming from a country that is not the US where zip/postal codes are hyper specific, it always drives me nuts when you are filling in a form ...