#define _CRT_SECURE_NO_WARNINGS

// NOTE(review): the original #include targets were lost when this file was
// extracted. The headers below are the ones the visible code needs.
#include <math.h>
#include <stdio.h>
#include <stdlib.h>

#include <windows.h>        // GetFileAttributesA, INVALID_FILE_ATTRIBUTES, byte

#include <cuda_runtime.h>
#include <cutil.h>          // CUDA_SAFE_CALL, CUT_CHECK_ERROR, CUT_DEVICE_INIT

// TODO(review): restore the project header that declares ImageView and
// IF_UINT_32_GREY; its name is not visible in this file.

// Define DBG_OUTPUT to trace the decoder. Trace output is written with
// printf (host and device), so it ends up on stdout.
//#define DBG_OUTPUT

#ifdef DBG_OUTPUT
#define DBG_PRINTF(...) printf(__VA_ARGS__)
#else
#define DBG_PRINTF(...) ((void)0)
#endif

///////////////////////////////////////////////////
// Typedefs
typedef unsigned char ubyte;
//typedef char byte;
typedef char int8;
typedef short int16;
typedef int int32;
typedef unsigned char uint8;
typedef unsigned short uint16;
typedef unsigned int uint32;
typedef unsigned int uint;

///////////////////////////////////////////////////
// Defines
#define ROWS 768
#define COLS 1024

// Byte offset of the lookup table inside the hnd file data.
#define HND_LUT_OFFSET 1024
// Size in bytes of the lookup table (four 2-bit entries per byte).
#define HND_LUT_SIZE ((COLS * (ROWS - 1)) / 4)
// Number of pixels stored uncompressed: the first row plus one pixel.
#define HND_PLAIN_PIXELS (COLS + 1)
// Number of pixels stored as differences.
#define HND_NUM_DIFFS (ROWS * COLS - HND_PLAIN_PIXELS)
// Smallest file the decoder can consume: every difference takes >= 1 byte.
#define HND_MIN_FILE_SIZE \
    (HND_LUT_OFFSET + HND_LUT_SIZE + HND_PLAIN_PIXELS * sizeof(uint32) + HND_NUM_DIFFS)

// Upper bound on threads per block used when launching the kernel.
#define MAX_THREADS_PER_BLOCK 512

///////////////////////////////////////////////////
// functions

//! This function behaves like the crt function
//! memcpy(), but works on GPU. It copies byte by byte, so neither
//! pointer needs any particular alignment.
//! \param[out] dest The destination src should be copied to
//! \param[in] src The source of the memory to be copied
//! \param[in] count The number of bytes to be copied from src to dest
__device__ inline void d_memcpy(void* dest, const void* src, size_t count)
{
    ubyte* dstp = (ubyte*)dest;
    const ubyte* srcp = (const ubyte*)src;

    while (count--)
        *dstp++ = *srcp++;
}

//! This function takes the unfiltered hnd file data
//! and converts it to raw image file data.
//! \param[in] hnd The unfiltered hnd image file data. It may start at
//!     any byte address; all multi-byte values are read byte-wise.
//! \param[out] raw The pointer to write the raw file
//!     data to. The memory that the pointer points to
//!     must be allocated by (sizeof(uint32) * ROWS * COLS).
__device__ void d_hndToRawData(const void* hnd, uint32* raw)
{
    // Pointer to lookup table
    const ubyte* const lut = (const ubyte*)hnd + HND_LUT_OFFSET;
    // Pointer to uncompressed pixel data
    const ubyte* const pixels = lut + HND_LUT_SIZE;
    // Pointer to pixel differences (compressed pixel data)
    const ubyte* diffs = pixels + HND_PLAIN_PIXELS * sizeof(uint32);

    // Copy the uncompressed pixels: the whole first row and the first
    // pixel of the second row. A byte-wise copy is used because `pixels`
    // is not guaranteed to be 4-byte aligned, and a misaligned uint32
    // load is an error on the device.
    d_memcpy(raw, pixels, HND_PLAIN_PIXELS * sizeof(uint32));

    // The lookup table holds 2-bit entries, four per byte, lowest bits
    // first. lutByte is the byte we are in, lutSlot the entry within it.
    uint lutByte = 0;
    uint lutSlot = 0;

    for (uint i = HND_PLAIN_PIXELS; i < ROWS * COLS; ++i)
    {
        const uint32 r11 = raw[i - COLS - 1];   // up-left neighbour
        const uint32 r12 = raw[i - COLS];       // up neighbour
        const uint32 r21 = raw[i - 1];          // left neighbour

        const uint code = (lut[lutByte] >> (2 * lutSlot)) & 0x03;
        if (++lutSlot == 4)
        {
            lutSlot = 0;
            ++lutByte;
        }

        // The entry selects the width of the stored difference. The
        // difference stream is packed, so values are fetched byte-wise
        // to avoid misaligned loads.
        int diff = 0;
        switch (code)
        {
        case 0:
        {
            int8 d8;
            d_memcpy(&d8, diffs, sizeof(d8));
            diffs += sizeof(d8);
            diff = (int)d8;
            break;
        }
        case 1:
        {
            int16 d16;
            d_memcpy(&d16, diffs, sizeof(d16));
            diffs += sizeof(d16);
            diff = (int)d16;
            break;
        }
        case 2:
        {
            int32 d32;
            d_memcpy(&d32, diffs, sizeof(d32));
            diffs += sizeof(d32);
            diff = (int)d32;
            break;
        }
        default:
            // NOTE(review): entry value 3 was never handled; the original
            // left `diff` uninitialised here. It is treated as a zero
            // difference that consumes no bytes -- confirm against the
            // file format.
            break;
        }

#ifdef DBG_OUTPUT
        {
            const long long predicted =
                (long long)r21 + (long long)diff - (long long)r11 + (long long)r12;
            if (predicted < 0 || predicted > 0xFFFFFFFFLL)
            {
                printf("uint32 overflow detected!\n");
            }
        }
#endif

        raw[i] = r21 + diff - r11 + r12;

        DBG_PRINTF("r11 = %x, r12 = %x, r21 = %x, diff = %d\n", r11, r12, r21, diff);
    }
}

//! This kernel will convert a bunch of hnd files into raw files.
//! Launch layout: 1D grid of 1D blocks, one thread per file; thread
//! threadIdx.x + blockIdx.x * blockDim.x converts the file with that index.
//! No shared memory is used.
//! \param[in] hnds The contents of all hnd files, back to back
//! \param[in] hndsOffsets Byte offset of each hnd file inside hnds
//! \param[out] raws Output buffer, rawsSize bytes per file
//! \param[in] rawsSize Size in bytes of one raw file
//! \param[in] numFiles Number of files; threads with an index at or beyond
//!     it do nothing. The default disables the guard, which matches the
//!     behaviour of the former four-argument kernel.
__global__ void hndsToRawsKernel(const void* hnds, uint* hndsOffsets, uint32* raws,
                                 uint32 rawsSize, uint numFiles = 0xFFFFFFFFu)
{
    const uint i = threadIdx.x + (blockIdx.x * blockDim.x);

    // The grid rarely divides the file count evenly; surplus threads must
    // not touch hndsOffsets or raws.
    if (i >= numFiles)
        return;

    DBG_PRINTF("i = %u\n", i);

    // size_t arithmetic: i * rawsSize overflows 32 bits for large batches.
    d_hndToRawData((const ubyte*)hnds + hndsOffsets[i],
                   (uint32*)((ubyte*)raws + (size_t)i * rawsSize));
}

//! This function will call a kernel to batch convert a bunch of
//! hnd files to raw files. On a host-side failure (out of memory,
//! unreadable or too small input) an error is printed to stderr and the
//! function returns without writing further output.
//! \param[in] hndFiles The hnd files to convert
//! \param[out] rawFiles The resulting raw files. There must be
//!     one raw file for each hnd file
//! \param[in] numFiles The number of hnd and raw files.
void convertHndFilesToRawFiles(FILE** hndFiles, FILE** rawFiles, uint numFiles)
{
    if (numFiles == 0)
        return;

    const uint rawFileSize = COLS * ROWS * sizeof(uint32);
    const size_t totalRawsSize = (size_t)rawFileSize * numFiles;

    uint totalHndsSize = 0;
    uint gridSize = 0;
    uint blockSize = 0;

    //////////////////////////////////////////////////////////////////
    // Host memory
    void* h_hnds = NULL;            // Contents of hnd files
    uint* h_hndsOffsets = NULL;     // Offsets each hnd file in h_hnds
    uint32* h_raws = NULL;          // Raw files
    uint* h_hndsSizes = NULL;       // Sizes of each hnd file

    //////////////////////////////////////////////////////////////////
    // Device memory
    void* d_hnds = NULL;
    uint* d_hndsOffsets = NULL;
    uint32* d_raws = NULL;

    h_hndsOffsets = (uint*)malloc(sizeof(uint) * numFiles);
    h_hndsSizes = (uint*)malloc(sizeof(uint) * numFiles);
    if (h_hndsOffsets == NULL || h_hndsSizes == NULL)
    {
        fprintf(stderr, "Error: out of host memory.\n");
        goto cleanup;
    }

    // Fill up totalHndsSize, h_hndsOffsets, and h_hndsSizes
    for (uint i = 0; i < numFiles; ++i)
    {
        long fileSize = -1;
        if (fseek(hndFiles[i], 0, SEEK_END) == 0)
            fileSize = ftell(hndFiles[i]);
        rewind(hndFiles[i]);

        if (fileSize < 0)
        {
            fprintf(stderr, "Error: cannot determine the size of hnd file %u.\n", i);
            goto cleanup;
        }
        // The decoder reads at least this many bytes unconditionally.
        if ((unsigned long)fileSize < (unsigned long)HND_MIN_FILE_SIZE)
        {
            fprintf(stderr, "Error: hnd file %u is too small (%ld bytes).\n", i, fileSize);
            goto cleanup;
        }
        // Offsets are 32-bit on the device; refuse batches that do not fit.
        if ((unsigned long)fileSize > (unsigned long)(0xFFFFFFFFu - totalHndsSize))
        {
            fprintf(stderr, "Error: hnd files are too large to process in one batch.\n");
            goto cleanup;
        }

        h_hndsOffsets[i] = totalHndsSize;
        h_hndsSizes[i] = (uint)fileSize;
        totalHndsSize += (uint)fileSize;
    }

    h_hnds = malloc(totalHndsSize);
    h_raws = (uint32*)malloc(totalRawsSize);
    if (h_hnds == NULL || h_raws == NULL)
    {
        fprintf(stderr, "Error: out of host memory.\n");
        goto cleanup;
    }

    // Fill h_hnds
    for (uint i = 0; i < numFiles; ++i)
    {
        const size_t got =
            fread((ubyte*)h_hnds + h_hndsOffsets[i], h_hndsSizes[i], 1, hndFiles[i]);
        rewind(hndFiles[i]);
        if (got != 1)
        {
            fprintf(stderr, "Error: failed to read hnd file %u.\n", i);
            goto cleanup;
        }
    }

    // Alloc, write d_hndsOffsets
    CUDA_SAFE_CALL(cudaMalloc((void**)&d_hndsOffsets, sizeof(uint) * numFiles));
    CUDA_SAFE_CALL(cudaMemcpy(d_hndsOffsets, h_hndsOffsets, sizeof(uint) * numFiles,
                              cudaMemcpyHostToDevice));

    // Alloc, write to d_hnds
    CUDA_SAFE_CALL(cudaMalloc(&d_hnds, totalHndsSize));
    CUDA_SAFE_CALL(cudaMemcpy(d_hnds, h_hnds, totalHndsSize, cudaMemcpyHostToDevice));

    // Memory for the contents of each raw file
    CUDA_SAFE_CALL(cudaMalloc((void**)&d_raws, totalRawsSize));

    // We need a thread for each file: as few blocks as possible, with the
    // files spread evenly over them.
    gridSize = (numFiles + MAX_THREADS_PER_BLOCK - 1) / MAX_THREADS_PER_BLOCK;
    blockSize = (numFiles + gridSize - 1) / gridSize;

    CUT_CHECK_ERROR("before kernel invocation");
    hndsToRawsKernel<<<gridSize, blockSize>>>(d_hnds, d_hndsOffsets, d_raws,
                                              rawFileSize, numFiles);
    CUT_CHECK_ERROR("after kernel invocation");
    // In-kernel faults only surface at a synchronising call; check it.
    CUDA_SAFE_CALL(cudaThreadSynchronize());

    CUDA_SAFE_CALL(cudaMemcpy(h_raws, d_raws, totalRawsSize, cudaMemcpyDeviceToHost));

    for (uint i = 0; i < numFiles; ++i)
    {
        ubyte* const image = (ubyte*)h_raws + (size_t)rawFileSize * i;

        rewind(rawFiles[i]);
        if (fwrite(image, rawFileSize, 1, rawFiles[i]) != 1)
            fprintf(stderr, "Error: failed to write raw file %u.\n", i);

        // Show the decoded image and wait for the user before continuing.
        static ImageView imgv(IF_UINT_32_GREY, COLS, ROWS);
        imgv.SetImageData((byte*)image);
        imgv.Update();
        system("pause");
    }

cleanup:
    free(h_raws);
    free(h_hndsSizes);
    free(h_hnds);
    free(h_hndsOffsets);

    // cudaFree accepts a null pointer, so this is safe on every path.
    CUDA_SAFE_CALL(cudaFree(d_raws));
    CUDA_SAFE_CALL(cudaFree(d_hnds));
    CUDA_SAFE_CALL(cudaFree(d_hndsOffsets));
}

//! Check the arguments to make sure they are correct. Arguments come in
//! (hnd_file, raw_file) pairs; every hnd_file must already exist.
//! \return False if the provided arguments are invalid, true
//!     if they are valid
bool checkArgs(int argc, char* argv[])
{
    if (argc < 3 || (argc - 1) % 2 != 0)
    {
        printf("Usage: hnd_to_raw_cuda hnd_file raw_file (... ...)\n");
        return false;
    }

    // The hnd inputs sit at the odd indices 1, 3, 5, ...
    for (int i = 1; i < argc; i += 2)
    {
        // The A variant is used explicitly because argv holds narrow strings.
        if (GetFileAttributesA(argv[i]) == INVALID_FILE_ATTRIBUTES)
        {
            printf("Error: hnd_file (%s) does not exist.\n", argv[i]);
            return false;
        }
    }

    return true;
}

//! Program entry point: opens every (hnd_file, raw_file) pair given on the
//! command line and converts them in one batch.
//! \return 0 on success, EXIT_FAILURE if the arguments are invalid or a
//!     file cannot be opened
int main(int argc, char* argv[])
{
    CUT_DEVICE_INIT(argc, argv);

    if (!checkArgs(argc, argv))
        return EXIT_FAILURE;

    const unsigned int numpairs = (argc - 1) / 2;
    int status = 0;

    // calloc so that unopened slots are NULL and can be skipped on cleanup.
    FILE** hndFiles = (FILE**)calloc(numpairs, sizeof(FILE*));
    FILE** rawFiles = (FILE**)calloc(numpairs, sizeof(FILE*));

    if (hndFiles == NULL || rawFiles == NULL)
    {
        fprintf(stderr, "Error: out of host memory.\n");
        status = EXIT_FAILURE;
    }

    for (unsigned int i = 0; status == 0 && i < numpairs; ++i)
    {
        const char* const hndPath = argv[(i * 2) + 1];
        const char* const rawPath = argv[(i * 2) + 2];

        // Binary mode: text mode would translate line endings and stop at
        // Ctrl-Z on Windows, corrupting the image data.
        hndFiles[i] = fopen(hndPath, "rb");
        if (hndFiles[i] == NULL)
        {
            fprintf(stderr, "Error: cannot open hnd_file (%s).\n", hndPath);
            status = EXIT_FAILURE;
            break;
        }

        rawFiles[i] = fopen(rawPath, "wb");
        if (rawFiles[i] == NULL)
        {
            fprintf(stderr, "Error: cannot open raw_file (%s).\n", rawPath);
            status = EXIT_FAILURE;
            break;
        }
    }

    if (status == 0)
        convertHndFilesToRawFiles(hndFiles, rawFiles, numpairs);

    for (unsigned int i = 0; i < numpairs; ++i)
    {
        if (hndFiles != NULL && hndFiles[i] != NULL)
            fclose(hndFiles[i]);
        if (rawFiles != NULL && rawFiles[i] != NULL)
            fclose(rawFiles[i]);
    }

    free(hndFiles);
    free(rawFiles);

    return status;
}