Path: blob/master/DOTA_devkit/poly_nms_gpu/poly_overlaps_kernel.cu
2288 views
1#include "poly_overlaps.hpp"2#include <vector>3#include <iostream>4#include <cmath>5#include <cstdio>6#include<algorithm>78using namespace std;910//##define CUDA_CHECK(condition)\11//12// do {13// cudaError_t error = condition;14// if (error != cudaSuccess) {15//16// }17// }1819#define CUDA_CHECK(condition) \20/* Code block avoids redefinition of cudaError_t error */ \21do { \22cudaError_t error = condition; \23if (error != cudaSuccess) { \24std::cout << cudaGetErrorString(error) << std::endl; \25} \26} while (0)2728#define DIVUP(m,n) ((m) / (n) + ((m) % (n) > 0))29int const threadsPerBlock = sizeof(unsigned long long) * 8;303132#define maxn 51033const double eps=1E-8;3435__device__ inline int sig(float d){36return(d>eps)-(d<-eps);37}38// struct Point{39// double x,y; Point(){}40// Point(double x,double y):x(x),y(y){}41// bool operator==(const Point&p)const{42// return sig(x-p.x)==0&&sig(y-p.y)==0;43// }44// };4546__device__ inline int point_eq(const float2 a, const float2 b) {47return (sig(a.x - b.x) == 0) && (sig(a.y - b.y)==0);48}4950__device__ inline void point_swap(float2 *a, float2 *b) {51float2 temp = *a;52*a = *b;53*b = temp;54}5556__device__ inline void point_reverse(float2 *first, float2* last)57{58while ((first!=last)&&(first!=--last)) {59point_swap (first,last);60++first;61}62}63// void point_reverse(Point* first, Point* last)64// {65// while ((first!=last)&&(first!=--last)) {66// point_swap (first,last);67// ++first;68// }69// }707172__device__ inline float cross(float2 o,float2 a,float2 b){ //叉积73return(a.x-o.x)*(b.y-o.y)-(b.x-o.x)*(a.y-o.y);74}75__device__ inline float area(float2* ps,int n){76ps[n]=ps[0];77float res=0;78for(int i=0;i<n;i++){79res+=ps[i].x*ps[i+1].y-ps[i].y*ps[i+1].x;80}81return res/2.0;82}83__device__ inline int lineCross(float2 a,float2 b,float2 c,float2 d,float2&p){84float s1,s2;85s1=cross(a,b,c);86s2=cross(a,b,d);87if(sig(s1)==0&&sig(s2)==0) return 2;88if(sig(s2-s1)==0) return 0;89p.x=(c.x*s2-d.x*s1)/(s2-s1);90p.y=(c.y*s2-d.y*s1)/(s2-s1);91return 1;92}93949596//多边形切割97//用直线ab切割多边形p,切割后的在向量(a,b)的左侧,并原地保存切割结果98//如果退化为一个点,也会返回去,此时n为199// __device__ inline void polygon_cut(float2*p,int&n,float2 a,float2 b){100// // TODO: The static variable may be the reason, why single thread is ok, multiple threads are not work101// printf("polygon_cut, offset\n");102103// static float2 pp[maxn];104// int m=0;p[n]=p[0];105// for(int i=0;i<n;i++){106// if(sig(cross(a,b,p[i]))>0) pp[m++]=p[i];107// if(sig(cross(a,b,p[i]))!=sig(cross(a,b,p[i+1])))108// lineCross(a,b,p[i],p[i+1],pp[m++]);109// }110// n=0;111112// for(int i=0;i<m;i++)113// if(!i||!(point_eq(pp[i], pp[i-1])))114// p[n++]=pp[i];115// // while(n>1&&p[n-1]==p[0])n--;116// while(n>1&&point_eq(p[n-1], p[0]))n--;117// // int x = blockIdx.x * blockDim.x + threadIdx.x;118// // // corresponding to k119// // int y = blockIdx.y * blockDim.y + threadIdx.y;120// // int offset = x * 1 + y;121// // printf("polygon_cut, offset\n");122// }123124__device__ inline void polygon_cut(float2*p,int&n,float2 a,float2 b, float2* pp){125// TODO: The static variable may be the reason, why single thread is ok, multiple threads are not work126// printf("polygon_cut, offset\n");127128// static float2 pp[maxn];129int m=0;p[n]=p[0];130for(int i=0;i<n;i++){131if(sig(cross(a,b,p[i]))>0) pp[m++]=p[i];132if(sig(cross(a,b,p[i]))!=sig(cross(a,b,p[i+1])))133lineCross(a,b,p[i],p[i+1],pp[m++]);134}135n=0;136137for(int i=0;i<m;i++)138if(!i||!(point_eq(pp[i], pp[i-1])))139p[n++]=pp[i];140// while(n>1&&p[n-1]==p[0])n--;141while(n>1&&point_eq(p[n-1], p[0]))n--;142// int x = blockIdx.x * blockDim.x + threadIdx.x;143// // corresponding to k144// int y = blockIdx.y * blockDim.y + threadIdx.y;145// int offset = x * 1 + y;146// printf("polygon_cut, offset\n");147}148149//---------------华丽的分隔线-----------------//150//返回三角形oab和三角形ocd的有向交面积,o是原点//151__device__ inline float intersectArea(float2 a,float2 b,float2 c,float2 d){152float2 o = make_float2(0,0);153int s1=sig(cross(o,a,b));154int s2=sig(cross(o,c,d));155if(s1==0||s2==0)return 0.0;//退化,面积为0156// if(s1==-1) swap(a,b);157// if(s2==-1) swap(c,d);158// printf("before swap\n");159// printf("a.x %f, a.y %f\n", a.x, a.y);160// printf("b.x %f, b.y %f\n", b.x, b.y);161if(s1 == -1) point_swap(&a, &b);162// printf("a.x %f, a.y %f\n", a.x, a.y);163// printf("b.x %f, b.y %f\n", b.x, b.y);164// printf("after swap\n");165if(s2 == -1) point_swap(&c, &d);166float2 p[10]={o,a,b};167int n=3;168169// // manually implement polygon_cut(p, n, a, b)170// float2 pp[maxn];171// // polygon_cut(p, n, o, c)172// int m=0;p[n]=p[0];173// for(int i=0;i<n;i++){174// if(sig(cross(o,c,p[i]))>0) pp[m++]=p[i];175// if(sig(cross(o,c,p[i]))!=sig(cross(o,c,p[i+1])))176// lineCross(o,c,p[i],p[i+1],pp[m++]);177// }178// n=0;179180// for(int i=0;i<m;i++)181// if(!i||!(point_eq(pp[i], pp[i-1])))182// p[n++]=pp[i];183// while(n>1&&point_eq(p[n-1], p[0]))n--;184185// // polygon_cut(p, n, c, d)186// m=0;p[n]=p[0];187// for(int i=0;i<n;i++){188// if(sig(cross(c,d,p[i]))>0) pp[m++]=p[i];189// if(sig(cross(c,d,p[i]))!=sig(cross(c,d,p[i+1])))190// lineCross(c,d,p[i],p[i+1],pp[m++]);191// }192// n=0;193194// for(int i=0;i<m;i++)195// if(!i||!(point_eq(pp[i], pp[i-1])))196// p[n++]=pp[i];197// while(n>1&&point_eq(p[n-1], p[0]))n--;198199// // polygon_cut(p, n, d, o)200// m=0;p[n]=p[0];201// for(int i=0;i<n;i++){202// if(sig(cross(d,o,p[i]))>0) pp[m++]=p[i];203// if(sig(cross(d,o,p[i]))!=sig(cross(d,o,p[i+1])))204// lineCross(d,o,p[i],p[i+1],pp[m++]);205// }206// n=0;207208// for(int i=0;i<m;i++)209// if(!i||!(point_eq(pp[i], pp[i-1])))210// p[n++]=pp[i];211// while(n>1&&point_eq(p[n-1], p[0]))n--;212float2 pp[maxn];213polygon_cut(p,n,o,c,pp);214polygon_cut(p,n,c,d,pp);215polygon_cut(p,n,d,o,pp);216float res=fabs(area(p,n));217int x = blockIdx.x * blockDim.x + threadIdx.x;218// corresponding to k219int y = blockIdx.y * blockDim.y + threadIdx.y;220int offset = x * 1 + y;221// printf("intersectArea2, offset: %d, %f, %f, %f, %f, %f, %f, %f, %f, res: %f\n", offset, a.x, a.y, b.x, b.y, c.x, c.y, d.x, d.y, res);222if(s1*s2==-1) res=-res;return res;223224}225//求两多边形的交面积226// TODO: here changed the input, this need to be debug227__device__ inline float intersectArea(float2*ps1,int n1,float2*ps2,int n2){228int x = blockIdx.x * blockDim.x + threadIdx.x;229// corresponding to k230int y = blockIdx.y * blockDim.y + threadIdx.y;231int offset = x * 1 + y;232if(area(ps1,n1)<0) point_reverse(ps1,ps1+n1);233if(area(ps2,n2)<0) point_reverse(ps2,ps2+n2);234ps1[n1]=ps1[0];235ps2[n2]=ps2[0];236float res=0;237for(int i=0;i<n1;i++){238for(int j=0;j<n2;j++){239// printf("offset: %d, %f, %f, %f, %f, %f, %f, %f, %f addArea: %f \n",240// offset, ps1[i].x, ps1[i].y, ps1[i + 1].x, ps1[i + 1].y, ps2[j].x, ps2[j].y,241// ps2[j + 1].x, ps2[j + 1].y, intersectArea(ps1[i],ps1[i+1],ps2[j],ps2[j+1]));242243// float2 a = ps1[i];244// float2 b = ps1[i+1];245// float2 c = ps2[j];246// float2 d = ps2[j+1];247// res+=intersectArea2(a,b,c,d);248res+=intersectArea(ps1[i],ps1[i+1],ps2[j],ps2[j+1]);249}250}251return res;//assumeresispositive!252}253254255256257//__device__ inline double iou_poly(vector<double> p, vector<double> q) {258// Point ps1[maxn],ps2[maxn];259// int n1 = 4;260// int n2 = 4;261// for (int i = 0; i < 4; i++) {262// ps1[i].x = p[i * 2];263// ps1[i].y = p[i * 2 + 1];264//265// ps2[i].x = q[i * 2];266// ps2[i].y = q[i * 2 + 1];267// }268// double inter_area = intersectArea(ps1, n1, ps2, n2);269// double union_area = fabs(area(ps1, n1)) + fabs(area(ps2, n2)) - inter_area;270// double iou = inter_area / union_area;271//272//// cout << "inter_area:" << inter_area << endl;273//// cout << "union_area:" << union_area << endl;274//// cout << "iou:" << iou << endl;275//276// return iou;277//}278279__device__ inline void RotBox2Poly(float const * const dbox, float2 * ps) {280float cs = cos(dbox[4]);281float ss = sin(dbox[4]);282float w = dbox[2];283float h = dbox[3];284285float x_ctr = dbox[0];286float y_ctr = dbox[1];287ps[0].x = x_ctr + cs * (w / 2.0) - ss * (-h / 2.0);288ps[1].x = x_ctr + cs * (w / 2.0) - ss * (h / 2.0);289ps[2].x = x_ctr + cs * (-w / 2.0) - ss * (h / 2.0);290ps[3].x = x_ctr + cs * (-w / 2.0) - ss * (-h / 2.0);291292ps[0].y = y_ctr + ss * (w / 2.0) + cs * (-h / 2.0);293ps[1].y = y_ctr + ss * (w / 2.0) + cs * (h / 2.0);294ps[2].y = y_ctr + ss * (-w / 2.0) + cs * (h / 2.0);295ps[3].y = y_ctr + ss * (-w / 2.0) + cs * (-h / 2.0);296}297298299__device__ inline float devPolyIoU(float const * const dbbox1, float const * const dbbox2) {300301302float2 ps1[maxn], ps2[maxn];303int n1 = 4;304int n2 = 4;305306307308309RotBox2Poly(dbbox1, ps1);310RotBox2Poly(dbbox2, ps2);311312// printf("ps1: %f, %f, %f, %f, %f, %f, %f, %f\n", ps1[0].x, ps1[0].y, ps1[1].x, ps1[1].y, ps1[2].x, ps1[2].y, ps1[3].x, ps1[3].y);313// printf("ps2: %f, %f, %f, %f, %f, %f, %f, %f\n", ps2[0].x, ps2[0].y, ps2[1].x, ps2[1].y, ps2[2].x, ps2[2].y, ps2[3].x, ps2[3].y);314float inter_area = intersectArea(ps1, n1, ps2, n2);315//printf("inter_area: %f \n", inter_area);316float union_area = fabs(area(ps1, n1)) + fabs(area(ps2, n2)) - inter_area;317//printf("before union_area\n");318//printf("union_area: %f \n", union_area);319float iou = 0;320if (union_area == 0) {321iou = (inter_area + 1) / (union_area + 1);322} else {323iou = inter_area / union_area;324}325// printf("iou: %f \n", iou);326return iou;327}328329__global__ void overlaps_kernel(const int N, const int K, const float* dev_boxes,330const float * dev_query_boxes, float* dev_overlaps) {331332// const int col_start = blockIdx.y;333// const int row_start = blockIdx.x;334335// corresponding to n336int x = blockIdx.x * blockDim.x + threadIdx.x;337// corresponding to k338int y = blockIdx.y * blockDim.y + threadIdx.y;339if ((x < N) && (y < K)) {340int offset = x * K + y;341342//printf343// printf("offset: %d dbbox: %f %f %f %f %f\n", offset, (dev_boxes + x*5)[0],344// (dev_boxes + x*5)[1], (dev_boxes + x*5)[2], (dev_boxes + x*5)[3],345// (dev_boxes + x*5)[4] );346// printf("offset: %d dbbox: %f %f %f %f %f\n", offset, (dev_query_boxes + y*5)[0],347// (dev_query_boxes + y*5)[1], (dev_query_boxes + y*5)[2], (dev_query_boxes + y*5)[3],348// (dev_query_boxes + y*5)[4] );349350dev_overlaps[offset] = devPolyIoU(dev_boxes + x * 5, dev_query_boxes + y * 5);351}352}353354355void _set_device(int device_id) {356int current_device;357CUDA_CHECK(cudaGetDevice(¤t_device));358if (current_device == device_id) {359return;360}361// The call to cudaSetDevice must come before any calls to Get, which362// may perform initialization using the GPU.363CUDA_CHECK(cudaSetDevice(device_id));364}365366367void _overlaps(float* overlaps,const float* boxes,const float* query_boxes, int n, int k, int device_id) {368369_set_device(device_id);370371float* overlaps_dev = NULL;372float* boxes_dev = NULL;373float* query_boxes_dev = NULL;374375376CUDA_CHECK(cudaMalloc(&boxes_dev,377n * 5 * sizeof(float)));378379380381CUDA_CHECK(cudaMemcpy(boxes_dev,382boxes,383n * 5 * sizeof(float),384cudaMemcpyHostToDevice));385386387388CUDA_CHECK(cudaMalloc(&query_boxes_dev,389k * 5 * sizeof(float)));390391392393CUDA_CHECK(cudaMemcpy(query_boxes_dev,394query_boxes,395k * 5 * sizeof(float),396cudaMemcpyHostToDevice));397398CUDA_CHECK(cudaMalloc(&overlaps_dev,399n * k * sizeof(float)));400401402if (true){}403404405dim3 blocks(DIVUP(n, 32),406DIVUP(k, 32));407408dim3 threads(32, 32);409410411overlaps_kernel<<<blocks, threads>>>(n, k,412boxes_dev,413query_boxes_dev,414overlaps_dev);415416CUDA_CHECK(cudaMemcpy(overlaps,417overlaps_dev,418n * k * sizeof(float),419cudaMemcpyDeviceToHost));420421422CUDA_CHECK(cudaFree(overlaps_dev));423CUDA_CHECK(cudaFree(boxes_dev));424CUDA_CHECK(cudaFree(query_boxes_dev));425426}427428429