Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
hukaixuan19970627
GitHub Repository: hukaixuan19970627/yolov5_obb
Path: blob/master/DOTA_devkit/poly_nms_gpu/poly_overlaps_kernel.cu
2288 views
1
2
#include "poly_overlaps.hpp"
3
#include <vector>
4
#include <iostream>
5
#include <cmath>
6
#include <cstdio>
7
#include<algorithm>
8
9
using namespace std;
10
11
//##define CUDA_CHECK(condition)\
12
//
13
// do {
14
// cudaError_t error = condition;
15
// if (error != cudaSuccess) {
16
//
17
// }
18
// }
19
20
#define CUDA_CHECK(condition) \
21
/* Code block avoids redefinition of cudaError_t error */ \
22
do { \
23
cudaError_t error = condition; \
24
if (error != cudaSuccess) { \
25
std::cout << cudaGetErrorString(error) << std::endl; \
26
} \
27
} while (0)
28
29
#define DIVUP(m,n) ((m) / (n) + ((m) % (n) > 0))
30
int const threadsPerBlock = sizeof(unsigned long long) * 8;
31
32
33
#define maxn 510
34
const double eps=1E-8;
35
36
__device__ inline int sig(float d){
37
return(d>eps)-(d<-eps);
38
}
39
// struct Point{
40
// double x,y; Point(){}
41
// Point(double x,double y):x(x),y(y){}
42
// bool operator==(const Point&p)const{
43
// return sig(x-p.x)==0&&sig(y-p.y)==0;
44
// }
45
// };
46
47
__device__ inline int point_eq(const float2 a, const float2 b) {
48
return (sig(a.x - b.x) == 0) && (sig(a.y - b.y)==0);
49
}
50
51
__device__ inline void point_swap(float2 *a, float2 *b) {
52
float2 temp = *a;
53
*a = *b;
54
*b = temp;
55
}
56
57
__device__ inline void point_reverse(float2 *first, float2* last)
58
{
59
while ((first!=last)&&(first!=--last)) {
60
point_swap (first,last);
61
++first;
62
}
63
}
64
// void point_reverse(Point* first, Point* last)
65
// {
66
// while ((first!=last)&&(first!=--last)) {
67
// point_swap (first,last);
68
// ++first;
69
// }
70
// }
71
72
73
__device__ inline float cross(float2 o,float2 a,float2 b){ //叉积
74
return(a.x-o.x)*(b.y-o.y)-(b.x-o.x)*(a.y-o.y);
75
}
76
__device__ inline float area(float2* ps,int n){
77
ps[n]=ps[0];
78
float res=0;
79
for(int i=0;i<n;i++){
80
res+=ps[i].x*ps[i+1].y-ps[i].y*ps[i+1].x;
81
}
82
return res/2.0;
83
}
84
__device__ inline int lineCross(float2 a,float2 b,float2 c,float2 d,float2&p){
85
float s1,s2;
86
s1=cross(a,b,c);
87
s2=cross(a,b,d);
88
if(sig(s1)==0&&sig(s2)==0) return 2;
89
if(sig(s2-s1)==0) return 0;
90
p.x=(c.x*s2-d.x*s1)/(s2-s1);
91
p.y=(c.y*s2-d.y*s1)/(s2-s1);
92
return 1;
93
}
94
95
96
97
//多边形切割
98
//用直线ab切割多边形p,切割后的在向量(a,b)的左侧,并原地保存切割结果
99
//如果退化为一个点,也会返回去,此时n为1
100
// __device__ inline void polygon_cut(float2*p,int&n,float2 a,float2 b){
101
// // TODO: The static variable may be the reason, why single thread is ok, multiple threads are not work
102
// printf("polygon_cut, offset\n");
103
104
// static float2 pp[maxn];
105
// int m=0;p[n]=p[0];
106
// for(int i=0;i<n;i++){
107
// if(sig(cross(a,b,p[i]))>0) pp[m++]=p[i];
108
// if(sig(cross(a,b,p[i]))!=sig(cross(a,b,p[i+1])))
109
// lineCross(a,b,p[i],p[i+1],pp[m++]);
110
// }
111
// n=0;
112
113
// for(int i=0;i<m;i++)
114
// if(!i||!(point_eq(pp[i], pp[i-1])))
115
// p[n++]=pp[i];
116
// // while(n>1&&p[n-1]==p[0])n--;
117
// while(n>1&&point_eq(p[n-1], p[0]))n--;
118
// // int x = blockIdx.x * blockDim.x + threadIdx.x;
119
// // // corresponding to k
120
// // int y = blockIdx.y * blockDim.y + threadIdx.y;
121
// // int offset = x * 1 + y;
122
// // printf("polygon_cut, offset\n");
123
// }
124
125
__device__ inline void polygon_cut(float2*p,int&n,float2 a,float2 b, float2* pp){
126
// TODO: The static variable may be the reason, why single thread is ok, multiple threads are not work
127
// printf("polygon_cut, offset\n");
128
129
// static float2 pp[maxn];
130
int m=0;p[n]=p[0];
131
for(int i=0;i<n;i++){
132
if(sig(cross(a,b,p[i]))>0) pp[m++]=p[i];
133
if(sig(cross(a,b,p[i]))!=sig(cross(a,b,p[i+1])))
134
lineCross(a,b,p[i],p[i+1],pp[m++]);
135
}
136
n=0;
137
138
for(int i=0;i<m;i++)
139
if(!i||!(point_eq(pp[i], pp[i-1])))
140
p[n++]=pp[i];
141
// while(n>1&&p[n-1]==p[0])n--;
142
while(n>1&&point_eq(p[n-1], p[0]))n--;
143
// int x = blockIdx.x * blockDim.x + threadIdx.x;
144
// // corresponding to k
145
// int y = blockIdx.y * blockDim.y + threadIdx.y;
146
// int offset = x * 1 + y;
147
// printf("polygon_cut, offset\n");
148
}
149
150
//---------------华丽的分隔线-----------------//
151
//返回三角形oab和三角形ocd的有向交面积,o是原点//
152
__device__ inline float intersectArea(float2 a,float2 b,float2 c,float2 d){
153
float2 o = make_float2(0,0);
154
int s1=sig(cross(o,a,b));
155
int s2=sig(cross(o,c,d));
156
if(s1==0||s2==0)return 0.0;//退化,面积为0
157
// if(s1==-1) swap(a,b);
158
// if(s2==-1) swap(c,d);
159
// printf("before swap\n");
160
// printf("a.x %f, a.y %f\n", a.x, a.y);
161
// printf("b.x %f, b.y %f\n", b.x, b.y);
162
if(s1 == -1) point_swap(&a, &b);
163
// printf("a.x %f, a.y %f\n", a.x, a.y);
164
// printf("b.x %f, b.y %f\n", b.x, b.y);
165
// printf("after swap\n");
166
if(s2 == -1) point_swap(&c, &d);
167
float2 p[10]={o,a,b};
168
int n=3;
169
170
// // manually implement polygon_cut(p, n, a, b)
171
// float2 pp[maxn];
172
// // polygon_cut(p, n, o, c)
173
// int m=0;p[n]=p[0];
174
// for(int i=0;i<n;i++){
175
// if(sig(cross(o,c,p[i]))>0) pp[m++]=p[i];
176
// if(sig(cross(o,c,p[i]))!=sig(cross(o,c,p[i+1])))
177
// lineCross(o,c,p[i],p[i+1],pp[m++]);
178
// }
179
// n=0;
180
181
// for(int i=0;i<m;i++)
182
// if(!i||!(point_eq(pp[i], pp[i-1])))
183
// p[n++]=pp[i];
184
// while(n>1&&point_eq(p[n-1], p[0]))n--;
185
186
// // polygon_cut(p, n, c, d)
187
// m=0;p[n]=p[0];
188
// for(int i=0;i<n;i++){
189
// if(sig(cross(c,d,p[i]))>0) pp[m++]=p[i];
190
// if(sig(cross(c,d,p[i]))!=sig(cross(c,d,p[i+1])))
191
// lineCross(c,d,p[i],p[i+1],pp[m++]);
192
// }
193
// n=0;
194
195
// for(int i=0;i<m;i++)
196
// if(!i||!(point_eq(pp[i], pp[i-1])))
197
// p[n++]=pp[i];
198
// while(n>1&&point_eq(p[n-1], p[0]))n--;
199
200
// // polygon_cut(p, n, d, o)
201
// m=0;p[n]=p[0];
202
// for(int i=0;i<n;i++){
203
// if(sig(cross(d,o,p[i]))>0) pp[m++]=p[i];
204
// if(sig(cross(d,o,p[i]))!=sig(cross(d,o,p[i+1])))
205
// lineCross(d,o,p[i],p[i+1],pp[m++]);
206
// }
207
// n=0;
208
209
// for(int i=0;i<m;i++)
210
// if(!i||!(point_eq(pp[i], pp[i-1])))
211
// p[n++]=pp[i];
212
// while(n>1&&point_eq(p[n-1], p[0]))n--;
213
float2 pp[maxn];
214
polygon_cut(p,n,o,c,pp);
215
polygon_cut(p,n,c,d,pp);
216
polygon_cut(p,n,d,o,pp);
217
float res=fabs(area(p,n));
218
int x = blockIdx.x * blockDim.x + threadIdx.x;
219
// corresponding to k
220
int y = blockIdx.y * blockDim.y + threadIdx.y;
221
int offset = x * 1 + y;
222
// printf("intersectArea2, offset: %d, %f, %f, %f, %f, %f, %f, %f, %f, res: %f\n", offset, a.x, a.y, b.x, b.y, c.x, c.y, d.x, d.y, res);
223
if(s1*s2==-1) res=-res;return res;
224
225
}
226
//求两多边形的交面积
227
// TODO: here changed the input, this need to be debug
228
__device__ inline float intersectArea(float2*ps1,int n1,float2*ps2,int n2){
229
int x = blockIdx.x * blockDim.x + threadIdx.x;
230
// corresponding to k
231
int y = blockIdx.y * blockDim.y + threadIdx.y;
232
int offset = x * 1 + y;
233
if(area(ps1,n1)<0) point_reverse(ps1,ps1+n1);
234
if(area(ps2,n2)<0) point_reverse(ps2,ps2+n2);
235
ps1[n1]=ps1[0];
236
ps2[n2]=ps2[0];
237
float res=0;
238
for(int i=0;i<n1;i++){
239
for(int j=0;j<n2;j++){
240
// printf("offset: %d, %f, %f, %f, %f, %f, %f, %f, %f addArea: %f \n",
241
// offset, ps1[i].x, ps1[i].y, ps1[i + 1].x, ps1[i + 1].y, ps2[j].x, ps2[j].y,
242
// ps2[j + 1].x, ps2[j + 1].y, intersectArea(ps1[i],ps1[i+1],ps2[j],ps2[j+1]));
243
244
// float2 a = ps1[i];
245
// float2 b = ps1[i+1];
246
// float2 c = ps2[j];
247
// float2 d = ps2[j+1];
248
// res+=intersectArea2(a,b,c,d);
249
res+=intersectArea(ps1[i],ps1[i+1],ps2[j],ps2[j+1]);
250
}
251
}
252
return res;//assumeresispositive!
253
}
254
255
256
257
258
//__device__ inline double iou_poly(vector<double> p, vector<double> q) {
259
// Point ps1[maxn],ps2[maxn];
260
// int n1 = 4;
261
// int n2 = 4;
262
// for (int i = 0; i < 4; i++) {
263
// ps1[i].x = p[i * 2];
264
// ps1[i].y = p[i * 2 + 1];
265
//
266
// ps2[i].x = q[i * 2];
267
// ps2[i].y = q[i * 2 + 1];
268
// }
269
// double inter_area = intersectArea(ps1, n1, ps2, n2);
270
// double union_area = fabs(area(ps1, n1)) + fabs(area(ps2, n2)) - inter_area;
271
// double iou = inter_area / union_area;
272
//
273
//// cout << "inter_area:" << inter_area << endl;
274
//// cout << "union_area:" << union_area << endl;
275
//// cout << "iou:" << iou << endl;
276
//
277
// return iou;
278
//}
279
280
__device__ inline void RotBox2Poly(float const * const dbox, float2 * ps) {
281
float cs = cos(dbox[4]);
282
float ss = sin(dbox[4]);
283
float w = dbox[2];
284
float h = dbox[3];
285
286
float x_ctr = dbox[0];
287
float y_ctr = dbox[1];
288
ps[0].x = x_ctr + cs * (w / 2.0) - ss * (-h / 2.0);
289
ps[1].x = x_ctr + cs * (w / 2.0) - ss * (h / 2.0);
290
ps[2].x = x_ctr + cs * (-w / 2.0) - ss * (h / 2.0);
291
ps[3].x = x_ctr + cs * (-w / 2.0) - ss * (-h / 2.0);
292
293
ps[0].y = y_ctr + ss * (w / 2.0) + cs * (-h / 2.0);
294
ps[1].y = y_ctr + ss * (w / 2.0) + cs * (h / 2.0);
295
ps[2].y = y_ctr + ss * (-w / 2.0) + cs * (h / 2.0);
296
ps[3].y = y_ctr + ss * (-w / 2.0) + cs * (-h / 2.0);
297
}
298
299
300
__device__ inline float devPolyIoU(float const * const dbbox1, float const * const dbbox2) {
301
302
303
float2 ps1[maxn], ps2[maxn];
304
int n1 = 4;
305
int n2 = 4;
306
307
308
309
310
RotBox2Poly(dbbox1, ps1);
311
RotBox2Poly(dbbox2, ps2);
312
313
// printf("ps1: %f, %f, %f, %f, %f, %f, %f, %f\n", ps1[0].x, ps1[0].y, ps1[1].x, ps1[1].y, ps1[2].x, ps1[2].y, ps1[3].x, ps1[3].y);
314
// printf("ps2: %f, %f, %f, %f, %f, %f, %f, %f\n", ps2[0].x, ps2[0].y, ps2[1].x, ps2[1].y, ps2[2].x, ps2[2].y, ps2[3].x, ps2[3].y);
315
float inter_area = intersectArea(ps1, n1, ps2, n2);
316
//printf("inter_area: %f \n", inter_area);
317
float union_area = fabs(area(ps1, n1)) + fabs(area(ps2, n2)) - inter_area;
318
//printf("before union_area\n");
319
//printf("union_area: %f \n", union_area);
320
float iou = 0;
321
if (union_area == 0) {
322
iou = (inter_area + 1) / (union_area + 1);
323
} else {
324
iou = inter_area / union_area;
325
}
326
// printf("iou: %f \n", iou);
327
return iou;
328
}
329
330
__global__ void overlaps_kernel(const int N, const int K, const float* dev_boxes,
331
const float * dev_query_boxes, float* dev_overlaps) {
332
333
// const int col_start = blockIdx.y;
334
// const int row_start = blockIdx.x;
335
336
// corresponding to n
337
int x = blockIdx.x * blockDim.x + threadIdx.x;
338
// corresponding to k
339
int y = blockIdx.y * blockDim.y + threadIdx.y;
340
if ((x < N) && (y < K)) {
341
int offset = x * K + y;
342
343
//printf
344
// printf("offset: %d dbbox: %f %f %f %f %f\n", offset, (dev_boxes + x*5)[0],
345
// (dev_boxes + x*5)[1], (dev_boxes + x*5)[2], (dev_boxes + x*5)[3],
346
// (dev_boxes + x*5)[4] );
347
// printf("offset: %d dbbox: %f %f %f %f %f\n", offset, (dev_query_boxes + y*5)[0],
348
// (dev_query_boxes + y*5)[1], (dev_query_boxes + y*5)[2], (dev_query_boxes + y*5)[3],
349
// (dev_query_boxes + y*5)[4] );
350
351
dev_overlaps[offset] = devPolyIoU(dev_boxes + x * 5, dev_query_boxes + y * 5);
352
}
353
}
354
355
356
void _set_device(int device_id) {
357
int current_device;
358
CUDA_CHECK(cudaGetDevice(&current_device));
359
if (current_device == device_id) {
360
return;
361
}
362
// The call to cudaSetDevice must come before any calls to Get, which
363
// may perform initialization using the GPU.
364
CUDA_CHECK(cudaSetDevice(device_id));
365
}
366
367
368
void _overlaps(float* overlaps,const float* boxes,const float* query_boxes, int n, int k, int device_id) {
369
370
_set_device(device_id);
371
372
float* overlaps_dev = NULL;
373
float* boxes_dev = NULL;
374
float* query_boxes_dev = NULL;
375
376
377
CUDA_CHECK(cudaMalloc(&boxes_dev,
378
n * 5 * sizeof(float)));
379
380
381
382
CUDA_CHECK(cudaMemcpy(boxes_dev,
383
boxes,
384
n * 5 * sizeof(float),
385
cudaMemcpyHostToDevice));
386
387
388
389
CUDA_CHECK(cudaMalloc(&query_boxes_dev,
390
k * 5 * sizeof(float)));
391
392
393
394
CUDA_CHECK(cudaMemcpy(query_boxes_dev,
395
query_boxes,
396
k * 5 * sizeof(float),
397
cudaMemcpyHostToDevice));
398
399
CUDA_CHECK(cudaMalloc(&overlaps_dev,
400
n * k * sizeof(float)));
401
402
403
if (true){}
404
405
406
dim3 blocks(DIVUP(n, 32),
407
DIVUP(k, 32));
408
409
dim3 threads(32, 32);
410
411
412
overlaps_kernel<<<blocks, threads>>>(n, k,
413
boxes_dev,
414
query_boxes_dev,
415
overlaps_dev);
416
417
CUDA_CHECK(cudaMemcpy(overlaps,
418
overlaps_dev,
419
n * k * sizeof(float),
420
cudaMemcpyDeviceToHost));
421
422
423
CUDA_CHECK(cudaFree(overlaps_dev));
424
CUDA_CHECK(cudaFree(boxes_dev));
425
CUDA_CHECK(cudaFree(query_boxes_dev));
426
427
}
428
429