CoCalc -- poly_overlaps

hukaixuan19970627
GitHub Repository: hukaixuan19970627/yolov5_obb
Path: blob/master/DOTA_devkit/poly_nms_gpu/poly_overlaps_kernel.cu
²²⁸⁸ views
1

2
#include "poly_overlaps.hpp"
3
#include <vector>
4
#include <iostream>
5
#include <cmath>
6
#include <cstdio>
7
#include<algorithm>
8

9
using namespace std;
10

11
//##define CUDA_CHECK(condition)\
12
//
13
//  do {
14
//    cudaError_t error = condition;
15
//    if (error != cudaSuccess) {
16
//
17
//    }
18
//  }
19

20
#define CUDA_CHECK(condition) \
21
  /* Code block avoids redefinition of cudaError_t error */ \
22
  do { \
23
    cudaError_t error = condition; \
24
    if (error != cudaSuccess) { \
25
      std::cout << cudaGetErrorString(error) << std::endl; \
26
    } \
27
  } while (0)
28

29
#define DIVUP(m,n) ((m) / (n) + ((m) % (n) > 0))
30
int const threadsPerBlock = sizeof(unsigned long long) * 8;
31

32

33
#define maxn 510
34
const double eps=1E-8;
35

36
__device__ inline int sig(float d){
37
    return(d>eps)-(d<-eps);
38
}
39
// struct Point{
40
//     double x,y; Point(){}
41
//     Point(double x,double y):x(x),y(y){}
42
//     bool operator==(const Point&p)const{
43
//         return sig(x-p.x)==0&&sig(y-p.y)==0;
44
//     }
45
// };
46

47
__device__ inline int point_eq(const float2 a, const float2 b) {
48
    return (sig(a.x - b.x) == 0) && (sig(a.y - b.y)==0);
49
}
50

51
__device__ inline void point_swap(float2 *a, float2 *b) {
52
    float2 temp = *a;
53
    *a = *b;
54
    *b = temp;
55
}
56

57
__device__ inline void point_reverse(float2 *first, float2* last)
58
{
59
    while ((first!=last)&&(first!=--last)) {
60
        point_swap (first,last);
61
        ++first;
62
    }
63
}
64
// void point_reverse(Point* first, Point* last)
65
// {
66
//     while ((first!=last)&&(first!=--last)) {
67
//         point_swap (first,last);
68
//         ++first;
69
//     }
70
// }
71

72

73
__device__ inline float cross(float2 o,float2 a,float2 b){  //叉积
74
    return(a.x-o.x)*(b.y-o.y)-(b.x-o.x)*(a.y-o.y);
75
}
76
__device__ inline float area(float2* ps,int n){
77
    ps[n]=ps[0];
78
    float res=0;
79
    for(int i=0;i<n;i++){
80
        res+=ps[i].x*ps[i+1].y-ps[i].y*ps[i+1].x;
81
    }
82
    return res/2.0;
83
}
84
__device__ inline int lineCross(float2 a,float2 b,float2 c,float2 d,float2&p){
85
    float s1,s2;
86
    s1=cross(a,b,c);
87
    s2=cross(a,b,d);
88
    if(sig(s1)==0&&sig(s2)==0) return 2;
89
    if(sig(s2-s1)==0) return 0;
90
    p.x=(c.x*s2-d.x*s1)/(s2-s1);
91
    p.y=(c.y*s2-d.y*s1)/(s2-s1);
92
    return 1;
93
}
94

95

96

97
//多边形切割
98
//用直线ab切割多边形p，切割后的在向量(a,b)的左侧，并原地保存切割结果
99
//如果退化为一个点，也会返回去,此时n为1
100
// __device__ inline void polygon_cut(float2*p,int&n,float2 a,float2 b){
101
//     // TODO: The static variable may be the reason, why single thread is ok, multiple threads are not work
102
//     printf("polygon_cut, offset\n");
103
    
104
//     static float2 pp[maxn];
105
//     int m=0;p[n]=p[0];
106
//     for(int i=0;i<n;i++){
107
//         if(sig(cross(a,b,p[i]))>0) pp[m++]=p[i];
108
//         if(sig(cross(a,b,p[i]))!=sig(cross(a,b,p[i+1])))
109
//             lineCross(a,b,p[i],p[i+1],pp[m++]);
110
//     }
111
//     n=0;
112

113
//     for(int i=0;i<m;i++)
114
//         if(!i||!(point_eq(pp[i], pp[i-1])))
115
//             p[n++]=pp[i];
116
//     // while(n>1&&p[n-1]==p[0])n--;
117
//     while(n>1&&point_eq(p[n-1], p[0]))n--;
118
//     // int x = blockIdx.x * blockDim.x + threadIdx.x;
119
//     // // corresponding to k
120
//     // int y = blockIdx.y * blockDim.y + threadIdx.y;
121
//     // int offset = x * 1 + y;
122
//     // printf("polygon_cut, offset\n");
123
// }
124

125
__device__ inline void polygon_cut(float2*p,int&n,float2 a,float2 b, float2* pp){
126
    // TODO: The static variable may be the reason, why single thread is ok, multiple threads are not work
127
    // printf("polygon_cut, offset\n");
128
    
129
    // static float2 pp[maxn];
130
    int m=0;p[n]=p[0];
131
    for(int i=0;i<n;i++){
132
        if(sig(cross(a,b,p[i]))>0) pp[m++]=p[i];
133
        if(sig(cross(a,b,p[i]))!=sig(cross(a,b,p[i+1])))
134
            lineCross(a,b,p[i],p[i+1],pp[m++]);
135
    }
136
    n=0;
137

138
    for(int i=0;i<m;i++)
139
        if(!i||!(point_eq(pp[i], pp[i-1])))
140
            p[n++]=pp[i];
141
    // while(n>1&&p[n-1]==p[0])n--;
142
    while(n>1&&point_eq(p[n-1], p[0]))n--;
143
    // int x = blockIdx.x * blockDim.x + threadIdx.x;
144
    // // corresponding to k
145
    // int y = blockIdx.y * blockDim.y + threadIdx.y;
146
    // int offset = x * 1 + y;
147
    // printf("polygon_cut, offset\n");
148
}
149

150
//---------------华丽的分隔线-----------------//
151
//返回三角形oab和三角形ocd的有向交面积,o是原点//
152
__device__ inline float intersectArea(float2 a,float2 b,float2 c,float2 d){
153
    float2 o = make_float2(0,0);
154
    int s1=sig(cross(o,a,b));
155
    int s2=sig(cross(o,c,d));
156
    if(s1==0||s2==0)return 0.0;//退化，面积为0
157
    // if(s1==-1) swap(a,b);
158
    // if(s2==-1) swap(c,d);
159
    // printf("before swap\n");
160
    // printf("a.x %f, a.y %f\n", a.x, a.y);
161
    // printf("b.x %f, b.y %f\n", b.x, b.y);
162
    if(s1 == -1) point_swap(&a, &b);
163
    // printf("a.x %f, a.y %f\n", a.x, a.y);
164
    // printf("b.x %f, b.y %f\n", b.x, b.y);
165
    // printf("after swap\n");
166
    if(s2 == -1) point_swap(&c, &d);
167
    float2 p[10]={o,a,b};
168
    int n=3;
169

170
    // // manually implement polygon_cut(p, n, a, b)
171
    // float2 pp[maxn];
172
    // // polygon_cut(p, n, o, c)
173
    // int m=0;p[n]=p[0];
174
    // for(int i=0;i<n;i++){
175
    //     if(sig(cross(o,c,p[i]))>0) pp[m++]=p[i];
176
    //     if(sig(cross(o,c,p[i]))!=sig(cross(o,c,p[i+1])))
177
    //         lineCross(o,c,p[i],p[i+1],pp[m++]);
178
    // }
179
    // n=0;
180

181
    // for(int i=0;i<m;i++)
182
    //     if(!i||!(point_eq(pp[i], pp[i-1])))
183
    //         p[n++]=pp[i];
184
    // while(n>1&&point_eq(p[n-1], p[0]))n--;
185

186
    // // polygon_cut(p, n, c, d)
187
    // m=0;p[n]=p[0];
188
    // for(int i=0;i<n;i++){
189
    //     if(sig(cross(c,d,p[i]))>0) pp[m++]=p[i];
190
    //     if(sig(cross(c,d,p[i]))!=sig(cross(c,d,p[i+1])))
191
    //         lineCross(c,d,p[i],p[i+1],pp[m++]);
192
    // }
193
    // n=0;
194

195
    // for(int i=0;i<m;i++)
196
    //     if(!i||!(point_eq(pp[i], pp[i-1])))
197
    //         p[n++]=pp[i];
198
    // while(n>1&&point_eq(p[n-1], p[0]))n--;
199

200
    // // polygon_cut(p, n, d, o)
201
    // m=0;p[n]=p[0];
202
    // for(int i=0;i<n;i++){
203
    //     if(sig(cross(d,o,p[i]))>0) pp[m++]=p[i];
204
    //     if(sig(cross(d,o,p[i]))!=sig(cross(d,o,p[i+1])))
205
    //         lineCross(d,o,p[i],p[i+1],pp[m++]);
206
    // }
207
    // n=0;
208

209
    // for(int i=0;i<m;i++)
210
    //     if(!i||!(point_eq(pp[i], pp[i-1])))
211
    //         p[n++]=pp[i];
212
    // while(n>1&&point_eq(p[n-1], p[0]))n--;
213
    float2 pp[maxn];
214
    polygon_cut(p,n,o,c,pp);
215
    polygon_cut(p,n,c,d,pp);
216
    polygon_cut(p,n,d,o,pp);
217
    float res=fabs(area(p,n));
218
    int x = blockIdx.x * blockDim.x + threadIdx.x;
219
    // corresponding to k
220
    int y = blockIdx.y * blockDim.y + threadIdx.y;
221
    int offset = x * 1 + y;
222
    // printf("intersectArea2, offset: %d, %f, %f, %f, %f, %f, %f, %f, %f, res: %f\n", offset, a.x, a.y, b.x, b.y, c.x, c.y, d.x, d.y, res);
223
    if(s1*s2==-1) res=-res;return res;
224

225
}
226
//求两多边形的交面积
227
// TODO: here changed the input, this need to be debug
228
__device__ inline float intersectArea(float2*ps1,int n1,float2*ps2,int n2){
229
    int x = blockIdx.x * blockDim.x + threadIdx.x;
230
    // corresponding to k
231
    int y = blockIdx.y * blockDim.y + threadIdx.y;
232
    int offset = x * 1 + y;
233
    if(area(ps1,n1)<0) point_reverse(ps1,ps1+n1);
234
    if(area(ps2,n2)<0) point_reverse(ps2,ps2+n2);
235
    ps1[n1]=ps1[0];
236
    ps2[n2]=ps2[0];
237
    float res=0;
238
    for(int i=0;i<n1;i++){
239
        for(int j=0;j<n2;j++){
240
            // printf("offset: %d, %f, %f, %f, %f, %f, %f, %f, %f addArea: %f \n", 
241
            // offset, ps1[i].x, ps1[i].y, ps1[i + 1].x, ps1[i + 1].y, ps2[j].x, ps2[j].y, 
242
            // ps2[j + 1].x, ps2[j + 1].y, intersectArea(ps1[i],ps1[i+1],ps2[j],ps2[j+1]));
243

244
            // float2 a = ps1[i];
245
            // float2 b = ps1[i+1];
246
            // float2 c = ps2[j];
247
            // float2 d = ps2[j+1];
248
            // res+=intersectArea2(a,b,c,d);
249
            res+=intersectArea(ps1[i],ps1[i+1],ps2[j],ps2[j+1]);
250
        }
251
    }
252
    return res;//assumeresispositive!
253
}
254

255

256

257

258
//__device__ inline double iou_poly(vector<double> p, vector<double> q) {
259
//    Point ps1[maxn],ps2[maxn];
260
//    int n1 = 4;
261
//    int n2 = 4;
262
//    for (int i = 0; i < 4; i++) {
263
//        ps1[i].x = p[i * 2];
264
//        ps1[i].y = p[i * 2 + 1];
265
//
266
//        ps2[i].x = q[i * 2];
267
//        ps2[i].y = q[i * 2 + 1];
268
//    }
269
//    double inter_area = intersectArea(ps1, n1, ps2, n2);
270
//    double union_area = fabs(area(ps1, n1)) + fabs(area(ps2, n2)) - inter_area;
271
//    double iou = inter_area / union_area;
272
//
273
////    cout << "inter_area:" << inter_area << endl;
274
////    cout << "union_area:" << union_area << endl;
275
////    cout << "iou:" << iou << endl;
276
//
277
//    return iou;
278
//}
279

280
__device__ inline void RotBox2Poly(float const * const dbox, float2 * ps) {
281
    float cs = cos(dbox[4]);
282
    float ss = sin(dbox[4]);
283
    float w = dbox[2];
284
    float h = dbox[3];
285

286
    float x_ctr = dbox[0];
287
    float y_ctr = dbox[1];
288
    ps[0].x = x_ctr + cs * (w / 2.0) - ss * (-h / 2.0);
289
    ps[1].x = x_ctr + cs * (w / 2.0) - ss * (h / 2.0);
290
    ps[2].x = x_ctr + cs * (-w / 2.0) - ss * (h / 2.0);
291
    ps[3].x = x_ctr + cs * (-w / 2.0) - ss * (-h / 2.0);
292

293
    ps[0].y = y_ctr + ss * (w / 2.0) + cs * (-h / 2.0);
294
    ps[1].y = y_ctr + ss * (w / 2.0) + cs * (h / 2.0);
295
    ps[2].y = y_ctr + ss * (-w / 2.0) + cs * (h / 2.0);
296
    ps[3].y = y_ctr + ss * (-w / 2.0) + cs * (-h / 2.0);
297
}
298

299

300
__device__ inline float devPolyIoU(float const * const dbbox1, float const * const dbbox2) {
301

302

303
    float2 ps1[maxn], ps2[maxn];
304
    int n1 = 4;
305
    int n2 = 4;
306

307

308

309

310
    RotBox2Poly(dbbox1, ps1);
311
    RotBox2Poly(dbbox2, ps2);
312

313
    // printf("ps1: %f, %f, %f, %f, %f, %f, %f, %f\n", ps1[0].x, ps1[0].y, ps1[1].x, ps1[1].y, ps1[2].x, ps1[2].y, ps1[3].x, ps1[3].y);
314
    // printf("ps2: %f, %f, %f, %f, %f, %f, %f, %f\n", ps2[0].x, ps2[0].y, ps2[1].x, ps2[1].y, ps2[2].x, ps2[2].y, ps2[3].x, ps2[3].y);
315
    float inter_area = intersectArea(ps1, n1, ps2, n2);
316
    //printf("inter_area: %f \n", inter_area);
317
    float union_area = fabs(area(ps1, n1)) + fabs(area(ps2, n2)) - inter_area;
318
    //printf("before union_area\n");
319
    //printf("union_area: %f \n", union_area);
320
    float iou = 0;
321
    if (union_area == 0) {
322
        iou = (inter_area + 1) / (union_area + 1);
323
    } else {
324
        iou = inter_area / union_area;
325
    }
326
    // printf("iou: %f \n", iou);
327
    return iou;
328
}
329

330
__global__ void overlaps_kernel(const int N, const int K, const float* dev_boxes,
331
                           const float * dev_query_boxes, float* dev_overlaps) {
332

333
//   const int col_start = blockIdx.y;
334
//   const int row_start = blockIdx.x;
335
  
336
  // corresponding to n
337
  int x = blockIdx.x * blockDim.x + threadIdx.x;
338
  // corresponding to k
339
  int y = blockIdx.y * blockDim.y + threadIdx.y;
340
  if ((x < N) && (y < K)) {
341
    int offset = x * K + y;
342
    
343
    //printf
344
    // printf("offset: %d dbbox: %f %f %f %f %f\n", offset, (dev_boxes + x*5)[0],
345
    // (dev_boxes + x*5)[1], (dev_boxes + x*5)[2], (dev_boxes + x*5)[3], 
346
    // (dev_boxes + x*5)[4] );
347
    // printf("offset: %d dbbox: %f %f %f %f %f\n", offset, (dev_query_boxes + y*5)[0],
348
    // (dev_query_boxes + y*5)[1], (dev_query_boxes + y*5)[2], (dev_query_boxes + y*5)[3], 
349
    // (dev_query_boxes + y*5)[4] );
350
    
351
    dev_overlaps[offset] = devPolyIoU(dev_boxes + x * 5, dev_query_boxes + y * 5);
352
  } 
353
}
354

355

356
void _set_device(int device_id) {
357
  int current_device;
358
  CUDA_CHECK(cudaGetDevice(&current_device));
359
  if (current_device == device_id) {
360
    return;
361
  }
362
  // The call to cudaSetDevice must come before any calls to Get, which
363
  // may perform initialization using the GPU.
364
  CUDA_CHECK(cudaSetDevice(device_id));
365
}
366

367

368
void _overlaps(float* overlaps,const float* boxes,const float* query_boxes, int n, int k, int device_id) {
369

370
  _set_device(device_id);
371

372
  float* overlaps_dev = NULL;
373
  float* boxes_dev = NULL;
374
  float* query_boxes_dev = NULL;
375

376

377
  CUDA_CHECK(cudaMalloc(&boxes_dev,
378
                        n * 5 * sizeof(float)));
379

380

381

382
  CUDA_CHECK(cudaMemcpy(boxes_dev,
383
                        boxes,
384
                        n * 5 * sizeof(float),
385
                        cudaMemcpyHostToDevice));
386

387

388

389
  CUDA_CHECK(cudaMalloc(&query_boxes_dev,
390
                        k * 5 * sizeof(float)));
391

392

393

394
  CUDA_CHECK(cudaMemcpy(query_boxes_dev,
395
                        query_boxes,
396
                        k * 5 * sizeof(float),
397
                        cudaMemcpyHostToDevice));
398

399
  CUDA_CHECK(cudaMalloc(&overlaps_dev,
400
                        n * k * sizeof(float)));
401

402

403
  if (true){}
404

405

406
  dim3 blocks(DIVUP(n, 32),
407
              DIVUP(k, 32));
408

409
  dim3 threads(32, 32);
410

411

412
  overlaps_kernel<<<blocks, threads>>>(n, k,
413
                                    boxes_dev,
414
                                    query_boxes_dev,
415
                                    overlaps_dev);
416

417
  CUDA_CHECK(cudaMemcpy(overlaps,
418
                        overlaps_dev,
419
                        n * k * sizeof(float),
420
                        cudaMemcpyDeviceToHost));
421

422

423
  CUDA_CHECK(cudaFree(overlaps_dev));
424
  CUDA_CHECK(cudaFree(boxes_dev));
425
  CUDA_CHECK(cudaFree(query_boxes_dev));
426

427
}
428

429
Product

Resources

Company