I have a C program that finds all possible triangles (for example 3 3 4 4 5 5 => 20 triangles, 7 without duplicates). I managed to do this by storing all triangles in abc format by concantenating a,b,c, but my program is too slow. I found out that the function to find all triangles takes 1.68s for 1000 numbers and the uniq() function as well. (The limit is 2s):
int getTriangles(const int nosniky[], int N, int trojuhelniky[]) {
int count = 0;
tcount = 0;
// The last triangle found
int TEMP = 0;
for (int i = 0; i < N; i++) {
for (int j = i + 1; j < N; j++) {
for (int m = j + 1; m < N; m++) {
int a = nosniky[i];
int b = nosniky[j];
int c = nosniky[m];
if (a + b > c && a + c > b && c + b > a) {
// Order a,b,c so that a<b<c
order(&a, &b);
order(&a, &c);
order(&b, &c);
count++;
// Triangle will be stored as abc, so we need to join all ints into
// one (3,3,4 => 334)
int temp = joinInts(a, b);
if (i == 0 && j == 1 && m == 2) {
// first triangle, for example a=3,b=3,c=4 => 334
TEMP = joinInts(temp, c);
tcount++;
trojuhelniky[tcount - 1] = joinInts(temp, c);
} else if (joinInts(temp, c) == TEMP) {
// This is a duplicate, ignoring this one
} else {
tcount++;
trojuhelniky[tcount - 1] = joinInts(temp, c);
// TEMP = the new last found triangle
TEMP = joinInts(temp, c);
}
}
}
}
}
// Remove remaining duplicates, 1.68s as well
int newcount = uniq(trojuhelniky, tcount);
return newcount;
}
I order the input, store the "last found" triangle in TEMP variable and I check if the next one is the same, if not, the next triangle is the new TEMP. This way I get rid of most of the duplicates, I remove the rest in uniq() function.
So my question is, what can I do to improve the speed of my code? I already managed to go from 6s to 3.2s, now I am stuck.
The best case scenario would be removing duplicates in real time in the 3 for loops, but I have no idea how to do that, so my best bet now is trying to optimize my not-so-great code :D
EDIT: By "triangle", I mean a+b>c, b+c>c and a+c>b, so for input 3 3 4 4 5 5, I should find 20 possible combinations that meet that requirement. After deleting duplicates, the result should be 7 ( 3,3,4 is the same as 4,3,3).
EDIT 2: The input ranges from 3 to 1000 random integers. This is the uniq() function:
int uniq(int *a, size_t len) {
size_t i, j;
qsort(a, len, sizeof(int), icmp);
for (i = j = 0; i < len; i++) {
if (a[i] == 0) {
break;
}
if (a[i] != a[j]) a[++j] = a[i];
}
return (int) j + 1;
}
The output for input 2 9 18 4 1 5 19 6 3 1 2 8
should be 27.
Note first, that if you guarantee the input array sorted, while you maintain i < j < m, you will never get a combination of (3,4,3). Taking
int a = nosniky[i];
int b = nosniky[j];
int c = nosniky[m];
will make (a,b,c) a non-decreasing sequence. So this is not a problem at all.
You can, however, get a duplicate when your input data contain duplicates. For example, with input of 3,4,4,5 you get the same triple (a,b,c) = (3,4,5) for (i,j,m) = (0,1,3) and for (i,j,m) = (0,2,3).
To avoid this you just need to increment i,j,m in a smart way to skip duplicates: if nosniky[1] == 4 and nosniky[2] == 4 you should not increment j from 1 to 2, but rather keep incrementing it until you find nosniky[j] different from nosniky[j-1] (or you hit j == N).
int count = 0;
..... // sort nosniky[] in ascending order here
for (int i = 0; i < N;) {
int a = nosniky[i];
for (int j = i + 1; j < N;) {
int b = nosniky[j];
for (int m = j + 1; m < N;) {
int c = nosniky[m];
if (a + b > c && a + c > b && c + b > a) {
int temp = jointInts(joinInts(a, b), c);
trojuhelniky[count] = temp;
count ++;
}
while (++m < N && nosniky[m] == c)
; // skip duplicates of the third edge
}
while (++j < N && nosniky[j] == b)
; // skip duplicates of the second edge
}
while (++i < N && nosniky[i] == a)
; // skip duplicates of the first edge
}
return count;
By the way, once you guarantee the nosniky[] array is sorted, you know c is not less than a and b. Then, as @pmg noted in a comment, the test for a triangle inequality can be reduced to the first comparison, because remaining two are certainly satisfied:
if (a + b > c) {
int temp = jointInts(joinInts(a, b), c);
trojuhelniky[count] = temp;
count ++;
}
If you are fine with just counting the triangles and do not need to collect them all, you can simplify the innermost loop:
for (int m = j + 1; m < N;) {
int c = nosniky[m];
if (a + b > c) {
count ++;
}
while (++m < N && nosniky[m] == c)
; // skip duplicates of the third edge
}
Following an advice from the comment by Michael Dorgan, you can save some time by early terminating a search through lenghts of c which are known to be too large:
for (int m = j + 1; m < N;) {
int c = nosniky[m];
if (c >= a + b) // too large
break; // skip the rest
if (a + b > c) {
count ++;
}
while (++m < N && nosniky[m] == c)
; // skip duplicates of the third edge
}
Then the next condition becomes tautological, because it's a negation of the previous one, hence we can get rid of it:
for (int m = j + 1; m < N;) {
int c = nosniky[m];
if (c >= a + b) // too large
break; // skip the rest
count ++;
while (++m < N && nosniky[m] == c)
; // skip duplicates of the third edge
}
If you expect very large values of N, you can also try to optimize searching through c values which are too small, for example with a binary search. You would need a binary-search function which takes an array to search, a range of indices and a target value, and returns an index (position) of the first value equal or greater than the target (or the right end of the range if not found).
int c_minimal = b - a;
// do binary search for c_minimal
// between indices j+1 (inclusive) and N (exclusive)
int initial_m = search(nosniky, j+1, N, c_minimal);
// start from the first value big enough
for (int m = initial_m; m < N;) {
int c = nosniky[m];
if (c >= a + b) // too large
break; // skip the rest
count ++;
while (++m < N && nosniky[m] == c)
; // skip duplicates of the third edge
}
I had been working on faster versions, but CiaPan came up with the best algorithm overall.
But, I was curious as to how the improvements would benchmark, so I created a test and benchmark framework.
For my versions, I first applied the two fixes suggested by pmg. And, created a second version with Michael's suggested fix.
Then, I added two versions from CiaPan's answer.
I used test data from OP's two expected results. And, I added some random tests as well.
The results were dramatic. For some of the larger test vectors, CiaPan's best algorithm was up to 62x faster!
Congrats to all who contributed!
Here's the output of the program:
20:55:49 ph: STARTUP 11/19/20-20:55:49
20:55:49 ph: CMD ./main
main: VSHF hashinc=00000001 VLIM=000003E8 vshf=0
main: VSHF hashinc=00000002 VLIM=000003E8 vshf=1
main: VSHF hashinc=00000004 VLIM=000003E8 vshf=2
main: VSHF hashinc=00000008 VLIM=000003E8 vshf=3
main: VSHF hashinc=00000010 VLIM=000003E8 vshf=4
main: VSHF hashinc=00000020 VLIM=000003E8 vshf=5
main: VSHF hashinc=00000040 VLIM=000003E8 vshf=6
main: VSHF hashinc=00000080 VLIM=000003E8 vshf=7
main: VSHF hashinc=00000100 VLIM=000003E8 vshf=8
main: VSHF hashinc=00000200 VLIM=000003E8 vshf=9
main: VSHF hashinc=00000400 VLIM=000003E8 vshf=10
main: HASH i=0 hashnew=000FFFFF hashold=000003FF hashinc=000003FF
main: HASH old64=00000000000003FF new64=00000000000FFFFF
main: HASH i=1 hashnew=3FFFFFFF hashold=000FFFFF hashinc=000003FF
main: HASH old64=00000000000FFFFF new64=000000003FFFFFFF
main: HASH i=2 hashnew=FFFFFFFFFF hashold=3FFFFFFF hashinc=000003FF
main: HASH old64=000000003FFFFFFF new64=000000FFFFFFFFFF
xrealloc: len=24 totlen=24 -- tstvec
xrealloc: len=24 totlen=48 -- tstshuf
xrealloc: len=24 totlen=72 -- tstrand
xrealloc: len=48000000 totlen=48000072 -- tstrst
0.000002584 uniq:7 dup:4 1.000x -- getTriangles / Etoile's original
0.000002584 uniq:7 dup:4 1.000x -- getFix1 / orig + pmg 2 fixes
0.000002584 uniq:7 dup:4 1.000x -- getFix2 / + Michael's fix
0.000002026 uniq:7 dup:0 1.275x -- getFix3 / CiaPan's first -- no uniq
0.000001257 uniq:7 dup:0 2.056x -- getFix4 / CiaPan w/o trojuhelniky
xrealloc: len=48 totlen=48 -- tstvec
xrealloc: len=48 totlen=96 -- tstshuf
xrealloc: len=48 totlen=144 -- tstrand
xrealloc: len=96000000 totlen=96000144 -- tstrst
0.000005447 uniq:27 dup:6 1.000x -- getTriangles / Etoile's original
0.000004819 uniq:27 dup:6 1.130x -- getFix1 / orig + pmg 2 fixes
0.000004400 uniq:27 dup:6 1.238x -- getFix2 / + Michael's fix
0.000002235 uniq:27 dup:0 2.437x -- getFix3 / CiaPan's first -- no uniq
0.000002305 uniq:27 dup:0 2.363x -- getFix4 / CiaPan w/o trojuhelniky
dorand: N 541
xrealloc: len=2164 totlen=2164 -- tstvec
xrealloc: len=2164 totlen=4328 -- tstshuf
xrealloc: len=2164 totlen=6492 -- tstrand
xrealloc: len=4328000000 totlen=4328006492 -- tstrst
3.202697770 uniq:5784912 dup:6465629 1.000x -- getTriangles / Etoile's original
1.157011824 uniq:5784912 dup:3694551 2.768x -- getFix1 / orig + pmg 2 fixes
1.136350066 uniq:5784912 dup:3694551 2.818x -- getFix2 / + Michael's fix
0.055532664 uniq:5784912 dup:0 57.672x -- getFix3 / CiaPan's first -- no uniq
0.046850154 uniq:5784912 dup:0 68.360x -- getFix4 / CiaPan w/o trojuhelniky
dorand: N 156
0.054916311 uniq:235073 dup:54517 1.000x -- getTriangles / Etoile's original
0.023576459 uniq:235073 dup:39138 2.329x -- getFix1 / orig + pmg 2 fixes
0.022960595 uniq:235073 dup:39138 2.392x -- getFix2 / + Michael's fix
0.001602867 uniq:235073 dup:0 34.261x -- getFix3 / CiaPan's first -- no uniq
0.001270071 uniq:235073 dup:0 43.239x -- getFix4 / CiaPan w/o trojuhelniky
dorand: N 268
0.366441886 uniq:1121119 dup:539276 1.000x -- getTriangles / Etoile's original
0.150401844 uniq:1121119 dup:340122 2.436x -- getFix1 / orig + pmg 2 fixes
0.147924908 uniq:1121119 dup:340122 2.477x -- getFix2 / + Michael's fix
0.008634109 uniq:1121119 dup:0 42.441x -- getFix3 / CiaPan's first -- no uniq
0.006991432 uniq:1121119 dup:0 52.413x -- getFix4 / CiaPan w/o trojuhelniky
20:56:21 ph: complete (ELAPSED: 00:00:32.196043730)
Here's the program source if anybody wants to duplicate the results or add new/faster algorithms:
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <errno.h>
#include <time.h>
int opt_v;
int dupcnt;
typedef unsigned long long u64;
#define VLIM 1000
int vshf;
#if 0
typedef unsigned int hash_t;
#else
typedef u64 hash_t;
#endif
int
icmp(const void *a,const void *b)
{
const int *ia = a;
const int *ib = b;
return *ia - *ib;
}
int
uniq(hash_t *a, size_t len)
{
size_t i, j;
qsort(a, len, sizeof(*a), icmp);
for (i = j = 0; i < len; i++) {
if (a[i] == 0) {
break;
}
if (a[i] != a[j])
a[++j] = a[i];
else
dupcnt += 1;
}
return (int) j + 1;
}
static inline void
order(int *a,int *b)
{
int tmp = *a;
if (tmp > *b) {
*a = *b;
*b = tmp;
}
}
static inline hash_t
joinInts(hash_t a,hash_t b)
{
a <<= vshf;
a |= b;
return a;
}
int
getTriangles(int *nosniky, int N, hash_t *trojuhelniky)
{
int count = 0;
int tcount = 0;
// The last triangle found
hash_t TEMP = 0;
for (int i = 0; i < N; i++) {
for (int j = i + 1; j < N; j++) {
for (int m = j + 1; m < N; m++) {
int a = nosniky[i];
int b = nosniky[j];
int c = nosniky[m];
if (a + b > c && a + c > b && c + b > a) {
// Order a,b,c so that a<b<c
order(&a, &b);
order(&a, &c);
order(&b, &c);
count++;
// Triangle will be stored as abc, so we need to join all ints into
// one (3,3,4 => 334)
hash_t temp = joinInts(a, b);
if (i == 0 && j == 1 && m == 2) {
// first triangle, for example a=3,b=3,c=4 => 334
TEMP = joinInts(temp, c);
tcount++;
trojuhelniky[tcount - 1] = joinInts(temp, c);
} else if (joinInts(temp, c) == TEMP) {
// This is a duplicate, ignoring this one
}
else {
tcount++;
trojuhelniky[tcount - 1] = joinInts(temp, c);
// TEMP = the new last found triangle
TEMP = joinInts(temp, c);
}
}
}
}
}
// Remove remaining duplicates, 1.68s as well
int newcount = uniq(trojuhelniky, tcount);
return newcount;
}
int
getFix1(int *nosniky, int N, hash_t *trojuhelniky)
{
int count = 0;
int tcount = 0;
qsort(nosniky, N, sizeof(int), icmp);
// The last triangle found
hash_t TEMP = 0;
for (int i = 0; i < N; i++) {
int a = nosniky[i];
for (int j = i + 1; j < N; j++) {
int b = nosniky[j];
for (int m = j + 1; m < N; m++) {
int c = nosniky[m];
if (a + b > c) {
// Order a,b,c so that a<b<c
#if 0
order(&a, &b);
order(&a, &c);
order(&b, &c);
#endif
count++;
// Triangle will be stored as abc, so we need to join all ints into
// one (3,3,4 => 334)
hash_t temp = joinInts(a, b);
if (TEMP == 0) {
// first triangle, for example a=3,b=3,c=4 => 334
TEMP = joinInts(temp, c);
trojuhelniky[tcount++] = joinInts(temp, c);
} else if (joinInts(temp, c) == TEMP) {
// This is a duplicate, ignoring this one
}
else {
trojuhelniky[tcount++] = joinInts(temp, c);
// TEMP = the new last found triangle
TEMP = joinInts(temp, c);
}
}
}
}
}
// Remove remaining duplicates, 1.68s as well
int newcount = uniq(trojuhelniky, tcount);
return newcount;
}
int
getFix2(int *nosniky, int N, hash_t *trojuhelniky)
{
int count = 0;
int tcount = 0;
qsort(nosniky, N, sizeof(int), icmp);
// The last triangle found
hash_t TEMP = 0;
for (int i = 0; i < N; i++) {
int a = nosniky[i];
for (int j = i + 1; j < N; j++) {
int b = nosniky[j];
hash_t temp_ab = joinInts(a, b);
for (int m = j + 1; m < N; m++) {
int c = nosniky[m];
if (a + b <= c)
break;
count++;
hash_t temp_abc = joinInts(temp_ab,c);
// first triangle, for example a=3,b=3,c=4 => 334
if (TEMP == 0) {
TEMP = temp_abc;
trojuhelniky[tcount++] = temp_abc;
continue;
}
// This is a duplicate, ignoring this one
if (temp_abc == TEMP)
continue;
trojuhelniky[tcount++] = temp_abc;
// TEMP = the new last found triangle
TEMP = temp_abc;
}
}
}
// Remove remaining duplicates, 1.68s as well
int newcount = uniq(trojuhelniky, tcount);
return newcount;
}
int
getFix3(int *nosniky, int N, hash_t *trojuhelniky)
{
int count = 0;
int tcount = 0;
qsort(nosniky, N, sizeof(int), icmp);
// The last triangle found
hash_t TEMP = 0;
for (int i = 0; i < N;) {
int a = nosniky[i];
for (int j = i + 1; j < N;) {
int b = nosniky[j];
hash_t temp_ab = joinInts(a, b);
for (int m = j + 1; m < N;) {
int c = nosniky[m];
if (a + b <= c)
break;
count++;
hash_t temp_abc = joinInts(temp_ab,c);
do {
// first triangle, for example a=3,b=3,c=4 => 334
if (TEMP == 0) {
TEMP = temp_abc;
trojuhelniky[tcount++] = temp_abc;
break;
}
// This is a duplicate, ignoring this one
if (temp_abc == TEMP)
break;
trojuhelniky[tcount++] = temp_abc;
TEMP = temp_abc;
} while (0);
// skip duplicates of the third edge
while (++m < N && nosniky[m] == c);
}
// skip duplicates of the second edge
while (++j < N && nosniky[j] == b);
}
// skip duplicates of the first edge
while (++i < N && nosniky[i] == a);
}
// Remove remaining duplicates, 1.68s as well
#if 0
int newcount = uniq(trojuhelniky, tcount);
#else
int newcount = tcount;
#endif
return newcount;
}
int
getFix4(int *nosniky, int N, hash_t *trojuhelniky)
{
int tcount = 0;
qsort(nosniky, N, sizeof(int), icmp);
for (int i = 0; i < N;) {
int a = nosniky[i];
for (int j = i + 1; j < N;) {
int b = nosniky[j];
for (int m = j + 1; m < N;) {
int c = nosniky[m];
if (a + b <= c)
break;
tcount++;
// skip duplicates of the third edge
while (++m < N && nosniky[m] == c);
}
// skip duplicates of the second edge
while (++j < N && nosniky[j] == b);
}
// skip duplicates of the first edge
while (++i < N && nosniky[i] == a);
}
// Remove remaining duplicates, 1.68s as well
#if 0
int newcount = uniq(trojuhelniky, tcount);
#else
int newcount = tcount;
#endif
return newcount;
}
typedef long long tsc_t;
int *tstvec;
hash_t *tstrst;
int *tstshuf;
int *tstrand;
int maxN;
tsc_t tsczero;
tsc_t
tscget(void)
{
struct timespec ts;
tsc_t tsc;
clock_gettime(CLOCK_MONOTONIC,&ts);
tsc = ts.tv_sec;
tsc *= 1000000000;
tsc += ts.tv_nsec;
tsc -= tsczero;
return tsc;
}
double
tscsec(tsc_t tsc)
{
double sec;
sec = tsc;
sec /= 1e9;
return sec;
}
int itermax = 100;
tsc_t tscbase;
int
dofnc(int N,int (*fnc)(int *,int,hash_t *),const char *tag)
{
tsc_t tscbeg;
tsc_t tscend;
tsc_t tscbest;
int expval;
tscbest = 1LL << 62;
expval = 0;
for (int itercur = 1; itercur <= itermax; ++itercur) {
for (int i = 0; i < N; ++i)
tstvec[i] = tstshuf[i];
dupcnt = 0;
tscbeg = tscget();
expval = fnc(tstvec,N,tstrst);
tscend = tscget();
tscend -= tscbeg;
if (tscend < tscbest)
tscbest = tscend;
}
if (tscbase == 0)
tscbase = tscbest;
double ratio = tscbase;
ratio /= tscbest;
printf("%12.9f uniq:%d dup:%d %.3fx -- %s\n",
tscsec(tscbest),expval,dupcnt,ratio,tag);
return expval;
}
void
showvec(int *vec,int N)
{
int totlen = 0;
if (! opt_v)
return;
printf("\n");
for (int i = 0; i < N; ++i) {
if (totlen == 0)
totlen = printf("showvec:");
int curlen = printf(" %d",vec[i]);
totlen += curlen;
if (totlen > 60) {
printf("\n");
totlen = 0;
}
}
if (totlen > 0)
printf("\n");
}
size_t totalloc;
void *
xrealloc(void *ptr,size_t len,const char *tag)
{
void *tmp;
tmp = realloc(ptr,len);
if (tmp == NULL) {
printf("xrealloc: error ptr=%p len=%zu tag='%s' -- %s\n",
ptr,len,tag,strerror(errno));
exit(1);
}
totalloc += len;
printf("xrealloc: len=%zu totlen=%zu -- %s\n",len,totalloc,tag);
return tmp;
}
#define XREALLOC(_vec,_N) \
_vec = xrealloc(_vec,sizeof(*_vec) * (_N),#_vec)
void
allocvec(int N)
{
if (N > maxN) {
totalloc = 0;
XREALLOC(tstvec,N);
XREALLOC(tstshuf,N);
XREALLOC(tstrand,N);
XREALLOC(tstrst,1000000L * N);
maxN = N;
}
}
#define DOFNC(_fnc,_reason) \
do { \
tryval = dofnc(N,_fnc,#_fnc " / " _reason); \
if (expval < 0) \
expval = tryval; \
if (tryval != expval) \
exit(1); \
} while (0)
void
dotest(int exparg,const int *dft,int N)
{
int tryval;
int expval = exparg;
printf("\n");
allocvec(N);
for (int i = 0; i < N; ++i)
tstshuf[i] = dft[i];
showvec(tstshuf,N);
// shuffle
for (int isrc = N - 1; isrc > 0; --isrc) {
int idst = rand();
idst %= isrc;
int tmp = tstshuf[isrc];
tstshuf[isrc] = tstshuf[idst];
tstshuf[idst] = tmp;
}
showvec(tstshuf,N);
tscbase = 0;
DOFNC(getTriangles,"Etoile's original");
DOFNC(getFix1,"orig + pmg 2 fixes");
DOFNC(getFix2,"+ Michael's fix");
DOFNC(getFix3,"CiaPan's first -- no uniq");
DOFNC(getFix4,"CiaPan w/o trojuhelniky");
#if 0
if (actval != expval)
exit(1);
#endif
}
// 20 7
const int tst_345[] = { 3, 3, 4, 4, 5, 5 };
//-1 27
const int tst_27[] = { 2, 9, 18, 4, 1, 5, 19, 6, 3, 1, 2, 8 };
#define countof(_vec) (sizeof(_vec) / sizeof(_vec[0]))
void
dorand(void)
{
int N;
N = rand();
N %= 1000;
N += 1;
printf("\n");
printf("dorand: N %d\n",N);
allocvec(N);
for (int i = 0; i < N; ++i) {
int val = (rand() % (VLIM - 1)) + 1;
tstrand[i] = val;
}
dotest(-1,tstrand,N);
}
void
hashinit(void)
{
hash_t hashinc;
for (vshf = 0; vshf < 64; ++vshf) {
hashinc = 1;
hashinc <<= vshf;
printf("main: VSHF hashinc=%8.8llX VLIM=%8.8X vshf=%d\n",
hashinc,VLIM,vshf);
if (hashinc >= VLIM)
break;
}
hashinc -= 1;
hash_t hashold = hashinc;
u64 old64 = hashinc;
hash_t hashnew;
u64 new64;
for (int i = 0; i < 3; ++i) {
hashnew = joinInts(hashold,hashinc);
new64 = old64;
new64 <<= vshf;
new64 |= hashinc;
printf("main: HASH i=%d hashnew=%8.8llX hashold=%8.8llX hashinc=%8.8llX \n",
i,hashnew,hashold,hashinc);
printf("main: HASH old64=%16.16llX new64=%16.16llX\n",
old64,new64);
if (hashnew < hashold)
exit(1);
if (hashnew < new64)
exit(2);
if (hashnew < old64)
exit(3);
hashold = hashnew;
old64 = new64;
}
}
int
main(int argc,char **argv)
{
--argc;
++argv;
for (; argc > 0; --argc, ++argv) {
char *cp = *argv;
if (*cp != '-')
break;
cp += 2;
switch (cp[-1]) {
case 'v':
opt_v = ! opt_v;
break;
}
}
tsczero = tscget();
setlinebuf(stdout);
hashinit();
itermax = 100;
dotest(7,tst_345,countof(tst_345));
dotest(27,tst_27,countof(tst_27));
itermax = 5;
for (int i = 1; i <= 3; ++i)
dorand();
return 0;
}
If you love us? You can donate to us via Paypal or buy me a coffee so we can maintain and grow! Thank you!
Donate Us With