I need to check the string to see it matches any of the prefixes. The number of prefixes to be compared will increase in the future. So I have concern on the performance of code like below. What are the options to make it run faster when there are lots of strings need to be checked?
int checkString(const char *name)
{
if(!name) return 0;
if(strncmp(name, "AE_", 3) == 0 ) return 1;
if(strncmp(name, "AEDZ_", 5) == 0 ) return 1;
if(strncmp(name, "EDPZ_", 5) == 0 ) return 1;
if(strncmp(name, "EFAN_", 5) == 0 ) return 1;
if(strncmp(name, "E_GCA", 5 ) == 0 ) return 1;
if(strncmp(name, "EFFAN_", 6) == 0 ) return 1;
if(strncmp(name, "EPDPZ_", 6) == 0 ) return 1;
if(strncmp(name, "EDDPZ_", 6) == 0 ) return 1;
if(strncmp(name, "ECADF_", 6) == 0 ) return 1;
if(strncmp(name, "EPCEA_", 6) == 0 ) return 1;
if(strncmp(name, "CFEXXX_", 7) == 0 ) return 1;
if(strncmp(name, "IFEXX_", 7) == 0 ) return 1;
if(strncmp(name, "EINFFAN_", 8) == 0 ) return 1;
if(strncmp(name, "NXXEFAN_", 8) == 0 ) return 1;
if(strncmp(name, "ENAEAZY_", 8) == 0 ) return 1;
if(strncmp(name, "EYYYYYY_", 8) == 0 ) return 1;
if(strncmp(name, "ENEOENUE_", 9) == 0 ) return 1;
/*
more strncmp to be added.
*/
return 0;
}
One-time, ahead-of-time setup:
regex_t re;
regcomp(&re, "^(AE_|AEDZ|_EDPZ_|EFAN_|E_GCA|" /*...*/ ")", REG_EXTENDED);
To check:
return regexec(&re, name, 0, 0, 0) == 0;
On any good regex implementation, the regcomp
will compile the regex to a DFA that executes in a number of steps bounded by the length of the longest prefix.
What are the options to make it run faster when there are lots of strings need to be checked?
If the n
prefixes were sorted, then at most log2(n)
compares would be needed. Code could use bsearch()
.
#include <stdio.h>
#include <stdlib.h>
const char *prefix[] = {"AE_", "AEDZ_", "CFEXXX_", "ECADF_", "EDDPZ_",
"EDPZ_", "EFAN_", "EFFAN_", "EINFFAN_", "ENAEAZY_", "ENEOENUE_", "EPCEA_",
"EPDPZ_", "EYYYYYY_", "E_GCA", "IFEXX_", "NXXEFAN_"};
int cmp(const void *key, const void *element) {
const char *k = key;
const char *e = *(const char **) element;
size_t elen = strlen(e);
printf("strncmp(%s,%s,%zu)\n", k,e,elen);
return strncmp(k, e, elen);
}
void test(const char *key) {
printf("Search for <%s>\n", key);
size_t n = sizeof prefix/sizeof prefix[0];
const char **s = bsearch(key, prefix, n, sizeof prefix[0], cmp);
if (s) {
printf("Found <%s>\n", *s);
} else {
printf("Not Found\n");
}
}
int main() {
test("E_GC");
test("E_GCA");
test("E_GCA_");
}
Output
Search for <E_GC>
strncmp(E_GC,EINFFAN_,8)
strncmp(E_GC,EYYYYYY_,8)
strncmp(E_GC,IFEXX_,6)
strncmp(E_GC,E_GCA,5)
Not Found
Search for <E_GCA>
strncmp(E_GCA,EINFFAN_,8)
strncmp(E_GCA,EYYYYYY_,8)
strncmp(E_GCA,IFEXX_,6)
strncmp(E_GCA,E_GCA,5)
Found <E_GCA>
Search for <E_GCA_>
strncmp(E_GCA_,EINFFAN_,8)
strncmp(E_GCA_,EYYYYYY_,8)
strncmp(E_GCA_,IFEXX_,6)
strncmp(E_GCA_,E_GCA,5)
Found <E_GCA>
If you love us? You can donate to us via Paypal or buy me a coffee so we can maintain and grow! Thank you!
Donate Us With