Logo Questions Linux Laravel Mysql Ubuntu Git Menu
 

How to optimize below multiple strncmp?

Tags:

c

algorithm

I need to check the string to see it matches any of the prefixes. The number of prefixes to be compared will increase in the future. So I have concern on the performance of code like below. What are the options to make it run faster when there are lots of strings need to be checked?

int checkString(const char *name)
{
    if(!name) return 0;

    if(strncmp(name, "AE_", 3) == 0 ) return 1;                                                                              
    if(strncmp(name, "AEDZ_", 5) == 0 ) return 1;                                                                            
    if(strncmp(name, "EDPZ_", 5) == 0 ) return 1;                                                                            
    if(strncmp(name, "EFAN_", 5) == 0 ) return 1;                                                                            
    if(strncmp(name, "E_GCA", 5 ) == 0 ) return 1;                                                                           
    if(strncmp(name, "EFFAN_", 6) == 0 ) return 1;                                                                           
    if(strncmp(name, "EPDPZ_", 6) == 0 ) return 1;                                                                           
    if(strncmp(name, "EDDPZ_", 6) == 0 ) return 1;                                                                           
    if(strncmp(name, "ECADF_", 6) == 0 ) return 1;                                                                           
    if(strncmp(name, "EPCEA_", 6) == 0 ) return 1;                                                                           
    if(strncmp(name, "CFEXXX_", 7) == 0 ) return 1;                                                                          
    if(strncmp(name, "IFEXX_", 7) == 0 ) return 1;                                                                           
    if(strncmp(name, "EINFFAN_", 8) == 0 ) return 1;                                                                         
    if(strncmp(name, "NXXEFAN_", 8) == 0 ) return 1;                                                                         
    if(strncmp(name, "ENAEAZY_", 8) == 0 ) return 1;                                                                         
    if(strncmp(name, "EYYYYYY_", 8) == 0 ) return 1;                                                                         
    if(strncmp(name, "ENEOENUE_", 9) == 0 ) return 1;                                                                        
    /*
    more strncmp to be added.
    */

    return 0;
}       
like image 897
Wayne Li Avatar asked Oct 17 '22 11:10

Wayne Li


2 Answers

One-time, ahead-of-time setup:

regex_t re;
regcomp(&re, "^(AE_|AEDZ|_EDPZ_|EFAN_|E_GCA|" /*...*/ ")", REG_EXTENDED);

To check:

return regexec(&re, name, 0, 0, 0) == 0;

On any good regex implementation, the regcomp will compile the regex to a DFA that executes in a number of steps bounded by the length of the longest prefix.

like image 99
R.. GitHub STOP HELPING ICE Avatar answered Oct 21 '22 06:10

R.. GitHub STOP HELPING ICE


What are the options to make it run faster when there are lots of strings need to be checked?

If the n prefixes were sorted, then at most log2(n) compares would be needed. Code could use bsearch().

#include <stdio.h>
#include <stdlib.h>

const char *prefix[] = {"AE_", "AEDZ_", "CFEXXX_", "ECADF_", "EDDPZ_",
    "EDPZ_", "EFAN_", "EFFAN_", "EINFFAN_", "ENAEAZY_", "ENEOENUE_", "EPCEA_",
    "EPDPZ_", "EYYYYYY_", "E_GCA",  "IFEXX_", "NXXEFAN_"};

int cmp(const void *key, const void *element) {
  const char *k = key;
  const char *e = *(const char **) element;
  size_t elen = strlen(e);
  printf("strncmp(%s,%s,%zu)\n", k,e,elen);
  return strncmp(k, e, elen);
}

void test(const char *key) {
  printf("Search for <%s>\n", key);
  size_t n = sizeof prefix/sizeof prefix[0];
  const char **s = bsearch(key, prefix, n, sizeof prefix[0], cmp);
  if (s) {
    printf("Found <%s>\n", *s);
  } else {
    printf("Not Found\n");
  }
}

int main() {
  test("E_GC");
  test("E_GCA");
  test("E_GCA_");
}

Output

Search for <E_GC>
strncmp(E_GC,EINFFAN_,8)
strncmp(E_GC,EYYYYYY_,8)
strncmp(E_GC,IFEXX_,6)
strncmp(E_GC,E_GCA,5)
Not Found
Search for <E_GCA>
strncmp(E_GCA,EINFFAN_,8)
strncmp(E_GCA,EYYYYYY_,8)
strncmp(E_GCA,IFEXX_,6)
strncmp(E_GCA,E_GCA,5)
Found <E_GCA>
Search for <E_GCA_>
strncmp(E_GCA_,EINFFAN_,8)
strncmp(E_GCA_,EYYYYYY_,8)
strncmp(E_GCA_,IFEXX_,6)
strncmp(E_GCA_,E_GCA,5)
Found <E_GCA>
like image 42
chux - Reinstate Monica Avatar answered Oct 21 '22 06:10

chux - Reinstate Monica