The following string of mine tried to find difference between two strings. But it's horribly slow as it iterate the length of string:
#include <string>
#include <vector>
#include <iostream>
using namespace std;
int hd(string s1, string s2) {
// hd stands for "Hamming Distance"
int dif = 0;
for (unsigned i = 0; i < s1.size(); i++ ) {
string b1 = s1.substr(i,1);
string b2 = s2.substr(i,1);
if (b1 != b2) {
dif++;
}
}
return dif;
}
int main() {
string string1 = "AAAAA";
string string2 = "ATATT";
string string3 = "AAAAA";
int theHD12 = hd(string1,string2);
cout << theHD12 << endl;
int theHD13 = hd(string1,string3);
cout << theHD13 << endl;
}
Is there a fast alternative to do that? In Perl we can have the following approach:
sub hd {
return ($_[0] ^ $_[1]) =~ tr/\001-\255//;
}
which is much2 faster than iterating the position.
I wonder what's the equivalent of it in C++?
Try to replace the for loop by:
for (unsigned i = 0; i < s1.size(); i++ ) {
if (b1[i] != b2[i]) {
dif++;
}
}
This should be a lot faster because no new strings are created.
Fun with the STL:
#include <numeric> //inner_product
#include <functional> //plus, equal_to, not2
#include <string>
#include <stdexcept>
unsigned int
hd(const std::string& s1, const std::string& s2)
{
// TODO: What should we do if s1.size() != s2.size()?
if (s1.size() != s2.size()){
throw std::invalid_argument(
"Strings passed to hd() must have the same lenght"
);
}
return std::inner_product(
s1.begin(), s1.end(), s2.begin(),
0, std::plus<unsigned int>(),
std::not2(std::equal_to<std::string::value_type>())
);
}
Use iterators:
int GetHammingDistance(const std::string &a, const std::string &b)
{
// Hamming distance is not defined for strings of different lengths.
ASSERT(a.length() == b.length());
std::string::const_iterator a_it = a.begin();
std::string::const_iterator b_it = b.begin();
std::string::const_iterator a_end = a.end();
std::string::const_iterator b_end = b.end();
int distance = 0;
while (a_it != a_end && b_it != b_end)
{
if (*a_it != *b_it) ++distance;
++a_it; ++b_it;
}
return distance;
}
Choice 1: Modify your original code to be as effecient as possable.
int hd(string const& s1, string const& s2)
{
// hd stands for "Hamming Distance"
int dif = 0;
for (std::string::size_type i = 0; i < s1.size(); i++ )
{
char b1 = s1[i];
char b2 = s2[i];
dif += (b1 != b2)?1:0;
}
return dif;
}
Second option use some of the STL algorithms to do the heavy lifting.
struct HammingFunc
{
inline int operator()(char s1,char s2)
{
return s1 == s2?0:1;
}
};
int hd(string const& s1, string const& s2)
{
int diff = std::inner_product(s1.begin(),s1.end(),
s2.begin(),
0,
std::plus<int>(),HammingFunc()
);
return diff;
}
Some obvious points that might make it faster:
If you love us? You can donate to us via Paypal or buy me a coffee so we can maintain and grow! Thank you!
Donate Us With