In the past I have made a function that generates an unique id (number) from a string. Today I discover that it is not as unique as should be. Never saw a problem before with it. Today two different inputs generates the same id (number).
I use the same technique in Delphi, C++, PHP and Javascript to generate the same id's so there is no difference when different languages are involved to a project. For example this can be handy to communicate, for HTML id's, tempfiles etc.
In general, what I do is calculate a CRC16 of a string, add the sum and return it.
For example, these two strings generate the same id (number):
o.uniqueId( 'M:/Mijn Muziek/Various Artists/Revs & ElBee - Tell It To My Heart.mp3' );
o.uniqueId( 'M:/Mijn Muziek/Various Artists/Dwight Yoakam - The Back Of Your Hand.Mp3');
They both generates an id of 224904.
The following example is a javascript example. My question is, how can i avoid (with a little change) that it generates a duplicate? (In case you might wonder what 'o.' means, it is the object where these functions belongs to):
o.getCrc16 = function(s, bSumPos) {
if(typeof s !== 'string' || s.length === 0) {
return 0;
}
var crc = 0xFFFF,
L = s.length,
sum = 0,
x = 0,
j = 0;
for(var i = 0; i < L; i++) {
j = s.charCodeAt(i);
sum += ((i + 1) * j);
x = ((crc >> 8) ^ j) & 0xFF;
x ^= x >> 4;
crc = ((crc << 8) ^ (x << 12) ^ (x << 5) ^ x) & 0xFFFF;
}
return crc + ((bSumPos ? 1 : 0) * sum);
}
o.uniqueId = function(s, bres) {
if(s == undefined || typeof s != 'string') {
if(!o.___uqidc) {
o.___uqidc = 0;
} else {
++o.___uqidc;
}
var od = new Date(),
i = s = od.getTime() + '' + o.___uqidc;
} else {
var i = o.getCrc16(s, true);
}
return((bres) ? 'res:' : '') + (i + (i ? s.length : 0));
};
How can I avoid duplicates with use of a little change to the code?
All right, did allot of testing and come to this. A relative short unique id generated by the following:
o.lz = function(i,c)
{
if( typeof c != 'number' || c <= 0 || (typeof i != 'number' && typeof i != 'string') )
{ return i; }
i+='';
while( i.length < c )
{ i='0'+i; }
return i;
}
o.getHashCode = function(s)
{
var hash=0,c=(typeof s == 'string')?s.length:0,i=0;
while(i<c)
{
hash = ((hash<<5)-hash)+s.charCodeAt(i++);
//hash = hash & hash; // Convert to 32bit integer
}
return ( hash < 0 )?((hash*-1)+0xFFFFFFFF):hash; // convert to unsigned
};
o.uniqueId = function( s, bres )
{
if( s == undefined || typeof s != 'string' )
{
if( !o.___uqidc )
{ o.___uqidc=0; }
else { ++o.___uqidc; }
var od = new Date(),
i = s = od.getTime()+''+o.___uqidc;
}
else { var i = o.getHashCode( s ); }
return ((bres)?'res:':'')+i.toString(32)+'-'+o.lz((s.length*4).toString(16),3);
};
Examples:
o.uniqueId( 'M:/Mijn Muziek/Various Artists/Revs & ElBee - Tell It To My Heart.mp3' );
o.uniqueId( 'M:/Mijn Muziek/Various Artists/Dwight Yoakam - The Back Of Your Hand.Mp3');
Will produce the following id's:
dh8qi9t-114
je38ugg-120
For my purpose it seems to be unique enough, also the extra length adds some more uniqueness. Test it on filesystem with approx 40.000 mp3 files and did not found any collision.
If you think this is not the way to go, please let me know.
If you love us? You can donate to us via Paypal or buy me a coffee so we can maintain and grow! Thank you!
Donate Us With