From Uncle Bob's book Clean Code (example was in Java, so this was my first java translation), I've been playing with the refactoring example on prime numbers.
The question is: How would you refactor the following code?
There are 4 versions here: UglyVersion :-) , BobVersion, PeteVersion, PeteVersionClassMember. I'm doing this for fun, and hope you enjoy too :-)
class ProgramBad
{
static void Main()
{
int[] result = GeneratePrimes.generatePrimes(30);
foreach (var i in result)
Console.Write(i.ToString() + ", ");
}
}
/// <summary>
/// Given an array of integers starting at 2, cross out the multiples of 2. Fine the next
/// uncrossed integer, and cross out all of its multiples.
/// Repeat until you have passed the square root of the maximum value
/// </summary>
public class GeneratePrimes
{
public static int[] generatePrimes(int maxValue)
{
if (maxValue >= 2) // the only valid case
{
// declarations
int s = maxValue + 1; // size of the array
bool[] f = new bool[s];
int i;
// initialize array to be true
for (i = 0; i < s; i++)
{
f[i] = true;
}
// get rid of non-primes
f[0] = f[1] = false;
//sieve
int j;
for (i = 2; i < Math.Sqrt(s) + 1; i++)
{
if (f[i]) // if i is uncrossed, cross its multiples
{
for (j = 2 * i; j < s; j += i)
f[j] = false; // multiple is not a prime
}
}
// how many primes are there?
int count = 0;
for (i = 0; i < s; i++)
{
if (f[i])
count++; // bump count
}
int[] primes = new int[count];
//move the primes into the result
for (i = 0, j=0;i<s ; i++)
{
if (f[i])
primes[j++] = i;
}
return primes; // return the primes
} else // maxvalue < 2
return new int[0]; // return null array if bad input
}
}
Bob's refactored version:
class ProgramBob
{
static void Main()
{
int[] result = PrimeGeneratorBob.generatePrimes(30);
foreach (var i in result)
Console.Write(i + ", ");
}
}
/// <summary>
/// Generates prime number up to a user specified maximum
/// The algorithm used is the Sieve of Eratosthenes.
/// Given an array of integers starting at 2:
/// Find the first uncrossed (eg 3 ) integer, and cross out all its multiples (eg 6,9,12,14)
/// Repeat until there are no more multipes in the array
/// </summary>
class PrimeGeneratorBob
{
static bool[] crossedOut;
static int[] result;
static public int[] generatePrimes(int maxValue)
{
if (maxValue < 2)
return new int[0];
else
{
uncrossIntegersUpTo(maxValue);
crossOutMultiples();
putUncrossedIntegersIntoResult();
return result;
}
}
static void uncrossIntegersUpTo(int maxValue)
{
crossedOut = new bool[maxValue + 1]; // as bool array starts at 0, and if maxvalue = 10, we need an array of length 11
for (int i = 2; i < crossedOut.Length; i++) // less than as we don't want to reference crossedOut[4] which doesn't exist
crossedOut[i] = false;
}
static void crossOutMultiples()
{
int limit = determineIterationLimit();
for (int i = 2; i <= limit; i++)
if (notCrossed(i))
crossOutMultiplesOf(i);
}
static int determineIterationLimit()
{
// Every multiple in the array has a prime factor that
// is less than or equal to the square root of the array size,
// which is the largest number we are trying to find primes in.
// So we don't have to cross out numbers
// larger than that square root of the maxnumber, as they will have been crossed out already
double iterationLimit = Math.Sqrt(crossedOut.Length);
return (int) iterationLimit;
}
static void crossOutMultiplesOf(int i)
{
for (int multiple = 2*i; multiple < crossedOut.Length; multiple += i)
crossedOut[multiple] = true;
}
static bool notCrossed(int i)
{
return crossedOut[i] == false;
}
static void putUncrossedIntegersIntoResult()
{
result = new int[numberOfUncrossedIntegers()];
for (int j = 0, i = 2; i < crossedOut.Length; i++)
if (notCrossed(i))
result[j++] = i;
}
static int numberOfUncrossedIntegers()
{
int count = 0;
for (int i = 2; i < crossedOut.Length; i++)
if (notCrossed(i))
count++;
return count;
}
}
What I like about this is:
I paired with a friend over the weekend, and we came up with this version:
class PetesRefactored
{
static void MainPete()
{
PrimeGeneratorPete pg = new PrimeGeneratorPete();
int[] arrayOfPrimes = pg.generatePrimes(30);
foreach (int prime in arrayOfPrimes)
Console.Write(prime + ", ");
}
}
/// <summary>
/// Generates prime number up to a user specified maximum
/// The algorithm used is the Sieve of Eratosthenes.
/// Given an array of integers starting at 2:
/// Find the first uncrossed (eg 3 ) integer, and cross out all its multiples (eg 6,9,12,14)
/// Repeat until there are no more multipes in the array
/// </summary>
class PrimeGeneratorPete
{
public int[] generatePrimes(int maxValue)
{
bool[] crossedOut;
if (maxValue < 2)
return new int[0];
else
{
crossedOut = new bool[maxValue + 1];
uncrossIntegersUpTo(crossedOut);
crossOutMultiples(crossedOut);
return putUncrossedIntegersIntoResult(crossedOut);
}
}
void uncrossIntegersUpTo(bool[] crossedOut)
{
for (int i = 2; i < crossedOut.Length; i++)
crossedOut[i] = false;
}
void crossOutMultiples(bool[] crossedOut)
{
int limit = determineIterationLimit(crossedOut);
for (int i = 2; i <= limit; i++)
if (!crossedOut[i])
crossOutMultiplesOf(crossedOut, i);
}
int determineIterationLimit(bool[] crossedOut)
{
// Every multiple in the array has a prime factor that
// is less than or equal to the square root of the array size,
// which is the largest number we are trying to find primes in.
// So we don't have to cross out numbers
// larger than that square root of the maxnumber, as they will have been crossed out already
double iterationLimit = Math.Sqrt(crossedOut.Length);
return (int) iterationLimit;
}
void crossOutMultiplesOf(bool[] crossedOut, int i)
{
for (int multiple = 2*i; multiple < crossedOut.Length; multiple += i)
crossedOut[multiple] = true;
}
int[] putUncrossedIntegersIntoResult(bool[] crossedOut)
{
int[] result = new int[numberOfUncrossedIntegers(crossedOut)];
for (int j = 0, i = 2; i < crossedOut.Length; i++)
if (!crossedOut[i])
result[j++] = i;
return result;
}
int numberOfUncrossedIntegers(bool[] crossedOut)
{
int count = 0;
for (int i = 2; i < crossedOut.Length; i++)
if (!crossedOut[i])
count++;
return count;
}
}
We:
And thanks to Casey and anon.
Here is the code with crossedOut as a class level variable. I like this as it cuts down on some noise when passing parameters into methods.
class PrimeGeneratorPeteClassMember
{
bool[] crossedOut;
public int[] generatePrimes(int maxValue)
{
if (maxValue < 2)
return new int[0];
else
{
crossedOut = new bool[maxValue + 1];
uncrossIntegersUpTo();
crossOutMultiples();
return putUncrossedIntegersIntoResult();
}
}
void uncrossIntegersUpTo()
{
for (int i = 2; i < crossedOut.Length; i++)
crossedOut[i] = false;
}
void crossOutMultiples()
{
int limit = determineIterationLimit();
for (int i = 2; i <= limit; i++)
if (!crossedOut[i])
crossOutMultiplesOf(i);
}
int determineIterationLimit()
{
double iterationLimit = Math.Sqrt(crossedOut.Length);
return (int)iterationLimit;
}
void crossOutMultiplesOf(int i)
{
for (int multiple = 2 * i; multiple < crossedOut.Length; multiple += i)
crossedOut[multiple] = true;
}
int[] putUncrossedIntegersIntoResult()
{
int[] result = new int[numberOfUncrossedIntegers()];
for (int j = 0, i = 2; i < crossedOut.Length; i++)
if (!crossedOut[i])
result[j++] = i;
return result;
}
int numberOfUncrossedIntegers()
{
int count = 0;
for (int i = 2; i < crossedOut.Length; i++)
if (!crossedOut[i])
count++;
return count;
}
}
Honestly? I wouldn't have refactored at all. Maybe it's just because I used to be a math wonk but I find the first version much easier to read.
It often does not make much sense to refactor an algorithm. When you refactor code it means you expect to reuse or change parts of it. In this case the entire block of code is static and immutable - the only thing you might change is to swap out the entire function with a different algorithm. One-line functions like notCrossed seem particularly useless; they merely serve to make the code more verbose without helping to explain anything that isn't already obvious.
Actually, maybe there are two refactorings I would do:
Change the class name GeneratePrimes into PrimeGenerator, as you already did. Verb class names always throw me for a loop - is it a class, or a method?
Change it to either return an IList<int> or IEnumerable<int>. Returning array types is Considered Harmful.
Edit - one more third refactoring would be to remove some of the useless comments and use meaningful variable names instead (where appropriate).
Other than that - stick with the original version!
Edit - actually, the more I look at the original, the more I dislike it. Not because of the way it's organized, just the way it's written. Here's my rewrite:
public class PrimeGenerator
{
public static IEnumerable<int> GeneratePrimes(int maxValue)
{
if (maxValue < 2)
return Enumerable.Empty<int>();
bool[] primes = new bool[maxValue + 1];
for (int i = 2; i <= maxValue; i++)
primes[i] = true;
for (int i = 2; i < Math.Sqrt(maxValue + 1) + 1; i++)
{
if (primes[i])
{
for (int j = 2 * i; j <= maxValue; j += i)
primes[j] = false;
}
}
return Enumerable.Range(2, maxValue - 1).Where(i => primes[i]);
}
}
There, much better!
Just to clarify Maxwells inverted IF comment:
This is bad:
if (maxValue >= 2)
{ blah }
else
return new int[];
This is good
if (maxValue < 2)
return new int[0];
blah
If you love us? You can donate to us via Paypal or buy me a coffee so we can maintain and grow! Thank you!
Donate Us With