before I ask anything, just letting everyone know that this is for fun and I have posted all of my code that I have so far; will post more as things are fixed/implemented), sorry for the lengthy post!
I have two questions here and I will post all of my code below.
Right now It is just a text based program that accepts upto 26 letters and output all valid words from the 200K word+ dictionary posted here:
http://www.calvin.edu/~rpruim/scrabble/ospd3.txt
the below C program requires the dictionary to be sliced into 26 files containing all the words beginning with each letter in each file (all a words in file 'a' etc...) perl is posted below to do so.
Word Finder (c):
#include <stdio.h>
#include <string.h>
#include <stdlib.h>
#define NUM_CHARS 26
#define MAX_WORD_LEN 20
#define WORDS_PER_LINE 12
/* Character link structure */
typedef struct char_link
{
struct char_link **cl; /* All of this links possible next characters */
short eow; /* END OF WORD (means this is the last letter of a valid word */
} CHARLINK;
/* Global found word count, used for printing '\n' char. */
unsigned short gwc = 0;
CHARLINK * _init_link(CHARLINK **link)
{
short i;
(*link)->cl = (CHARLINK **) malloc(NUM_CHARS * sizeof(CHARLINK *));
for (i = 0; i < NUM_CHARS; i++)
(*link)->cl[i] = NULL;
(*link)->eow = 0;
return (*link);
}
void _build_char_link(CHARLINK *link)
{
FILE *fp;
char *ptr, file[2];
CHARLINK *current_link = NULL;
char line_buffer[MAX_WORD_LEN];
unsigned short size = 0;
static short letter_index = 0;
int current_letter = 0;
sprintf(file, "%c", letter_index + 'a');
current_link = _init_link(&link);
if (fp = fopen(file, "r"))
{
while (fgets(line_buffer, MAX_WORD_LEN, fp) > 0)
{
/* Skip letter_index */
ptr = line_buffer + 1;
while(*ptr && (*ptr != '\n' && *ptr != '\r'))
{
current_letter = (int)(*ptr - 'a');
/* Create and jump to new link */
if (!current_link->cl[current_letter])
{
current_link->cl[current_letter] = (CHARLINK *) malloc (sizeof(CHARLINK));
current_link = _init_link(¤t_link->cl[current_letter]);
}
/* Jump to existing link */
else
current_link = current_link->cl[current_letter];
ptr++;
}
current_link->eow = 1;
/* Reset our current_link pointer to the letter_index link */
current_link = link;
}
fclose(fp);
}
else
printf("Warning: Couldn't import words for letter: %s\n", file);
letter_index++;
}
void _draw_tree(CHARLINK *link, short letter, short depth)
{
short i, tmp;
if (!depth)
{
printf("Data for letter %c\n", letter + 'a');
printf("%c\n", letter + 'a');
}
for (i = 0; i < NUM_CHARS; i++)
{
if (link->cl[i])
{
tmp = depth;
while (tmp-- >= 0)
printf("\t");
printf("%c(%d)\n", i + 'a', link->cl[i]->eow);
_draw_tree(link->cl[i], letter, depth + 1);
}
}
}
void _get_possible_words(CHARLINK *link, char *prefix, char *letters, unsigned int input_len, unsigned int depth)
{
short i, len, j;
unsigned int attempted = 0x00000000;
if (link->eow)
{
printf("\t%s", prefix);
if (++gwc == WORDS_PER_LINE)
{
printf("\n");
gwc = 0;
}
}
len = strlen(prefix);
for (i = 0; i < input_len; i++)
{
if (letters[i])
{
j = (1 << (letters[i] - 'a'));
if (!(j & attempted) && link->cl[letters[i] - 'a'])
{
prefix[len] = letters[i];
letters[i] = '\0';
_get_possible_words(link->cl[prefix[len] - 'a'], prefix, letters, input_len, depth + 1);
letters[i] = prefix[len];
prefix[len] = '\0';
}
attempted |= j;
}
}
}
int main(int argc, char *argv[])
{
short i;
/* 26 link structures for a-z */
CHARLINK root_nodes[NUM_CHARS];
printf("Building structures ");
for (i = 0; i < NUM_CHARS; i++)
{
_build_char_link(&root_nodes[i]);
printf(". ");
}
printf("Done!\n");
/* Debug, what do our trees look like? */
//for (i = 0; i < NUM_CHARS; i++)
// _draw_tree(&root_nodes[i], i, 0);
for(;;)
{
short input_len = 0;
unsigned int j = 0, attempted = 0x00000000;
char input[26] = {0};
char letters[26] = {0};
char prefix[26] = {0};
printf("Enter letters ('0' to exit): ");
gets(input); /* Yay buffer overflow */
if (input[0] == '0') break;
sprintf(letters, "%s", input);
input_len = strlen(input);
for (i = 0; i < input_len; i++)
{
j = (1 << (input[i] - 'a'));
if (!(j & attempted))
{
prefix[0] = input[i];
letters[i] = '\0';
_get_possible_words(&root_nodes[prefix[0] - 'a'], prefix, letters, input_len, 1);
letters[i] = input[i];
attempted |= j;
}
}
printf("\n");
}
return 255;
}
file split(perl):
#!/usr/bin/perl
open(FH, "< words.txt");
my %w = map { $_ => {} } 'a'..'z';
while (<FH>)
{
s/\s+$//;
$w{lc $1}->{lc $_} = 1 if /^(\w)/;
}
foreach my $l ( keys %w )
{
open (OUT, "> $l");
foreach my $a ( keys %{$w{$l}} )
{
print OUT "$a\n";
}
close OUT;
}
Just some thoughts on your Perl.
No reason for the big hash initialization. You can initialize in one line with this:
my %w = map { $_ => {} } 'a'..'z';
But there is really no reason to init at all, Perl will autovivify the hash refs for you when you say:
$w{$1}{$_} = 1 if /^(\w)/;
But you have an error, if a word starts with a capitol letter, it will go into the wrong key. If you want to catch these kind of errors, you can use Hash::Util's lock_keys
to prevent new keys being added to your hash. To fix the bug, normalize your words by use lc
or uc
to force the correct case.
You've got some other stylistic problems with your Perl. Also, since you are working with (presumably) large files, why keep all the words in memory?
#!/usr/bin/perl
use strict;
use warnings;
use IO::Handle;
open my $fh, '<', $wordlist_path
or die "Error opening word list '$wordlist' - $!\n";
# Open a handle for each target file.
my %handle = map {
open my $fh, '>', $_
or die "Error opening sublist $_ - $!\n";
$_ => $fh;
} 'a'..'z';
while( my $word = <$fh> ) {
$word = clean_word( $word );
my $first_letter = substr $word, 0, 1;
$handle{$first_letter}->print( "$word\n" );
}
sub clean_word {
my $word = shift;
chomp $word;
$word = lc $word;
$word =~ s/^\s*//;
$word =~ s/\s*$//;
return $word;
}
If you love us? You can donate to us via Paypal or buy me a coffee so we can maintain and grow! Thank you!
Donate Us With