pset 5
speller
Implement a program that spell-checks a file, a la the below, using a hash table.
$ ./speller texts/lalaland.txtMISSPELLED WORDS
[...]AHHHHHHHHHHHHHHHHHHHHHHHHHHHT[...]Shangri[...]fianc[...]Sebastian's[...]
WORDS MISSPELLED:WORDS IN DICTIONARY:WORDS IN TEXT:TIME IN load:TIME IN check:TIME IN size:TIME IN unload:TIME IN TOTAL:Directorydictionaries
- large
- small
Directorykeys
- *.txt
Directorytexts
- *.txt
- dictionary.c
- dictionary.h
- Makefile
- speller.c
- staff.txt
- student.txt
// Implements a dictionary's functionality
#include <stdio.h>#include <stdlib.h>#include <stdbool.h>#include <strings.h>#include <string.h>#include <ctype.h>
#include "dictionary.h"
// Represents a node in a hash tabletypedef struct node{ char word[LENGTH + 1]; struct node *next;}node;
// Number of buckets in hash tableconst unsigned int N = 223007;
// Number of words in dictionary fileunsigned int word_count = 0;
// Hash tablenode *table[N];
// Returns true if word is in dictionary else falsebool check(const char *word){ // TODO unsigned int index = hash(word); for (node *n = table[index]; n != NULL; n = n->next) { if (strcasecmp(n->word, word) == 0) { return true; } }
return false;}
// Hashes word to a numberunsigned int hash(const char *word){ // TODO // DJB2 Hash below. Source: http://www.cse.yorku.ca/~oz/hash.html unsigned int hash = 5381; int c;
while ((c = *word++)) { hash = ((hash << 5) + hash) + tolower(c); /* hash * 33 + c */ }
return hash % N;}
// Loads dictionary into memory, returning true if successful else falsebool load(const char *dictionary){ // TODO FILE *file = fopen(dictionary, "r"); if (file == NULL) { return false; }
char word[LENGTH]; while (fscanf(file, "%s", word) != EOF) { node *n = malloc(sizeof(node)); if (n == NULL) { return false; } strcpy(n->word, word); n->next = NULL;
unsigned int index = hash(word); n->next = table[index]; table[index] = n;
word_count++; } // Close file stream fclose(file);
// Success return true;}
// Returns number of words in dictionary if loaded else 0 if not yet loadedunsigned int size(void){ // TODO return word_count;}
// Unloads dictionary from memory, returning true if successful else falsebool unload(void){ // TODO unsigned int free_count = 0; for (int i = 0; i < N; i++) { for (node *n = table[i]; n != NULL;) { node *tmp = n; n = n->next; free(tmp); free_count++; } }
if (free_count == size()) { return true; }
return false;}// Declares a dictionary's functionality
#ifndef DICTIONARY_H#define DICTIONARY_H
#include <stdbool.h>
// Maximum length for a word// (e.g., pneumonoultramicroscopicsilicovolcanoconiosis)#define LENGTH 45
// Prototypesbool check(const char *word);unsigned int hash(const char *word);bool load(const char *dictionary);unsigned int size(void);bool unload(void);
#endif // DICTIONARY_H// Implements a spell-checker
#include <ctype.h>#include <stdio.h>#include <sys/resource.h>#include <sys/time.h>
#include "dictionary.h"
// Undefine any definitions#undef calculate#undef getrusage
// Default dictionary#define DICTIONARY "dictionaries/large"
// Prototypedouble calculate(const struct rusage *b, const struct rusage *a);
int main(int argc, char *argv[]){ // Check for correct number of args if (argc != 2 && argc != 3) { printf("Usage: ./speller [DICTIONARY] text\n"); return 1; }
// Structures for timing data struct rusage before, after;
// Benchmarks double time_load = 0.0, time_check = 0.0, time_size = 0.0, time_unload = 0.0;
// Determine dictionary to use char *dictionary = (argc == 3) ? argv[1] : DICTIONARY;
// Load dictionary getrusage(RUSAGE_SELF, &before); bool loaded = load(dictionary); getrusage(RUSAGE_SELF, &after);
// Exit if dictionary not loaded if (!loaded) { printf("Could not load %s.\n", dictionary); return 1; }
// Calculate time to load dictionary time_load = calculate(&before, &after);
// Try to open text char *text = (argc == 3) ? argv[2] : argv[1]; FILE *file = fopen(text, "r"); if (file == NULL) { printf("Could not open %s.\n", text); unload(); return 1; }
// Prepare to report misspellings printf("\nMISSPELLED WORDS\n\n");
// Prepare to spell-check int index = 0, misspellings = 0, words = 0; char word[LENGTH + 1];
// Spell-check each word in text for (int c = fgetc(file); c != EOF; c = fgetc(file)) { // Allow only alphabetical characters and apostrophes if (isalpha(c) || (c == '\'' && index > 0)) { // Append character to word word[index] = c; index++;
// Ignore alphabetical strings too long to be words if (index > LENGTH) { // Consume remainder of alphabetical string while ((c = fgetc(file)) != EOF && isalpha(c));
// Prepare for new word index = 0; } }
// Ignore words with numbers (like MS Word can) else if (isdigit(c)) { // Consume remainder of alphanumeric string while ((c = fgetc(file)) != EOF && isalnum(c));
// Prepare for new word index = 0; }
// We must have found a whole word else if (index > 0) { // Terminate current word word[index] = '\0';
// Update counter words++;
// Check word's spelling getrusage(RUSAGE_SELF, &before); bool misspelled = !check(word); getrusage(RUSAGE_SELF, &after);
// Update benchmark time_check += calculate(&before, &after);
// Print word if misspelled if (misspelled) { printf("%s\n", word); misspellings++; }
// Prepare for next word index = 0; } }
// Check whether there was an error if (ferror(file)) { fclose(file); printf("Error reading %s.\n", text); unload(); return 1; }
// Close text fclose(file);
// Determine dictionary's size getrusage(RUSAGE_SELF, &before); unsigned int n = size(); getrusage(RUSAGE_SELF, &after);
// Calculate time to determine dictionary's size time_size = calculate(&before, &after);
// Unload dictionary getrusage(RUSAGE_SELF, &before); bool unloaded = unload(); getrusage(RUSAGE_SELF, &after);
// Abort if dictionary not unloaded if (!unloaded) { printf("Could not unload %s.\n", dictionary); return 1; }
// Calculate time to unload dictionary time_unload = calculate(&before, &after);
// Report benchmarks printf("\nWORDS MISSPELLED: %d\n", misspellings); printf("WORDS IN DICTIONARY: %d\n", n); printf("WORDS IN TEXT: %d\n", words); printf("TIME IN load: %.2f\n", time_load); printf("TIME IN check: %.2f\n", time_check); printf("TIME IN size: %.2f\n", time_size); printf("TIME IN unload: %.2f\n", time_unload); printf("TIME IN TOTAL: %.2f\n\n", time_load + time_check + time_size + time_unload);
// Success return 0;}
// Returns number of seconds between b and adouble calculate(const struct rusage *b, const struct rusage *a){ if (b == NULL || a == NULL) { return 0.0; } else { return ((((a->ru_utime.tv_sec * 1000000 + a->ru_utime.tv_usec) - (b->ru_utime.tv_sec * 1000000 + b->ru_utime.tv_usec)) + ((a->ru_stime.tv_sec * 1000000 + a->ru_stime.tv_usec) - (b->ru_stime.tv_sec * 1000000 + b->ru_stime.tv_usec))) / 1000000.0); }}