andreas.schiffler
BAN USERC# implementation
namespace SimpleRegex
{
class Program
{
static bool MatchesInDirection(string text, string pattern, int direction)
{
// Scan over pattern in specified direction
int currentTextPosition = (direction < 0) ? text.Length - 1 : 0;
for (int currentPatternPosition = (direction < 0) ? pattern.Length - 1 : 0;
currentPatternPosition >= 0 && currentPatternPosition < pattern.Length;
currentPatternPosition += direction)
{
if (currentTextPosition == -1 || currentTextPosition == text.Length)
{
return false;
}
char currentPatternCharacter = pattern[currentPatternPosition];
char currentTextCharacter = text[currentTextPosition];
if (currentPatternCharacter == '*')
{
// Skip over current wildcard character
while (currentTextPosition >= 0 && currentTextPosition < text.Length &&
text[currentTextPosition] == currentTextCharacter)
{
currentTextPosition += direction;
}
}
else
{
// Check match
if (currentPatternCharacter == currentTextCharacter)
{
currentTextPosition += direction;
}
else
{
return false;
}
}
}
return true;
}
static bool Matches(string text, string pattern)
{
// Error checking
if (string.IsNullOrEmpty(text) || string.IsNullOrEmpty(pattern) || text.Length == 0 || pattern.Length == 0)
{
return false;
}
// Match pattern forwards and backwards
return MatchesInDirection(text, pattern, 1) || MatchesInDirection(text, pattern, -1);
}
static void Main(string[] args)
{
System.Console.WriteLine(Matches("acb", "a*b")); //// true - * matches c
System.Console.WriteLine(Matches("abbc", "abc*")); //// false - no match for second b
System.Console.WriteLine(Matches("**bc", "bc")); //// true - *'s match nothing
System.Console.WriteLine(Matches("bbabbabb", "*a*a*")); //// true - *'s match b's
System.Console.WriteLine(Matches("bc", "abc")); //// false - no match for a
System.Console.ReadLine();
}
}
}
More straight C but with additional features
- ordering of input words to be able to skip words that are too long and end the search on the first hit
- use simple data structures and memory operations for caching
- thorough input validation
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
/* qsort comparison function */
int cmp_strlen_desc(const void *a, const void *b)
{
return strlen(*(char **)a) < strlen(*(char **)b);
}
/* solution function */
const char* maxword(const char* words[], int len, const char* letters)
{
const int cacheSize = 254; // assume full character range 1-255
int availableLettersLen;
int availableLettersCache[cacheSize];
int availableLetters[cacheSize];
char **sortedWords;
char *curChar;
int *curCount;
char *result;
// Input validation
if (words == NULL || len < 1 || letters == NULL) return NULL;
// Letters length
availableLettersLen = strlen(letters);
// More input validation
if (availableLettersLen == 0) return NULL;
// Sort words by length descending
sortedWords = (char **)malloc(len * sizeof(char *));
if (sortedWords == NULL) return NULL;
for (int i=0; i<len; i++) {
curChar = (char *)words[i];
// More input validation
if (curChar == NULL) {
free(sortedWords);
return NULL;
}
sortedWords[i] = curChar; // copy pointers
}
qsort(sortedWords, len, sizeof(char *), cmp_strlen_desc);
// Preprocess letters by counting them into a cache
memset((void *)availableLettersCache, 0, cacheSize * sizeof(int));
curChar = (char *)letters;
while (*curChar) {
availableLettersCache[*curChar - 1]++;
curChar++;
}
// Scan words
for (int i = 0; i < len; i++) {
curChar = (char *)sortedWords[i];
// Word too long, skip
if (strlen(curChar) > availableLettersLen) continue;
// Reset availability tracker from cache
memcpy(availableLetters, availableLettersCache, cacheSize * sizeof(int));
// Scan letters of word
while (*curChar) {
curCount = availableLetters + *curChar - 1;
if (!*curCount) break; // letter unavailable, end search
*curCount++; // letter available, decrease count
curChar++;
}
// If word was fully scanned, we found our answer
if (!*curChar) {
result = sortedWords[i];
free(sortedWords);
return result;
}
}
free(sortedWords);
return NULL;
}
/* test driver */
int main()
{
const char* words[] = { "abacus",
"deltoid",
"gaff",
"giraffe",
"microphone",
"reef",
"qar" };
const char* word = maxword(words, 7, "aeffgirq");
printf("%s\n", word);
}
1) Determine input data set that achieves 100% code coverage.
If profiling is possible:
2) Call instrumented API with this data set
3) Observe leak reports
If profiling is not possible:
2) Call API repeatedly (i.e. in a loop) with this data set
3) Measure memory usage of process over time
If memory measurements are not possible:
2) Call API repeatedly in a loop for extended periods of time
3) Run other applications on host system to evaluate impact of API
Could use various optimizations:
- andreas.schiffler April 28, 2014- terminate outer loop once longest was found
- sort entries by length, and trim to list where entry[i].Length<=stocks.Length
- use Dictionary<string,byte> for minor memory optimizations (if english words are used, a byte per char should be sufficient)
- replace Dictionary with byte[256] array if entries are ASCII/bytes only