Finite automata in Perl
Social links
View Ashley Pond V's profile on LinkedIn
Miscellaneous

Other pages

Description

This is about as basic as you can get.

Code
#!/usr/bin/perl -w

use strict;
#=====================================================================
#  PROGRAM PROPER
#=====================================================================
my $test = shift;  # we want a binary "tape" of 1s and 0s as the arg

unless ( $test and $test =~ /^[01]+$/ ) {
    die "Give a string (tape) of zeros and ones only!\n";
}
my @test_queue = split '', $test; # turn string into queue/array

my $dfa = make_dfa();             # all DFAs are the same here

$dfa->($_) for @test_queue;       # feed the test queue to the DFA

printf
    "Final state for $test is: %s (%s)\n",
    $dfa->('state'),
    $dfa->() ? "accept state" : "no accept state";

exit 0;
#=====================================================================
#  DFA MAKER
#=====================================================================
sub make_dfa {

    my $state  = 'q1';  # initial state

# sub refs of tests for our closure to use
    my $tests = { 
        q1 => sub { shift() == 1 ? 'q2':'q1' },
        q2 => sub { shift() == 1 ? 'q2':'q1' },  # accept state
        };

    return sub {
        my $step = shift;
        if ( lc $step eq 'state' ) 
        {
            $state;
        }
        elsif ( defined $step  )
        {
            $state = $tests->{$state}->( $step );
            $state eq 'q2' ? 1 : 0;
        }
        else
        {
            $state eq 'q2' ? 1 : undef;
        }
    }
}
One more; watch a functional script act object oriented
#!/usr/bin/perl -w
use strict;
#=====================================================================
#  OUTLINE
#=====================================================================
# Grammar Rules for DFA definitions: 

# a definition/grammar is given as a scalar and looks like this:
#     STATE( TEST_VALUE => TRANS_STATE, TEST_VALUE => TRANS_STATE ),
#    STATE2( TEST_VALUE => TRANS_STATE, TEST_VALUE => TRANS_STATE ),

# "State" is a letter + number(s) combination to name the given state.
# "Trans_state" is the transition state where the machine arrives when
# given the transition state's "Test_value." The unique test values
# make up the machines language. The unique states make up the
# machine's states (they can be redefined). The first state in the
# definition will be the machine's start state. Capital letter states
# are accept states. The syntax looks like perl grammar but it is not!
# So...

# this: "m1( 0 => m1, 1 => m2 ), M2( 0 => m1, 1 => m2 )"

# is a simple 2 state machine. "m1" is the start state because it is
# given first (and not rescinded). "m2" is the accept state because
# it's CAPITALIZED (M2 and m2 are equivalent, the capital is only
# syntax to show that it is an accept state). The set containing 0 and
# 1 is the machine's language because that is all given test values. A
# flow chart of our machine looks like this:

#          ______    1     ======  ___
#     --->|      |------->||    ||/   \ 
#   0/    |  m1  |        || m2 ||    / 1
#    \___/|      |<-------||    ||<---  
#          ------     0    ====== 
#=====================================================================
#  PROGRAM PROPER
#=====================================================================
# this is a definition for a word-ish state machine. "1"s are
# consonants and "0"s are vowels. sorry there's no diagram. basically
# it requires there is at least one vowel (it has several accept
# states, capital "M"s) and that there are never 4 vowels or 5
# consonants in a row. accept means the input looks like a word. for
# instance, 1001 looks like a word b/c it could be "seat" or "zoot."
# it could also be "muup" but hey, it's a pretty simple machine.

my $definition = q{
    m1 ( 0 => m6,  1 => m2  ), # start state
    m2 ( 0 => m6,  1 => m3  ),
    m3 ( 0 => m6,  1 => m4  ),
    m4 ( 0 => m6,  1 => m5  ),
    m5 ( 0 => m6,  1 => m14 ),
    M6 ( 0 => m11, 1 => m7  ),
    M7 ( 0 => m6,  1 => m8  ),
    M8 ( 0 => m6,  1 => m9  ),
    M9 ( 0 => m6,  1 => m10 ),
    m10( 0 => m6,  1 => m14 ),
    M11( 0 => m12, 1 => m7  ),
    m12( 0 => m13, 1 => m7  ),
    m13( 0 => m13, 1 => m13 ), # dead end -- 4 vowels in a row
    m14( 0 => m14, 1 => m14 ), # dead end -- 5 consonants in a row
};

my $input_tape = shift || die "Give me some input.\n";
my $original = $input_tape;

$input_tape =~ s/[b-df-hj-np-tv-z]/1/ig; # change consonants to 1s
$input_tape =~ s/[aeiou]/0/ig;           # vowels to zeros
$input_tape =~ s/\D//g;                  # collapse the rest

my @input_tape_queue = split '', $input_tape;

my $dfa = make_dfa($definition);

$dfa->($_) for @input_tape_queue; # send input through machine

printf
    qq|Final state for "$original" is %s.  %s.\n|,
    $dfa->('state'),
    $dfa->() ? "Looks like a word" : "There's no way that's a word";

exit 0;
#=====================================================================
#  SUBROUTINES
#=====================================================================
sub make_dfa {

    my %DFA;
    my $def = shift;

    $def =~ s/#[^\n]*\n//g;   # hard kill comments
    $def =~ s/\s+//g;         # kill spacing

# catch rules as "m1(q1 => 1, q2 => 3)" into $1 [m2] and $2 [(q1..)]

    while ( $def =~ m!([a-zA-Z]+\d+)\(([^)]+)\),?!g ) {

        my $state = lc $1;
        my @rules = split/,/, $2;
        $DFA{$state} = [ @rules ]; 

# check for duplicates later and report redefined
        push @{$DFA{'states'}}, $state;

# capital state name means it's an accept state
        $DFA{'accept_state'}{$state} = 1 if $1 =~ /[A-Z]/;
    }

# dupes?
    my %tmp_counter;

    for ( @{$DFA{'states'}} ) {

        next unless $tmp_counter{$_}++; # next unless seen before
        warn qq|State "$_" redefined in DFA definition!\n|;
    }
    my $tests = {};
    foreach my $state ( @{$DFA{'states'}} ) {

# $code is used to build-up test subs for our closure to use
        my $code = q{
            my $input = shift;
        };
        foreach my $sub_test ( @{$DFA{$state}} ) {

            my ( $test_value, $return ) = split(/=>/, $sub_test);
            $code .= qq{ return '$return' 
			     if \$input == $test_value;\n };

            $DFA{'alphabet'}{$test_value} ||= 1;
        }
        $tests->{$state} = eval "sub { $code }";
    }

# start state (1st state from DFA def) which will be remembered as
# a closure as long as the DFA lives
    my $state = $DFA{'states'}[0]; 

# we're ready to return our machine
    return
        sub {
            my $input = shift;
            if ( lc $input eq 'state' ) 
            {
# user requested to know what named state the machine is currently in
                return $state;
            }
            elsif ( lc $input eq 'alphabet' ) 
            {
# user requested to know what the acceptable language/input is
                return sort keys %{ $DFA{'alphabet'} };
            }
            elsif ( defined $input 
                    and exists $DFA{'alphabet'}{$input} )
            {
# there's acceptable input (in its alphabet) to machine
                $state = $tests->{$state}->( $input );
# now we test if state arrived at is an accept state and return true
# or false
            }
            return exists $DFA{'accept_state'}{$state} ? 1 : undef;
        };
}
# Possibilities for a fuller implementation: 
#    automatic grammar check
#       * states should not be redefined or warn
#       * there should be >= 1 accept state or warn
#    and you should be able to add or subtract grammar on the fly
Usage
jinx[13]>automata-grammar
Give me some input.
jinx[14]>automata-grammar asdsdfaposidfuasdkn
Final state for "asdsdfaposidfuasdkn" is m14.
       There's no way that's a word.
jinx[15]>automata-grammar bookkeeper
Final state for "bookkeeper" is m7.
       Looks like a word.
jinx[16]>automata-grammar fiddle-dee-dee
Final state for "fiddle-dee-dee" is m11.
       Looks like a word.

Discussion

There shall be no discussion! Play around with them if you’re inclined. Perl’s strengths as a text engine make it naturally good for this kind of thing.

Thanks to Terrence Brannon for discovering a buglet in the code whereby a tape of “0” would fail though it shouldn’t—fixed now with a defined() check.

Search these pages via Google
Text, original code, fonts, and graphics ©1990-2008 Ashley Pond V.