Commit b520e4b1 authored by Richard Levitte's avatar Richard Levitte
Browse files

Add a tool that (semi)automatically created the API documentation

required for FIPS.
parent 449f2517
Loading
Loading
Loading
Loading

fips/tools/README

0 → 100644
+26 −0
Original line number Diff line number Diff line
FIPS tools explained
====================

api_list.pl
    a script to produce an API description, saying what parameters are
    for input, output or both.

    Most often, the direction of a parameter is determined automatically.
    However, quite a number of them are educated guesses.  Either way,
    the information is stored in the file declarations.dat in this
    directory, and can be manually corrected; simply go through
    declarations.dat, look for any value with the key 'direction'
    where the value contains a question mark.  Those should be changed
    to whatever is true, and the values should be one of the
    following:

	<-	output
	->	input
	<->	both

api_fns.pm
    a module that helps api_list.pl do its job.

declarations.dat
    a file of information about public fips symbols.  See api_list.pl
    above.

fips/tools/api_fns.pm

0 → 100644
+336 −0
Original line number Diff line number Diff line
package api_data;
use strict;

use Data::Dumper;
use File::Slurp;

# The basic data store for a declaration is a hash holding the following
# information (let's simply call this structure "declaration"):
# sym       => string (the symbol of the declaration)
# symcomment=> string (if there's a comment about this symbol) or undef
# type      => string (type definition text, with a '?' where the symbol should be
# kind      => 0 (variable)
#              1 (function)
# params    => list reference (list of declarations, one for each parameter)
#              [only exists when kind = 1]
# direction => 0 (input)
#              1 (output)
#              2 (input and output)
#              3 (output or input and output)
#              +4 (guess)
#              [only exists when this symbol is a parameter to a function]

# Constructor
sub new {
    my $class = shift;
    my $self = {};
    $self->{DECLARATIONS} = {};
    bless($self, $class);
    return $self;
}

sub read_declaration_db {
    my $self = shift;
    my $declaration_file = shift;
    my $buf = read_file($declaration_file);
    $self->{DECLARATIONS} = eval $buf;
    die $@ if $@;
}

sub write_declaration_db {
    my $self = shift;
    my $declaration_file = shift;

    $Data::Dumper::Purity = 1;
    open FILE,">".$declaration_file ||
	die "Can't open '$declaration_file': $!\n";
    print FILE "my ",Data::Dumper->Dump([ $self->{DECLARATIONS} ], [qw(declaration_db)]);
    close FILE;
}

sub insert_declaration {
    my $self = shift;
    my %decl = @_;
    my $sym = $decl{sym};

    if ($self->{DECLARATIONS}->{$sym}) {
	foreach my $k (('sym', 'symcomment','oldsym','objfile','kind')) {
	    $self->{DECLARATIONS}->{$sym}->{$k} = $decl{$k};
	}
	if ($self->{DECLARATIONS}->{$sym}->{kind} == 1) {
	    # Replace parameters only if the kind or type has changed
	    my $oldp = $self->{DECLARATIONS}->{$sym}->{params};
	    my $newp = $decl{params};
	    my $l = scalar(@{$oldp});
	    for my $pn (0..($l - 1)) {
		if ($oldp->[$pn]->{kind} != $newp->[$pn]->{kind}
		    || $oldp->[$pn]->{type} ne $newp->[$pn]->{type}) {
		    $self->{DECLARATIONS}->{$sym}->{params} = $newp;
		}
	    }
	}
    } else {
	$self->{DECLARATIONS}->{$decl{sym}} = { %decl };
    }
}

# Input is a simple C declaration, output is a declaration structure
sub _parse_declaration {
    my $decl = shift;
    my $newname = shift;
    my $objfile = shift;
    my $namecomment = shift;
    my %parsed_decl = ();

    my $debug = 0;

    print "DEBUG: going to parse: $decl\n" if $debug;

    # Start with changing all parens to { and } except the outermost
    # Within these, convert all commas to semi-colons
    my $s = "";
    do {
	print "DEBUG: decl: $decl\n" if $debug;
	$s = $decl;
	if ($decl =~ m/
		       \(
		         ([^\(\)]*)
		         \(
		           ([^\(\)]*)
		         \)
		     /x) {
	    print "DEBUG: \`: $`\n" if $debug;
	    print "DEBUG: 1: $1\n" if $debug;
	    print "DEBUG: 2: $2\n" if $debug;
	    print "DEBUG: \': $'\n" if $debug;

	    my $a = "$`"."("."$1";
	    my $b = "{"."$2"."}";
	    my $c = "$'";
	    print "DEBUG: a: $a\n" if $debug;
	    print "DEBUG: b: $b\n" if $debug;
	    print "DEBUG: c: $c\n" if $debug;
	    $b =~ s/,/;/g;
	    print "DEBUG: b: $b\n" if $debug;

	    $decl = $a.$b.$c;
	}
    } while ($s ne $decl);

    # There are types that we look for.  The first is the function pointer
    # T (*X)(...)
    if ($decl =~ m/
		   ^\s*
		   ([^\(]+)	# Return type of the function pointed at
		   \(
		     \s*\*\s*
		     ([^\)]*)	# Function returning or variable holding fn ptr
		   \)
		   \s*
		   \(
		     ([^\)]*)	# Parameter for the function pointed at
		   \)
		   \s*$
		 /x) {
	print "DEBUG: function pointer variable or function\n" if $debug;
	print "DEBUG:  1: $1\n" if $debug;
	print "DEBUG:  2: $2\n" if $debug;
	print "DEBUG:  3: $3\n" if $debug;

	my $tmp1 = $1 . "(*?)" . "(" . $3 . ")";
	my $tmp2 = $2;

	$tmp1 =~ tr/\{\}\;/(),/; # Convert all braces and semi-colons
				# back to parens and commas

	$tmp2 =~ tr/\{\}\;/(),/; # Convert all braces and semi-colons
				# back to parens and commas

	# Parse the symbol part with a fake type.  This will determine if
	# it's a variable or a function.
	my $subdeclaration = _parse_declaration("int " . $tmp2, $newname);
	map { $parsed_decl{$_} = $subdeclaration->{$_} } ( "sym",
							   "kind",
							   "params" );
	$parsed_decl{symcomment} = $namecomment if $namecomment;
	$parsed_decl{type} = $tmp1;
    }
    # If that wasn't it, check for the simple function declaration
    # T X(...)
    elsif ($decl =~ m/^\s*(.*?\W)(\w+)\s*\(\s*(.*)\s*\)\s*$/) {
	print "DEBUG: function\n" if $debug;
	print "DEBUG:  1: $1\n" if $debug;
	print "DEBUG:  2: $2\n" if $debug;
	print "DEBUG:  3: $3\n" if $debug;

	$parsed_decl{kind} = 1;
	$parsed_decl{type} = $1."?";
	$parsed_decl{sym} = $newname ? $newname : $2;
	$parsed_decl{symcomment} = $namecomment if $namecomment;
	$parsed_decl{oldsym} = $newname ? $2 : undef;
	$parsed_decl{params} = [
	    map { tr/\{\}\;/(),/; _parse_declaration($_,undef,undef,undef) }
	    grep { !/^\s*void\s*$/ }
	    split(/\s*,\s*/, $3)
	    ];
    }
    # If that wasn't it either, try to get a variable
    # T X or T X[...]
    elsif ($decl =~ m/^\s*(.*\W)(\w+)(\s*\[.*\])?\s*$/) {
	print "DEBUG: variable\n" if $debug;
	print "DEBUG:  1: $1\n" if $debug;
	print "DEBUG:  2: $2\n" if $debug;

	$parsed_decl{kind} = 0;
	$parsed_decl{type} = $1."?";
	$parsed_decl{sym} = $newname ? $newname : $2;
	$parsed_decl{symcomment} = $namecomment if $namecomment;
	$parsed_decl{oldsym} = $newname ? $2 : undef;
    }
    # Special for the parameter "..."
    elsif ($decl =~ m/^\s*\.\.\.\s*$/) {
	%parsed_decl = ( kind => 0, type => "?", sym => "..." );
    }
    # Otherwise, we got something weird
    else {
	print "Warning: weird declaration: $decl\n";
	%parsed_decl = ( kind => -1, decl => $decl );
    }
    $parsed_decl{objfile} = $objfile;

    print Dumper({ %parsed_decl }) if $debug;
    return { %parsed_decl };
}

sub add_declaration {
    my $self = shift;
    my $parsed = _parse_declaration(@_);
    $self->insert_declaration( %{$parsed} );
}

sub complete_directions {
    my $self = shift;
    foreach my $sym (keys %{$self->{DECLARATIONS}}) {
	if ($self->{DECLARATIONS}->{$sym}->{kind} == 1) {
	    map {
		if (!$_->{direction} || $_->{direction} =~ m/\?/) {
		    if ($_->{type} =~ m/const/) {
			$_->{direction} = '->'; # Input
		    } elsif ($_->{sym} =~ m/ctx/ || $_->{type} =~ m/ctx/i) {
			$_->{direction} = '<-?'; # Guess output
		    } elsif ($_->{type} =~ m/\*/) {
			if ($_->{type} =~ m/(short|int|char|size_t)/) {
			    $_->{direction} = '<-?'; # Guess output
			} else {
			    $_->{direction} = '<-? <->?'; # Guess output or input/output
			}
		    } else {
			$_->{direction} = '->'; # Input
		    }
		}
	    } @{$self->{DECLARATIONS}->{$sym}->{params}};
	}
    }
}

sub on_all_declarations {
    my $self = shift;
    my $fn = shift;
    foreach my $sym (sort keys %{$self->{DECLARATIONS}}) {
	&$fn($self->{DECLARATIONS}->{$sym});
    }
}

sub get_function_declaration_strings_from_file {
    my $fn = shift;
    my %declarations = ();
    my $line = "";
    my $cppline = "";

    my $debug = 0;

    foreach my $headerline (`cat $fn`) {
	chomp $headerline;
	print STDERR "DEBUG0: $headerline\n" if $debug;
	# First, treat the line at a CPP level; remove comments, add on more
	# lines if there's an ending backslash or an incomplete comment.
	# If none of that is true, then remove all comments and check if the
	# line starts with a #, skip if it does, otherwise continue.
	if ($cppline && $headerline) { $cppline .= " "; }
	$cppline .= $headerline;
	$cppline =~ s^\"(.|\\\")*\"^@@^g; # Collapse strings
	$cppline =~ s^/\*.*?\*/^^g;	  # Remove all complete comments
	print STDERR "DEBUG1: $cppline\n" if $debug;
	if ($cppline =~ m/\\$/) { # Keep on reading if the current line ends
				  # with a backslash
	    $cppline = $`;
	    next;
	}
	next if $cppline =~ m/\/\*/; # Keep on reading if there remains the
				     # start of a comment
	next if $cppline =~ m/"/;    # Keep on reading if there remains the
				     # start of a string
	if ($cppline =~ m/^\#/) {
	    $cppline = "";
	    next;
	}

	# Done with the preprocessor part, add the resulting line to the
	# line we're putting together to get a statement.
	if ($line && $cppline) { $line .= " "; }
	$line .= $cppline;
	$cppline = "";
	$line =~ s%extern\s+\@\@\s+\{%%g; # Remove 'extern "C" {'
	$line =~ s%\{[^\{\}]*\}%\$\$%g; # Collapse any compound structure
	print STDERR "DEBUG2: $line\n" if $debug;
	next if $line =~ m%\{%;	# If there is any compound structure start,
	# we are not quite done reading.
	$line =~ s%\}%%;		# Remove a lonely }, it's probably a rest
	# from 'extern "C" {'
	$line =~ s%^\s+%%;		# Remove beginning blanks
	$line =~ s%\s+$%%;		# Remove trailing blanks
	$line =~ s%\s+% %g;		# Collapse multiple blanks to one.
	if ($line =~ m/;/) {
	    print STDERR "DEBUG3: $`\n" if $debug;
	    my $decl = $`;	#`; # (emacs is stupid that way)
	    $line = $';		#'; # (emacs is stupid that way)

	    # Find the symbol by taking the declaration and fiddling with it:
	    # (remember, we're just extracting the symbol, so we're allowed
	    # to cheat here ;-))
	    # 1. Remove all paired parenthesies, innermost first.  While doing
	    #    this, if something like "(* foo)(" is found, this is a
	    #    function pointer; change it to "foo("
	    # 2. Remove all paired square parenthesies.
	    # 3. Remove any $$ with surrounding spaces.
	    # 4. Pick the last word, that's the symbol.
	    my $tmp;
	    my $sym = $decl;
	    print STDERR "DEBUG3.1: $sym\n" if $debug;
	    do {
		$tmp = $sym;
		# NOTE: The order of these two is important, and it's also
		# important not to use the g modifier.
		$sym =~ s/\(\s*\*\s*(\w+)\s*\)\s*\(/$1(/;
		$sym =~ s/\([^\(\)]*\)//;
		print STDERR "DEBUG3.2: $sym\n" if $debug;
	    } while ($tmp ne $sym);
	    do {
		$tmp = $sym;
		$sym =~ s/\[[^\[\]]*\]//g;
	    } while ($tmp ne $sym);
	    $sym =~ s/\s*\$\$\s*//g;
	    $sym =~ s/.*[\s\*](\w+)\s*$/$1/;
	    print STDERR "DEBUG4: $sym\n" if $debug;
	    if ($sym =~ m/\W/) {
		print STDERR "Warning[$fn]: didn't find proper symbol in declaration:\n";
		print STDERR "    decl: $decl\n";
		print STDERR "    sym:  $sym\n";
	    }
	    $declarations{$sym} = $decl;
	}
    }
    return %declarations;
}

1;

fips/tools/api_list.pl

0 → 100644
+267 −0
Original line number Diff line number Diff line
#!/bin/env perl
#
# Quick and dirty utility to help assemble the mandated (but otherwise
# useless) API documentation. We get the list of external function
# symbols from fipscanister.o, pair those with the source file names
# (from ./fips/fipssyms.h), and map to the object file name containing
# them.
# 
# Requires the "nm" and "find" utilities.
# Execure from the root of the FIPS module source code workarea

use HTML::Entities;
use File::Basename;

$here = dirname($0);
require "$here/api_fns.pm";

$_direction_question = ''; # Set to '?' to show "<-?", "<->?" for uncertain directions

print STDERR "Info: finding FIPS renames and reimplementations of OpenSSL symbols\n";
# Get mapping of old (source code) to new (live as renamed) symbols
foreach $file ("./fips/fipssyms.h") {
    open(IN, $file) || die "Error opening $file";
    # grab pairs until assembler symbols
    my $buf = '';
    my $reimplementations = 1;	# When 1, we're looking at reimplementations
				# (not renames) of OpenSSL functions.  They
				# still have to be saved to get the API.
    while (<IN>) {
	$reimplementations = 0 if m|^\s*/\*\sRename\ssymbols\s|;

	if ($buf) {
	    $_ = $buf . $_;
	    $buf = '';
	}
	if (s/\\\n$//) {
	    $buf = $_;
	    next;
	}
	if (m/\(/) {
	    ($oldname, $newname) = m/#define\s+(\S+)\(.*\)\s+(\S+)\(.*\)/;
	} else {
	    ($oldname, $newname) = m/#define\s+(\S+)\s+(\S+)/;
	}

	$oldname || next;
	if (!$reimplementations) {
	    $oldname{$newname} = $oldname;
	}
	$oldimpl{$newname} = $oldname;
	last if (/assembler/)
    }
    close(IN);
    # %oldname is the mapping of new function names to old
    print "<!-- Total of ", scalar(keys %oldname), " mapped symbols in $file -->\n";
}

print STDERR "Info: finding FIPS symbols in object files\n";
# generate list of external function names in fipscanister.o
$file = "./fips/fipscanister.o";
for (`nm -g --defined-only -p -o $file`) {
    chomp;
    s/^\S+ T // || next;
    m/^fips_/ && next;
    $fipssyms{$_}++;
    $objname =~ s/\.o$/\.\[o\|c\]/;
    $objname{$symname} = $objname;
}
# keys %fipssyms is the list of module functions
print "<!-- Total of ", scalar(keys %fipssyms), " functions in $file -->\n";

# grab filename to symbol name mapping, each line is of the format
#	./fips/sha/fips_sha1_selftest.o:00000000 T FIPS_selftest_sha1
# discard the offset and type ":00000000 T".
for (`find . -name '*.o' \\! -name 'fipscanister.o' -exec nm -g --defined-only -p -o {} \\;`) {
        ($objname, $symname) = m/^(\S+):\S+\s+T+\s+(\S+)/;
        $objname || next;
#	$fipssyms{$symname} || next;
	$objname =~ s/\.o$/\.\[o\|c\]/;
        $objname{$symname} = $objname;
        }
# %objname is the mapping of new symbol name to (source/object) file name
print "<!-- Total of ", scalar(keys %objname), " functions found in files -->\n";

print STDERR "Info: finding declarations in header files\n";

# grab filenames in include/openssl, run each of them through
# get_function_declarations_from_file (defined in api_fns.pl)
# and collect the result.
%declarations = ();
while (<include/openssl/*.h ./crypto/cryptlib.h>) {
    my %decls = api_data::get_function_declaration_strings_from_file($_);
    map { $declarations{$_} = $decls{$_} } keys %decls;
}
# %declarations is the mapping of old symbol name to their declaration
print "<!-- Total of ", scalar(keys %declarations), " declarations found in header files -->\n";

# Add the markers FIPS_text_start and FIPS_text_end
$declarations{FIPS_text_start} = "void *FIPS_text_start()";
$declarations{FIPS_text_end} = "void *FIPS_text_end()";


# Read list of API names obtained from edited "nm -g fipscanister.o"
$spill = 0;
sub printer {
    foreach (@_) {
	if ($_->{kind} >= 0) {
	    if ($spill) {
		print " " x $indent;
		print "kind:     ",$_->{kind} ? "function" : "variable","\n";
		print " " x $indent;
		print "sym:      ",$_->{sym},"\n";
		print " " x $indent;
		print "type:     ",$_->{type},"\n";
	    }
	    if ($_->{kind}) {
		$c = 0;
		map {
		    if ($spill) {
			print " " x $indent;
			printf "param %d:\n", ++$c;
		    }
		    $indent += 2;
		    printer($_);
		    my $direction = $_->{direction};
		    if (!$_direction_question) {
			$direction =~ s/<-\? <->\?/<->/;
			$direction =~ s/\?//g;
		    }
		    print " " x $indent,$direction," ",$_->{sym},"\n";
		    $indent -= 2;
		} @{$_->{params}};
		if ($_->{type} !~ m/^\s*void\s*$/) {
		    print " " x $indent;
		    print "<- Return\n";
		}
	    }
	} else {
	    if ($spill) {
		print " " x $indent;
		print "decl:     ",$_->{decl},"\n";
	    }
	}
    }
}

sub html_printer {
    my $print_mode = shift;	# 0 = print declaration with symbol in bold,
				#     call recursively with 1 for each parameter,
				#     call recursively with 2 for each parameter
				# 1 = print declaration with sym grey background,
				#     call recursivelt with 3 for each parameter
				# 2 = just print declaration
    my $d = shift;		# Parsed declaration
    my $s = '';

    if ($print_mode == 0) {
	$d->{sym} || return $s;
	my $h = "<hr><br />\n";
	$h .= $d->{sym} . ($d->{symcomment} ? " " . $d->{symcomment} : "");
	$h .= " in file " . $d->{objfile} . "<br />\n<br />\n";

	$s .= '<b>' . $d->{sym} . '</b>';
	if ($d->{kind} == 1) {
	    $s .= '(';
	    $s .= join(', ',
		       map {
			   html_printer(1,$_);
		       } @{$d->{params}});
	    $s .= ')';
	}
	my $t = $d->{type};
	$t =~ s/\?/$s/;
	$s = $t;
	if ($d->{kind} == 1) {
	    map {
		my $direction = $_->{direction};
		if (!$_direction_question) {
		    $direction =~ s/<-\? <->\?/<->/;
		    $direction =~ s/\?//g;
		}
		$s .= "<br />\n";
		$s .= encode_entities($direction
				      . "\xA0" x (9 - length($direction)));
		$s .= $_->{sym};
	    } @{$d->{params}};
	}
	if ($d->{type} !~ m/^\s*void\s*\?$/) {
	    $s .= "<br />\n";
	    $s .= encode_entities('<-'.("\xA0" x 7).'Return');
	}
	$s = $h . $s;
    } elsif ($print_mode == 1) {
	$s .= '<span style="background: #c0c0c0">' . $d->{sym} . '</span>';
	if ($d->{kind} == 1) {
	    $s .= '(';
	    $s .= join(', ',
		       map {
			   html_printer(3,$_);
		       } @{$d->{params}});
	    $s .= ')';
	}
	my $t = $d->{type};
	$t =~ s/\?/$s/;
	$s = $t;
    } elsif ($print_mode == 2) {
	$s .= $d->{sym};
	if ($d->{kind} == 1) {
	    $s .= '(';
	    $s .= join(', ',
		       map {
			   html_printer(2,$_);
		       } @{$d->{params}});
	    $s .= ')';
	}
	my $t = $d->{type};
	$t =~ s/\?/$s/;
	$s = $t;
    }
    return $s;
}

print STDERR "Info: building/updating symbol information database\n";

$d = api_data->new();
if (-s "$here/declarations.dat") {
    $d->read_declaration_db("$here/declarations.dat");
} else {
    print STDERR "Warning: there was no file '$here/declarations.dat'.  A new one will be created\n";
}

for (sort keys %fipssyms) {
    $newname = $_;
    $namecomment = undef;
    if ($oldname{$newname}) {
	$oldname = $oldname{$newname};
	$objname = $objname{$oldname} ? $objname{$oldname} : $objname{$newname};
	$namecomment = "(renames $oldname)";
    } else {
	$objname = $objname{$newname};
    }
    if ($oldimpl{$newname}) {
	$apisym = $oldimpl{$newname};
	$namecomment = "(reimplements $apisym)" if !$namecomment;
    } else {
	$apisym = $newname;
    }
    $declaration = $declarations{$apisym};
    print "<!--\n";
    print "$newname\t\t$namecomment\tin file $objname:\n";
    print "  ",$declaration,"\n  ";
    $d->add_declaration($declaration,$newname,$objname,$namecomment);
    print "-->\n";
}

$d->complete_directions();
$d->write_declaration_db("$here/declarations.dat");

print STDERR "Info: printing output\n";

$d->on_all_declarations(
    sub {
	my $decl = shift; 
	#$indent = 2;
	#print printer($decl);
	print "<p>",html_printer(0,$decl),"</p>\n";
    });
+7155 −0

File added.

Preview size limit exceeded, changes collapsed.