lineends.pl 3.97 KB
Newer Older
powelld's avatar
powelld committed
#!/usr/local/bin/perl
#
#  Heuristically converts line endings to the current OS's preferred format
#  
#  All existing line endings must be identical (e.g. lf's only, or even
#  the accidental cr.cr.lf sequence.)  If some lines end lf, and others as
#  cr.lf, the file is presumed binary.  If the cr character appears anywhere
#  except prefixed to an lf, the file is presumed binary.  If there is no 
#  change in the resulting file size, or the file is binary, the conversion 
#  is discarded.
#  
#  Todo: Handle NULL stdin characters gracefully.
#

use IO::File;
use File::Find;

# Extensions that are never converted.  The finished value is one string in
# which every extension is both preceded and followed by a hyphen (for
# example "-gif-jpg-"), so a lookup can search for "-<ext>-".
$ignore = '-' . join('-',
    # Image formats
    qw(gif jpg jpeg png ico bmp),

    # Archive formats
    qw(tar gz z zip jar war bz2 tgz),

    # Many document formats
    qw(eps psd pdf chm ai),

    # Some encodings
    qw(ucs2 ucs4),

    # Some binary objects
    qw(class so dll exe obj lib a o lo slo sl dylib),

    # Some build env files
    qw(mcp xdc ncb opt pdb ilk exp res pch idb sbr),
) . '-';

# Option defaults; the command line parser below overrides them.
$preservedate = 1;    # restore the original datestamp after converting
$forceending  = 0;    # 1 = convert mismatched files, 2 = also ignore $ignore
$givenpaths   = 0;    # becomes true once a path argument has been scanned
$notnative    = 0;    # -1 = strip every cr, 1 = write cr.lf, 0 = native

# Walk the command line from left to right.  Options only affect the paths
# that follow them, because each path is scanned as soon as it is reached.
while (defined(my $arg = shift @ARGV)) {
    if ($arg eq '--touch') {
        $preservedate = 0;
    }
    elsif ($arg eq '--nocr') {
        $notnative = -1;
    }
    elsif ($arg eq '--cr') {
        $notnative = 1;
    }
    elsif ($arg eq '--force') {
        $forceending = 1;
    }
    elsif ($arg eq '--FORCE') {
        $forceending = 2;
    }
    elsif ($arg =~ m/^-/) {
        # Anything else that looks like an option is a usage error.
        die "What is " . $arg . " supposed to mean?\n\n"
          . "Syntax:\t$0 [option(s)] [path(s)]\n\n" . <<'OUTCH';
Where:  paths specifies the top level directory to convert (default of '.')
        options are;

          --cr     keep/add one ^M
          --nocr   remove ^M's
          --touch  the datestamp (default: keeps date/attribs)
          --force  mismatched corrections (unbalanced ^M's)
          --FORCE  all files regardless of file name!

OUTCH
    }
    else {
        # A plain argument is a path: convert everything beneath it now.
        find(\&totxt, $arg);
        print "scanned " . $arg . "\n";
        $givenpaths = 1;
    }
}

# With no path on the command line, convert the current directory tree.
if (!$givenpaths) {
    find(\&totxt, '.');
    print "did .\n";
}

# File::Find "wanted" callback: normalise the line endings of one file.
#
# Inputs (all package globals):
#   $_                name of the entry being visited (File::Find has
#                     chdir'ed into its directory unless no_chdir is set)
#   $File::Find::dir  directory currently being traversed
#   $ignore           '-' delimited list of extensions that are never touched
#   $forceending      0 = leave inconsistent files alone, 1 = convert them
#                     anyway, 2 = additionally disregard the $ignore list
#   $notnative        0 = write the platform's text-mode line ending,
#                     >0 = always write cr.lf, <0 = always write a bare lf
#   $preservedate     true to put the original timestamps back afterwards
#
# The converted text goes to a sibling temporary file '.#<name>'.  It
# replaces the original only when every line carried the same number of
# cr's before its lf, no other cr appeared in a terminated line, and the
# result differs in size from the original; otherwise it is discarded.
# NOTE: a final line with no lf is copied through unchecked.
#
# Dies if a file cannot be opened, written, replaced or cleaned up.
sub totxt {
    my $oname = $_;
    my $tname = '.#' . $oname;

    return if !-f $oname;

    if ($forceending < 2) {
        my @exts = split /\./, $oname;
        shift @exts;    # the leading component is the base name
        for my $ext (@exts) {
            next if $ext eq '';
            # \Q..\E: the extension is data, not a pattern ("c++" would
            # otherwise be a fatal regex syntax error).
            return if $ignore =~ m|-\Q$ext\E-|i;
        }
    }

    # Never touch Subversion's administrative directories.
    return if $File::Find::dir =~ m{^(?:.+/)?\.svn(?:/.+)?$};

    my @ostat = stat $oname;
    my $srcfl = IO::File->new($oname, "r")
        or die "Cannot read $File::Find::dir/$oname: $!";
    my $dstfl = IO::File->new($tname, "w")
        or die "Cannot create $File::Find::dir/$tname: $!";
    binmode $srcfl;
    binmode $dstfl if $notnative;    # explicit endings: no text-mode layer

    # Number of cr's found before the lf of the first line; undefined while
    # nothing has been seen yet, and reset to undefined to veto the file.
    my $crcount;
    while (defined(my $line = <$srcfl>)) {
        if ($line =~ s/(\r*)\n$/\n/) {
            my $n = length $1;
            $crcount = $n if !defined $crcount;
            if (!$forceending && ($n != $crcount || $line =~ m/\r/)) {
                print "mismatch in " . $oname . ":" . $n . " expected " . $crcount . "\n";
                undef $crcount;
                last;
            }
            elsif ($notnative > 0) {
                $line =~ s/\n$/\r\n/;
            }
        }
        print $dstfl $line;
    }

    # Identical sizes mean the file already had the requested endings.
    my $convert = defined $crcount && tell($srcfl) != tell($dstfl);

    close $srcfl;
    my $flushed  = close $dstfl;
    my $closeerr = $!;

    if (!$convert) {
        unlink $tname or die "Cannot remove $File::Find::dir/$tname: $!";
        return;
    }

    # Buffered write errors only surface at close; never replace the
    # original with a copy that may be truncated.
    if (!$flushed) {
        unlink $tname;
        die "Cannot write $File::Find::dir/$tname: $closeerr";
    }

    # rename() normally replaces the target in one step.  Only fall back to
    # removing the original first where rename refuses to overwrite.
    unless (rename $tname, $oname) {
        unlink $oname
            or die "Cannot replace $File::Find::dir/$oname: $!";
        rename $tname, $oname
            or die "Cannot rename $File::Find::dir/$tname to $oname: $!";
    }

    # Best effort restoration of owner (stat fields 4/5 are uid/gid), mode
    # and timestamps (fields 8/9 are atime/mtime).  chown goes first since
    # it may clear set-id bits that chmod then restores.
    chown $ostat[4], $ostat[5], $oname;
    chmod $ostat[2] & 07777, $oname;
    utime $ostat[8], $ostat[9], $oname if $preservedate;

    print "Converted file " . $oname . " to text in " . $File::Find::dir . "\n";
    return;
}