#!/usr/bin/perl -w
#
# Copyright 2002, 2003, 2004, 2007 Kevin Ryde
#
# as-simulate-i386 is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by the
# Free Software Foundation; either version 2, or (at your option) any later
# version.
#
# as-simulate-i386 is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
# or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
# for more details.
#
# You can get a copy of the GNU General Public License online at
# http://www.gnu.org/licenses/gpl.txt, or you should have one in the file
# COPYING which comes with GNU Emacs and other GNU programs.  Failing that,
# write to the Free Software Foundation, Inc., 51 Franklin St, Fifth Floor,
# Boston, MA 02110-1301 USA.


# Usage: as-simulate-i386 [as-options] [filename.s]...
#
# A wrapper around the "as" assembler which munges the input to simulate
# selected instructions, namely cmov, psadbw, and the SSE2 paddq, psubq,
# pmuludq, pand, por and pxor.  The transformed input is then passed to the
# real assembler and the .o etc is generated in the usual way.  This can be
# used to run code using those instructions on a cpu which doesn't otherwise
# have them.
#
# Note that cmov and psadbw are only noticed as explicit .byte sequences, as
# generated for portability by gnu mp x86-defs.m4.
#
# cmov is emulated by a simple jump.  psadbw is emulated by a blob of code,
# reading and writing the normal mmx registers.  The SSE2 instructions are
# emulated by little subroutines emitted as needed.  The xmm registers are
# emulated by a memory area and that space is .comm common storage, so in
# principle different files assembled with as-simulate-i386 use the same
# area.
#
# Bugs:
#
# psadbw simulation is incomplete.
#

use strict;


# Path of the real assembler that the transformed source is piped into.
my $AS = '/usr/bin/as';

# Set to 1 to have the assembler command line shown on stderr before it is
# run.
my $debug = 0;

# Command to run: the real assembler followed by the options collected from
# our own command line.  The elements are joined with spaces and run through
# the shell as a pipe from our stdout.
my @real_as = ($AS);
# Set once a "-" or a filename argument has been seen.
my $seen_file = 0;
# Name of the input ('<stdin>' for "-"), used in the summary printed on
# stderr at the end.
my $input_filename;

# uncomment this to show processed output
# (the "tee ... |" element works because the command is joined into a single
# shell pipeline)
# @real_as = ("tee /dev/tty |", $AS);


# Split the command line into options for the real assembler and the input
# to be transformed.  The real assembler is always told to read "-", since
# it receives the transformed text through a pipe on its stdin.
#
# The loop tests for definedness rather than truth, so that an argument of
# "0" doesn't end option processing early.
#
# NOTE: each filename argument re-opens STDIN, so if several files are named
# only the last one is actually read.
while (defined (my $arg = shift @ARGV)) {
  if ($arg eq '-o') {
    # -o takes the output filename as a separate following argument
    my $outfile = shift @ARGV;
    if (! defined $outfile) {
      print STDERR "as-simulate: option -o requires an argument\n";
      exit (1);
    }
    push @real_as, $arg, $outfile;

  } elsif ($arg eq '-') {
    # read from stdin
    push @real_as, $arg;
    $seen_file = 1;
    $input_filename = '<stdin>';

  } elsif ($arg =~ /^-/) {
    # any other option is passed to the real assembler unchanged
    push @real_as, $arg;

  } else {
    # read from file, by making it our stdin; the three-argument open takes
    # the filename literally, whatever characters it contains
    open (STDIN, '<', $arg)
      or die "as-simulate: cannot open $arg: $!\n";
    push @real_as, '-';
    $seen_file = 1;
    $input_filename = $arg;
  }
}

# Without an input ("-" or a filename) there is nothing to transform.
if (! $seen_file) {
  print STDERR "as-simulate: no input file\n";
  exit (1);
}

# The assembler command is run through the shell, which is what allows a
# "tee ... |" stage to be put in front of it for debugging.  The arguments
# are joined with spaces and are not quoted, so arguments containing spaces
# or shell metacharacters are not protected.
my $as_command = join (" ", @real_as);
if ($debug) {
  print STDERR "as-simulate: running: $as_command\n";
}

# Everything printed on stdout from here on goes to the real assembler.
open (STDOUT, '|' . $as_command)
  or die "as-simulate: cannot run $as_command: $!\n";




# Counts reported on stderr when processing finishes, and the sets of
# emulation subroutines which must be emitted after the input.  The hash
# keys are "<src><dst>" register digits for %pmuludq, and "<op><src><dst>"
# for %paddsubq and %plogop.
my $count_sse2 = 0;
my (%pmuludq, %paddsubq, %plogop);
my $count_cmov = 0;

# Copy the assembler source to the real assembler, replacing the
# instructions to be simulated.  Lines not recognised are passed through
# unchanged.  The argument loop has normally emptied @ARGV, so <> reads
# standard input.
while (<>) {
  if (/^\s*pmuludq\s+%mm([0-7]),\s*%mm([0-7])/) {
    # pmuludq on mmx registers: call a subroutine specific to this register
    # pair, emitted after the input
    my ($src, $dst) = ($1, $2);
    print "\tcall\tLpmuludq_mm${src}_mm${dst}\n";
    $pmuludq{$src.$dst} = 1;
    $count_sse2++;

  } elsif (/^\s*p(add|sub)q\s+%mm([0-7]),\s*%mm([0-7])/) {
    # paddq and psubq on mmx registers, likewise a subroutine per operation
    # and register pair
    my ($op, $src, $dst) = ($1, $2, $3);
    print "\tcall\tLp${op}q_mm${src}_mm${dst}\n";
    $paddsubq{$op.$src.$dst} = 1;
    $count_sse2++;

  } elsif (/^\s*p(and|or|xor)\s+%xmm([0-7]),\s*%xmm([0-7])/) {
    # pand, por and pxor on xmm registers, done by subroutines operating on
    # the xmm0 to xmm7 memory areas
    my ($op, $src, $dst) = ($1, $2, $3);
    print "\tcall\tLp${op}_xmm${src}_xmm${dst}\n";
    $plogop{$op.$src.$dst} = 1;
    $count_sse2++;

  } elsif (/^\s*\.byte\s+15\s*,\s*(6[4-9]|7[0-9])/) {
    # .byte sequence for cmov, as used by gnu mp

    # The second byte (64 to 79) holds the condition in its low 4 bits, and
    # the third is the modrm byte with the destination register in bits 3
    # to 5 and the source register in bits 0 to 2.  Both must be decimal
    # and on the same line.
    /^\s*\.byte\s+15\s*,\s*([0-9]+)\s*,\s*([0-9]+)/
      or die "as-simulate: ${input_filename}: line ", $.,
             ": cmov .byte sequence without decimal opcode and modrm bytes: ",
             $_;
    my ($opcode, $modrm) = ($1, $2);
    my $ttn = $opcode & 15;
    my $src = $modrm & 7;
    my $dst = ($modrm >> 3) & 7;

    # print STDERR "ttn=$ttn src=$src dst=$dst\n";

    # 0x70+ttn is the short conditional jump for the same condition; its
    # displacement of 2 skips the following jmp (which is assumed to be
    # assembled in its 2-byte short form) and so reaches the mov.  When the
    # condition is false the jmp goes past the mov instead.
    my $jmp = 0x70 + $ttn;
    # modrm for opcode 0x89 (register to register movl), with the source in
    # bits 3 to 5 and the destination in bits 0 to 2
    my $mov = 192 + ($src << 3) + $dst;

    print "\t.byte\t$jmp, 2\n";
    print "\tjmp\t.Lcmov$count_cmov\n";
    print "\t.byte\t0x89, $mov\n";
    print ".Lcmov$count_cmov:\n";
    $count_cmov++;

  } elsif (/^\s*\.byte\s+0x0f\s*,\s*0xf6\s*,\s*(19[2-9]|2[0-9][0-9])/) {
    # .byte sequence for register-to-register psadbw, as used by gnu mp
    my $modrm = $1;
    my $src = $modrm & 7;
    my $dst = ($modrm >> 3) & 7;

    # This works enough for the sum of bytes done in the gnu mp popcounts,
    # but is otherwise a long way short of correct.  Only the eight bytes of
    # the source register are added up, the previous contents of the
    # destination register are not used.
    #
    # %eax, %edx and the flags are preserved, and 8 bytes of stack are used
    # as a place to get at the individual bytes of the source.

    print "\tpushl\t%eax\n";
    print " \tpushl\t%edx\n";
    print " \tpushf\n";
    print " \tsubl\t\$8, %esp\n";
    print " \tmovq\t%mm$src, (%esp)\n";
    print " \tmovzbl\t(%esp), %eax\n";
    foreach my $offset (1 .. 7) {
      print "\tmovzbl\t${offset}(%esp), %edx\n";
      print " \taddl\t%edx, %eax\n";
    }
    print " \tmovd\t%eax, %mm$dst\n";
    print " \taddl\t\$8, %esp\n";
    print " \tpopf\n";
    print " \tpopl\t%edx\n";
    print " \tpopl\t%eax\n";

  # This bit was an emulation of a bug in old versions of gas.  In some old
  # versions an "addl $_GLOBAL_OFFSET_TABLE_, %eax" ended up with the value
  # in %eax 1 too big, because it comes out with 1 fewer object code bytes
  # than the usual addl into %ebx.
  #
  # } elsif (/addl\s+\$_GLOBAL_OFFSET_TABLE_, %eax/) {
  #   print 'addl     $_GLOBAL_OFFSET_TABLE_+1, %eax',"\n";

  } else {
    # anything else goes to the assembler as it is
    print $_;
  }
}

# Storage and section setup for the SSE2 emulation subroutines printed after
# this, done only when at least one SSE2 instruction was replaced.
if ($count_sse2 != 0) {
  # One common block per simulated xmm register, 16 bytes long and 16-byte
  # aligned.  The pand/por/pxor subroutines access the 4-byte words at
  # offsets 0, 4, 8 and 12, so the length given to .comm must be 16 (in the
  # ELF form of .comm the second operand is the length and the third the
  # alignment).
  print "\n";
  foreach my $reg (0..7) {
    print ".comm xmm$reg,16,16\n";
  }
  print "\n";

  # The input may have finished in a data or note section; switch to the
  # text section so the subroutines that follow are assembled as code.
  print "\t.text\n";
}

# Emit a multiply subroutine for each source/destination mmx register pair
# that a pmuludq in the input was turned into a call to.
#
# Each subroutine saves the flags, %edx and %eax, multiplies the low 32 bits
# of the two registers with mull, then swaps the 64-bit product in %edx:%eax
# with the saved values on the stack.  That restores %edx and %eax and
# leaves the product in memory to be loaded into the destination by movq.
for my $pair (sort keys %pmuludq) {
  my ($from, $to) = split //, $pair;
  print <<"EOT";
Lpmuludq_mm${from}_mm${to}:
pushf
pushl %edx
pushl %eax
movd  %mm$from, %eax
movd  %mm$to, %edx
mull  %edx
xchg  4(%esp), %edx
xchg  (%esp), %eax
movq  (%esp), %mm$to
addl  \$8, %esp
popf
ret
EOT
}

# Emit a 64-bit add or subtract subroutine for each operation and mmx
# register pair that a paddq or psubq in the input was turned into a call to.
#
# The keys are "add" or "sub" followed by the source and destination
# register digits.  Both registers are stored in 16 bytes of stack scratch
# space, the low words are combined with add/sub and the high words with
# adc/sbb to carry or borrow between them.  The result in %edx:%eax is then
# swapped with the saved %eax and %edx above the scratch space, restoring
# them and leaving the result in memory for movq to load.
for my $key (sort keys %paddsubq) {
  my ($low_op, $from, $to) = unpack ('a3 a1 a1', $key);
  my $high_op = 'sbb';
  if ($low_op eq 'add') {
    $high_op = 'adc';
  }

  print <<"EOT";
Lp${low_op}q_mm${from}_mm${to}:
pushf
pushl %edx
pushl %eax
subl  \$16, %esp
movq  %mm$from, 8(%esp)
movq  %mm$to, (%esp)
movl  (%esp), %eax
movl  4(%esp), %edx
$low_op   8(%esp), %eax
$high_op  12(%esp), %edx
xchg  16(%esp), %eax
xchg  20(%esp), %edx
movq  16(%esp), %mm$to
addl  \$24, %esp
popf
ret
EOT
}

# Emit a logical operation subroutine for each operation and xmm register
# pair that a pand, por or pxor in the input was turned into a call to.
#
# The keys are the operation name ("and", "or" or "xor") followed by the
# source and destination register digits.  The subroutine works on the xmm0
# to xmm7 memory areas, applying the operation one 32-bit word at a time
# through %eax, with %eax and the flags preserved.
for my $key (sort keys %plogop) {
  my ($insn, $from, $to) = ($key =~ /^(\w+)(\d)(\d)$/);

  my @lines = ("Lp${insn}_xmm${from}_xmm${to}:",
               "pushf",
               "pushl %eax");
  # the first word is addressed by the plain symbol, the rest by symbol+offset
  for my $suffix ('', '+4', '+8', '+12') {
    push @lines,
      "movl  xmm$from$suffix, %eax",
      "$insn   %eax, xmm$to$suffix";
  }
  push @lines,
    "popl  %eax",
    "popf",
    "ret";

  print map { "$_\n" } @lines;
}

# Say on stderr how many instructions were simulated, if there were any.
unless ($count_sse2 == 0 && $count_cmov == 0) {
  printf STDERR "as-simulate: %s: %s sse2, %s cmov\n",
    $input_filename, $count_sse2, $count_cmov;
}

# STDOUT is the pipe to the real assembler; a failed close is fatal.
close (STDOUT) or die;


# Local variables:
# perl-indent-level: 2
# End:
