#!/usr/bin/perl

# **********************************************************************************
# This Source Code Form is subject to the terms of the Mozilla Public
# License, v. 2.0. If a copy of the MPL was not distributed with this file,
# You can obtain one at http://mozilla.org/MPL/2.0/.
#
# Copyright (c) 2009-2015, Marvell International Ltd.
#
# Alternatively, this software may be distributed under the terms of the GNU
# General Public License Version 2, and any use shall comply with the terms and
# conditions of the GPL.  A copy of the GPL is available at
# http://www.gnu.org/licenses/old-licenses/gpl-2.0.html
#
# THE FILE IS DISTRIBUTED AS-IS, WITHOUT WARRANTY OF ANY KIND, AND THE
# IMPLIED WARRANTIES OF MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE
# ARE EXPRESSLY DISCLAIMED.  The GPL license provides additional details about
# this warranty disclaimer.
# ************************************************************************************


# $Id$

use strict;
use FindBin;
use lib "$FindBin::Bin";
use asm_sccplite;
use asm;
use Pod::Usage;
use Getopt::Long;

# tracking the ifdefs
# $IFDEF_FLAGS[$n] records whether the #ifdef/#ifndef region at nesting
# depth $n is being assembled (1) or skipped (0).  $IFDEF_IDX is the current
# nesting depth; depth 0 is the file level and is always active.
my @IFDEF_FLAGS;
my $IFDEF_IDX = 0;
$IFDEF_FLAGS[0] = 1;

# will hold the tokens, lines, file, and linenum data
my @lines;          # every line kept by preprocess()
my @lines_markup;   # lines shown in the markup file (includes implied nops)
my @lines_out;      # lines that produce a word in the output image

# will contain the output hex code
#our $HEX_OUT;
my @HEX;

# Lowest address that received code/data; pass1() lowers it as lines are
# assembled and -no_pad skips everything below it.  Addresses are stored
# scaled by $WIDTH (see the bounds checks in pass0()), so the "nothing
# assembled yet" sentinel must be scaled the same way -- the former
# $MEM_SIZE-1 was smaller than valid addresses whenever $WIDTH > 1.
my $HEX_START = $MEM_SIZE*$WIDTH;


#####################################

# Command line options (see the POD at the end of the file)
my $opt_infile = "";
my $opt_outfile = "";
my $opt_defines;
my $opt_markup = "";
my $opt_cfile = "";
my $opt_no_pad = 0;

GetOptions (
            'in=s'        => \$opt_infile,
            'out=s'       => \$opt_outfile,
            'define|D=s@' => \$opt_defines,
            'markup=s'    => \$opt_markup,
            'cfile=s'     => \$opt_cfile,
            'no_pad'      => \$opt_no_pad,
            
            'man'         => sub { pod2usage(-verbose => 2, -noperldoc => 1); },
            'h|help'      => sub { pod2usage(-verbose => 1); }
           ) or pod2usage(-verbose => 0, -message => "Bad argument");


# required options
if ($opt_infile eq "") {
  pod2usage(-verbose => 0, -message => 
            "Input assembly file is required (-in)!\n");
}

# handle any passed in defines
foreach my $define (@{ $opt_defines || [] }) {
  # is there a value?  Split on the FIRST '=' only, so that the value may
  # itself contain '=' (NAME=a=b) or be empty (NAME=).
  if ($define =~ /^([^=]+)=(.*)$/s) {
    add_define($1, $2);
  }
  else {
    add_define($define, "");
  }
}

preprocess($opt_infile);

# every #ifdef/#ifndef must have been closed by the end of the input
if ($IFDEF_IDX != 0) {
  die "Missing final closing #endif...possible run-away #ifdef/#ifndef\n";
}


# pass 0 builds the symbol table / addresses, pass 1 encodes into @HEX
pass0();
pass1();

# Write the assembled image, one word (WIDTH entries of @HEX) at a time,
# to the -out file or to STDOUT when no output file was given.
my $hex_fh;
my $hex_dest;
if ($opt_outfile ne "") {
  # three-argument open: the file name is never interpreted as a mode
  open $hex_fh, '>', $opt_outfile or
    die "Unable to open output assembly file '$opt_outfile'!\n";
  $hex_dest = "'$opt_outfile'";
}
else {
  $hex_fh = \*STDOUT;
  $hex_dest = "STDOUT";
}

for (my $i=0; $i<@HEX; $i+=$WIDTH) {
  # with -no_pad, skip everything below the first assembled address
  next if ($opt_no_pad && $i<$HEX_START);
  print {$hex_fh} get_word(\@HEX, $i);
}

# buffered write errors only surface at close time
close $hex_fh or
  die "Error while writing assembled output to $hex_dest: $!\n";

# generate the C file
# Emits the image as 'unsigned int NAME[] = { ... };' (four words per row)
# followed by 'unsigned int NAME_len = COUNT;', where NAME is the -cfile
# base name without directory or extension.
if ($opt_cfile ne "") {
  # three-argument open: the file name is never interpreted as a mode
  open my $c_fh, '>', $opt_cfile or
    die "Unable to open C file '$opt_cfile'!\n";

  # strip off any leading /'s or any extensions at the end
  my $name = $opt_cfile;
  if ($opt_cfile =~ /([^\/\.]+)(\..+)?$/) {
    $name = $1;
  }

  print {$c_fh} "unsigned int $name\[\] = {\n   ";
  
  my $num = 0;       # number of words written so far
  my $closed = 0;    # set once the array initializer has been terminated
  for (my $i=0; $i<@HEX; $i+=$WIDTH) {
    # with -no_pad, skip everything below the first assembled address
    next if ($opt_no_pad && $i<$HEX_START);

    my $word = get_word(\@HEX, $i);
    print {$c_fh} "0x$word";

    if ($i == @HEX-$WIDTH) {
      # last word of the image: terminate the initializer
      print {$c_fh} "\n};\n";
      $closed = 1;
    }
    else {
      print {$c_fh} ", ";
      if ($num % 4 == 3) {
        print {$c_fh} "\n   ";
      }
    }
    $num++;
  }

  # An empty image never reaches the "last word" branch above; close the
  # initializer here so the generated file is not left with an open brace.
  unless ($closed) {
    print {$c_fh} "\n};\n";
  }

  print {$c_fh} "\nunsigned int ${name}_len = $num;\n";

  # buffered write errors only surface at close time
  close $c_fh or
    die "Error while writing C file '$opt_cfile': $!\n";
}

# generate markup file
# One output row per entry of @lines_markup:
#   <+ if a word was emitted> file(line) <addr> / <addr/WIDTH>: <hex> | source
# 'equ' rows additionally get the symbol's value appended in a comment.
if ($opt_markup ne "") {
  # three-argument open: the file name is never interpreted as a mode
  open my $markup_fh, '>', $opt_markup or
    die "Unable to open output markup file '$opt_markup'!\n";

  # get some nice spacing so things look all purrrty.
  # First sweep: build the "file(line)" label of every row and find the
  # widest one so the columns can be aligned.
  my $max_fileinfo = 0;
  foreach my $entry (@lines_markup) {
    # only the base name of the file is shown
    my $infile = $entry->{infile};
    if ($infile =~ /([^\/]+)$/) {
      $infile = $1;
    }

    my $fileinfo = $infile . "(" . $entry->{linenum} . ")";
    $entry->{fileinfo} = $fileinfo;

    if (length $fileinfo > $max_fileinfo) {
      $max_fileinfo = length $fileinfo;
    }
  }

  # print the line with markup
  foreach my $entry (@lines_markup) {
    my $orig_line = $entry->{orig_line};
    my $addr      = $entry->{addr};
    my $idx       = $entry->{out_map};
    my @tokens    = @{$entry->{tokens}};
    my $cmd       = get_cmd(\@tokens);

    # rows that emitted no word get a blank hex column and no '+' marker
    my $hex  = " " x (2*$WIDTH + 2);
    my $head = "  ";

    # out_map is only set for lines that made it into @lines_out
    if (defined $idx && $idx ne "") {
      my $ptr = $lines_out[$idx];
      if (exists $ptr->{hex}) {
        my $pad = 2*$WIDTH - length($ptr->{hex});
        $hex = sprintf("0x%s", $ptr->{hex}) . " " x $pad;
        $head = "+ ";
      }
    }

    # left-justify the file(line) label to the common width
    my $fileinfo = sprintf("%-*s", $max_fileinfo, $entry->{fileinfo});

    my $OUT = sprintf("%s%s\t%05x / %05x:\t%s\t| ", $head, $fileinfo, $addr, $addr/$WIDTH, $hex);
    $orig_line =~ s/\s+$//;
    $OUT .= $orig_line;

    if (eqi($cmd, "equ")) {
      # start a comment unless the source line already has one
      if ($orig_line !~ /;/) {
        $OUT .= "\t;";
      }
      $OUT .= " (" . $SYM_HASH->{$tokens[0]} . ")";
    }

    print {$markup_fh} $OUT . "\n";
  }

  # buffered write errors only surface at close time
  close $markup_fh or
    die "Error while writing markup file '$opt_markup': $!\n";
}



#####################################
# Preprocess one source file: strip ';' comments, tokenize every line and
# handle the #include, #define, #undef, #ifdef, #ifndef and #endif directives.
# Recursively called in the case of includes
#
# Arguments:
#   $infile         - file to read.  For an include, a path that does not
#                     start with '/' is taken relative to the directory of
#                     $parent_file.
#   $parent_file    - including file (omitted for the top-level file)
#   $parent_linenum - line number of the #include in $parent_file
#
# Every non-directive line inside an active region (blank lines included) is
# appended to @lines as a hash with the keys tokens, infile, linenum, line
# and orig_line.  Conditional nesting is tracked in @IFDEF_FLAGS/$IFDEF_IDX:
# each #ifdef/#ifndef opens one level -- also inside a skipped region, so
# that its #endif pairs with it -- and a level is active only when its
# condition holds AND the enclosing level is active.
#
# Dies on unreadable files, malformed or unsupported directives, an #endif
# without a matching #ifdef/#ifndef, and on '\' line continuations.

sub preprocess {
  my $infile = shift;
  my $parent_file = shift;
  my $parent_linenum = shift;

  # A lexical handle keeps recursive calls from sharing one global handle,
  # and three-argument open never interprets the file name as a mode.
  my $fh;
  if (!defined $parent_file || $parent_file eq "") {
    open $fh, '<', $infile or
      die "Unable to open assembly file '$infile'!\n";
  }
  else {
    # absolute or relative path
    if ($infile !~ /^\//) {
      # relative, append the parent directory
      my $dir = $parent_file;
      $dir =~ s/[^\/]+$//;
      $infile = $dir . $infile;
    }
    
    open $fh, '<', $infile or
      die "Unable to open assembly file '$infile' included from $parent_file($parent_linenum)!\n";
  }
  
  # slurp the whole file so the handle is closed before any include recursion
  my @tmp = <$fh>;
  chomp @tmp;
  close $fh;
  
  my $linenum = 1;
  foreach my $line (@tmp) {
    my $orig_line = $line;

    # get rid of any comments
    $line =~ s/;.*//;
    
    # get rid of any spaces before after commas
    $line =~ s/\s*,\s*/,/g;

    # get rid of leading spaces
    $line =~ s/^\s+//;
    
    # get rid of trailing spaces
    $line =~ s/\s+$//;
    
    # tokenize by space or ,
    my @tokens = split /\s+|,/, $line;
    
    # report the file actually being read (it may be an include)
    if (@tokens && $tokens[$#tokens] eq "\\") {
      die "Currently do not support continuation of lines with '\\' at $infile($linenum)\n";
    }
    
    # look for # preprocess directives
    if (@tokens && $tokens[0] =~ /^#/) {
      my $directive = $tokens[0];

      if (eqi($directive,"#endif")) {
        # an #endif always closes the innermost open conditional
        if ($IFDEF_IDX == 0) {
          die "Error:  $infile($linenum) - Encountered #endif without corresponding #ifdef/#ifndef\n";
        }

        # syntax is only enforced when the enclosing region is assembled
        if ($IFDEF_FLAGS[$IFDEF_IDX-1] && @tokens != 1) {
          die "Error:  $infile($linenum) - Too many arguments, expected '#endif'\n";
        }

        $IFDEF_IDX--;
      }
      elsif (eqi($directive,"#ifdef") || eqi($directive,"#ifndef")) {
        my $want_defined = eqi($directive,"#ifdef");
        my $active = 0;

        # the condition is only evaluated (and syntax checked) when the
        # enclosing region is active; otherwise the new level is skipped too
        if ($IFDEF_FLAGS[$IFDEF_IDX]) {
          unless (@tokens == 2) {
            die "Error:  $infile($linenum) - Expected '" .
                ($want_defined ? "#ifdef" : "#ifndef") . " NAME'\n";
          }

          my $is_defined = exists $DEFINE_HASH->{$tokens[1]};
          $active = $want_defined ? $is_defined : !$is_defined;
        }

        $IFDEF_IDX++;
        $IFDEF_FLAGS[$IFDEF_IDX] = $active ? 1 : 0;
      }
      # only if we're not in an if skip
      elsif ($IFDEF_FLAGS[$IFDEF_IDX]) {
        if (eqi($directive,"#include")) {
          if (@tokens != 2) {
            die "Error:  $infile($linenum) - Expected '#include \"FILE\"'\n";
          }
          
          # try doing a define substitution, so that the file name may be
          # supplied through a define
          if ($tokens[1] !~ /^""$/) {
            if (exists $DEFINE_HASH->{$tokens[1]}) {
              $tokens[1] = $DEFINE_HASH->{$tokens[1]};
            }
          }
          
          if ($tokens[1] =~ /^"(.+)"$/) {
            my $inc_file = $1;
            preprocess($inc_file, $infile, $linenum);
          }
          else {
            die "Error:  $infile($linenum) - Expected '#include \"FILE\"'\n";
          }
        }
        elsif (eqi($directive,"#define")) {
          unless (@tokens == 3 || @tokens == 2) {
            die "Error:  $infile($linenum) - Expected '#define NAME <OPTIONAL VALUE>'\n";
          }
          
          my $value = "";
          if (@tokens == 3) {
            $value = $tokens[2];
          }
          
          add_define($tokens[1], $value, $infile, $linenum);
        }
        elsif (eqi($directive,"#undef")) {
          unless (@tokens == 2) {
            die "Error:  $infile($linenum) - Expected '#undef NAME'\n";
          }
          
          delete $DEFINE_HASH->{$tokens[1]};
        }
        else {
          die "Error:  $infile($linenum) - Currently only support #include, #define, #undef, #ifdef, #ifndef, #endif\n";
        }
      }
    }
    # not preprocessor directive
    elsif ($IFDEF_FLAGS[$IFDEF_IDX]) {
      # do define substitution on the tokens
      define_subs(\@tokens, $infile, $linenum);
     
      my $tmp = {
                 tokens    => \@tokens,
                 infile    => $infile,
                 linenum   => $linenum,
                 line      => $line,
                 orig_line => $orig_line
                };
      
      push @lines, $tmp;
    }
    
    $linenum++;
  }
}


#####################################

# populate the symbol table, mapping symbols to addresses
# also syntax check the assembly
#
# Walks @lines (filled by preprocess) and for every line:
#   - records the current address in the line's {addr},
#   - adds labels and 'data' names to the symbol table with the word
#     address ($addr/$WIDTH), and 'equ' names with their value text,
#   - advances the address ('org' sets it, 'data' and instructions add
#     $WIDTH, 'equ' adds nothing),
#   - appends every line to @lines_markup, and every line that produces a
#     word to @lines_out (its index is stored in {out_map}),
#   - for instructions that have a 'delay' entry in $INSTRS, appends implied
#     'nopi' lines covering the delay given by the last token.
# Dies on malformed directives, unknown commands and out-of-range addresses.
sub pass0 {
  my $addr = 0;

  foreach my $ptr (@lines) {
    push @lines_markup, $ptr;
    
    # work on a copy; it is only stored back once the line is accepted
    my @tokens = @{$ptr->{tokens}};
    my $infile = $ptr->{infile};
    my $linenum = $ptr->{linenum};
    
    # save the address
    $ptr->{addr} = $addr;

    # update the symbol table
    my $cmd = get_cmd(\@tokens);
    
    if (eqi($cmd,"org")) {
      # make sure the format is correct
      if (@tokens != 2) {
        die "Error:  $infile($linenum) - Expected '$cmd WORD_ADDR'\n";
      }
    }
    elsif (eqi($cmd,"data")) {
      if (@tokens != 3 || nei($tokens[1], "data")) {
        die "Error:  $infile($linenum) - Expected 'NAME $cmd VALUE'\n";
      }
      add_sym($tokens[0], ($addr/$WIDTH), $infile, $linenum);          # word addressing
    }
    elsif (eqi($cmd,"equ")) {
      if (@tokens < 3 || nei($tokens[1], "equ")) {
        die "Error:  $infile($linenum) - Expected 'NAME $cmd VALUE'\n";
      }
      add_sym($tokens[0], "@tokens[2..$#tokens]", $infile, $linenum);
    }
    elsif ($cmd =~ /^(\w+):$/) {
      # label
      add_sym($1, ($addr/$WIDTH), $infile, $linenum);    # word addressing
      shift @tokens;
    }
    
    # make sure we still have some tokens left (label only lines)
    if (@tokens == 0) {
      next;
    }
    
    # check bounds of memory
    if ($addr >= $MEM_SIZE*$WIDTH) {
      die "Error:  Assembled binary is larger than the memory size ($MEM_SIZE)\n";
    }
    
    # the command may have changed if a leading label was removed
    $cmd = get_cmd(\@tokens);

    # update the address
    if (eqi($cmd,"org")) {
      sym_subs(\@tokens, $infile, $linenum);
      $addr = $tokens[1] * $WIDTH;
      if ($addr >= $MEM_SIZE*$WIDTH) {
        die "Error:  'org' address parameter ($tokens[1]/$addr) exceeds memory size ($MEM_SIZE)\n";
      }
    }
    elsif (eqi($cmd,"data")) {
      $addr += $WIDTH;
    }
    elsif (eqi($cmd,"equ")) {
      $addr += 0;
    }
    elsif (exists $INSTRS->{$cmd}) {
      $addr += $WIDTH;    
    }
    else {
      die "Error:  $infile($linenum) - Invalid command '$cmd'\n";
    }
    
    # save the tokens
    $ptr->{tokens} = \@tokens;
    
    unless (eqi($cmd,"equ") || eqi($cmd,"org")) {
      push @lines_out, $ptr;
      $ptr->{out_map} = $#lines_out;               # save the mapping from @lines to @lines_out
    }

    # need to look for the special delay field...
    # if it exists, need to add an implied nop after the instruction
    # (test the instruction entry first so that 'org'/'equ'/'data' do not
    # autovivify empty entries in $INSTRS)
    if (exists $INSTRS->{$cmd} && exists $INSTRS->{$cmd}->{delay}) {
      sym_subs(\@tokens, $infile, $linenum);
      my $delay = $tokens[-1];

      # each nopi gets an operand of at most 255 and accounts for
      # (operand + 1) units of delay, so emit as many as needed
      while ($delay > 0) {
        my $tmp = $delay - 1;
        if ($tmp > 255) {
          $tmp = 255;
        }

        my @nop_tokens = ('nopi', $tmp);

        # build a source-like line for the markup file: same indentation as
        # the instruction and the same spacing between mnemonic and operand
        my $cmd_locat = index($ptr->{orig_line}, $cmd);
        my $spacing = substr($ptr->{orig_line}, 0, $cmd_locat);
        $spacing =~ s/\S/ /g;
        my $orig_line;
        # \Q..\E: match the mnemonic literally, not as a pattern
        if ($ptr->{orig_line} =~ /\Q$cmd\E(\s+).+/) {
          $orig_line = "${spacing}nopi${1}${tmp}";
        }
        else {
          die "Error:  $infile($linenum) - Unable to locate '$cmd' in the source line while generating its implied nopi\n";
        }
        
        my $nopi_ptr = {
                        tokens    => \@nop_tokens,
                        infile    => $ptr->{infile},
                        linenum   => $ptr->{linenum},
                        line      => $ptr->{line},
                        orig_line => $orig_line,
                        addr      => $addr
                       };

        push @lines_markup, $nopi_ptr;
        push @lines_out, $nopi_ptr;
        $nopi_ptr->{out_map} = $#lines_out;
        $addr += $WIDTH;

        $delay -= ($tmp+1);
      }
    }
  }
}

# Syntax check and code assembly
#
# Walks @lines_out (built by pass0), substitutes symbols, and stores the
# encoded word of every line in @HEX via fill_hex; the hex text is also kept
# in the line's {hex} for the markup file.  $HEX_START is lowered to the
# smallest address seen.  Dies on commands missing from $DIRECTIVES; the
# helpers it calls perform the remaining checks.
sub pass1 {
  foreach my $ptr (@lines_out) {
    my $linenum = $ptr->{linenum};
    my $infile = $ptr->{infile};
    my $tokens = $ptr->{tokens};
    my $addr = $ptr->{addr};
    my $cmd = get_cmd($tokens);  
    
    # remember where the image starts (used by -no_pad)
    if ($addr < $HEX_START) {
      $HEX_START = $addr;
    }

    sym_subs($tokens, $infile, $linenum);
    
    # make sure we have a valid command
    if (!exists $DIRECTIVES->{$cmd}) {
      die "Error:  $infile($linenum) - Invalid command '$cmd'\n";
    }
    
    # syntax check and assembly
    my $value;
    if (eqi($cmd,"data")) {
      check_data($tokens->[2], 16, $linenum);
      $value = $tokens->[2];
    }
    # check the instruction and encode it
    else {
      $value = encode_instr($tokens, $infile, $linenum);
    }

    # format as 8 hex digits and keep the low 2*WIDTH digits
    my $hex_data = substr(sprintf("%08x", $value), 8-($WIDTH*2));
    $ptr->{hex} = $hex_data;
    fill_hex(\@HEX, $addr, $WIDTH, $hex_data, $infile, $linenum);
  }

  # finally, if @HEX is not divisible by WIDTH, pad it up to the next
  # multiple of WIDTH (WIDTH - remainder entries, appended contiguously)
  my $mod = @HEX % $WIDTH;
  if ($mod != 0) {
    push @HEX, ("00") x ($WIDTH - $mod);
  }
}


##################################### 



# Usage & Help message
# -----------------------------------------------------

=head1 NAME

asm.pl - Assemble an input asm file.  By default a hex representation will
         be output to STDOUT.       

=head1 SYNOPSIS

  asm.pl
       -in           <Input Asm File>
      [-out          <Output File>]
      [-markup       <Markup File>]
      [-cfile        <C File>]
      [-define|-D    <name=value> or <name>]
      [-no_pad]

      [-h|help]
      [-man]

=head1 OPTIONS AND ARGUMENTS

  -in          <Input Asm File>         : Input assembly file.  REQUIRED.

  -out         <Output File>            : Output file.  If one is not provided output to STDOUT.
                                          OPTIONAL.

  -cfile       <C File>                 : Generates a C header file with the assembled binary encoded as
                                          an array.  OPTIONAL

  -markup      <Markup File>            : Generate a marked up version of the assembly input file
                                          for debugging.  OPTIONAL

  -define|-D   <name=value> or <name>   : Command line define.  OPTIONAL.

  -no_pad                               : Do not pad the start of the generated binary.  If ORG is used
                                          to set the base address of the code, the assembler will typically
                                          generate padding at the start of the binary such that the binary
                                          can be loaded at SRAM address 0x0.  By passing -no_pad, the assembler
                                          will not pad the start of the binary with dummy data, so the binary
                                          will need to be loaded at the address provided in the ORG.  OPTIONAL

  -h|help                               : Print the usage guide

  -man                                  : Print the man page (contains more detailed help)

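=head1 DESCRIPTION

asm.pl assembles the input file in three steps.  The preprocessor strips
';' comments and handles the #include, #define, #undef, #ifdef, #ifndef and
#endif directives; line continuation with '\' is not supported.  Pass 0
assigns an address to every line and builds the symbol table from labels,
'data' and 'equ' statements, honoring 'org'.  Pass 1 substitutes symbols and
encodes each instruction or data word.

The assembled image is written to the -out file, or to STDOUT when -out is
not given.  It can additionally be written as a C array (-cfile) and as an
annotated listing of the source (-markup).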

=cut

