Logo Questions Linux Laravel Mysql Ubuntu Git Menu
 

What's the best way to parse Excel file in Perl?

What's the simplest way to parse an Excel file in Perl? Converting it to a text file would also work.

like image 482
MCS Avatar asked Jan 09 '09 18:01

MCS


4 Answers

The best way is to use Spreadsheet::ParseExcel.

Here is an example:

#!/usr/bin/perl -w

use strict;
use warnings;

use Spreadsheet::ParseExcel;

my $parser   = Spreadsheet::ParseExcel->new();
my $workbook = $parser->parse('Book1.xls');

for my $worksheet ( $workbook->worksheets() ) {

    my ( $row_min, $row_max ) = $worksheet->row_range();
    my ( $col_min, $col_max ) = $worksheet->col_range();

    for my $row ( $row_min .. $row_max ) {
        for my $col ( $col_min .. $col_max ) {

            my $cell = $worksheet->get_cell( $row, $col );
            next unless $cell;

            print "Row, Col    = ($row, $col)\n";
            print "Value       = ", $cell->value(),       "\n";
            print "Unformatted = ", $cell->unformatted(), "\n";
            print "\n";
        }
    }
}

To convert an Excel file to text with Perl I'd recommend excel2txt which uses Spreadsheet::ParseExcel.

like image 168
jmcnamara Avatar answered Oct 24 '22 05:10

jmcnamara


I have had great luck using Spreadsheet::ParseExcel.

like image 26
Mr. Muskrat Avatar answered Oct 24 '22 04:10

Mr. Muskrat


The 'best' way would ideally be to use a module from our beloved CPAN.

Whenever you have a problem Instantly ask: Why have I not checked CPAN yet?

Which module however I can't be certain, here is a list to get you started, try them, see what works.

  • Spreadsheet::ParseExcel
  • DBD::Excel
  • XML::Excel
  • File::Extract
like image 4
Kent Fredric Avatar answered Oct 24 '22 06:10

Kent Fredric


@echo off  
ECHO CHECK THE VERSION FROM THE PROJECT
    echo see also this [link][1]
for /f "tokens=*" %%i in ('Type Version.txt') do set _Version=%%i
ECHO The _Version is %_Version%

ECHO remove the output html files
del *.html /q
ECHO remove the output log files
del *.log /q


::pause
ECHO %0 > %0.log
ECHO %0.error.log >%0.error.log

set BaseDir=D:\perl\sfw\ExcelToHtml.%_Version%
echo BaseDir is %BaseDir%  1>>%0.log 2>>%0.error.log
ECHO.


set LogLevel=3
echo LogLevel is %LogLevel%  1>>%0.log 2>>%0.error.log
ECHO.


::set ExcelFileToParse="%BaseDir%\CoDA_ETL_Integration.xls"
SET ExcelFileToParse="%BaseDir%\TODO.xls"
echo ExcelFileToParse is %ExcelFileToParse%  1>>%0.log 2>>%0.error.log
echo.

set OutputDir=%BaseDir%
echo OutputDir is %Outputdir%  1>>%0.log 2>>%0.error.log
echo.

ECHO SET THE UNICODE FOR PERL FOR UTF-8
SET PERL_UNICODE=S
ECHO %%PERL_UNICODE%% IS %PERL_UNICODE%

::set PerlScript=parseExcelToCsv.pl
set PerlScript=ExcelToHtml.pl
echo PerlScript is %PerlScript%  1>>%0.log 2>>%0.error.log
ECHO.


echo Action !!!  1>>%0.log 2>>%0.error.log
echo perl %BaseDir%\%PerlScript% %LogLevel% %ExcelFileToParse% %OutputDir%  1>>%0.log 2>>%0.error.log
perl %BaseDir%\%PerlScript% %LogLevel% %ExcelFileToParse% %OutputDir% 1>>%0.log 2>>%0.error.log

%0.error.log
%0.log

::debug pause

use strict;
use Spreadsheet::ParseExcel;
use utf8 ; 
use Encode ; 


package ExcelToHtml ; 



my $DebugLevel = 3 ; 
print defined($ARGV[0]) ? $DebugLevel = $ARGV[0] : "Using max DebugLevel = $DebugLevel", " \n";
my $minimumAmountOfCommandLineArgs = 3 ; 
my ( $ExcelFileToParse , $OutputDir , $BaseFileName ) = (); 


sub main {
    logMsg("$0 SCRIPT START " , 1 );
    #Action !!! call here methods
    checkCommandLineArgs();     
    buildNames (); 

    ParseExcelAndlogMsgFiles ( $ExcelFileToParse  , $BaseFileName , $OutputDir) ; 

    logMsg("$0 SCRIPT STOP " , 1 );

} #eof MAIN 

sub buildNames 
{

    $ExcelFileToParse = "$ARGV[1]" ; 
    $OutputDir="$ARGV[2]" ;
    $BaseFileName = $ExcelFileToParse ; 
    logMsg( "\$ExcelFileToParse is $ExcelFileToParse" ) ; 
    logMsg ( "\$OutputDir is $OutputDir" ) ; 
    logMsg ("\$BaseFileName is $BaseFileName \n" );
    $BaseFileName =~ s/^(.*)(\\|\/)(.*)/$3/;    #strip the directory part 
    logMsg ("\$BaseFileName is $BaseFileName ");
    $BaseFileName =~ s/^(.*)(\.)(.*)/$1/ ;  #strip the file extension
    logMsg ( "\$BaseFileName is $BaseFileName "); 


} #eof sub buildNames


sub ParseExcelAndlogMsgFiles {

my $ExcelFileToParse = shift ; 
my $BaseFileName = shift ; 
my $OutputDir = shift ; 
my $strToReturn = "";

my $parser   = Spreadsheet::ParseExcel->new();
my $workbook = $parser->Parse("$ExcelFileToParse");

for my $worksheet ( $workbook->worksheets() ) {

        my ( $row_min, $row_max ) = $worksheet->row_range();
        my ( $col_min, $col_max ) = $worksheet->col_range();

        for my $row ( $row_min .. $row_max ) {
                my $rowStr = "" ; 
                for my $col ( $col_min .. $col_max ) {

                        my $cell = $worksheet->get_cell( $row, $col );
                        next unless $cell;

                        logMsg ( " Row, Col    = ($row, $col)") ; 
                        logMsg ( "\$cell->encoding() is " . $cell->encoding()) ;

                        logMsg (" Value = ", $cell->value() );
                        $rowStr .=  makeCell($cell->value() )      ;                #The Value

                        logMsg ( "Unformatted = ", $cell->unformatted()) ; 

                } #eof col
            $rowStr = makeRow( $rowStr ); 
            $strToReturn .= $rowStr ; 
     } #eof for my row

        my $worksheetName = $worksheet->{'Name'}  ; 
        my $FileSheet = "$OutputDir/$BaseFileName" .  '.' . $worksheetName . '.' . 'html' ; 
        $strToReturn = makeTable ( $strToReturn );
        $strToReturn = makeFile ( $strToReturn );
        logMsg ( "\$strToReturn  is $strToReturn " ) ; 
        PrintToFile ( $FileSheet, $strToReturn )  ; 
        $strToReturn = "" ;
        $FileSheet = "" ;
        } #eof for my worksheet

} #eof sub


sub trim    
{
        $_[0]=~s/^\s+//;
        $_[0]=~s/\s+$//;
        return $_[0];
}

# =========================================== eof sub trim 
# Action !!!



sub makeRow 
{
    my $row = shift ; 
    return "<tr>" .  $row .  "</tr> \n"  ; 
}

sub makeCell 
{
    my $cell = shift ; 
    return "<td>"  .  "$cell" . " </td>" ; 
}

sub makeTable
{
    my $table = shift ; 
    return "\n <table> " . $table . "</table> \n" ; 

}

sub makeFile
{
    my $file = shift ; 

    $file = "<html> <head><meta http-equiv=\"Content-Type\" content=\"text/html; charset=UTF-8\"/></head><body> $file </body></html>\n" ; 
    return $file ; 
}


sub checkCommandLineArgs
{

        if ( @ARGV != $minimumAmountOfCommandLineArgs )
        {
            logErrorMsg ( "Not enougn command line args supplied exit 1" , 1 );
            die "usage: $0 1 args \n" ; 
            exit 1 ; 
        } #eof if
        else 
        {
                foreach (@ARGV) { logMsg ( "$_ " ) ;  }
        } #eof else         if ( @ARGV != 4 )


} #eof sub checkCommandLineArgs

#log a message according to the DebugLevel
sub logMsg 
{
    my $msg = shift ;
    my $importance = shift ; 

    my $CurrentPerlScriptName = "$0" ; 
    my $niceMonth = GetANiceMonth (); 
    my $logFile = "$CurrentPerlScriptName" . '_' . "$niceMonth" .  '.log' ; 
    $msg = GetANiceTime () .' --- ' .   $msg . " \n" ; 

    if ( $importance == 1 ) 
    {
        $msg = "\n============================================================ \n" . $msg ; 
        $msg = $msg . "============================================================ \n" ;
    }


    #debug nothing
    if ( $DebugLevel == 0 ) {   return ; } 

    #just logMsg the message
    if ( $DebugLevel == 1 ) 
    {
        logMsg ( $msg ); 
    } #eof if ( $DebugLevel == 1 ) 

    #logMsg the message in 
    if ( $DebugLevel == 2 ) 
    {
        #READ ALL ROWS OF A FILE TO ALIST 
        open (LOG, ">> $logFile") || print "could not open the \$logFile $logFile !!!\n"; 
        print LOG $msg  ; 
        close LOG;
    }

    #logMsg the message in 
    if ( $DebugLevel == 3 ) 
    {
        #READ ALL ROWS OF A FILE TO ALIST 
        open (LOG, ">> $logFile") || print "could not open the \$logFile $logFile !!!\n"; 
        print LOG $msg  ; 
        close LOG;
        print $msg ; 
    } #eof if ( $DebugLevel == 3 ) 


} #eof sub logMsg 

#log a message according to the DebugLevel
sub logErrorMsg 
{
    my $errorMsg = shift ; 
    my $importance  = shift ; 

    my $CurrentPerlScriptName = "$0" ; 
    my $niceMonth = GetANiceMonth (); 
    my $errorLogFile = "$CurrentPerlScriptName" . "_" .  "$niceMonth" . '.error.log' ; 
    $errorMsg = GetANiceTime () .' --- ' .   $errorMsg . " \n" ; 

        if ( $importance == 1 ) 
        {
            $errorMsg = "\n============================================================ \n" . $errorMsg ; 
            $errorMsg= $errorMsg. "============================================================ \n" ;
        }



    #debug nothing
    if ( $DebugLevel == 0 ) {   return ; } 

    #just logMsg the message
    if ( $DebugLevel == 1 )  {      print $errorMsg ;       } 

    #logMsg the message in a error log file
    if ( $DebugLevel == 2 ) 
    {
        #READ ALL ROWS OF A FILE TO ALIST 
        open (ERRLOG, ">> $errorLogFile") || print "could not open the \$errorLogFile $errorLogFile !!!\n"; 
        print ERRLOG $errorMsg ; 
        close ERRLOG;
    }

    #logMsg the message in 
    if ( $DebugLevel == 3 ) 
    {
        #READ ALL ROWS OF A FILE TO ALIST 
        open (ERRLOG, ">> $errorLogFile") || print "could not open the \$errorLogFile $errorLogFile !!!\n"; 
        print ERRLOG $errorMsg  ; 
        close ERRLOG;
        print $errorMsg ; 
    } #eof if ( $DebugLevel == 3 ) 


} #eof sub logErrorMsg 


#GET A NICE TIME 
sub GetANiceTime {
 # Purpose: returns the time in yyyymmdd-format 
 my ($sec,$min,$hour,$mday,$mon,$year,$wday,$yday,$isdst) = localtime(time); 
 #---- change 'month'- and 'year'-values to correct format ---- 
 $min = "0$min" if ($min < 10); 
 $hour = "0$hour" if ($hour < 10);
 $mon = $mon + 1;
 $mon = "0$mon" if ($mon < 10); 
 $year = $year + 1900;
 $mday = "0$mday" if ($mday < 10); 
 return "$year\.$mon\.$mday" . "-" . "$hour\:$min\:$sec"; 
} #eof sub GetANiceTime


sub GetANiceMonth {
 # Purpose: returns the time in yyyymmdd-format 
 my ($sec,$min,$hour,$mday,$mon,$year,$wday,$yday,$isdst) = localtime(time); 
 #---- change 'month'- and 'year'-values to correct format ---- 
 $mon = $mon + 1;
 $mon = "0$mon" if ($mon < 10); 
 $year = $year + 1900;
 return "$year\.$mon" ; 
} #eof sub GetANiceTime



sub PrintToFile {
my $FileOutput = shift ; 
my $StringToPrint = shift ; 
#READ ALL ROWS OF A FILE TO ALIST 
open (FILEOUTPUT, ">$FileOutput") || 
            print "could not open the \$FileOutput $FileOutput!\n"; 
print  FILEOUTPUT $StringToPrint ; 
close FILEOUTPUT ;

#debug $strToReturn .=  $StringToPrint; 

}
# =========================================== eof sub PrintToFile



#Action !!!
main();




1 ; 

__END__

#VersionHistory: 
#1.0. --- Yordan Georgiev --- Initial creation 
#1.1 --- Yordan Georgiev --- Added conditional logging 
like image 1
Yordan Georgiev Avatar answered Oct 24 '22 05:10

Yordan Georgiev