#! /usr/bin/perl # # ---------------------------------------------------------------------------- # "THE CAPPUCHINO-WARE LICENSE" # Rainer Wichmann wrote this file. As long as you # retain this notice you can do whatever you want with this stuff. If we # meet some day, and you think this stuff is worth it, you can buy me a # cappuchino in return. Rainer Wichmann # ---------------------------------------------------------------------------- # # Script to convert a spectrum in JCAMP-DX format into a simple # text file with rows in 'x y' format (suitable e.g. for gnuplot, # IDL, IRAF, or just anything else you like). # # JCAMP-DX is a format that IMHO is the result of re-inventing the wheel, # and badly. The FITS format widely used e.g. in astronomy is almost # 10 years older, has extensive (and **free**) software support, # and can do everything that JCAMP-DX does, at least w.r.t. spectra. # # This script can handle the AFFN, PAC, SQZ, DIF, and DIFDUP form # of JCAMP-DX TABULAR DATA in the (X++(Y..)) variable list format. use strict; use warnings; use Getopt::Std; my %opts; my $USAGE = "Converts an input file in JCAMP-DX spectral format into a list of x y pairs\n\nUsage: jcamp_conv.pl [-dhq] input.file\n\t-d for debug output\n\t-h for help\n\t-q for NOT writing header info\n\nTo redirect standard output, use:\n\tjcamp_conv.pl input.file > output.file"; my ($infile) = ''; my $lnum = 0; sub readBlock; getopts('hdq', \%opts); if (defined $opts{'h'}) { print "$USAGE\n"; exit 0; } # process file # if (!defined($ARGV[0])) { print "$USAGE\n"; die "ERROR: No input file given on command line !"; } $infile = $ARGV[0]; if (defined $opts{'d'}) { print "Input file: $infile\n"; } open FILE, "< $infile" or die "Cannot open $infile: $!"; while (0 == readBlock) {}; # loop over BLOCKs in file. # # Subroutines # sub dup_conv($) { my ($dup_line) = @_; $dup_line =~ s/(\%|J\d*|K\d*|L\d*|M\d*|N\d*|O\d*|P\d*|Q\d*|R\d*)S/$1/g; $dup_line =~ s/(j\d*|k\d*|l\d*|m\d*|n\d*|o\d*|p\d*|q\d*|r\d*)S/$1/g; $dup_line =~ s/(\@|A\d*|B\d*|C\d*|D\d*|E\d*|F\d*|G\d*|H\d*|I\d*)S/$1/g; $dup_line =~ s/(a\d*|b\d*|c\d*|d\d*|e\d*|f\d*|g\d*|h\d*|i\d*)S/$1/g; $dup_line =~ s/(\%|J\d*|K\d*|L\d*|M\d*|N\d*|O\d*|P\d*|Q\d*|R\d*)T/$1$1/g; $dup_line =~ s/(j\d*|k\d*|l\d*|m\d*|n\d*|o\d*|p\d*|q\d*|r\d*)T/$1$1/g; $dup_line =~ s/(\@|A\d*|B\d*|C\d*|D\d*|E\d*|F\d*|G\d*|H\d*|I\d*)T/$1$1/g; $dup_line =~ s/(a\d*|b\d*|c\d*|d\d*|e\d*|f\d*|g\d*|h\d*|i\d*)T/$1$1/g; $dup_line =~ s/(\%|J\d*|K\d*|L\d*|M\d*|N\d*|O\d*|P\d*|Q\d*|R\d*)U/$1$1$1/g; $dup_line =~ s/(j\d*|k\d*|l\d*|m\d*|n\d*|o\d*|p\d*|q\d*|r\d*)U/$1$1$1/g; $dup_line =~ s/(\@|A\d*|B\d*|C\d*|D\d*|E\d*|F\d*|G\d*|H\d*|I\d*)U/$1$1$1/g; $dup_line =~ s/(a\d*|b\d*|c\d*|d\d*|e\d*|f\d*|g\d*|h\d*|i\d*)U/$1$1$1/g; $dup_line =~ s/(\%|J\d*|K\d*|L\d*|M\d*|N\d*|O\d*|P\d*|Q\d*|R\d*)V/$1$1$1$1/g; $dup_line =~ s/(j\d*|k\d*|l\d*|m\d*|n\d*|o\d*|p\d*|q\d*|r\d*)V/$1$1$1$1/g; $dup_line =~ s/(\@|A\d*|B\d*|C\d*|D\d*|E\d*|F\d*|G\d*|H\d*|I\d*)V/$1$1$1$1/g; $dup_line =~ s/(a\d*|b\d*|c\d*|d\d*|e\d*|f\d*|g\d*|h\d*|i\d*)V/$1$1$1$1/g; $dup_line =~ s/(\%|J\d*|K\d*|L\d*|M\d*|N\d*|O\d*|P\d*|Q\d*|R\d*)W/$1$1$1$1$1/g; $dup_line =~ s/(j\d*|k\d*|l\d*|m\d*|n\d*|o\d*|p\d*|q\d*|r\d*)W/$1$1$1$1$1/g; $dup_line =~ s/(\@|A\d*|B\d*|C\d*|D\d*|E\d*|F\d*|G\d*|H\d*|I\d*)W/$1$1$1$1$1/g; $dup_line =~ s/(a\d*|b\d*|c\d*|d\d*|e\d*|f\d*|g\d*|h\d*|i\d*)W/$1$1$1$1$1/g; $dup_line =~ s/(\%|J\d*|K\d*|L\d*|M\d*|N\d*|O\d*|P\d*|Q\d*|R\d*)X/$1$1$1$1$1$1/g; $dup_line =~ s/(j\d*|k\d*|l\d*|m\d*|n\d*|o\d*|p\d*|q\d*|r\d*)X/$1$1$1$1$1$1/g; $dup_line =~ s/(\@|A\d*|B\d*|C\d*|D\d*|E\d*|F\d*|G\d*|H\d*|I\d*)X/$1$1$1$1$1$1/g; $dup_line =~ s/(a\d*|b\d*|c\d*|d\d*|e\d*|f\d*|g\d*|h\d*|i\d*)X/$1$1$1$1$1$1/g; $dup_line =~ s/(\%|J\d*|K\d*|L\d*|M\d*|N\d*|O\d*|P\d*|Q\d*|R\d*)Y/$1$1$1$1$1$1$1/g; $dup_line =~ s/(j\d*|k\d*|l\d*|m\d*|n\d*|o\d*|p\d*|q\d*|r\d*)Y/$1$1$1$1$1$1$1/g; $dup_line =~ s/(\@|A\d*|B\d*|C\d*|D\d*|E\d*|F\d*|G\d*|H\d*|I\d*)Y/$1$1$1$1$1$1$1/g; $dup_line =~ s/(a\d*|b\d*|c\d*|d\d*|e\d*|f\d*|g\d*|h\d*|i\d*)Y/$1$1$1$1$1$1$1/g; $dup_line =~ s/(\%|J\d*|K\d*|L\d*|M\d*|N\d*|O\d*|P\d*|Q\d*|R\d*)Z/$1$1$1$1$1$1$1$1/g; $dup_line =~ s/(j\d*|k\d*|l\d*|m\d*|n\d*|o\d*|p\d*|q\d*|r\d*)Z/$1$1$1$1$1$1$1$1/g; $dup_line =~ s/(\@|A\d*|B\d*|C\d*|D\d*|E\d*|F\d*|G\d*|H\d*|I\d*)Z/$1$1$1$1$1$1$1$1/g; $dup_line =~ s/(a\d*|b\d*|c\d*|d\d*|e\d*|f\d*|g\d*|h\d*|i\d*)Z/$1$1$1$1$1$1$1$1/g; $dup_line =~ s/(\%|J\d*|K\d*|L\d*|M\d*|N\d*|O\d*|P\d*|Q\d*|R\d*)s/$1$1$1$1$1$1$1$1$1/g; $dup_line =~ s/(j\d*|k\d*|l\d*|m\d*|n\d*|o\d*|p\d*|q\d*|r\d*)s/$1$1$1$1$1$1$1$1$1/g; $dup_line =~ s/(\@|A\d*|B\d*|C\d*|D\d*|E\d*|F\d*|G\d*|H\d*|I\d*)s/$1$1$1$1$1$1$1$1$1/g; $dup_line =~ s/(a\d*|b\d*|c\d*|d\d*|e\d*|f\d*|g\d*|h\d*|i\d*)s/$1$1$1$1$1$1$1$1$1/g; return $dup_line; } sub dif_conv($) { my ($dif_line) = @_; $dif_line =~ s/\%/ 0/g; $dif_line =~ s/J/ 1/g; $dif_line =~ s/K/ 2/g; $dif_line =~ s/L/ 3/g; $dif_line =~ s/M/ 4/g; $dif_line =~ s/N/ 5/g; $dif_line =~ s/O/ 6/g; $dif_line =~ s/P/ 7/g; $dif_line =~ s/Q/ 8/g; $dif_line =~ s/R/ 9/g; $dif_line =~ s/j/ -1/g; $dif_line =~ s/k/ -2/g; $dif_line =~ s/l/ -3/g; $dif_line =~ s/m/ -4/g; $dif_line =~ s/n/ -5/g; $dif_line =~ s/o/ -6/g; $dif_line =~ s/p/ -7/g; $dif_line =~ s/q/ -8/g; $dif_line =~ s/r/ -9/g; return $dif_line; } sub sqz_conv($) { my ($sqz_line) = @_; $sqz_line =~ s/\@/ 0/g; $sqz_line =~ s/A/ 1/g; $sqz_line =~ s/B/ 2/g; $sqz_line =~ s/C/ 3/g; $sqz_line =~ s/D/ 4/g; $sqz_line =~ s/E/ 5/g; $sqz_line =~ s/F/ 6/g; $sqz_line =~ s/G/ 7/g; $sqz_line =~ s/H/ 8/g; $sqz_line =~ s/I/ 9/g; $sqz_line =~ s/a/ -1/g; $sqz_line =~ s/b/ -2/g; $sqz_line =~ s/c/ -3/g; $sqz_line =~ s/d/ -4/g; $sqz_line =~ s/e/ -5/g; $sqz_line =~ s/f/ -6/g; $sqz_line =~ s/g/ -7/g; $sqz_line =~ s/h/ -8/g; $sqz_line =~ s/i/ -9/g; return $sqz_line; } # write out the data table # sub writeOut($$$$$$$) { my ($firstx, $lastx, $npoints, $count, $xfactor, $yfactor, $lines_r) = @_; my @lines = @{$lines_r}; my $sqz_line; my $dif_line; my $dup_line; my $yold = 0; my $xold = 0; my $ycheck = 0; my @values; my $incr = ($lastx - $firstx) / ($npoints - 1.0); # for (my $i = 0; $i < $count; ++$i) { print "$lines[$i]\n"; } for (my $i = 0; $i < $count; ++$i) { if ($lines[$i] =~ /[\%JKLMNOPQRjklmnopqr]/) { if ($lines[$i] =~ /[STUVWYXZs]/) { print STDERR "DIFDUP -> $lines[$i]\n" if defined($opts{'d'}); $dup_line = dup_conv($lines[$i]); print STDERR "DIFDUP <- $dup_line\n" if defined($opts{'d'}); } else { print STDERR "DIF $lines[$i]\n" if defined($opts{'d'}); $dup_line = $lines[$i]; } $sqz_line = sqz_conv($dup_line); $dif_line = dif_conv($sqz_line); @values = split /\ +/, $dif_line; # # convert differences to abs values # for (my $i = 2; $i <= $#values; ++$i) { $values[$i] += $values[$i - 1]; } if ($ycheck == 1) { print STDERR "FAILED Y-VALUE CHECK line $lnum: $xold != $values[0] or $yold != $values[1]\n" if (($xold != $values[0]) or ($yold != $values[1])); $values[0] += $incr; for (my $i = 2; $i <= $#values; ++$i) { printf "%f %f\n", $xfactor*$values[0], $yfactor*$values[$i]; $values[0] += $incr; } $xold = $values[0] - $incr; $yold = $values[$#values]; } else { for (my $i = 1; $i <= $#values; ++$i) { printf "%f %f\n", $xfactor*$values[0], $yfactor*$values[$i]; $values[0] += $incr; } $xold = $values[0] - $incr; $yold = $values[$#values]; } $ycheck = 1; } elsif ($lines[$i] =~ /[\@ABCDEFGHIabcdefghi]/) { print STDERR "SQZ $lines[$i]\n" if defined($opts{'d'}); $sqz_line = sqz_conv($lines[$i]); @values = split /\ +/, $sqz_line; for (my $i = 1; $i <= $#values; ++$i) { printf "%f %f\n", $xfactor*$values[0], $yfactor*$values[$i]; $values[0] += $incr; } $ycheck = 0; } else { print STDERR "PAC $lines[$i]\n" unless (!defined($opts{'d'})); $lines[$i] =~ s/\-/ -/g; $lines[$i] =~ s/\+/ +/g; @values = split /\ +/, $lines[$i]; for (my $i = 1; $i <= $#values; ++$i) { printf "%f %f\n", $xfactor*$values[0], $yfactor*$values[$i]; $values[0] += $incr; } $ycheck = 0; } } } # read the data table # sub readTabular($$$$$) { my ($firstx, $lastx, $npoints, $xfactor, $yfactor) = @_; my @lines = ''; my $count = 0; while () { ++$lnum; if (/^\#\#END=/) { writeOut($firstx, $lastx, $npoints, $count, $xfactor, $yfactor, \@lines); print STDERR "END of block reached (readTabular)\n" unless (!defined($opts{'d'})); return 0; } $lines[$count] = $_; $lines[$count] =~ s/^\s*//; $lines[$count] =~ s/\s*$//; # print "$lines[$count]\n"; ++$count; } } # Read a single BLOCK # sub readBlock { my ($title, $xunits, $yunits, $xfactor, $yfactor); my ($firstx, $lastx, $npoints, $dummy); my $format = 0; my $TABULAR = 1; while () { ++$lnum; if (/^\#\#TITLE=\s*(.*)/) { $title = $1; chomp($title); print STDERR "TITLE $title\n" unless (!defined($opts{'d'})); print "# TITLE $title\n" unless (defined($opts{'q'})); } if (/^\#\#ORIGIN=\s*(.*)/) { $dummy = $1; chomp($dummy); print "# ORIGIN $dummy\n" unless (defined($opts{'q'})); } if (/^\#\#OWNER=\s*(.*)/) { $dummy = $1; chomp($dummy); print "# OWNER $dummy\n" unless (defined($opts{'q'})); } if (/^\#\#DATE=\s*(.*)/) { $dummy = $1; chomp($dummy); print "# DATE $dummy\n" unless (defined($opts{'q'})); } if (/^\#\#TIME=\s*(.*)/) { $dummy = $1; chomp($dummy); print "# TIME $dummy\n" unless (defined($opts{'q'})); } if (/^\#\#RESOLUTION=\s*(.*)/) { $dummy = $1; chomp($dummy); print "# RESOLUTION $dummy\n" unless (defined($opts{'q'})); } if (/^\#\#DELTAX=\s*(.*)/) { $dummy = $1; chomp($dummy); print "# DELTAX $dummy\n" unless (defined($opts{'q'})); } if (/^\#\#XUNITS=\s*(.*)/) { $xunits = $1; chomp($xunits); print STDERR "XUNITS $xunits\n" unless (!defined($opts{'d'})); print "# XUNITS $xunits\n" unless (defined($opts{'q'})); } if (/^\#\#YUNITS=\s*(.*)/) { $yunits = $1; chomp($yunits); print STDERR "YUNITS $yunits\n" unless (!defined($opts{'d'})); print "# YUNITS $yunits\n" unless (defined($opts{'q'})); } if (/^\#\#XFACTOR=\s*(.*)/) { $xfactor = $1; chomp($xfactor); print STDERR "XFACTOR $xfactor\n" unless (!defined($opts{'d'})); print "# XFACTOR $xfactor\n" unless (defined($opts{'q'})); } if (/^\#\#YFACTOR=\s*(.*)/) { $yfactor = $1; chomp($yfactor); print STDERR "YFACTOR $yfactor\n" unless (!defined($opts{'d'})); print "# YFACTOR $yfactor\n" unless (defined($opts{'q'})); } if (/^\#\#FIRSTX=\s*(.*)/) { $firstx = $1; chomp($firstx); print STDERR "FIRSTX $firstx\n" unless (!defined($opts{'d'})); print "# FIRSTX $firstx\n" unless (defined($opts{'q'})); } if (/^\#\#LASTX=\s*(.*)/) { $lastx = $1; chomp($lastx); print STDERR "LASTX $lastx\n" unless (!defined($opts{'d'})); print "# LASTX $lastx\n" unless (defined($opts{'q'})); } if (/^\#\#NPOINTS=\s*(.*)/) { $npoints = $1; chomp($npoints); print STDERR "NPOINTS $npoints\n" unless (!defined($opts{'d'})); print "# NPOINTS $npoints\n" unless (defined($opts{'q'})); } if (/^\#\#XYDATA=\s*\(X\+\+\(Y\.\.Y\)\)/) { $format = $TABULAR; print STDERR "FORMAT (X++(Y..Y))\n" unless (!defined($opts{'d'})); if (0 == readTabular($firstx, $lastx, $npoints, $xfactor, $yfactor)) { return 0; } } if (/^\#\#END=/) { print STDERR "END of block reached (readBlock)\n" unless (!defined($opts{'d'})); return 0; } } return 1; # finished }