#!/usr/bin/perl # wants one or more font names on the command line. They must have a file # extension of .pfa or .pfb, and have an associated metrics file of the same # path and name, except with extension .afm or .pfm respectively. # CAUTION: the displayed Unicode value (U+xxxx) appears to be correct in most # cases, except that the MS Smart Quotes (32 characters) are given as U+0080 # through U+009F. Those Unicode values are reserved for the C1 Control character # group, not printable glyphs. I don't know if the font files hold incorrect # Unicode values, or this program is in error. See PDF::Builder::Resource:: # Glyphs for u2n and n2u tables -- they may be in error. Note that UTF-8 and # other multibyte encodings are not usable with T1 fonts. Note that some fonts # may spill over onto 1 or more additional pages, which of course is beyond # single byte encoding (automap is used here). use strict; use warnings; use lib '../lib'; use PDF::Builder; use PDF::Builder::Util; use File::Basename; my $compress = 'none'; # uncompressed streams #my $compress = 'flate'; # compressed streams my $sx = 33; my $sy = 45; my $fx = 20; # nominal font size in points my $gLLx = 50; # lower left position of grid my $gLLy = 50; my $LoremIpsum=q|Sed ut perspiciatis, unde omnis iste natus error sit voluptatem accusantium doloremque laudantium, totam rem aperiam eaque ipsa, quae ab illo inventore veritatis et quasi architecto beatae vitae dicta sunt, explicabo. Nemo enim ipsam voluptatem, quia voluptas sit, aspernatur aut odit aut fugit, sed quia consequuntur magni dolores eos, qui ratione voluptatem sequi nesciunt, neque porro quisquam est, qui dolorem ipsum, quia dolor sit, amet, consectetur, adipisci velit, sed quia non numquam eius modi tempora incidunt, ut labore et dolore magnam aliquam quaerat voluptatem. Ut enim ad minima veniam, quis nostrum exercitationem ullam corporis suscipit laboriosam, nisi ut aliquid ex ea commodi consequatur? Quis autem vel eum iure reprehenderit, qui in ea voluptate velit esse, quam nihil molestiae consequatur, vel illum, qui dolorem eum fugiat, quo voluptas nulla pariatur? At vero eos et accusamus et iusto odio dignissimos ducimus, qui blanditiis praesentium voluptatum deleniti atque corrupti, quos dolores et quas molestias excepturi sint, obcaecati cupiditate non provident, similique sunt in culpa, qui officia deserunt mollitia animi, id est laborum et dolorum fuga. Et harum quidem rerum facilis est et expedita distinctio. Nam libero tempore, cum soluta nobis est eligendi optio, cumque nihil impedit, quo minus id, quod maxime placeat, facere possimus, omnis voluptas assumenda est, omnis dolor repellendus. Temporibus autem quibusdam et aut officiis debitis aut rerum necessitatibus saepe eveniet, ut et voluptates repudiandae sint et molestiae non recusandae. Itaque earum rerum hic tenetur a sapiente delectus, ut aut reiciendis voluptatibus maiores alias consequatur aut perferendis doloribus asperiores repellat.|; # build lists of metric paths, glyph files, and metric files my @gns = (); # glyphs: path, name, extension my @mns = (); # metrics: path, name, extension my @ecs = (); # encodings until further notice (one string for each # font, with one or more encodings space-separated) my $mpath = '@./'; # default path list = same dir as glyph file (-m) # must end in / or \ (not added or checked) my $ecflag = 'latin1'; # default encoding to display (-e) # no args, or just -h, -?, or --help if (scalar @ARGV == 0 || (scalar @ARGV == 1 && ($ARGV[0] eq '-h' || $ARGV[0] eq '-?' || $ARGV[0] eq '--help'))) { usage(); exit(1); } # loop through @ARGV, building up @gns (list of glyph file path+names), # @mns (list of metric file path+names), # @ecs (list of encodings for each font file) # check that requested files exist. fatal error if not (show usage). if (processCMD(@ARGV)) { usage(); exit(2); } # use only with single byte encodings, as multibyte (including UTF-8) don't # appear to be compatible with these T1/PS fonts # there may be a number of aliases available for each encoding. # # available encodings (believed to be single byte): # 7bit-jis AdobeStandardEncoding AdobeSymbol AdobeZdingbat ascii # ascii-ctrl cp1006 cp1026 cp1047 cp1250 cp1251 cp1252 cp1253 cp1254 # cp1255 cp1256 cp1257 cp1258 cp37 cp424 cp437 cp500 cp737 cp775 # cp850 cp852 cp855 cp856 cp857 cp858 cp860 cp861 cp862 cp863 cp864 # cp865 cp866 cp869 cp874 cp875 dingbats hp-roman8 iso-8859-1 # iso-8859-2 iso-8859-3 iso-8859-4 iso-8859-5 iso-8859-6 iso-8859-7 # iso-8859-8 iso-8859-9 iso-8859-10 iso-8859-11 iso-8859-13 iso-8859-14 # iso-8859-15 iso-8859-16 iso-ir-165 jis0201-raw koi8-f koi8-r koi8-u # MacArabic MacCentralEurRoman MacCroatian MacCyrillic MacDingbats # MacFarsi MacGreek MacHebrew MacIcelandic MacRoman MacRomanian # MacRumanian MacSami MacSymbol MacThai MacTurkish MacUkrainian nextstep # null posix-bc symbol viscii # # multibyte encodings (do not use): # big5-eten big5-hkscs cp932 cp936 cp949 cp950 euc-cn euc-jp euc-kr # gb12345-raw gb2312-raw gsm0338 hz iso-2022-jp iso-2022-jp-1 iso-2022-kr # jis0208-raw jis0212-raw johab ksc5601-raw MacChineseSimp MacChineseTrad # MacJapanese MacKorean MIME-B MIME-Header MIME-Header-ISO_2022_JP MIME-Q # shiftjis UCS-2BE UCS-2LE UTF-16 UTF-16BE UTF-16LE UTF-32 UTF-32BE # UTF-32LE UTF-7 utf-8-strict utf8 and probably others my ($i, $base, $y, $pdf, $fn, $fnM, $f1); # should have same number of entries each in @gns, @mns, @ecs # loop through list of font names (glyph file names) for ($i=0; $i<scalar @gns; $i++) { $fn = $gns[$i]; # glyph file $fnM = $mns[$i]; # metric file to go with glyph file my $flavor = '?'; # a = ASCII, b = binary metrics file if ($fnM =~ m/\.afm$/i) { $flavor = 'a'; } if ($fnM =~ m/\.pfm$/i) { $flavor = 'b'; } # might be other PS/T1 flavors $base = $fn; $base =~ m#([^/\\]+)$#; $base = $1; $base =~ s#\.pf[ab]$##i; # at least one page for each encoding foreach my $ec (split / /, $ecs[$i]) { $pdf = PDF::Builder->new(-compress => $compress); $f1 = $pdf->corefont('Helvetica'); # for various labels print STDERR "\n$base -- $ec\n"; initNameTable(); # set up u2n and n2u hashes my $fnt; if ($fnM ne '') { if ($flavor eq 'a') { print "Process glyph file $fn\n with AFM file $fnM,\n $ec encoding\n"; $fnt = $pdf->psfont($fn, -afmfile => $fnM, -encode => $ec); } else { print "Process glyph file $fn\n with PFM file $fnM,\n $ec encoding\n"; $fnt = $pdf->psfont($fn, -pfmfile => $fnM, -encode => $ec); } } else { # no metrics file to be used print "Process glyph file $fn\n with no metrics file, $ec encoding\n"; $fnt = $pdf->psfont($fn, -encode => $ec); } my @planes = ($fnt, $fnt->automap()); my $flight = -1; foreach my $plane (@planes) { $flight++; # for plane 1+ ($flight > 0) check if any characters in it if ($flight > 0) { my $flag = 0; # no character found yet foreach my $yp (0..15) { foreach my $x (0..15) { my $ci = $yp*16 + $x; # 0..255 value # always seems to be something at # ci = 32 (U=0020, space) # ci = 33 (U=E000, .notdef) if ($ci == 32 || $ci == 33) { next; } if (defined $plane->uniByEnc($ci) && $plane->uniByEnc($ci) > 0) { $flag = 1; last; } } if ($flag) { last; } } if (!$flag) { next; } # no characters on this plane } # subfonts within overall font (223 characters per plane + space) # they can be treated just like regular fonts my $page = $pdf->page(); $page->mediabox(595,842); my $gfx = $page->gfx(); my $txt = $page->text(); $txt->font($plane,$fx); my $txt2 = $page->text(); $txt2->textlabel($gLLx,800, $f1,20, "font='".$plane->fontname()." / ".$plane->name()."' plane $flight", -hscale=>75); $txt2->textlabel($gLLx,780, $f1,20, "encoding='$ec'"); $txt2->font($f1, 5); $txt2->hscale(80); # distance below baseline (<0) to clear descenders my $u = $plane->underlineposition()*$fx/1000; # draw grid of characters and information # yp character row value (0..F T to B) foreach my $yp (0..15) { $y = 15 - $yp; # y vertical (row) position T to B print STDERR "."; foreach my $x (0..15) { # x horizontal (column) position L to R $txt->translate($gLLx+($sx*$x),$gLLy+($sy*$y)); my $ci = $yp*16 + $x; # 0..255 value my $c = chr($ci); $txt->text($c); my $wx = $plane->width($c)*$fx; # bounding box cell around character $gfx->strokecolor('lightblue'); $gfx->move($gLLx+($sx*$x) ,$gLLy+($sy*$y)+$fx); $gfx->line($gLLx+($sx*$x) ,$gLLy+($sy*$y)+$u); $gfx->line($gLLx+($sx*$x)+$wx,$gLLy+($sy*$y)+$u); $gfx->line($gLLx+($sx*$x)+$wx,$gLLy+($sy*$y)+$fx); $gfx->close(); $gfx->stroke(); # baseline $gfx->strokecolor('gray'); $gfx->move($gLLx+($sx*$x) ,$gLLy+($sy*$y)); $gfx->line($gLLx+($sx*$x)+$wx,$gLLy+($sy*$y)); $gfx->stroke(); # character data $txt2->translate($gLLx+($sx*$x)+$wx,$gLLy+($sy*$y)-6); $txt2->text_right($ci); $txt2->translate($gLLx+($sx*$x)+$wx,$gLLy+($sy*$y)-11); if (defined $plane->uniByEnc($ci)) { $txt2->text_right(sprintf('U+%04X',$plane->uniByEnc($ci))); } else { $txt2->text_right('U+????'); } $txt2->translate($gLLx+($sx*$x)+$wx,$gLLy+($sy*$y)-16); $txt2->text_right($plane->glyphByEnc($ci)); $txt2->translate($gLLx+($sx*$x)+$wx,$gLLy+($sy*$y)-21); $txt2->text_right(sprintf('wx=%i',$plane->wxByEnc($ci))); } # loop through columns (x) } # loop through rows (yp/y) print STDERR "\n"; } # loop through "sub" fonts (planes) # print out some text in this font on next page my $textL = $LoremIpsum; my $page = $pdf->page(); $page->mediabox(595,842); # A4 my $txt = $page->text(); $txt->transform(-translate => [50, 800]); $txt->fillcolor('black'); $txt->font($fnt, 18); $txt->lead(18*1.25); my $toprint; while ($textL ne '') { ($toprint, $textL) = $txt->_text_fill_line($textL, 500, 0); $txt->text($toprint); $txt->nl(); } $pdf->saveas("$0.$base.$ec.pdf"); $pdf->end(); } # loop through each encoding (ec) } # loop $i for each font name (fn) and metrics file (fnM) exit; # consider a -p flag like -m, but for glyph files # would look for glyph file, push path+token instead of just token sub usage { my $message = <<"END_OF_TEXT"; 021_psfonts [flags_1] glyph_file [flags_2] glyph_file [flags_2]... flags_1 -- -m metrics files paths : or ; separated directories specifying where to look for font metrics files. If relative paths (not starting with /), they are relative to the glyph file being processed if @ prepended, else (no @) relative to the current directory. The default metrics path string is just @./ (glyph file's directory). Entries will apply to following glyph file names until a new -m. The paths are not used if a -M or -N flag is given for a glyph file. -e encodings list latin1 latin2 etc. Default is latin1. Any name with characters other than A-Za-z0-9- is assumed to be a glyph file name or another flag (if starts with -). Entries will apply to the following glyph file names until a new -e. glyph_file -- a .pfa or .pfb extension T1 (PS) glyph file (with path) flags_2 -- -m metrics files paths as in flags_1, but replaces whatever existed before. Note that the new path list takes effect at the next glyph file, not the previously-given one. -e encodings list as in flags_1, but replaces whatever existed before. Note that the new encoding list takes effect at the next glyph file, not the previously-given one. -M metrics file path and name an absolute (starting with /) or relative (to the glyph file path if starts with @, otherwise relative to the current directory) path and name. This overrides the -m path list for this ONE glyph file. Normally, -M is needed only when the file name differs between the glyph and metrics files, which is unusual, or you don't want to list this path in -m, or perhaps you have only one glyph file to display and want to give the exact metrics file path and name. It must come IMMEDIATELY AFTER the glyph file it pertains to, and applies only to that one glyph file. -N there is no metrics file for the preceeding glyph file. It must come IMMEDIATELY AFTER the glyph file it pertains to, and applies only to that one glyph file. Going through glyph file names, the complete path, name, and extension must be given for each (no wildcards). The base name and extension are case-sensitive. If a -M or -N flag is not given, the program will search for the metrics file (.afm or .pfm) using each metrics file path entry appended to the glyph file path, the base name of the glyph file, and each extension .afm and .pfm are tried (in that order for a .pfa glyph file, and in the reverse order for a .pfb glyph file). Matching of extensions is case-insensitive, even on Linux systems (e.g., times-roman.AFM is considered a match for times-roman.pfb). END_OF_TEXT print $message; return; } # fill the glyph, metrics, and encoding arrays from the command line # input: @ARGV command line # output: 0 = OK, 1 = failed sub processCMD { my @args = @_; my ($token, @mpaths, $Mpath, $i, $j, $path, $basename, $extension); my $tokenNumber = 0; # at this point, $mpath is default path list and $ecflag is default encoding @mpaths = split /[:;]+/, $mpath; # each element should end in / or \ $ecflag =~ s/^\s+//; # clean off any leading or trailing whitespace $ecflag =~ s/\s+$//; @ecs = split /\s+/, $ecflag; while (@args) { # -m or -e at any time # -M or -N after a glyph file $token = shift @args; $tokenNumber++; # original token index for messages if (substr($token, 0, 2) eq '-m') { $token = substr($token, 2); # strip off -m if run-together if ($token eq '') { # -m flag was by itself. next token is actual path (must exist) if (!scalar @args) { print "missing metrics path after -m (arg $tokenNumber)\n"; return 1; } $mpath = shift @args; } else { # -m flag and path run together $mpath = stripQuotes($token); } # TBD validate mpath: valid structure, valid dirs # $mpath should not have any leading or trailing whitespace, may # have embedded whitespace (Windows) @mpaths = split /[:;]+/, $mpath; if (!@args) { print "no glyph files after -m path list! (arg $tokenNumber)\n"; return 1; } # each mpaths entry should end in a / for ($i=0; $i<scalar @mpaths; $i++) { if ($mpaths[$i] !~ m#[/\\]$#) { $mpaths[$i] .= '/'; } } next; # back to top of loop } # end -m processing if (substr($token, 0, 2) eq '-e') { $token = substr($token, 2); # strip off -e if run-together if ($token ne '') { # -e flag was run together with first (or only) encoding. # next token(s) are rest of list (optional) # NO ' or " around list @ecs = ($token); } else { @ecs = (); } # zero or more names of encodings while (@args) { $token = shift @args; $tokenNumber++; if ($token =~ m/^[a-z0-9-]+$/i && substr($token, 0, 1) ne '-') { # appears to be an encoding. add to list push @ecs, $token; } else { # does not appear to be an encoding. return to args unshift @args, $token; last; } } if (!scalar @ecs) { print "missing encodings list after -e (arg $tokenNumber)\n"; return 1; } if (!@args) { print "no glyph files after -e encodings list! (arg $tokenNumber)\n"; return 1; } next; # back to top of loop } # end -e processing # at this point, should be glyph file name (no flags) if (substr($token, 0, 1) eq '-') { print "unknown or unexpected flag '$token' (arg $tokenNumber)\n"; return 1; } # split into path, basename, extension ($path, $basename, $extension) = splitPath($token); if ($extension =~ m/^pf[ab]$/i) { # acceptable extension name } else { print "expected glyph file extension .pfa or .pfb not found in glyph file '$token' (arg $tokenNumber)\n"; return 1; } # TBD if -p used, prepend path to token if (!-r $token) { print "glyph file '$token' not found or not readable (arg $tokenNumber)\n"; return 1; } push @gns, $token; # look ahead one token for any -M or -N flag NEXT. process and set to '' if (scalar @args) { if (substr($args[0], 0, 2) eq '-N') { # no metrics file to be used push @mns, ''; shift @args; $tokenNumber++; next; } if (substr($args[0], 0, 2) eq '-M') { # explicit metrics file given # if path is relative, append to glyph file's path $token = shift @args; $tokenNumber++; if ($token eq '-M') { # next token is path itself if (!scalar @args) { print "-M flag missing file name following (arg $tokenNumber)\n"; return 1; } $Mpath = shift @args; $tokenNumber++; } else { # -M and file run together in one token $Mpath = stripQuotes(substr($token, 2)); } $Mpath = makeMPath($path, $basename, $extension, $Mpath); if ($Mpath ne '') { # OK path found push @mns, $Mpath; next; } else { print "metrics file extension '$Mpath' not .afm or .pfm, or\n"; print "metrics file not found or is not readable (arg $tokenNumber)\n"; return 1; } } # end of -M processing } # there were more tokens to look at (-N or -M) if (substr($token, 0, 1) eq '-') { print "unknown or unexpected flag '$token' (arg $tokenNumber)\n"; return 1; } # if fell through to here, assume it's a glyph file next up # so process the current glyph file (look for metrics file) # no -M or -N seen, so look for metrics file from -m path $j = 0; # nothing found so far for ($i=0; $i<scalar @mpaths; $i++) { $Mpath = $mpaths[$i]; $Mpath = makeMPath($path, $basename, $extension, $Mpath); if ($Mpath ne '') { # OK path found push @mns, $Mpath; $j = 1; # found a metrics file last; } } # if we fell through to here, couldn't find a metrics file if (!$j) { print "unable to find a metrics file for $path$basename.$extension\n"; return 1; } } # while loop through @args return 0; } # end of processCMD() # split up a path/filename into path, basename, extension sub splitPath { my $token = shift; my ($path, $basename, $extension); if ($token =~ m#^(.*)([/\\])([^/\\]+)$#) { $path = $1.$2; $basename = $3; } elsif ($token =~ m#^(.*)([/\\])$#) { $path = $1.$2; $basename = ''; } elsif ($token =~ m#^([/\\])([^/\\]+)$#) { $path = $1; $basename = $2; } else { $path = ''; $basename = $token; } if ($basename =~ m#^(.*)\.([^.]+)$#) { $basename = $1; $extension = $2; } else { $extension = ''; } return ($path, $basename, $extension); } # end of splitPath() # given glyph file path, basename, extension; and proposed metrics file path # (optionally with a .afm or .pfm file), create the full path of the metrics # file. test if file exists and is readable, and if not, return ''. if a # proposed metrics file path includes the metrics file name, just use that. # a path prefixed with @ is relative to the glyph file path (appended to it). sub makeMPath { my ($gPath, $gName, $gExt, $mPath) = @_; my ($i, $j, $isRelative, $dh, $entry); # $mPath might be empty, or dir with or without trailing / or \ # $mPath might start with a @ -- if relative path, is relative to gPath # $mPath might be a full path+name+afm/pfm my ($p, $b, $e) = splitPath($mPath); if ($b ne '' && $e ne '') { if ($e !~ m/^[ap]fm$/i) { return ''; } # full path and file. see if exists. return either way if (-r $mPath) { # metrics file is readable. return it return $mPath; } else { # metrics file not found or is unreadable. return empty # if starts with @ (shouldn't), it will die here return ''; } } # try each gPath + gName + afm or pfm # .pfa tries .afm before .pfm, .pfb tries .pfm before .afm my @extList = qw/ .afm .pfm /; if ($gExt =~ m#^pfb$#i) { @extList = qw/ .pfm .afm /; # or reverse @extList } $isRelative = 0; if (substr($mPath, 0, 1) eq '@') { $isRelative = 1; # is relative to glyph file path, not current dir $mPath = substr($mPath, 1); # strip @ } if ($mPath !~ m#^[/\\]# && $isRelative) { # relative path: prepend glyph file's path $mPath = $gPath . $mPath; # gPath SHOULD end with / or \ } # make sure mPath ends with / or \ if ($mPath !~ m#[/\\]$#) { $mPath .= '/'; } if (!opendir $dh, $mPath) { print "can't open -m directory $mPath: $!\n"; return 1; } while ($entry = readdir $dh) { if ($entry eq '.' || $entry eq '..') { next; } if (-d $mPath.$entry) { next; } # have an $entry that might be gName.ext for ($j=0; $j<scalar @extList; $j++) { if ($entry =~ m#^$gName$extList[$j]$#i) { # we have a match! remember it if it's readable if (-r $mPath.$entry) { closedir $dh; return $mPath.$entry; } } } } closedir $dh; # got to end of entries without success. indicate failure return ''; } # end of makeMPath() # strip off any ' or " surrounding a string # I'm not sure you'll see something like -M"\Program Files\..." as # "\Program Files\...", \Program Files\..., or just \Program. sub stripQuotes { my $string = shift; if (length($string) < 3) { return $string; } if (substr($string, 0, 1) eq "'" && substr($string, -1, 1) eq "'") { $string = substr($string, 1, length($string) - 2); } elsif (substr($string, 0, 1) eq '"' && substr($string, -1, 1) eq '"') { $string = substr($string, 1, length($string) - 2); } return $string; } # end of stripQuotes() __END__