#!/usr/bin/perl -w

# Generate a short man page from --help and --version output.
# Copyright (C) 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004, 2005, 2009,
# 2010, 2011, 2012, 2013, 2014, 2015, 2016, 2017 Free Software Foundation, Inc.

# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 3, or (at your option)
# any later version.

# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.

# You should have received a copy of the GNU General Public License
# along with this program; if not, see <http://www.gnu.org/licenses/>.

# Written by Brendan O'Dea <bod@debian.org>
# Available from ftp://ftp.gnu.org/gnu/help2man/

use 5.008;
use strict;
use Getopt::Long;
use Text::ParseWords qw(shellwords);
use Text::Tabs qw(expand);
use POSIX qw(strftime setlocale LC_ALL);
use Locale::gettext qw(gettext);
use Encode qw(decode encode);
use I18N::Langinfo qw(langinfo CODESET);

my $this_program = 'help2man';
my $this_version = '1.47.6';

# Codeset of the locale selected for the executable's output; stays
# undefined until configure_locale has been called.
my $encoding;

{
    my $gettext = Locale::gettext->domain($this_program);

    # Look up a message in this program's translation domain.
    sub _ { $gettext->get($_[0]) }

    # First non-empty locale setting found in the invoking user's
    # environment, falling back to 'C'.
    my ($user_locale) = grep defined && length,
        (map $ENV{$_}, qw(LANGUAGE LC_ALL LC_MESSAGES LANG)), 'C';

    my $user_encoding = langinfo CODESET;

    # Set localisation of date and executable's output.  The locale name
    # is the (optional) first argument; 'C' is used when it is missing.
    sub configure_locale {
        delete @ENV{qw(LANGUAGE LC_MESSAGES LANG)};
        setlocale LC_ALL, $ENV{LC_ALL} = shift || 'C';
        $encoding = langinfo CODESET;
    }

    # Decode/encode text using the output locale's codeset; both pass
    # the text through untouched while $encoding is still unset.
    sub dec { $encoding ? decode($encoding, $_[0]) : $_[0] }
    sub enc { $encoding ? encode($encoding, $_[0]) : $_[0] }

    # Encode text using the codeset of the invoking user's locale.
    sub enc_user { encode $user_encoding, $_[0] }

    # Die with message formatted in the invoking user's locale.  The
    # first argument is an untranslated sprintf format (see N_), the
    # remaining arguments are its parameters.
    sub kark {
        setlocale LC_ALL, $user_locale;
        my $fmt = $gettext->get(shift);
        my $errmsg = enc_user sprintf $fmt, @_;
        die $errmsg, "\n";
    }
}

# Returns its argument unchanged; wraps format strings whose translation
# is deferred until kark looks them up.
sub N_ { $_[0] }

sub program_basename;
sub get_option_value;
sub convert_option;
sub fix_italic_spacing;

# NOTE(review): the line breaks and column alignment inside the two
# message texts below were reconstructed after whitespace damage; they are
# gettext message ids, so confirm them against the translation catalogue.
my $version_info = enc_user sprintf _(<<'EOT'), $this_program, $this_version;
GNU %s %s

Copyright (C) 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004, 2005, 2009, 2010,
2011, 2012, 2013, 2014, 2015, 2016, 2017 Free Software Foundation, Inc.
This is free software; see the source for copying conditions.  There is NO
warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.

Written by Brendan O'Dea <bod@debian.org>
EOT

my $help_info = enc_user sprintf _(<<'EOT'), $this_program, $this_program;
`%s' generates a man page out of `--help' and `--version' output.

Usage: %s [OPTION]... EXECUTABLE

 -n, --name=STRING       description for the NAME paragraph
 -s, --section=SECTION   section number for manual page (1, 6, 8)
 -m, --manual=TEXT       name of manual (User Commands, ...)
 -S, --source=TEXT       source of program (FSF, Debian, ...)
 -L, --locale=STRING     select locale (default "C")
 -i, --include=FILE      include material from `FILE'
 -I, --opt-include=FILE  include material from `FILE' if it exists
 -o, --output=FILE       send output to `FILE'
 -p, --info-page=TEXT    name of Texinfo manual
 -N, --no-info           suppress pointer to Texinfo manual
 -l, --libtool           exclude the `lt-' from the program name
     --help              print this help, then exit
     --version           print version number, then exit

EXECUTABLE should accept `--help' and `--version' options and produce output on
stdout although alternatives may be specified using:

 -h, --help-option=STRING     help option string
 -v, --version-option=STRING  version option string
 --version-string=STRING      version string
 --no-discard-stderr          include stderr when parsing option output

Report bugs to <bug-help2man@gnu.org>.
EOT

my $section = 1;
my $manual = '';
my $source = '';
my $help_option = '--help';
my $version_option = '--version';
my $discard_stderr = 1;
my ($opt_name, @opt_include, $opt_output, $opt_info, $opt_no_info, $opt_libtool,
    $version_text);

# Option table shared by the command line and by option lines found at
# the top of include files.  Each @opt_include entry is a pair of
# [ file name, required flag ].
my %opt_def = (
    'n|name=s'           => \$opt_name,
    's|section=s'        => \$section,
    'm|manual=s'         => \$manual,
    'S|source=s'         => \$source,
    'L|locale=s'         => sub { configure_locale pop },
    'i|include=s'        => sub { push @opt_include, [ pop, 1 ] },
    'I|opt-include=s'    => sub { push @opt_include, [ pop, 0 ] },
    'o|output=s'         => \$opt_output,
    'p|info-page=s'      => \$opt_info,
    'N|no-info'          => \$opt_no_info,
    'l|libtool'          => \$opt_libtool,
    'help'               => sub { print $help_info; exit },
    'version'            => sub { print $version_info; exit },
    'h|help-option=s'    => \$help_option,
    'v|version-option=s' => \$version_option,
    'version-string=s'   => \$version_text,
    'discard-stderr!'    => \$discard_stderr,
);

# Parse options.
Getopt::Long::config('bundling');
die $help_info unless GetOptions %opt_def and @ARGV == 1;

# Fall back to the default locale when --locale was not given.
configure_locale unless $encoding;

my %include = ();
my %replace = ();
my %append = ();
my %append_match = ();
my @sections = (); # retain order of include file or in-line *section*s

# Process include file (if given).  Format is:
#
#   Optional initial text, ignored.  May include lines starting with `-'
#   which are processed as options.
#
#   [section]
#   Verbatim text to be included in the named section.  By default at
#   the start, but in the case of `name' and `synopsis' the content
#   will replace the autogenerated contents.
#
#   [<section]
#   Verbatim text to be inserted at the start of the named section.
#
#   [=section]
#   Verbatim text to replace the named section.
#
#   [>section]
#   Verbatim text to be appended to the end of the named section.
#
#   /pattern/
#   Verbatim text for inclusion below a paragraph matching `pattern'.
#
while (@opt_include) {
    my ($inc, $required) = @{shift @opt_include};

    # A missing optional (--opt-include) file is skipped silently.
    next unless -f $inc or $required;

    # Explicit read mode: the name is always taken as a literal path and
    # never interpreted as an open mode or a command.
    open my $inc_fh, '<', $inc
        or kark N_("%s: can't open `%s' (%s)"), $this_program, $inc, $!;

    my $key;    # current section name, or compiled /pattern/
    my $hash;   # table that text for $key is collected in

    while (<$inc_fh>) {
        # Convert input to internal Perl format, so that multibyte
        # sequences are treated as single characters.
        $_ = dec $_;

        # [section]
        if (/^\[([^]]+)\]\s*$/) {
            $key = uc $1;
            $key =~ s/^\s+//;
            $key =~ s/\s+$//;
            $hash = \%include;

            # Handle explicit [<section], [=section] and [>section]
            if ($key =~ s/^([<>=])\s*//) {
                if    ($1 eq '>') { $hash = \%append; }
                elsif ($1 eq '=') { $hash = \%replace; }
            }
            # NAME/SYNOPSIS replace by default
            elsif ($key eq _('NAME') or $key eq _('SYNOPSIS')) {
                $hash = \%replace;
            }
            else {
                $hash = \%include;
            }

            push @sections, $key;
            next;
        }

        # /pattern/
        if (m!^/(.*)/([ims]*)\s*$!) {
            my $pat = $2 ? "(?$2)$1" : $1;

            # Check pattern.
            eval { $key = qr($pat) };
            if ($@) {
                # Strip Perl's own location and report the include
                # file position instead.
                $@ =~ s/ at .*? line \d.*//;
                die "$inc:$.:$@";
            }

            $hash = \%append_match;
            next;
        }

        # Check for options before the first section--anything else is
        # silently ignored, allowing the first for comments and
        # revision info.
        unless ($key) {
            # handle options
            if (/^-/) {
                local @ARGV = shellwords $_;
                GetOptions %opt_def;
            }

            next;
        }

        $hash->{$key} .= $_;
    }

    close $inc_fh;

    kark N_("%s: no valid information found in `%s'"), $this_program, $inc
        unless $key;
}

# Compress trailing blank lines.
for my $hash (\%include, \%replace, \%append, \%append_match) {
    for (keys %$hash) { $hash->{$_} =~ s/\n+$/\n/ }
}

# Grab help and version info from executable.
my $help_text   = get_option_value $ARGV[0], $help_option;
$version_text ||= get_option_value $ARGV[0], $version_option;

# By default the generated manual pages will include the current date.  This may
# however be overridden by setting the environment variable $SOURCE_DATE_EPOCH to
# an integer value of the seconds since the UNIX epoch.  This is primarily
# intended to support reproducible builds (wiki.debian.org/ReproducibleBuilds)
# and will additionally ensure that the output date string is UTC.
my $epoch_secs = time;
if (exists $ENV{SOURCE_DATE_EPOCH} and $ENV{SOURCE_DATE_EPOCH} =~ /^(\d+)$/) {
    $epoch_secs = $1;
    $ENV{TZ} = 'UTC0';
}

# Translators: the following message is a strftime(3) format string, which in
# the English version expands to the month as a word and the full year.  It
# is used on the footer of the generated manual pages.  If in doubt, you may
# just use %x as the value (which should be the full locale-specific date).
my $date = enc(strftime(_("%B %Y"), localtime $epoch_secs));
my $program = program_basename($ARGV[0]);
my $package = $program;
my $version;

# With --output, any existing file is removed first and standard output
# is then pointed at a freshly created file of that name.
if ($opt_output) {
    if (-e $opt_output) {
        unlink($opt_output)
            or kark(N_("%s: can't unlink %s (%s)"), $this_program,
                $opt_output, $!);
    }

    open STDOUT, ">$opt_output"
        or kark(N_("%s: can't create %s (%s)"), $this_program, $opt_output,
            $!);
}

# The first line of the --version information is assumed to be in one
# of the following formats:
#
#   <version>
#   <program> <version>
#   {GNU,Free} <program> <version>
#   <program> ({GNU,Free} <package>) <version>
#   <program> - {GNU,Free} <package> <version>
#
# and separated from any copyright/author details by a blank line.

# Split off the first line into $_; the remainder (or an empty string when
# there is none) stays in $version_text.
($_, $version_text) = ((split /\n+/, $version_text, 2), '');

if (/^(\S+) +\(((?:GNU|Free) +[^)]+)\) +(.*)/ or
    /^(\S+) +- *((?:GNU|Free) +\S+) +(.*)/) {
    $program = program_basename($1);
    $package = $2;
    $version = $3;
}
elsif (/^((?:GNU|Free) +)?(\S+) +(.*)/) {
    $program = program_basename($2);
    $package = $1 ? "$1$program" : $program;
    $version = $3;
}
else {
    # Nothing recognisable: treat the whole line as the version.
    $version = $_;
}

# No info for `info' itself.
$opt_no_info = 1 if $program eq 'info';

# --name overrides --include contents.
$replace{_('NAME')} = "$program \\- $opt_name\n" if $opt_name;

# Translators: "NAME", "SYNOPSIS" and other one or two word strings in all
# upper case are manual page section headings.  The man(1) manual page in your
# language, if available should provide the conventional translations.
# Work on whichever NAME text will be used: the replacement if there is
# one, otherwise the (possibly just created, empty) included text.  The
# default below is stored by assigning to the loop alias $_.
for ($replace{_('NAME')} || ($include{_('NAME')} ||= '')) {
    if ($_) {
        # Use first name given as $program
        $program = $1 if /^([^\s,]+)(?:,?\s*[^\s,\\-]+)*\s+\\?-/;
    }
    else {
        # Set a default (useless) NAME paragraph.
        $_ = sprintf _("%s \\- manual page for %s %s") . "\n", $program,
            $program, $version;
    }
}

# Man pages traditionally have the page title in caps.
my $PROGRAM = uc $program;

# Set default page head/footers
$source ||= "$package $version";
if (!$manual) {
    # Pick the manual title from the leading part of the section.
    if ($section =~ /^(1[Mm]|8)/) {
        $manual = enc(_('System Administration Utilities'));
    }
    elsif ($section =~ /^6/) {
        $manual = enc(_('Games'));
    }
    else {
        $manual = enc(_('User Commands'));
    }
}

# Extract usage clause(s) [if any] for SYNOPSIS.
# Translators: "Usage" and "or" here are patterns (regular expressions) which
# are used to match the usage synopsis in program output.  An example from cp
# (GNU coreutils) which contains both strings:
#  Usage: cp [OPTION]... [-T] SOURCE DEST
#    or:  cp [OPTION]... SOURCE... DIRECTORY
#    or:  cp [OPTION]... -t DIRECTORY SOURCE...
my $PAT_USAGE = _('Usage');
my $PAT_USAGE_CONT = _('or');
if ($help_text =~ s/^($PAT_USAGE):( +(\S+))(.*)((?:\n(?: {6}\1| *($PAT_USAGE_CONT): +\S).*)*)//om) {
    # First usage line, followed by any continuation lines.
    my @syn = $3 . $4;
    my $more_usage = $5;

    if ($more_usage) {
        $more_usage =~ s/^\n//;
        for my $alternative (split /\n/, $more_usage) {
            $alternative =~ s/^ *(($PAT_USAGE_CONT): +)?//o;
            push @syn, $alternative;
        }
    }

    my $synopsis = '';
    for (@syn) {
        $synopsis .= ".br\n" if $synopsis;

        # Strip any directory part (and libtool prefix) from the
        # command, which is then set in bold.
        s!^\S*/!!;
        s/^lt-// if $opt_libtool;
        s/^(\S+) *//;
        $synopsis .= ".B $1\n";

        # The arguments are italicised, except for brackets and
        # ellipses which are switched back to roman.
        s/\s+$//;
        s/(([][]|\.\.+)+)/\\fR$1\\fI/g;
        s/^/\\fI/ unless s/^\\fR//;
        $_ .= '\fR';
        s/(\\fI)( *)/$2$1/g;
        s/\\fI\\fR//g;
        s/^\\fR//;
        s/\\fI$//;
        s/^\./\\&./;

        $_ = fix_italic_spacing($_);
        $synopsis .= "$_\n";
    }

    $include{_('SYNOPSIS')} .= $synopsis;
}

# Process text, initial section is DESCRIPTION.
my $sect = _('DESCRIPTION');
$_ = "$help_text\n\n$version_text";

# Normalise paragraph breaks.
s/^\n+//;
s/\n*$/\n/;
s/\n\n+/\n\n/g;

# Join hyphenated lines.
s/([A-Za-z])-\n *([A-Za-z])/$1$2/g;

# Temporarily exchange leading dots, apostrophes and backslashes for
# tokens.  (The tokens \x80-\x83 are turned into roff escapes when the
# sections are printed; \x84 marks the start of a line within a
# paragraph being assembled.)
s/^\./\x80/mg;
s/^'/\x81/mg;
s/\\/\x82/g;

# Translators: patterns are used to match common program output. In the source
# these strings are all of the form of "my $PAT_something = _('...');" and are
# regular expressions.  If there is more than one commonly used string, you
# may separate alternatives with "|".  Spaces in these expressions are written
# as " +" to indicate that more than one space may be matched.  The string
# "(?:[\\w-]+ +)?" in the bug reporting pattern is used to indicate an
# optional word, so that either "Report bugs" or "Report _program_ bugs" will
# be matched.
my $PAT_BUGS          = _('Report +(?:[\w-]+ +)?bugs|Email +bug +reports +to');
my $PAT_AUTHOR        = _('Written +by');
my $PAT_OPTIONS       = _('Options');
my $PAT_ENVIRONMENT   = _('Environment');
my $PAT_FILES         = _('Files');
my $PAT_EXAMPLES      = _('Examples');
my $PAT_FREE_SOFTWARE = _('This +is +free +software');

# Start a new paragraph (if required) for these.
s/([^\n])\n($PAT_BUGS|$PAT_AUTHOR) /$1\n\n$2 /og;

# Convert iso-8859-1 copyright symbol or (c) to nroff
# character.
s/^Copyright +(?:\xa9|\([Cc]\))/Copyright \\(co/mg;

# Consume the text in $_ one paragraph at a time, appending the roff
# rendering of each paragraph to the current section in %include.
while (length) {
    # Convert some standard paragraph names.
    if (s/^($PAT_OPTIONS): *\n+//o) {
        $sect = _('OPTIONS');
        next;
    }
    if (s/^($PAT_ENVIRONMENT): *\n+//o) {
        $sect = _('ENVIRONMENT');
        next;
    }
    if (s/^($PAT_FILES): *\n+//o) {
        $sect = _('FILES');
        next;
    }
    if (s/^($PAT_EXAMPLES): *\n+//o) {
        $sect = _('EXAMPLES');
        next;
    }

    # Custom section indicated by a line containing "*Section Name*".
    if (s/^\*(\w(.*\w)?)\* *\n+//) {
        $sect = uc $1;
        $sect =~ tr/*/ /;  # also accept *Section*Name*
        push @sections, $sect;
        next;
    }

    # Copyright section.
    if (/^Copyright /) {
        $sect = _('COPYRIGHT');
    }
    # Bug reporting section.
    elsif (/^($PAT_BUGS) /o) {
        $sect = _('REPORTING BUGS');
    }
    # Author section.
    elsif (/^($PAT_AUTHOR)/o) {
        $sect = _('AUTHOR');
    }

    # Examples, indicated by an indented leading $, % or > are
    # rendered in a constant width font.
    if (/^( +)([\$\%>] )\S/) {
        my $indent = $1;
        my $prefix = $2;
        my $break = '.IP';
        while (s/^$indent\Q$prefix\E(\S.*)\n*//) {
            $include{$sect} .= "$break\n\\f(CW$prefix$1\\fR\n";
            $break = '.br';
        }

        next;
    }

    # Raw text of the paragraph, only collected when /pattern/ includes
    # need to be matched against it.
    my $matched = '';

    # Sub-sections have a trailing colon and the second line indented.
    if (s/^(\S.*:) *\n / /) {
        $matched .= $& if %append_match;
        $include{$sect} .= qq(.SS "$1"\n);
    }

    my $indent = 0;
    my $content = '';

    # NOTE(review): in the two tagged-paragraph patterns below the tag is
    # separated from its text by two or more spaces; the doubled space was
    # restored after whitespace damage -- confirm against upstream.

    # Option with description.
    if (s/^( {1,10}([+-]\S.*?))(?:(  +(?!-))|\n( {20,}))(\S.*)\n//) {
        $matched .= $& if %append_match;
        $indent = length ($4 || "$1$3");
        $content = ".TP\n\x84$2\n\x84$5\n";
        unless ($4) {
            # Indent may be different on second line.
            $indent = length $& if /^ {20,}/;
        }
    }
    # Option without description.
    elsif (s/^ {1,10}([+-]\S.*)\n//) {
        $matched .= $& if %append_match;
        $content = ".HP\n\x84$1\n";
        $indent = 80; # not continued
    }
    # Indented paragraph with tag.
    elsif (s/^( +(\S.*?))(?:(  +)|\n( {20,}))(\S.*)\n//) {
        $matched .= $& if %append_match;
        $indent = length ($4 || "$1$3");
        $content = ".TP\n\x84$2\n\x84$5\n";
    }
    # Indented paragraph.
    elsif (s/^( +)(\S.*)\n//) {
        $matched .= $& if %append_match;
        $indent = length $1;
        $content = ".IP\n\x84$2\n";
    }
    # Left justified paragraph.
    else {
        s/(.*)\n//;
        $matched .= $& if %append_match;
        $content = ".PP\n" if $include{$sect};
        $content .= "$1\n";
    }

    # Append continuations.
    while ($indent ? s/^ {$indent}(\S.*)\n// : s/^(\S.*)\n//) {
        $matched .= $& if %append_match;
        $content .= "\x84$1\n";
    }

    # Move to next paragraph.
    s/^\n+//;

    for ($content) {
        # Leading dot and apostrophe protection.
        s/\x84\./\x80/g;
        s/\x84'/\x81/g;
        s/\x84//g;

        # Examples should be verbatim.
        unless ($sect eq _('EXAMPLES')) {
            # Convert options.
            s/(^|[ (])(-[][\w=-]+)/$1 . convert_option $2/mge;

            # Italicise filenames: /a/b, $VAR/c/d, ~/e/f
            s!
                (^|[ (])                    # space/punctuation before
                (
                    (?:\$\w+|~)?            # leading variable, or tilde
                    (?:/\w(?:[\w.-]*\w)?)+  # path components
                )
                ($|[ ,;.)])                 # space/punctuation after
             !$1\\fI$2\\fP$3!xmg;

            $_ = fix_italic_spacing $_;
        }

        # Escape remaining hyphens.
        s/-/\x83/g;

        if ($sect eq _('COPYRIGHT')) {
            # Insert line breaks before additional copyright messages
            # and the disclaimer.
            s/\n(Copyright |$PAT_FREE_SOFTWARE)/\n.br\n$1/og;
        }
        elsif ($sect eq _('REPORTING BUGS')) {
            # Handle multi-line bug reporting sections of the form:
            #
            #   Report <program> bugs to <addr>
            #   GNU <package> home page: <url>
            #   ...
            s/\n([[:upper:]])/\n.br\n$1/g;
        }
    }

    # Check if matched paragraph contains /pat/.
    if (%append_match) {
        for my $pat (keys %append_match) {
            if ($matched =~ $pat) {
                $content .= ".PP\n" unless $append_match{$pat} =~ /^\./;
                $content .= $append_match{$pat};
            }
        }
    }

    $include{$sect} .= $content;
}

# Refer to the real documentation.
# NOTE(review): line breaks and sentence spacing of the message below were
# reconstructed after whitespace damage; it is a gettext message id.
unless ($opt_no_info) {
    my $info_page = $opt_info || $program;

    $sect = _('SEE ALSO');
    $include{$sect} .= ".PP\n" if $include{$sect};
    $include{$sect} .= sprintf _(<<'EOT'), $program, $program, $info_page;
The full documentation for
.B %s
is maintained as a Texinfo manual.  If the
.B info
and
.B %s
programs are properly installed at your site, the command
.IP
.B info %s
.PP
should give you access to the complete manual.
EOT
}

# Append additional text.
while (my ($name, $text) = each %append) {
    $include{$name} .= $text;
}

# Replace sections.
while (my ($name, $text) = each %replace) {
    $include{$name} = $text;
}

# Output header.
# NOTE(review): the header lines and the section ordering lists below were
# lost from this copy of the file and have been reconstructed from the
# variables prepared earlier and the section names used in this script;
# confirm against upstream.
print <<EOT;
.\\" DO NOT MODIFY THIS FILE!  It was generated by $this_program $this_version.
.TH $PROGRAM "$section" "$date" "$source" "$manual"
EOT

# Section ordering.
my @pre = (_('NAME'), _('SYNOPSIS'), _('DESCRIPTION'), _('OPTIONS'),
    _('ENVIRONMENT'), _('FILES'), _('EXAMPLES'));

my @post = (_('AUTHOR'), _('REPORTING BUGS'), _('COPYRIGHT'), _('SEE ALSO'));
my %filter = map { $_ => 1 } @pre, @post;

# Output content: the leading standard sections, then any other sections
# in the order they were first seen, then the trailing standard sections.
my %done;
for my $sect (@pre, (grep !$filter{$_}, @sections), @post) {
    next if $done{$sect}++; # ignore duplicates
    next unless $include{$sect};

    # Headings containing non-word characters (e.g. spaces) are quoted.
    my $quote = $sect =~ /\W/ ? '"' : '';
    print enc ".SH $quote$sect$quote\n";

    for ($include{$sect}) {
        # Replace leading dot, apostrophe, backslash and hyphen
        # tokens.
        s/\x80/\\&./g;
        s/\x81/\\&'/g;
        s/\x82/\\e/g;
        s/\x83/\\-/g;

        # Convert some latin1 chars to troff equivalents
        s/\xa0/\\ /g; # non-breaking space

        print enc $_;
    }
}

close STDOUT or kark N_("%s: error writing to %s (%s)"), $this_program,
    $opt_output || 'stdout', $!;

exit;

# Get program basename, and strip libtool "lt-" prefix if required.
sub program_basename {
    my ($name) = @_;
    $name =~ s!.*/!!;
    $name =~ s/^lt-// if $opt_libtool;
    return $name;
}

# Call program with given option and return results.  The output is
# decoded, has trailing spaces removed from each line and tabs expanded.
# Dies (via kark) when the command produces no usable output.
sub get_option_value {
    my ($prog, $opt) = @_;
    my $stderr = $discard_stderr ? '/dev/null' : '&1';

    # The command is deliberately run through the shell: $opt may hold
    # several words and stderr is redirected.
    my $value = join '',
        map { s/ +$//; expand $_ }
        map { dec $_ }
        `$prog $opt 2>$stderr`;

    unless ($value) {
        my $err = N_("%s: can't get `%s' info from %s%s");
        my $extra = $discard_stderr
            ? "\n" . N_("Try `--no-discard-stderr' if option outputs to stderr")
            : '';

        kark $err, $this_program, $opt, $prog, $extra;
    }

    return $value;
}

# Convert option dashes to \- to stop nroff from hyphenating 'em, and
# embolden.  Option arguments get italicised.
sub convert_option {
    my $option = '\fB' . shift;

    $option =~ s/-/\x83/g;

    # An optional argument ("[=ARG]") closes the font changes itself;
    # otherwise italicise any "=ARG" or " ARG" and return to roman.
    unless ($option =~ s/\[=(.*)\]$/\\fR[=\\fI$1\\fR]/) {
        $option =~ s/=(.)/\\fR=\\fI$1/;
        $option =~ s/ (.)/ \\fI$1/;
        $option .= '\fR';
    }

    return $option;
}

# Insert spacing escape characters \, and \/ before and after italic text.  See
# http://www.gnu.org/software/groff/manual/html_node/Ligatures-and-Kerning.html
sub fix_italic_spacing {
    my ($text) = @_;
    $text =~ s!\\fI(.*?)\\f([BRP])!\\fI\\,$1\\/\\f$2!g;
    return $text;
}