• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
/* Get the contents of an URL.
   Copyright (C) 2001-2003, 2005-2010, 2012, 2017-2020 Free Software
   Foundation, Inc.
   Written by Bruno Haible <haible@clisp.cons.org>, 2001.

   This program is free software: you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 3 of the License, or
   (at your option) any later version.

   This program is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program.  If not, see <https://www.gnu.org/licenses/>.  */
18 
19 
20 #ifdef HAVE_CONFIG_H
21 # include "config.h"
22 #endif
23 
24 #include <errno.h>
25 #include <fcntl.h>
26 #include <getopt.h>
27 #include <stdbool.h>
28 #include <stdio.h>
29 #include <stdlib.h>
30 #include <locale.h>
31 #include <unistd.h>
32 
33 #include "noreturn.h"
34 #include "closeout.h"
35 #include "error.h"
36 #include "error-progname.h"
37 #include "progname.h"
38 #include "relocatable.h"
39 #include "basename-lgpl.h"
40 #include "full-write.h"
41 #include "execute.h"
42 #include "javaexec.h"
43 #include "binary-io.h"
44 #include "propername.h"
45 #include "gettext.h"
46 
47 #define _(str) gettext (str)
48 
49 #ifndef STDOUT_FILENO
50 # define STDOUT_FILENO 1
51 #endif
52 
53 
/* Only high-level toolkits, written in languages with exception handling,
   have an URL datatype and operations to fetch an URL's contents.  Such
   toolkits are Java (class java.net.URL), Qt (classes QUrl and QUrlOperator).
   We use the Java toolkit.
   Note that this program doesn't handle redirection pages; programs which
   wish to process HTML redirection tags need to include a HTML parser,
   and only full-fledged browsers like w3m, lynx, links have both
   an URL fetcher (which covers at least the protocols "http", "ftp", "file")
   and a HTML parser.  [Well, this is not true: libxml2 and Java (see
   <http://java.sun.com/products/jfc/tsc/articles/bookmarks/>) also contain
   HTML parsers.]  */
65 
66 
/* Whether to output something on standard error.
   This is true by default, because the user should know why we are trying to
   establish an internet connection.  Also, users get confused if a program
   produces no output for more than 10 seconds for no apparent reason.
   Cleared by the -q / --quiet / --silent option.  */
static bool verbose = true;
72 
/* Long options.  Each entry maps a long option name to the short option
   character that getopt_long returns for it; "quiet" and "silent" are
   aliases that both map to 'q'.  The all-zero entry terminates the table.  */
static const struct option long_options[] =
{
  { "help", no_argument, NULL, 'h' },
  { "quiet", no_argument, NULL, 'q' },
  { "silent", no_argument, NULL, 'q' },
  { "version", no_argument, NULL, 'V' },
  { NULL, 0, NULL, 0 }
};
82 
83 
84 /* Forward declaration of local functions.  */
85 _GL_NORETURN_FUNC static void usage (int status);
86 static void fetch (const char *url, const char *file);
87 
88 
89 int
main(int argc,char * argv[])90 main (int argc, char *argv[])
91 {
92   int optchar;
93   bool do_help;
94   bool do_version;
95 
96   /* Set program name for messages.  */
97   set_program_name (argv[0]);
98   error_print_progname = maybe_print_progname;
99 
100   /* Set locale via LC_ALL.  */
101   setlocale (LC_ALL, "");
102 
103   /* Set the text message domain.  */
104   bindtextdomain (PACKAGE, relocate (LOCALEDIR));
105   textdomain (PACKAGE);
106 
107   /* Ensure that write errors on stdout are detected.  */
108   atexit (close_stdout);
109 
110   /* Set default values for variables.  */
111   do_help = false;
112   do_version = false;
113 
114   /* Parse command line options.  */
115   while ((optchar = getopt_long (argc, argv, "hqV", long_options, NULL)) != EOF)
116     switch (optchar)
117     {
118     case '\0':          /* Long option.  */
119       break;
120     case 'h':           /* --help */
121       do_help = true;
122       break;
123     case 'q':           /* --quiet / --silent */
124       verbose = false;
125       break;
126     case 'V':           /* --version */
127       do_version = true;
128       break;
129     default:
130       usage (EXIT_FAILURE);
131       /* NOTREACHED */
132     }
133 
134   /* Version information requested.  */
135   if (do_version)
136     {
137       printf ("%s (GNU %s) %s\n", last_component (program_name),
138               PACKAGE, VERSION);
139       /* xgettext: no-wrap */
140       printf (_("Copyright (C) %s Free Software Foundation, Inc.\n\
141 License GPLv3+: GNU GPL version 3 or later <%s>\n\
142 This is free software: you are free to change and redistribute it.\n\
143 There is NO WARRANTY, to the extent permitted by law.\n\
144 "),
145               "2001-2020", "https://gnu.org/licenses/gpl.html");
146       printf (_("Written by %s.\n"), proper_name ("Bruno Haible"));
147       exit (EXIT_SUCCESS);
148     }
149 
150   /* Help is requested.  */
151   if (do_help)
152     usage (EXIT_SUCCESS);
153 
154   /* Test argument count.  */
155   if (optind + 2 != argc)
156     error (EXIT_FAILURE, 0, _("expected two arguments"));
157 
158   /* Fetch the contents.  */
159   fetch (argv[optind], argv[optind + 1]);
160 
161   exit (EXIT_SUCCESS);
162 }
163 
164 /* Display usage information and exit.  */
165 static void
usage(int status)166 usage (int status)
167 {
168   if (status != EXIT_SUCCESS)
169     fprintf (stderr, _("Try '%s --help' for more information.\n"),
170              program_name);
171   else
172     {
173       printf (_("\
174 Usage: %s [OPTION] URL FILE\n\
175 "), program_name);
176       printf ("\n");
177       /* xgettext: no-wrap */
178       printf (_("\
179 Fetches and outputs the contents of an URL.  If the URL cannot be accessed,\n\
180 the locally accessible FILE is used instead.\n\
181 "));
182       printf ("\n");
183       printf (_("\
184 Informative output:\n"));
185       printf (_("\
186   -h, --help                  display this help and exit\n"));
187       printf (_("\
188   -V, --version               output version information and exit\n"));
189       printf (_("\
190   -q, --quiet, --silent       suppress progress indicators\n"));
191       printf ("\n");
192       /* TRANSLATORS: The first placeholder is the web address of the Savannah
193          project of this package.  The second placeholder is the bug-reporting
194          email address for this package.  Please add _another line_ saying
195          "Report translation bugs to <...>\n" with the address for translation
196          bugs (typically your translation team's web or email address).  */
197       printf(_("\
198 Report bugs in the bug tracker at <%s>\n\
199 or by email to <%s>.\n"),
200              "https://savannah.gnu.org/projects/gettext",
201              "bug-gettext@gnu.org");
202     }
203 
204   exit (status);
205 }
206 
207 /* Copy a file's contents to stdout.  */
208 static void
cat_file(const char * src_filename)209 cat_file (const char *src_filename)
210 {
211   int src_fd;
212   char buf[4096];
213   const int buf_size = sizeof (buf);
214 
215   src_fd = open (src_filename, O_RDONLY | O_BINARY);
216   if (src_fd < 0)
217     error (EXIT_FAILURE, errno, _("error while opening \"%s\" for reading"),
218            src_filename);
219 
220   for (;;)
221     {
222       ssize_t n_read = read (src_fd, buf, buf_size);
223       if (n_read < 0)
224         {
225 #ifdef EINTR
226           if (errno == EINTR)
227             continue;
228 #endif
229           error (EXIT_FAILURE, errno, _("error reading \"%s\""), src_filename);
230         }
231       if (n_read == 0)
232         break;
233 
234       if (full_write (STDOUT_FILENO, buf, n_read) < n_read)
235         error (EXIT_FAILURE, errno, _("error writing stdout"));
236     }
237 
238   if (close (src_fd) < 0)
239     error (EXIT_FAILURE, errno, _("error after reading \"%s\""), src_filename);
240 }
241 
#if USEJAVA

/* Exit code of the Java program.
   Written by execute_it(); fetch() presets it to 127 before starting the
   Java program and inspects it afterwards.  */
static int java_exitcode;

/* Callback passed by fetch() to execute_java_class().
   Runs the prepared command PROG_PATH with arguments PROG_ARGV through
   execute() and stores its exit code in java_exitcode.  PRIVATE_DATA is
   unused.
   Returns false if the exit code is 0 (success) or 2 (timed out), and true
   for any other exit code.  */
static bool
execute_it (const char *progname,
            const char *prog_path, char **prog_argv,
            void *private_data)
{
  (void) private_data;

  java_exitcode =
    execute (progname, prog_path, prog_argv, true, true, false, false, true,
             false, NULL);
  /* Exit code 0 means success, 2 means timed out.  */
  return !(java_exitcode == 0 || java_exitcode == 2);
}

#endif
262 
263 /* Fetch the URL.  Upon error, use the FILE as fallback.  */
264 static void
fetch(const char * url,const char * file)265 fetch (const char *url, const char *file)
266 {
267   if (verbose)
268     {
269       fprintf (stderr, _("Retrieving %s..."), url);
270       fflush (stderr);
271     }
272 
273 #if USEJAVA
274   /* First try: using Java.  */
275   {
276     const char *class_name = "gnu.gettext.GetURL";
277     const char *gettextjar;
278     const char *args[2];
279 
280     /* Make it possible to override the gettext.jar location.  This is
281        necessary for running the testsuite before "make install".  */
282     gettextjar = getenv ("GETTEXTJAR");
283     if (gettextjar == NULL || gettextjar[0] == '\0')
284       gettextjar = relocate (GETTEXTJAR);
285 
286     /* Prepare arguments.  */
287     args[0] = url;
288     args[1] = NULL;
289 
290     /* Fetch the URL's contents.  */
291     java_exitcode = 127;
292     if (!execute_java_class (class_name, &gettextjar, 1, true, NULL,
293                              args,
294                              false, true,
295                              execute_it, NULL))
296       {
297         if (verbose)
298           {
299             if (java_exitcode == 0)
300               fprintf (stderr, _(" done.\n"));
301             else if (java_exitcode == 2)
302               fprintf (stderr, _(" timed out.\n"));
303           }
304         return;
305       }
306   }
307 #endif
308 
309   /* Second try: using "wget -q -O - -T 30 url".  */
310   {
311     static bool wget_tested;
312     static bool wget_present;
313 
314     if (!wget_tested)
315       {
316         /* Test for presence of wget: "wget --version > /dev/null"  */
317         char *argv[3];
318         int exitstatus;
319 
320         argv[0] = "wget";
321         argv[1] = "--version";
322         argv[2] = NULL;
323         exitstatus = execute ("wget", "wget", argv, false, false, true, true,
324                               true, false, NULL);
325         wget_present = (exitstatus == 0);
326         wget_tested = true;
327       }
328 
329     if (wget_present)
330       {
331         char *argv[10];
332         int exitstatus;
333 
334         argv[0] = "wget";
335         argv[1] = "--quiet";
336         argv[2] = "--output-document"; argv[3] = "-";
337         argv[4] = "--timeout"; argv[5] = "30";
338         argv[6] = "--user-agent"; argv[7] = "urlget";
339         argv[8] = (char *) url;
340         argv[9] = NULL;
341         exitstatus = execute ("wget", "wget", argv, true, false, false, false,
342                               true, false, NULL);
343         if (exitstatus != 127)
344           {
345             if (exitstatus != 0)
346               goto failed;
347             if (verbose)
348               fprintf (stderr, _(" done.\n"));
349             return;
350           }
351       }
352   }
353 
354   /* Third try: using "lynx -source url".  */
355   {
356     static bool lynx_tested;
357     static bool lynx_present;
358 
359     if (!lynx_tested)
360       {
361         /* Test for presence of lynx: "lynx --version > /dev/null"  */
362         char *argv[3];
363         int exitstatus;
364 
365         argv[0] = "lynx";
366         argv[1] = "--version";
367         argv[2] = NULL;
368         exitstatus = execute ("lynx", "lynx", argv, false, false, true, true,
369                               true, false, NULL);
370         lynx_present = (exitstatus == 0);
371         lynx_tested = true;
372       }
373 
374     if (lynx_present)
375       {
376         char *argv[5];
377         int exitstatus;
378 
379         argv[0] = "lynx";
380         argv[1] = "-useragent=urlget";
381         argv[2] = "-source";
382         argv[3] = (char *) url;
383         argv[4] = NULL;
384         exitstatus = execute ("lynx", "lynx", argv, true, false, false, false,
385                               true, false, NULL);
386         if (exitstatus != 127)
387           {
388             if (exitstatus != 0)
389               goto failed;
390             if (verbose)
391               fprintf (stderr, _(" done.\n"));
392             return;
393           }
394       }
395   }
396 
397   /* Fourth try: using "curl --silent url".  */
398   {
399     static bool curl_tested;
400     static bool curl_present;
401 
402     if (!curl_tested)
403       {
404         /* Test for presence of curl: "curl --version > /dev/null"  */
405         char *argv[3];
406         int exitstatus;
407 
408         argv[0] = "curl";
409         argv[1] = "--version";
410         argv[2] = NULL;
411         exitstatus = execute ("curl", "curl", argv, false, false, true, true,
412                               true, false, NULL);
413         curl_present = (exitstatus == 0 || exitstatus == 2);
414         curl_tested = true;
415       }
416 
417     if (curl_present)
418       {
419         char *argv[6];
420         int exitstatus;
421 
422         argv[0] = "curl";
423         argv[1] = "--silent";
424         argv[2] = "--user-agent"; argv[3] = "urlget";
425         argv[4] = (char *) url;
426         argv[5] = NULL;
427         exitstatus = execute ("curl", "curl", argv, true, false, false, false,
428                               true, false, NULL);
429         if (exitstatus != 127)
430           {
431             if (exitstatus != 0)
432               goto failed;
433             if (verbose)
434               fprintf (stderr, _(" done.\n"));
435             return;
436           }
437       }
438   }
439 
440  failed:
441   if (verbose)
442     fprintf (stderr, _(" failed.\n"));
443   /* Use the file as fallback.  */
444   cat_file (file);
445 }
446