1 /* Get the contents of an URL.
2 Copyright (C) 2001-2003, 2005-2010, 2012, 2017-2020 Free Software
3 Foundation, Inc.
4 Written by Bruno Haible <haible@clisp.cons.org>, 2001.
5
6 This program is free software: you can redistribute it and/or modify
7 it under the terms of the GNU General Public License as published by
8 the Free Software Foundation; either version 3 of the License, or
9 (at your option) any later version.
10
11 This program is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 GNU General Public License for more details.
15
16 You should have received a copy of the GNU General Public License
17 along with this program. If not, see <https://www.gnu.org/licenses/>. */
18
19
20 #ifdef HAVE_CONFIG_H
21 # include "config.h"
22 #endif
23
24 #include <errno.h>
25 #include <fcntl.h>
26 #include <getopt.h>
27 #include <stdbool.h>
28 #include <stdio.h>
29 #include <stdlib.h>
30 #include <locale.h>
31 #include <unistd.h>
32
33 #include "noreturn.h"
34 #include "closeout.h"
35 #include "error.h"
36 #include "error-progname.h"
37 #include "progname.h"
38 #include "relocatable.h"
39 #include "basename-lgpl.h"
40 #include "full-write.h"
41 #include "execute.h"
42 #include "javaexec.h"
43 #include "binary-io.h"
44 #include "propername.h"
45 #include "gettext.h"
46
47 #define _(str) gettext (str)
48
49 #ifndef STDOUT_FILENO
50 # define STDOUT_FILENO 1
51 #endif
52
53
/* Only high-level toolkits, written in languages with exception handling,
   have an URL datatype and operations to fetch an URL's contents.  Such
   toolkits are Java (class java.net.URL), Qt (classes QUrl and QUrlOperator).
   We use the Java toolkit.
   Note that this program doesn't handle redirection pages; programs which
   wish to process HTML redirection tags need to include a HTML parser,
   and only full-fledged browsers like w3m, lynx, links have both
   an URL fetcher (which covers at least the protocols "http", "ftp", "file")
   and a HTML parser.  [Well, this is not true: libxml2 and Java (see
   <http://java.sun.com/products/jfc/tsc/articles/bookmarks/>) also contain
   HTML parsers.]  */
65
66
/* Controls the progress messages written to standard error.
   Enabled unless -q/--quiet/--silent is given: the user should be told why
   an internet connection is being attempted, and a program that stays
   silent for more than 10 seconds without apparent reason is confusing.  */
static bool verbose = true;
72
/* Table of long options handed to getopt_long.  Each entry maps to the
   short option character returned to the caller; the all-zero entry
   terminates the table.  */
static const struct option long_options[] =
{
  { "help",    no_argument, NULL, 'h' },  /* same as -h */
  { "quiet",   no_argument, NULL, 'q' },  /* same as -q */
  { "silent",  no_argument, NULL, 'q' },  /* alias of --quiet */
  { "version", no_argument, NULL, 'V' },  /* same as -V */
  { NULL, 0, NULL, 0 }
};
82
83
84 /* Forward declaration of local functions. */
85 _GL_NORETURN_FUNC static void usage (int status);
86 static void fetch (const char *url, const char *file);
87
88
89 int
main(int argc,char * argv[])90 main (int argc, char *argv[])
91 {
92 int optchar;
93 bool do_help;
94 bool do_version;
95
96 /* Set program name for messages. */
97 set_program_name (argv[0]);
98 error_print_progname = maybe_print_progname;
99
100 /* Set locale via LC_ALL. */
101 setlocale (LC_ALL, "");
102
103 /* Set the text message domain. */
104 bindtextdomain (PACKAGE, relocate (LOCALEDIR));
105 textdomain (PACKAGE);
106
107 /* Ensure that write errors on stdout are detected. */
108 atexit (close_stdout);
109
110 /* Set default values for variables. */
111 do_help = false;
112 do_version = false;
113
114 /* Parse command line options. */
115 while ((optchar = getopt_long (argc, argv, "hqV", long_options, NULL)) != EOF)
116 switch (optchar)
117 {
118 case '\0': /* Long option. */
119 break;
120 case 'h': /* --help */
121 do_help = true;
122 break;
123 case 'q': /* --quiet / --silent */
124 verbose = false;
125 break;
126 case 'V': /* --version */
127 do_version = true;
128 break;
129 default:
130 usage (EXIT_FAILURE);
131 /* NOTREACHED */
132 }
133
134 /* Version information requested. */
135 if (do_version)
136 {
137 printf ("%s (GNU %s) %s\n", last_component (program_name),
138 PACKAGE, VERSION);
139 /* xgettext: no-wrap */
140 printf (_("Copyright (C) %s Free Software Foundation, Inc.\n\
141 License GPLv3+: GNU GPL version 3 or later <%s>\n\
142 This is free software: you are free to change and redistribute it.\n\
143 There is NO WARRANTY, to the extent permitted by law.\n\
144 "),
145 "2001-2020", "https://gnu.org/licenses/gpl.html");
146 printf (_("Written by %s.\n"), proper_name ("Bruno Haible"));
147 exit (EXIT_SUCCESS);
148 }
149
150 /* Help is requested. */
151 if (do_help)
152 usage (EXIT_SUCCESS);
153
154 /* Test argument count. */
155 if (optind + 2 != argc)
156 error (EXIT_FAILURE, 0, _("expected two arguments"));
157
158 /* Fetch the contents. */
159 fetch (argv[optind], argv[optind + 1]);
160
161 exit (EXIT_SUCCESS);
162 }
163
164 /* Display usage information and exit. */
165 static void
usage(int status)166 usage (int status)
167 {
168 if (status != EXIT_SUCCESS)
169 fprintf (stderr, _("Try '%s --help' for more information.\n"),
170 program_name);
171 else
172 {
173 printf (_("\
174 Usage: %s [OPTION] URL FILE\n\
175 "), program_name);
176 printf ("\n");
177 /* xgettext: no-wrap */
178 printf (_("\
179 Fetches and outputs the contents of an URL. If the URL cannot be accessed,\n\
180 the locally accessible FILE is used instead.\n\
181 "));
182 printf ("\n");
183 printf (_("\
184 Informative output:\n"));
185 printf (_("\
186 -h, --help display this help and exit\n"));
187 printf (_("\
188 -V, --version output version information and exit\n"));
189 printf (_("\
190 -q, --quiet, --silent suppress progress indicators\n"));
191 printf ("\n");
192 /* TRANSLATORS: The first placeholder is the web address of the Savannah
193 project of this package. The second placeholder is the bug-reporting
194 email address for this package. Please add _another line_ saying
195 "Report translation bugs to <...>\n" with the address for translation
196 bugs (typically your translation team's web or email address). */
197 printf(_("\
198 Report bugs in the bug tracker at <%s>\n\
199 or by email to <%s>.\n"),
200 "https://savannah.gnu.org/projects/gettext",
201 "bug-gettext@gnu.org");
202 }
203
204 exit (status);
205 }
206
207 /* Copy a file's contents to stdout. */
208 static void
cat_file(const char * src_filename)209 cat_file (const char *src_filename)
210 {
211 int src_fd;
212 char buf[4096];
213 const int buf_size = sizeof (buf);
214
215 src_fd = open (src_filename, O_RDONLY | O_BINARY);
216 if (src_fd < 0)
217 error (EXIT_FAILURE, errno, _("error while opening \"%s\" for reading"),
218 src_filename);
219
220 for (;;)
221 {
222 ssize_t n_read = read (src_fd, buf, buf_size);
223 if (n_read < 0)
224 {
225 #ifdef EINTR
226 if (errno == EINTR)
227 continue;
228 #endif
229 error (EXIT_FAILURE, errno, _("error reading \"%s\""), src_filename);
230 }
231 if (n_read == 0)
232 break;
233
234 if (full_write (STDOUT_FILENO, buf, n_read) < n_read)
235 error (EXIT_FAILURE, errno, _("error writing stdout"));
236 }
237
238 if (close (src_fd) < 0)
239 error (EXIT_FAILURE, errno, _("error after reading \"%s\""), src_filename);
240 }
241
242 #if USEJAVA
243
/* Exit code of the Java program, as recorded by execute_it.  */
static int java_exitcode;

/* Callback handed to execute_java_class: runs the prepared command
   PROG_PATH with arguments PROG_ARGV via execute () and stores its exit
   code in java_exitcode.  PRIVATE_DATA is unused.
   Returns false when the exit code is 0 (success) or 2 (timed out),
   true for every other exit code.  */
static bool
execute_it (const char *progname,
            const char *prog_path, char **prog_argv,
            void *private_data)
{
  int status;

  (void) private_data;

  status = execute (progname, prog_path, prog_argv,
                    true, true, false, false, true, false, NULL);
  java_exitcode = status;

  /* Only exit codes other than 0 and 2 count as an error here.  */
  return status != 0 && status != 2;
}
260
261 #endif
262
263 /* Fetch the URL. Upon error, use the FILE as fallback. */
264 static void
fetch(const char * url,const char * file)265 fetch (const char *url, const char *file)
266 {
267 if (verbose)
268 {
269 fprintf (stderr, _("Retrieving %s..."), url);
270 fflush (stderr);
271 }
272
273 #if USEJAVA
274 /* First try: using Java. */
275 {
276 const char *class_name = "gnu.gettext.GetURL";
277 const char *gettextjar;
278 const char *args[2];
279
280 /* Make it possible to override the gettext.jar location. This is
281 necessary for running the testsuite before "make install". */
282 gettextjar = getenv ("GETTEXTJAR");
283 if (gettextjar == NULL || gettextjar[0] == '\0')
284 gettextjar = relocate (GETTEXTJAR);
285
286 /* Prepare arguments. */
287 args[0] = url;
288 args[1] = NULL;
289
290 /* Fetch the URL's contents. */
291 java_exitcode = 127;
292 if (!execute_java_class (class_name, &gettextjar, 1, true, NULL,
293 args,
294 false, true,
295 execute_it, NULL))
296 {
297 if (verbose)
298 {
299 if (java_exitcode == 0)
300 fprintf (stderr, _(" done.\n"));
301 else if (java_exitcode == 2)
302 fprintf (stderr, _(" timed out.\n"));
303 }
304 return;
305 }
306 }
307 #endif
308
309 /* Second try: using "wget -q -O - -T 30 url". */
310 {
311 static bool wget_tested;
312 static bool wget_present;
313
314 if (!wget_tested)
315 {
316 /* Test for presence of wget: "wget --version > /dev/null" */
317 char *argv[3];
318 int exitstatus;
319
320 argv[0] = "wget";
321 argv[1] = "--version";
322 argv[2] = NULL;
323 exitstatus = execute ("wget", "wget", argv, false, false, true, true,
324 true, false, NULL);
325 wget_present = (exitstatus == 0);
326 wget_tested = true;
327 }
328
329 if (wget_present)
330 {
331 char *argv[10];
332 int exitstatus;
333
334 argv[0] = "wget";
335 argv[1] = "--quiet";
336 argv[2] = "--output-document"; argv[3] = "-";
337 argv[4] = "--timeout"; argv[5] = "30";
338 argv[6] = "--user-agent"; argv[7] = "urlget";
339 argv[8] = (char *) url;
340 argv[9] = NULL;
341 exitstatus = execute ("wget", "wget", argv, true, false, false, false,
342 true, false, NULL);
343 if (exitstatus != 127)
344 {
345 if (exitstatus != 0)
346 goto failed;
347 if (verbose)
348 fprintf (stderr, _(" done.\n"));
349 return;
350 }
351 }
352 }
353
354 /* Third try: using "lynx -source url". */
355 {
356 static bool lynx_tested;
357 static bool lynx_present;
358
359 if (!lynx_tested)
360 {
361 /* Test for presence of lynx: "lynx --version > /dev/null" */
362 char *argv[3];
363 int exitstatus;
364
365 argv[0] = "lynx";
366 argv[1] = "--version";
367 argv[2] = NULL;
368 exitstatus = execute ("lynx", "lynx", argv, false, false, true, true,
369 true, false, NULL);
370 lynx_present = (exitstatus == 0);
371 lynx_tested = true;
372 }
373
374 if (lynx_present)
375 {
376 char *argv[5];
377 int exitstatus;
378
379 argv[0] = "lynx";
380 argv[1] = "-useragent=urlget";
381 argv[2] = "-source";
382 argv[3] = (char *) url;
383 argv[4] = NULL;
384 exitstatus = execute ("lynx", "lynx", argv, true, false, false, false,
385 true, false, NULL);
386 if (exitstatus != 127)
387 {
388 if (exitstatus != 0)
389 goto failed;
390 if (verbose)
391 fprintf (stderr, _(" done.\n"));
392 return;
393 }
394 }
395 }
396
397 /* Fourth try: using "curl --silent url". */
398 {
399 static bool curl_tested;
400 static bool curl_present;
401
402 if (!curl_tested)
403 {
404 /* Test for presence of curl: "curl --version > /dev/null" */
405 char *argv[3];
406 int exitstatus;
407
408 argv[0] = "curl";
409 argv[1] = "--version";
410 argv[2] = NULL;
411 exitstatus = execute ("curl", "curl", argv, false, false, true, true,
412 true, false, NULL);
413 curl_present = (exitstatus == 0 || exitstatus == 2);
414 curl_tested = true;
415 }
416
417 if (curl_present)
418 {
419 char *argv[6];
420 int exitstatus;
421
422 argv[0] = "curl";
423 argv[1] = "--silent";
424 argv[2] = "--user-agent"; argv[3] = "urlget";
425 argv[4] = (char *) url;
426 argv[5] = NULL;
427 exitstatus = execute ("curl", "curl", argv, true, false, false, false,
428 true, false, NULL);
429 if (exitstatus != 127)
430 {
431 if (exitstatus != 0)
432 goto failed;
433 if (verbose)
434 fprintf (stderr, _(" done.\n"));
435 return;
436 }
437 }
438 }
439
440 failed:
441 if (verbose)
442 fprintf (stderr, _(" failed.\n"));
443 /* Use the file as fallback. */
444 cat_file (file);
445 }
446