From 1cbb23b16e6d5cd6c8fb55d3f9e48285aa13f4ed Mon Sep 17 00:00:00 2001 From: Georges Da Costa <dacosta@irit.fr> Date: Tue, 3 May 2022 12:02:09 +0200 Subject: [PATCH] Cleans the jupyter file --- 0_prepare_workload.ipynb | 464 +-------------------------------------- 1 file changed, 3 insertions(+), 461 deletions(-) diff --git a/0_prepare_workload.ipynb b/0_prepare_workload.ipynb index 984219f..7ce38c9 100644 --- a/0_prepare_workload.ipynb +++ b/0_prepare_workload.ipynb @@ -19,242 +19,9 @@ "source": [ "# Download the workload (548.3 MB unzipped)\n", "!wget https://www.cs.huji.ac.il/labs/parallel/workload/l_metacentrum2/METACENTRUM-2013-3.swf.gz \\\n", - " --output-file workload/METACENTRUM-2013-3.swf.gz " + " --no-check-certificate -nc -P workload workload/METACENTRUM-2013-3.swf.gz" ] }, - { - "cell_type": "code", - "execution_count": 5, - "id": "a8982775", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "GNU Wget 1.21.2, a non-interactive network retriever.\r\n", - "Usage: wget [OPTION]... [URL]...\r\n", - "\r\n", - "Mandatory arguments to long options are mandatory for short options too.\r\n", - "\r\n", - "Startup:\r\n", - " -V, --version display the version of Wget and exit\r\n", - " -h, --help print this help\r\n", - " -b, --background go to background after startup\r\n", - " -e, --execute=COMMAND execute a `.wgetrc'-style command\r\n", - "\r\n", - "Logging and input file:\r\n", - " -o, --output-file=FILE log messages to FILE\r\n", - " -a, --append-output=FILE append messages to FILE\r\n", - " -d, --debug print lots of debugging information\r\n", - " -q, --quiet quiet (no output)\r\n", - " -v, --verbose be verbose (this is the default)\r\n", - " -nv, --no-verbose turn off verboseness, without being quiet\r\n", - " --report-speed=TYPE output bandwidth as TYPE. TYPE can be bits\r\n", - " -i, --input-file=FILE download URLs found in local or external FILE\r\n", - " -F, --force-html treat input file as HTML\r\n", - " -B, --base=URL resolves HTML input-file links (-i -F)\r\n", - " relative to URL\r\n", - " --config=FILE specify config file to use\r\n", - " --no-config do not read any config file\r\n", - " --rejected-log=FILE log reasons for URL rejection to FILE\r\n", - "\r\n", - "Download:\r\n", - " -t, --tries=NUMBER set number of retries to NUMBER (0 unlimits)\r\n", - " --retry-connrefused retry even if connection is refused\r\n", - " --retry-on-http-error=ERRORS comma-separated list of HTTP errors to retry\r\n", - " -O, --output-document=FILE write documents to FILE\r\n", - " -nc, --no-clobber skip downloads that would download to\r\n", - " existing files (overwriting them)\r\n", - " --no-netrc don't try to obtain credentials from .netrc\r\n", - " -c, --continue resume getting a partially-downloaded file\r\n", - " --start-pos=OFFSET start downloading from zero-based position OFFSET\r\n", - " --progress=TYPE select progress gauge type\r\n", - " --show-progress display the progress bar in any verbosity mode\r\n", - " -N, --timestamping don't re-retrieve files unless newer than\r\n", - " local\r\n", - " --no-if-modified-since don't use conditional if-modified-since get\r\n", - " requests in timestamping mode\r\n", - " --no-use-server-timestamps don't set the local file's timestamp by\r\n", - " the one on the server\r\n", - " -S, --server-response print server response\r\n", - " --spider don't download anything\r\n", - " -T, --timeout=SECONDS set all timeout values to SECONDS\r\n", - " --dns-timeout=SECS set the DNS lookup timeout to SECS\r\n", - " --connect-timeout=SECS set the connect timeout to SECS\r\n", - " --read-timeout=SECS set the read timeout to SECS\r\n", - " -w, --wait=SECONDS wait SECONDS between retrievals\r\n", - " (applies if more then 1 URL is to be retrieved)\r\n", - " --waitretry=SECONDS wait 1..SECONDS between retries of a retrieval\r\n", - " (applies if more then 1 URL is to be retrieved)\r\n", - " --random-wait wait from 0.5*WAIT...1.5*WAIT secs between retrievals\r\n", - " (applies if more then 1 URL is to be retrieved)\r\n", - " --no-proxy explicitly turn off proxy\r\n", - " -Q, --quota=NUMBER set retrieval quota to NUMBER\r\n", - " --bind-address=ADDRESS bind to ADDRESS (hostname or IP) on local host\r\n", - " --limit-rate=RATE limit download rate to RATE\r\n", - " --no-dns-cache disable caching DNS lookups\r\n", - " --restrict-file-names=OS restrict chars in file names to ones OS allows\r\n", - " --ignore-case ignore case when matching files/directories\r\n", - " -4, --inet4-only connect only to IPv4 addresses\r\n", - " -6, --inet6-only connect only to IPv6 addresses\r\n", - " --prefer-family=FAMILY connect first to addresses of specified family,\r\n", - " one of IPv6, IPv4, or none\r\n", - " --user=USER set both ftp and http user to USER\r\n", - " --password=PASS set both ftp and http password to PASS\r\n", - " --ask-password prompt for passwords\r\n", - " --use-askpass=COMMAND specify credential handler for requesting \r\n", - " username and password. If no COMMAND is \r\n", - " specified the WGET_ASKPASS or the SSH_ASKPASS \r\n", - " environment variable is used.\r\n", - " --no-iri turn off IRI support\r\n", - " --local-encoding=ENC use ENC as the local encoding for IRIs\r\n", - " --remote-encoding=ENC use ENC as the default remote encoding\r\n", - " --unlink remove file before clobber\r\n", - " --xattr turn on storage of metadata in extended file attributes\r\n", - "\r\n", - "Directories:\r\n", - " -nd, --no-directories don't create directories\r\n", - " -x, --force-directories force creation of directories\r\n", - " -nH, --no-host-directories don't create host directories\r\n", - " --protocol-directories use protocol name in directories\r\n", - " -P, --directory-prefix=PREFIX save files to PREFIX/..\r\n", - " --cut-dirs=NUMBER ignore NUMBER remote directory components\r\n", - "\r\n", - "HTTP options:\r\n", - " --http-user=USER set http user to USER\r\n", - " --http-password=PASS set http password to PASS\r\n", - " --no-cache disallow server-cached data\r\n", - " --default-page=NAME change the default page name (normally\r\n", - " this is 'index.html'.)\r\n", - " -E, --adjust-extension save HTML/CSS documents with proper extensions\r\n", - " --ignore-length ignore 'Content-Length' header field\r\n", - " --header=STRING insert STRING among the headers\r\n", - " --compression=TYPE choose compression, one of auto, gzip and none. (default: none)\r\n", - " --max-redirect maximum redirections allowed per page\r\n", - " --proxy-user=USER set USER as proxy username\r\n", - " --proxy-password=PASS set PASS as proxy password\r\n", - " --referer=URL include 'Referer: URL' header in HTTP request\r\n", - " --save-headers save the HTTP headers to file\r\n", - " -U, --user-agent=AGENT identify as AGENT instead of Wget/VERSION\r\n", - " --no-http-keep-alive disable HTTP keep-alive (persistent connections)\r\n", - " --no-cookies don't use cookies\r\n", - " --load-cookies=FILE load cookies from FILE before session\r\n", - " --save-cookies=FILE save cookies to FILE after session\r\n", - " --keep-session-cookies load and save session (non-permanent) cookies\r\n", - " --post-data=STRING use the POST method; send STRING as the data\r\n", - " --post-file=FILE use the POST method; send contents of FILE\r\n", - " --method=HTTPMethod use method \"HTTPMethod\" in the request\r\n", - " --body-data=STRING send STRING as data. --method MUST be set\r\n", - " --body-file=FILE send contents of FILE. --method MUST be set\r\n", - " --content-disposition honor the Content-Disposition header when\r\n", - " choosing local file names (EXPERIMENTAL)\r\n", - " --content-on-error output the received content on server errors\r\n", - " --auth-no-challenge send Basic HTTP authentication information\r\n", - " without first waiting for the server's\r\n", - " challenge\r\n", - "\r\n", - "HTTPS (SSL/TLS) options:\r\n", - " --secure-protocol=PR choose secure protocol, one of auto, SSLv2,\r\n", - " SSLv3, TLSv1, TLSv1_1, TLSv1_2 and PFS\r\n", - " --https-only only follow secure HTTPS links\r\n", - " --no-check-certificate don't validate the server's certificate\r\n", - " --certificate=FILE client certificate file\r\n", - " --certificate-type=TYPE client certificate type, PEM or DER\r\n", - " --private-key=FILE private key file\r\n", - " --private-key-type=TYPE private key type, PEM or DER\r\n", - " --ca-certificate=FILE file with the bundle of CAs\r\n", - " --ca-directory=DIR directory where hash list of CAs is stored\r\n", - " --crl-file=FILE file with bundle of CRLs\r\n", - " --pinnedpubkey=FILE/HASHES Public key (PEM/DER) file, or any number\r\n", - " of base64 encoded sha256 hashes preceded by\r\n", - " 'sha256//' and separated by ';', to verify\r\n", - " peer against\r\n", - " --random-file=FILE file with random data for seeding the SSL PRNG\r\n", - "\r\n", - " --ciphers=STR Set the priority string (GnuTLS) or cipher list string (OpenSSL) directly.\r\n", - " Use with care. This option overrides --secure-protocol.\r\n", - " The format and syntax of this string depend on the specific SSL/TLS engine.\r\n", - "HSTS options:\r\n", - " --no-hsts disable HSTS\r\n", - " --hsts-file path of HSTS database (will override default)\r\n", - "\r\n", - "FTP options:\r\n", - " --ftp-user=USER set ftp user to USER\r\n", - " --ftp-password=PASS set ftp password to PASS\r\n", - " --no-remove-listing don't remove '.listing' files\r\n", - " --no-glob turn off FTP file name globbing\r\n", - " --no-passive-ftp disable the \"passive\" transfer mode\r\n", - " --preserve-permissions preserve remote file permissions\r\n", - " --retr-symlinks when recursing, get linked-to files (not dir)\r\n", - "\r\n", - "FTPS options:\r\n", - " --ftps-implicit use implicit FTPS (default port is 990)\r\n", - " --ftps-resume-ssl resume the SSL/TLS session started in the control connection when\r\n", - " opening a data connection\r\n", - " --ftps-clear-data-connection cipher the control channel only; all the data will be in plaintext\r\n", - " --ftps-fallback-to-ftp fall back to FTP if FTPS is not supported in the target server\r\n", - "WARC options:\r\n", - " --warc-file=FILENAME save request/response data to a .warc.gz file\r\n", - " --warc-header=STRING insert STRING into the warcinfo record\r\n", - " --warc-max-size=NUMBER set maximum size of WARC files to NUMBER\r\n", - " --warc-cdx write CDX index files\r\n", - " --warc-dedup=FILENAME do not store records listed in this CDX file\r\n", - " --no-warc-compression do not compress WARC files with GZIP\r\n", - " --no-warc-digests do not calculate SHA1 digests\r\n", - " --no-warc-keep-log do not store the log file in a WARC record\r\n", - " --warc-tempdir=DIRECTORY location for temporary files created by the\r\n", - " WARC writer\r\n", - "\r\n", - "Recursive download:\r\n", - " -r, --recursive specify recursive download\r\n", - " -l, --level=NUMBER maximum recursion depth (inf or 0 for infinite)\r\n", - " --delete-after delete files locally after downloading them\r\n", - " -k, --convert-links make links in downloaded HTML or CSS point to\r\n", - " local files\r\n", - " --convert-file-only convert the file part of the URLs only (usually known as the basename)\r\n", - " --backups=N before writing file X, rotate up to N backup files\r\n", - " -K, --backup-converted before converting file X, back up as X.orig\r\n", - " -m, --mirror shortcut for -N -r -l inf --no-remove-listing\r\n", - " -p, --page-requisites get all images, etc. needed to display HTML page\r\n", - " --strict-comments turn on strict (SGML) handling of HTML comments\r\n", - "\r\n", - "Recursive accept/reject:\r\n", - " -A, --accept=LIST comma-separated list of accepted extensions\r\n", - " -R, --reject=LIST comma-separated list of rejected extensions\r\n", - " --accept-regex=REGEX regex matching accepted URLs\r\n", - " --reject-regex=REGEX regex matching rejected URLs\r\n", - " --regex-type=TYPE regex type (posix|pcre)\r\n", - " -D, --domains=LIST comma-separated list of accepted domains\r\n", - " --exclude-domains=LIST comma-separated list of rejected domains\r\n", - " --follow-ftp follow FTP links from HTML documents\r\n", - " --follow-tags=LIST comma-separated list of followed HTML tags\r\n", - " --ignore-tags=LIST comma-separated list of ignored HTML tags\r\n", - " -H, --span-hosts go to foreign hosts when recursive\r\n", - " -L, --relative follow relative links only\r\n", - " -I, --include-directories=LIST list of allowed directories\r\n", - " --trust-server-names use the name specified by the redirection\r\n", - " URL's last component\r\n", - " -X, --exclude-directories=LIST list of excluded directories\r\n", - " -np, --no-parent don't ascend to the parent directory\r\n", - "\r\n", - "Email bug reports, questions, discussions to <bug-wget@gnu.org>\r\n", - "and/or open issues at https://savannah.gnu.org/bugs/?func=additem&group=wget.\r\n" - ] - } - ], - "source": [ - "!wget --help" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "e75c1fdf", - "metadata": {}, - "outputs": [], - "source": [] - }, { "cell_type": "code", "execution_count": 6, @@ -274,231 +41,6 @@ "!gunzip workload/METACENTRUM-2013-3.swf.gz" ] }, - { - "cell_type": "code", - "execution_count": 1, - "id": "d4cd4f2c", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "GNU Wget 1.21.2, a non-interactive network retriever.\r\n", - "Usage: wget [OPTION]... [URL]...\r\n", - "\r\n", - "Mandatory arguments to long options are mandatory for short options too.\r\n", - "\r\n", - "Startup:\r\n", - " -V, --version display the version of Wget and exit\r\n", - " -h, --help print this help\r\n", - " -b, --background go to background after startup\r\n", - " -e, --execute=COMMAND execute a `.wgetrc'-style command\r\n", - "\r\n", - "Logging and input file:\r\n", - " -o, --output-file=FILE log messages to FILE\r\n", - " -a, --append-output=FILE append messages to FILE\r\n", - " -d, --debug print lots of debugging information\r\n", - " -q, --quiet quiet (no output)\r\n", - " -v, --verbose be verbose (this is the default)\r\n", - " -nv, --no-verbose turn off verboseness, without being quiet\r\n", - " --report-speed=TYPE output bandwidth as TYPE. TYPE can be bits\r\n", - " -i, --input-file=FILE download URLs found in local or external FILE\r\n", - " -F, --force-html treat input file as HTML\r\n", - " -B, --base=URL resolves HTML input-file links (-i -F)\r\n", - " relative to URL\r\n", - " --config=FILE specify config file to use\r\n", - " --no-config do not read any config file\r\n", - " --rejected-log=FILE log reasons for URL rejection to FILE\r\n", - "\r\n", - "Download:\r\n", - " -t, --tries=NUMBER set number of retries to NUMBER (0 unlimits)\r\n", - " --retry-connrefused retry even if connection is refused\r\n", - " --retry-on-http-error=ERRORS comma-separated list of HTTP errors to retry\r\n", - " -O, --output-document=FILE write documents to FILE\r\n", - " -nc, --no-clobber skip downloads that would download to\r\n", - " existing files (overwriting them)\r\n", - " --no-netrc don't try to obtain credentials from .netrc\r\n", - " -c, --continue resume getting a partially-downloaded file\r\n", - " --start-pos=OFFSET start downloading from zero-based position OFFSET\r\n", - " --progress=TYPE select progress gauge type\r\n", - " --show-progress display the progress bar in any verbosity mode\r\n", - " -N, --timestamping don't re-retrieve files unless newer than\r\n", - " local\r\n", - " --no-if-modified-since don't use conditional if-modified-since get\r\n", - " requests in timestamping mode\r\n", - " --no-use-server-timestamps don't set the local file's timestamp by\r\n", - " the one on the server\r\n", - " -S, --server-response print server response\r\n", - " --spider don't download anything\r\n", - " -T, --timeout=SECONDS set all timeout values to SECONDS\r\n", - " --dns-timeout=SECS set the DNS lookup timeout to SECS\r\n", - " --connect-timeout=SECS set the connect timeout to SECS\r\n", - " --read-timeout=SECS set the read timeout to SECS\r\n", - " -w, --wait=SECONDS wait SECONDS between retrievals\r\n", - " (applies if more then 1 URL is to be retrieved)\r\n", - " --waitretry=SECONDS wait 1..SECONDS between retries of a retrieval\r\n", - " (applies if more then 1 URL is to be retrieved)\r\n", - " --random-wait wait from 0.5*WAIT...1.5*WAIT secs between retrievals\r\n", - " (applies if more then 1 URL is to be retrieved)\r\n", - " --no-proxy explicitly turn off proxy\r\n", - " -Q, --quota=NUMBER set retrieval quota to NUMBER\r\n", - " --bind-address=ADDRESS bind to ADDRESS (hostname or IP) on local host\r\n", - " --limit-rate=RATE limit download rate to RATE\r\n", - " --no-dns-cache disable caching DNS lookups\r\n", - " --restrict-file-names=OS restrict chars in file names to ones OS allows\r\n", - " --ignore-case ignore case when matching files/directories\r\n", - " -4, --inet4-only connect only to IPv4 addresses\r\n", - " -6, --inet6-only connect only to IPv6 addresses\r\n", - " --prefer-family=FAMILY connect first to addresses of specified family,\r\n", - " one of IPv6, IPv4, or none\r\n", - " --user=USER set both ftp and http user to USER\r\n", - " --password=PASS set both ftp and http password to PASS\r\n", - " --ask-password prompt for passwords\r\n", - " --use-askpass=COMMAND specify credential handler for requesting \r\n", - " username and password. If no COMMAND is \r\n", - " specified the WGET_ASKPASS or the SSH_ASKPASS \r\n", - " environment variable is used.\r\n", - " --no-iri turn off IRI support\r\n", - " --local-encoding=ENC use ENC as the local encoding for IRIs\r\n", - " --remote-encoding=ENC use ENC as the default remote encoding\r\n", - " --unlink remove file before clobber\r\n", - " --xattr turn on storage of metadata in extended file attributes\r\n", - "\r\n", - "Directories:\r\n", - " -nd, --no-directories don't create directories\r\n", - " -x, --force-directories force creation of directories\r\n", - " -nH, --no-host-directories don't create host directories\r\n", - " --protocol-directories use protocol name in directories\r\n", - " -P, --directory-prefix=PREFIX save files to PREFIX/..\r\n", - " --cut-dirs=NUMBER ignore NUMBER remote directory components\r\n", - "\r\n", - "HTTP options:\r\n", - " --http-user=USER set http user to USER\r\n", - " --http-password=PASS set http password to PASS\r\n", - " --no-cache disallow server-cached data\r\n", - " --default-page=NAME change the default page name (normally\r\n", - " this is 'index.html'.)\r\n", - " -E, --adjust-extension save HTML/CSS documents with proper extensions\r\n", - " --ignore-length ignore 'Content-Length' header field\r\n", - " --header=STRING insert STRING among the headers\r\n", - " --compression=TYPE choose compression, one of auto, gzip and none. (default: none)\r\n", - " --max-redirect maximum redirections allowed per page\r\n", - " --proxy-user=USER set USER as proxy username\r\n", - " --proxy-password=PASS set PASS as proxy password\r\n", - " --referer=URL include 'Referer: URL' header in HTTP request\r\n", - " --save-headers save the HTTP headers to file\r\n", - " -U, --user-agent=AGENT identify as AGENT instead of Wget/VERSION\r\n", - " --no-http-keep-alive disable HTTP keep-alive (persistent connections)\r\n", - " --no-cookies don't use cookies\r\n", - " --load-cookies=FILE load cookies from FILE before session\r\n", - " --save-cookies=FILE save cookies to FILE after session\r\n", - " --keep-session-cookies load and save session (non-permanent) cookies\r\n", - " --post-data=STRING use the POST method; send STRING as the data\r\n", - " --post-file=FILE use the POST method; send contents of FILE\r\n", - " --method=HTTPMethod use method \"HTTPMethod\" in the request\r\n", - " --body-data=STRING send STRING as data. --method MUST be set\r\n", - " --body-file=FILE send contents of FILE. --method MUST be set\r\n", - " --content-disposition honor the Content-Disposition header when\r\n", - " choosing local file names (EXPERIMENTAL)\r\n", - " --content-on-error output the received content on server errors\r\n", - " --auth-no-challenge send Basic HTTP authentication information\r\n", - " without first waiting for the server's\r\n", - " challenge\r\n", - "\r\n", - "HTTPS (SSL/TLS) options:\r\n", - " --secure-protocol=PR choose secure protocol, one of auto, SSLv2,\r\n", - " SSLv3, TLSv1, TLSv1_1, TLSv1_2 and PFS\r\n", - " --https-only only follow secure HTTPS links\r\n", - " --no-check-certificate don't validate the server's certificate\r\n", - " --certificate=FILE client certificate file\r\n", - " --certificate-type=TYPE client certificate type, PEM or DER\r\n", - " --private-key=FILE private key file\r\n", - " --private-key-type=TYPE private key type, PEM or DER\r\n", - " --ca-certificate=FILE file with the bundle of CAs\r\n", - " --ca-directory=DIR directory where hash list of CAs is stored\r\n", - " --crl-file=FILE file with bundle of CRLs\r\n", - " --pinnedpubkey=FILE/HASHES Public key (PEM/DER) file, or any number\r\n", - " of base64 encoded sha256 hashes preceded by\r\n", - " 'sha256//' and separated by ';', to verify\r\n", - " peer against\r\n", - " --random-file=FILE file with random data for seeding the SSL PRNG\r\n", - "\r\n", - " --ciphers=STR Set the priority string (GnuTLS) or cipher list string (OpenSSL) directly.\r\n", - " Use with care. This option overrides --secure-protocol.\r\n", - " The format and syntax of this string depend on the specific SSL/TLS engine.\r\n", - "HSTS options:\r\n", - " --no-hsts disable HSTS\r\n", - " --hsts-file path of HSTS database (will override default)\r\n", - "\r\n", - "FTP options:\r\n", - " --ftp-user=USER set ftp user to USER\r\n", - " --ftp-password=PASS set ftp password to PASS\r\n", - " --no-remove-listing don't remove '.listing' files\r\n", - " --no-glob turn off FTP file name globbing\r\n", - " --no-passive-ftp disable the \"passive\" transfer mode\r\n", - " --preserve-permissions preserve remote file permissions\r\n", - " --retr-symlinks when recursing, get linked-to files (not dir)\r\n", - "\r\n", - "FTPS options:\r\n", - " --ftps-implicit use implicit FTPS (default port is 990)\r\n", - " --ftps-resume-ssl resume the SSL/TLS session started in the control connection when\r\n", - " opening a data connection\r\n", - " --ftps-clear-data-connection cipher the control channel only; all the data will be in plaintext\r\n", - " --ftps-fallback-to-ftp fall back to FTP if FTPS is not supported in the target server\r\n", - "WARC options:\r\n", - " --warc-file=FILENAME save request/response data to a .warc.gz file\r\n", - " --warc-header=STRING insert STRING into the warcinfo record\r\n", - " --warc-max-size=NUMBER set maximum size of WARC files to NUMBER\r\n", - " --warc-cdx write CDX index files\r\n", - " --warc-dedup=FILENAME do not store records listed in this CDX file\r\n", - " --no-warc-compression do not compress WARC files with GZIP\r\n", - " --no-warc-digests do not calculate SHA1 digests\r\n", - " --no-warc-keep-log do not store the log file in a WARC record\r\n", - " --warc-tempdir=DIRECTORY location for temporary files created by the\r\n", - " WARC writer\r\n", - "\r\n", - "Recursive download:\r\n", - " -r, --recursive specify recursive download\r\n", - " -l, --level=NUMBER maximum recursion depth (inf or 0 for infinite)\r\n", - " --delete-after delete files locally after downloading them\r\n", - " -k, --convert-links make links in downloaded HTML or CSS point to\r\n", - " local files\r\n", - " --convert-file-only convert the file part of the URLs only (usually known as the basename)\r\n", - " --backups=N before writing file X, rotate up to N backup files\r\n", - " -K, --backup-converted before converting file X, back up as X.orig\r\n", - " -m, --mirror shortcut for -N -r -l inf --no-remove-listing\r\n", - " -p, --page-requisites get all images, etc. needed to display HTML page\r\n", - " --strict-comments turn on strict (SGML) handling of HTML comments\r\n", - "\r\n", - "Recursive accept/reject:\r\n", - " -A, --accept=LIST comma-separated list of accepted extensions\r\n", - " -R, --reject=LIST comma-separated list of rejected extensions\r\n", - " --accept-regex=REGEX regex matching accepted URLs\r\n", - " --reject-regex=REGEX regex matching rejected URLs\r\n", - " --regex-type=TYPE regex type (posix|pcre)\r\n", - " -D, --domains=LIST comma-separated list of accepted domains\r\n", - " --exclude-domains=LIST comma-separated list of rejected domains\r\n", - " --follow-ftp follow FTP links from HTML documents\r\n", - " --follow-tags=LIST comma-separated list of followed HTML tags\r\n", - " --ignore-tags=LIST comma-separated list of ignored HTML tags\r\n", - " -H, --span-hosts go to foreign hosts when recursive\r\n", - " -L, --relative follow relative links only\r\n", - " -I, --include-directories=LIST list of allowed directories\r\n", - " --trust-server-names use the name specified by the redirection\r\n", - " URL's last component\r\n", - " -X, --exclude-directories=LIST list of excluded directories\r\n", - " -np, --no-parent don't ascend to the parent directory\r\n", - "\r\n", - "Email bug reports, questions, discussions to <bug-wget@gnu.org>\r\n", - "and/or open issues at https://savannah.gnu.org/bugs/?func=additem&group=wget.\r\n" - ] - } - ], - "source": [ - "!wget --help" - ] - }, { "cell_type": "markdown", "id": "graphic-rabbit", @@ -687,7 +229,7 @@ ], "metadata": { "kernelspec": { - "display_name": "Python 3 (ipykernel)", + "display_name": "Python 3", "language": "python", "name": "python3" }, @@ -701,7 +243,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.9.6" + "version": "3.10.4" } }, "nbformat": 4, -- GitLab