From eafd1f52461d9dac907f4a0745a2ec239dd31c8d Mon Sep 17 00:00:00 2001 From: Raja Boujbel Date: Thu, 28 Nov 2024 15:36:18 +0100 Subject: [PATCH 01/11] gitignore: add release binary output directory --- .gitignore | 1 + 1 file changed, 1 insertion(+) diff --git a/.gitignore b/.gitignore index d3db3d09ff0..f76e4c363da 100644 --- a/.gitignore +++ b/.gitignore @@ -99,3 +99,4 @@ doc/*.build src/x_build_libs.ocp # dev setup /.ocamlinit +/release/out From 55306c13fb165d8cc5f663c3bffe2d81f7fa25f6 Mon Sep 17 00:00:00 2001 From: Raja Boujbel Date: Wed, 19 Jun 2024 17:23:42 +0200 Subject: [PATCH 02/11] download: fix POST option for wget --- master_changes.md | 1 + src/repository/opamDownload.ml | 3 ++- 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/master_changes.md b/master_changes.md index fad17f3ef0e..7eac516eca5 100644 --- a/master_changes.md +++ b/master_changes.md @@ -169,6 +169,7 @@ users) * `OpamArg.InvalidCLI`: export exception [#6150 @rjbou] ## opam-repository + * `OpamDownload.get_output`: fix `wget` option for `POST` requests [#6036 @rjbou] ## opam-state diff --git a/src/repository/opamDownload.ml b/src/repository/opamDownload.ml index 633f43762b4..ab07a764b33 100644 --- a/src/repository/opamDownload.ml +++ b/src/repository/opamDownload.ml @@ -268,7 +268,8 @@ let get_output ~post ?(args=[]) url = let cmd_args = if post then match cmd_args with - | ("wget"|"curl" as cmd)::args -> Some (cmd :: ["-X"; "POST"] @ args) + | ("curl" as cmd)::args -> Some (cmd :: ["-X"; "POST"] @ args) + | ("wget" as cmd)::args -> Some (cmd :: ["--method"; "POST"] @ args) | _ -> None else Some cmd_args in From f703625497d3f5820a261cd2889ba1014dc4bf0a Mon Sep 17 00:00:00 2001 From: Raja Boujbel Date: Wed, 19 Jun 2024 17:24:56 +0200 Subject: [PATCH 03/11] download: use long format option for curl POST request --- master_changes.md | 1 + src/repository/opamDownload.ml | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/master_changes.md b/master_changes.md index 7eac516eca5..bb5e863f8e3 100644 --- a/master_changes.md +++ b/master_changes.md @@ -170,6 +170,7 @@ users) ## opam-repository * `OpamDownload.get_output`: fix `wget` option for `POST` requests [#6036 @rjbou] + * `OpamDownload.get_output`: use long form for `curl` `POST` request option [#6036 @rjbou] ## opam-state diff --git a/src/repository/opamDownload.ml b/src/repository/opamDownload.ml index ab07a764b33..fef1e3c63a7 100644 --- a/src/repository/opamDownload.ml +++ b/src/repository/opamDownload.ml @@ -268,7 +268,7 @@ let get_output ~post ?(args=[]) url = let cmd_args = if post then match cmd_args with - | ("curl" as cmd)::args -> Some (cmd :: ["-X"; "POST"] @ args) + | ("curl" as cmd)::args -> Some (cmd :: ["--request"; "POST"] @ args) | ("wget" as cmd)::args -> Some (cmd :: ["--method"; "POST"] @ args) | _ -> None else Some cmd_args From 70c0a40bd99144c10e0974f803d2be1b529d2092 Mon Sep 17 00:00:00 2001 From: Raja Boujbel Date: Wed, 27 Nov 2024 17:51:55 +0100 Subject: [PATCH 04/11] download: remove wget support for SWH fallback to focus on more fine-grained curl handling --- master_changes.md | 1 + src/repository/opamDownload.ml | 20 ++++++-------------- tests/reftests/swhid.unix.test | 18 +----------------- 3 files changed, 8 insertions(+), 31 deletions(-) diff --git a/master_changes.md b/master_changes.md index bb5e863f8e3..04054682d62 100644 --- a/master_changes.md +++ b/master_changes.md @@ -64,6 +64,7 @@ users) ## Repository * Accurately tag `curl` download command when loaded from global config file [#6270 @rjbou] + * Remove wget support for Software Heritage fallback [#6036 @rjbou - fix #5721] ## Lock diff --git a/src/repository/opamDownload.ml b/src/repository/opamDownload.ml index fef1e3c63a7..e770e9695e3 100644 --- a/src/repository/opamDownload.ml +++ b/src/repository/opamDownload.ml @@ -253,12 +253,6 @@ let download ?quiet ?validate ~overwrite ?compress ?checksum url dstdir = (** Stdout output retrieval and post requests management *) -let post_tools = ["wget"; "curl"] -let check_post_tool () = - match Lazy.force OpamRepositoryConfig.(!r.download_tool) with - | [(CIdent cmd), _], _ -> List.mem cmd post_tools - | _ -> false - let get_output ~post ?(args=[]) url = let cmd_args = download_args ~url ~out:"-" ~retry:OpamRepositoryConfig.(!r.retries) @@ -319,9 +313,8 @@ module SWHID = struct | Some out -> Some (String.concat "" out) | None -> - OpamConsole.error "Software Heritage fallback needs %s or %s installed" - (OpamConsole.colorise `underline "curl") - (OpamConsole.colorise `underline "wget"); + OpamConsole.error "Software Heritage fallback needs %s installed" + (OpamConsole.colorise `underline "curl"); None let get_dir hash = @@ -365,7 +358,8 @@ module SWHID = struct match OpamFile.URL.swhid urlf with | None -> Done (Result None) | Some swhid -> - if check_post_tool () then + match Lazy.force OpamRepositoryConfig.(!r.download_tool) with + | _, `Curl -> check_liveness () @@+ fun alive -> if alive then (* Add a global modifier and/or command for default answering *) @@ -424,10 +418,8 @@ module SWHID = struct Done (Not_available (Some (fallback_err "unreachable"), fallback_err "network failure or API down")) - else + | _ -> Done (Not_available (Some (fallback_err "no retrieval"), - fallback_err "Download tool permitting post request (%s) not \ - set as download tool" - (OpamStd.Format.pretty_list post_tools))) + fallback_err "curl is required for Software Heritage fallback")) end diff --git a/tests/reftests/swhid.unix.test b/tests/reftests/swhid.unix.test index 737711c3e98..f12f5d32bf6 100644 --- a/tests/reftests/swhid.unix.test +++ b/tests/reftests/swhid.unix.test @@ -100,22 +100,6 @@ The following actions will be performed: Done. ### opam clean -c Clearing cache of downloaded files -### opam option download-command=wget -Set to 'wget' the field download-command in global configuration -### opam install snappy-swhid-dir -v | grep -v '^Processing' -The following actions will be performed: -=== install 1 package - - install snappy-swhid-dir 2 - -<><> Processing actions <><><><><><><><><><><><><><><><><><><><><><><><><><><><> -Source https://fake.exe/url.tar.gz is not available. Do you want to try to retrieve it from Software Heritage cache (https://www.softwareheritage.org)? It may take few minutes. [y/N] y --> retrieved snappy-swhid-dir.2 (SWH fallback) --> installed snappy-swhid-dir.2 -Done. -### opam clean -c -Clearing cache of downloaded files -### opam option download-command=curl -Set to 'curl' the field download-command in global configuration ### opam-version: "2.0" url { @@ -152,7 +136,7 @@ The following actions will be performed: <><> Processing actions <><><><><><><><><><><><><><><><><><><><><><><><><><><><> [ERROR] Failed to get sources of snappy-swhid-ko.2: SWH fallback: no retrieval -OpamSolution.Fetch_fail("SWH fallback: Download tool permitting post request (wget and curl) not set as download tool") +OpamSolution.Fetch_fail("SWH fallback: curl is required for Software Heritage fallback") <><> Error report <><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><> From cde86049f386f3073a485b828bb1464bcaa42106 Mon Sep 17 00:00:00 2001 From: Raja Boujbel Date: Wed, 19 Jun 2024 18:42:14 +0200 Subject: [PATCH 05/11] download: fix SWH liveness check --- master_changes.md | 1 + src/repository/opamDownload.ml | 9 +++++++-- 2 files changed, 8 insertions(+), 2 deletions(-) diff --git a/master_changes.md b/master_changes.md index 04054682d62..a9d748858b4 100644 --- a/master_changes.md +++ b/master_changes.md @@ -65,6 +65,7 @@ users) ## Repository * Accurately tag `curl` download command when loaded from global config file [#6270 @rjbou] * Remove wget support for Software Heritage fallback [#6036 @rjbou - fix #5721] + * [BUG] Fix SWH liveness check [#6036 @rjbou - fix #5721] ## Lock diff --git a/src/repository/opamDownload.ml b/src/repository/opamDownload.ml index e770e9695e3..cbaca703644 100644 --- a/src/repository/opamDownload.ml +++ b/src/repository/opamDownload.ml @@ -283,8 +283,13 @@ module SWHID = struct let check_liveness () = OpamProcess.Job.catch (fun _ -> Done false) @@ fun () -> - get_output ~post:true OpamUrl.Op.(instance / "api" / "1" / "ping" / "") - @@| fun _ -> true + get_output ~post:false OpamUrl.Op.(instance / "api" / "1" / "ping" / "") + @@| function + | Some (pong::_) -> + (* curl output after answer the http code *) + (* https://archive.softwareheritage.org/api/1/ping/ *) + OpamStd.String.starts_with ~prefix:"\"pong\"" pong + | Some _ | None -> false let get_value key s = match OpamJson.of_string s with From cadcf7529e7f66d8d8375f96e22e725b5f12c5a2 Mon Sep 17 00:00:00 2001 From: Raja Boujbel Date: Wed, 19 Jun 2024 19:39:10 +0200 Subject: [PATCH 06/11] download: fix SWH cooking request and status retrieval API request --- master_changes.md | 1 + src/repository/opamDownload.ml | 10 ++++++++-- 2 files changed, 9 insertions(+), 2 deletions(-) diff --git a/master_changes.md b/master_changes.md index a9d748858b4..e818b04a92f 100644 --- a/master_changes.md +++ b/master_changes.md @@ -66,6 +66,7 @@ users) * Accurately tag `curl` download command when loaded from global config file [#6270 @rjbou] * Remove wget support for Software Heritage fallback [#6036 @rjbou - fix #5721] * [BUG] Fix SWH liveness check [#6036 @rjbou - fix #5721] + * Update SWH API request [#6036 @rjbou] ## Lock diff --git a/src/repository/opamDownload.ml b/src/repository/opamDownload.ml index cbaca703644..6ab8415c9a9 100644 --- a/src/repository/opamDownload.ml +++ b/src/repository/opamDownload.ml @@ -278,7 +278,10 @@ module SWHID = struct let instance = OpamUrl.of_string "https://archive.softwareheritage.org" (* we keep api 1 hardcoded for the moment *) - let full_url middle hash = OpamUrl.Op.(instance / "api" / "1" / middle / hash / "") + let full_url middle hash = + OpamUrl.Op.(instance / "api" / "1" / middle / hash / "") + let vault_url kind hash = + full_url ("vault/" ^ kind) ("swh:1:dir:" ^ hash) let check_liveness () = OpamProcess.Job.catch (fun _ -> Done false) @@ -323,7 +326,10 @@ module SWHID = struct None let get_dir hash = - let url = full_url "vault/directory" hash in + (* https://archive.softwareheritage.org/api/1/vault/flat/doc/ *) + let url = vault_url "flat" hash in + (* The POST is needed only for asking to cook the archive, it's a no-op on + status check *) get_output ~post:true url @@| OpamStd.Option.replace @@ fun json -> let status = get_value "status" json in let fetch_url = get_value "fetch_url" json in From b1926cfbb9c5afa6636692c1a88f68dea01ffb8d Mon Sep 17 00:00:00 2001 From: Raja Boujbel Date: Tue, 12 Nov 2024 15:24:38 +0100 Subject: [PATCH 07/11] download: add internal function to retrieve http code for curl. --- master_changes.md | 1 + src/repository/opamDownload.ml | 97 +++++++++++++++++++--------------- tests/reftests/download.test | 12 ++--- 3 files changed, 62 insertions(+), 48 deletions(-) diff --git a/master_changes.md b/master_changes.md index e818b04a92f..c978a714206 100644 --- a/master_changes.md +++ b/master_changes.md @@ -174,6 +174,7 @@ users) ## opam-repository * `OpamDownload.get_output`: fix `wget` option for `POST` requests [#6036 @rjbou] * `OpamDownload.get_output`: use long form for `curl` `POST` request option [#6036 @rjbou] + * `OpamDownload.download`: more fine grained HTTP request error code detection for curl [#6036 @rjbou] ## opam-state diff --git a/src/repository/opamDownload.ml b/src/repository/opamDownload.ml index 6ab8415c9a9..b19e75a28f0 100644 --- a/src/repository/opamDownload.ml +++ b/src/repository/opamDownload.ml @@ -129,57 +129,70 @@ let download_command_t ~with_curl_mitigation ~compress ?checksum ~url ~dst c = OpamSystem.make_command ~allow_stdin:false ~stdout cmd args @@> c let tool_return redownload_command url ret = + let open OpamProcess in match Lazy.force OpamRepositoryConfig.(!r.download_tool) with | _, `Default -> if OpamProcess.is_failure ret then - fail (Some "Download command failed", - Printf.sprintf "Download command failed: %s" - (OpamProcess.result_summary ret)) - else Done () + Done (`fail (Some "Download command failed", + Printf.sprintf "Download command failed: %s" + (OpamProcess.result_summary ret))) + else Done `ok | _, `Curl -> - if OpamProcess.is_failure ret then - if ret.r_code = 43 then begin - (* Code 43 is CURLE_BAD_FUNCTION_ARGUMENT (7.1 7-Aug-2000). This should - never be encountered using the curl binary, so we assume that it's - a manifestation of curl/curl#13845 (see also #6120). *) - log "Attempting to mitigate curl/curl#13845"; - redownload_command ~with_curl_mitigation:true @@ function ret -> - if OpamProcess.is_failure ret then - if ret.r_code = 22 then - (* If this broken version of curl persists for some time, it is - relatively straightforward to parse the http response code from - the message, as it hasn't changed. *) - fail (Some "curl failed owing to a server-side issue", - Printf.sprintf "curl failed with server-side error: %s" - (OpamProcess.result_summary ret)) - else - fail (Some "curl failed", - Printf.sprintf "curl failed: %s" - (OpamProcess.result_summary ret)) - else Done () - end else - fail (Some "curl failed", Printf.sprintf "curl failed: %s" - (OpamProcess.result_summary ret)) - else - match ret.OpamProcess.r_stdout with - | [] -> - fail (Some "curl empty response", - Printf.sprintf "curl: empty response while downloading %s" - (OpamUrl.to_string url)) - | l -> - let code = List.hd (List.rev l) in - let num = try int_of_string code with Failure _ -> 999 in - if num >= 400 then - fail (Some ("curl error code " ^ code), - Printf.sprintf "curl: code %s while downloading %s" - code (OpamUrl.to_string url)) - else Done () + match ret with + | { r_code = 0 ; r_stdout = []; _ } -> + Done (`fail (Some "curl empty response", + Printf.sprintf "curl: empty response while downloading %s" + (OpamUrl.to_string url))) + | { r_code = 0 ; r_stdout = (_::_ as l); _ } -> + let code = List.hd (List.rev l) in + (try + let num = int_of_string code in + if num >= 400 then + Done (`http_error num) + else Done `ok + with Failure _ -> + Done (`fail (Some ("curl error " ^ code), + Printf.sprintf "curl: error %s while downloading %s" + code (OpamUrl.to_string url)))) + | { r_code = 43; _ } -> + (* Code 43 is CURLE_BAD_FUNCTION_ARGUMENT (7.1 7-Aug-2000). This should + never be encountered using the curl binary, so we assume that it's + a manifestation of curl/curl#13845 (see also #6120). *) + log "Attempting to mitigate curl/curl#13845"; + (redownload_command ~with_curl_mitigation:true @@ function ret -> + if OpamProcess.is_failure ret then + if ret.r_code = 22 then + (* If this broken version of curl persists for some time, it is + relatively straightforward to parse the http response code from + the message, as it hasn't changed. *) + Done (`fail (Some "curl failed owing to a server-side issue", + Printf.sprintf "curl failed with server-side error: %s" + (OpamProcess.result_summary ret))) + else + Done (`fail (Some "curl failed", + Printf.sprintf "curl failed: %s" + (OpamProcess.result_summary ret))) + else Done `ok) + | _ -> (* code <> 0 / 43 *) + Done (`fail (Some "curl failed", + Printf.sprintf "curl failed: %s" + (OpamProcess.result_summary ret))) -let download_command ~compress ?checksum ~url ~dst () = +let download_command_http_error ~compress ?checksum ~url ~dst () = let download_command = download_command_t ~compress ?checksum ~url ~dst in download_command ~with_curl_mitigation:false @@ tool_return download_command url +let download_command ~compress ?checksum ~url ~dst () = + download_command_http_error ~compress ?checksum ~url ~dst () + @@| function + | `ok -> () + | `http_error code -> + fail (Some ("HTTP error code " ^ string_of_int code), + Printf.sprintf "code %d while downloading %s" + code (OpamUrl.to_string url)) + | `fail (s,l) -> fail (s,l) + let really_download ?(quiet=false) ~overwrite ?(compress=false) ?checksum ?(validate=true) ~url ~dst () = diff --git a/tests/reftests/download.test b/tests/reftests/download.test index eaa0c73f302..c01c510070f 100644 --- a/tests/reftests/download.test +++ b/tests/reftests/download.test @@ -88,9 +88,9 @@ The following actions will be performed: <><> Processing actions <><><><><><><><><><><><><><><><><><><><><><><><><><><><> Processing 1/1: [foo.1: http] + curl "--another-args" "3" -[ERROR] Failed to get sources of foo.1: curl error code ***The curl is a lie*** [args: --another-args 3] +[ERROR] Failed to get sources of foo.1: curl error ***The curl is a lie*** [args: --another-args 3] -OpamSolution.Fetch_fail("https://github.com/UnixJunkie/get_line/archive/v1.0.0.tar.gz (curl: code ***The curl is a lie*** [args: --another-args 3] while downloading https://github.com/UnixJunkie/get_line/archive/v1.0.0.tar.gz)") +OpamSolution.Fetch_fail("https://github.com/UnixJunkie/get_line/archive/v1.0.0.tar.gz (curl: error ***The curl is a lie*** [args: --another-args 3] while downloading https://github.com/UnixJunkie/get_line/archive/v1.0.0.tar.gz)") <><> Error report <><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><> @@ -108,9 +108,9 @@ The following actions will be performed: <><> Processing actions <><><><><><><><><><><><><><><><><><><><><><><><><><><><> Processing 1/1: [foo.1: http] + curl "--write-out" "%{http_code}\n" "--retry" "3" "--retry-delay" "2" "--user-agent" "opam/current" "-L" "-o" "${OPAMTMP}/v1.0.0.tar.gz.part" "--" "https://github.com/UnixJunkie/get_line/archive/v1.0.0.tar.gz" -[ERROR] Failed to get sources of foo.1: curl error code ***The curl is a lie*** [args: --write-out %{http_code}\n --retry 3 --retry-delay 2 --user-agent opam/current -L -o ${OPAMTMP}/v1.0.0.tar.gz.part -- https://github.com/UnixJunkie/get_line/archive/v1.0.0.tar.gz] +[ERROR] Failed to get sources of foo.1: curl error ***The curl is a lie*** [args: --write-out %{http_code}\n --retry 3 --retry-delay 2 --user-agent opam/current -L -o ${OPAMTMP}/v1.0.0.tar.gz.part -- https://github.com/UnixJunkie/get_line/archive/v1.0.0.tar.gz] -OpamSolution.Fetch_fail("https://github.com/UnixJunkie/get_line/archive/v1.0.0.tar.gz (curl: code ***The curl is a lie*** [args: --write-out %{http_code}\n --retry 3 --retry-delay 2 --user-agent opam/current -L -o ${OPAMTMP}/v1.0.0.tar.gz.part -- https://github.com/UnixJunkie/get_line/archive/v1.0.0.tar.gz] while downloading https://github.com/UnixJunkie/get_line/archive/v1.0.0.tar.gz)") +OpamSolution.Fetch_fail("https://github.com/UnixJunkie/get_line/archive/v1.0.0.tar.gz (curl: error ***The curl is a lie*** [args: --write-out %{http_code}\n --retry 3 --retry-delay 2 --user-agent opam/current -L -o ${OPAMTMP}/v1.0.0.tar.gz.part -- https://github.com/UnixJunkie/get_line/archive/v1.0.0.tar.gz] while downloading https://github.com/UnixJunkie/get_line/archive/v1.0.0.tar.gz)") <><> Error report <><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><> @@ -128,9 +128,9 @@ The following actions will be performed: <><> Processing actions <><><><><><><><><><><><><><><><><><><><><><><><><><><><> Processing 1/1: [foo.1: http] + curl "--another-args" "3" -[ERROR] Failed to get sources of foo.1: curl error code ***The curl is a lie*** [args: --another-args 3] +[ERROR] Failed to get sources of foo.1: curl error ***The curl is a lie*** [args: --another-args 3] -OpamSolution.Fetch_fail("https://github.com/UnixJunkie/get_line/archive/v1.0.0.tar.gz (curl: code ***The curl is a lie*** [args: --another-args 3] while downloading https://github.com/UnixJunkie/get_line/archive/v1.0.0.tar.gz)") +OpamSolution.Fetch_fail("https://github.com/UnixJunkie/get_line/archive/v1.0.0.tar.gz (curl: error ***The curl is a lie*** [args: --another-args 3] while downloading https://github.com/UnixJunkie/get_line/archive/v1.0.0.tar.gz)") <><> Error report <><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><> From 7fd83774b67bcd388aaf64295577b454d42e1f45 Mon Sep 17 00:00:00 2001 From: Raja Boujbel Date: Thu, 28 Nov 2024 15:27:43 +0100 Subject: [PATCH 08/11] core: factorise temporary file and directory names creation --- src/core/opamSystem.ml | 16 ++++++++++++---- 1 file changed, 12 insertions(+), 4 deletions(-) diff --git a/src/core/opamSystem.ml b/src/core/opamSystem.ml index 7cbb19ad0f4..97f4d788179 100644 --- a/src/core/opamSystem.ml +++ b/src/core/opamSystem.ml @@ -81,10 +81,18 @@ let real_path p = let temp_basename prefix = Printf.sprintf "%s-%d-%06x" prefix (OpamStubs.getpid ()) (Random.int 0xFFFFFF) -let rec mk_temp_dir ?(prefix="opam") () = - let s = Filename.get_temp_dir_name () / temp_basename prefix in +let temp_name ?dir ?(prefix="opam") () = + let tmpdir = + match dir with + | Some d -> d + | None -> Filename.get_temp_dir_name () + in + tmpdir / (temp_basename prefix) + +let rec mk_temp_dir ?prefix () = + let s = temp_name ?prefix () in if Sys.file_exists s then - mk_temp_dir ~prefix () + mk_temp_dir ?prefix () else real_path s @@ -203,7 +211,7 @@ let rec temp_file ?(auto_clean=true) ?dir prefix = | None -> OpamCoreConfig.(!r.log_dir) | Some d -> d in mkdir temp_dir; - let file = temp_dir / temp_basename prefix in + let file = temp_name ~dir:temp_dir ~prefix () in if Hashtbl.mem temp_files file then temp_file ~auto_clean ?dir prefix else ( From 4df6e027e2d890c6d7b89fa3787d80fc3b97767d Mon Sep 17 00:00:00 2001 From: Raja Boujbel Date: Thu, 28 Nov 2024 15:30:08 +0100 Subject: [PATCH 09/11] core: add 'OpamSystem' and 'OpamFilename' function to use and clean temporary files: 'with_tmp_file' and 'with_tmp_file_job' --- master_changes.md | 1 + src/core/opamFilename.ml | 6 ++++++ src/core/opamFilename.mli | 7 +++++++ src/core/opamSystem.ml | 20 ++++++++++++++++++++ src/core/opamSystem.mli | 10 +++++++++- 5 files changed, 43 insertions(+), 1 deletion(-) diff --git a/master_changes.md b/master_changes.md index c978a714206..42a5ef5b9ce 100644 --- a/master_changes.md +++ b/master_changes.md @@ -195,3 +195,4 @@ users) * `OpamSystem`: add `is_archive_from_string` that does the same than `is_archive` but without looking at the file, only analysing the string (extension) [#6219 @rjbou] * `OpamSystem.remove_dir`: do not fail with an exception when directory is a symbolic link [#6276 @btjorge @rjbou - fix #6275] * `OpamParallel.*.{map,reduce,iter}`: Run `Gc.compact` when the main process is waiting for the children processes for the first time [#5396 @kkeundotnet] + * `OpamSystem`, `OpamFilename`: add `with_tmp_file` and `with_tmp_file_job` function, that create a file name in temporary directory and removes it at the end of the call [#6036 @rjbou] diff --git a/src/core/opamFilename.ml b/src/core/opamFilename.ml index e94fbbaa497..1806b9d3323 100644 --- a/src/core/opamFilename.ml +++ b/src/core/opamFilename.ml @@ -266,6 +266,12 @@ let exists filename = let opt_file filename = if exists filename then Some filename else None +let with_tmp_file fn = + OpamSystem.with_tmp_file (fun file -> fn (of_string file)) + +let with_tmp_file_job fjob = + OpamSystem.with_tmp_file_job (fun file -> fjob (of_string file)) + let with_contents fn filename = fn (read filename) diff --git a/src/core/opamFilename.mli b/src/core/opamFilename.mli index ba3763bb454..c9b2589ff4f 100644 --- a/src/core/opamFilename.mli +++ b/src/core/opamFilename.mli @@ -170,6 +170,13 @@ val exists: t -> bool a symlink to one *) val opt_file: t -> t option +(** Execute a function with a file in a temp directory. + It is always cleaned up afterwards. *) +val with_tmp_file: (t -> 'a) -> 'a + +(** Provide an automatically cleaned up file in temp directory to a job *) +val with_tmp_file_job: (t -> 'a OpamProcess.job) -> 'a OpamProcess.job + (** Check whether a file has a given suffix *) val check_suffix: t -> string -> bool diff --git a/src/core/opamSystem.ml b/src/core/opamSystem.ml index 97f4d788179..3ec0735a788 100644 --- a/src/core/opamSystem.ml +++ b/src/core/opamSystem.ml @@ -412,6 +412,26 @@ let with_tmp_dir_job fjob = mkdir dir; OpamProcess.Job.finally (fun () -> remove_dir dir) (fun () -> fjob dir) +let rec with_tmp_file fn = + let file = temp_name () in + if Sys.file_exists file then + with_tmp_file fn + else + try + let e = fn file in + remove_file file; + e + with e -> + OpamStd.Exn.finalise e @@ fun () -> + remove_file file + +let rec with_tmp_file_job fjob = + let file = temp_name () in + if Sys.file_exists file then + with_tmp_file_job fjob + else + OpamProcess.Job.finally (fun () -> remove_file file) (fun () -> fjob file) + let remove file = if (try Sys2.is_directory file with Sys_error _ -> false) then remove_dir file diff --git a/src/core/opamSystem.mli b/src/core/opamSystem.mli index e015d23af5f..9f8e3de2a9d 100644 --- a/src/core/opamSystem.mli +++ b/src/core/opamSystem.mli @@ -33,7 +33,7 @@ exception Internal_error of string val internal_error: ('a, unit, string, 'b) format4 -> 'a (** [with_tmp_dir fn] executes [fn] creates a temporary directory and - passes its name to [fn]. The directory is alwasy removed on completion. *) + passes its name to [fn]. The directory is always removed on completion. *) val with_tmp_dir: (string -> 'a) -> 'a (** [in_tmp_dir fn] executes [fn] in a temporary directory. *) @@ -42,6 +42,14 @@ val in_tmp_dir: (unit -> 'a) -> 'a (** Runs a job with a temp dir that is cleaned up afterwards *) val with_tmp_dir_job: (string -> 'a OpamProcess.job) -> 'a OpamProcess.job +(** [with_tmp_file fn] creates a file name in temporary directory and + passes it to [fn]. The file is always removed on completion. *) +val with_tmp_file: (string -> 'a) -> 'a + +(** Runs a job with a file in temporary directory that is cleaned up afterwards + *) +val with_tmp_file_job: (string -> 'a OpamProcess.job) -> 'a OpamProcess.job + (** Returns true if the default verbose level for base commands (cp, mv, etc.) is reached *) val verbose_for_base_commands: unit -> bool From 7ef8a8032deefb17970c9f4a92832dff97e58e76 Mon Sep 17 00:00:00 2001 From: Raja Boujbel Date: Tue, 12 Nov 2024 19:43:07 +0100 Subject: [PATCH 10/11] download: rework SWH retrieval fallback * Update to new API * More fine-grained workflow: check is archive is already cooked: if not request cooking, otherwise retrieve url of the archive * Use only post request when needed --- master_changes.md | 1 + src/repository/opamDownload.ml | 185 ++++++++++++++++++++++++--------- 2 files changed, 136 insertions(+), 50 deletions(-) diff --git a/master_changes.md b/master_changes.md index 42a5ef5b9ce..9377f0082e4 100644 --- a/master_changes.md +++ b/master_changes.md @@ -67,6 +67,7 @@ users) * Remove wget support for Software Heritage fallback [#6036 @rjbou - fix #5721] * [BUG] Fix SWH liveness check [#6036 @rjbou - fix #5721] * Update SWH API request [#6036 @rjbou] + * Rework SWH fallback to have a more correct archive retrieval and more fine grained error handling [#6036 @rjbou - fix #5721] ## Lock diff --git a/src/repository/opamDownload.ml b/src/repository/opamDownload.ml index b19e75a28f0..5fba7e7c0d9 100644 --- a/src/repository/opamDownload.ml +++ b/src/repository/opamDownload.ml @@ -129,7 +129,6 @@ let download_command_t ~with_curl_mitigation ~compress ?checksum ~url ~dst c = OpamSystem.make_command ~allow_stdin:false ~stdout cmd args @@> c let tool_return redownload_command url ret = - let open OpamProcess in match Lazy.force OpamRepositoryConfig.(!r.download_tool) with | _, `Default -> if OpamProcess.is_failure ret then @@ -296,16 +295,14 @@ module SWHID = struct let vault_url kind hash = full_url ("vault/" ^ kind) ("swh:1:dir:" ^ hash) - let check_liveness () = - OpamProcess.Job.catch (fun _ -> Done false) - @@ fun () -> - get_output ~post:false OpamUrl.Op.(instance / "api" / "1" / "ping" / "") - @@| function - | Some (pong::_) -> - (* curl output after answer the http code *) - (* https://archive.softwareheritage.org/api/1/ping/ *) - OpamStd.String.starts_with ~prefix:"\"pong\"" pong - | Some _ | None -> false + let fallback_err fmt = Printf.sprintf ("SWH fallback: "^^fmt) + + let get_output ?(post=false) url = + get_output ~post url @@| function + | Some out -> out + | None -> + (* Shouldn't happen, we already checked that a post tool is used *) + assert false let get_value key s = match OpamJson.of_string s with @@ -315,6 +312,87 @@ module SWHID = struct | _ -> None) | _ -> None + let check_liveness () = + OpamProcess.Job.catch (fun _ -> Done false) + @@ fun () -> + get_output ~post:false OpamUrl.Op.(instance / "api" / "1" / "ping" / "") + @@| function + | pong::_ -> + (* curl output after answering the http code *) + (* https://archive.softwareheritage.org/api/1/ping/ *) + OpamStd.String.starts_with ~prefix:"\"pong\"" pong + | _ -> false + + (* + Returned error JSONs + { + "error":"Resource not found", + "reason":"The resource /api/1/vault/flat/swh:1:dir:6b700f4b287aee509adbc723d030309188684f4/ could not be found on the server." + } + { + "exception":"NotFoundExc", + "reason":"Cooking of swh:1:dir:6b700f4b287aee509adbc723d030309188684f04 was never requested." + } + { + "exception":"NotFoundExc", + "reason":"swh:1:dir:0000000000000000000000000000000000000000 not found." + } + *) + let parse_err json = + match get_value "exception" json with + | Some "NotFoundExc" -> + (match get_value "reason" json with + | Some reason -> + if OpamStd.String.ends_with ~suffix:"was never requested." reason then + `Uncooked + else if OpamStd.String.ends_with ~suffix:"not found." reason then + `Not_found + else `Error + | None -> `Error) + | Some "Resource not found" -> `Not_found + | Some _ | None -> `Error + + let is_it_cooked url = + OpamSystem.with_tmp_file @@ fun dst -> + let download_cmd ~with_curl_mitigation return = + let cmd, args = + match + download_args ~url ~out:dst + ~with_curl_mitigation + ~retry:OpamRepositoryConfig.(!r.retries) + ~compress:false () + with + | cmd::args -> cmd, args + | _ -> assert false + in + let stdout = OpamSystem.temp_file ~auto_clean:false "dl" in + OpamProcess.Job.finally (fun () -> OpamSystem.remove_file stdout) + @@ fun () -> + OpamSystem.make_command ~allow_stdin:false ~stdout cmd args + @@> return + in + (download_cmd ~with_curl_mitigation:false + @@ tool_return download_cmd url) + @@| fun status -> + let read_last_line file = + let out = String.trim (OpamSystem.read file) in + match String.rindex_opt out '\n' with + | Some b -> + String.sub out (b + 1) (String.length out - b - 1) + | None -> out + in + let status = + match status with + | `ok -> + let json = read_last_line dst in + if String.equal json "" then `Error else `Cooked json + | `http_error 404 -> + let json = read_last_line dst in + parse_err json + | `http_error _ | `fail _ -> `Error + in + status + (* SWH request output example directory: retrieve "status" & "fetch_url" $ curl https://archive.softwareheritage.org/api/1/vault/directory/4453cfbdab1a996658cd1a815711664ee7742380/ @@ -329,53 +407,60 @@ module SWHID = struct } *) - let get_output ?(post=false) url = - get_output ~post url @@| function - | Some out -> - Some (String.concat "" out) - | None -> - OpamConsole.error "Software Heritage fallback needs %s installed" - (OpamConsole.colorise `underline "curl"); - None - - let get_dir hash = - (* https://archive.softwareheritage.org/api/1/vault/flat/doc/ *) - let url = vault_url "flat" hash in - (* The POST is needed only for asking to cook the archive, it's a no-op on - status check *) - get_output ~post:true url @@| OpamStd.Option.replace @@ fun json -> + let read_flat_out json = let status = get_value "status" json in let fetch_url = get_value "fetch_url" json in match status, fetch_url with - | None, _ | _, None -> None + | None, _ | _, None -> + (match parse_err json with + | `Not_found -> `Not_found + | `Error | `Uncooked -> `Malformed) | Some status, Some fetch_url -> - Some (match status with - | "done" -> `Done (OpamUrl.of_string fetch_url) - | "pending" -> `Pending - | "new" -> `New - | "failed" -> `Failed - | _ -> `Unknown) - - let fallback_err fmt = Printf.sprintf ("SWH fallback: "^^fmt) + match status with + | "done" -> `Done (OpamUrl.of_string fetch_url) + | "pending" -> `Pending + | "new" -> `New + | "failed" -> `Failed + | _ -> `Unknown let get_url ?(max_tries=6) swhid = - let attempts = max_tries in + let request_cooking ?(post=false) url = + get_output ~post url @@| fun out -> String.concat "" out + in let hash = OpamSWHID.hash swhid in - let rec aux max_tries = - if max_tries <= 0 then - Done (Not_available - (Some (fallback_err "max_tries"), - fallback_err "%d attempts tried; aborting" attempts)) - else - get_dir hash @@+ function - | Some (`Done fetch_url) -> Done (Result fetch_url) - | Some (`Pending | `New) -> - Unix.sleep 10; - aux (max_tries - 1) - | None | Some (`Failed | `Unknown) -> - Done (Not_available (None, fallback_err "Unknown swhid")) + (* https://archive.softwareheritage.org/api/1/vault/flat/doc/ *) + let url = vault_url "flat" hash in + let rec loop attempt json = + match read_flat_out json with + | `Done fetch_url -> Done (Result fetch_url) + | `Pending | `New -> + log "%s is cooking (%d/%d)..." + (OpamSWHID.to_string swhid) attempt max_tries; + if (attempt : int) >= (max_tries : int) then + Done (Not_available + (Some (fallback_err "attempt"), + fallback_err "%d attempts tried; aborting" max_tries)) + else + (Unix.sleep 10; + request_cooking ~post:false url + @@+ loop (attempt + 1)) + | `Malformed -> + Done (Not_available (None, fallback_err "Malformed request answer")) + | `Failed | `Unknown | `Not_found -> + Done (Not_available (None, fallback_err "Unknown swhid")) in - aux max_tries + let retrieve_url json = loop 1 json in + is_it_cooked url + @@+ function + | `Error -> Done (Not_available (None, fallback_err "Request error")) + | `Not_found -> Done (Not_available (None, fallback_err "Unknown swhid")) + | `Cooked json -> + log "%s is cooked or cooking, requesting url" (OpamSWHID.to_string swhid); + retrieve_url json + | `Uncooked -> + log "%s is uncooked, request cooking" (OpamSWHID.to_string swhid); + request_cooking ~post:true url + @@+ retrieve_url (* for the moment only used in sources, not extra sources or files *) let archive_fallback ?max_tries urlf dirnames = From 530fd27cea79b7adce88e7a0fedda0cc01de8516 Mon Sep 17 00:00:00 2001 From: Raja Boujbel Date: Tue, 12 Nov 2024 19:44:03 +0100 Subject: [PATCH 11/11] download: add some debug logging for SWH fallback --- src/repository/opamDownload.ml | 110 +++++++++++++++++---------------- 1 file changed, 58 insertions(+), 52 deletions(-) diff --git a/src/repository/opamDownload.ml b/src/repository/opamDownload.ml index 5fba7e7c0d9..4d7bf8a0c97 100644 --- a/src/repository/opamDownload.ml +++ b/src/repository/opamDownload.ml @@ -471,58 +471,64 @@ module SWHID = struct | _, `Curl -> check_liveness () @@+ fun alive -> if alive then - (* Add a global modifier and/or command for default answering *) - if OpamConsole.confirm ~default:false - "Source %s is not available. Do you want to try to retrieve it \ - from Software Heritage cache (https://www.softwareheritage.org)? \ - It may take few minutes." - (OpamConsole.colorise `underline - (OpamUrl.to_string (OpamFile.URL.url urlf))) then - (log "SWH fallback for %s" - (OpamUrl.to_string (OpamFile.URL.url urlf)); - get_url ?max_tries swhid @@+ function - | Not_available _ as error -> Done error - | Up_to_date _ -> assert false - | Result url -> - let hash = OpamSWHID.hash swhid in - OpamFilename.with_tmp_dir_job @@ fun dir -> - let archive = OpamFilename.Op.(dir // hash) in - download_as ~overwrite:true url archive @@+ fun () -> - let sources = OpamFilename.Op.(dir / "src") in - OpamFilename.extract_job archive sources @@| function - | Some e -> - Not_available ( - Some (fallback_err "archive extraction failure"), - fallback_err "archive extraction failure %s" - (match e with - | Failure s -> s - | OpamSystem.Process_error pe -> - OpamProcess.string_of_result pe - | e -> Printexc.to_string e)) - | None -> - (match OpamSWHID.compute sources with - | None -> - Not_available ( - Some (fallback_err "can't check archive validity"), - fallback_err - "error on swhid computation, can't check its validity") - | Some computed -> - if String.equal computed hash then - (List.iter (fun (_nv, dst, _sp) -> - (* add a text *) - OpamFilename.copy_dir ~src:sources ~dst) - dirnames; - Result (Some "SWH fallback")) - else - Not_available ( - Some (fallback_err "archive not valid"), - fallback_err - "archive corrupted, opam file swhid %S vs computed %S" - hash computed))) - else - Done (Not_available - (Some (fallback_err "skip retrieval"), - fallback_err "retrieval refused by user")) + (log "API is working"; + (* Add a global modifier and/or command for default answering *) + if OpamConsole.confirm ~default:false + "Source %s is not available. Do you want to try to retrieve it \ + from Software Heritage cache (https://www.softwareheritage.org)? \ + It may take few minutes." + (OpamConsole.colorise `underline + (OpamUrl.to_string (OpamFile.URL.url urlf))) then + (log "SWH fallback for %s with %s" + (OpamStd.Format.pretty_list + (List.map (fun (nv,_,_) -> nv) dirnames)) + (OpamSWHID.to_string swhid); + get_url ?max_tries swhid @@+ function + | Not_available _ as error -> Done error + | Up_to_date _ -> assert false + | Result url -> + log "Downloading %s for %s" (OpamSWHID.to_string swhid) + (OpamStd.Format.pretty_list + (List.map (fun (nv,_,_) -> nv) dirnames)); + let hash = OpamSWHID.hash swhid in + OpamFilename.with_tmp_dir_job @@ fun dir -> + let archive = OpamFilename.Op.(dir // hash) in + download_as ~overwrite:true url archive @@+ fun () -> + let sources = OpamFilename.Op.(dir / "src") in + OpamFilename.extract_job archive sources @@| function + | Some e -> + Not_available ( + Some (fallback_err "archive extraction failure"), + fallback_err "archive extraction failure %s" + (match e with + | Failure s -> s + | OpamSystem.Process_error pe -> + OpamProcess.string_of_result pe + | e -> Printexc.to_string e)) + | None -> + (match OpamSWHID.compute sources with + | None -> + Not_available ( + Some (fallback_err "can't check archive validity"), + fallback_err + "error on swhid computation, can't check its validity") + | Some computed -> + if String.equal computed hash then + (List.iter (fun (_nv, dst, _sp) -> + (* add a text *) + OpamFilename.copy_dir ~src:sources ~dst) + dirnames; + Result (Some "SWH fallback")) + else + Not_available ( + Some (fallback_err "archive not valid"), + fallback_err + "archive corrupted, opam file swhid %S vs computed %S" + hash computed))) + else + Done (Not_available + (Some (fallback_err "skip retrieval"), + fallback_err "retrieval refused by user"))) else Done (Not_available (Some (fallback_err "unreachable"),