From 3038f73e1cc07abf6f145cdc664d2e4da52a2cfd Mon Sep 17 00:00:00 2001 From: Junio C Hamano Date: Mon, 13 May 2024 15:41:23 -0700 Subject: setup: add an escape hatch for "no more default hash algorithm" change Partially revert c8aed5e8 (repository: stop setting SHA1 as the default object hash, 2024-05-07), to keep end-user systems still broken when we have gap in our test coverage but yet give them an escape hatch to set the GIT_DEFAULT_HASH environment variable to "sha1" in order to revert to the previous behaviour. This variable has been in use for using SHA-256 hash by default, and it should be a better fit than inventing a new and test-only knob. Signed-off-by: Junio C Hamano --- environment.h | 1 + repository.c | 40 ++++++++++++++++++++++++++++++++++++++++ setup.c | 2 -- 3 files changed, 41 insertions(+), 2 deletions(-) diff --git a/environment.h b/environment.h index 05fd94d7be8..deaa29408f5 100644 --- a/environment.h +++ b/environment.h @@ -56,6 +56,7 @@ const char *getenv_safe(struct strvec *argv, const char *name); #define GIT_OPTIONAL_LOCKS_ENVIRONMENT "GIT_OPTIONAL_LOCKS" #define GIT_TEXT_DOMAIN_DIR_ENVIRONMENT "GIT_TEXTDOMAINDIR" #define GIT_ATTR_SOURCE_ENVIRONMENT "GIT_ATTR_SOURCE" +#define GIT_DEFAULT_HASH_ENVIRONMENT "GIT_DEFAULT_HASH" /* * Environment variable used in handshaking the wire protocol. 
diff --git a/repository.c b/repository.c index 15c10015b05..f912ee9a7c7 100644 --- a/repository.c +++ b/repository.c @@ -1,5 +1,6 @@ #include "git-compat-util.h" #include "abspath.h" +#include "environment.h" #include "repository.h" #include "object-store-ll.h" #include "config.h" @@ -19,6 +20,27 @@ static struct repository the_repo; struct repository *the_repository = &the_repo; +static void set_default_hash_algo(struct repository *repo) +{ + const char *hash_name; + int algo; + + hash_name = getenv(GIT_DEFAULT_HASH_ENVIRONMENT); + if (!hash_name) + return; + algo = hash_algo_by_name(hash_name); + + /* + * NEEDSWORK: after all, falling back to SHA-1 by assigning + * GIT_HASH_SHA1 to algo here, instead of returning, may give + * us better behaviour. + */ + if (algo == GIT_HASH_UNKNOWN) + return; + + repo_set_hash_algo(repo, algo); +} + void initialize_repository(struct repository *repo) { repo->objects = raw_object_store_new(); @@ -26,6 +48,24 @@ void initialize_repository(struct repository *repo) repo->parsed_objects = parsed_object_pool_new(); ALLOC_ARRAY(repo->index, 1); index_state_init(repo->index, repo); + + /* + * When a command runs inside a repository, it learns what + * hash algorithm is in use from the repository, but some + * commands are designed to work outside a repository, yet + * they want to access the_hash_algo, if only for the length + * of the hashed value to see if their input looks like a + * plausible hash value. + * + * We are in the process of identifying the codepaths and + * giving them an appropriate default individually; any + * unconverted codepath that tries to access the_hash_algo + * will thus fail. The end-users however have an escape hatch + * to set the GIT_DEFAULT_HASH environment variable to "sha1" to + * get back the old behaviour of defaulting to SHA-1. 
+ */ + if (repo == the_repository) + set_default_hash_algo(repo); } static void expand_base_dir(char **out, const char *in, diff --git a/setup.c b/setup.c index 7c996659bd3..d084703465c 100644 --- a/setup.c +++ b/setup.c @@ -1840,8 +1840,6 @@ int daemonize(void) #define TEST_FILEMODE 1 #endif -#define GIT_DEFAULT_HASH_ENVIRONMENT "GIT_DEFAULT_HASH" - static void copy_templates_1(struct strbuf *path, struct strbuf *template_path, DIR *dir) { -- cgit v1.2.3-59-g8ed1b From e56ae689617765561a23c8ecea0ad15f8df9a57d Mon Sep 17 00:00:00 2001 From: Junio C Hamano Date: Mon, 13 May 2024 15:41:24 -0700 Subject: t1517: test commands that are designed to be run outside repository A few commands, like "git apply" and "git patch-id", have been broken with a recent change to stop setting the default hash algorithm to SHA-1. Test them and fix them in later commits. Signed-off-by: Junio C Hamano --- t/t1517-outside-repo.sh | 61 +++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 61 insertions(+) create mode 100755 t/t1517-outside-repo.sh diff --git a/t/t1517-outside-repo.sh b/t/t1517-outside-repo.sh new file mode 100755 index 00000000000..e0fd495ec11 --- /dev/null +++ b/t/t1517-outside-repo.sh @@ -0,0 +1,61 @@ +#!/bin/sh + +test_description='check random commands outside repo' + +TEST_PASSES_SANITIZE_LEAK=true +. 
./test-lib.sh + +test_expect_success 'set up a non-repo directory and test file' ' + GIT_CEILING_DIRECTORIES=$(pwd) && + export GIT_CEILING_DIRECTORIES && + mkdir non-repo && + ( + cd non-repo && + # confirm that git does not find a repo + test_must_fail git rev-parse --git-dir + ) && + test_write_lines one two three four >nums && + git add nums && + cp nums nums.old && + test_write_lines five >>nums && + git diff >sample.patch +' + +test_expect_failure 'compute a patch-id outside repository' ' + git patch-id <sample.patch >patch-id.expect && + ( + cd non-repo && + git patch-id <../sample.patch >../patch-id.actual + ) && + test_cmp patch-id.expect patch-id.actual +' + +test_expect_failure 'hash-object outside repository' ' + git hash-object --stdin <sample.patch >hash.expect && + ( + cd non-repo && + git hash-object --stdin <../sample.patch >../hash.actual + ) && + test_cmp hash.expect hash.actual +' + +test_expect_failure 'apply a patch outside repository' ' + ( + cd non-repo && + cp ../nums.old nums && + git apply ../sample.patch + ) && + test_cmp nums non-repo/nums +' + +test_expect_success 'grep outside repository' ' + git grep --cached two >expect && + ( + cd non-repo && + cp ../nums.old nums && + git grep --no-index two >../actual + ) && + test_cmp expect actual +' + +test_done -- cgit v1.2.3-59-g8ed1b From e1ae0e95fcdb58517de6032f02ef05e1cb086028 Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Mon, 13 May 2024 15:41:25 -0700 Subject: builtin/patch-id: fix uninitialized hash function MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit In c8aed5e8da (repository: stop setting SHA1 as the default object hash, 2024-05-07), we have adapted `initialize_repository()` to no longer set up a default hash function. As this function is also used to set up `the_repository`, the consequence is that `the_hash_algo` will now by default be a `NULL` pointer unless the hash algorithm was configured properly. 
This is done as a mechanism to detect cases where we may be using the wrong hash function by accident. This change now causes git-patch-id(1) to segfault when it's run outside of a repository. As this command can read diffs from stdin, it does not necessarily need a repository, but then relies on `the_hash_algo` to compute the patch ID itself. It is somewhat dubious that git-patch-id(1) relies on `the_hash_algo` in the first place. Quoting its manpage: A "patch ID" is nothing but a sum of SHA-1 of the file diffs associated with a patch, with line numbers ignored. As such, it’s "reasonably stable", but at the same time also reasonably unique, i.e., two patches that have the same "patch ID" are almost guaranteed to be the same thing. We explicitly document patch IDs to be using SHA-1. Furthermore, patch IDs are supposed to be stable for the most part. But even with the same input, the patch IDs will now be different depending on the repo's configured object hash. Work around the issue by setting up SHA-1 when there was no startup repository for now. This is arguably not the correct fix, but for now we rather want to focus on getting the segfault fixed. Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- builtin/patch-id.c | 13 +++++++++++++ t/t1517-outside-repo.sh | 2 +- t/t4204-patch-id.sh | 34 ++++++++++++++++++++++++++++++++++ 3 files changed, 48 insertions(+), 1 deletion(-) diff --git a/builtin/patch-id.c b/builtin/patch-id.c index 3894d2b9706..583099cacff 100644 --- a/builtin/patch-id.c +++ b/builtin/patch-id.c @@ -5,6 +5,7 @@ #include "hash.h" #include "hex.h" #include "parse-options.h" +#include "setup.h" static void flush_current_id(int patchlen, struct object_id *id, struct object_id *result) { @@ -237,6 +238,18 @@ int cmd_patch_id(int argc, const char **argv, const char *prefix) argc = parse_options(argc, argv, prefix, builtin_patch_id_options, patch_id_usage, 0); + /* + * We rely on `the_hash_algo` to compute patch IDs. 
This is dubious as + * it means that the hash algorithm now depends on the object hash of + * the repository, even though git-patch-id(1) clearly defines that + * patch IDs always use SHA1. + * + * NEEDSWORK: This hack should be removed in favor of converting + * the code that computes patch IDs to always use SHA1. + */ + if (!the_hash_algo) + repo_set_hash_algo(the_repository, GIT_HASH_SHA1); + generate_id_list(opts ? opts > 1 : config.stable, opts ? opts == 3 : config.verbatim); return 0; diff --git a/t/t1517-outside-repo.sh b/t/t1517-outside-repo.sh index e0fd495ec11..ac5f3191cc9 100755 --- a/t/t1517-outside-repo.sh +++ b/t/t1517-outside-repo.sh @@ -21,7 +21,7 @@ test_expect_success 'set up a non-repo directory and test file' ' git diff >sample.patch ' -test_expect_failure 'compute a patch-id outside repository' ' +test_expect_success 'compute a patch-id outside repository' ' git patch-id <sample.patch >patch-id.expect && ( cd non-repo && diff --git a/t/t4204-patch-id.sh b/t/t4204-patch-id.sh index a7fa94ce0a2..605faea0c7b 100755 --- a/t/t4204-patch-id.sh +++ b/t/t4204-patch-id.sh @@ -310,4 +310,38 @@ test_expect_success 'patch-id handles diffs with one line of before/after' ' test_config patchid.stable true && calc_patch_id diffu1stable diff <<-\EOF && + diff --git a/bar b/bar + index bdaf90f..31051f6 100644 + --- a/bar + +++ b/bar + @@ -2 +2,2 @@ + b + +c + EOF + + git init --object-format=sha1 repo-sha1 && + git -C repo-sha1 patch-id <diff >patch-id-sha1 && + git init --object-format=sha256 repo-sha256 && + git -C repo-sha256 patch-id <diff >patch-id-sha256 && + test_cmp patch-id-sha1 patch-id-sha256 +' + +test_expect_success 'patch-id without repository' ' + cat >diff <<-\EOF && + diff --git a/bar b/bar + index bdaf90f..31051f6 100644 + --- a/bar + +++ b/bar + @@ -2 +2,2 @@ + b + +c + EOF + nongit git patch-id Date: Mon, 13 May 2024 15:41:26 -0700 Subject: builtin/hash-object: fix uninitialized hash function The git-hash-object(1) command allows users to hash an object even without a 
repository. Starting with c8aed5e8da (repository: stop setting SHA1 as the default object hash, 2024-05-07), this will make us hit an uninitialized hash function, which subsequently leads to a segfault. Fix this by falling back to SHA-1 explicitly when running outside of a Git repository. Users can use the GIT_DEFAULT_HASH environment variable to specify what hash algorithm they want, so arguably this code should not be needed. Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- builtin/hash-object.c | 3 +++ t/t1007-hash-object.sh | 6 ++++++ t/t1517-outside-repo.sh | 2 +- 3 files changed, 10 insertions(+), 1 deletion(-) diff --git a/builtin/hash-object.c b/builtin/hash-object.c index 82ca6d2bfdc..c767414a0cc 100644 --- a/builtin/hash-object.c +++ b/builtin/hash-object.c @@ -123,6 +123,9 @@ int cmd_hash_object(int argc, const char **argv, const char *prefix) else prefix = setup_git_directory_gently(&nongit); + if (nongit && !the_hash_algo) + repo_set_hash_algo(the_repository, GIT_HASH_SHA1); + if (vpath && prefix) { vpath_free = prefix_filename(prefix, vpath); vpath = vpath_free; diff --git a/t/t1007-hash-object.sh b/t/t1007-hash-object.sh index 64aea384860..4c138c6ca45 100755 --- a/t/t1007-hash-object.sh +++ b/t/t1007-hash-object.sh @@ -260,4 +260,10 @@ test_expect_success '--literally with extra-long type' ' echo example | git hash-object -t $t --literally --stdin ' +test_expect_success '--stdin outside of repository' ' + nongit git hash-object --stdin <hello >actual && + echo "$(test_oid hello)" >expect && + test_cmp expect actual +' + test_done diff --git a/t/t1517-outside-repo.sh b/t/t1517-outside-repo.sh index ac5f3191cc9..854bb8f343e 100755 --- a/t/t1517-outside-repo.sh +++ b/t/t1517-outside-repo.sh @@ -30,7 +30,7 @@ test_expect_success 'compute a patch-id outside repository' ' test_cmp patch-id.expect patch-id.actual ' -test_expect_failure 'hash-object outside repository' ' +test_expect_success 'hash-object outside repository' ' git hash-object --stdin 
hash.expect && ( cd non-repo && -- cgit v1.2.3-59-g8ed1b From 5b0ec43757faef025a3238105a54901180891a7f Mon Sep 17 00:00:00 2001 From: Junio C Hamano Date: Mon, 13 May 2024 15:41:27 -0700 Subject: apply: fix uninitialized hash function "git apply" can work outside a repository as a better "GNU patch", but when it does so, it still assumed that it can access the_hash_algo, which is no longer true in the new world order. Make sure we explicitly fall back to SHA-1 algorithm for backward compatibility. Signed-off-by: Junio C Hamano --- builtin/apply.c | 4 ++++ t/t1517-outside-repo.sh | 2 +- 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/builtin/apply.c b/builtin/apply.c index 861a01910ca..e9175f820f0 100644 --- a/builtin/apply.c +++ b/builtin/apply.c @@ -1,6 +1,7 @@ #include "builtin.h" #include "gettext.h" #include "repository.h" +#include "hash.h" #include "apply.h" static const char * const apply_usage[] = { @@ -18,6 +19,9 @@ int cmd_apply(int argc, const char **argv, const char *prefix) if (init_apply_state(&state, the_repository, prefix)) exit(128); + if (!the_hash_algo) + repo_set_hash_algo(the_repository, GIT_HASH_SHA1); + argc = apply_parse_options(argc, argv, &state, &force_apply, &options, apply_usage); diff --git a/t/t1517-outside-repo.sh b/t/t1517-outside-repo.sh index 854bb8f343e..6f32a40c6dd 100755 --- a/t/t1517-outside-repo.sh +++ b/t/t1517-outside-repo.sh @@ -39,7 +39,7 @@ test_expect_success 'hash-object outside repository' ' test_cmp hash.expect hash.actual ' -test_expect_failure 'apply a patch outside repository' ' +test_expect_success 'apply a patch outside repository' ' ( cd non-repo && cp ../nums.old nums && -- cgit v1.2.3-59-g8ed1b