aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorJunio C Hamano <gitster@pobox.com>2024-05-13 18:46:16 -0700
committerJunio C Hamano <gitster@pobox.com>2024-05-13 18:46:17 -0700
commitd1a1860207df172b1c022ea45166e5d054073ce9 (patch)
treeca030a6cbf6144ddd63e13bb4d66fdab0adfeb17
parentMerge branch 'jc/doc-manpages-l10n' into HEAD (diff)
parentapply: fix uninitialized hash function (diff)
downloadgit-seen.tar.xz
git-seen.zip
Merge branch 'jc/undecided-is-not-necessarily-sha1-fix' into HEADseen
The base topic started to make it an error for a command to leave the hash algorithm unspecified, which revealed a few commands that were not ready for the change. Give users a knob to revert back to the "default is sha-1" behaviour as an escape hatch, and start fixing these breakages. Comments? * jc/undecided-is-not-necessarily-sha1-fix: apply: fix uninitialized hash function builtin/hash-object: fix uninitialized hash function builtin/patch-id: fix uninitialized hash function t1517: test commands that are designed to be run outside repository setup: add an escape hatch for "no more default hash algorithm" change
-rw-r--r--builtin/apply.c4
-rw-r--r--builtin/hash-object.c3
-rw-r--r--builtin/patch-id.c13
-rw-r--r--environment.h1
-rw-r--r--repository.c40
-rw-r--r--setup.c2
-rwxr-xr-xt/t1007-hash-object.sh6
-rwxr-xr-xt/t1517-outside-repo.sh61
-rwxr-xr-xt/t4204-patch-id.sh34
9 files changed, 162 insertions, 2 deletions
diff --git a/builtin/apply.c b/builtin/apply.c
index 861a01910ca..e9175f820f0 100644
--- a/builtin/apply.c
+++ b/builtin/apply.c
@@ -1,6 +1,7 @@
#include "builtin.h"
#include "gettext.h"
#include "repository.h"
+#include "hash.h"
#include "apply.h"
static const char * const apply_usage[] = {
@@ -18,6 +19,9 @@ int cmd_apply(int argc, const char **argv, const char *prefix)
if (init_apply_state(&state, the_repository, prefix))
exit(128);
+ if (!the_hash_algo)
+ repo_set_hash_algo(the_repository, GIT_HASH_SHA1);
+
argc = apply_parse_options(argc, argv,
&state, &force_apply, &options,
apply_usage);
diff --git a/builtin/hash-object.c b/builtin/hash-object.c
index 82ca6d2bfdc..c767414a0cc 100644
--- a/builtin/hash-object.c
+++ b/builtin/hash-object.c
@@ -123,6 +123,9 @@ int cmd_hash_object(int argc, const char **argv, const char *prefix)
else
prefix = setup_git_directory_gently(&nongit);
+ if (nongit && !the_hash_algo)
+ repo_set_hash_algo(the_repository, GIT_HASH_SHA1);
+
if (vpath && prefix) {
vpath_free = prefix_filename(prefix, vpath);
vpath = vpath_free;
diff --git a/builtin/patch-id.c b/builtin/patch-id.c
index 3894d2b9706..583099cacff 100644
--- a/builtin/patch-id.c
+++ b/builtin/patch-id.c
@@ -5,6 +5,7 @@
#include "hash.h"
#include "hex.h"
#include "parse-options.h"
+#include "setup.h"
static void flush_current_id(int patchlen, struct object_id *id, struct object_id *result)
{
@@ -237,6 +238,18 @@ int cmd_patch_id(int argc, const char **argv, const char *prefix)
argc = parse_options(argc, argv, prefix, builtin_patch_id_options,
patch_id_usage, 0);
+ /*
+ * We rely on `the_hash_algo` to compute patch IDs. This is dubious as
+ * it means that the hash algorithm now depends on the object hash of
+ * the repository, even though git-patch-id(1) clearly defines that
+ * patch IDs always use SHA1.
+ *
+ * NEEDSWORK: This hack should be removed in favor of converting
+ * the code that computes patch IDs to always use SHA1.
+ */
+ if (!the_hash_algo)
+ repo_set_hash_algo(the_repository, GIT_HASH_SHA1);
+
generate_id_list(opts ? opts > 1 : config.stable,
opts ? opts == 3 : config.verbatim);
return 0;
diff --git a/environment.h b/environment.h
index 0b2d457f07e..be543ee995b 100644
--- a/environment.h
+++ b/environment.h
@@ -56,6 +56,7 @@ const char *getenv_safe(struct strvec *argv, const char *name);
#define GIT_OPTIONAL_LOCKS_ENVIRONMENT "GIT_OPTIONAL_LOCKS"
#define GIT_TEXT_DOMAIN_DIR_ENVIRONMENT "GIT_TEXTDOMAINDIR"
#define GIT_ATTR_SOURCE_ENVIRONMENT "GIT_ATTR_SOURCE"
+#define GIT_DEFAULT_HASH_ENVIRONMENT "GIT_DEFAULT_HASH"
/*
* Environment variable used to propagate the --no-advice global option to the
diff --git a/repository.c b/repository.c
index 15c10015b05..f912ee9a7c7 100644
--- a/repository.c
+++ b/repository.c
@@ -1,5 +1,6 @@
#include "git-compat-util.h"
#include "abspath.h"
+#include "environment.h"
#include "repository.h"
#include "object-store-ll.h"
#include "config.h"
@@ -19,6 +20,27 @@
static struct repository the_repo;
struct repository *the_repository = &the_repo;
+static void set_default_hash_algo(struct repository *repo)
+{
+ const char *hash_name;
+ int algo;
+
+ hash_name = getenv(GIT_DEFAULT_HASH_ENVIRONMENT);
+ if (!hash_name)
+ return;
+ algo = hash_algo_by_name(hash_name);
+
+ /*
+ * NEEDSWORK: after all, falling back to SHA-1 by assigning
+ * GIT_HASH_SHA1 to algo here, instead of returning, may give
+ * us better behaviour.
+ */
+ if (algo == GIT_HASH_UNKNOWN)
+ return;
+
+ repo_set_hash_algo(repo, algo);
+}
+
void initialize_repository(struct repository *repo)
{
repo->objects = raw_object_store_new();
@@ -26,6 +48,24 @@ void initialize_repository(struct repository *repo)
repo->parsed_objects = parsed_object_pool_new();
ALLOC_ARRAY(repo->index, 1);
index_state_init(repo->index, repo);
+
+ /*
+ * When a command runs inside a repository, it learns what
+ * hash algorithm is in use from the repository, but some
+ * commands are designed to work outside a repository, yet
+ * they want to access the_hash_algo, if only for the length
+ * of the hashed value to see if their input looks like a
+ * plausible hash value.
+ *
+ * We are in the process of identifying the codepaths and
+ * giving them an appropriate default individually; any
+ * unconverted codepath that tries to access the_hash_algo
+ * will thus fail. The end-users however have an escape hatch
+ * to set GIT_DEFAULT_HASH environment variable to "sha1" get
+ * back the old behaviour of defaulting to SHA-1.
+ */
+ if (repo == the_repository)
+ set_default_hash_algo(repo);
}
static void expand_base_dir(char **out, const char *in,
diff --git a/setup.c b/setup.c
index ac4d1e5f29d..c5759a2b755 100644
--- a/setup.c
+++ b/setup.c
@@ -1840,8 +1840,6 @@ int daemonize(void)
#define TEST_FILEMODE 1
#endif
-#define GIT_DEFAULT_HASH_ENVIRONMENT "GIT_DEFAULT_HASH"
-
static void copy_templates_1(struct strbuf *path, struct strbuf *template_path,
DIR *dir)
{
diff --git a/t/t1007-hash-object.sh b/t/t1007-hash-object.sh
index 64aea384860..4c138c6ca45 100755
--- a/t/t1007-hash-object.sh
+++ b/t/t1007-hash-object.sh
@@ -260,4 +260,10 @@ test_expect_success '--literally with extra-long type' '
echo example | git hash-object -t $t --literally --stdin
'
+test_expect_success '--stdin outside of repository' '
+ nongit git hash-object --stdin <hello >actual &&
+ echo "$(test_oid hello)" >expect &&
+ test_cmp expect actual
+'
+
test_done
diff --git a/t/t1517-outside-repo.sh b/t/t1517-outside-repo.sh
new file mode 100755
index 00000000000..6f32a40c6dd
--- /dev/null
+++ b/t/t1517-outside-repo.sh
@@ -0,0 +1,61 @@
+#!/bin/sh
+
+test_description='check random commands outside repo'
+
+TEST_PASSES_SANITIZE_LEAK=true
+. ./test-lib.sh
+
+test_expect_success 'set up a non-repo directory and test file' '
+ GIT_CEILING_DIRECTORIES=$(pwd) &&
+ export GIT_CEILING_DIRECTORIES &&
+ mkdir non-repo &&
+ (
+ cd non-repo &&
+ # confirm that git does not find a repo
+ test_must_fail git rev-parse --git-dir
+ ) &&
+ test_write_lines one two three four >nums &&
+ git add nums &&
+ cp nums nums.old &&
+ test_write_lines five >>nums &&
+ git diff >sample.patch
+'
+
+test_expect_success 'compute a patch-id outside repository' '
+ git patch-id <sample.patch >patch-id.expect &&
+ (
+ cd non-repo &&
+ git patch-id <../sample.patch >../patch-id.actual
+ ) &&
+ test_cmp patch-id.expect patch-id.actual
+'
+
+test_expect_success 'hash-object outside repository' '
+ git hash-object --stdin <sample.patch >hash.expect &&
+ (
+ cd non-repo &&
+ git hash-object --stdin <../sample.patch >../hash.actual
+ ) &&
+ test_cmp hash.expect hash.actual
+'
+
+test_expect_success 'apply a patch outside repository' '
+ (
+ cd non-repo &&
+ cp ../nums.old nums &&
+ git apply ../sample.patch
+ ) &&
+ test_cmp nums non-repo/nums
+'
+
+test_expect_success 'grep outside repository' '
+ git grep --cached two >expect &&
+ (
+ cd non-repo &&
+ cp ../nums.old nums &&
+ git grep --no-index two >../actual
+ ) &&
+ test_cmp expect actual
+'
+
+test_done
diff --git a/t/t4204-patch-id.sh b/t/t4204-patch-id.sh
index a7fa94ce0a2..605faea0c7b 100755
--- a/t/t4204-patch-id.sh
+++ b/t/t4204-patch-id.sh
@@ -310,4 +310,38 @@ test_expect_success 'patch-id handles diffs with one line of before/after' '
test_config patchid.stable true &&
calc_patch_id diffu1stable <diffu1
'
+
+test_expect_failure 'patch-id computes same ID with different object hashes' '
+ test_when_finished "rm -rf repo-sha1 repo-sha256" &&
+
+ cat >diff <<-\EOF &&
+ diff --git a/bar b/bar
+ index bdaf90f..31051f6 100644
+ --- a/bar
+ +++ b/bar
+ @@ -2 +2,2 @@
+ b
+ +c
+ EOF
+
+ git init --object-format=sha1 repo-sha1 &&
+ git -C repo-sha1 patch-id <diff >patch-id-sha1 &&
+ git init --object-format=sha256 repo-sha256 &&
+ git -C repo-sha256 patch-id <diff >patch-id-sha256 &&
+ test_cmp patch-id-sha1 patch-id-sha256
+'
+
+test_expect_success 'patch-id without repository' '
+ cat >diff <<-\EOF &&
+ diff --git a/bar b/bar
+ index bdaf90f..31051f6 100644
+ --- a/bar
+ +++ b/bar
+ @@ -2 +2,2 @@
+ b
+ +c
+ EOF
+ nongit git patch-id <diff
+'
+
test_done