path: root/arch/x86/include/asm/word-at-a-time.h
blob: 8338b0432b50e3c2d6bebfbb1ea9bbec1027ff69
/* SPDX-License-Identifier: GPL-2.0 */
#ifndef _ASM_WORD_AT_A_TIME_H
#define _ASM_WORD_AT_A_TIME_H

#include <linux/kernel.h>

/*
 * This is largely generic for little-endian machines, but the
 * optimal byte mask counting is probably going to be something
 * that is architecture-specific. If you have a reliably fast
 * bit count instruction, that might be better than the multiply
 * and shift, for example.
 */
struct word_at_a_time {
	const unsigned long one_bits, high_bits;
};

#define WORD_AT_A_TIME_CONSTANTS { REPEAT_BYTE(0x01), REPEAT_BYTE(0x80) }
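
/*
 * With the usual REPEAT_BYTE() definition this expands to
 * { 0x0101010101010101, 0x8080808080808080 } on 64-bit and to
 * { 0x01010101, 0x80808080 } on 32-bit: a word with 0x01 in every
 * byte and a word with only the top bit of every byte set.
 */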

#ifdef CONFIG_64BIT

/*
 * Jan Achrenius on G+: microoptimized version of
 * the simpler "(mask & ONEBYTES) * ONEBYTES >> 56"
 * that works for the bytemasks without having to
 * mask them first.
 */
static inline long count_masked_bytes(unsigned long mask)
{
	return mask*0x0001020304050608ul >> 56;
}
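
/*
 * Worked example: a bytemask of 0xff (first zero byte at index 1)
 * multiplies out to 0xff * 0x0001020304050608 == 0x01010101010101f8,
 * whose top byte (>> 56) is the byte index 1.  A mask of 0 gives 0.
 */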

#else	/* 32-bit case */

/* Carl Chatfield / Jan Achrenius G+ version for 32-bit */
static inline long count_masked_bytes(long mask)
{
	/* (000000 0000ff 00ffff ffffff) -> ( 1 1 2 3 ) */
	long a = (0x0ff0001+mask) >> 23;
	/* Fix the 1 for 00 case */
	return a & mask;
}
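
/*
 * Worked example: a bytemask of 0x00ffff (first zero byte at index 2)
 * gives (0x0ff0001 + 0x00ffff) >> 23 == 0x1000000 >> 23 == 2, and
 * 2 & 0x00ffff == 2.  For a mask of 0 the shift yields 1, which the
 * final "& mask" turns back into 0.
 */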

#endif

/* Return nonzero if it has a zero */
static inline unsigned long has_zero(unsigned long a, unsigned long *bits, const struct word_at_a_time *c)
{
	unsigned long mask = ((a - c->one_bits) & ~a) & c->high_bits;
	*bits = mask;
	return mask;
}
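
/*
 * This is the classic "(x - 0x01...01) & ~x & 0x80...80" trick: a zero
 * byte borrows when 0x01 is subtracted and has its own top bit clear,
 * so its 0x80 bit survives the mask.  The borrow can propagate upwards
 * and spuriously flag bytes above the first zero byte, but the lowest
 * set 0x80 bit always marks the first zero byte, which is all that
 * create_zero_mask()/find_zero() below rely on.
 */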

static inline unsigned long prep_zero_mask(unsigned long a, unsigned long bits, const struct word_at_a_time *c)
{
	return bits;
}

static inline unsigned long create_zero_mask(unsigned long bits)
{
	bits = (bits - 1) & ~bits;
	return bits >> 7;
}
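
/*
 * Worked example: bits == 0x0000800000800000 (zero bytes flagged at
 * indices 2 and 5) gives (bits - 1) & ~bits == 0x00000000007fffff,
 * and the >> 7 turns that into the bytemask 0x000000000000ffff:
 * 0xff in every byte below the first zero byte.
 */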

/* The mask we created is directly usable as a bytemask */
#define zero_bytemask(mask) (mask)

static inline unsigned long find_zero(unsigned long mask)
{
	return count_masked_bytes(mask);
}
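
/*
 * Putting the helpers together: a word-at-a-time strlen() over a
 * NUL-terminated buffer in kernel memory could look roughly like the
 * sketch below, where "src" is a hypothetical const char * (callers
 * such as fs/namei.c use the same has_zero/prep_zero_mask/
 * create_zero_mask/find_zero sequence):
 *
 *	const struct word_at_a_time constants = WORD_AT_A_TIME_CONSTANTS;
 *	unsigned long a, bits, mask;
 *	size_t len = 0;
 *
 *	for (;;) {
 *		a = load_unaligned_zeropad(src + len);
 *		if (has_zero(a, &bits, &constants))
 *			break;
 *		len += sizeof(unsigned long);
 *	}
 *	bits = prep_zero_mask(a, bits, &constants);
 *	mask = create_zero_mask(bits);
 *	return len + find_zero(mask);
 */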

/*
 * Load an unaligned word from kernel space.
 *
 * In the (very unlikely) case of the word being a page-crosser
 * and the next page not being mapped, take the exception and
 * return zeroes in the non-existing part.
 */
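/*
 * Example of the fixup: with 8-byte words, a load from the last three
 * bytes of a mapped page (addr & 7 == 5) faults if the next page is
 * not present.  The fixup re-reads the aligned word at (addr & ~7),
 * which lies entirely within the mapped page, and shifts it right by
 * offset*8 == 40 bits, leaving the three valid bytes at the low end
 * and zeroes above them (little-endian).
 */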
#ifdef CONFIG_CC_HAS_ASM_GOTO_OUTPUT

static inline unsigned long load_unaligned_zeropad(const void *addr)
{
	unsigned long offset, data;
	unsigned long ret;

	asm_volatile_goto(
		"1:	mov %[mem], %[ret]\n"

		_ASM_EXTABLE(1b, %l[do_exception])

		: [ret] "=r" (ret)
		: [mem] "m" (*(unsigned long *)addr)
		: : do_exception);

	return ret;

do_exception:
	offset = (unsigned long)addr & (sizeof(long) - 1);
	addr = (void *)((unsigned long)addr & ~(sizeof(long) - 1));
	data = *(unsigned long *)addr;
	ret = data >> offset * 8;

	return ret;
}

#else /* !CONFIG_CC_HAS_ASM_GOTO_OUTPUT */

static inline unsigned long load_unaligned_zeropad(const void *addr)
{
	unsigned long offset, data;
	unsigned long ret, err = 0;

	asm(	"1:	mov %[mem], %[ret]\n"
		"2:\n"

		_ASM_EXTABLE_FAULT(1b, 2b)

		: [ret] "=&r" (ret), "+a" (err)
		: [mem] "m" (*(unsigned long *)addr));

	if (unlikely(err)) {
		offset = (unsigned long)addr & (sizeof(long) - 1);
		addr = (void *)((unsigned long)addr & ~(sizeof(long) - 1));
		data = *(unsigned long *)addr;
		ret = data >> offset * 8;
	}

	return ret;
}

#endif /* CONFIG_CC_HAS_ASM_GOTO_OUTPUT */

#endif /* _ASM_WORD_AT_A_TIME_H */