Message-ID: <0897d235-db55-3d3c-12be-34a97debb921@gmail.com>
Date: Fri, 23 Feb 2018 14:28:36 -0800
From: J Freyensee <why2jjj.linux@...il.com>
To: Igor Stoppa <igor.stoppa@...wei.com>, david@...morbit.com,
	willy@...radead.org, keescook@...omium.org, mhocko@...nel.org
Cc: labbott@...hat.com, linux-security-module@...r.kernel.org,
	linux-mm@...ck.org, linux-kernel@...r.kernel.org,
	kernel-hardening@...ts.openwall.com
Subject: Re: [PATCH 1/7] genalloc: track beginning of allocations

some code snipped . . .

> +/**
> + * get_bitmap_entry() - extracts the specified entry from the bitmap
> + * @map: pointer to a bitmap
> + * @entry_index: the index of the desired entry in the bitmap
> + *
> + * Return: The requested bitmap.
> + */
> +static inline unsigned long get_bitmap_entry(unsigned long *map,
> +                                             int entry_index)
> +{

Apologies if this has been mentioned before, but since this function
expects map to not be NULL, shouldn't something like:

        WARN_ON(map == NULL);

be used to check the parameters for bad values?

BTW, excellent commenting :-).

> +        return (map[ENTRIES_DIV_LONGS(entry_index)] >>
> +                ENTRIES_TO_BITS(entry_index % ENTRIES_PER_LONG)) &
> +                ENTRY_MASK;
> +}
> +
> +
> +/**
> + * mem_to_units() - convert references to memory into orders of allocation
> + * @size: amount in bytes
> + * @order: power of 2 represented by each entry in the bitmap
> + *
> + * Return: the number of units representing the size.
> + */
> +static inline unsigned long mem_to_units(unsigned long size,
> +                                         unsigned long order)
> +{
> +        return (size + (1UL << order) - 1) >> order;
> +}
> +
> +/**
> + * chunk_size() - dimension of a chunk of memory, in bytes
> + * @chunk: pointer to the struct describing the chunk
> + *
> + * Return: The size of the chunk, in bytes.
> + */
> static inline size_t chunk_size(const struct gen_pool_chunk *chunk)
> {

Same problem here: chunk is always expected to not be NULL.

>         return chunk->end_addr - chunk->start_addr + 1;
> }
>
> -static int set_bits_ll(unsigned long *addr, unsigned long mask_to_set)
> +
> +/**
> + * set_bits_ll() - based on value and mask, sets bits at address
> + * @addr: where to write
> + * @mask: filter to apply for the bits to alter
> + * @value: actual configuration of bits to store
> + *
> + * Return:
> + * * 0 - success
> + * * -EBUSY - otherwise
> + */
> +static int set_bits_ll(unsigned long *addr,
> +                       unsigned long mask, unsigned long value)
> {
> -        unsigned long val, nval;
> +        unsigned long nval;
> +        unsigned long present;
> +        unsigned long target;
>
>         nval = *addr;

Same issue here with addr.

>         do {
> -                val = nval;
> -                if (val & mask_to_set)
> +                present = nval;
> +                if (present & mask)
>                         return -EBUSY;
> +                target = present | value;
>                 cpu_relax();
> -        } while ((nval = cmpxchg(addr, val, val | mask_to_set)) != val);
> -
> +        } while ((nval = cmpxchg(addr, present, target)) != target);
>         return 0;
> }
>
> -static int clear_bits_ll(unsigned long *addr, unsigned long mask_to_clear)
> +
> +/**
> + * clear_bits_ll() - based on value and mask, clears bits at address
> + * @addr: where to write
> + * @mask: filter to apply for the bits to alter
> + * @value: actual configuration of bits to clear
> + *
> + * Return:
> + * * 0 - success
> + * * -EBUSY - otherwise
> + */
> +static int clear_bits_ll(unsigned long *addr,
> +                         unsigned long mask, unsigned long value)

Ditto here for addr too.
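To make that concrete, here is the rough, untested sort of thing I have
in mind, using set_bits_ll() as the example (the same pattern would apply
to get_bitmap_entry(), chunk_size() and clear_bits_ll()). WARN_ON_ONCE()
may be preferable to WARN_ON() since these helpers sit on the allocation
fast path, and the -EBUSY fallback is just my guess at an error the
existing callers could tolerate:

        static int set_bits_ll(unsigned long *addr,
                               unsigned long mask, unsigned long value)
        {
                /*
                 * Hypothetical guard: warn and bail out instead of
                 * dereferencing a NULL pointer below.
                 */
                if (WARN_ON_ONCE(addr == NULL))
                        return -EBUSY;

                /* ... rest of the function as in the patch ... */
        }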
> {
> -        unsigned long val, nval;
> +        unsigned long nval;
> +        unsigned long present;
> +        unsigned long target;
>
>         nval = *addr;
> +        present = nval;
> +        if (unlikely((present & mask) ^ value))
> +                return -EBUSY;
>         do {
> -                val = nval;
> -                if ((val & mask_to_clear) != mask_to_clear)
> +                present = nval;
> +                if (unlikely((present & mask) ^ value))
>                         return -EBUSY;
> +                target = present & ~mask;
>                 cpu_relax();
> -        } while ((nval = cmpxchg(addr, val, val & ~mask_to_clear)) != val);
> -
> +        } while ((nval = cmpxchg(addr, present, target)) != target);
>         return 0;
> }
>
> -/*
> - * bitmap_set_ll - set the specified number of bits at the specified position
> +
> +/**
> + * get_boundary() - verifies address, then measure length.
> * @map: pointer to a bitmap
> - * @start: a bit position in @map
> - * @nr: number of bits to set
> + * @start_entry: the index of the first entry in the bitmap
> + * @nentries: number of entries to alter
> *
> - * Set @nr bits start from @start in @map lock-lessly. Several users
> - * can set/clear the same bitmap simultaneously without lock. If two
> - * users set the same bit, one user will return remain bits, otherwise
> - * return 0.
> + * Return:
> + * * length of an allocation - success
> + * * -EINVAL - invalid parameters
> */
> -static int bitmap_set_ll(unsigned long *map, int start, int nr)
> +static int get_boundary(unsigned long *map, int start_entry, int nentries)

Same problem for map.

> {
> -        unsigned long *p = map + BIT_WORD(start);
> -        const int size = start + nr;
> -        int bits_to_set = BITS_PER_LONG - (start % BITS_PER_LONG);
> -        unsigned long mask_to_set = BITMAP_FIRST_WORD_MASK(start);
> -
> -        while (nr - bits_to_set >= 0) {
> -                if (set_bits_ll(p, mask_to_set))
> -                        return nr;
> -                nr -= bits_to_set;
> -                bits_to_set = BITS_PER_LONG;
> -                mask_to_set = ~0UL;
> -                p++;
> -        }
> -        if (nr) {
> -                mask_to_set &= BITMAP_LAST_WORD_MASK(size);
> -                if (set_bits_ll(p, mask_to_set))
> -                        return nr;
> -        }
> +        int i;
> +        unsigned long bitmap_entry;
>
> -        return 0;
> +
> +        if (unlikely(get_bitmap_entry(map, start_entry) != ENTRY_HEAD))

Since get_bitmap_entry() is a new function, I'm not sure why its second
parameter is defined as an 'int'; unsigned int looks like it would be
more appropriate.

> +                return -EINVAL;
> +        for (i = start_entry + 1; i < nentries; i++) {

nentries is assumed here to be non-negative, but a negative value is
possible, since the function parameter is declared as an 'int'.
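If both observations hold, the assumptions could be encoded directly in
the types. A rough sketch of the signatures I mean (untested, and
assuming no caller needs to pass negative values):

        static inline unsigned long get_bitmap_entry(unsigned long *map,
                                                     unsigned int entry_index);

        static int get_boundary(unsigned long *map, unsigned int start_entry,
                                unsigned int nentries);

get_boundary() would keep its int return type, since that doubles as the
-EINVAL error path.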
> +                bitmap_entry = get_bitmap_entry(map, i);
> +                if (bitmap_entry == ENTRY_HEAD ||
> +                    bitmap_entry == ENTRY_UNUSED)
> +                        return i;
> +        }
> +        return nentries - start_entry;
> }
>
> +
> +#define SET_BITS 1
> +#define CLEAR_BITS 0
> +
> /*
> - * bitmap_clear_ll - clear the specified number of bits at the specified position
> + * alter_bitmap_ll() - set/clear the entries associated with an allocation
> + * @alteration: indicates if the bits selected should be set or cleared
> * @map: pointer to a bitmap
> - * @start: a bit position in @map
> - * @nr: number of bits to set
> + * @start: the index of the first entry in the bitmap
> + * @nentries: number of entries to alter
> + *
> + * The modification happens lock-lessly.
> + * Several users can write to the same map simultaneously, without lock.
> + * In case of mid-air conflict, when 2 or more writers try to alter the
> + * same word in the bitmap, only one will succeed and continue, the others
> + * will fail and receive as return value the amount of entries that were
> + * not written. Each failed writer is responsible to revert the changes
> + * it did to the bitmap.
> + * The lockless conflict resolution is implemented through cmpxchg.
> + * Success or failure is purely based on first come first served basis.
> + * The first writer that manages to gain write access to the target word
> + * of the bitmap wins. Whatever can affect the order and priority of execution
> + * of the writers can and will affect the result of the race.
> *
> - * Clear @nr bits start from @start in @map lock-lessly. Several users
> - * can set/clear the same bitmap simultaneously without lock. If two
> - * users clear the same bit, one user will return remain bits,
> - * otherwise return 0.
> + * Return:
> + * * 0 - success
> + * * remaining entries - failure
> */
> -static int bitmap_clear_ll(unsigned long *map, int start, int nr)
> +static int alter_bitmap_ll(bool alteration, unsigned long *map,
> +                           int start_entry, int nentries)
> {

map could benefit from a WARN_ON(map == NULL), along the lines sketched
above.

> -        unsigned long *p = map + BIT_WORD(start);
> -        const int size = start + nr;
> -        int bits_to_clear = BITS_PER_LONG - (start % BITS_PER_LONG);
> -        unsigned long mask_to_clear = BITMAP_FIRST_WORD_MASK(start);
> -
> -        while (nr - bits_to_clear >= 0) {
> -                if (clear_bits_ll(p, mask_to_clear))
> -                        return nr;
> -                nr -= bits_to_clear;
> -                bits_to_clear = BITS_PER_LONG;
> -                mask_to_clear = ~0UL;
> -                p++;
> -        }
> -        if (nr) {
> -                mask_to_clear &= BITMAP_LAST_WORD_MASK(size);
> -                if (clear_bits_ll(p, mask_to_clear))
> -                        return nr;
> +        unsigned long start_bit;
> +        unsigned long end_bit;
> +        unsigned long mask;
> +        unsigned long value;
> +        int nbits;
> +        int bits_to_write;
> +        int index;
> +        int (*action)(unsigned long *addr,
> +                      unsigned long mask, unsigned long value);
> +
> +        action = (alteration == SET_BITS) ? set_bits_ll : clear_bits_ll;
> +
> +        /*
> +         * Prepare for writing the initial part of the allocation, from
> +         * starting entry, to the end of the UL bitmap element which
> +         * contains it. It might be larger than the actual allocation.
> +         */
> +        start_bit = ENTRIES_TO_BITS(start_entry);
> +        end_bit = ENTRIES_TO_BITS(start_entry + nentries);
> +        nbits = ENTRIES_TO_BITS(nentries);

These statements won't make sense if start_entry and nentries are
negative, which is possible, given how alter_bitmap_ll() is defined. Am I
missing something that makes it OK for these parameters to be negative?
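If negative values really can't happen, a cheap sanity check at the top
of alter_bitmap_ll() would at least make the assumption explicit (in
addition to, or instead of, switching to unsigned types as suggested
above). Untested sketch; returning nentries is just my guess at a
failure value, based on the documented "remaining entries - failure"
convention:

        /*
         * Hypothetical guard: nothing has been written yet, so report
         * the whole request as remaining.
         */
        if (WARN_ON(start_entry < 0 || nentries < 0))
                return nentries;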
> +        bits_to_write = BITS_PER_LONG - start_bit % BITS_PER_LONG;
> +        mask = BITMAP_FIRST_WORD_MASK(start_bit);
> +        /* Mark the beginning of the allocation. */
> +        value = MASK | (1UL << (start_bit % BITS_PER_LONG));
> +        index = BITS_DIV_LONGS(start_bit);
> +
> +        /*
> +         * Writes entries to the bitmap, as long as the reminder is
> +         * positive or zero.
> +         * Might be skipped if the entries to write do not reach the end
> +         * of a bitmap UL unit.
> +         */
> +        while (nbits >= bits_to_write) {
> +                if (action(map + index, mask, value & mask))
> +                        return BITS_DIV_ENTRIES(nbits);
> +                nbits -= bits_to_write;
> +                bits_to_write = BITS_PER_LONG;
> +                mask = ~0UL;
> +                value = MASK;
> +                index++;
>         }
>
> +        /* Takes care of the ending part of the entries to mark. */
> +        if (nbits > 0) {
> +                mask ^= BITMAP_FIRST_WORD_MASK((end_bit) % BITS_PER_LONG);
> +                bits_to_write = nbits;
> +                if (action(map + index, mask, value & mask))
> +                        return BITS_DIV_ENTRIES(nbits);
> +        }
>         return 0;
> }
>
> +
> /**
> - * gen_pool_create - create a new special memory pool
> - * @min_alloc_order: log base 2 of number of bytes each bitmap bit represents
> - * @nid: node id of the node the pool structure should be allocated on, or -1
> + * gen_pool_create() - create a new special memory pool
> + * @min_alloc_order: log base 2 of number of bytes each bitmap entry
> + *                   represents
> + * @nid: node id of the node the pool structure should be allocated on,
> + *       or -1
> *
> - * Create a new special memory pool that can be used to manage special purpose
> - * memory not managed by the regular kmalloc/kfree interface.
> + * Create a new special memory pool that can be used to manage special
> + * purpose memory not managed by the regular kmalloc/kfree interface.
> + *
> + * Return:
> + * * pointer to the pool - success
> + * * NULL - otherwise
> */
> struct gen_pool *gen_pool_create(int min_alloc_order, int nid)
> {
> @@ -167,7 +364,7 @@ struct gen_pool *gen_pool_create(int min_alloc_order, int nid)
> EXPORT_SYMBOL(gen_pool_create);
>
> /**
> - * gen_pool_add_virt - add a new chunk of special memory to the pool
> + * gen_pool_add_virt() - add a new chunk of special memory to the pool
> * @pool: pool to add new memory chunk to
> * @virt: virtual starting address of memory chunk to add to pool
> * @phys: physical starting address of memory chunk to add to pool
> @@ -177,16 +374,20 @@ EXPORT_SYMBOL(gen_pool_create);
> *
> * Add a new chunk of special memory to the specified pool.
> *
> - * Returns 0 on success or a -ve errno on failure.
> + * Return:
> + * * 0 - success
> + * * -ve errno - failure
> */
> -int gen_pool_add_virt(struct gen_pool *pool, unsigned long virt, phys_addr_t phys,
> -                      size_t size, int nid)
> +int gen_pool_add_virt(struct gen_pool *pool, unsigned long virt,
> +                      phys_addr_t phys, size_t size, int nid)
> {

WARN_ON(pool == NULL); ?
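Same idea as for the static helpers, but for an exported interface the
check is arguably even more valuable, since the callers live outside this
file. Untested sketch, placed before pool->min_alloc_order is
dereferenced; -EINVAL is my guess, given the function already returns a
-ve errno on failure:

        if (WARN_ON(pool == NULL))
                return -EINVAL;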
>         struct gen_pool_chunk *chunk;
> -        int nbits = size >> pool->min_alloc_order;
> -        int nbytes = sizeof(struct gen_pool_chunk) +
> -                                BITS_TO_LONGS(nbits) * sizeof(long);
> +        int nentries;
> +        int nbytes;
>
> +        nentries = size >> pool->min_alloc_order;
> +        nbytes = sizeof(struct gen_pool_chunk) +
> +                 ENTRIES_DIV_LONGS(nentries) * sizeof(long);
>         chunk = kzalloc_node(nbytes, GFP_KERNEL, nid);
>         if (unlikely(chunk == NULL))
>                 return -ENOMEM;
> @@ -205,11 +406,13 @@ int gen_pool_add_virt(struct gen_pool *pool, unsigned long virt, phys_addr_t phy
> EXPORT_SYMBOL(gen_pool_add_virt);
>
> /**
> - * gen_pool_virt_to_phys - return the physical address of memory
> + * gen_pool_virt_to_phys() - return the physical address of memory
> * @pool: pool to allocate from
> * @addr: starting address of memory
> *
> - * Returns the physical address on success, or -1 on error.
> + * Return:
> + * * the physical address - success
> + * * \-1 - error
> */
> phys_addr_t gen_pool_virt_to_phys(struct gen_pool *pool, unsigned long addr)
> {
> @@ -230,7 +433,7 @@ phys_addr_t gen_pool_virt_to_phys(struct gen_pool *pool, unsigned long addr)
> EXPORT_SYMBOL(gen_pool_virt_to_phys);
>
> /**
> - * gen_pool_destroy - destroy a special memory pool
> + * gen_pool_destroy() - destroy a special memory pool
> * @pool: pool to destroy
> *
> * Destroy the specified special memory pool. Verifies that there are no
> @@ -248,7 +451,7 @@ void gen_pool_destroy(struct gen_pool *pool)
>                 list_del(&chunk->next_chunk);
>
>                 end_bit = chunk_size(chunk) >> order;
> -                bit = find_next_bit(chunk->bits, end_bit, 0);
> +                bit = find_next_bit(chunk->entries, end_bit, 0);
>                 BUG_ON(bit < end_bit);
>
>                 kfree(chunk);
> @@ -259,7 +462,7 @@ void gen_pool_destroy(struct gen_pool *pool)
> EXPORT_SYMBOL(gen_pool_destroy);
>
> /**
> - * gen_pool_alloc - allocate special memory from the pool
> + * gen_pool_alloc() - allocate special memory from the pool
> * @pool: pool to allocate from
> * @size: number of bytes to allocate from the pool
> *
> @@ -267,6 +470,10 @@ EXPORT_SYMBOL(gen_pool_destroy);
> * Uses the pool allocation function (with first-fit algorithm by default).
> * Can not be used in NMI handler on architectures without
> * NMI-safe cmpxchg implementation.
> + *
> + * Return:
> + * * address of the memory allocated - success
> + * * NULL - error
> */
> unsigned long gen_pool_alloc(struct gen_pool *pool, size_t size)
> {
> @@ -275,7 +482,7 @@ unsigned long gen_pool_alloc(struct gen_pool *pool, size_t size)
> EXPORT_SYMBOL(gen_pool_alloc);
>
> /**
> - * gen_pool_alloc_algo - allocate special memory from the pool
> + * gen_pool_alloc_algo() - allocate special memory from the pool
> * @pool: pool to allocate from
> * @size: number of bytes to allocate from the pool
> * @algo: algorithm passed from caller
> @@ -285,6 +492,10 @@ EXPORT_SYMBOL(gen_pool_alloc);
> * Uses the pool allocation function (with first-fit algorithm by default).
> * Can not be used in NMI handler on architectures without
> * NMI-safe cmpxchg implementation.
> + *
> + * Return:
> + * * address of the memory allocated - success
> + * * NULL - error
> */
> unsigned long gen_pool_alloc_algo(struct gen_pool *pool, size_t size,
>                                   genpool_algo_t algo, void *data)
> @@ -292,7 +503,7 @@ unsigned long gen_pool_alloc_algo(struct gen_pool *pool, size_t size,
>         struct gen_pool_chunk *chunk;
>         unsigned long addr = 0;
>         int order = pool->min_alloc_order;
> -        int nbits, start_bit, end_bit, remain;
> +        int nentries, start_entry, end_entry, remain;

It would be nicer to use "unsigned int" here, but it's not clear from
this diff alone whether that would work with the rest of the existing
code.
>
> #ifndef CONFIG_ARCH_HAVE_NMI_SAFE_CMPXCHG
>         BUG_ON(in_nmi());
> #endif
> @@ -301,29 +512,32 @@ unsigned long gen_pool_alloc_algo(struct gen_pool *pool, size_t size,
>         if (size == 0)
>                 return 0;
>
> -        nbits = (size + (1UL << order) - 1) >> order;
> +        nentries = mem_to_units(size, order);
>         rcu_read_lock();
>         list_for_each_entry_rcu(chunk, &pool->chunks, next_chunk) {
>                 if (size > atomic_long_read(&chunk->avail))
>                         continue;
>
> -                start_bit = 0;
> -                end_bit = chunk_size(chunk) >> order;
> +                start_entry = 0;
> +                end_entry = chunk_size(chunk) >> order;
> retry:
> -                start_bit = algo(chunk->bits, end_bit, start_bit,
> -                                 nbits, data, pool);
> -                if (start_bit >= end_bit)
> +                start_entry = algo(chunk->entries, end_entry, start_entry,
> +                                   nentries, data, pool);
> +                if (start_entry >= end_entry)
>                         continue;
> -                remain = bitmap_set_ll(chunk->bits, start_bit, nbits);
> +                remain = alter_bitmap_ll(SET_BITS, chunk->entries,
> +                                         start_entry, nentries);
>                 if (remain) {
> -                        remain = bitmap_clear_ll(chunk->bits, start_bit,
> -                                                 nbits - remain);
> -                        BUG_ON(remain);
> +                        remain = alter_bitmap_ll(CLEAR_BITS,
> +                                                 chunk->entries,
> +                                                 start_entry,
> +                                                 nentries - remain);
>                         goto retry;
>                 }
>
> -                addr = chunk->start_addr + ((unsigned long)start_bit << order);
> -                size = nbits << order;
> +                addr = chunk->start_addr +
> +                        ((unsigned long)start_entry << order);
> +                size = nentries << order;
>                 atomic_long_sub(size, &chunk->avail);
>                 break;
>         }
> @@ -333,7 +547,7 @@ unsigned long gen_pool_alloc_algo(struct gen_pool *pool, size_t size,
> EXPORT_SYMBOL(gen_pool_alloc_algo);
>
> /**
> - * gen_pool_dma_alloc - allocate special memory from the pool for DMA usage
> + * gen_pool_dma_alloc() - allocate special memory from the pool for DMA usage
> * @pool: pool to allocate from
> * @size: number of bytes to allocate from the pool
> * @dma: dma-view physical address return value. Use NULL if unneeded.
> @@ -342,6 +556,10 @@ EXPORT_SYMBOL(gen_pool_alloc_algo);
> * Uses the pool allocation function (with first-fit algorithm by default).
> * Can not be used in NMI handler on architectures without
> * NMI-safe cmpxchg implementation.
> + *
> + * Return:
> + * * address of the memory allocated - success
> + * * NULL - error
> */
> void *gen_pool_dma_alloc(struct gen_pool *pool, size_t size, dma_addr_t *dma)
> {
> @@ -362,10 +580,10 @@ void *gen_pool_dma_alloc(struct gen_pool *pool, size_t size, dma_addr_t *dma)
> EXPORT_SYMBOL(gen_pool_dma_alloc);
>
> /**
> - * gen_pool_free - free allocated special memory back to the pool
> + * gen_pool_free() - free allocated special memory back to the pool
> * @pool: pool to free to
> * @addr: starting address of memory to free back to pool
> - * @size: size in bytes of memory to free
> + * @size: size in bytes of memory to free or 0, for auto-detection
> *
> * Free previously allocated special memory back to the specified
> * pool. Can not be used in NMI handler on architectures without
> @@ -375,22 +593,29 @@ void gen_pool_free(struct gen_pool *pool, unsigned long addr, size_t size)
> {
>         struct gen_pool_chunk *chunk;
>         int order = pool->min_alloc_order;
> -        int start_bit, nbits, remain;
> +        int start_entry, remaining_entries, nentries, remain;
> +        int boundary;
>
> #ifndef CONFIG_ARCH_HAVE_NMI_SAFE_CMPXCHG
>         BUG_ON(in_nmi());
> #endif
>
> -        nbits = (size + (1UL << order) - 1) >> order;
>         rcu_read_lock();
>         list_for_each_entry_rcu(chunk, &pool->chunks, next_chunk) {
>                 if (addr >= chunk->start_addr && addr <= chunk->end_addr) {
>                         BUG_ON(addr + size - 1 > chunk->end_addr);
> -                        start_bit = (addr - chunk->start_addr) >> order;
> -                        remain = bitmap_clear_ll(chunk->bits, start_bit, nbits);
> +                        start_entry = (addr - chunk->start_addr) >> order;
> +                        remaining_entries = (chunk->end_addr - addr) >> order;
> +                        boundary = get_boundary(chunk->entries, start_entry,
> +                                                remaining_entries);
> +                        BUG_ON(boundary < 0);

Do you really want to use BUG_ON()? I've thought twice about using
BUG_ON() ever since Linus's wrath over BUG_ON() code causing an issue
with the 4.8 release:

https://lkml.org/lkml/2016/10/4/1

Hence why I've been suggesting WARN_ON() throughout this review. For
example, something like the untested sketch below seems like it would
fail more gracefully here (the recovery path, unlocking and leaking the
allocation, is just my guess):
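        boundary = get_boundary(chunk->entries, start_entry,
                                remaining_entries);
        /*
         * Hypothetical recovery: warn and leak the allocation rather
         * than take down the whole machine.
         */
        if (WARN_ON(boundary < 0)) {
                rcu_read_unlock();
                return;
        }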
Thanks,
Jay