Follow @Openwall on Twitter for new release announcements and other news
[<prev] [next>] [<thread-prev] [day] [month] [year] [list]
Message-ID: <20190213011133.GQ23599@brightrain.aerifal.cx>
Date: Tue, 12 Feb 2019 20:11:33 -0500
From: Rich Felker <dalias@...c.org>
To: musl@...ts.openwall.com
Subject: Re: Planned robust mutex internals changes

On Tue, Sep 04, 2018 at 11:09:32PM -0400, Rich Felker wrote:
> On Fri, Aug 31, 2018 at 12:45:12PM -0400, Rich Felker wrote:
> > Recent debugging with Adelie Linux & POSIX conformance tests gave me a
> > bit of a scare about the safety of the current robust mutex
> > implementation. It turned out the be completely unrelated (kernel
> > problem) but it did remind me that there's some mildly sketchy stuff
> > going on right now.
> > 
> > In order to implement ENOTRECOVERABLE, the current implementation
> > changes a bit of the mutex type field to indicate that it's recovered
> > after EOWNERDEAD and will go into ENOTRECOVERABLE state if
> > pthread_mutex_consistent is not called before unlocking. While it's
> > only the thread that holds the lock that needs access to this
> > information (except possibly for the same of pthread_mutex_consistent
> > choosing between EINVAL and EPERM for erroneous calls), the change to
> > the type field is formally a data race with all other threads that
> > perform any operation on the mutex. No individual bits race, and no
> > write races are possible, so things are "okay" in some sense, but it's
> > still not good.
> > 
> > What I plan to do is move this state to the mutex owner/lock field,
> > which should be the only field of the object (aside from waiter count)
> > that's mutable.
> > 
> > Currently, the lock field looks like this:
> > 
> > bits 0-29:  owner; 0 if unlocked or if owned died; -1 if unrecoverable
> > bit 30:     owner died flag, also set if unrecoverable; otherwise 0
> > bit 31:     new waiters flag
> > 
> > Ignoring bit 31, this means the possible values are 0 (unlocked), a
> > tid for the owner, 0x40000000 after owner died, and 0x7fffffff when
> > unrecoverable.
> > 
> > What I'd like too do is use 0x40000000|tid as the value for when the
> > lock is held but pthread_mutex_consistent hasn't yet been called. Note
> > that at the kernel level, bit 29 (0x20000000) is reserved not to be
> > used as part of a tid, so this does not overlap with the special value
> > 0x7fffffff. And there's still some room for extensibility, since we
> > could use bit 31 along with values that don't indicate a mutex state
> > that can be waited on.
> > 
> > With these changes, no fields of mutex object will be mutable except
> > for the lock state (owner, futex) and the waiter count.
> > 
> > Anyway I'm posting this now as notes/reminder and for comments in case
> > others see a problem with the proposal or ideas for improvement. I'll
> > probably hold off on doing any of this until after release.
> 
> And here's the proposed patch.
> 
> Rich

> diff --git a/src/thread/pthread_mutex_consistent.c b/src/thread/pthread_mutex_consistent.c
> index 96b83b5..27c74e5 100644
> --- a/src/thread/pthread_mutex_consistent.c
> +++ b/src/thread/pthread_mutex_consistent.c
> @@ -1,10 +1,14 @@
>  #include "pthread_impl.h"
> +#include "atomic.h"
>  
>  int pthread_mutex_consistent(pthread_mutex_t *m)
>  {
> -	if (!(m->_m_type & 8)) return EINVAL;
> -	if ((m->_m_lock & 0x7fffffff) != __pthread_self()->tid)
> +	int old = m->_m_lock;
> +	int own = old & 0x3fffffff;
> +	if (!(m->_m_type & 4) || !own || !(old & 0x40000000))
> +		return EINVAL;
> +	if (own != __pthread_self()->tid)
>  		return EPERM;
> -	m->_m_type &= ~8U;
> +	a_and(&m->_m_lock, ~0x40000000);
>  	return 0;
>  }
> diff --git a/src/thread/pthread_mutex_timedlock.c b/src/thread/pthread_mutex_timedlock.c
> index d2bd196..02c2ff7 100644
> --- a/src/thread/pthread_mutex_timedlock.c
> +++ b/src/thread/pthread_mutex_timedlock.c
> @@ -18,10 +18,12 @@ int __pthread_mutex_timedlock(pthread_mutex_t *restrict m, const struct timespec
>  	while (spins-- && m->_m_lock && !m->_m_waiters) a_spin();
>  
>  	while ((r=__pthread_mutex_trylock(m)) == EBUSY) {
> -		if (!(r=m->_m_lock) || ((r&0x40000000) && (type&4)))
> +		r = m->_m_lock;
> +		int own = r & 0x3fffffff;
> +		if (!own && (!r || (type&4)))
>  			continue;
>  		if ((type&3) == PTHREAD_MUTEX_ERRORCHECK
> -		 && (r&0x7fffffff) == __pthread_self()->tid)
> +		    && own == __pthread_self()->tid)
>  			return EDEADLK;
>  
>  		a_inc(&m->_m_waiters);
> diff --git a/src/thread/pthread_mutex_trylock.c b/src/thread/pthread_mutex_trylock.c
> index 783ca0c..3fe5912 100644
> --- a/src/thread/pthread_mutex_trylock.c
> +++ b/src/thread/pthread_mutex_trylock.c
> @@ -8,14 +8,14 @@ int __pthread_mutex_trylock_owner(pthread_mutex_t *m)
>  	int tid = self->tid;
>  
>  	old = m->_m_lock;
> -	own = old & 0x7fffffff;
> +	own = old & 0x3fffffff;
>  	if (own == tid && (type&3) == PTHREAD_MUTEX_RECURSIVE) {
>  		if ((unsigned)m->_m_count >= INT_MAX) return EAGAIN;
>  		m->_m_count++;
>  		return 0;
>  	}
> -	if (own == 0x7fffffff) return ENOTRECOVERABLE;
> -	if (own && (!(own & 0x40000000) || !(type & 4))) return EBUSY;
> +	if (own == 0x3fffffff) return ENOTRECOVERABLE;
> +	if (own || (old && !(type & 4))) return EBUSY;
>  
>  	if (m->_m_type & 128) {
>  		if (!self->robust_list.off) {
> @@ -25,6 +25,7 @@ int __pthread_mutex_trylock_owner(pthread_mutex_t *m)
>  		if (m->_m_waiters) tid |= 0x80000000;
>  		self->robust_list.pending = &m->_m_next;
>  	}
> +	tid |= old & 0x40000000;
>  
>  	if (a_cas(&m->_m_lock, old, tid) != old) {
>  		self->robust_list.pending = 0;
> @@ -39,9 +40,8 @@ int __pthread_mutex_trylock_owner(pthread_mutex_t *m)
>  	self->robust_list.head = &m->_m_next;
>  	self->robust_list.pending = 0;
>  
> -	if (own) {
> +	if (old) {
>  		m->_m_count = 0;
> -		m->_m_type |= 8;
>  		return EOWNERDEAD;
>  	}
>  
> diff --git a/src/thread/pthread_mutex_unlock.c b/src/thread/pthread_mutex_unlock.c
> index 7dd00d2..ea9f54d 100644
> --- a/src/thread/pthread_mutex_unlock.c
> +++ b/src/thread/pthread_mutex_unlock.c
> @@ -7,13 +7,18 @@ int __pthread_mutex_unlock(pthread_mutex_t *m)
>  	int cont;
>  	int type = m->_m_type & 15;
>  	int priv = (m->_m_type & 128) ^ 128;
> +	int new = 0;
>  
>  	if (type != PTHREAD_MUTEX_NORMAL) {
>  		self = __pthread_self();
> -		if ((m->_m_lock&0x7fffffff) != self->tid)
> +		int old = m->_m_lock;
> +		int own = old & 0x3fffffff;
> +		if (own != self->tid)
>  			return EPERM;
>  		if ((type&3) == PTHREAD_MUTEX_RECURSIVE && m->_m_count)
>  			return m->_m_count--, 0;
> +		if ((type&4) && (old&0x40000000))
> +			new = 0x7fffffff;
>  		if (!priv) {
>  			self->robust_list.pending = &m->_m_next;
>  			__vm_lock();
> @@ -24,7 +29,7 @@ int __pthread_mutex_unlock(pthread_mutex_t *m)
>  		if (next != &self->robust_list.head) *(volatile void *volatile *)
>  			((char *)next - sizeof(void *)) = prev;
>  	}
> -	cont = a_swap(&m->_m_lock, (type & 8) ? 0x7fffffff : 0);
> +	cont = a_swap(&m->_m_lock, new);
>  	if (type != PTHREAD_MUTEX_NORMAL && !priv) {
>  		self->robust_list.pending = 0;
>  		__vm_unlock();

Just remembered I'd left this unfinished. I'm applying it now.

Rich

Powered by blists - more mailing lists

Confused about mailing lists and their use? Read about mailing lists on Wikipedia and check out these guidelines on proper formatting of your messages.