From: Jan Beulich Subject: x86/P2M: deal with partial success of p2m_set_entry() M2P and PoD stats need to remain in sync with P2M; if an update succeeds only partially, respective adjustments need to be made. If updates get made before the call, they may also need undoing upon complete failure (i.e. including the single-page case). Log-dirty state would better also be kept in sync. Note that the change to set_typed_p2m_entry() may not be strictly necessary (due to the order restriction enforced near the top of the function), but is being kept here to be on the safe side. This is CVE-2021-28705 and CVE-2021-28709 / XSA-389. Signed-off-by: Jan Beulich Reviewed-by: Roger Pau Monné --- I think the M2P aspect (wrt wrongly allowing multiple GFN mappings of a page that may later get freed, i.e. primarily the grant table v2 status pages) is only theoretical at this point. That's because any such special mappings would only ever be 4k ones, and replacing existing small mappings can't fail with -ENOMEM from intermediate page table allocation. And I think all other error returns would, if in fact reachable, represent (presumably security relevant) bugs themselves, so addressing this part may be merely defense-in-depth. --- v4: Re-base ahead of "x86/mm: update log-dirty bitmap when manipulating P2M". v3: Remove bogus special casing of order-0 from p2m_remove_page() and guest_physmap_add_entry(). Adjust description accordingly. v2: Correct undo conditional in p2m_remove_page(). --- a/xen/arch/x86/mm/p2m.c +++ b/xen/arch/x86/mm/p2m.c @@ -794,6 +794,7 @@ p2m_remove_page(struct p2m_domain *p2m, unsigned long i; p2m_type_t t; p2m_access_t a; + int rc; ASSERT(gfn_locked_by_me(p2m, gfn)); P2M_DEBUG("removing gfn=%#lx mfn=%#lx\n", gfn_x(gfn), mfn_x(mfn)); @@ -825,8 +826,27 @@ p2m_remove_page(struct p2m_domain *p2m, ioreq_request_mapcache_invalidate(p2m->domain); - return p2m_set_entry(p2m, gfn, INVALID_MFN, page_order, p2m_invalid, - p2m->default_access); + rc = p2m_set_entry(p2m, gfn, INVALID_MFN, page_order, p2m_invalid, + p2m->default_access); + if ( likely(!rc) || !mfn_valid(mfn) ) + return rc; + + /* + * The operation may have partially succeeded. For the failed part we need + * to undo the M2P update and, out of precaution, mark the pages dirty + * again. + */ + for ( i = 0; i < (1UL << page_order); ++i ) + { + p2m->get_entry(p2m, gfn_add(gfn, i), &t, &a, 0, NULL, NULL); + if ( !p2m_is_hole(t) && !p2m_is_special(t) && !p2m_is_shared(t) ) + { + set_gpfn_from_mfn(mfn_x(mfn) + i, gfn_x(gfn) + i); + paging_mark_pfn_dirty(p2m->domain, _pfn(gfn_x(gfn) + i)); + } + } + + return rc; } int @@ -1022,13 +1042,8 @@ guest_physmap_add_entry(struct domain *d /* Now, actually do the two-way mapping */ rc = p2m_set_entry(p2m, gfn, mfn, page_order, t, p2m->default_access); - if ( rc == 0 ) + if ( likely(!rc) ) { - pod_lock(p2m); - p2m->pod.entry_count -= pod_count; - BUG_ON(p2m->pod.entry_count < 0); - pod_unlock(p2m); - if ( !p2m_is_grant(t) ) { for ( i = 0; i < (1UL << page_order); i++ ) @@ -1036,6 +1051,42 @@ guest_physmap_add_entry(struct domain *d gfn_x(gfn_add(gfn, i))); } } + else + { + /* + * The operation may have partially succeeded. For the successful part + * we need to update M2P and dirty state, while for the failed part we + * may need to adjust PoD stats as well as undo the earlier M2P update. + */ + for ( i = 0; i < (1UL << page_order); ++i ) + { + omfn = p2m->get_entry(p2m, gfn_add(gfn, i), &ot, &a, 0, NULL, NULL); + if ( p2m_is_pod(ot) ) + { + BUG_ON(!pod_count); + --pod_count; + } + else if ( mfn_eq(omfn, mfn_add(mfn, i)) && ot == t && + a == p2m->default_access && !p2m_is_grant(t) ) + { + set_gpfn_from_mfn(mfn_x(omfn), gfn_x(gfn) + i); + paging_mark_pfn_dirty(d, _pfn(gfn_x(gfn) + i)); + } + else if ( p2m_is_ram(ot) && !p2m_is_paged(ot) ) + { + ASSERT(mfn_valid(omfn)); + set_gpfn_from_mfn(mfn_x(omfn), gfn_x(gfn) + i); + } + } + } + + if ( pod_count ) + { + pod_lock(p2m); + p2m->pod.entry_count -= pod_count; + BUG_ON(p2m->pod.entry_count < 0); + pod_unlock(p2m); + } out: p2m_unlock(p2m); @@ -1325,6 +1376,49 @@ static int set_typed_p2m_entry(struct do return 0; } } + + P2M_DEBUG("set %d %lx %lx\n", gfn_p2mt, gfn_l, mfn_x(mfn)); + rc = p2m_set_entry(p2m, gfn, mfn, order, gfn_p2mt, access); + if ( unlikely(rc) ) + { + gdprintk(XENLOG_ERR, "p2m_set_entry: %#lx:%u -> %d (0x%"PRI_mfn")\n", + gfn_l, order, rc, mfn_x(mfn)); + + /* + * The operation may have partially succeeded. For the successful part + * we need to update PoD stats, M2P, and dirty state. + */ + if ( order != PAGE_ORDER_4K ) + { + unsigned long i; + + for ( i = 0; i < (1UL << order); ++i ) + { + p2m_type_t t; + mfn_t cmfn = p2m->get_entry(p2m, gfn_add(gfn, i), &t, &a, 0, + NULL, NULL); + + if ( !mfn_eq(cmfn, mfn_add(mfn, i)) || t != gfn_p2mt || + a != access ) + continue; + + if ( p2m_is_ram(ot) ) + { + ASSERT(mfn_valid(mfn_add(omfn, i))); + set_gpfn_from_mfn(mfn_x(omfn) + i, INVALID_M2P_ENTRY); + + ioreq_request_mapcache_invalidate(d); + } + else if ( p2m_is_pod(ot) ) + { + pod_lock(p2m); + BUG_ON(!p2m->pod.entry_count); + --p2m->pod.entry_count; + pod_unlock(p2m); + } + } + } + } else if ( p2m_is_ram(ot) ) { unsigned long i; @@ -1337,12 +1431,6 @@ static int set_typed_p2m_entry(struct do ioreq_request_mapcache_invalidate(d); } - - P2M_DEBUG("set %d %lx %lx\n", gfn_p2mt, gfn_l, mfn_x(mfn)); - rc = p2m_set_entry(p2m, gfn, mfn, order, gfn_p2mt, access); - if ( rc ) - gdprintk(XENLOG_ERR, "p2m_set_entry: %#lx:%u -> %d (0x%"PRI_mfn")\n", - gfn_l, order, rc, mfn_x(mfn)); else if ( p2m_is_pod(ot) ) { pod_lock(p2m);