From 755b521dd14837a237a8e592ff576b10a4b93f7e Mon Sep 17 00:00:00 2001
From: Michal Kubecek
Date: Wed, 31 May 2017 09:36:09 +0200
Subject: [PATCH 01/41] import Workstation 12.5.5 module sources

---
 vmmon-only/COPYING | 339 ++
 vmmon-only/Makefile | 150 +
 vmmon-only/Makefile.kernel | 39 +
 vmmon-only/Makefile.normal | 143 +
 vmmon-only/README | 14 +
 vmmon-only/autoconf/geninclude.c | 41 +
 vmmon-only/autoconf/smpcall.c | 37 +
 vmmon-only/autoconf/tsc_khz.c | 32 +
 vmmon-only/common/apic.c | 279 +
 vmmon-only/common/apic.h | 47 +
 vmmon-only/common/comport.c | 203 +
 vmmon-only/common/comport.h | 41 +
 vmmon-only/common/cpuid.c | 209 +
 vmmon-only/common/cpuid.h | 56 +
 vmmon-only/common/hashFunc.c | 41 +
 vmmon-only/common/hostKernel.h | 83 +
 vmmon-only/common/hostif.h | 148 +
 vmmon-only/common/hostifGlobalLock.h | 45 +
 vmmon-only/common/hostifMem.h | 37 +
 vmmon-only/common/memtrack.c | 551 ++
 vmmon-only/common/memtrack.h | 60 +
 vmmon-only/common/phystrack.c | 495 ++
 vmmon-only/common/phystrack.h | 54 +
 vmmon-only/common/task.c | 2180 ++++++++
 vmmon-only/common/task.h | 43 +
 vmmon-only/common/vmx86.c | 2920 +++++++++++
 vmmon-only/common/vmx86.h | 201 +
 vmmon-only/include/address_defs.h | 95 +
 vmmon-only/include/basic_initblock.h | 60 +
 vmmon-only/include/circList.h | 428 ++
 vmmon-only/include/community_source.h | 67 +
 vmmon-only/include/compat_autoconf.h | 41 +
 vmmon-only/include/compat_cred.h | 47 +
 vmmon-only/include/compat_highmem.h | 32 +
 vmmon-only/include/compat_interrupt.h | 55 +
 vmmon-only/include/compat_ioport.h | 63 +
 vmmon-only/include/compat_kernel.h | 42 +
 vmmon-only/include/compat_module.h | 83 +
 vmmon-only/include/compat_page.h | 75 +
 vmmon-only/include/compat_pci.h | 72 +
 vmmon-only/include/compat_pgtable.h | 139 +
 vmmon-only/include/compat_sched.h | 293 ++
 vmmon-only/include/compat_semaphore.h | 49 +
 vmmon-only/include/compat_spinlock.h | 48 +
 vmmon-only/include/compat_version.h | 131 +
 vmmon-only/include/contextinfo.h | 78 +
 vmmon-only/include/cpu_defs.h | 49 +
 vmmon-only/include/cpuid_info.h | 95 +
 vmmon-only/include/crossgdt.h | 79 +
 vmmon-only/include/driver-config.h | 79 +
 vmmon-only/include/hashFunc.h | 345 ++
 vmmon-only/include/includeCheck.h | 17 +
 vmmon-only/include/initblock.h | 43 +
 vmmon-only/include/iocontrols.h | 623 +++
 vmmon-only/include/memDefaults.h | 154 +
 vmmon-only/include/modulecall.h | 495 ++
 vmmon-only/include/modulecallstructs.h | 99 +
 vmmon-only/include/mon_assert.h | 207 +
 vmmon-only/include/monitorAction_exported.h | 156 +
 vmmon-only/include/numa_defs.h | 72 +
 vmmon-only/include/overheadmem_types.h | 131 +
 vmmon-only/include/pageLock_defs.h | 105 +
 vmmon-only/include/pagelist.h | 101 +
 vmmon-only/include/pcip_defs.h | 122 +
 vmmon-only/include/perfctr_generic.h | 72 +
 vmmon-only/include/pgtbl.h | 382 ++
 vmmon-only/include/pshare_ext.h | 64 +
 vmmon-only/include/ptsc.h | 263 +
 vmmon-only/include/rateconv.h | 118 +
 vmmon-only/include/uccostTable.h | 34 +
 vmmon-only/include/usercalldefs.h | 35 +
 vmmon-only/include/vcpuid.h | 65 +
 vmmon-only/include/vcpuset.h | 808 +++
 vmmon-only/include/vcpuset_types.h | 58 +
 vmmon-only/include/versioned_atomic.h | 170 +
 vmmon-only/include/vm_asm.h | 93 +
 vmmon-only/include/vm_asm_x86.h | 998 ++++
 vmmon-only/include/vm_asm_x86_64.h | 136 +
 vmmon-only/include/vm_assert.h | 336 ++
 vmmon-only/include/vm_atomic.h | 3896 ++++++++++++++
 vmmon-only/include/vm_basic_asm.h | 1258 +++++
 vmmon-only/include/vm_basic_asm_x86.h | 548 ++
 vmmon-only/include/vm_basic_asm_x86_64.h | 615 +++
 vmmon-only/include/vm_basic_asm_x86_common.h | 313 ++
 vmmon-only/include/vm_basic_defs.h | 786 +++
 vmmon-only/include/vm_basic_math.h | 166 +
 vmmon-only/include/vm_basic_types.h | 1158 ++++
 vmmon-only/include/vm_time.h | 55 +
 vmmon-only/include/vmm_constants.h | 48 +
 vmmon-only/include/vmmem_shared.h | 108 +
 vmmon-only/include/vmware.h | 57 +
 vmmon-only/include/vmware_pack_begin.h | 43 +
 vmmon-only/include/vmware_pack_end.h | 44 +
 vmmon-only/include/vmware_pack_init.h | 65 +
 vmmon-only/include/x86_basic_defs.h | 242 +
 vmmon-only/include/x86apic.h | 353 ++
 vmmon-only/include/x86cpuid.h | 1783 +++++++
 vmmon-only/include/x86cpuid_asm.h | 445 ++
 vmmon-only/include/x86desc.h | 601 +++
 vmmon-only/include/x86msr.h | 505 ++
 vmmon-only/include/x86perfctr.h | 976 ++++
 vmmon-only/include/x86segdescrs.h | 101 +
 vmmon-only/include/x86sel.h | 67 +
 vmmon-only/include/x86svm.h | 496 ++
 vmmon-only/include/x86types.h | 479 ++
 vmmon-only/include/x86vendor.h | 37 +
 vmmon-only/include/x86vt-vmcs-fields.h | 213 +
 vmmon-only/include/x86vt.h | 1047 ++++
 vmmon-only/include/x86vtinstr.h | 623 +++
 vmmon-only/linux/driver.c | 2103 ++++++++
 vmmon-only/linux/driver.h | 118 +
 vmmon-only/linux/driverLog.c | 207 +
 vmmon-only/linux/driverLog.h | 37 +
 vmmon-only/linux/hostif.c | 3603 +++++++++++++
 vmmon-only/linux/hostif_priv.h | 50 +
 vmmon-only/linux/vmhost.h | 100 +
 vmmon-only/linux/vmmonInt.h | 109 +
 vmmon-only/vmcore/driver_vmcore.h | 34 +
 vmmon-only/vmcore/moduleloop.c | 265 +
 vmnet-only/COPYING | 339 ++
 vmnet-only/Makefile | 150 +
 vmnet-only/Makefile.kernel | 40 +
 vmnet-only/Makefile.normal | 77 +
 vmnet-only/bridge.c | 1761 +++++++
 vmnet-only/community_source.h | 67 +
 vmnet-only/compat_autoconf.h | 41 +
 vmnet-only/compat_module.h | 83 +
 vmnet-only/compat_netdevice.h | 346 ++
 vmnet-only/compat_skbuff.h | 170 +
 vmnet-only/compat_sock.h | 77 +
 vmnet-only/compat_version.h | 131 +
 vmnet-only/driver-config.h | 79 +
 vmnet-only/driver.c | 1707 ++++++
 vmnet-only/geninclude.c | 41 +
 vmnet-only/hub.c | 733 +++
 vmnet-only/includeCheck.h | 17 +
 vmnet-only/monitorAction_exported.h | 156 +
 vmnet-only/net.h | 209 +
 vmnet-only/netdev_has_dev_net.c | 37 +
 vmnet-only/netdev_has_net.c | 43 +
 vmnet-only/netif.c | 598 +++
 vmnet-only/nfhook_uses_skb.c | 45 +
 vmnet-only/procfs.c | 407 ++
 vmnet-only/skblin.c | 41 +
 vmnet-only/smac.c | 4951 ++++++++++++++++++
 vmnet-only/smac.h | 119 +
 vmnet-only/smac_compat.c | 427 ++
 vmnet-only/smac_compat.h | 60 +
 vmnet-only/userif.c | 1161 ++++
 vmnet-only/vm_assert.h | 336 ++
 vmnet-only/vm_atomic.h | 3896 ++++++++++++++
 vmnet-only/vm_basic_asm.h | 1258 +++++
 vmnet-only/vm_basic_asm_x86.h | 548 ++
 vmnet-only/vm_basic_asm_x86_64.h | 615 +++
 vmnet-only/vm_basic_asm_x86_common.h | 313 ++
 vmnet-only/vm_basic_defs.h | 786 +++
 vmnet-only/vm_basic_types.h | 1158 ++++
 vmnet-only/vm_device_version.h | 309 ++
 vmnet-only/vm_oui.h | 207 +
 vmnet-only/vmnetInt.h | 100 +
 vmnet-only/vmware_pack_begin.h | 43 +
 vmnet-only/vmware_pack_end.h | 44 +
 vmnet-only/vmware_pack_init.h | 65 +
 vmnet-only/vnet.h | 450 ++
 vmnet-only/vnetEvent.c | 557 ++
 vmnet-only/vnetEvent.h | 50 +
 vmnet-only/vnetFilter.h | 191 +
 vmnet-only/vnetFilterInt.h | 0
 vmnet-only/vnetInt.h | 365 ++
 vmnet-only/vnetKernel.h | 83 +
 vmnet-only/vnetUserListener.c | 336 ++
 vmnet-only/x86cpuid.h | 1815 +++++++
 172 files changed, 68382 insertions(+)
 create mode 100644 vmmon-only/COPYING
 create mode 100644 vmmon-only/Makefile
 create mode 100644 vmmon-only/Makefile.kernel
 create mode 100644 vmmon-only/Makefile.normal
 create mode 100644 vmmon-only/README
 create mode 100644 vmmon-only/autoconf/geninclude.c
 create mode 100644
vmmon-only/autoconf/smpcall.c create mode 100644 vmmon-only/autoconf/tsc_khz.c create mode 100644 vmmon-only/common/apic.c create mode 100644 vmmon-only/common/apic.h create mode 100644 vmmon-only/common/comport.c create mode 100644 vmmon-only/common/comport.h create mode 100644 vmmon-only/common/cpuid.c create mode 100644 vmmon-only/common/cpuid.h create mode 100644 vmmon-only/common/hashFunc.c create mode 100644 vmmon-only/common/hostKernel.h create mode 100644 vmmon-only/common/hostif.h create mode 100644 vmmon-only/common/hostifGlobalLock.h create mode 100644 vmmon-only/common/hostifMem.h create mode 100644 vmmon-only/common/memtrack.c create mode 100644 vmmon-only/common/memtrack.h create mode 100644 vmmon-only/common/phystrack.c create mode 100644 vmmon-only/common/phystrack.h create mode 100644 vmmon-only/common/task.c create mode 100644 vmmon-only/common/task.h create mode 100644 vmmon-only/common/vmx86.c create mode 100644 vmmon-only/common/vmx86.h create mode 100644 vmmon-only/include/address_defs.h create mode 100644 vmmon-only/include/basic_initblock.h create mode 100644 vmmon-only/include/circList.h create mode 100644 vmmon-only/include/community_source.h create mode 100644 vmmon-only/include/compat_autoconf.h create mode 100644 vmmon-only/include/compat_cred.h create mode 100644 vmmon-only/include/compat_highmem.h create mode 100644 vmmon-only/include/compat_interrupt.h create mode 100644 vmmon-only/include/compat_ioport.h create mode 100644 vmmon-only/include/compat_kernel.h create mode 100644 vmmon-only/include/compat_module.h create mode 100644 vmmon-only/include/compat_page.h create mode 100644 vmmon-only/include/compat_pci.h create mode 100644 vmmon-only/include/compat_pgtable.h create mode 100644 vmmon-only/include/compat_sched.h create mode 100644 vmmon-only/include/compat_semaphore.h create mode 100644 vmmon-only/include/compat_spinlock.h create mode 100644 vmmon-only/include/compat_version.h create mode 100644 vmmon-only/include/contextinfo.h create mode 100644 vmmon-only/include/cpu_defs.h create mode 100644 vmmon-only/include/cpuid_info.h create mode 100644 vmmon-only/include/crossgdt.h create mode 100644 vmmon-only/include/driver-config.h create mode 100644 vmmon-only/include/hashFunc.h create mode 100644 vmmon-only/include/includeCheck.h create mode 100644 vmmon-only/include/initblock.h create mode 100644 vmmon-only/include/iocontrols.h create mode 100644 vmmon-only/include/memDefaults.h create mode 100644 vmmon-only/include/modulecall.h create mode 100644 vmmon-only/include/modulecallstructs.h create mode 100644 vmmon-only/include/mon_assert.h create mode 100644 vmmon-only/include/monitorAction_exported.h create mode 100644 vmmon-only/include/numa_defs.h create mode 100644 vmmon-only/include/overheadmem_types.h create mode 100644 vmmon-only/include/pageLock_defs.h create mode 100644 vmmon-only/include/pagelist.h create mode 100644 vmmon-only/include/pcip_defs.h create mode 100644 vmmon-only/include/perfctr_generic.h create mode 100644 vmmon-only/include/pgtbl.h create mode 100644 vmmon-only/include/pshare_ext.h create mode 100644 vmmon-only/include/ptsc.h create mode 100644 vmmon-only/include/rateconv.h create mode 100644 vmmon-only/include/uccostTable.h create mode 100644 vmmon-only/include/usercalldefs.h create mode 100644 vmmon-only/include/vcpuid.h create mode 100644 vmmon-only/include/vcpuset.h create mode 100644 vmmon-only/include/vcpuset_types.h create mode 100644 vmmon-only/include/versioned_atomic.h create mode 100644 vmmon-only/include/vm_asm.h create 
mode 100644 vmmon-only/include/vm_asm_x86.h create mode 100644 vmmon-only/include/vm_asm_x86_64.h create mode 100644 vmmon-only/include/vm_assert.h create mode 100644 vmmon-only/include/vm_atomic.h create mode 100644 vmmon-only/include/vm_basic_asm.h create mode 100644 vmmon-only/include/vm_basic_asm_x86.h create mode 100644 vmmon-only/include/vm_basic_asm_x86_64.h create mode 100644 vmmon-only/include/vm_basic_asm_x86_common.h create mode 100644 vmmon-only/include/vm_basic_defs.h create mode 100644 vmmon-only/include/vm_basic_math.h create mode 100644 vmmon-only/include/vm_basic_types.h create mode 100644 vmmon-only/include/vm_time.h create mode 100644 vmmon-only/include/vmm_constants.h create mode 100644 vmmon-only/include/vmmem_shared.h create mode 100644 vmmon-only/include/vmware.h create mode 100644 vmmon-only/include/vmware_pack_begin.h create mode 100644 vmmon-only/include/vmware_pack_end.h create mode 100644 vmmon-only/include/vmware_pack_init.h create mode 100644 vmmon-only/include/x86_basic_defs.h create mode 100644 vmmon-only/include/x86apic.h create mode 100644 vmmon-only/include/x86cpuid.h create mode 100644 vmmon-only/include/x86cpuid_asm.h create mode 100644 vmmon-only/include/x86desc.h create mode 100644 vmmon-only/include/x86msr.h create mode 100644 vmmon-only/include/x86perfctr.h create mode 100644 vmmon-only/include/x86segdescrs.h create mode 100644 vmmon-only/include/x86sel.h create mode 100644 vmmon-only/include/x86svm.h create mode 100644 vmmon-only/include/x86types.h create mode 100644 vmmon-only/include/x86vendor.h create mode 100644 vmmon-only/include/x86vt-vmcs-fields.h create mode 100644 vmmon-only/include/x86vt.h create mode 100644 vmmon-only/include/x86vtinstr.h create mode 100644 vmmon-only/linux/driver.c create mode 100644 vmmon-only/linux/driver.h create mode 100644 vmmon-only/linux/driverLog.c create mode 100644 vmmon-only/linux/driverLog.h create mode 100644 vmmon-only/linux/hostif.c create mode 100644 vmmon-only/linux/hostif_priv.h create mode 100644 vmmon-only/linux/vmhost.h create mode 100644 vmmon-only/linux/vmmonInt.h create mode 100644 vmmon-only/vmcore/driver_vmcore.h create mode 100644 vmmon-only/vmcore/moduleloop.c create mode 100644 vmnet-only/COPYING create mode 100644 vmnet-only/Makefile create mode 100644 vmnet-only/Makefile.kernel create mode 100644 vmnet-only/Makefile.normal create mode 100644 vmnet-only/bridge.c create mode 100644 vmnet-only/community_source.h create mode 100644 vmnet-only/compat_autoconf.h create mode 100644 vmnet-only/compat_module.h create mode 100644 vmnet-only/compat_netdevice.h create mode 100644 vmnet-only/compat_skbuff.h create mode 100644 vmnet-only/compat_sock.h create mode 100644 vmnet-only/compat_version.h create mode 100644 vmnet-only/driver-config.h create mode 100644 vmnet-only/driver.c create mode 100644 vmnet-only/geninclude.c create mode 100644 vmnet-only/hub.c create mode 100644 vmnet-only/includeCheck.h create mode 100644 vmnet-only/monitorAction_exported.h create mode 100644 vmnet-only/net.h create mode 100644 vmnet-only/netdev_has_dev_net.c create mode 100644 vmnet-only/netdev_has_net.c create mode 100644 vmnet-only/netif.c create mode 100644 vmnet-only/nfhook_uses_skb.c create mode 100644 vmnet-only/procfs.c create mode 100644 vmnet-only/skblin.c create mode 100644 vmnet-only/smac.c create mode 100644 vmnet-only/smac.h create mode 100644 vmnet-only/smac_compat.c create mode 100644 vmnet-only/smac_compat.h create mode 100644 vmnet-only/userif.c create mode 100644 vmnet-only/vm_assert.h create mode 
100644 vmnet-only/vm_atomic.h create mode 100644 vmnet-only/vm_basic_asm.h create mode 100644 vmnet-only/vm_basic_asm_x86.h create mode 100644 vmnet-only/vm_basic_asm_x86_64.h create mode 100644 vmnet-only/vm_basic_asm_x86_common.h create mode 100644 vmnet-only/vm_basic_defs.h create mode 100644 vmnet-only/vm_basic_types.h create mode 100644 vmnet-only/vm_device_version.h create mode 100644 vmnet-only/vm_oui.h create mode 100644 vmnet-only/vmnetInt.h create mode 100644 vmnet-only/vmware_pack_begin.h create mode 100644 vmnet-only/vmware_pack_end.h create mode 100644 vmnet-only/vmware_pack_init.h create mode 100644 vmnet-only/vnet.h create mode 100644 vmnet-only/vnetEvent.c create mode 100644 vmnet-only/vnetEvent.h create mode 100644 vmnet-only/vnetFilter.h create mode 100644 vmnet-only/vnetFilterInt.h create mode 100644 vmnet-only/vnetInt.h create mode 100644 vmnet-only/vnetKernel.h create mode 100644 vmnet-only/vnetUserListener.c create mode 100644 vmnet-only/x86cpuid.h diff --git a/vmmon-only/COPYING b/vmmon-only/COPYING new file mode 100644 index 00000000..d511905c --- /dev/null +++ b/vmmon-only/COPYING @@ -0,0 +1,339 @@ + GNU GENERAL PUBLIC LICENSE + Version 2, June 1991 + + Copyright (C) 1989, 1991 Free Software Foundation, Inc., + 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + Everyone is permitted to copy and distribute verbatim copies + of this license document, but changing it is not allowed. + + Preamble + + The licenses for most software are designed to take away your +freedom to share and change it. By contrast, the GNU General Public +License is intended to guarantee your freedom to share and change free +software--to make sure the software is free for all its users. This +General Public License applies to most of the Free Software +Foundation's software and to any other program whose authors commit to +using it. (Some other Free Software Foundation software is covered by +the GNU Lesser General Public License instead.) You can apply it to +your programs, too. + + When we speak of free software, we are referring to freedom, not +price. Our General Public Licenses are designed to make sure that you +have the freedom to distribute copies of free software (and charge for +this service if you wish), that you receive source code or can get it +if you want it, that you can change the software or use pieces of it +in new free programs; and that you know you can do these things. + + To protect your rights, we need to make restrictions that forbid +anyone to deny you these rights or to ask you to surrender the rights. +These restrictions translate to certain responsibilities for you if you +distribute copies of the software, or if you modify it. + + For example, if you distribute copies of such a program, whether +gratis or for a fee, you must give the recipients all the rights that +you have. You must make sure that they, too, receive or can get the +source code. And you must show them these terms so they know their +rights. + + We protect your rights with two steps: (1) copyright the software, and +(2) offer you this license which gives you legal permission to copy, +distribute and/or modify the software. + + Also, for each author's protection and ours, we want to make certain +that everyone understands that there is no warranty for this free +software. 
If the software is modified by someone else and passed on, we +want its recipients to know that what they have is not the original, so +that any problems introduced by others will not reflect on the original +authors' reputations. + + Finally, any free program is threatened constantly by software +patents. We wish to avoid the danger that redistributors of a free +program will individually obtain patent licenses, in effect making the +program proprietary. To prevent this, we have made it clear that any +patent must be licensed for everyone's free use or not licensed at all. + + The precise terms and conditions for copying, distribution and +modification follow. + + GNU GENERAL PUBLIC LICENSE + TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION + + 0. This License applies to any program or other work which contains +a notice placed by the copyright holder saying it may be distributed +under the terms of this General Public License. The "Program", below, +refers to any such program or work, and a "work based on the Program" +means either the Program or any derivative work under copyright law: +that is to say, a work containing the Program or a portion of it, +either verbatim or with modifications and/or translated into another +language. (Hereinafter, translation is included without limitation in +the term "modification".) Each licensee is addressed as "you". + +Activities other than copying, distribution and modification are not +covered by this License; they are outside its scope. The act of +running the Program is not restricted, and the output from the Program +is covered only if its contents constitute a work based on the +Program (independent of having been made by running the Program). +Whether that is true depends on what the Program does. + + 1. You may copy and distribute verbatim copies of the Program's +source code as you receive it, in any medium, provided that you +conspicuously and appropriately publish on each copy an appropriate +copyright notice and disclaimer of warranty; keep intact all the +notices that refer to this License and to the absence of any warranty; +and give any other recipients of the Program a copy of this License +along with the Program. + +You may charge a fee for the physical act of transferring a copy, and +you may at your option offer warranty protection in exchange for a fee. + + 2. You may modify your copy or copies of the Program or any portion +of it, thus forming a work based on the Program, and copy and +distribute such modifications or work under the terms of Section 1 +above, provided that you also meet all of these conditions: + + a) You must cause the modified files to carry prominent notices + stating that you changed the files and the date of any change. + + b) You must cause any work that you distribute or publish, that in + whole or in part contains or is derived from the Program or any + part thereof, to be licensed as a whole at no charge to all third + parties under the terms of this License. + + c) If the modified program normally reads commands interactively + when run, you must cause it, when started running for such + interactive use in the most ordinary way, to print or display an + announcement including an appropriate copyright notice and a + notice that there is no warranty (or else, saying that you provide + a warranty) and that users may redistribute the program under + these conditions, and telling the user how to view a copy of this + License. 
(Exception: if the Program itself is interactive but + does not normally print such an announcement, your work based on + the Program is not required to print an announcement.) + +These requirements apply to the modified work as a whole. If +identifiable sections of that work are not derived from the Program, +and can be reasonably considered independent and separate works in +themselves, then this License, and its terms, do not apply to those +sections when you distribute them as separate works. But when you +distribute the same sections as part of a whole which is a work based +on the Program, the distribution of the whole must be on the terms of +this License, whose permissions for other licensees extend to the +entire whole, and thus to each and every part regardless of who wrote it. + +Thus, it is not the intent of this section to claim rights or contest +your rights to work written entirely by you; rather, the intent is to +exercise the right to control the distribution of derivative or +collective works based on the Program. + +In addition, mere aggregation of another work not based on the Program +with the Program (or with a work based on the Program) on a volume of +a storage or distribution medium does not bring the other work under +the scope of this License. + + 3. You may copy and distribute the Program (or a work based on it, +under Section 2) in object code or executable form under the terms of +Sections 1 and 2 above provided that you also do one of the following: + + a) Accompany it with the complete corresponding machine-readable + source code, which must be distributed under the terms of Sections + 1 and 2 above on a medium customarily used for software interchange; or, + + b) Accompany it with a written offer, valid for at least three + years, to give any third party, for a charge no more than your + cost of physically performing source distribution, a complete + machine-readable copy of the corresponding source code, to be + distributed under the terms of Sections 1 and 2 above on a medium + customarily used for software interchange; or, + + c) Accompany it with the information you received as to the offer + to distribute corresponding source code. (This alternative is + allowed only for noncommercial distribution and only if you + received the program in object code or executable form with such + an offer, in accord with Subsection b above.) + +The source code for a work means the preferred form of the work for +making modifications to it. For an executable work, complete source +code means all the source code for all modules it contains, plus any +associated interface definition files, plus the scripts used to +control compilation and installation of the executable. However, as a +special exception, the source code distributed need not include +anything that is normally distributed (in either source or binary +form) with the major components (compiler, kernel, and so on) of the +operating system on which the executable runs, unless that component +itself accompanies the executable. + +If distribution of executable or object code is made by offering +access to copy from a designated place, then offering equivalent +access to copy the source code from the same place counts as +distribution of the source code, even though third parties are not +compelled to copy the source along with the object code. + + 4. You may not copy, modify, sublicense, or distribute the Program +except as expressly provided under this License. 
Any attempt +otherwise to copy, modify, sublicense or distribute the Program is +void, and will automatically terminate your rights under this License. +However, parties who have received copies, or rights, from you under +this License will not have their licenses terminated so long as such +parties remain in full compliance. + + 5. You are not required to accept this License, since you have not +signed it. However, nothing else grants you permission to modify or +distribute the Program or its derivative works. These actions are +prohibited by law if you do not accept this License. Therefore, by +modifying or distributing the Program (or any work based on the +Program), you indicate your acceptance of this License to do so, and +all its terms and conditions for copying, distributing or modifying +the Program or works based on it. + + 6. Each time you redistribute the Program (or any work based on the +Program), the recipient automatically receives a license from the +original licensor to copy, distribute or modify the Program subject to +these terms and conditions. You may not impose any further +restrictions on the recipients' exercise of the rights granted herein. +You are not responsible for enforcing compliance by third parties to +this License. + + 7. If, as a consequence of a court judgment or allegation of patent +infringement or for any other reason (not limited to patent issues), +conditions are imposed on you (whether by court order, agreement or +otherwise) that contradict the conditions of this License, they do not +excuse you from the conditions of this License. If you cannot +distribute so as to satisfy simultaneously your obligations under this +License and any other pertinent obligations, then as a consequence you +may not distribute the Program at all. For example, if a patent +license would not permit royalty-free redistribution of the Program by +all those who receive copies directly or indirectly through you, then +the only way you could satisfy both it and this License would be to +refrain entirely from distribution of the Program. + +If any portion of this section is held invalid or unenforceable under +any particular circumstance, the balance of the section is intended to +apply and the section as a whole is intended to apply in other +circumstances. + +It is not the purpose of this section to induce you to infringe any +patents or other property right claims or to contest validity of any +such claims; this section has the sole purpose of protecting the +integrity of the free software distribution system, which is +implemented by public license practices. Many people have made +generous contributions to the wide range of software distributed +through that system in reliance on consistent application of that +system; it is up to the author/donor to decide if he or she is willing +to distribute software through any other system and a licensee cannot +impose that choice. + +This section is intended to make thoroughly clear what is believed to +be a consequence of the rest of this License. + + 8. If the distribution and/or use of the Program is restricted in +certain countries either by patents or by copyrighted interfaces, the +original copyright holder who places the Program under this License +may add an explicit geographical distribution limitation excluding +those countries, so that distribution is permitted only in or among +countries not thus excluded. In such case, this License incorporates +the limitation as if written in the body of this License. + + 9. 
The Free Software Foundation may publish revised and/or new versions +of the General Public License from time to time. Such new versions will +be similar in spirit to the present version, but may differ in detail to +address new problems or concerns. + +Each version is given a distinguishing version number. If the Program +specifies a version number of this License which applies to it and "any +later version", you have the option of following the terms and conditions +either of that version or of any later version published by the Free +Software Foundation. If the Program does not specify a version number of +this License, you may choose any version ever published by the Free Software +Foundation. + + 10. If you wish to incorporate parts of the Program into other free +programs whose distribution conditions are different, write to the author +to ask for permission. For software which is copyrighted by the Free +Software Foundation, write to the Free Software Foundation; we sometimes +make exceptions for this. Our decision will be guided by the two goals +of preserving the free status of all derivatives of our free software and +of promoting the sharing and reuse of software generally. + + NO WARRANTY + + 11. BECAUSE THE PROGRAM IS LICENSED FREE OF CHARGE, THERE IS NO WARRANTY +FOR THE PROGRAM, TO THE EXTENT PERMITTED BY APPLICABLE LAW. EXCEPT WHEN +OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES +PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED +OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF +MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE ENTIRE RISK AS +TO THE QUALITY AND PERFORMANCE OF THE PROGRAM IS WITH YOU. SHOULD THE +PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL NECESSARY SERVICING, +REPAIR OR CORRECTION. + + 12. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING +WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY AND/OR +REDISTRIBUTE THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, +INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING +OUT OF THE USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED +TO LOSS OF DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY +YOU OR THIRD PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER +PROGRAMS), EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE +POSSIBILITY OF SUCH DAMAGES. + + END OF TERMS AND CONDITIONS + + How to Apply These Terms to Your New Programs + + If you develop a new program, and you want it to be of the greatest +possible use to the public, the best way to achieve this is to make it +free software which everyone can redistribute and change under these terms. + + To do so, attach the following notices to the program. It is safest +to attach them to the start of each source file to most effectively +convey the exclusion of warranty; and each file should have at least +the "copyright" line and a pointer to where the full notice is found. + + + Copyright (C) + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. 
+ + You should have received a copy of the GNU General Public License along + with this program; if not, write to the Free Software Foundation, Inc., + 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + +Also add information on how to contact you by electronic and paper mail. + +If the program is interactive, make it output a short notice like this +when it starts in an interactive mode: + + Gnomovision version 69, Copyright (C) year name of author + Gnomovision comes with ABSOLUTELY NO WARRANTY; for details type `show w'. + This is free software, and you are welcome to redistribute it + under certain conditions; type `show c' for details. + +The hypothetical commands `show w' and `show c' should show the appropriate +parts of the General Public License. Of course, the commands you use may +be called something other than `show w' and `show c'; they could even be +mouse-clicks or menu items--whatever suits your program. + +You should also get your employer (if you work as a programmer) or your +school, if any, to sign a "copyright disclaimer" for the program, if +necessary. Here is a sample; alter the names: + + Yoyodyne, Inc., hereby disclaims all copyright interest in the program + `Gnomovision' (which makes passes at compilers) written by James Hacker. + + , 1 April 1989 + Ty Coon, President of Vice + +This General Public License does not permit incorporating your program into +proprietary programs. If your program is a subroutine library, you may +consider it more useful to permit linking proprietary applications with the +library. If this is what you want to do, use the GNU Lesser General +Public License instead of this License. diff --git a/vmmon-only/Makefile b/vmmon-only/Makefile new file mode 100644 index 00000000..de8162e8 --- /dev/null +++ b/vmmon-only/Makefile @@ -0,0 +1,150 @@ +#!/usr/bin/make -f +########################################################## +# Copyright (C) 1998-2015 VMware, Inc. All rights reserved. +# +# This program is free software; you can redistribute it and/or modify it +# under the terms of the GNU General Public License as published by the +# Free Software Foundation version 2 and no later version. +# +# This program is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +# or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +# for more details. +# +# You should have received a copy of the GNU General Public License along +# with this program; if not, write to the Free Software Foundation, Inc., +# 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA +# +########################################################## + +#### +#### VMware kernel module Makefile to be distributed externally +#### + +#### +#### SRCROOT _must_ be a relative path. +#### +SRCROOT = . + +# +# open-vm-tools doesn't replicate shared source files for different modules; +# instead, files are kept in shared locations. So define a few useful macros +# to be able to handle both cases cleanly. 
+# +INCLUDE := +ifdef OVT_SOURCE_DIR +AUTOCONF_DIR := $(OVT_SOURCE_DIR)/modules/linux/shared/autoconf +VMLIB_PATH = $(OVT_SOURCE_DIR)/lib/$(1) +INCLUDE += -I$(OVT_SOURCE_DIR)/modules/linux/shared +INCLUDE += -I$(OVT_SOURCE_DIR)/lib/include +else +AUTOCONF_DIR := $(SRCROOT)/shared/autoconf +INCLUDE += -I$(SRCROOT)/shared +endif + + +VM_UNAME = $(shell uname -r) + +# Header directory for the running kernel +ifdef LINUXINCLUDE +HEADER_DIR = $(LINUXINCLUDE) +else +HEADER_DIR = /lib/modules/$(VM_UNAME)/build/include +endif + +BUILD_DIR = $(HEADER_DIR)/.. + +DRIVER := vmmon +PRODUCT := @@PRODUCT@@ + +# Grep program +GREP = /bin/grep + +vm_check_gcc = $(shell if $(CC) $(1) -S -o /dev/null -xc /dev/null \ + > /dev/null 2>&1; then echo "$(1)"; else echo "$(2)"; fi) +vm_check_file = $(shell if test -f $(1); then echo "yes"; else echo "no"; fi) + +ifndef VM_KBUILD +VM_KBUILD := no +ifeq ($(call vm_check_file,$(BUILD_DIR)/Makefile), yes) +VM_KBUILD := yes +endif +export VM_KBUILD +endif + +ifndef VM_KBUILD_SHOWN +ifeq ($(VM_KBUILD), no) +VM_DUMMY := $(shell echo >&2 "Using standalone build system.") +else +VM_DUMMY := $(shell echo >&2 "Using kernel build system.") +endif +VM_KBUILD_SHOWN := yes +export VM_KBUILD_SHOWN +endif + +ifneq ($(VM_KBUILD), no) + +VMCCVER := $(shell $(CC) -dumpversion) + +# If there is no version defined, we are in toplevel pass, not yet in kernel makefiles... +ifeq ($(VERSION),) + +DRIVER_KO := $(DRIVER).ko + +.PHONY: $(DRIVER_KO) + +auto-build: $(DRIVER_KO) + cp -f $< $(SRCROOT)/../$(DRIVER).o + +# $(DRIVER_KO) is a phony target, so compare file times explicitly +$(DRIVER): $(DRIVER_KO) + if [ $< -nt $@ ] || [ ! -e $@ ] ; then cp -f $< $@; fi + +# Pass gcc version down the chain, so we can detect if kernel attempts to use unapproved compiler +VM_CCVER := $(VMCCVER) +export VM_CCVER +VM_CC := $(CC) +export VM_CC + +MAKEOVERRIDES := $(filter-out CC=%,$(MAKEOVERRIDES)) + +# +# Define a setup target that gets built before the actual driver. +# This target may not be used at all, but if it is then it will be defined +# in Makefile.kernel +# +prebuild:: ; +postbuild:: ; + +$(DRIVER_KO): prebuild + $(MAKE) -C $(BUILD_DIR) SUBDIRS=$$PWD SRCROOT=$$PWD/$(SRCROOT) \ + MODULEBUILDDIR=$(MODULEBUILDDIR) modules + $(MAKE) -C $$PWD SRCROOT=$$PWD/$(SRCROOT) \ + MODULEBUILDDIR=$(MODULEBUILDDIR) postbuild +endif + +vm_check_build = $(shell if $(CC) $(KBUILD_CPPFLAGS) $(KBUILD_CFLAGS) \ + $(CPPFLAGS) $(CFLAGS) $(CFLAGS_KERNEL) $(LINUXINCLUDE) \ + $(EXTRA_CFLAGS) -Iinclude2/asm/mach-default \ + -DKBUILD_BASENAME=\"$(DRIVER)\" \ + -Werror -S -o /dev/null -xc $(1) \ + > /dev/null 2>&1; then echo "$(2)"; else echo "$(3)"; fi) + +CC_WARNINGS := -Wall -Wstrict-prototypes +CC_OPTS := $(GLOBAL_DEFS) $(CC_WARNINGS) -DVMW_USING_KBUILD +ifdef VMX86_DEVEL +CC_OPTS += -DVMX86_DEVEL +endif +ifdef VMX86_DEBUG +CC_OPTS += -DVMX86_DEBUG +endif + +include $(SRCROOT)/Makefile.kernel + +else + +include $(SRCROOT)/Makefile.normal + +endif + +#.SILENT: diff --git a/vmmon-only/Makefile.kernel b/vmmon-only/Makefile.kernel new file mode 100644 index 00000000..bf805e02 --- /dev/null +++ b/vmmon-only/Makefile.kernel @@ -0,0 +1,39 @@ +#!/usr/bin/make -f +########################################################## +# Copyright (C) 1998,2015 VMware, Inc. All rights reserved. +# +# This program is free software; you can redistribute it and/or modify it +# under the terms of the GNU General Public License as published by the +# Free Software Foundation version 2 and no later version. 
+#
+# This program is distributed in the hope that it will be useful, but
+# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+# or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+# for more details.
+#
+# You should have received a copy of the GNU General Public License along
+# with this program; if not, write to the Free Software Foundation, Inc.,
+# 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
+#
+##########################################################
+
+CC_OPTS += -DVMMON -DVMCORE
+
+INCLUDE := -I$(SRCROOT)/include -I$(SRCROOT)/common -I$(SRCROOT)/linux \
+           -I$(SRCROOT)/vmcore
+
+EXTRA_CFLAGS := $(CC_OPTS) $(INCLUDE)
+
+EXTRA_CFLAGS += $(call vm_check_build, $(SRCROOT)/autoconf/smpcall.c, -DVMW_HAVE_SMP_CALL_3ARG, )
+EXTRA_CFLAGS += $(call vm_check_build, $(SRCROOT)/autoconf/tsc_khz.c, -DVMW_HAVE_TSC_KHZ, )
+
+obj-m += $(DRIVER).o
+
+$(DRIVER)-y := $(subst $(SRCROOT)/, , $(patsubst %.c, %.o, \
+               $(wildcard $(SRCROOT)/linux/*.c $(SRCROOT)/common/*.c $(SRCROOT)/vmcore/*.c)))
+
+clean:
+	rm -rf $(wildcard $(DRIVER).mod.c $(DRIVER).ko .tmp_versions \
+	       Module.symvers Modules.symvers Module.markers modules.order \
+	       $(foreach dir,linux/ common/ vmcore/ \
+	       ./,$(addprefix $(dir),.*.cmd .*.o.flags *.o)))
diff --git a/vmmon-only/Makefile.normal b/vmmon-only/Makefile.normal
new file mode 100644
index 00000000..9e26a969
--- /dev/null
+++ b/vmmon-only/Makefile.normal
@@ -0,0 +1,143 @@
+#!/usr/bin/make -f
+##########################################################
+# Copyright (C) 1998,2015 VMware, Inc. All rights reserved.
+#
+# This program is free software; you can redistribute it and/or modify it
+# under the terms of the GNU General Public License as published by the
+# Free Software Foundation version 2 and no later version.
+#
+# This program is distributed in the hope that it will be useful, but
+# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+# or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+# for more details.
+#
+# You should have received a copy of the GNU General Public License along
+# with this program; if not, write to the Free Software Foundation, Inc.,
+# 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
+#
+##########################################################
+
+vm_check_build = $(shell if $(CC) $(CC_OPTS) $(INCLUDE) -Werror -S -o /dev/null -xc $(1) \
+	> /dev/null 2>&1; then echo "$(2)"; else echo "$(3)"; fi)
+
+####
+#### DESTDIR is where the module, object files, and dependencies are built
+####
+DESTDIR := driver-$(VM_UNAME)
+
+####
+#### DRIVERNAME should be untouched unless you have a good reason to change
+#### it. The form below is how the scripts expect it.
+####
+DRIVERNAME := $(DRIVER)-xxx-$(VM_UNAME)
+
+ifneq (,$(filter x86_64%, $(shell $(CC) -dumpmachine)))
+MACHINE := x86_64
+else
+MACHINE := x386
+endif
+
+ifdef QUIET
+ECHO := @true
+else
+ECHO := @echo
+endif
+
+####
+#### You must compile with at least -O level of optimization
+#### or the module won't load.
+#### If desperate, I think that bringing in might
+#### suffice.
+####
+CC_WARNINGS := -Wall -Wstrict-prototypes
+# Don't use -pipe or egcs-2.91.66 (shipped with RedHat) will die
+CC_KFLAGS := -D__KERNEL__ -fno-strength-reduce -fno-omit-frame-pointer \
+             -fno-common -DKBUILD_MODNAME=$(DRIVER)
+CC_KFLAGS += $(call vm_check_gcc,-falign-loops=2 -falign-jumps=2 -falign-functions=2, \
+             -malign-loops=2 -malign-jumps=2 -malign-functions=2)
+CC_KFLAGS += $(call vm_check_gcc,-fno-strict-aliasing,)
+ifeq ($(MACHINE),x86_64)
+CC_KFLAGS += -mno-red-zone -mcmodel=kernel
+else
+# Gcc 3.0 deprecates -m486 --hpreg
+CC_KFLAGS += -DCPU=586 $(call check_gcc,-march=i586,-m486)
+endif
+
+CC_OPTS := -O2 -DMODULE -DVMMON -DVMCORE $(GLOBAL_DEFS) $(CC_KFLAGS) $(CC_WARNINGS)
+
+INCLUDE := -I$(SRCROOT)/include -I$(SRCROOT)/common -I$(SRCROOT)/linux \
+           -I$(SRCROOT)/vmcore -I$(HEADER_DIR)
+
+INCLUDE += $(shell $(CC) $(INCLUDE) -E $(SRCROOT)/autoconf/geninclude.c \
+           | sed -n -e 's!^APATH!-I$(HEADER_DIR)/asm!p')
+
+CC_OPTS += $(call vm_check_build, $(SRCROOT)/autoconf/smpcall.c, -DVMW_HAVE_SMP_CALL_3ARG, )
+CC_OPTS += $(call vm_check_build, $(SRCROOT)/autoconf/tsc_khz.c, -DVMW_HAVE_TSC_KHZ, )
+
+C_TARGETS_LINUX := driver.o hostif.o driverLog.o
+C_TARGETS_COMMON := vmx86.o memtrack.o phystrack.o cpuid.o task.o hashFunc.o
+C_TARGETS_VMCORE := moduleloop.o
+C_TARGETS_LINUX_D := ${C_TARGETS_LINUX:.o=.d}
+C_TARGETS_COMMON_D := ${C_TARGETS_COMMON:.o=.d}
+C_TARGETS_VMCORE_D := ${C_TARGETS_VMCORE:.o=.d}
+C_TARGETS := $(C_TARGETS_LINUX) $(C_TARGETS_COMMON) $(C_TARGETS_VMCORE)
+
+####
+#### Make Targets are beneath here.
+####
+
+driver: setup deps
+	$(MAKE) -C $(DESTDIR) -f ../Makefile SRCROOT=../$(SRCROOT) $(DRIVER).o \
+	  INCLUDE_DEPS=1
+
+setup:
+	@if [ -d $(DESTDIR) ] ; then true ; else mkdir $(DESTDIR); chmod 755 $(DESTDIR) ; fi
+
+$(DRIVER) $(DRIVER).o: $(DRIVERNAME)
+	cp -f $< $@
+
+$(DRIVERNAME): $(C_TARGETS)
+	$(ECHO) "Building $(DRIVERNAME)"
+	ld -r -o $(DRIVERNAME) $(C_TARGETS)
+
+auto-build:
+	$(MAKE) driver QUIET=1
+	cp -f $(DESTDIR)/$(DRIVERNAME) $(SRCROOT)/../$(DRIVER).o
+
+$(C_TARGETS_LINUX): %.o: $(SRCROOT)/linux/%.c
+	$(ECHO) "Compiling linux/$( $@
+
+$(C_TARGETS_LINUX_D): %.d: $(SRCROOT)/linux/%.c
+	$(ECHO) "Dependencies for $( $@
+
+$(C_TARGETS_VMCORE_D): %.d: $(SRCROOT)/vmcore/%.c
+	$(ECHO) "Dependencies for $( $@
+
+deps: setup
+	$(MAKE) -C $(DESTDIR) -f ../Makefile SRCROOT=../$(SRCROOT) driver_deps
+
+driver_deps: ${C_TARGETS:.o=.d}
+
+ifdef INCLUDE_DEPS
+include ${C_TARGETS:.o=.d}
+endif
+
+.SILENT:
diff --git a/vmmon-only/README b/vmmon-only/README
new file mode 100644
index 00000000..6a4da4c4
--- /dev/null
+++ b/vmmon-only/README
@@ -0,0 +1,14 @@
+The files in this directory and its subdirectories are the kernel module
+portion of the VMware Virtual Machine Monitor. In order to build, make
+certain the Makefile is correct, especially in whether or not your system
+is multi-processor, and then just type
+
+   make
+
+from this directory. A copy of the module will be left in
+
+   driver-/vmmon.o
+
+(e.g. driver-up-2.0.32/vmmon.o).
+
+If you have any problems or questions, send mail to support@vmware.com
diff --git a/vmmon-only/autoconf/geninclude.c b/vmmon-only/autoconf/geninclude.c
new file mode 100644
index 00000000..88d68dc5
--- /dev/null
+++ b/vmmon-only/autoconf/geninclude.c
@@ -0,0 +1,41 @@
+/*********************************************************
+ * Copyright (C) 2003 VMware, Inc. All rights reserved.
+ * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation version 2 and no later version. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY + * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * for more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write to the Free Software Foundation, Inc., + * 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + * + *********************************************************/ + +#include "compat_version.h" +#include "compat_autoconf.h" + +#ifdef CONFIG_X86_VOYAGER +APATH/mach-voyager +#endif +#ifdef CONFIG_X86_VISWS +APATH/mach-visws +#endif +#ifdef CONFIG_X86_NUMAQ +APATH/mach-numaq +#endif +#ifdef CONFIG_X86_BIGSMP +APATH/mach-bigsmp +#endif +#ifdef CONFIG_X86_SUMMIT +APATH/mach-summit +#endif +#ifdef CONFIG_X86_GENERICARCH +APATH/mach-generic +#endif +APATH/mach-default + diff --git a/vmmon-only/autoconf/smpcall.c b/vmmon-only/autoconf/smpcall.c new file mode 100644 index 00000000..e27a873a --- /dev/null +++ b/vmmon-only/autoconf/smpcall.c @@ -0,0 +1,37 @@ +/********************************************************* + * Copyright (C) 2008 VMware, Inc. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation version 2 and no later version. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY + * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * for more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write to the Free Software Foundation, Inc., + * 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + * + *********************************************************/ + +/* + * Detect whether smp_call_function has 4 or 3 arguments. + * Change happened between 2.6.26 and 2.6.27-rc1. + */ + +#include "compat_version.h" +#include "compat_autoconf.h" + +#if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 26) +# error This compile test intentionally fails. +#elif LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 27) +# include + +int +vmware_smp_call_function(void (*func)(void *info), void *info, int wait) +{ + return smp_call_function(func, info, wait); +} +#endif diff --git a/vmmon-only/autoconf/tsc_khz.c b/vmmon-only/autoconf/tsc_khz.c new file mode 100644 index 00000000..f2f262f7 --- /dev/null +++ b/vmmon-only/autoconf/tsc_khz.c @@ -0,0 +1,32 @@ +/********************************************************* + * Copyright (C) 2015 VMware, Inc. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation version 2 and no later version. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY + * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * for more details. 
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
+ *
+ *********************************************************/
+
+/*
+ * Detect if tsc_khz is available.
+ */
+
+#include "compat_version.h"
+#include "compat_autoconf.h"
+
+#include
+
+int
+vmw_tsc_khz(void)
+{
+   return tsc_khz;
+}
diff --git a/vmmon-only/common/apic.c b/vmmon-only/common/apic.c
new file mode 100644
index 00000000..a9eb1a4c
--- /dev/null
+++ b/vmmon-only/common/apic.c
@@ -0,0 +1,279 @@
+/*********************************************************
+ * Copyright (C) 2011 VMware, Inc. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the
+ * Free Software Foundation version 2 and no later version.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+ * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
+ *
+ *********************************************************/
+
+#include "vmware.h"
+#include "hostif.h"
+#include "x86cpuid_asm.h"
+#include "vm_asm.h"
+#include "cpuid.h"
+#include "apic.h"
+
+/*
+ *----------------------------------------------------------------------
+ *
+ * APIC_GetMA --
+ *
+ *      Return the MA of the host's APIC by reading the APIC_BASE
+ *      MSR and applying any necessary masking.
+ *
+ * Side effects:
+ *      None
+ *
+ * Return value:
+ *      MA of host APIC if successful (guaranteed to be page-aligned),
+ *      or the sentinel (MA)-1 if unsuccessful or if X2 APIC mode is enabled
+ *      since this disables the MMIO interface.
+ *
+ *----------------------------------------------------------------------
+ */
+
+MA
+APIC_GetMA(void)
+{
+   uint64 result;
+   CpuidVendor cpuVendor = CPUID_GetVendor();
+   uint32 features = CPUID_GetFeatures();
+
+   if (!CPUID_ISSET(1, EDX, MSR, features) ||
+       !CPUID_ISSET(1, EDX, APIC, features)) {
+      return (MA)-1;
+   }
+
+   if (cpuVendor != CPUID_VENDOR_INTEL &&
+       cpuVendor != CPUID_VENDOR_AMD &&
+       cpuVendor != CPUID_VENDOR_VIA) {
+      return (MA)-1;
+   }
+
+   /*
+    * Check if X2 APIC mode is enabled.
+    */
+
+   if ((__GET_MSR(MSR_APIC_BASE) & APIC_MSR_X2APIC_ENABLED) != 0) {
+      return (MA)-1;
+   }
+
+   /*
+    * APIC is present and enabled. The CPUID[0x1].edx[APIC] bit,
+    * already checked, mirrors the APIC base MSR's enable bit.
+    */
+
+   // Mask out goo in the low 12 bits, which is unrelated to the address.
+   result = __GET_MSR(MSR_APIC_BASE) & ~MASK64(PAGE_SHIFT);
+
+   /*
+    * On Intel, the high bits are reserved so we mask.
+    * On AMD, high bits are explicitly MBZ, so no need.
+    * Via doesn't specify, so we'll assume reserved.
+    */
+   if (cpuVendor == CPUID_VENDOR_INTEL || cpuVendor == CPUID_VENDOR_VIA) {
+      /*
+       * Intel suggests using CPUID 0x80000008.eax[7-0] (physical
+       * address size), with 36 (24 bit MPNs) as a fallback.
+       * Via has that cpuid leaf as well.
+ */ + unsigned numPhysicalBits = 36; + + if (CPUID_AddressSizeSupported()) { + numPhysicalBits = __GET_EAX_FROM_CPUID(0x80000008) & 0xff; + } + + result &= MASK64(numPhysicalBits); + } + + ASSERT_ON_COMPILE(sizeof(result) == sizeof(MA)); + return result; +} + + +/* + *---------------------------------------------------------------------- + * + * APIC_Read -- + * + * Reads the given APIC register using the proper interface. Does not + * check to see if the register number is valid. + * + * Side effects: + * Yes. + * + * Return value: + * Value of the register. + * + *---------------------------------------------------------------------- + */ + +uint32 +APIC_Read(const APICDescriptor *desc, // IN + int regNum) // IN +{ + if (desc->isX2) { + return (uint32 )__GET_MSR(MSR_X2APIC_BASE + regNum); + } else { + return desc->base[regNum][0]; + } +} + + +/* + *---------------------------------------------------------------------- + * + * APIC_ReadID -- + * + * Reads the APIC ID using the proper interface. The semantics of the + * ID are different in X2APIC mode so APIC_Read() should not be used. + * + * Side effects: + * None. + * + * Return value: + * APIC ID. + * + *---------------------------------------------------------------------- + */ + +uint32 +APIC_ReadID(const APICDescriptor *desc) // IN +{ + uint32 reg = APIC_Read(desc, APICR_ID); + + if (desc->isX2) { + return reg; + } else { + return (reg & XAPIC_ID_MASK) >> APIC_ID_SHIFT; + } +} + + +/* + *---------------------------------------------------------------------- + * + * APIC_Write -- + * + * Writes the given value to the given APIC register using the proper + * interface. Does not check to see if the register number is valid. + * + * Side effects: + * Yes. + * + * Return value: + * None. + * + *---------------------------------------------------------------------- + */ + +void +APIC_Write(const APICDescriptor *desc, // IN + int regNum, // IN + uint32 val) // IN +{ + if (desc->isX2) { + __SET_MSR(MSR_X2APIC_BASE + regNum, val); + } else { + desc->base[regNum][0] = val; + } +} + + +/* + *---------------------------------------------------------------------- + * + * APIC_ReadICR -- + * + * Reads the APIC ICR using the proper interface. The semantics of the + * ICR are different in X2APIC mode so APIC_Read() should not be used. + * + * Side effects: + * Yes. + * + * Return value: + * The full 64-bit value of the ICR. + * + *---------------------------------------------------------------------- + */ + +uint64 +APIC_ReadICR(const APICDescriptor *desc) // IN +{ + if (desc->isX2) { + return __GET_MSR(MSR_X2APIC_BASE + APICR_ICRLO); + } else { + uint32 icrHi = desc->base[APICR_ICRHI][0]; + uint32 icrLo = desc->base[APICR_ICRLO][0]; + return (uint64) icrHi << 32 | icrLo; + } +} + + +/* + *---------------------------------------------------------------------- + * + * APIC_WriteICR -- + * + * Writes the given value to the APIC ICR using the proper interface. + * The semantics of the ICR are different in X2APIC mode so APIC_Write() + * should not be used. + * + * Side effects: + * Yes. + * + * Return value: + * None. 
+ * + *---------------------------------------------------------------------- + */ + +void +APIC_WriteICR(const APICDescriptor *desc, // IN + uint32 id, // IN + uint32 icrLo) // IN +{ + if (desc->isX2) { + uint64 icr = (uint64) id << 32 | icrLo; + __SET_MSR(MSR_X2APIC_BASE + APICR_ICRLO, icr); + } else { + ASSERT(!(id & ~(APIC_ICRHI_DEST_MASK >> APIC_ICRHI_DEST_OFFSET))); + desc->base[APICR_ICRHI][0] = id << APIC_ICRHI_DEST_OFFSET; + desc->base[APICR_ICRLO][0] = icrLo; + } +} + + +/* + *---------------------------------------------------------------------- + * + * APIC_MaxLvt -- + * + * Reads the maximum number of LVT entries from the APIC version register. + * + * Side effects: + * No. + * + * Return value: + * The maximum number of LVT entries. + * + *---------------------------------------------------------------------- + */ + +uint32 +APIC_MaxLVT(const APICDescriptor *desc) // IN +{ + uint32 ver = APIC_Read(desc, APICR_VERSION); + + return (ver >> APIC_MAX_LVT_SHIFT) & APIC_MAX_LVT_MASK; +} diff --git a/vmmon-only/common/apic.h b/vmmon-only/common/apic.h new file mode 100644 index 00000000..4d5e751c --- /dev/null +++ b/vmmon-only/common/apic.h @@ -0,0 +1,47 @@ +/********************************************************* + * Copyright (C) 2011 VMware, Inc. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation version 2 and no later version. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY + * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * for more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write to the Free Software Foundation, Inc., + * 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + * + *********************************************************/ + +/* + * apic.h -- + * + * Some host APIC helper functions + */ + +#ifndef APIC_H +#define APIC_H + +#define INCLUDE_ALLOW_VMMON +#define INCLUDE_ALLOW_VMCORE +#include "includeCheck.h" + +#include "vm_basic_types.h" + +typedef struct { + Bool isX2; + volatile uint32 (*base)[4]; +} APICDescriptor; + +MA APIC_GetMA(void); +uint32 APIC_Read(const APICDescriptor *desc, int regNum); +void APIC_Write(const APICDescriptor *desc, int regNum, uint32 val); +void APIC_WriteICR(const APICDescriptor *desc, uint32 id, uint32 icrLo); +uint64 APIC_ReadICR(const APICDescriptor *desc); +uint32 APIC_ReadID(const APICDescriptor *desc); +uint32 APIC_MaxLVT(const APICDescriptor *desc); + +#endif diff --git a/vmmon-only/common/comport.c b/vmmon-only/common/comport.c new file mode 100644 index 00000000..4d591178 --- /dev/null +++ b/vmmon-only/common/comport.c @@ -0,0 +1,203 @@ +/********************************************************* + * Copyright (C) 2006,2016 VMware, Inc. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation version 2 and no later version. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY + * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * for more details. 
+ * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write to the Free Software Foundation, Inc., + * 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + * + *********************************************************/ + +/* + * comport.c -- + * + * Simple COM1 port routines for debugging code that can't do any real + * host IO, such as the worldswitch and related. + * + * They all wait for the last character to complete transmission so if the + * system crashes immediately on return, the last character will be seen + * by the remote end. + * + * These routines do not have any external dependencies so can be called + * by any code that has privilege to access IO ports. + * + * Under Windows, they can be made to forward output to DbgPrint for + * printing via the debugger. Just have USE_DBGPRINT set to 1. If you + * let USE_DBGPRINT be 0 with Windows, make sure the comport hardware is + * powered-up by leaving Hyperterm running with the comport open, else + * Windows will power the chip down. + */ + +#include "comport.h" +#include "vm_basic_asm.h" // for INB, OUTB + +#if 000 // defined(_MSC_VER) +#define USE_DBGPRINT 1 +#define USE_MACPORT80 0 +#else +#define USE_DBGPRINT 0 +#ifdef __APPLE__ +#define USE_MACPORT80 1 +#else +#define USE_MACPORT80 0 +#endif +#endif + +#if USE_DBGPRINT +void DbgPrint(char const *format, ...); +#elif !USE_MACPORT80 +#define IOBASE 0x3F8 // COM1 base IO port number +#define BAUD 115200 // baud rate +#define THR 0 // transmitter holding register +#define LSR 5 // line status register +#define LSR_TE 0x20 // - transmit fifo completely empty +#define LSR_TI 0x40 // - transmitter idle +#endif + + +void +CP_Init(void) +{ +#if !USE_DBGPRINT && !USE_MACPORT80 + OUTB(IOBASE+3, 0x83); // LCR=select DLL/DLH, wordlen=8 bits + OUTB(IOBASE+0, (115200/BAUD)&255); // DLL=lo order baud rate + OUTB(IOBASE+1, (115200/BAUD)/256); // DLH=hi order baud rate + OUTB(IOBASE+3, 0x03); // LCR=select RBR/THR/IER + OUTB(IOBASE+4, 0x07); // MCR=dtr, rts, port-enable + OUTB(IOBASE+2, 0x07); // FCR=reset rcv fifo, reset xmt fifo + OUTB(IOBASE+1, 0); // IER=disable all interrupts +#endif +} + + +void +CP_PutChr(uint8 ch) // IN +{ +#if USE_DBGPRINT + DbgPrint("%c", ch); +#elif USE_MACPORT80 + int bit; + + OUTB(0x80, (ch & 1) | 0x10); + for (bit = 1; bit < 64; bit ++) { + OUTB(0x80, (ch >> (bit & 7)) & 1); + } +#else + if (ch == '\n') CP_PutChr('\r'); + while ((INB(IOBASE+LSR) & LSR_TE) == 0) { } + OUTB(IOBASE+THR, ch); + while ((INB(IOBASE+LSR) & LSR_TI) == 0) { } +#endif +} + + +void +CP_PutDec(uint32 value) // IN +{ +#if USE_DBGPRINT + DbgPrint("%u", value); +#else + char s[12]; + int i; + + i = 0; + do { + s[i++] = (value % 10) + '0'; + value /= 10; + } while (value > 0); + while (--i >= 0) CP_PutChr(s[i]); +#endif +} + + +void +CP_PutHexPtr(void *value) // IN +{ + if (sizeof value == 8) { + CP_PutHex64((uint64)(VA)value); + } + if (sizeof value == 4) { + CP_PutHex32((uint32)(VA)value); + } +} + + +void +CP_PutHex64(uint64 value) // IN +{ + CP_PutHex32((uint32)(value >> 32)); + CP_PutHex32((uint32)value); +} + + +void +CP_PutHex32(uint32 value) // IN +{ +#if USE_DBGPRINT + DbgPrint("%8.8X", value); +#else + CP_PutHex16((uint16)(value >> 16)); + CP_PutHex16((uint16)value); +#endif +} + + +void +CP_PutHex16(uint16 value) // IN +{ +#if USE_DBGPRINT + DbgPrint("%4.4X", value); +#else + CP_PutHex8((uint8)(value >> 8)); + CP_PutHex8((uint8)value); +#endif +} + + +void +CP_PutHex8(uint8 value) // IN +{ +#if USE_DBGPRINT + 
DbgPrint("%2.2X", value); +#else + CP_PutChr("0123456789ABCDEF"[(value>>4)&15]); + CP_PutChr("0123456789ABCDEF"[value&15]); +#endif +} + + +void +CP_PutSp(void) +{ + CP_PutChr(' '); +} + + +void +CP_PutCrLf(void) +{ + CP_PutChr('\n'); +} + + +void +CP_PutStr(char const *s) // IN +{ +#if USE_DBGPRINT + DbgPrint("%s", s); +#else + char c; + + while ((c = *(s ++)) != 0) { + CP_PutChr(c); + } +#endif +} diff --git a/vmmon-only/common/comport.h b/vmmon-only/common/comport.h new file mode 100644 index 00000000..bf3cdb09 --- /dev/null +++ b/vmmon-only/common/comport.h @@ -0,0 +1,41 @@ +/********************************************************* + * Copyright (C) 2006 VMware, Inc. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation version 2 and no later version. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY + * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * for more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write to the Free Software Foundation, Inc., + * 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + * + *********************************************************/ + +#ifndef _COMPORT_H +#define _COMPORT_H + +#define INCLUDE_ALLOW_VMCORE +#define INCLUDE_ALLOW_VMMON +#define INCLUDE_ALLOW_VMX +#include "includeCheck.h" + +#include "vm_basic_types.h" // for uint8, et al + +void CP_Init(void); +void CP_PutChr(uint8 ch); +void CP_PutDec(uint32 value); +void CP_PutHexPtr(void *value); +void CP_PutHex64(uint64 value); +void CP_PutHex32(uint32 value); +void CP_PutHex16(uint16 value); +void CP_PutHex8(uint8 value); +void CP_PutSp(void); +void CP_PutCrLf(void); +void CP_PutStr(char const *s); + +#endif diff --git a/vmmon-only/common/cpuid.c b/vmmon-only/common/cpuid.c new file mode 100644 index 00000000..4984eb00 --- /dev/null +++ b/vmmon-only/common/cpuid.c @@ -0,0 +1,209 @@ +/********************************************************* + * Copyright (C) 1998 VMware, Inc. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation version 2 and no later version. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY + * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * for more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write to the Free Software Foundation, Inc., + * 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + * + *********************************************************/ + +#ifdef linux +/* Must come before any kernel header file --hpreg */ +# include "driver-config.h" + +# include +#endif +#ifdef __APPLE__ +# include // For strcmp(). 
+#endif + +#include "vmware.h" +#include "vm_assert.h" +#include "hostif.h" +#include "cpuid.h" +#include "x86cpuid_asm.h" + +uint32 cpuidFeatures; +static CpuidVendor vendor = CPUID_NUM_VENDORS; +static uint32 version; + + +/* + *----------------------------------------------------------------------------- + * + * CPUIDExtendedSupported -- + * + * Determine whether processor supports extended CPUID (0x8000xxxx) + * and how many of them. + * + * Results: + * 0 if extended CPUID is not supported + * otherwise maximum extended CPUID supported (bit 31 set) + * + * Side effects: + * None. + * + *----------------------------------------------------------------------------- + */ + +static uint32 +CPUIDExtendedSupported(void) +{ + uint32 eax; + + eax = __GET_EAX_FROM_CPUID(0x80000000); + if ((eax & 0x80000000) != 0x80000000) { + return 0; + } + + return eax; +} + + +void +CPUID_Init(void) +{ + CPUIDRegs regs; + uint32 *ptr; + char name[16]; + + __GET_CPUID(1, ®s); + version = regs.eax; + cpuidFeatures = regs.edx; + + __GET_CPUID(0, ®s); + ptr = (uint32 *)name; + ptr[0] = regs.ebx; + ptr[1] = regs.edx; + ptr[2] = regs.ecx; + ptr[3] = 0; + + if (strcmp(name, CPUID_INTEL_VENDOR_STRING_FIXED) == 0) { + vendor = CPUID_VENDOR_INTEL; + } else if (strcmp(name, CPUID_AMD_VENDOR_STRING_FIXED) == 0) { + vendor = CPUID_VENDOR_AMD; + } else if (strcmp(name, CPUID_CYRIX_VENDOR_STRING_FIXED) == 0) { + vendor = CPUID_VENDOR_CYRIX; + } else { + Warning("VMMON CPUID: Unrecognized CPU\n"); + vendor = CPUID_VENDOR_UNKNOWN; + } +} + + +CpuidVendor +CPUID_GetVendor(void) +{ + ASSERT(vendor != CPUID_NUM_VENDORS); + return vendor; +} + + +uint32 +CPUID_GetVersion(void) +{ + ASSERT(vendor != CPUID_NUM_VENDORS); + return version; +} + + +/* + *----------------------------------------------------------------------------- + * + * CPUID_SyscallSupported -- + * + * Determine whether processor supports syscall opcode and MSRs. + * + * Results: + * FALSE if processor does not support syscall + * TRUE if processor supports syscall + * + * Side effects: + * It determines value only on first call, caching it for future. + * + *----------------------------------------------------------------------------- + */ + +Bool +CPUID_SyscallSupported(void) +{ + /* + * It is OK to use local static variables here as 'result' does not depend + * on any work done in CPUID_Init(). It purely depends on the CPU. + */ + static Bool initialized = FALSE; + static Bool result; + + if (UNLIKELY(!initialized)) { + result = CPUIDExtendedSupported() >= 0x80000001 + && (__GET_EDX_FROM_CPUID(0x80000001) & (1 << 11)); + initialized = TRUE; + } + + return result; +} + + +Bool +CPUID_LongModeSupported(void) +{ + /* + * It is OK to use local static variables here as 'result' does not depend + * on any work done in CPUID_Init(). It purely depends on the CPU. + */ + static Bool initialized = FALSE; + static Bool result; + + if (UNLIKELY(!initialized)) { + result = CPUIDExtendedSupported() >= 0x80000001 + && (__GET_EDX_FROM_CPUID(0x80000001) & (1 << 29)); + initialized = TRUE; + } + + return result; +} + + +/* + *----------------------------------------------------------------------------- + * + * CPUID_AddressSizeSupported -- + * + * Determine whether processor supports the address size cpuid + * extended leaf. + * + * Results: + * True iff the processor supports CPUID 0x80000008. + * + * Side effects: + * It determines value only on first call, caching it for future. 
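A minimal user-space sketch of the same probing order used by CPUID_Init() and CPUIDExtendedSupported() above: the vendor string is assembled from leaf 0's EBX, EDX, ECX (in that order), leaf 0x80000000 reports the highest extended leaf when bit 31 of EAX is set, and leaf 0x80000001 EDX carries the syscall (bit 11) and long-mode (bit 29) flags. This sketch assumes a GCC/Clang toolchain and its <cpuid.h> helper; the module itself uses its own __GET_CPUID/__GET_EAX_FROM_CPUID wrappers from x86cpuid_asm.h.

/* Build on x86 with GCC or Clang: cc -O2 cpuid_probe.c */
#include <cpuid.h>     /* __get_cpuid(): GCC/Clang wrapper around the CPUID instruction */
#include <stdint.h>
#include <stdio.h>
#include <string.h>

int main(void)
{
   unsigned eax, ebx, ecx, edx;
   char vendor[13];

   /* Leaf 0: vendor string is spread across EBX, EDX, ECX in that order. */
   if (!__get_cpuid(0, &eax, &ebx, &ecx, &edx)) {
      return 1;
   }
   memcpy(vendor + 0, &ebx, 4);
   memcpy(vendor + 4, &edx, 4);
   memcpy(vendor + 8, &ecx, 4);
   vendor[12] = '\0';

   /* Leaf 0x80000000: bit 31 set in EAX means extended leaves exist;
    * EAX is then the highest supported extended leaf. */
   if (!__get_cpuid(0x80000000u, &eax, &ebx, &ecx, &edx) ||
       (eax & 0x80000000u) == 0) {
      printf("%s: no extended CPUID leaves\n", vendor);
      return 0;
   }

   uint32_t maxExt = eax;
   int longMode = 0;

   /* Leaf 0x80000001: EDX bit 29 is long mode, bit 11 is syscall/sysret. */
   if (maxExt >= 0x80000001u &&
       __get_cpuid(0x80000001u, &eax, &ebx, &ecx, &edx)) {
      longMode = (edx >> 29) & 1;
   }
   printf("%s: max extended leaf %#x, long mode %s\n",
          vendor, maxExt, longMode ? "yes" : "no");
   return 0;
}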
+ * + *----------------------------------------------------------------------------- + */ + +Bool +CPUID_AddressSizeSupported(void) +{ + /* + * It is OK to use local static variables here as 'result' does not depend + * on any work done in CPUID_Init(). It purely depends on the CPU. + */ + static Bool initialized = FALSE; + static Bool result; + + if (UNLIKELY(!initialized)) { + result = CPUIDExtendedSupported() >= 0x80000008; + initialized = TRUE; + } + + return result; +} diff --git a/vmmon-only/common/cpuid.h b/vmmon-only/common/cpuid.h new file mode 100644 index 00000000..59bb8067 --- /dev/null +++ b/vmmon-only/common/cpuid.h @@ -0,0 +1,56 @@ +/********************************************************* + * Copyright (C) 1998,2016 VMware, Inc. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation version 2 and no later version. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY + * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * for more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write to the Free Software Foundation, Inc., + * 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + * + *********************************************************/ + +/* + * cpuid.h -- + * + * wrap CPUID instruction + */ + +#ifndef CPUID_H +#define CPUID_H + +#define INCLUDE_ALLOW_VMMON +#define INCLUDE_ALLOW_VMCORE +#include "includeCheck.h" + +#include "x86cpuid.h" + +extern uint32 cpuidFeatures; + +void CPUID_Init(void); +CpuidVendor CPUID_GetVendor(void); +uint32 CPUID_GetVersion(void); +Bool CPUID_SyscallSupported(void); +Bool CPUID_LongModeSupported(void); +Bool CPUID_AddressSizeSupported(void); + +static INLINE uint32 +CPUID_GetFeatures(void) +{ + return cpuidFeatures; +} + +static INLINE Bool +CPUID_SSE2Supported(void) +{ + return CPUID_ISSET(1, EDX, SSE2, CPUID_GetFeatures()); +} + +#endif + diff --git a/vmmon-only/common/hashFunc.c b/vmmon-only/common/hashFunc.c new file mode 100644 index 00000000..b8510821 --- /dev/null +++ b/vmmon-only/common/hashFunc.c @@ -0,0 +1,41 @@ +/********************************************************* + * Copyright (C) 2001 VMware, Inc. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation version 2 and no later version. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY + * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * for more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write to the Free Software Foundation, Inc., + * 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + * + *********************************************************/ + + +/* + * hashFunc.c -- + * + * The core implementation lives in lib/shared/hashFunc.h because it + * is shared by the vmkernel and vmmon. 
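The HashFunc_HashPage() wrapper defined just below hashes a 4 KiB page as 512 64-bit words with a fixed seed; hash2() itself lives in the shared header and is not part of this file. The standalone sketch below substitutes a simple FNV-1a-style word hash purely to illustrate the call shape and word-granular traversal; it is not the hash the module actually uses, and the seed handling is an illustrative assumption.

#include <stdint.h>
#include <stdio.h>
#include <string.h>

#define PAGE_SIZE_BYTES 4096u
#define WORDS_PER_PAGE  (PAGE_SIZE_BYTES / sizeof(uint64_t))   /* 512 words per page */

/* Stand-in for hash2(): a 64-bit FNV-1a-style hash over 64-bit words,
 * seeded the way HashFunc_HashPage() seeds hash2() with HASH_INIT_VALUE. */
static uint64_t DemoHashWords(const uint64_t *words, size_t count, uint64_t seed)
{
   uint64_t h = 14695981039346656037ull ^ seed;

   for (size_t i = 0; i < count; i++) {
      h ^= words[i];
      h *= 1099511628211ull;
   }
   return h;
}

static uint64_t DemoHashPage(const void *addr)
{
   return DemoHashWords(addr, WORDS_PER_PAGE, 42 /* HASH_INIT_VALUE */);
}

int main(void)
{
   static uint64_t page[WORDS_PER_PAGE];    /* one zeroed 4 KiB "page" */

   printf("hash of zero page: %#018llx\n",
          (unsigned long long)DemoHashPage(page));
   memset(page, 0xAB, sizeof page);
   printf("hash of 0xAB page: %#018llx\n",
          (unsigned long long)DemoHashPage(page));
   return 0;
}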
+ */ + +#include "hashFunc.h" + +/* + * Wrappers + */ + +// arbitrary constant +#define HASH_INIT_VALUE (42) + +// 64-bit hash for one 4K page +uint64 +HashFunc_HashPage(const void *addr) +{ + return hash2((uint64 *)addr, PAGE_SIZE / sizeof (uint64), HASH_INIT_VALUE); +} diff --git a/vmmon-only/common/hostKernel.h b/vmmon-only/common/hostKernel.h new file mode 100644 index 00000000..51dc0e3f --- /dev/null +++ b/vmmon-only/common/hostKernel.h @@ -0,0 +1,83 @@ +/********************************************************* + * Copyright (C) 1998 VMware, Inc. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation version 2 and no later version. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY + * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * for more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write to the Free Software Foundation, Inc., + * 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + * + *********************************************************/ + +/* + * hostKernel.h -- + * + * Definition of HOST_KERNEL_* --hpreg + */ + + +#ifndef __HOST_KERNEL_H__ +# define __HOST_KERNEL_H__ + + +# ifdef linux +/* + * In some cases, this files needs to include Linux kernel header file + * asm/page.h. + * + * However, asm/page.h defines PAGE_SHIFT, PAGE_SIZE, PAGE_MASK, PAGE_OFFSET + * and VMware header file vm_basic_types.h defines PAGE_SHIFT, PAGE_SIZE, + * PAGE_MASK, PAGE_OFFSET. PAGE_MASK and PAGE_OFFSET are defined differently + * (XXX we should really prefix the VMware version with VM_ to prevent any + * further confusion), but fortunately the driver does not use them. + * + * So in this file, we must solve the definition conflict for files that + * include both this file and vm_basic_types.h. 2 cases can occur: + * + * 1) this file is included before vm_basic_types.h is included. This is fine, + * because vm_basic_types.h only defines PAGE_* if they don't exist yet. + * + * 2) vm_basic_types.h is included before this file is included. + * We must undefine + * PAGE_* in between. But this only works if asm/page.h is not included + * before this file is included. + * + * In summary: if you make sure you do not include asm/page.h before you + * include this file, then we guarantee that: + * . This file and vm_basic_types.h can be included in any order + * . asm/page.h will be included + * . The PAGE_* definitions will come from asm/page.h + * + * --hpreg + */ + +/* Must come before any kernel header file --hpreg */ +# include "driver-config.h" + +# undef PAGE_SHIFT +# undef PAGE_SIZE +# undef PAGE_MASK +# undef PAGE_OFFSET + +/* For __PAGE_OFFSET --hpreg */ +# include + +# define HOST_KERNEL_VA_2_LA(_x) (_x) +# define HOST_KERNEL_LA_2_VA(_x) (_x) +# else +/* For VA and LA --hpreg */ +# include "vm_basic_types.h" + +# define HOST_KERNEL_VA_2_LA(_addr) ((LA)(_addr)) +# define HOST_KERNEL_LA_2_VA(_addr) ((VA)(_addr)) +# endif + + +#endif /* __HOST_KERNEL_H__ */ diff --git a/vmmon-only/common/hostif.h b/vmmon-only/common/hostif.h new file mode 100644 index 00000000..865abc00 --- /dev/null +++ b/vmmon-only/common/hostif.h @@ -0,0 +1,148 @@ +/********************************************************* + * Copyright (C) 1998-2014 VMware, Inc. 
All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation version 2 and no later version. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY + * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * for more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write to the Free Software Foundation, Inc., + * 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + * + *********************************************************/ + +/* + * hostif.h - Platform dependent interface for supporting + * the vmx86 device driver. + */ + + +#ifndef _HOSTIF_H_ +#define _HOSTIF_H_ + +#define INCLUDE_ALLOW_VMMON +#define INCLUDE_ALLOW_VMCORE +#include "includeCheck.h" + +#include "vmx86.h" +#include "vcpuset.h" + +#include "hostifMem.h" +#include "hostifGlobalLock.h" + +/* + * Host-specific definitions. + */ +#if !__linux__ && !defined(WINNT_DDK) && !defined __APPLE__ +#error "Only Linux or NT or Mac OS defined for now." +#endif + + +/* + * The default monitor spin time for crosscalls is 50 usec + * in vmcore/vmx/main/monitor_init.c. This value is used + * in vmx86_YieldToSet to decide whether to block and wait + * for another vCPU to process our crosscall, or just wake + * up the other vCPUs and go back to monitor. + */ + +#define CROSSCALL_SPIN_SHORT_US 50 + +/* + * Sleep timeout in usec, see above comment for CROSSCALL_SPIN_SHORT_US + */ + +#define CROSSCALL_SLEEP_US 1000 + +/* Return value for HostIF_IPI. */ +typedef enum { + IPI_NONE, // No IPI was sent. + IPI_UNICAST, // Unicast IPIs were sent. + IPI_BROADCAST // A broadcast IPI was sent to all PCPUs. 
+} HostIFIPIMode; + +EXTERN int HostIF_Init(VMDriver *vm); +EXTERN int HostIF_LookupUserMPN(VMDriver *vm, VA64 uAddr, MPN *mpn); +EXTERN void *HostIF_MapCrossPage(VMDriver *vm, VA64 uAddr); +EXTERN void HostIF_InitFP(VMDriver *vm); + +EXTERN void *HostIF_AllocPage(void); +EXTERN void HostIF_FreePage(void *ptr); + +EXTERN int HostIF_LockPage(VMDriver *vm, VA64 uAddr, + Bool allowMultipleMPNsPerVA, MPN *mpn); +EXTERN int HostIF_UnlockPage(VMDriver *vm, VA64 uAddr); +EXTERN int HostIF_UnlockPageByMPN(VMDriver *vm, MPN mpn, VA64 uAddr); +EXTERN Bool HostIF_IsLockedByMPN(VMDriver *vm, MPN mpn); +EXTERN void HostIF_FreeAllResources(VMDriver *vm); +EXTERN uint64 HostIF_ReadUptime(void); +EXTERN uint64 HostIF_UptimeFrequency(void); +EXTERN unsigned int HostIF_EstimateLockedPageLimit(const VMDriver *vm, + unsigned int lockedPages); +EXTERN void HostIF_Wait(unsigned int timeoutMs); +EXTERN void HostIF_WaitForFreePages(unsigned int timeoutMs); +EXTERN void *HostIF_AllocCrossGDT(uint32 numPages, MPN maxValidFirst, + MPN *crossGDTMPNs); +EXTERN void HostIF_FreeCrossGDT(uint32 numPages, void *crossGDT); +EXTERN void HostIF_VMLock(VMDriver *vm, int callerID); +EXTERN void HostIF_VMUnlock(VMDriver *vm, int callerID); +#ifdef VMX86_DEBUG +EXTERN Bool HostIF_VMLockIsHeld(VMDriver *vm); +#endif + +EXTERN Bool HostIF_APICInit(VMDriver *vm, Bool setVMPtr, Bool probe); + +EXTERN int HostIF_SemaphoreWait(VMDriver *vm, + Vcpuid vcpuid, + uint64 *args); + +EXTERN int HostIF_SemaphoreSignal(uint64 *args); + +EXTERN void HostIF_SemaphoreForceWakeup(VMDriver *vm, const VCPUSet *vcs); +EXTERN HostIFIPIMode HostIF_IPI(VMDriver *vm, const VCPUSet *vcs); + +EXTERN uint32 HostIF_GetCurrentPCPU(void); +EXTERN void HostIF_CallOnEachCPU(void (*func)(void *), void *data); + +EXTERN Bool HostIF_PrepareWaitForThreads(VMDriver *vm, Vcpuid currVcpu); +EXTERN void HostIF_WaitForThreads(VMDriver *vm, Vcpuid currVcpu); +EXTERN void HostIF_CancelWaitForThreads(VMDriver *vm, Vcpuid currVcpu); +EXTERN void HostIF_WakeUpYielders(VMDriver *vm, Vcpuid currVcpu); + +EXTERN int HostIF_AllocLockedPages(VMDriver *vm, VA64 addr, + unsigned int numPages, Bool kernelMPNBuffer); +EXTERN int HostIF_FreeLockedPages(VMDriver *vm, VA64 addr, + unsigned int numPages, Bool kernelMPNBuffer); +EXTERN MPN HostIF_GetNextAnonPage(VMDriver *vm, MPN mpn); +EXTERN int HostIF_GetLockedPageList(VMDriver *vm, VA64 uAddr, + unsigned int numPages); + +EXTERN int HostIF_ReadPage(MPN mpn, VA64 addr, Bool kernelBuffer); +EXTERN int HostIF_WritePage(MPN mpn, VA64 addr, Bool kernelBuffer); +#ifdef _WIN32 +/* Add a HostIF_ReadMachinePage() if/when needed */ +EXTERN int HostIF_WriteMachinePage(MPN mpn, VA64 addr); +#else +#define HostIF_WriteMachinePage(_a, _b) HostIF_WritePage((_a), (_b), TRUE) +#endif +#if defined __APPLE__ +// There is no need for a fast clock lock on Mac OS. 
+#define HostIF_FastClockLock(_callerID) do {} while (0) +#define HostIF_FastClockUnlock(_callerID) do {} while (0) +#else +EXTERN void HostIF_FastClockLock(int callerID); +EXTERN void HostIF_FastClockUnlock(int callerID); +#endif +EXTERN int HostIF_SetFastClockRate(unsigned rate); + +EXTERN MPN HostIF_AllocMachinePage(void); +EXTERN void HostIF_FreeMachinePage(MPN mpn); + +EXTERN int HostIF_SafeRDMSR(uint32 msr, uint64 *val); + +#endif // ifdef _HOSTIF_H_ diff --git a/vmmon-only/common/hostifGlobalLock.h b/vmmon-only/common/hostifGlobalLock.h new file mode 100644 index 00000000..678e84ab --- /dev/null +++ b/vmmon-only/common/hostifGlobalLock.h @@ -0,0 +1,45 @@ +/********************************************************* + * Copyright (C) 1998 VMware, Inc. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation version 2 and no later version. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY + * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * for more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write to the Free Software Foundation, Inc., + * 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + * + *********************************************************/ + +/* + * hostifGlobalLock.h - Platform dependent interface. This module + * defines functions for manipulating/checking + * the Global lock used by some drivers. + */ + + +#ifndef _HOSTIFGLOBALLOCK_H_ +#define _HOSTIFGLOBALLOCK_H_ + +#define INCLUDE_ALLOW_VMMON +#define INCLUDE_ALLOW_VMCORE +#include "includeCheck.h" + + +#ifdef __APPLE__ +Bool HostIFGlobalLock_Start(void); +void HostIFGlobalLock_Stop(void); +#endif +void HostIF_GlobalLock(int callerID); +void HostIF_GlobalUnlock(int callerID); +#ifdef VMX86_DEBUG +Bool HostIF_GlobalLockIsHeld(void); +#endif + + +#endif // ifdef _HOSTIFGLOBALLOCK_H_ diff --git a/vmmon-only/common/hostifMem.h b/vmmon-only/common/hostifMem.h new file mode 100644 index 00000000..318534c2 --- /dev/null +++ b/vmmon-only/common/hostifMem.h @@ -0,0 +1,37 @@ +/********************************************************* + * Copyright (C) 1998 VMware, Inc. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation version 2 and no later version. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY + * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * for more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write to the Free Software Foundation, Inc., + * 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + * + *********************************************************/ + +/* + * hostifMem.h - Platform dependent interface. This module defines + * functions for allocating and releasing memory + * from the kernel. 
+ */ + + +#ifndef _HOSTIFMEM_H_ +#define _HOSTIFMEM_H_ + +#define INCLUDE_ALLOW_VMMON +#define INCLUDE_ALLOW_VMCORE +#include "includeCheck.h" + + +void *HostIF_AllocKernelMem(size_t size, int nonPageable); +void HostIF_FreeKernelMem(void *ptr); + +#endif // ifdef _HOSTIFMEM_H_ diff --git a/vmmon-only/common/memtrack.c b/vmmon-only/common/memtrack.c new file mode 100644 index 00000000..e53daebc --- /dev/null +++ b/vmmon-only/common/memtrack.c @@ -0,0 +1,551 @@ +/********************************************************* + * Copyright (C) 1998 VMware, Inc. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation version 2 and no later version. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY + * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * for more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write to the Free Software Foundation, Inc., + * 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + * + *********************************************************/ + +/* + * memtrack.c -- + * + * Utility module for tracking pinned memory, which allows later + * lookup by VPN. + * + * -- + * + * Track memory using a 3-level directory, to keep allocations to one + * page in size. The first level is inlined with the MemTrack struct + * and a new page is allocated for each directory in the second level, + * as needed. The third level packs in as many MemTrackEntry structs + * on a single page as possible. + * + * Optionally use a 2-level directory on systems that prefer larger + * contiguous allocations. In this case we allow the first level + * allocation to be larger than 1 page (its size will depend on the + * maximum number of tracked pages, currently set to 4GB). + * + * MemTrack MemTrackDir1 MemTrackDir2 MemTrackDir3 + * (Handle) (Optional) + * + * ---->/----------\ + * | | Dir[0] |----->/----------\ + * | | Dir[1] | | Entry[0] | + * | | ... | | Entry[1] | + * | | Dir[N] | | ... | + * /----------\ MEMTRACK_DIR2_ENTRIES \----------/ | Entry[N] | + * | ... | | \----------/ + * | dir1 |----/----------\ | ->/----------\ + * | ... |\ | Dir[0] |---- | | Dir[N+1] | + * | | \ | Dir[1] |------- | Dir[N+2] | + * \----------/ \ | ... | | ... | + * \| Dir[N] |----- | Dir[N+N] | . + * \----------/ | \----------/ . + * MEMTRACK_DIR1_ENTRIES | . + * --->/----------\ + * | ... | + * | | + * | | + * | Dir[M] |----->/----------\ + * \----------/ | ... | + * | | + * | | + * | Entry[M] | + * \----------/ + * + * We also keep a simple chaining hash table of entries hashed on + * the VPN, for quick lookup. A separate hash table hashed on the MPN + * exists as well, but this is only used in debug builds. + * + * This tracker does not allow pages to be removed. If, in the future, + * we have a use case for removing MPNs from the tracker, a simple + * MemTrackEntry recycle scheme can be implemented at the cost of an + * additional pointer per MemTrackEntry instance. + */ + +#if defined(__linux__) +/* Must come before any kernel header file. */ +# include "driver-config.h" + +# include /* memset() in the kernel */ +#elif defined(WINNT_DDK) +# undef PAGE_SIZE /* Redefined in ntddk.h, and we use that defn. 
*/ +# undef PAGE_SHIFT +# include +#else +# include +#endif + +#include "vmware.h" +#include "hostif.h" + +#include "memtrack.h" + +/* + * Modify this value to increase the maximum number of tracked pages + * per MemTrack instance. + */ +#define MEMTRACK_MAX_TRACKED MBYTES_2_PAGES(4096) + +/* + * Linux uses a 3-level directory, because we want to keep allocations + * to a single page. + */ +#if defined(__linux__) +#define MEMTRACK_3LEVEL (1) +#endif + +#define MEMTRACK_DIR3_ENTRIES (PAGE_SIZE / sizeof (MemTrackEntry)) +#if defined(MEMTRACK_3LEVEL) +#define MEMTRACK_DIR2_ENTRIES (PAGE_SIZE / sizeof (void *)) +#else +#define MEMTRACK_DIR2_ENTRIES (1) +#endif +#define MEMTRACK_DIR1_ENTRIES CEILING(MEMTRACK_MAX_TRACKED, \ + (MEMTRACK_DIR2_ENTRIES * \ + MEMTRACK_DIR3_ENTRIES)) + +#define MEMTRACK_HT_SIZE (16384) +#define MEMTRACK_HT_ENTRIES (PAGE_SIZE / sizeof (void *)) +#define MEMTRACK_HT_PAGES (MEMTRACK_HT_SIZE / MEMTRACK_HT_ENTRIES) + +typedef struct MemTrackDir3 { + MemTrackEntry entries[MEMTRACK_DIR3_ENTRIES]; +} MemTrackDir3; + +#if defined(MEMTRACK_3LEVEL) +typedef struct MemTrackDir2 { + MemTrackDir3 *dir[MEMTRACK_DIR2_ENTRIES]; +} MemTrackDir2; +#else +typedef struct MemTrackDir3 MemTrackDir2; +#endif + +typedef struct MemTrackDir1 { + MemTrackDir2 *dir[MEMTRACK_DIR1_ENTRIES]; +} MemTrackDir1; + +typedef struct MemTrackHTPage { + MemTrackEntry *entries[MEMTRACK_HT_ENTRIES]; +} MemTrackHTPage; + +typedef struct MemTrackHT { + MemTrackHTPage *pages[MEMTRACK_HT_PAGES]; +} MemTrackHT; + +typedef uint64 MemTrackHTKey; + +typedef struct MemTrack { + unsigned numPages; /* Number of pages tracked. */ + MemTrackDir1 dir1; /* First level directory. */ + MemTrackHT vpnHashTable; /* VPN to entry hashtable. */ +#if defined(MEMTRACK_MPN_LOOKUP) + MemTrackHT mpnHashTable; /* MPN to entry hashtable. */ +#endif +} MemTrack; + +/* + * The following functions and macros help allocate and access the + * directory structure. This is convenient because the second level + * directory is optional. 
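A worked example of the directory sizing and of the index split performed by the MEMTRACK_IDX2DIR macro defined just below. It assumes 4 KiB pages, 64-bit pointers, and a 32-byte debug MemTrackEntry (vpn, mpn, two chain pointers); the real counts come from PAGE_SIZE and sizeof at compile time, so treat the numbers as illustrative.

#include <stdint.h>
#include <stdio.h>

#define DEMO_PAGE_SIZE  4096u
#define DEMO_ENTRY_SIZE 32u                                    /* assumed sizeof(MemTrackEntry) */
#define DIR3_ENTRIES    (DEMO_PAGE_SIZE / DEMO_ENTRY_SIZE)     /* 128 entries per leaf page     */
#define DIR2_ENTRIES    (DEMO_PAGE_SIZE / sizeof(void *))      /* 512 leaf pages per dir2 page  */
#define MAX_TRACKED     (4096u * 256u)                         /* pages in 4 GB of 4 KiB pages  */
#define CEILING(x, y)   (((x) + (y) - 1) / (y))

int main(void)
{
   unsigned long dir1Entries = CEILING(MAX_TRACKED, DIR2_ENTRIES * DIR3_ENTRIES);
   unsigned long idx = 1000000;   /* the 1,000,000th tracked page */
   unsigned long p1 = idx / (DIR2_ENTRIES * DIR3_ENTRIES);   /* first-level slot  */
   unsigned long p2 = (idx / DIR3_ENTRIES) % DIR2_ENTRIES;   /* second-level slot */
   unsigned long p3 = idx % DIR3_ENTRIES;                    /* entry within leaf */

   printf("capacity per dir2 page: %lu pages (%lu MB)\n",
          (unsigned long)(DIR2_ENTRIES * DIR3_ENTRIES),
          (unsigned long)(DIR2_ENTRIES * DIR3_ENTRIES) * 4 / 1024);
   printf("dir1 entries needed for 4 GB: %lu\n", dir1Entries);
   printf("idx %lu -> p1=%lu p2=%lu p3=%lu\n", idx, p1, p2, p3);
   return 0;
}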
+ */ + +#define MEMTRACK_IDX2DIR(_idx, _p1, _p2, _p3) \ + do { \ + _p1 = _idx / (MEMTRACK_DIR2_ENTRIES * MEMTRACK_DIR3_ENTRIES); \ + _p2 = (_idx / MEMTRACK_DIR3_ENTRIES) % MEMTRACK_DIR2_ENTRIES; \ + _p3 = _idx % MEMTRACK_DIR3_ENTRIES; \ + } while (0) + +#define MEMTRACK_GETDIR2(_dir1, _p1) (_dir1->dir[_p1]) +#define MEMTRACK_ALLOCDIR2(_dir1, _p1) MemTrackAllocDir2(_dir1, _p1) +#define MEMTRACK_FREEDIR2(_dir1) HostIF_FreePage(_dir1) + +#if defined(MEMTRACK_3LEVEL) +#define MEMTRACK_GETENTRY(_dir1, _p1, _p2, _p3) \ + (&((_dir1->dir[_p1])->dir[_p2])->entries[_p3]) +#define MEMTRACK_GETDIR3(_dir2, _p2) (_dir2->dir[_p2]) +#define MEMTRACK_ALLOCDIR3(_dir2, _p2) MemTrackAllocDir3(_dir2, _p2) +#define MEMTRACK_FREEDIR3(_dir2) HostIF_FreePage(_dir2) +#else +#define MEMTRACK_GETENTRY(_dir1, _p1, _p2, _p3) \ + (&(_dir1->dir[_p1])->entries[_p3]) +#define MEMTRACK_GETDIR3(_dir2, _p2) (_dir2) +#define MEMTRACK_ALLOCDIR3(_dir2, _p2) (_dir2) +#define MEMTRACK_FREEDIR3(_dir2) +#endif + +static INLINE void * +MemTrackAllocPage(void) +{ + void *ptr = HostIF_AllocPage(); + if (ptr != NULL) { + memset(ptr, 0, PAGE_SIZE); + } + return ptr; +} + +#define MEMTRACK_ALLOCDFN(_name, _itype, _otype) \ + static INLINE _otype * \ + _name(_itype *arg, unsigned pos) \ + { \ + if (arg->dir[pos] == NULL) { \ + arg->dir[pos] = MemTrackAllocPage(); \ + } \ + return arg->dir[pos]; \ + } + +#if defined(MEMTRACK_3LEVEL) +MEMTRACK_ALLOCDFN(MemTrackAllocDir3, MemTrackDir2, MemTrackDir3) +#endif +MEMTRACK_ALLOCDFN(MemTrackAllocDir2, MemTrackDir1, MemTrackDir2) + + +/* + *---------------------------------------------------------------------- + * + * MemTrackHTLookup -- + * MemTrackHTInsert -- + * + * Helper functions to insert or lookup entries in the VPN or + * MPN hash tables. Hash tables are always allocated in page + * size chunks. + * + *---------------------------------------------------------------------- + */ + +#define MEMTRACK_HASHKEY(_key, _hash, _page, _pos) \ + do { \ + _hash = _key % MEMTRACK_HT_SIZE; \ + _page = _hash / MEMTRACK_HT_ENTRIES; \ + _pos = _hash % MEMTRACK_HT_ENTRIES; \ + } while(0) + +static INLINE MemTrackEntry ** +MemTrackHTLookup(MemTrackHT *ht, // IN + MemTrackHTKey key) // IN +{ + uint64 hash, page, pos; + + MEMTRACK_HASHKEY(key, hash, page, pos); + + return &ht->pages[page]->entries[pos]; +} + +static INLINE void +MemTrackHTInsert(MemTrackHT *ht, // IN + MemTrackEntry *ent, // IN + MemTrackEntry **chain, // OUT + MemTrackHTKey key) // IN +{ + MemTrackEntry **head = MemTrackHTLookup(ht, key); + *chain = *head; + *head = ent; +} + + +/* + *---------------------------------------------------------------------- + * + * MemTrackCleanup -- + * + * Deallocate all memory associated with the specified tracker. + * + * Results: + * None. + * + * Side effects: + * Memory deallocation. 
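The hash tables above use intrusive head insertion: MemTrackHTInsert() stores the old bucket head into the new entry's chain pointer, so no separate node allocation is needed, and lookup simply walks the chain comparing keys. A minimal standalone sketch of the same idiom, with a much smaller table than the real 16384-bucket, page-backed one; DemoEntry and the 8-bucket size are illustrative names only.

#include <stdint.h>
#include <stdio.h>

#define DEMO_HT_SIZE 8u            /* the real table uses 16384 buckets split across pages */

typedef struct DemoEntry {
   uint64_t vpn;
   uint64_t mpn;
   struct DemoEntry *vpnChain;     /* intrusive chain pointer, like MemTrackEntry */
} DemoEntry;

static DemoEntry *buckets[DEMO_HT_SIZE];

/* Head insertion: the new entry remembers the old head in its chain pointer. */
static void DemoInsert(DemoEntry *ent)
{
   DemoEntry **head = &buckets[ent->vpn % DEMO_HT_SIZE];

   ent->vpnChain = *head;
   *head = ent;
}

/* Walk the bucket's chain until the VPN matches, as MemTrack_LookupVPN() does. */
static DemoEntry *DemoLookup(uint64_t vpn)
{
   for (DemoEntry *e = buckets[vpn % DEMO_HT_SIZE]; e != NULL; e = e->vpnChain) {
      if (e->vpn == vpn) {
         return e;
      }
   }
   return NULL;
}

int main(void)
{
   static DemoEntry pool[3];
   uint64_t vpns[3] = { 5, 13, 21 };          /* 13 and 21 collide with 5 (mod 8) */

   for (int i = 0; i < 3; i++) {
      pool[i].vpn = vpns[i];
      pool[i].mpn = 100 + i;
      DemoInsert(&pool[i]);
   }
   for (int i = 0; i < 3; i++) {
      DemoEntry *e = DemoLookup(vpns[i]);
      printf("vpn %llu -> mpn %llu\n",
             (unsigned long long)vpns[i], (unsigned long long)(e ? e->mpn : 0));
   }
   return 0;
}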
+ * + *---------------------------------------------------------------------- + */ + +static void +MemTrackCleanup(MemTrack *mt) // IN +{ + unsigned idx; + unsigned p1; + MemTrackDir1 *dir1; + + if (mt == NULL) { + return; + } + dir1 = &mt->dir1; + + for (p1 = 0; p1 < MEMTRACK_DIR1_ENTRIES; p1++) { + unsigned p2; + MemTrackDir2 *dir2 = MEMTRACK_GETDIR2(dir1, p1); + + if (dir2 == NULL) { + break; + } + for (p2 = 0; p2 < MEMTRACK_DIR2_ENTRIES; p2++) { + MemTrackDir3 *dir3 = MEMTRACK_GETDIR3(dir2, p2); + + if (dir3 == NULL) { + break; + } + MEMTRACK_FREEDIR3(dir3); + } + MEMTRACK_FREEDIR2(dir2); + } + + for (idx = 0; idx < MEMTRACK_HT_PAGES; idx++) { + if (mt->vpnHashTable.pages[idx] != NULL) { + HostIF_FreePage(mt->vpnHashTable.pages[idx]); + } +#if defined(MEMTRACK_MPN_LOOKUP) + if (mt->mpnHashTable.pages[idx] != NULL) { + HostIF_FreePage(mt->mpnHashTable.pages[idx]); + } +#endif + } + + HostIF_FreeKernelMem(mt); +} + + +/* + *---------------------------------------------------------------------- + * + * MemTrack_Init -- + * + * Allocate and initialize the tracker. + * + * Results: + * Handle used to access the tracker. + * + * Side effects: + * Memory allocation. + * + *---------------------------------------------------------------------- + */ + +MemTrack * +MemTrack_Init(void) +{ + MemTrack *mt; + unsigned idx; + +#if defined(MEMTRACK_3LEVEL) + ASSERT_ON_COMPILE(sizeof *mt <= PAGE_SIZE); + ASSERT_ON_COMPILE(sizeof (MemTrackDir2) == PAGE_SIZE); +#endif + ASSERT_ON_COMPILE(sizeof (MemTrackDir3) <= PAGE_SIZE); + + mt = HostIF_AllocKernelMem(sizeof *mt, FALSE); + if (mt == NULL) { + Warning("MemTrack failed to allocate handle.\n"); + goto error; + } + memset(mt, 0, sizeof *mt); + + for (idx = 0; idx < MEMTRACK_HT_PAGES; idx++) { + MemTrackHTPage *htPage = MemTrackAllocPage(); + + if (htPage == NULL) { + Warning("MemTrack failed to allocate VPN hash table (%d).\n", idx); + goto error; + } + mt->vpnHashTable.pages[idx] = htPage; + } + +#if defined(MEMTRACK_MPN_LOOKUP) + for (idx = 0; idx < MEMTRACK_HT_PAGES; idx++) { + MemTrackHTPage *htPage = MemTrackAllocPage(); + + if (htPage == NULL) { + Warning("MemTrack failed to allocate MPN hash table (%d).\n", idx); + goto error; + } + mt->mpnHashTable.pages[idx] = htPage; + } +#endif + + return mt; + +error: + MemTrackCleanup(mt); + return NULL; +} + + +/* + *---------------------------------------------------------------------- + * + * MemTrack_Add -- + * + * Add the specified VPN:MPN pair to the memory tracker. + * + * Results: + * A pointer to the element, or NULL on error. + * + * Side effects: + * Memory allocation. 
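A hedged usage sketch of the tracker's life cycle, using only the functions declared in memtrack.h and hostif.h. It would be compiled inside vmmon (it relies on VMDriver, VA64, and PAGE_SHIFT from the module's headers) and is not a standalone program; DemoUnpin, DemoLifeCycle, and the way the user address is reconstructed from the VPN are hypothetical illustrations, not code from this patch.

#include "vmware.h"
#include "hostif.h"
#include "memtrack.h"

/* Cleanup callback: invoked once per tracked page during teardown,
 * e.g. to unpin the MPN with the host OS. */
static void
DemoUnpin(void *cData,            // IN: VMDriver passed through MemTrack_Cleanup()
          MemTrackEntry *entry)   // IN: one tracked VPN:MPN pair
{
   VMDriver *vm = cData;

   (void)HostIF_UnlockPageByMPN(vm, entry->mpn, (VA64)entry->vpn << PAGE_SHIFT);
}

/* Sketch of the add/lookup/cleanup life cycle. */
static void
DemoLifeCycle(VMDriver *vm, VPN64 vpn, MPN mpn)
{
   struct MemTrack *mt = MemTrack_Init();

   if (mt == NULL) {
      return;
   }
   if (MemTrack_LookupVPN(mt, vpn) == NULL) {
      (void)MemTrack_Add(mt, vpn, mpn);            /* NULL means the tracker is full */
   }
   (void)MemTrack_Cleanup(mt, DemoUnpin, vm);      /* returns the number of pages seen */
}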
+ * + *---------------------------------------------------------------------- + */ + +MemTrackEntry * +MemTrack_Add(MemTrack *mt, // IN + VPN64 vpn, // IN + MPN mpn) // IN +{ + unsigned idx = mt->numPages; + unsigned p1, p2, p3; + MemTrackEntry *ent; + MemTrackDir1 *dir1 = &mt->dir1; + MemTrackDir2 *dir2; + MemTrackDir3 *dir3; + MEMTRACK_IDX2DIR(idx, p1, p2, p3); + + if (p1 >= MEMTRACK_DIR1_ENTRIES || + p2 >= MEMTRACK_DIR2_ENTRIES || + p3 >= MEMTRACK_DIR3_ENTRIES) { + return NULL; + } + + dir2 = MEMTRACK_ALLOCDIR2(dir1, p1); + if (dir2 == NULL) { + return NULL; + } + + dir3 = MEMTRACK_ALLOCDIR3(dir2, p2); + if (dir3 == NULL) { + return NULL; + } + + ent = MEMTRACK_GETENTRY(dir1, p1, p2, p3); + ent->vpn = vpn; + ent->mpn = mpn; + + MemTrackHTInsert(&mt->vpnHashTable, ent, &ent->vpnChain, ent->vpn); +#if defined(MEMTRACK_MPN_LOOKUP) + MemTrackHTInsert(&mt->mpnHashTable, ent, &ent->mpnChain, ent->mpn); +#endif + + mt->numPages++; + + return ent; +} + + +/* + *---------------------------------------------------------------------- + * + * MemTrack_LookupVPN -- + * + * Lookup the specified VPN address in the memory tracker. + * + * Results: + * A pointer to the element, or NULL if not there. + * + * Side effects: + * None. + * + *---------------------------------------------------------------------- + */ + +MemTrackEntry * +MemTrack_LookupVPN(MemTrack *mt, // IN + VPN64 vpn) // IN +{ + MemTrackEntry *next = *MemTrackHTLookup(&mt->vpnHashTable, vpn); + + while (next != NULL) { + if (next->vpn == vpn) { + return next; + } + next = next->vpnChain; + } + + return NULL; +} + + +#if defined(MEMTRACK_MPN_LOOKUP) +/* + *---------------------------------------------------------------------- + * + * MemTrack_LookupMPN -- + * + * Lookup the specified MPN address in the memory tracker. + * + * Results: + * A pointer to the element, or NULL if not there. + * + * Side effects: + * None. + * + *---------------------------------------------------------------------- + */ +MemTrackEntry * +MemTrack_LookupMPN(MemTrack *mt, // IN + MPN mpn) // IN +{ + MemTrackEntry *next = *MemTrackHTLookup(&mt->mpnHashTable, mpn); + + while (next != NULL) { + if (next->mpn == mpn) { + return next; + } + next = next->mpnChain; + } + + return NULL; +} +#endif + + +/* + *---------------------------------------------------------------------- + * + * MemTrack_Cleanup -- + * + * Cleanup all resources allocated for the tracker. For + * all pages in the tracker call the user provided clean up + * function. + * + * Results: + * Number of pages in the tracker. + * + * Side effects: + * Memory deallocation. + * + *---------------------------------------------------------------------- + */ + +unsigned +MemTrack_Cleanup(MemTrack *mt, // IN + MemTrackCleanupCb *cb, // IN + void *cData) // IN +{ + unsigned idx; + unsigned count = 0; + + for (idx = 0; idx < mt->numPages; idx++) { + unsigned p1, p2, p3; + MemTrackEntry *ent; + MemTrackDir1 *dir1 = &mt->dir1; + MEMTRACK_IDX2DIR(idx, p1, p2, p3); + + ent = MEMTRACK_GETENTRY(dir1, p1, p2, p3); + cb(cData, ent); + + count++; + } + + MemTrackCleanup(mt); + + return count; +} diff --git a/vmmon-only/common/memtrack.h b/vmmon-only/common/memtrack.h new file mode 100644 index 00000000..977c5af3 --- /dev/null +++ b/vmmon-only/common/memtrack.h @@ -0,0 +1,60 @@ +/********************************************************* + * Copyright (C) 1998 VMware, Inc. All rights reserved. 
+ * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation version 2 and no later version. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY + * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * for more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write to the Free Software Foundation, Inc., + * 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + * + *********************************************************/ + +/* + * memtrack.h -- + * + * Utility module for tracking pinned memory, which allows later + * lookup by VPN. + */ + + +#ifndef _MEMTRACK_H_ +#define _MEMTRACK_H_ + +#define INCLUDE_ALLOW_VMMON +#define INCLUDE_ALLOW_VMCORE +#include "includeCheck.h" + +#if defined(VMX86_DEBUG) +#define MEMTRACK_MPN_LOOKUP +#endif + +struct MemTrack; + +typedef struct MemTrackEntry { + VPN64 vpn; + MPN mpn; + struct MemTrackEntry *vpnChain; +#if defined(MEMTRACK_MPN_LOOKUP) + struct MemTrackEntry *mpnChain; +#endif +} MemTrackEntry; + +typedef void (MemTrackCleanupCb)(void *cData, MemTrackEntry *entry); + +extern struct MemTrack *MemTrack_Init(void); +extern unsigned MemTrack_Cleanup(struct MemTrack *mt, MemTrackCleanupCb *cb, + void *cbData); +extern MemTrackEntry *MemTrack_Add(struct MemTrack *mt, VPN64 vpn, MPN mpn); +extern MemTrackEntry *MemTrack_LookupVPN(struct MemTrack *mt, VPN64 vpn); +#if defined(MEMTRACK_MPN_LOOKUP) +extern MemTrackEntry *MemTrack_LookupMPN(struct MemTrack *mt, MPN mpn); +#endif + +#endif // _MEMTRACK_H_ diff --git a/vmmon-only/common/phystrack.c b/vmmon-only/common/phystrack.c new file mode 100644 index 00000000..5972e24a --- /dev/null +++ b/vmmon-only/common/phystrack.c @@ -0,0 +1,495 @@ +/********************************************************* + * Copyright (C) 1998,2014 VMware, Inc. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation version 2 and no later version. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY + * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * for more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write to the Free Software Foundation, Inc., + * 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + * + *********************************************************/ + +/* + * phystrack.c -- + * + * track down the utilization of the physical pages. + * + * Depending on configuration phystracker provides either 2-level or + * 3-level structure to track whether page (specified by its MPN) is + * locked or no. Linux uses 3-level structures with top limit of + * 1TB (32bit) or 16TB (64bit). Windows use 2-level structures + * ready to hold 128GB (32bit) or 2TB (64bit) of memory. On Mac + * limit is 2-level 128GB. + * + * 2-level phystracker is built on top of 3-level one by collapsing + * middle level. 
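The coverage figures quoted in the comment above follow directly from the PHYSTRACK_L*_ENTRIES definitions below. A short worked-arithmetic sketch, assuming 4 KiB pages and 64-bit pointers (the Linux 3-level layout): one L3 bitmap page holds 8 * 4096 bits, one L2 page holds PAGE_SIZE / sizeof(void *) L3 pointers, and the first level is fixed at 256 entries.

#include <stdio.h>

int main(void)
{
   /* Assumes 4 KiB pages and 64-bit pointers (the Linux 3-level layout). */
   unsigned long long pageSize  = 4096;
   unsigned long long l3Pages   = 8 * pageSize;              /* bits per L3 page = 32768 MPNs */
   unsigned long long l2Entries = pageSize / sizeof(void *); /* 512 L3 pages per L2 page      */
   unsigned long long l1Entries = 256;                       /* fixed first-level size        */

   unsigned long long perL3 = l3Pages * pageSize;            /* bytes covered by one L3 page */
   unsigned long long perL2 = perL3 * l2Entries;             /* bytes covered by one L2 page */
   unsigned long long total = perL2 * l1Entries;             /* whole tracker                */

   printf("one L3 page covers %llu MB\n", perL3 >> 20);      /* 128 MB */
   printf("one L2 page covers %llu GB\n", perL2 >> 30);      /* 64 GB  */
   printf("full tracker covers %llu TB\n", total >> 40);     /* 16 TB  */
   return 0;
}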
+ */ + + +#ifdef linux +/* Must come before any kernel header file --hpreg */ +# include "driver-config.h" + +# include /* memset() in the kernel */ +#else +# include +#endif + +#include "vmware.h" +#include "vmx86.h" +#include "phystrack.h" +#include "hostif.h" + +#define BYTES_PER_ENTRY (PAGE_SIZE) +#define PHYSTRACK_L3_ENTRIES (8 * BYTES_PER_ENTRY) /* 128MB */ + +#if defined(WINNT_DDK) +#define PHYSTRACK_L1_ENTRIES (PHYSTRACK_MAX_SUPPORTED_GB * 8) +#elif defined(__linux__) +#define PHYSTRACK_L2_ENTRIES (BYTES_PER_ENTRY / sizeof(void *)) /* 64GB or 128GB */ +/* + * Currently MPN is 32 bits. 15 bits are in L3, 9 bits are in L2, + * leaving 8 bits for L1... + */ +#define PHYSTRACK_L1_ENTRIES (256) /* 16TB. */ +#else +#define PHYSTRACK_L1_ENTRIES ((128 + 4) * 8) /* 128 GB */ +#endif + +#ifndef PHYSTRACK_L2_ENTRIES +#define PHYSTRACK_L2_ENTRIES (1) +#else +#define PHYSTRACK_3LEVEL (1) +#endif + +typedef struct PhysTrackerL3 { + uint8 bits[BYTES_PER_ENTRY]; +} PhysTrackerL3; + +#ifdef PHYSTRACK_3LEVEL +typedef struct PhysTrackerL2 { + PhysTrackerL3 *dir[PHYSTRACK_L2_ENTRIES]; +} PhysTrackerL2; +#else +typedef struct PhysTrackerL3 PhysTrackerL2; +#endif + +typedef struct PhysTracker { + VMDriver *vm; /* Used only for debugging and asserts. */ + PhysTrackerL2 *dir[PHYSTRACK_L1_ENTRIES]; +} PhysTracker; + + +/* + * Convert MPN to p1, p2, and p3 indices. p1/p2/p3 must be l-values. + * Currently we support a 64 bit container for an MPN + * in hosted but not an actual 64 bit value as no hosted OS + * supports this yet. Hence in PhysMem tracker we are deliberately using + * a 32-bit container to save memory. Also the tracker is allocating pages + * considering the MPN to be a 32 bit value. This will change once we get + * systems supporting 64 bit memory/addressing space. + * Until then let us assert if a value greater than 32 bit is being passed. + */ +#define PHYSTRACK_MPN2IDX(mpn, p1, p2, p3) \ + do { \ + ASSERT((mpn >> 32) == 0); \ + p2 = (unsigned)(mpn) / PHYSTRACK_L3_ENTRIES; \ + p1 = p2 / PHYSTRACK_L2_ENTRIES; \ + p2 = p2 % PHYSTRACK_L2_ENTRIES; \ + p3 = (unsigned)(mpn) % PHYSTRACK_L3_ENTRIES; \ + } while (0) + +/* + * Convert L3 index to offset and bitmask. offs/bitmask must be l-values. + */ +#define PHYSTRACK_GETL3POS(p3, offs, bitmask) \ + do { \ + offs = (p3) / 8; \ + bitmask = 1 << ((p3) % 8); \ + } while (0) + +/* + * Helpers hiding middle level. + */ +#ifdef PHYSTRACK_3LEVEL +#define PHYSTRACK_GETL3(dir2, p2) (dir2)->dir[(p2)] +#define PHYSTRACK_ALLOCL3(dir2, p2) PhysTrackAllocL3((dir2), (p2)) +#define PHYSTRACK_FREEL3(dir2, p2) PhysTrackFreeL3((dir2), (p2)) +#else +#define PHYSTRACK_GETL3(dir2, p2) (dir2) +#define PHYSTRACK_ALLOCL3(dir2, p2) (dir2) +#define PHYSTRACK_FREEL3(dir2, p2) do { } while (0) +#endif + + +#ifdef PHYSTRACK_3LEVEL +/* + *---------------------------------------------------------------------- + * + * PhysTrackAllocL3 -- + * + * Allocate and hook L3 table to the L2 directory if does not exist. + * Or get existing one if it exists. + * + * Results: + * L3 table. + * + * Side effects: + * None. 
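A standalone sketch of the index math performed by PHYSTRACK_MPN2IDX and PHYSTRACK_GETL3POS above: the MPN first selects an L3 bitmap page (p1, p2), then a bit inside it (p3), which in turn reduces to a byte offset and a bitmask. The constants mirror the 4 KiB page / 64-bit pointer case, and the example MPN is arbitrary.

#include <stdio.h>

#define BYTES_PER_ENTRY 4096u
#define L3_ENTRIES      (8u * BYTES_PER_ENTRY)              /* MPNs per L3 page */
#define L2_ENTRIES      (BYTES_PER_ENTRY / sizeof(void *))  /* 512 on 64-bit    */

int main(void)
{
   unsigned long long mpn = 0x123456;         /* example MPN: a page at roughly 4.5 GB */
   unsigned p2 = (unsigned)(mpn / L3_ENTRIES);
   unsigned p1 = p2 / L2_ENTRIES;
   unsigned p3 = (unsigned)(mpn % L3_ENTRIES);
   unsigned offs, bitmask;

   p2 %= L2_ENTRIES;
   offs    = p3 / 8;                          /* byte within the L3 bitmap page */
   bitmask = 1u << (p3 % 8);                  /* bit within that byte           */

   printf("mpn %#llx -> p1=%u p2=%u p3=%u (byte %u, mask %#x)\n",
          mpn, p1, p2, p3, offs, bitmask);
   return 0;
}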
+ * + *---------------------------------------------------------------------- + */ + +static INLINE_SINGLE_CALLER PhysTrackerL3 * +PhysTrackAllocL3(PhysTrackerL2 *dir2, + unsigned int p2) +{ + PhysTrackerL3 *dir3; + + dir3 = dir2->dir[p2]; + if (!dir3) { + ASSERT_ON_COMPILE(sizeof *dir3 == PAGE_SIZE); + dir3 = HostIF_AllocPage(); + if (!dir3) { + PANIC(); + } + memset(dir3, 0, sizeof *dir3); + dir2->dir[p2] = dir3; + } + return dir3; +} + + +/* + *---------------------------------------------------------------------- + * + * PhysTrackFreeL3 -- + * + * Unhook L3 table from L2 directory, and free it. + * + * Results: + * None. + * + * Side effects: + * None. + * + *---------------------------------------------------------------------- + */ + +static INLINE_SINGLE_CALLER void +PhysTrackFreeL3(PhysTrackerL2 *dir2, + unsigned int p2) +{ + HostIF_FreePage(dir2->dir[p2]); + dir2->dir[p2] = NULL; +} +#endif + + +/* + *---------------------------------------------------------------------- + * + * PhysTrack_Alloc -- + * + * Create new PhysTracker. + * + * Results: + * Creates new PhysTracker. + * + * Side effects: + * None. + * + *---------------------------------------------------------------------- + */ + +PhysTracker * +PhysTrack_Alloc(VMDriver *vm) +{ + PhysTracker *tracker; + + ASSERT(vm != NULL); + + /* allocate a new phystracker */ + tracker = HostIF_AllocKernelMem(sizeof *tracker, FALSE); + if (tracker) { + memset(tracker, 0, sizeof *tracker); + tracker->vm = vm; + } else { + Warning("%s failed\n", __FUNCTION__); + } + + return tracker; +} + + +/* + *---------------------------------------------------------------------- + * + * PhysTrack_Free -- + * + * module deallocation + * + * Results: + * reallocates all structures, including 'tracker' + * + * Side effects: + * tracker deallocated + * + *---------------------------------------------------------------------- + */ + +void +PhysTrack_Free(PhysTracker *tracker) +{ + unsigned int p1; + + ASSERT(tracker); + + for (p1 = 0; p1 < PHYSTRACK_L1_ENTRIES; p1++) { + PhysTrackerL2 *dir2 = tracker->dir[p1]; + + if (dir2) { + unsigned int p2; + + for (p2 = 0; p2 < PHYSTRACK_L2_ENTRIES; p2++) { + PhysTrackerL3 *dir3 = PHYSTRACK_GETL3(dir2, p2); + + if (dir3) { + unsigned int pos; + + for (pos = 0; pos < BYTES_PER_ENTRY; pos++) { + if (dir3->bits[pos]) { + Warning("%s: pfns still locked\n", __FUNCTION__); + PANIC(); + } + } + PHYSTRACK_FREEL3(dir2, p2); + } + } + HostIF_FreePage(dir2); + tracker->dir[p1] = NULL; + } + } + HostIF_FreeKernelMem(tracker); +} + + +/* + *---------------------------------------------------------------------- + * + * PhysTrack_Add -- + * + * add a page to the core map tracking. + * + * Results: + * void + * + * Side effects: + * Fatal if the page is already tracked. 
+ * + *---------------------------------------------------------------------- + */ + +void +PhysTrack_Add(PhysTracker *tracker, // IN/OUT + MPN mpn) // IN: MPN of page to be added +{ + unsigned int p1; + unsigned int p2; + unsigned int p3; + unsigned int pos; + unsigned int bit; + PhysTrackerL2 *dir2; + PhysTrackerL3 *dir3; + + ASSERT(tracker); + ASSERT(HostIF_VMLockIsHeld(tracker->vm)); + PHYSTRACK_MPN2IDX(mpn, p1, p2, p3); + ASSERT(p1 < PHYSTRACK_L1_ENTRIES); + + dir2 = tracker->dir[p1]; + if (!dir2) { + // more efficient with page alloc + ASSERT_ON_COMPILE(sizeof *dir2 == PAGE_SIZE); + dir2 = HostIF_AllocPage(); + if (!dir2) { + PANIC(); + } + memset(dir2, 0, sizeof *dir2); + tracker->dir[p1] = dir2; + } + dir3 = PHYSTRACK_ALLOCL3(dir2, p2); + PHYSTRACK_GETL3POS(p3, pos, bit); + if (dir3->bits[pos] & bit) { + PANIC(); + } + dir3->bits[pos] |= bit; +} + + +/* + *---------------------------------------------------------------------- + * + * PhysTrack_Remove -- + * + * remove a page from the core map tracking + * + * Results: + * void + * + * Side effects: + * Fatal if the page is not tracked + * + *---------------------------------------------------------------------- + */ + +void +PhysTrack_Remove(PhysTracker *tracker, // IN/OUT + MPN mpn) // IN: MPN of page to be removed. +{ + unsigned int p1; + unsigned int p2; + unsigned int p3; + unsigned int pos; + unsigned int bit; + PhysTrackerL2 *dir2; + PhysTrackerL3 *dir3; + + ASSERT(tracker); + ASSERT(HostIF_VMLockIsHeld(tracker->vm)); + PHYSTRACK_MPN2IDX(mpn, p1, p2, p3); + ASSERT(p1 < PHYSTRACK_L1_ENTRIES); + + dir2 = tracker->dir[p1]; + if (!dir2) { + PANIC(); + } + dir3 = PHYSTRACK_GETL3(dir2, p2); + if (!dir3) { + PANIC(); + } + PHYSTRACK_GETL3POS(p3, pos, bit); + if (!(dir3->bits[pos] & bit)) { + PANIC(); + } + dir3->bits[pos] &= ~bit; +} + + +/* + *---------------------------------------------------------------------- + * + * PhysTrack_Test -- + * + * tests whether a page is being tracked + * + * Results: + * TRUE if the page is tracked + * FALSE otherwise + * + * Side effects: + * None. + * + *---------------------------------------------------------------------- + */ + +Bool +PhysTrack_Test(const PhysTracker *tracker, // IN + MPN mpn) // IN: MPN of page to be tested. +{ + unsigned int p1; + unsigned int p2; + unsigned int p3; + unsigned int pos; + unsigned int bit; + PhysTrackerL2 *dir2; + PhysTrackerL3 *dir3; + + ASSERT(tracker); + ASSERT(HostIF_VMLockIsHeld(tracker->vm)); + PHYSTRACK_MPN2IDX(mpn, p1, p2, p3); + if (p1 >= PHYSTRACK_L1_ENTRIES) { + return FALSE; + } + dir2 = tracker->dir[p1]; + if (!dir2) { + return FALSE; + } + dir3 = PHYSTRACK_GETL3(dir2, p2); + if (!dir3) { + return FALSE; + } + PHYSTRACK_GETL3POS(p3, pos, bit); + return (dir3->bits[pos] & bit) != 0; +} + + +/* + *---------------------------------------------------------------------- + * + * PhysTrack_GetNext -- + * + * Return next tracked page + * + * Results: + * MPN when some tracked page was found + * INVALID_MPN otherwise. + * + * Side effects: + * None. + * + *---------------------------------------------------------------------- + */ + +MPN +PhysTrack_GetNext(const PhysTracker *tracker, // IN + MPN mpn) // IN: MPN of page to be tracked. +{ + unsigned int p1; + unsigned int p2; + unsigned int p3; + + if (mpn == INVALID_MPN) { + mpn = 0; /* First iteration. */ + } else { + mpn++; /* We want the next MPN. 
*/ + } + PHYSTRACK_MPN2IDX(mpn, p1, p2, p3); + + ASSERT(tracker); + ASSERT(HostIF_VMLockIsHeld(tracker->vm)); + for (; p1 < PHYSTRACK_L1_ENTRIES; p1++) { + PhysTrackerL2 *dir2; + + dir2 = tracker->dir[p1]; + if (dir2) { + for (; p2 < PHYSTRACK_L2_ENTRIES; p2++) { + PhysTrackerL3 *dir3; + + dir3 = PHYSTRACK_GETL3(dir2, p2); + if (dir3) { + for (; p3 < PHYSTRACK_L3_ENTRIES; p3++) { + unsigned int pos; + unsigned int bit; + + PHYSTRACK_GETL3POS(p3, pos, bit); + if (dir3->bits[pos] & bit) { + return (p1 * PHYSTRACK_L2_ENTRIES + p2) * PHYSTRACK_L3_ENTRIES + p3; + } + } + } + p3 = 0; + } + } + p2 = 0; p3 = 0; + } + return INVALID_MPN; +} + diff --git a/vmmon-only/common/phystrack.h b/vmmon-only/common/phystrack.h new file mode 100644 index 00000000..e88c19a3 --- /dev/null +++ b/vmmon-only/common/phystrack.h @@ -0,0 +1,54 @@ +/********************************************************* + * Copyright (C) 1998 VMware, Inc. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation version 2 and no later version. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY + * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * for more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write to the Free Software Foundation, Inc., + * 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + * + *********************************************************/ + +/* + * phystrack.h -- + * + * track down the utilization of the physical pages + */ + +#ifndef PHYSTRACK_H +#define PHYSTRACK_H + +#define INCLUDE_ALLOW_VMMON +#define INCLUDE_ALLOW_VMCORE +#include "includeCheck.h" + +#if defined(WINNT_DDK) +/* XXX: can be more efficient based on server vs. desktop and version of Windows */ +#define PHYSTRACK_MAX_SUPPORTED_GB (2048 + 4) /* 2 TB 64-bit W2k8 + 4 GB PCI */ +#endif + +struct PhysTracker; +struct VMDriver; + +EXTERN struct PhysTracker *PhysTrack_Alloc(struct VMDriver *vm); +EXTERN void PhysTrack_Free(struct PhysTracker *); + +EXTERN void PhysTrack_Add(struct PhysTracker *, MPN); +EXTERN void PhysTrack_Remove(struct PhysTracker *, MPN); +EXTERN Bool PhysTrack_Test(const struct PhysTracker *, MPN); +EXTERN MPN PhysTrack_GetNext(const struct PhysTracker *, MPN); + +#endif + + + + + + diff --git a/vmmon-only/common/task.c b/vmmon-only/common/task.c new file mode 100644 index 00000000..de2429ef --- /dev/null +++ b/vmmon-only/common/task.c @@ -0,0 +1,2180 @@ +/********************************************************* + * Copyright (C) 1998-2016 VMware, Inc. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation version 2 and no later version. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY + * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * for more details. 
+ * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write to the Free Software Foundation, Inc., + * 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + * + *********************************************************/ + +/* + * task.c -- + * + * Task initialization and switching routines between the host + * and the monitor. + * + * A task switch: + * -saves the EFLAGS,CR0,CR2,CR4, and IDT + * -jumps to code on the shared page + * which saves the registers, GDT and CR3 + * which then restores the registers, GDT and CR3 + * -restores the IDT,CR0,CR2,CR4 and EFLAGS + * + * This file is pretty much independent of the host OS. + * + */ + +#ifdef linux +/* Must come before any kernel header file --hpreg */ +# include "driver-config.h" +# include /* memset() in the kernel */ + +# define EXPORT_SYMTAB +#else +# include +#endif + +#include "vmware.h" +#include "modulecall.h" +#include "vmx86.h" +#include "task.h" +#include "vm_asm.h" +#include "cpuid.h" +#include "hostif.h" +/* On Linux, must come before any inclusion of asm/page.h --hpreg */ +#include "hostKernel.h" +#include "comport.h" +#include "crossgdt.h" +#include "x86svm.h" +#include "x86vt.h" +#include "x86vtinstr.h" +#include "apic.h" +#include "x86perfctr.h" + +#if defined(_WIN64) +# include "x86.h" +# include "vmmon-asm-x86-64.h" +# define USE_TEMPORARY_GDT 1 +#else +/* It is OK to set this to 1 on 64-bit Linux/Mac OS for testing. */ +# define USE_TEMPORARY_GDT 0 +#endif + +#define TS_ASSERT(t) do { \ + DEBUG_ONLY(if (!(t)) TaskAssertFail(__LINE__);) \ +} while (0) + +static CrossGDT *crossGDT = NULL; +static MPN crossGDTMPNs[CROSSGDT_NUMPAGES]; +static DTR crossGDTDescHKLA; +static Selector kernelStackSegment = 0; +static uint32 dummyLVT; +static Atomic_uint64 hvRootPage[MAX_PCPUS]; +static Atomic_Ptr tmpGDT[MAX_PCPUS]; +static Bool pebsAvailable = FALSE; + + +/* + *----------------------------------------------------------------------------- + * + * TaskAllocHVRootPage -- + * + * Allocate and initialize an HV root page. Upon success, race to be + * the first to store the allocated MPN in '*slot'. + * + * Results: + * None. + * + * Side effects: + * When the call returns, '*slot' contains the MPN of an HV root page if + * a thread succeeded, or INVALID_MPN if all threads failed. + * + *----------------------------------------------------------------------------- + */ + +static void +TaskAllocHVRootPage(Atomic_uint64 *slot) // IN/OUT +{ + uint32 *content; + uint64 vmxBasicMSR; + MPN mpn; + static const MPN invalidMPN = INVALID_MPN; + + ASSERT(slot != NULL); + + /* Allocate the page contents. */ + content = HostIF_AllocKernelMem(PAGE_SIZE, TRUE); + if (content == NULL) { + Warning("%s: Failed to allocate content.\n", __FUNCTION__); + return; + } + + /* + * On VMX-capable hardware, write the VMCS revision identifier at the + * beginning of the HV root page. On SVM-capable hardware, the HV root + * page is just initialized to zeroes. + */ + memset(content, 0, PAGE_SIZE); + if (HostIF_SafeRDMSR(MSR_VMX_BASIC, &vmxBasicMSR) == 0) { + *content = LODWORD(vmxBasicMSR); + } + + /* Allocate the HV root page. */ + mpn = HostIF_AllocMachinePage(); + + if (mpn != INVALID_MPN) { + /* + * Store the MPN of the HV root page. This is done atomically, so if + * several threads concurrently race and call TaskAllocHVRootPage() with + * the same 'slot', only the first one to pass this finish line will win. 
+ */ + if (HostIF_WriteMachinePage(mpn, PtrToVA64(content)) != 0 || + !Atomic_CMPXCHG64(slot, &invalidMPN, &mpn)) { + /* + * Either we couldn't set up the page or this thread lost the race. + * We must free its HV root page. + */ + Warning("%s: Failed to setup page mpn=%llx.\n", + __FUNCTION__, (long long unsigned)mpn); + HostIF_FreeMachinePage(mpn); + } + } else { + Warning("%s: Failed to allocate page.\n", __FUNCTION__); + } + + HostIF_FreeKernelMem(content); +} + + +/* + *----------------------------------------------------------------------------- + * + * TaskGetHVRootPage -- + * + * Lazily allocate an HV root page, and return its MPN. + * + * Results: + * On success: The MPN of the HV root page. + * On failure: INVALID_MPN. + * + * Side effects: + * Might allocate memory, and transition '*slot' from + * INVALID_MPN to a valid MPN. + * + *----------------------------------------------------------------------------- + */ + +static MPN +TaskGetHVRootPage(Atomic_uint64 *slot) // IN/OUT +{ + MPN mpn = Atomic_Read64(slot); + + if (mpn != INVALID_MPN) { + return mpn; + } + + TaskAllocHVRootPage(slot); + + return Atomic_Read64(slot); +} + + +/* + *----------------------------------------------------------------------------- + * + * Task_GetHVRootPageForPCPU -- + * + * Lazily allocate the HV root page for a pCPU, and return its MPN. + * This is used for the VMXON region on Intel/VIA hardware and the + * host save area on AMD hardware. + * + * Results: + * On success: The MPN of the HV root page. + * On failure: INVALID_MPN. + * + * Side effects: + * Might allocate memory, and transition 'hvRootPage[pCPU]' from + * INVALID_MPN to a valid MPN. + * + *----------------------------------------------------------------------------- + */ + +MPN +Task_GetHVRootPageForPCPU(uint32 pCPU) // IN +{ + ASSERT(pCPU < ARRAYSIZE(hvRootPage)); + + return TaskGetHVRootPage(&hvRootPage[pCPU]); +} + + +/* + *----------------------------------------------------------------------------- + * + * TaskAllocGDT -- + * + * Allocate a GDT. Upon success, race to be the first to store its base in + * '*slot'. + * + * Results: + * None. + * + * Side effects: + * When the call returns, '*slot' contains the base of a GDT if a thread + * succeeded, or NULL if all threads failed. + * + *----------------------------------------------------------------------------- + */ + +static void +TaskAllocGDT(Atomic_Ptr *slot) // IN/OUT +{ + Descriptor *base; + + ASSERT(slot); + + /* Allocate a GDT. */ + base = HostIF_AllocKernelMem(0x10000 /* Maximal GDT size */, TRUE); + if (!base) { + Warning("%s: Failed to allocate temporary GDT.\n", __FUNCTION__); + return; + } + + /* + * Store the base of the GDT. This is done atomically, so if several threads + * concurrently race and call TaskAllocGDT() with the same 'slot', only the + * first one to pass this finish line will win. + */ + + if (Atomic_ReadIfEqualWritePtr(slot, NULL, base)) { + /* This thread lost the race. It must free its GDT. */ + HostIF_FreeKernelMem(base); + } +} + + +/* + *----------------------------------------------------------------------------- + * + * TaskGetGDT -- + * + * Lazily allocate a GDT, and return its base. + * + * Results: + * On success: The base of the GDT. + * On failure: NULL. + * + * Side effects: + * Might allocate memory, and transition '*slot' from NULL to a valid + * base. 
+ * + *----------------------------------------------------------------------------- + */ + +static Descriptor * +TaskGetGDT(Atomic_Ptr *slot) // IN/OUT +{ + Descriptor *base = Atomic_ReadPtr(slot); + + if (base) { + return base; + } + + TaskAllocGDT(slot); + + return Atomic_ReadPtr(slot); +} + + +/* + *----------------------------------------------------------------------------- + * + * Task_GetTmpGDT -- + * + * Lazily allocate the temporary GDT for a pCPU, and return its base. + * + * Results: + * On success: The base of the temporary GDT. + * On failure: NULL. + * + * Side effects: + * Might allocate memory, and transition 'tmpGDT[pCPU]' from NULL to a + * valid base. + * + *----------------------------------------------------------------------------- + */ + +Descriptor * +Task_GetTmpGDT(uint32 pCPU) // IN +{ + ASSERT(pCPU < ARRAYSIZE(tmpGDT)); + + return TaskGetGDT(&tmpGDT[pCPU]); +} + + +/* + *----------------------------------------------------------------------------- + * + * TaskFreeHVRootPages -- + * + * Free all HV root pages (allocated by TaskAllocHVRootPage), if any. + * + * Results: + * None. + * + * Side effects: + * None. + * + *----------------------------------------------------------------------------- + */ + +static void +TaskFreeHVRootPages(void) +{ + MPN mpn; + unsigned i; + + for (i = 0; i < ARRAYSIZE(hvRootPage); i++) { + mpn = Atomic_Read64(&hvRootPage[i]); + if (mpn != INVALID_MPN) { + HostIF_FreeMachinePage(mpn); + } + } +} + + +#ifdef VMX86_DEBUG +/* + *----------------------------------------------------------------------------- + * + * TaskAssertFail -- + * + * Output line number to comport and crash. + * + *----------------------------------------------------------------------------- + */ + +static void +TaskAssertFail(int line) +{ + CP_PutStr("TaskAssertFail*: "); + CP_PutDec(line); + CP_PutCrLf(); + SET_CR3(0); +} + + +#endif +/* + *----------------------------------------------------------------------------- + * + * TaskSaveGDT64 -- + * + * Save the current GDT in the caller-supplied struct. + * + * Results: + * *hostGDT64 = copy of the processor's GDT. + * + * Side effects: + * None. + * + *----------------------------------------------------------------------------- + */ + +static INLINE void +TaskSaveGDT64(DTR64 *hostGDT64) // OUT +{ + hostGDT64->offset = 0; + _Get_GDT((DTR *)hostGDT64); +} + + +/* + *----------------------------------------------------------------------------- + * + * TaskSaveIDT64 -- + * + * Save the current IDT in the caller-supplied struct. + * + * Results: + * *hostIDT64 = copy of the processor's IDT. + * + * Side effects: + * None. + * + *----------------------------------------------------------------------------- + */ + +static INLINE void +TaskSaveIDT64(DTR64 *hostIDT64) // OUT +{ + hostIDT64->offset = 0; + _Get_IDT((DTR *)hostIDT64); +} + + +/* + *----------------------------------------------------------------------------- + * + * TaskLoadIDT64 -- + * + * Load the current IDT from the caller-supplied struct. + * + * Results: + * Processor's IDT = *hostIDT64. + * + * Side effects: + * None. + * + *----------------------------------------------------------------------------- + */ + +static INLINE void +TaskLoadIDT64(DTR64 *hostIDT64) // IN +{ + _Set_IDT((DTR *)hostIDT64); +} + + +/* + *----------------------------------------------------------------------------- + * + * TaskCopyGDT64 -- + * + * Copy the given GDT contents to the caller-supplied buffer. 
+ * + * This routine assumes the caller has already verified there is enough + * room in the output buffer. + * + * Results: + * *out = copy of the processor's GDT contents. + * + * Side effects: + * None. + * + *----------------------------------------------------------------------------- + */ + +static INLINE void +TaskCopyGDT64(DTR64 *hostGDT64, // IN GDT to be copied from + Descriptor *out) // OUT where to copy contents to +{ + memcpy(out, + (void *)HOST_KERNEL_LA_2_VA((LA)hostGDT64->offset), + hostGDT64->limit + 1); +} + + +/* + *----------------------------------------------------------------------------- + * + * Task_Terminate -- + * + * Called at driver unload time. Undoes whatever Task_Initialize did. + * + * Results: + * None. + * + * Side effects: + * Release temporary GDT memory. + * + *----------------------------------------------------------------------------- + */ + +void +Task_Terminate(void) +{ + TaskFreeHVRootPages(); + + if (crossGDT != NULL) { + HostIF_FreeCrossGDT(CROSSGDT_NUMPAGES, crossGDT); + crossGDT = NULL; + crossGDTDescHKLA.limit = 0; + crossGDTDescHKLA.offset = 0; + } + + if (USE_TEMPORARY_GDT) { + unsigned i; + + for (i = 0; i < ARRAYSIZE(tmpGDT); i++) { + Descriptor *base = Atomic_ReadPtr(&tmpGDT[i]); + + if (base) { + HostIF_FreeKernelMem(base); + } + } + } +} + + +/* + *----------------------------------------------------------------------------- + * + * Task_Initialize -- + * + * Called at driver load time to initialize module's static data. + * + * Results: + * TRUE iff initialization successful. + * + * Side effects: + * None. + * + *----------------------------------------------------------------------------- + */ + +Bool +Task_Initialize(void) +{ + unsigned i; + + ASSERT_ON_COMPILE(sizeof (Atomic_uint64) == sizeof (MPN)); + for (i = 0; i < ARRAYSIZE(hvRootPage); i++) { + Atomic_Write64(&hvRootPage[i], INVALID_MPN); + } + if (USE_TEMPORARY_GDT) { + for (i = 0; i < ARRAYSIZE(tmpGDT); i++) { + Atomic_WritePtr(&tmpGDT[i], NULL); + } + } + + /* + * The worldswitch code doesn't work with a zero stack segment + * because it temporarily restores the data segments to the stack + * segment. So here we make sure we have a non-zero kernel + * read/write flat data segment. + */ + + kernelStackSegment = GET_SS(); + if (kernelStackSegment == 0) { + DTR hostGDTR; + + GET_GDT(hostGDTR); + for (kernelStackSegment = 8; + kernelStackSegment + 7 <= hostGDTR.limit; + kernelStackSegment += 8) { + uint64 gdte = *(uint64 *)(hostGDTR.offset + kernelStackSegment); + + if ((gdte & 0xFFCFFEFFFFFFFFFFULL) == 0x00CF92000000FFFFULL) { + goto gotnzss; + } + } + Warning("%s: no non-null flat kernel data GDT segment\n", + __FUNCTION__); + + return FALSE; +gotnzss:; + } + if ((kernelStackSegment == 0) || ((kernelStackSegment & 7) != 0)) { + Warning("Task_Initialize: unsupported SS %04x\n", + kernelStackSegment); + return FALSE; + } + + /* + * Check if PEBS is supported. For simplicity we assume there will not + * be mixed CPU models. According to the Intel SDM, PEBS is supported if: + * + * IA32_MISC_ENABLE.EMON_AVAILABE (bit 7) is set and + * IA32_MISC_ENABLE.PEBS_UNAVAILABE (bit 12) is clear. + */ + + pebsAvailable = PerfCtr_PEBSAvailable(); + return TRUE; +} + + +/* + *----------------------------------------------------------------------------- + * + * TaskRestoreHostGDTTRLDT -- + * + * + * Results: + * The host's GDT is copied (or partially copied) to the + * dynamically allocated temporary GDT. 
+ * + * The TR is restored using the temporary GDT then the host's real GDT is + * restored. Finally, the host LDT is restored. + * + * Notes: + * An OS which checks critical data structures, such as the GDT, + * can fail when this module changes the TSS busy bit in the host + * GDT. To avoid this problem, we use a sparse copy of the host + * GDT to perform the manipulation of the TSS busy bit. + * + * See PR 68144. + * + *----------------------------------------------------------------------------- + */ + +static INLINE_SINGLE_CALLER void +TaskRestoreHostGDTTRLDT(Descriptor *tempGDTBase, + DTR64 hostGDT64, + Selector ldt, + Selector cs, + Selector tr) +{ + TS_ASSERT(tr != 0); + TS_ASSERT((tr & 7) == 0); + + if (USE_TEMPORARY_GDT) { + DTR64 tempGDT64; + + /* + * Set up a temporary GDT so that the TSS 'busy bit' can be + * changed without affecting the host's data structures. + */ + + const VA hostGDTVA = HOST_KERNEL_LA_2_VA(hostGDT64.offset); + const unsigned size = sizeof(Descriptor); + const Selector ss = SELECTOR_CLEAR_RPL(GET_SS()); + + ASSERT(hostGDTVA == HOST_KERNEL_LA_2_VA(hostGDT64.offset)); + + ASSERT(SELECTOR_RPL(cs) == 0 && SELECTOR_TABLE(cs) == 0); + ASSERT(SELECTOR_RPL(ss) == 0 && SELECTOR_TABLE(ss) == 0); + + /* + * Copy code and data segments so they remain valid in case of NMI. + * Worldswitch code returns with DS==ES==SS so we don't have to set + * up DS,ES explicitly. + */ + + ASSERT(SELECTOR_CLEAR_RPL(GET_DS()) == ss); + ASSERT(SELECTOR_CLEAR_RPL(GET_ES()) == ss); + tempGDTBase[cs / size] = *(Descriptor *)(hostGDTVA + cs); + tempGDTBase[ss / size] = *(Descriptor *)(hostGDTVA + ss); + + /* + * TR descriptors use two entries (64-bits wide) in 64-bit mode. + */ + + tempGDTBase[tr / size] = *(Descriptor *)(hostGDTVA + tr); + tempGDTBase[tr / size + 1] = *(Descriptor *)(hostGDTVA + tr + size); + + /* + * Clear the 'task busy' bit so we can reload TR. + */ + + if (Desc_Type(&tempGDTBase[tr / size]) == TASK_DESC_BUSY) { + Desc_SetType(&tempGDTBase[tr / size], TASK_DESC); + } + + /* + * Restore the TR using the temp GDT then restore the host's real GDT + * then host LDT. + */ + + tempGDT64.limit = hostGDT64.limit; + tempGDT64.offset = HOST_KERNEL_VA_2_LA((VA)tempGDTBase); + _Set_GDT((DTR *)&tempGDT64); + SET_TR(tr); + _Set_GDT((DTR *)&hostGDT64); + SET_LDT(ldt); + } else { + Descriptor *desc; + + /* + * The host isn't picky about the TR entry. So clear the TSS bit + * in the host GDT, then restore host GDT and TR, then LDT. + */ + + desc = (Descriptor *)((VA)HOST_KERNEL_LA_2_VA(hostGDT64.offset + tr)); + if (Desc_Type(desc) == TASK_DESC_BUSY) { + Desc_SetType(desc, TASK_DESC); + } + _Set_GDT((DTR *)&hostGDT64); + SET_TR(tr); + SET_LDT(ldt); + } +} + + +/* + *----------------------------------------------------------------------------- + * + * Task_AllocCrossGDT -- + * + * Make sure the crossGDT is allocated and initialized. + * + * Results: + * TRUE iff crossGDT was already initialized or successfully initialized. + * + * Side effects: + * crossGDT static vars set up if not already. + * + *----------------------------------------------------------------------------- + */ + +Bool +Task_AllocCrossGDT(InitBlock *initBlock) // OUT: crossGDT values filled in +{ + DTR64 hostGDT64; + + /* + * Make sure only one of these runs at a time on the whole system, because + * there is only one crossGDT for the whole system. + */ + + HostIF_GlobalLock(2); + + /* + * Maybe the crossGDT has already been set up. 
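[Editorial note: TaskRestoreHostGDTTRLDT below clears the TSS descriptor's "busy" type so TR can be reloaded. The following is a standalone sketch of that bit manipulation on a raw descriptor qword, assuming the standard x86-64 system-descriptor layout (4-bit type field in bits 43:40, 0x9 = 64-bit TSS available, 0xB = busy); DescClearTssBusy is an illustrative name, not the driver's Desc_* API.]

#include <assert.h>
#include <stdint.h>

#define DESC_TYPE_SHIFT 40
#define DESC_TYPE_MASK  (0xFULL << DESC_TYPE_SHIFT)
#define TSS64_AVAIL     0x9ULL
#define TSS64_BUSY      0xBULL

/* Extract the 4-bit system-descriptor type (bits 43:40 of the low qword). */
static inline unsigned
DescType(uint64_t descLo)
{
   return (unsigned)((descLo & DESC_TYPE_MASK) >> DESC_TYPE_SHIFT);
}

/* Rewrite a busy 64-bit TSS descriptor as available so LTR will accept it. */
static inline uint64_t
DescClearTssBusy(uint64_t descLo)
{
   if (DescType(descLo) == TSS64_BUSY) {
      descLo = (descLo & ~DESC_TYPE_MASK) | (TSS64_AVAIL << DESC_TYPE_SHIFT);
   }
   return descLo;
}

int
main(void)
{
   uint64_t busy = (TSS64_BUSY << DESC_TYPE_SHIFT) | (1ULL << 47); /* P=1 */
   assert(DescType(DescClearTssBusy(busy)) == TSS64_AVAIL);
   return 0;
}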
+ */ + + if (crossGDT == NULL) { + MPN maxValidFirst = + 0xFFC00 /* 32-bit MONITOR_LINEAR_START */ - CROSSGDT_NUMPAGES; + + /* + * The host entries must fit on pages of the crossGDT that are mapped. + * Since we know they are below CROSSGDT_LOWSEG, we can just check that + * CROSSGDT_LOWSEG and below are mapped. + * + * Because the CROSSGDT_LOWSEG segments must reside on the first page of + * the crossGDT (as they must remain valid with paging off), all we need + * do is check that bit 0 of CROSSGDT_PAGEMASK is set (indicating that + * page 0 of the crossGDT will be mapped). + */ + + ASSERT_ON_COMPILE(CROSSGDT_LOWSEG < PAGE_SIZE); + ASSERT_ON_COMPILE(CROSSGDT_PAGEMASK & 1); + + /* + * Allocate the crossGDT. + */ + ASSERT_ON_COMPILE(sizeof *crossGDT == CROSSGDT_NUMPAGES * PAGE_SIZE); + crossGDT = HostIF_AllocCrossGDT(CROSSGDT_NUMPAGES, maxValidFirst, + crossGDTMPNs); + if (crossGDT == NULL) { + HostIF_GlobalUnlock(2); + Warning("%s: unable to allocate crossGDT\n", __FUNCTION__); + + return FALSE; + } + + /* + * Check that the crossGDT meets the address requirements documented in + * bora/doc/worldswitch-pages.txt. + */ + + if (crossGDTMPNs[0] > maxValidFirst) { + HostIF_FreeCrossGDT(CROSSGDT_NUMPAGES, crossGDT); + crossGDT = NULL; + HostIF_GlobalUnlock(2); + Warning("%s: crossGDT MPN %"FMT64"X gt %"FMT64"X\n", __FUNCTION__, + crossGDTMPNs[0], maxValidFirst); + + return FALSE; + } + + /* + * Fill the crossGDT with a copy of our host GDT. VMX will have to fill + * in monitor segments via Task_InitCrossGDT. + * + * We are assuming that all the host segments we will ever need are below + * CROSSGDT_LOWSEG. If this assumption ever breaks, the host segments + * would have to be unconditionally transitioned to the CROSSGDT + * intermediate segments before switching to the monitor. The only time + * the GDT has been found to be bigger than CROSSGDT_LOWSEG is when they + * are running KVM or Xen, and we never see the large segment numbers. + */ + + memset(crossGDT, 0, sizeof *crossGDT); + TaskSaveGDT64(&hostGDT64); + if (hostGDT64.limit > CROSSGDT_LOWSEG * 8 - 1) { + hostGDT64.limit = CROSSGDT_LOWSEG * 8 - 1; + } + TaskCopyGDT64(&hostGDT64, crossGDT->gdtes); + + /* + * Set up descriptor for the crossGDT using host kernel LA as a base. + */ + + crossGDTDescHKLA.limit = sizeof *crossGDT - 1; + crossGDTDescHKLA.offset = HOST_KERNEL_VA_2_LA((VA)crossGDT); + } + + HostIF_GlobalUnlock(2); + + initBlock->crossGDTHKLA = crossGDTDescHKLA.offset; + ASSERT_ON_COMPILE(sizeof initBlock->crossGDTMPNs == sizeof crossGDTMPNs); + memcpy(initBlock->crossGDTMPNs, crossGDTMPNs, sizeof crossGDTMPNs); + + return TRUE; +} + + +/* + *----------------------------------------------------------------------------- + * + * Task_InitCrosspage -- + * + * Initialize the crosspage used to switch to the monitor task. 
+ * + * Results: + * 0 on success + * != 0 on failure + * + * Side effects: + * None + * + *----------------------------------------------------------------------------- + */ + +int +Task_InitCrosspage(VMDriver *vm, // IN + InitBlock *initParams) // IN/OUT: Initial params from the + // VM +{ + Vcpuid vcpuid; + + if (crossGDT == NULL) { + return 1; + } + + initParams->crossGDTHKLA = crossGDTDescHKLA.offset; + ASSERT_ON_COMPILE(sizeof initParams->crossGDTMPNs == sizeof crossGDTMPNs); + memcpy(initParams->crossGDTMPNs, crossGDTMPNs, sizeof crossGDTMPNs); + + for (vcpuid = 0; vcpuid < initParams->numVCPUs; vcpuid++) { + VA64 crossPageUserAddr = initParams->crosspage[vcpuid]; + VMCrossPage *p = HostIF_MapCrossPage(vm, crossPageUserAddr); + MPN crossPageMPN; + + if (p == NULL) { + return 1; + } + + if (HostIF_LookupUserMPN(vm, crossPageUserAddr, &crossPageMPN) != + PAGE_LOOKUP_SUCCESS || + crossPageMPN == 0) { + return 1; + } + + { + /* The version of the crosspage must be the first four + * bytes of the crosspage. See the declaration + * of VMCrossPage in modulecall.h. + */ + + ASSERT_ON_COMPILE(offsetof(VMCrossPage, version) == 0); + ASSERT_ON_COMPILE(sizeof(p->version) == sizeof(uint32)); + + /* p->version is VMX's version; CROSSPAGE_VERSION is vmmon's. */ + if (p->version != CROSSPAGE_VERSION) { + Warning("%s: crosspage version mismatch: vmmon claims %#x, must " + "match vmx version of %#x.\n", __FUNCTION__, + (int)CROSSPAGE_VERSION, p->version); + return 1; + } + } + { + /* The following constants are the size and offset of the + * VMCrossPage->crosspage_size field as defined by the + * vmm/vmx. + */ + + ASSERT_ON_COMPILE(offsetof(VMCrossPage, crosspage_size) == + sizeof(uint32)); + ASSERT_ON_COMPILE(sizeof(p->crosspage_size) == sizeof(uint32)); + + if (p->crosspage_size != sizeof(VMCrossPage)) { + Warning("%s: crosspage size mismatch: vmmon claims %#x bytes, " + "must match vmm size of %#x bytes.\n", __FUNCTION__, + (unsigned)sizeof(VMCrossPage), p->crosspage_size); + return 1; + } + } + + if (crossPageMPN > MA_2_MPN(0xFFFFFFFF)) { + Warning("%s*: crossPageMPN 0x%016" FMT64 "x invalid\n", __FUNCTION__, + crossPageMPN); + return 1; + } + if (!pseudoTSC.initialized) { + Warning("%s*: PseudoTSC has not been initialized\n", __FUNCTION__); + return 1; + } + p->crosspageData.crosspageMA = (uint32)MPN_2_MA(crossPageMPN); + p->crosspageData.hostCrossPageLA = (LA64)(uintptr_t)p; + + /* + * Pass our kernel code segment numbers back to MonitorPlatformInit. + * They have to be in the GDT so they will be valid when the crossGDT is + * active. + */ + + p->crosspageData.hostInitial64CS = GET_CS(); + TS_ASSERT(SELECTOR_RPL (p->crosspageData.hostInitial64CS) == 0 && + SELECTOR_TABLE(p->crosspageData.hostInitial64CS) == 0); + + p->crosspageData.moduleCallInterrupted = FALSE; + p->crosspageData.pseudoTSCConv.p.mult = 1; + p->crosspageData.pseudoTSCConv.p.shift = 0; + p->crosspageData.pseudoTSCConv.p.add = 0; + p->crosspageData.pseudoTSCConv.changed = TRUE; + p->crosspageData.worldSwitchPTSC = Vmx86_GetPseudoTSC(); + p->crosspageData.timerIntrTS = MAX_ABSOLUTE_TS; + p->crosspageData.hstTimerExpiry = MAX_ABSOLUTE_TS; + p->crosspageData.monTimerExpiry = MAX_ABSOLUTE_TS; + vm->crosspage[vcpuid] = p; + } + + return 0; +} + + +/* + *----------------------------------------------------------------------------- + * + * Task_InitCrossGDT -- + * + * Fill in a crossGDT entry from the given template. + * + * Results: + * 0 on success + * != 0 on failure + * + * Side effects: + * CrossGDT entry filled from template. 
If crossGDT has already been + * initialized, the entry is compared to the given template. Any + * discrepancy is logged and an error is returned. This is necessary + * because this same GDT is shared among all VMs on this host, so really, + * the first call initializes it and the others just do compares. + * + *----------------------------------------------------------------------------- + */ + +int +Task_InitCrossGDT(InitCrossGDT *initCrossGDT) // IN +{ + Descriptor v; + int rc; + uint32 i; + + rc = 1; + i = initCrossGDT->index; + v = initCrossGDT->value; + + HostIF_GlobalLock(3); + if (i >= sizeof crossGDT->gdtes / sizeof crossGDT->gdtes[0]) { + HostIF_GlobalUnlock(3); + Warning("%s: index %u too big\n", __FUNCTION__, i); + } else if (!( (1 << (i * sizeof crossGDT->gdtes[0] / PAGE_SIZE)) + & CROSSGDT_PAGEMASK)) { + HostIF_GlobalUnlock(3); + Warning("%s: index %u not in CROSSGDT_PAGEMASK %x\n", __FUNCTION__, + i, CROSSGDT_PAGEMASK); + } else if (!Desc_Present(&v)) { + HostIF_GlobalUnlock(3); + Warning("%s: entry %u not present\n", __FUNCTION__, i); + } else if (!Desc_Present(crossGDT->gdtes + i)) { + crossGDT->gdtes[i] = v; + HostIF_GlobalUnlock(3); + rc = 0; + } else if (Desc_EqualIgnoreAccessed(crossGDT->gdtes + i, &v)) { + HostIF_GlobalUnlock(3); + rc = 0; + } else { + HostIF_GlobalUnlock(3); + Warning("%s: entry 0x%X mismatch\n", __FUNCTION__, i); + Warning("%s: crossGDT %16.16llX\n", __FUNCTION__, + (long long unsigned)*(uint64 *)(crossGDT->gdtes + i)); + Warning("%s: template %16.16llX\n", __FUNCTION__, + (long long unsigned)*(uint64 *)&v); + } + + return rc; +} + + +/* + *----------------------------------------------------------------------------- + * + * Disable and restore APIC NMI delivery. + * + *----------------------------------------------------------------------------- + */ + +static Bool +TaskDisableNMIDelivery(const APICDescriptor *desc, // IN + int regNum) // IN +{ + uint32 reg = APIC_Read(desc, regNum); + + if (APIC_LVT_DELVMODE(reg) == APIC_LVT_DELVMODE_NMI && + !APIC_LVT_ISMASKED(reg)) { + APIC_Write(desc, regNum, reg | APIC_LVT_MASK); + // Force completion of masking (was bug 78470). + dummyLVT = APIC_Read(desc, regNum); + return TRUE; + } + + return FALSE; +} + + +static void +TaskDisableNMI(const APICDescriptor *desc, // IN + Bool *lint0NMI, // OUT + Bool *lint1NMI, // OUT + Bool *pcNMI, // OUT + Bool *thermalNMI) // OUT +{ + if (desc->base || desc->isX2) { + *lint0NMI = TaskDisableNMIDelivery(desc, APICR_LVT0); + *lint1NMI = TaskDisableNMIDelivery(desc, APICR_LVT1); + *pcNMI = TaskDisableNMIDelivery(desc, APICR_PCLVT); + + /* + * The LVT thermal monitor register was introduced + * in Pentium 4 and Xeon processors. 
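[Editorial note: TaskDisableNMIDelivery below masks an LVT entry and reads it back to force the write to complete. This is a generic sketch of that read-modify-write-and-readback pattern on a memory-mapped 32-bit register; the register pointer, MaskIfUnmasked, and REG_MASK_BIT are hypothetical, not the driver's APIC accessors.]

#include <stdbool.h>
#include <stdint.h>

#define REG_MASK_BIT (1u << 16)   /* hypothetical 'masked' bit, in the spirit of APIC_LVT_MASK */

/*
 * Mask the interrupt controlled by *reg if it is currently unmasked.
 * Returns true iff this call changed the register, so the caller knows
 * whether to unmask it again on the restore path.
 */
static bool
MaskIfUnmasked(volatile uint32_t *reg)
{
   uint32_t val = *reg;
   if ((val & REG_MASK_BIT) != 0) {
      return false;               /* already masked; nothing to undo later */
   }
   *reg = val | REG_MASK_BIT;
   (void)*reg;                    /* read back to force completion of the masking write */
   return true;
}

int
main(void)
{
   uint32_t fakeReg = 0;          /* stands in for a mapped LVT register */
   bool changed = MaskIfUnmasked(&fakeReg);
   return (changed && (fakeReg & REG_MASK_BIT)) ? 0 : 1;
}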
+ */ + + if (APIC_MaxLVT(desc) >= 5) { + *thermalNMI = TaskDisableNMIDelivery(desc, APICR_THERMLVT); + } else { + *thermalNMI = FALSE; + } + } else { + *lint0NMI = FALSE; + *lint1NMI = FALSE; + *pcNMI = FALSE; + *thermalNMI = FALSE; + } +} + + +static void +TaskRestoreNMIDelivery(const APICDescriptor *desc, // IN + Bool restore, // IN + int regNum) // IN +{ + if (restore) { + uint32 reg = APIC_Read(desc, regNum); + + APIC_Write(desc, regNum, reg & ~APIC_LVT_MASK); + } +} + + +static void +TaskRestoreNMI(const APICDescriptor *desc, // IN + Bool lint0NMI, // IN + Bool lint1NMI, // IN + Bool pcNMI, // IN + Bool thermalNMI) // IN +{ + TaskRestoreNMIDelivery(desc, lint0NMI, APICR_LVT0); + TaskRestoreNMIDelivery(desc, lint1NMI, APICR_LVT1); + TaskRestoreNMIDelivery(desc, pcNMI, APICR_PCLVT); + TaskRestoreNMIDelivery(desc, thermalNMI, APICR_THERMLVT); +} + + +/* + *----------------------------------------------------------------------------- + * + * TaskEnableTF -- + * + * Turn on EFLAGS. + * + * Results: + * None. + * + * Side effects: + * Trace trapping enabled. + * + *----------------------------------------------------------------------------- + */ + +static INLINE void +TaskEnableTF(void) +{ +#if defined(__GNUC__) + asm volatile ("pushfq ; orb $1,1(%rsp) ; popfq"); +#elif defined(_MSC_VER) + __writeeflags(__readeflags() | EFLAGS_TF); +#else +#error no compiler support for setting TF +#endif +} + + +/* + *----------------------------------------------------------------------------- + * + * TaskDisableTF -- + * + * Turn off EFLAGS. + * + * Results: + * None. + * + * Side effects: + * Trace trapping disabled. + * + *----------------------------------------------------------------------------- + */ + +static INLINE void +TaskDisableTF(void) +{ +#if defined(__GNUC__) + asm volatile ("pushfq ; andb $~1,1(%rsp) ; popfq"); +#elif defined(_MSC_VER) + __writeeflags(__readeflags() & ~EFLAGS_TF); +#else +#error no compiler support for clearing TF +#endif +} + + +static INLINE Bool +TaskGotException(const VMCrossPage *crosspage, unsigned exc) +{ + return crosspage->crosspageCode.faultHandler.wsException[exc]; +} + + +static INLINE void +TaskSetException(VMCrossPage *crosspage, unsigned exc, Bool v) +{ + crosspage->crosspageCode.faultHandler.wsException[exc] = v; +} + + +/* + *----------------------------------------------------------------------------- + * + * TaskSaveDebugRegisters -- + * + * Save debug registers in the host context area of the crosspage. + * + * Results: + * None. + * + * Side effects: + * crosspage->hostDR[*] = some filled with debug register contents + * hostDRSaved = bits set for those we wrote to hostDR[*] array + * hostDRInHW = bits set indicating which hardware DR contents + * still match what the host wants + * hardware DR7 = 0 + * hardware DR7 = 0 + * + *----------------------------------------------------------------------------- + */ + +static INLINE_SINGLE_CALLER void +TaskSaveDebugRegisters(VMCrossPage *crosspage) +{ + Bool saveGotDB; + +#define SAVE_DR(n) \ + do { \ + uintptr_t drReg; \ + GET_DR##n(drReg); \ + crosspage->crosspageData.hostDR[n] = drReg; \ + } while (0) + + /* Hardware contains the host's %dr7, %dr6, %dr3, %dr2, %dr1, %dr0 */ + crosspage->crosspageData.hostDRInHW = ((1 << 7) | (1 << 6) | + (1 << 3) | (1 << 2) | + (1 << 1) | (1 << 0)); + + /* + * Save DR7 since we need to disable debug breakpoints during the world + * switch code. We will get a #DB if DR7 is set, but the + * SwitchDBHandler simply IRETs after setting crosspage gotDB flag. 
+ */ + + saveGotDB = TaskGotException(crosspage, EXC_DB); + TaskSetException(crosspage, EXC_DB, FALSE); + COMPILER_MEM_BARRIER(); /* Prevent hoisting #UD-raising instructions. */ + SAVE_DR(7); + + /* + * In all cases, DR7 shouldn't have the GD bit set. + */ + + TS_ASSERT(!(crosspage->crosspageData.hostDR[7] & DR7_GD)); + + /* + * Save DR6 in order to accommodate the ICEBP instruction and other stuff + * that can modify DR6 bits (trace traps, task switch traps, any others?). + */ + + SAVE_DR(6); + + /* + * It may be that DR7 had the GD bit set, in which case the + * crosspage exception[EXC_DB] flag would have just been set and + * DR6 will be set. If so, fix the saved values to look like + * they were when DR7 was set (before we tripped the #DB), so + * they'll get restored to what they were. Then make sure + * breakpoints are disabled during switch. + * + * Note that I am assuming DR6_BD was clear before the #DB and so + * I'm clearing it here. If it was set, we will end up restoring + * it cleared, but there's no way to tell. Someone suggested that + * ICEBP would tell us but it may also clear DR6<3:0>. + * + * SAVE_DR(6) can raise #DB. + */ + + if (TaskGotException(crosspage, EXC_DB) && + (crosspage->crosspageData.hostDR[6] & DR6_BD)) { + crosspage->crosspageData.hostDR[6] -= DR6_BD; + crosspage->crosspageData.hostDR[7] |= DR7_GD; + SET_DR7(DR7_DEFAULT); + + /* HW: %dr7 and %dr6 are the guest, %dr3, %dr2, %dr1, %dr0 are host */ + crosspage->crosspageData.hostDRInHW = ((1 << 3) | (1 << 2) | + (1 << 1) | (1 << 0)); + } + + /* + * No GD bit, check for enabled breakpoints. Disable them as they may + * coincidentally trip during the switch. + */ + + else if (crosspage->crosspageData.hostDR[7] & DR7_ENABLED) { + SET_DR7(DR7_DEFAULT); // no #DB here, just simple set + /* HW: %dr7 = guest, %dr6, %dr3, %dr2, %dr1, %dr0 = host */ + crosspage->crosspageData.hostDRInHW = ((1 << 6) | (1 << 3) | (1 << 2) | + (1 << 1) | (1 << 0)); + } + + TaskSetException(crosspage, EXC_DB, saveGotDB); + + /* + * hostDR[6,7] have host contents in them now. + */ + + crosspage->crosspageData.hostDRSaved = 0xC0; +#undef SAVE_DR +} + + +/* + *----------------------------------------------------------------------------- + * + * TaskRestoreDebugRegisters -- + * + * Put the debug registers back the way they were when + * TaskSaveDebugRegisters was called. + * + * Results: + * None. + * + * Side effects: + * Debug registers restored from values saved in the crosspage. + * + *----------------------------------------------------------------------------- + */ + +static INLINE_SINGLE_CALLER void +TaskRestoreDebugRegisters(VMCrossPageData *crosspage) +{ +#define RESTORE_DR(n) \ + if ((crosspage->hostDRInHW & (1 << n)) == 0) { \ + /* Guest value for register 'n' in hardware. */ \ + const uintptr_t drReg = (uintptr_t)crosspage->hostDR[n]; \ + if (!(crosspage->shadowDRInHW & (1 << n)) || \ + (drReg != SHADOW_DR(crosspage, n))) { \ + SET_DR##n(drReg); \ + } \ + } + + RESTORE_DR(0); + RESTORE_DR(1); + RESTORE_DR(2); + RESTORE_DR(3); + RESTORE_DR(6); + + /* + * DR7 must be restored last in case DR7 is set. + */ + RESTORE_DR(7); +#undef RESTORE_DR +} + + +/* + *----------------------------------------------------------------------------- + * + * TaskUpdateLatestPTSC -- + * + * Record the per-VM latest visible PTSC value, and indicate that + * this thread is no longer running in the VMM. See + * TaskUpdatePTSCParameters. + * + * Results: + * None. 
+ * + * Side effects: + * May update the latest PTSC value and the PTSC offset reference count. + * + *----------------------------------------------------------------------------- + */ + +static INLINE_SINGLE_CALLER void +TaskUpdateLatestPTSC(VMDriver *vm, VMCrossPageData *crosspage) +{ + if (Vmx86_HwTSCsSynced()) { + uint64 latest; + /* + * Determine a conservative estimate for the last PTSC value the + * VMM may have used. We can't just use + * crosspage->worldSwitchPTSC since some callees of BackToHost + * will compute their own PTSC (or infer a PTSC value from the + * TSC). + */ + uint64 ptsc = RDTSC() + crosspage->pseudoTSCConv.p.add; + do { + latest = Atomic_Read64(&vm->ptscLatest); + if (ptsc <= latest) { + break; + } + } while (!Atomic_CMPXCHG64(&vm->ptscLatest, &latest, &ptsc)); + /* After updating the latest PTSC, decrement the reference count. */ + Atomic_Dec32((Atomic_uint32 *)&vm->ptscOffsetInfo.inVMMCnt); + } +} + + +/* + *----------------------------------------------------------------------------- + * + * TaskUpdatePTSCParameters -- + * + * If the PTSC is behind where it should be, based on the host's + * uptime, then adjust the PTSC parameters. PR 118376. + * + * Results: + * None. + * + * Side effects: + * May update the PTSC parameters. + * + *----------------------------------------------------------------------------- + */ + +static INLINE_SINGLE_CALLER void +TaskUpdatePTSCParameters(VMDriver *vm, + VMCrossPageData *crosspage, + Vcpuid vcpuid) +{ + uint64 tsc, ptsc; + + ASSERT_NO_INTERRUPTS(); + ASSERT_ON_COMPILE(sizeof(vm->ptscOffsetInfo) == sizeof(Atomic_uint64)); + ptsc = Vmx86_GetPseudoTSC(); + /* + * Use unsigned comparison to test ptsc inside the interval: + * [worldSwitchPTSC, worldSwitchPTSC + largeDelta) + * where largeDelta is choosen to be much larger than the normal time + * between worldswitches, but not so large that we'd miss a jump due + * to TSC reset. + */ + if (UNLIKELY((uint64)(ptsc - crosspage->worldSwitchPTSC) > + Vmx86_GetPseudoTSCHz() * 4096)) { + /* + * If the PTSC went backwards since we last left the monitor, then either: + * a) TSC is unsynchronized across cores. + * b) TSC was reset (probably due to host stand by or hibernate). + * c) khzEstimate was incorrect (too low). + * d) the host's reference clock is too low resolution. + * e) the host's reference clock is broken. + * + * We handle case (a) and (b) by switch PTSC over to using the + * reference clock as the basis for pseudo TSC. + * + * For case (c), ideally we'd want to get khzEstimate correct in + * the first place. Using the reference clock for pseudo TSC is + * just a backup if all else failed. It will prevent PTSC from + * drifting from real time over the long run. Additionally, we + * could try to adopt the mult/shift of pseudoTSCConv to make PTSC + * run at the (incorrect) TSC kHz estimate, so that PTSC + * progresses at the correct rate over the short term (while in + * the monitor). + * + * We don't do anything for case (e). If we see it happen, we + * could try to pin the value returned by HostIF_ReadUptime to + * some sane range to help compensate. + */ + if (Vmx86_SetPseudoTSCUseRefClock()) { + ptsc = Vmx86_GetPseudoTSC(); + } + + /* + * For case (d), check for PTSC between (worldSwitchPTSC - Hz) and + * worldSwitchPTSC. 
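[Editorial note: TaskUpdatePTSCParameters below relies on a single unsigned subtraction to test whether PTSC stayed inside [worldSwitchPTSC, worldSwitchPTSC + largeDelta). A standalone sketch of that trick, assuming plain uint64_t arithmetic rather than the driver's types; InWindow and the 1 GHz figure are illustrative only. Because the subtraction wraps, one comparison rejects both "went backwards" and "jumped too far forward".]

#include <assert.h>
#include <stdbool.h>
#include <stdint.h>

/*
 * True iff value lies in [base, base + delta), even when the 64-bit
 * counter has wrapped between the two samples.
 */
static inline bool
InWindow(uint64_t value, uint64_t base, uint64_t delta)
{
   return value - base < delta;   /* unsigned: wraps to a huge number if value < base */
}

int
main(void)
{
   const uint64_t hz = 1000000000ULL;                    /* pretend 1 GHz PTSC */
   const uint64_t base = 42 * hz;
   assert(InWindow(base + 5, base, hz * 4096));          /* normal forward progress */
   assert(!InWindow(base - 5, base, hz * 4096));         /* went backwards: outside */
   assert(!InWindow(base + hz * 5000, base, hz * 4096)); /* jumped too far forward */
   return 0;
}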
That is, if ptsc is still behind + * worldSwitchPTSC (even after ensuring the PTSC is based on the + * reference clock), but by less than a second, assume that the + * reference clock is too low of resolution, and nudge PTSC + * forward to ensure it doesn't go backwards on this VCPU. If we + * are more than a second behind, then we assume that the + * reference clock was stepped (or broken) and we just stay in + * sync with it. + */ + if ((uint64)(crosspage->worldSwitchPTSC - ptsc) < + Vmx86_GetPseudoTSCHz()) { + ptsc = crosspage->worldSwitchPTSC; + } + } + + /* + * While running in the monitor, we can't read the reference + * clock, which is implemented by the host OS. So, offset from + * the current pseudoTSC value using the TSC in order to provide + * high resolution PTSC while in the monitor. The RDTSC below + * must be executed on the same pcpu that the vmm vcpu thread will + * run on (in case of out of sync TSCs). This is guaranteed since + * we are on the on-ramp into the monitor with interrupts + * disabled. + */ + tsc = RDTSC(); + if (Vmx86_HwTSCsSynced()) { + /* + * When the TSCs are synchronized, make Pseudo TSC synchronized + * as well. To ensure this, all vcpu threads of a VM that are + * simultaneously running their VMMs need to use the same exact + * offset. This global offset can be updated only when no + * threads are running in the VMM. In the case of synchronized + * TSCs, updating the offset only when all threads are outside + * the VMM is okay in terms of keeping VMMs' PTSC close to real + * time because the TSCs stop only when all cores enter a deep + * sleep state (otherwise the TSCs wouldn't be in sync to begin + * with). + */ + PseudoTSCOffsetInfo old, new; + do { + old = vm->ptscOffsetInfo; + new = old; + if (new.inVMMCnt == 0) { + int64 ptscOffset; + if (Vmx86_PseudoTSCUsesRefClock()) { + /* Must read ptscLatest after reading ptscOffsetInfo. */ + uint64 latest = Atomic_Read64(&vm->ptscLatest); + if (UNLIKELY(ptsc < latest)) { + /* + * The Vmx86_GetPseudoTSC call above occurred before + * some other vcpu thread exited the monitor; need to + * bump forward. + */ + ptsc = latest; + } + ptscOffset = ptsc - tsc; + } else { + ptscOffset = Vmx86_GetPseudoTSCOffset(); + } + /* + * Since inVMMCnt is zero, it is safe to update our entry in + * ptscOffsets -- no other thread will try to read it until + * the inVMMCnt > 0. + */ + vm->ptscOffsets[vcpuid] = ptscOffset; + /* Try to use this thread's offset as the global offset. */ + new.vcpuid = vcpuid; + } + new.inVMMCnt++; + } while (!Atomic_CMPXCHG64((Atomic_uint64 *)&vm->ptscOffsetInfo, + (uint64 *)&old, (uint64 *)&new)); + /* Use the designated global offset as this thread's offset. */ + crosspage->pseudoTSCConv.p.add = vm->ptscOffsets[new.vcpuid]; + crosspage->pseudoTSCConv.changed = TRUE; + /* + * Need to derive the worldSwitchPTSC value from TSC since the + * PTSC, when calculated from TSC, may drift from the reference + * clock over the short term. + */ + ptsc = tsc + crosspage->pseudoTSCConv.p.add; + } else { + crosspage->pseudoTSCConv.p.add = ptsc - tsc; + crosspage->pseudoTSCConv.changed = TRUE; + } + /* Cache PTSC value for BackToHost. */ + crosspage->worldSwitchPTSC = ptsc; +} + + +/* + *----------------------------------------------------------------------------- + * + * TaskSwitchToMonitor -- + * + * Wrapper that calls code to switch from the host to the monitor. 
+ * + * The basic idea is to do a (*(crosspage->hostToVmm))(crosspage) + * but it's complicated because we must have a common call format + * between GCC and MSC. + * + * Since we have complete control over what GCC does with asm volatile, + * this amounts to having GCC do exactly what MSC does. + * For 64-bit hosts, we pass the parameter in RCX. + * + * For 64-bit GCC, the callee is expected to preserve + * RBX,RBP,RSP,R12..R15, whereas MSC expects the callee to preserve + * RBX,RSI,RDI,RBP,RSP,R12..R15. So for simplicity, we have the + * worldswitch code save RBX,RSI,RDI,RBP,RSP,R12..R15. + * + * From an email with Petr regarding gcc's handling of the stdcall + * attribute for x86-64: + * + * As far as I can tell, for x86_64 there is only one calling + * convention: + * On GCC rdi/rsi/rdx/rcx/r8d/r9d for <= 6 arguments, + * others always on stack, caller always adjusts stack. + * + * On MSC it is rcx/rdx/r8d/r9d for <= 4 arguments, rest on + * stack. When more than 4 arguments are passed, spill space is + * reserved on the stack for the register arguments. Argument + * 5 is accessed at (5 * 8)(rsp). + * + * Side effects: + * The monitor does many things, but it's irrelevant to this code. The + * worldswitch should eventually return here with the host state intact. + * + *----------------------------------------------------------------------------- + */ + +static INLINE_SINGLE_CALLER void +TaskSwitchToMonitor(VMCrossPage *crosspage) +{ + const uint8 *codePtr = ((uint8 *)&crosspage->crosspageCode.worldswitch + + crosspage->crosspageCode.worldswitch.hostToVmm); + +#if defined(__GNUC__) + /* + * Pass the crosspage pointer in RCX just like 64-bit MSC does. + * Tell GCC that the worldswitch preserves RBX,RSI,RDI,RBP,RSP, + * R12..R15 just like the MSC 64-bit calling convention. + */ + + { + uint64 raxGetsWiped, rcxGetsWiped; + + __asm__ __volatile__("call *%%rax" + : "=a" (raxGetsWiped), + "=c" (rcxGetsWiped) + : "0" (codePtr), + "1" (crosspage) + : "rdx", "r8", "r9", "r10", "r11", "cc", "memory"); + } +#elif defined(_MSC_VER) + /* + * The 64-bit calling convention is to pass the argument in RCX and that + * the called function must preserve RBX,RSI,RDI,RBP,RSP,R12..R15. + */ +#pragma warning(suppress: 4055) // Cast of data pointer to function pointer. + (*(void (*)(VMCrossPage *))codePtr)(crosspage); +#else +#error No compiler defined for TaskSwitchToMonitor +#endif +} + + +static void +TaskTestCrossPageExceptionHandlers(VMCrossPage *crosspage) +{ + static Bool testSwitchNMI = TRUE; /* test only first time through */ + + /* + * Test the DB,NMI,MCE handlers to make sure they can set the + * flags. This is calling the handlers in switchNMI.S. + */ + + if (vmx86_debug && testSwitchNMI) { + Bool gotSave; + + testSwitchNMI = FALSE; + + /* + * RAISE_INTERRUPT calls Switch{32,64}DBHandler in switchNMI.S + * (depending on host bitsize). + */ + + gotSave = TaskGotException(crosspage, EXC_DB); + TaskSetException(crosspage, EXC_DB, FALSE); + RAISE_INTERRUPT(1); + TS_ASSERT(TaskGotException(crosspage, EXC_DB)); + TaskSetException(crosspage, EXC_DB, gotSave); + + /* + * RAISE_INTERRUPT calls Switch{32,64}NMIHandler in switchNMI.S + * (depending on host bitsize). + */ + gotSave = TaskGotException(crosspage, EXC_NMI); + TaskSetException(crosspage, EXC_NMI, FALSE); + RAISE_INTERRUPT(EXC_NMI); + TS_ASSERT(TaskGotException(crosspage, EXC_NMI)); + +#if defined(__GNUC__) + /* + * Test the LRETQ in the 64-bit mini NMI handler to make sure + * it works with any 16-byte offset of the stack pointer. 
+ * The INT 2 calls Switch64NMIHandler in switchNMI.S. + */ + { + uint64 v1, v2; + + asm volatile ("\n" + " movl $16, %%ecx \n" + "1000: \n" + " decq %%rsp \n" + " movb $0xDB, (%%rsp) \n" + " int $2 \n" + " loop 1000b \n" + " popq %%rcx \n" + " popq %%rax \n" + : "=a" (v1), "=c" (v2)); + + /* + * Ensure nothing overwritten just above where it is + * allowed to, because the decq rsp/movb 0xDBs pushed 16 + * of them one byte at a time. + */ + + TS_ASSERT(v1 == 0xDBDBDBDBDBDBDBDBULL); + TS_ASSERT(v2 == 0xDBDBDBDBDBDBDBDBULL); + } +#endif + TaskSetException(crosspage, EXC_NMI, gotSave); + + /* + * RAISE_INTERRUPT calls Switch{32,64}MCEHandler in switchNMI.S + * (depending on host bitsize). + */ + + gotSave = TaskGotException(crosspage, EXC_MC); + TaskSetException(crosspage, EXC_MC, FALSE); + RAISE_INTERRUPT(EXC_MC); + TS_ASSERT(TaskGotException(crosspage, EXC_MC)); + TaskSetException(crosspage, EXC_MC, gotSave); + } +} + + +/* + *----------------------------------------------------------------------------- + * + * TaskShouldRetryWorldSwitch -- + * + * Returns whether or not we should retry the world switch. + * + * It is possible that the gotNMI and/or gotMCE was detected when + * switching in the host->monitor direction, in which case the + * retryWorldSwitch flag will be set. If such is the case, we + * want to immediately loop back to the monitor as that is what + * it is expecting us to do. + * + *----------------------------------------------------------------------------- + */ + +static INLINE Bool +TaskShouldRetryWorldSwitch(VMCrossPage *crosspage) +{ + Bool result = crosspage->crosspageData.retryWorldSwitch; + crosspage->crosspageData.retryWorldSwitch = FALSE; + return result; +} + + +/* + *----------------------------------------------------------------------------- + * + * Task_Switch -- + * + * Switches from the host context into the monitor context and + * then receives control when the monitor returns to the + * host. + * + * Think of it as a coroutine switch that changes not only the + * registers, but also the address space and all the hardware + * state. + * + * Results: + * None. + * + * Side effects: + * Jump to the monitor. Has no direct effect on the host-visible + * state except that it might generate an interrupt. + * + *----------------------------------------------------------------------------- + */ + +void +Task_Switch(VMDriver *vm, // IN + Vcpuid vcpuid) // IN +{ + uintptr_t flags; + uint64 fs64 = 0; + uint64 gs64 = 0; + uint64 kgs64 = 0; + uint64 pebsMSR = 0; + DTR64 hostGDT64, hostIDT64; + Selector cs, ds, es, fs, gs, ss; + Selector hostTR; + Selector hostLDT; + Bool lint0NMI; + Bool lint1NMI; + Bool pcNMI; + Bool thermalNMI; + VMCrossPage *crosspage = vm->crosspage[vcpuid]; + uint32 pCPU; + MPN hvRootMPN; + Descriptor *tempGDTBase; + + ASSERT_ON_COMPILE(sizeof(VMCrossPage) == PAGE_SIZE); + TaskDisableNMI(&vm->hostAPIC, &lint0NMI, &lint1NMI, &pcNMI, &thermalNMI); + SAVE_FLAGS(flags); + CLEAR_INTERRUPTS(); + + pCPU = HostIF_GetCurrentPCPU(); + ASSERT(pCPU < ARRAYSIZE(hvRootPage) && pCPU < ARRAYSIZE(tmpGDT)); + + hvRootMPN = Atomic_Read64(&hvRootPage[pCPU]); + tempGDTBase = USE_TEMPORARY_GDT ? Atomic_ReadPtr(&tmpGDT[pCPU]) : NULL; + + /* + * We can't allocate memory with interrupts disabled on all hosts + * so we dummy up a modulecall to do it before we start in on the + * world switch. We must be careful not to overwrite the + * crosspages arguments when doing this though, see bug 820257. 
+ */ + if (hvRootMPN == INVALID_MPN && + (crosspage->crosspageData.activateVMX || + crosspage->crosspageData.activateSVM)) { + crosspage->crosspageData.userCallType = MODULECALL_USERCALL_NONE; + crosspage->crosspageData.moduleCallType = MODULECALL_ALLOC_VMX_PAGE; + crosspage->crosspageData.pcpuNum = pCPU; + } else if (USE_TEMPORARY_GDT && tempGDTBase == NULL) { + crosspage->crosspageData.userCallType = MODULECALL_USERCALL_NONE; + crosspage->crosspageData.moduleCallType = MODULECALL_ALLOC_TMP_GDT; + crosspage->crosspageData.pcpuNum = pCPU; + } else { + do { + uintptr_t cr0reg, cr2reg, cr3reg, cr4reg; + uint64 efer = ~0ULL; + Bool needVMXOFF = FALSE; + MA foreignVMCS = ~0ULL; + MA foreignHSAVE = ~0ULL; + + vm->currentHostCpu[vcpuid] = pCPU; + + TaskUpdatePTSCParameters(vm, &crosspage->crosspageData, vcpuid); + + /* + * Disable PEBS if it is supported and enabled. Do this while on the + * hosts IDT - PR 848701. + */ + if (pebsAvailable) { + pebsMSR = __GET_MSR(IA32_MSR_PEBS_ENABLE); + if (pebsMSR != 0) { + __SET_MSR(IA32_MSR_PEBS_ENABLE, 0); + } + } + + /* + * Save the host's standard IDT and set up an IDT that only + * has space for all the hardware exceptions (though only a + * few are handled). + */ + + TaskSaveIDT64(&hostIDT64); + TaskLoadIDT64(&crosspage->crosspageData.switchHostIDTR); + TaskTestCrossPageExceptionHandlers(crosspage); + + if (crosspage->crosspageData.activateVMX) { + /* + * Ensure that VMX is enabled and locked in the feature control MSR, + * so that we can set CR4.VMXE to activate VMX. + */ + uint64 bits = MSR_FEATCTL_LOCK | MSR_FEATCTL_VMXE; + uint64 featCtl = __GET_MSR(MSR_FEATCTL); + if ((featCtl & bits) != bits) { + if ((featCtl & MSR_FEATCTL_LOCK) != 0) { + Panic("Intel VT-x is disabled and locked on CPU %d\n", pCPU); + } + __SET_MSR(MSR_FEATCTL, featCtl | bits); + } + } + + /* + * Save CR state. The monitor deals with EFER. + */ + + GET_CR2(cr2reg); + GET_CR0(cr0reg); + GET_CR4(cr4reg); + GET_CR3(cr3reg); + crosspage->crosspageData.hostCR3 = cr3reg; + + /* + * Any reserved bits in CR0 must be preserved when we switch + * to the VMM. [See PR 291004.] (On the other hand, Intel + * recommends that we clear any reserved CR4 bits.) + */ + crosspage->crosspageData.wsCR0 &= ~CR0_RESERVED; + crosspage->crosspageData.wsCR0 |= (cr0reg & CR0_RESERVED); + + /* + * CR4.VMXE must be enabled to support VMX in the monitor, and it + * can't be cleared if it is set on the host. + */ + if (crosspage->crosspageData.activateVMX || (cr4reg & CR4_VMXE) != 0) { + crosspage->crosspageData.wsCR4 |= CR4_VMXE; + } + + /* + * The world-switch CR4.MCE and CR4.PCIDE should always reflect the + * host's values. CR4.PCIDE will be cleared once we're in the monitor, + * running on a CR3 with a PCID field of 0. + */ + crosspage->crosspageData.wsCR4 = + (crosspage->crosspageData.wsCR4 & ~(CR4_MCE | CR4_PCIDE)) | + (cr4reg & (CR4_MCE | CR4_PCIDE)); + + /* + * The world-switch should never have global pages enabled. Therefore, + * switching to the monitor's CR4 ensures that global pages are + * flushed. + */ + ASSERT((crosspage->crosspageData.wsCR4 & CR4_PGE) == 0); + + /* + * Load the world-switch CR0 and CR4. We can't load the monitor's + * CR3 yet, because the current code isn't mapped into the + * monitor's address space. 
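[Editorial note: the wsCR0/wsCR4 handling below keeps selected host bits (reserved CR0 bits, CR4.MCE, CR4.PCIDE) while using the world-switch values for everything else. A tiny standalone restatement of that merge-under-mask idiom; MergeUnderMask and the constants are illustrative, not CR register values.]

#include <assert.h>
#include <stdint.h>

/* Take 'ours' but keep exactly the bits selected by 'mask' from 'host'. */
static inline uint64_t
MergeUnderMask(uint64_t ours, uint64_t host, uint64_t mask)
{
   return (ours & ~mask) | (host & mask);
}

int
main(void)
{
   /* Hypothetical values: preserve bits 63:32 of the host word. */
   uint64_t merged = MergeUnderMask(0x00000000DEADBEEFULL,
                                    0x1234567800000000ULL,
                                    0xFFFFFFFF00000000ULL);
   assert(merged == 0x12345678DEADBEEFULL);
   return 0;
}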
+ */ + SET_CR0((uintptr_t)crosspage->crosspageData.wsCR0); + SET_CR4((uintptr_t)crosspage->crosspageData.wsCR4); + + TaskSaveDebugRegisters(crosspage); + + TaskSaveGDT64(&hostGDT64); + + if (crosspage->crosspageData.activateVMX) { + MA vmxonRegion = MPN_2_MA(hvRootMPN); + VMXStatus status = VMXON_2_STATUS(&vmxonRegion); + if (status == VMX_Success) { + needVMXOFF = TRUE; + } else { + VMPTRST(&foreignVMCS); + } + } + + if (crosspage->crosspageData.activateSVM) { + efer = __GET_MSR(MSR_EFER); + if ((efer & MSR_EFER_SVME) == 0) { + __SET_MSR(MSR_EFER, efer | MSR_EFER_SVME); + } + foreignHSAVE = __GET_MSR(MSR_VM_HSAVE_PA); + __SET_MSR(MSR_VM_HSAVE_PA, MPN_2_MA(hvRootMPN)); + } + + /* + * If NMI stress testing enabled, set EFLAGS. This will + * make sure there is a valid IDT, GDT, stack, etc. at every + * instruction boundary during the switch. + */ + if (WS_INTR_STRESS) { + TaskEnableTF(); + } + + /* + * GS and FS are saved outside of the TaskSwitchToMonitor() code to + * + * 1) minimize the amount of code handled there, and + * + * 2) prevent us from faulting if they happen to be in the LDT + * (since the LDT is saved and restored here too). + * + * Also, the 32-bit Mac OS running in legacy mode has + * CS, DS, ES, SS in the LDT! + */ + cs = GET_CS(); + ss = GET_SS(); +#if defined __APPLE__ + /* + * The 64-bit Mac OS kernel leaks segment selectors from + * other threads into 64-bit threads. When the selectors + * reference a foreign thread's LDT, we may not be able to + * reload them using our thread's LDT. So, let's just clear + * them instead of trying to preserve them. [PR 467140] + */ + ds = 0; + es = 0; + fs = 0; + gs = 0; +#else + ds = GET_DS(); + es = GET_ES(); + fs = GET_FS(); + gs = GET_GS(); +#endif + GET_LDT(hostLDT); + GET_TR(hostTR); + + kgs64 = GET_KernelGS64(); + gs64 = GET_GS64(); + fs64 = GET_FS64(); + + /* + * Make sure stack segment is non-zero so worldswitch can use it + * to temporarily restore DS,ES on return. + */ + if (ss == 0) { + SET_SS(kernelStackSegment); + } + + TS_ASSERT(SELECTOR_TABLE(cs) == SELECTOR_GDT); + TS_ASSERT(SELECTOR_TABLE(ds) == SELECTOR_GDT); + TS_ASSERT(SELECTOR_TABLE(ss) == SELECTOR_GDT); + + DEBUG_ONLY(crosspage->crosspageData.tinyStack[0] = 0xDEADBEEF;) + /* Running in host context prior to TaskSwitchToMonitor() */ + TaskSwitchToMonitor(crosspage); + /* Running in host context after to TaskSwitchToMonitor() */ + + TS_ASSERT(crosspage->crosspageData.tinyStack[0] == 0xDEADBEEF); + + /* + * Temporarily disable single-step stress as VMX/VMCS change code + * ASSERTS on RFLAGS content without allowing TF/RF to be set. + */ + if (WS_INTR_STRESS) { + TaskDisableTF(); + } + + if (needVMXOFF) { + VMXOFF(); + } else if (foreignVMCS != ~0ULL) { + VMPTRLD_UNCHECKED(&foreignVMCS); + } + + if (WS_INTR_STRESS) { + TaskEnableTF(); + } + + if (crosspage->crosspageData.activateSVM) { + __SET_MSR(MSR_VM_HSAVE_PA, foreignHSAVE); + if ((efer & MSR_EFER_SVME) == 0) { + __SET_MSR(MSR_EFER, efer); + } + } + + /* + * Restore CR state. + * CR3 should already have been restored. CR0 and CR4 have to + * be restored if the world switch values do not match the host's. + * CR2 always has to be restored. CR8 never has to be restored. + */ + SET_CR2(cr2reg); + if (crosspage->crosspageData.wsCR0 != cr0reg) { + SET_CR0(cr0reg); + } + if (crosspage->crosspageData.wsCR4 != cr4reg) { + SET_CR4(cr4reg); + } else if ((cr4reg & CR4_PCIDE) != 0) { + /* + * Flush PCID 0. 
+ */ + ASSERT((cr4reg & CR4_PGE) == 0); + SET_CR4(cr4reg | CR4_PGE); + SET_CR4(cr4reg); + } + if (vmx86_debug) { + uintptr_t cr; + GET_CR0(cr); + ASSERT(cr == cr0reg); + GET_CR4(cr); + ASSERT(cr == cr4reg); + GET_CR3(cr); + ASSERT(cr == cr3reg); + } + + /* + * TaskSwitchToMonitor() returns with GDT = crossGDT so switch back to + * the host GDT here. We will also restore host TR as the task busy + * bit needs to be fiddled with. Also restore host LDT while we're + * at it. + */ + TaskRestoreHostGDTTRLDT(tempGDTBase, hostGDT64, + hostLDT, cs, hostTR); + + SET_DS(ds); + SET_ES(es); + + /* + * First, restore %fs and %gs from the in-memory descriptor tables, + * and then overwrite the bases in the descriptor cache with the + * saved 64-bit values. + */ + + SET_FS(fs); + SET_GS(gs); + SET_FS64(fs64); + SET_GS64(gs64); + SET_KernelGS64(kgs64); + + /* Restore debug registers and host's IDT; turn off stress test. */ + if (WS_INTR_STRESS) { + TaskDisableTF(); + } + + TaskRestoreDebugRegisters(&crosspage->crosspageData); + + ASSERT_NO_INTERRUPTS(); + + /* + * Restore standard host interrupt table and re-enable PEBS afterwards + * iff we disabled it. + */ + + TaskLoadIDT64(&hostIDT64); + + if (pebsMSR != 0) { + __SET_MSR(IA32_MSR_PEBS_ENABLE, pebsMSR); + } + + TaskUpdateLatestPTSC(vm, &crosspage->crosspageData); + vm->currentHostCpu[vcpuid] = INVALID_PCPU; + + /* + * If an #NMI or #MCE was logged while switching, re-raise such an + * interrupt or exception for the host to consume. Handlers preserve + * NMI-blocking (when not stress-testing or changing VIP/VIP) by using + * synthetic irets instead of real irets. By this point, if an NMI + * was received during switching, NMIs should still be blocked. + * + * When stress testing, NMIs are almost guaranteed to be synthetic, so + * no NMI is raised. + * + * If a #UD was logged while switching, warn accordingly rather than + * raising a new exception as this would likely panic the host kernel. + */ + + if (UNLIKELY(TaskGotException(crosspage, EXC_NMI))) { + TaskSetException(crosspage, EXC_NMI, FALSE); + if (!WS_INTR_STRESS) { + RAISE_INTERRUPT(EXC_NMI); + } + } + + if (UNLIKELY(TaskGotException(crosspage, EXC_MC))) { + TaskSetException(crosspage, EXC_MC, FALSE); + if (vmx86_debug) { + CP_PutStr("Task_Switch: forwarding MCE to host\n"); + } + RAISE_INTERRUPT(EXC_MC); + } + if (UNLIKELY(TaskGotException(crosspage, EXC_UD))) { + Warning("#UD occurred on switch back to host; dumping core"); + } + /* + * The NMI/MCE checks above are special cases for interrupts + * received during worldswitch. Here is the more generic case + * of forwarding NMIs received while executing the VMM/guest. + */ + if (crosspage->crosspageData.moduleCallType == MODULECALL_INTR && + crosspage->crosspageData.args[0] == EXC_NMI) { + /* + * If VMM was interrupted by an NMI, do the INT 2 so the + * host will handle it, but then return immediately to the + * VMM in case the VMM was in the middle of a critical + * region. E.g. the NMI may have interrupted the VMM while + * an interrupt was in service, before the VMM or host has + * done the EOI. + */ + RAISE_INTERRUPT(EXC_NMI); + crosspage->crosspageData.retryWorldSwitch = TRUE; + } + } while (UNLIKELY(TaskShouldRetryWorldSwitch(crosspage))); + } + + if (crosspage->crosspageData.moduleCallType == MODULECALL_INTR) { + /* + * Note we must do the RAISE_INTERRUPT before ever enabling + * interrupts or bad things have happened (might want to know exactly + * what bad things btw). 
+ */ +#ifdef _WIN64 + if (crosspage->crosspageData.args[0] <= 0xFF && + (crosspage->crosspageData.args[0] >= 0x14 || + crosspage->crosspageData.args[0] == EXC_MC)) { + RAISE_INTERRUPT((unsigned char)crosspage->crosspageData.args[0]); + } else { + Warning("%s: Received Unexpected Interrupt: 0x%"FMT64"X\n", + __FUNCTION__, crosspage->crosspageData.args[0]); + Panic("Received Unexpected Interrupt: 0x%"FMT64"X\n", + crosspage->crosspageData.args[0]); + } +#else + /* + * Note2 RAISE_INTERRUPT() only takes a constant and hence with switch + * statement. + */ +#define IRQ_INT(_x) case _x: RAISE_INTERRUPT(_x); break +#define IRQ_INT2(_x) IRQ_INT(_x); IRQ_INT(_x + 1) +#define IRQ_INT4(_x) IRQ_INT2(_x); IRQ_INT2(_x + 2) +#define IRQ_INT8(_x) IRQ_INT4(_x); IRQ_INT4(_x + 4) +#define IRQ_INT16(_x) IRQ_INT8(_x); IRQ_INT8(_x + 8) +#define IRQ_INT32(_x) IRQ_INT16(_x); IRQ_INT16(_x + 16) + + switch (crosspage->crosspageData.args[0]) { + // These are the general IO interrupts + // It would be nice to generate this dynamically, but see Note2 above. + + /* + * Pass Machine Check Exception (Interrupt 0x12) to the host. + * See bug #45286 for details. + */ + IRQ_INT(EXC_MC); + + /* + * pass the reserved vectors (20-31) as well. amd64 windows + * generates these. + */ + + IRQ_INT8(0x14); + IRQ_INT4(0x1c); + + IRQ_INT32(0x20); + IRQ_INT32(0x40); + IRQ_INT32(0x60); + IRQ_INT32(0x80); + IRQ_INT32(0xa0); + IRQ_INT32(0xc0); + IRQ_INT32(0xe0); + + default: + /* + * XXXX nt running on a 2 processor machine we hit this Panic + * with int 0xD1 0x61 ... + */ + + Warning("%s: Received Unexpected Interrupt: 0x%"FMT64"X\n", + __FUNCTION__, crosspage->crosspageData.args[0]); + Panic("Received Unexpected Interrupt: 0x%"FMT64"X\n", + crosspage->crosspageData.args[0]); + } +#endif + } + + RESTORE_FLAGS(flags); + TaskRestoreNMI(&vm->hostAPIC, lint0NMI, lint1NMI, pcNMI, thermalNMI); +} diff --git a/vmmon-only/common/task.h b/vmmon-only/common/task.h new file mode 100644 index 00000000..b2fcfd97 --- /dev/null +++ b/vmmon-only/common/task.h @@ -0,0 +1,43 @@ +/********************************************************* + * Copyright (C) 1998-2013 VMware, Inc. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation version 2 and no later version. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY + * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * for more details. 
+ * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write to the Free Software Foundation, Inc., + * 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + * + *********************************************************/ + + + +#ifndef TASK_H +#define TASK_H + +#define INCLUDE_ALLOW_VMMON +#define INCLUDE_ALLOW_VMCORE +#include "includeCheck.h" + +struct InitBlock; +struct InitCrossGDT; + +extern Bool Task_AllocCrossGDT(struct InitBlock *initBlock); +extern int Task_InitCrosspage(VMDriver *vm, struct InitBlock *params); +extern int Task_InitCrossGDT(struct InitCrossGDT *initCrossGDT); +extern void Task_Switch(VMDriver *vm, Vcpuid vcpuid); +extern Bool Task_Initialize(void); +extern void Task_Terminate(void); +extern MPN Task_GetHVRootPageForPCPU(uint32 pCPU); +extern Descriptor *Task_GetTmpGDT(uint32 pCPU); + +#endif + + + diff --git a/vmmon-only/common/vmx86.c b/vmmon-only/common/vmx86.c new file mode 100644 index 00000000..dbe794c5 --- /dev/null +++ b/vmmon-only/common/vmx86.c @@ -0,0 +1,2920 @@ +/********************************************************* + * Copyright (C) 1998-2016 VMware, Inc. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation version 2 and no later version. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY + * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * for more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write to the Free Software Foundation, Inc., + * 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + * + *********************************************************/ + +/* + * vmx86.c -- + * + * Platform independent routines for creating/destroying/running + * virtual machine monitors. + */ + +#ifdef linux +/* Must come before any kernel header file --hpreg */ +# include "driver-config.h" + +# include /* memset() in the kernel */ +# include /* jiffies from the kernel */ +#else +# include +#endif + +#ifdef __APPLE__ +#include // must come before "vmware.h" +#endif + +#include "vmware.h" +#include "vm_assert.h" +#include "vm_basic_math.h" +#include "vmx86.h" +#include "task.h" +#include "initblock.h" +#include "vm_asm.h" +#include "iocontrols.h" +#include "hostif.h" +#include "cpuid.h" +#include "vcpuset.h" +#include "memtrack.h" +#include "hashFunc.h" +#if defined(_WIN64) +#include "x86.h" +#include "vmmon-asm-x86-64.h" +#endif +#include "x86vt.h" +#include "x86svm.h" +#include "x86cpuid_asm.h" +#if defined(linux) +#include +#endif +#include "x86perfctr.h" + + +PseudoTSC pseudoTSC; + +/* + * Keep track of the virtual machines that have been + * created using the following structures. + */ + +static VMDriver *vmDriverList = NULL; + +static LockedPageLimit lockedPageLimit = { + 0, // host: does not need to be initialized. + 0, // configured: must be set by some VM as it is powered on. + (uint32)MAX_LOCKED_PAGES, // dynamic +}; + +/* Percentage of guest "paged" memory that must fit within the hard limit. */ +static unsigned minVmMemPct; + +/* Number of pages actually locked by all virtual machines */ +static unsigned numLockedPages; + +/* Total virtual machines on this host */ +static unsigned vmCount; + +/* Total number of open vmmon file handles. 
*/ +static unsigned fdCount; + +/* + * We implement a list of allocated VM ID's using an array. + * The array is initialized with the values 1...MAX_VMS-1, INVALID_VMID. + * vmIDsAllocated holds the last VM ID given out and vmIDsUnused + * holds the next VM ID to give out. + */ + +#define INVALID_VMID (-1) +static int vmIDList[MAX_VMS]; +static int vmIDsAllocated; +static int vmIDsUnused; + +/* Max rate requested for fast clock by any virtual machine. */ +static unsigned globalFastClockRate; + +/* + *---------------------------------------------------------------------- + * + * Vmx86AdjustLimitForOverheads -- + * + * This function adjusts an overall limit on the number of + * locked pages to take into account overhead for the vmx processes, etc. + * since the hostOS will also see this as overhead. We do this for all + * vmx processes, not just ones whose vms have been admitted. + * + * If "vm" is NULL, we are allocating a global page and have no + * perVMOverhead term to take into account. + * + * Results: + * Number of remaining pages considered to be lockable on this host. + * + * Side effects: + * None. + * + *---------------------------------------------------------------------- + */ + +static INLINE unsigned +Vmx86AdjustLimitForOverheads(const VMDriver* vm, + const uint32 limit) +{ + uint32 extraCost = (vm != NULL) ? vmCount * vm->memInfo.perVMOverhead : 0; + ASSERT(HostIF_GlobalLockIsHeld()); + + return (extraCost < limit) ? (limit - extraCost) : 0; +} + + +/* + *---------------------------------------------------------------------- + * + * Vmx86LockedPageLimit -- + * + * There are three limits controlling how many pages we can lock on + * a host: + * + * lockedPageLimit.configured is controlled by UI, + * lockedPageLimit.dynamic is controlled by authd's hardLimitMonitor, + * lockedPageLimit.host is calculated dynamically based on kernel stats + * by vmmon using kernel stats. + * + * We can lock the MIN of these values. + * + * Results: + * Number of pages to lock on this host. + * + * Side effects: + * Updates the host locked pages limit. + * + *---------------------------------------------------------------------- + */ + +static INLINE unsigned +Vmx86LockedPageLimit(const VMDriver* vm) // IN: +{ + uint32 overallLimit; + ASSERT(HostIF_GlobalLockIsHeld()); + + lockedPageLimit.host = HostIF_EstimateLockedPageLimit(vm, numLockedPages); + overallLimit = MIN(MIN(lockedPageLimit.configured, lockedPageLimit.dynamic), + lockedPageLimit.host); + + return Vmx86AdjustLimitForOverheads(vm, overallLimit); +} + + +/* + *---------------------------------------------------------------------- + * + * Vmx86HasFreePages -- + * + * Returns TRUE if the vm can lock more pages. This is true if + * we are below the host's hard memory limit and this vm has not + * exceeded its maximum allocation. + * Callers must ensure driver-wide and VM serialization + * typically by using HostIF_GlobalLock() and HostIF_VMLock(). + * + * Results: + * TRUE if pages can be locked, FALSE otherwise + * + * Side effects: + * None + * + *---------------------------------------------------------------------- + */ + +static INLINE Bool +Vmx86HasFreePages(VMDriver *vm, + unsigned int numPages, + Bool checkVM) +{ + /* + * 1) Be careful with overflow. + * 2) lockedPageLimit and vm->memInfo.maxAllocation can be decreased below + * the current numLockedPages and vm->memInfo.locked + * 3) lockedPageLimit.host can go lower than numLockedPages. 
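+ *
+ *    Worked example (illustrative numbers): with a global limit of 1000
+ *    pages and numLockedPages == 990, a request for numPages == 20 is
+ *    refused because limit - numLockedPages == 10 < 20; the same request
+ *    succeeds once at least 10 pages have been unlocked elsewhere.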
+ */ + + ASSERT(HostIF_GlobalLockIsHeld() && + (!checkVM || HostIF_VMLockIsHeld(vm))); + + if (checkVM) { + /* + * Check the per-vm limit. + */ + + ASSERT(HostIF_VMLockIsHeld(vm)); + if (vm->memInfo.admitted) { + if (vm->memInfo.maxAllocation <= vm->memInfo.locked) { + return FALSE; + } else if (vm->memInfo.maxAllocation - vm->memInfo.locked < numPages) { + return FALSE; + } + } + } else { + /* + * Check the global limit. + */ + + unsigned limit = Vmx86LockedPageLimit(vm); + + if (limit <= numLockedPages) { + return FALSE; + } else if (limit - numLockedPages < numPages) { + return FALSE; + } + } + + return TRUE; +} + + +#ifdef VMX86_DEBUG +/* + *---------------------------------------------------------------------- + * + * Vmx86VMIsRegistered -- + * + * Check if "vm" is on the list of VMDrivers. + * + * Results: + * Return TRUE iff "vm" is on the list of VMDrivers. + * + * Side effects: + * None + * + *---------------------------------------------------------------- + */ + +static Bool +Vmx86VMIsRegistered(VMDriver *vm, Bool needsLock) +{ + VMDriver *tmp; + Bool found = FALSE; + + ASSERT(needsLock || HostIF_GlobalLockIsHeld()); + + if (needsLock) { + HostIF_GlobalLock(5); + } + + for (tmp = vmDriverList; tmp != NULL; tmp = tmp->nextDriver) { + if (tmp == vm) { + found = TRUE; + break; + } + } + + if (needsLock) { + HostIF_GlobalUnlock(5); + } + + return found; +} +#endif + + +/* + *---------------------------------------------------------------------- + * + * Vmx86_InitIDList -- + * + * Called when the driver is initialized. + * Set up the list of available VM ID's. + * + * Results: + * None. Sets up global data. + * + * Side effects: + * None. + * + *---------------------------------------------------------------------- + */ + +void +Vmx86_InitIDList(void) +{ + int i; + + HostIF_GlobalLock(32); + + for (i = 0; i < MAX_VMS; i++) { + vmIDList[i] = i + 1; + } + vmIDList[MAX_VMS - 1] = INVALID_VMID; + vmIDsUnused = 0; + vmIDsAllocated = INVALID_VMID; + + HostIF_GlobalUnlock(32); +} + + +/* + *---------------------------------------------------------------------- + * + * Vmx86FreeVMID -- + * + * Return a VM ID to the list of available VM ID's. + * + * Results: + * None + * + * Side effects: + * None + * + *---------------------------------------------------------------------- + */ + +static void +Vmx86FreeVMID(int vmID) // IN +{ + int i; + + ASSERT(HostIF_GlobalLockIsHeld()); + + /* Deleting head of the list. */ + if (vmID == vmIDsAllocated) { + int tmp; + + tmp = vmIDList[vmIDsAllocated]; + vmIDList[vmIDsAllocated] = vmIDsUnused; + vmIDsAllocated = tmp; + vmIDsUnused = vmID; + + return; + } + + for (i = vmIDsAllocated; vmIDList[i] != INVALID_VMID; i = vmIDList[i]) { + if (vmIDList[i] == vmID) { + vmIDList[i] = vmIDList[vmID]; + vmIDList[vmID] = vmIDsUnused; + vmIDsUnused = vmID; + + return; + } + } +} + + +/* + *---------------------------------------------------------------------- + * + * Vmx86AllocVMID -- + * + * Grab a VM ID from the list of available VM ID's. + * + * Results: + * The VM ID, in the range [ 0 ; MAX_VMS ). 
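+ *
+ *      For example (illustrative trace): immediately after
+ *      Vmx86_InitIDList() the first three calls return 0, 1 and 2; if
+ *      ID 1 is then released via Vmx86FreeVMID(), it moves to the head
+ *      of the unused list and is the next ID handed out.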
+ * + * Side effects: + * None + * + *---------------------------------------------------------------------- + */ + +static int +Vmx86AllocVMID(void) +{ + int vmID; + + ASSERT(HostIF_GlobalLockIsHeld()); + + vmID = vmIDsUnused; + ASSERT(0 <= vmID && vmID < MAX_VMS); + vmIDsUnused = vmIDList[vmID]; + vmIDList[vmID] = vmIDsAllocated; + vmIDsAllocated = vmID; + + return vmID; +} + + +/* + *---------------------------------------------------------------------- + * + * Vmx86RegisterVMOnList -- + * + * Add a VM to the list of registered VMs and increment + * the count of VMs. + * + * Results: + * None + * + * Side effects: + * Add VM to linked list. + * Increment count of VMs. + * + *---------------------------------------------------------------- + */ + +static void +Vmx86RegisterVMOnList(VMDriver *vm) // IN +{ + int vmID; + VMDriver **vmp; + + ASSERT(HostIF_GlobalLockIsHeld()); + vmCount++; + vmID = Vmx86AllocVMID(); + ASSERT(vm->userID == 0); + vm->userID = vmID + 1; + ASSERT(vm->userID > 0); + + for (vmp = &vmDriverList; *vmp != NULL; vmp = &(*vmp)->nextDriver) { + if (*vmp == vm) { + Warning("VM already registered on the list of VMs.\n"); + return; + } + } + *vmp = vm; +} + + +/* + *---------------------------------------------------------------------- + * + * Vmx86DeleteVMFromList -- + * + * Delete a VM from the list of registered VMs and decrement + * the count of VMs. This function should be called on any + * VM registered on the VMDriverList before invoking + * Vmx86FreeAllVMResources to free its memory. + * + * Results: + * None + * + * Side effects: + * Remove VM from linked list. + * Decrement count of VMs. + * + *---------------------------------------------------------------- + */ + +static void +Vmx86DeleteVMFromList(VMDriver *vm) +{ + VMDriver **vmp; + + ASSERT(vm); + ASSERT(HostIF_GlobalLockIsHeld()); + for (vmp = &vmDriverList; *vmp != vm; vmp = &(*vmp)->nextDriver) { + if (*vmp == NULL) { + Warning("VM is not on the list of registered VMs.\n"); + return; + } + } + *vmp = vm->nextDriver; + vmCount--; + + Vmx86FreeVMID(vm->userID - 1); + numLockedPages -= vm->memInfo.locked; + + /* + * If no VM is running, reset the configured locked-page limit so + * that the next VM to power on sets it appropriately. + */ + + if (vmCount == 0) { + lockedPageLimit.configured = 0; + } +} + + +/* + *---------------------------------------------------------------------- + * + * Vmx86FreeAllVMResources + * + * Free the resources allocated for a vm that is not registered + * on the VMDriverList. Except in the case of Vmx86_CreateVM(), + * this should be called only after a call to Vmx86DeleteVMFromList(). + * + * Results: + * None + * + * Side effects: + * Memory freed. + * + *---------------------------------------------------------------------- + */ + +static void +Vmx86FreeAllVMResources(VMDriver *vm) +{ + ASSERT(!HostIF_GlobalLockIsHeld()); + if (vm) { + ASSERT(!Vmx86VMIsRegistered(vm, TRUE)); + + Vmx86_SetHostClockRate(vm, 0); + + HostIF_FreeAllResources(vm); + + HostIF_FreeKernelMem(vm); + } +} + + +/* + *---------------------------------------------------------------------- + * + * Vmx86ReserveFreePages -- + * + * Returns TRUE and increases locked page counts if the vm can lock + * more pages. This is true if we are below the host's hard memory + * limit and this vm has not exceeded its maximum allocation. + * The function is thread-safe. + * + * If ignoreLimits is TRUE then additional pages may be reserved even + * if limits are violated. 
The request to ignore limits may come in + * cases of anonymous page allocations. Swapping is not always possible + * at those points but a swap target will have been posted so that the + * vmm will release memory shortly allowing the excessive reservation + * to be reduced. + * + * Results: + * TRUE if pages are reserved for locking, FALSE otherwise + * + * Side effects: + * The global lock and VM's lock are acquired and released. + * + *---------------------------------------------------------------------- + */ + +static Bool +Vmx86ReserveFreePages(VMDriver *vm, + unsigned int numPages, + Bool ignoreLimits) +{ + Bool retval = FALSE; + int retries = 3; + + ASSERT(vm); + + for (retries = 3; !retval && (retries > 0); retries--) { + HostIF_GlobalLock(17); + HostIF_VMLock(vm, 0); + + // Check VM's limit and don't wait. + retval = Vmx86HasFreePages(vm, numPages, TRUE); + if (!retval) { + HostIF_VMUnlock(vm, 0); + HostIF_GlobalUnlock(17); + break; + } else { + // Wait to satisfy the global limit. + retval = Vmx86HasFreePages(vm, numPages, FALSE); + if (retval) { + numLockedPages += numPages; + vm->memInfo.locked += numPages; + HostIF_VMUnlock(vm, 0); + HostIF_GlobalUnlock(17); + break; + } else { + /* + * There are not enough pages -- drop the locks and wait for + * the host and/or other VMs to produce free pages. + */ + + HostIF_VMUnlock(vm, 0); + HostIF_GlobalUnlock(17); + HostIF_WaitForFreePages(10); + } + } + } + + if (!retval && ignoreLimits) { + HostIF_GlobalLock(17); + HostIF_VMLock(vm, 0); + numLockedPages += numPages; + vm->memInfo.locked += numPages; + HostIF_VMUnlock(vm, 0); + HostIF_GlobalUnlock(17); + retval = TRUE; + } + + return retval; +} + + +/* + *---------------------------------------------------------------------- + * + * Vmx86UnreserveFreePages -- + * + * Decreases the global and VM's locked page counts. + * The function is thread-safe. + * + * Results: + * void + * + * Side effects: + * The global lock and VM's lock are acquired and released. + * + *---------------------------------------------------------------------- + */ + +static void +Vmx86UnreserveFreePages(VMDriver *vm, + unsigned int numPages) +{ + ASSERT(vm); + + HostIF_GlobalLock(18); + HostIF_VMLock(vm, 1); + + ASSERT(numLockedPages >= numPages); + ASSERT(vm->memInfo.locked >= numPages); + + numLockedPages -= numPages; + vm->memInfo.locked -= numPages; + + HostIF_VMUnlock(vm, 1); + HostIF_GlobalUnlock(18); +} + + +/* + *----------------------------------------------------------------------------- + * + * Vmx86_CreateVM -- + * + * Allocate and initialize a driver structure for a virtual machine. + * + * Results: + * VMDriver structure or NULL on error. + * + * Side effects: + * May allocate kernel memory. 
+ * + *----------------------------------------------------------------------------- + */ + +VMDriver * +Vmx86_CreateVM(void) +{ + VMDriver *vm; + Vcpuid v; + + vm = HostIF_AllocKernelMem(sizeof *vm, TRUE); + if (vm == NULL) { + return NULL; + } + memset(vm, 0, sizeof *vm); + + vm->userID = 0; + vm->memInfo.admitted = FALSE; + vm->fastSuspResFlag = 0; + for (v = 0; v < MAX_INITBLOCK_CPUS; v++) { + vm->currentHostCpu[v] = INVALID_PCPU; + } + + if (HostIF_Init(vm)) { + goto cleanup; + } + + HostIF_GlobalLock(0); + +#ifdef _WIN32 + if (vmCount >= MAX_VMS_WIN32) { + HostIF_GlobalUnlock(0); + goto cleanup; + } +#endif + if (vmCount >= MAX_VMS) { + HostIF_GlobalUnlock(0); + goto cleanup; + } + + Vmx86RegisterVMOnList(vm); + + HostIF_GlobalUnlock(0); + + return vm; + +cleanup: + /* + * The VM is not on a list, "vmCount" has not been incremented, + * "vm->cowID" is INVALID_VMID, and either the VM's mutex hasn't + * been initialized or we've only taken the global lock and checked + * a counter since, so we know that the VM has not yet locked any + * pages. + */ + + ASSERT(vm->memInfo.locked == 0); + Vmx86FreeAllVMResources(vm); + + return NULL; +} + + +/* + *---------------------------------------------------------------------- + * + * Vmx86_ReleaseVM -- + * + * Release a VM (either created here or from a bind). + * + * Results: + * zero if successful + * + * Side effects: + * Decrement VM reference count. + * Release resources (those that are left) when count reaches 0. + * + *---------------------------------------------------------------------- + */ + +int +Vmx86_ReleaseVM(VMDriver *vm) // IN: +{ + ASSERT(vm); + HostIF_GlobalLock(1); + Vmx86DeleteVMFromList(vm); + HostIF_GlobalUnlock(1); + Vmx86FreeAllVMResources(vm); + + return 0; +} + + +/* + *---------------------------------------------------------------------- + * + * Vmx86_Open -- + * + * Called on open of the fd. + * + * Results: + * None. + * + * Side effects: + * Bumps fdCount. + * + *---------------------------------------------------------------------- + */ + +void +Vmx86_Open(void) +{ + HostIF_GlobalLock(123); + ASSERT(fdCount < MAX_INT32); + if (fdCount < MAX_INT32) { + fdCount++; + } + HostIF_GlobalUnlock(123); +} + + +/* + *---------------------------------------------------------------------- + * + * Vmx86_Close -- + * + * Called on close of the fd. + * + * Results: + * None. + * + * Side effects: + * Decrements fdCount + * May de-initialize ptsc. + * + *---------------------------------------------------------------------- + */ + +void +Vmx86_Close(void) +{ + HostIF_GlobalLock(124); + + /* + * If fdCount hits MAX_INT32 saturate the counter and leave it at + * MAX_INT32. + */ + + ASSERT(fdCount > 0); + if (fdCount < MAX_INT32) { + fdCount--; + } + + /* + * If no VMs are running and there are no open file handles, reset the + * pseudo TSC state so that the next VM to initialize is free to + * initialize the system wide PTSC however it wants. See PR 403505. + */ + + if (fdCount == 0) { + ASSERT(vmCount == 0); + pseudoTSC.initialized = FALSE; + } + HostIF_GlobalUnlock(124); +} + + +/* + *----------------------------------------------------------------------------- + * + * Vmx86_InitVM -- + * + * Initializaiton of the VM. Expects all initial arguments + * to be part of the InitBlock structure. 
+ * + * Results: + * 0 on success + * != 0 on failure + * + * Side effects: + * Many + * + *----------------------------------------------------------------------------- + */ + +int +Vmx86_InitVM(VMDriver *vm, // IN + InitBlock *initParams) // IN/OUT: Initial params from the VM +{ + int retval; + + if (initParams->magicNumber != INIT_BLOCK_MAGIC) { + Warning("Bad magic number for init block 0x%x\n", + initParams->magicNumber); + + return 1; + } + if (initParams->numVCPUs > MAX_INITBLOCK_CPUS) { + Warning("Too many VCPUs for init block %d\n", initParams->numVCPUs); + + return 1; + } + vm->numVCPUs = initParams->numVCPUs; + + HostIF_InitFP(vm); + + /* + * Initialize the driver's part of the cross-over page used to + * talk to the monitor + */ + + retval = Task_InitCrosspage(vm, initParams); + if (retval) { + Warning("Task crosspage init died with retval=%d\n", retval); + /* + * Note that any clean-up of resources will be handled during + * power-off when Vmx86_ReleaseVM() is called as part of + * MonitorLoop_PowerOff(). + */ + + return 1; + } + + /* + * Check if we want to arbitrarily fail every N VM initializations. + * Useful in testing PR 72482. + */ + + if (initParams->vmInitFailurePeriod != 0) { + static uint32 counter = 0; + + if ((++counter) % initParams->vmInitFailurePeriod == 0) { + Warning("VM initialization failed on %d iteration\n", counter); + + return 1; + } + } + + return 0; +} + + +/* + *---------------------------------------------------------------------- + * + * Vmx86_ReadTSCAndUptime -- + * + * Atomically read the TSC and the uptime. + * + * Results: + * The current TSC and uptime values. + * + * Side effects: + * none + * + * + *---------------------------------------------------------------------- + */ + +void +Vmx86_ReadTSCAndUptime(VmTimeStart *st) // OUT: return value +{ + uintptr_t flags; + + SAVE_FLAGS(flags); + CLEAR_INTERRUPTS(); + + st->count = RDTSC(); + st->time = HostIF_ReadUptime(); + + RESTORE_FLAGS(flags); +} + + +/* + *---------------------------------------------------------------------- + * + * Vmx86_ComputekHz -- + * + * Given aggregate cycles and system uptime, computes cycle rate as, + * + * khz = cycles / (uptime / HostIF_UptimeFrequency()) / 1000 + * + * We need to do the computation carefully to avoid overflow or + * undue loss of precision. Also, on Linux we can't do a + * 64/64=64 bit division directly, as the gcc stub for that + * is not linked into the kernel. + * + * Results: + * Returns the computed khz value, or 0 if uptime == 0. + * + * Side effects: + * none + * + *---------------------------------------------------------------------- + */ + +uint32 +Vmx86_ComputekHz(uint64 cycles, uint64 uptime) +{ + uint64 hz; + uint64 freq; + + freq = HostIF_UptimeFrequency(); + while (cycles > MAX_UINT64 / freq) { + cycles >>= 1; + uptime >>= 1; + } + + if (uptime == 0) { + return 0; + } + + hz = (cycles * freq) / uptime; + return (uint32) ((hz + 500) / 1000); +} + + +#ifdef __APPLE__ +/* + *---------------------------------------------------------------------- + * + * Vmx86GetBusyKHzEstimate + * + * Return an estimate the of the processor's kHz rating, based on + * a spinloop. This is especially useful on systems where the TSC + * is known to run at its maximum rate when we are using the CPU. + * As of 2006, Intel Macs are this way... the TSC rate is 0 if the + * CPU is in a deep enough sleep state, or at its max rate otherwise. + * + * Results: + * Processor speed in kHz. + * + * Side effects: + * None. 
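+ *
+ * Note (derived from the constants below): each of the ITERS = 100
+ * iterations spins for at least CYCLES_PER_ITER = 20000 TSC cycles, so
+ * roughly two million busy cycles and the uptime they consumed are fed
+ * to Vmx86_ComputekHz().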
+ * + *---------------------------------------------------------------------- + */ + +static INLINE_SINGLE_CALLER uint32 +Vmx86GetBusyKHzEstimate(void) +{ + static const int ITERS = 100; + static const int CYCLES_PER_ITER = 20000; + int i; + uint64 j; + uint64 aggregateCycles = 0; + uint64 aggregateUptime = 0; + + for (i = 0; i < ITERS; i++) { + NO_INTERRUPTS_BEGIN() { + aggregateCycles -= RDTSC(); + aggregateUptime -= HostIF_ReadUptime(); + for (j = RDTSC() + CYCLES_PER_ITER; RDTSC() < j; ) + ; + aggregateCycles += RDTSC(); + aggregateUptime += HostIF_ReadUptime(); + } NO_INTERRUPTS_END(); + } + + return Vmx86_ComputekHz(aggregateCycles, aggregateUptime); +} +#else // ifdef __APPLE__ + + +/* + *---------------------------------------------------------------------- + * + * Vmx86GetkHzEstimate + * + * Return an estimate of the processor's kHz rating, based on + * the ratio of the cycle counter and system uptime since the + * driver was loaded. + * This function could be called (on Windows) at IRQL DISPATCH_LEVEL. + * + *---------------------------------------------------------------------- + */ + +static INLINE_SINGLE_CALLER uint32 +Vmx86GetkHzEstimate(VmTimeStart *st) // IN: start time +{ + uint64 cDiff, tDiff; + uintptr_t flags; + + SAVE_FLAGS(flags); + CLEAR_INTERRUPTS(); + cDiff = RDTSC() - st->count; + tDiff = HostIF_ReadUptime() - st->time; + RESTORE_FLAGS(flags); + + return Vmx86_ComputekHz(cDiff, tDiff); +} +#endif // ifdef __APPLE__ + + +/* + *---------------------------------------------------------------------- + * + * Vmx86_GetkHzEstimate + * + * Return an estimate of the processor's kHz rating, based on + * the ratio of the cycle counter and system uptime since the + * driver was loaded. Or based on a spinloop. + * + * This function could be called (on Windows) at IRQL DISPATCH_LEVEL. + * + * Results: + * Processor speed in kHz. + * + * Side effects: + * Result is cached. + * + *---------------------------------------------------------------------- + */ + +uint32 +Vmx86_GetkHzEstimate(VmTimeStart *st) // IN: start time +{ + static uint32 kHz; + + /* + * Cache and return the first result for consistency. + * TSC values can be changed without notification. + * TSC frequency can be vary too (SpeedStep, slowing clock on HALT, etc.) + */ + if (kHz != 0) { + return kHz; + } + +#ifdef __APPLE__ + return kHz = Vmx86GetBusyKHzEstimate(); +#else + return kHz = Vmx86GetkHzEstimate(st); +#endif +} + + +/* + *---------------------------------------------------------------------- + * + * Vmx86_SetHostClockRate -- + * + * The monitor wants to poll for events at the given rate. If no VM + * is specified, then 'rate' is ignored and the last set rate is set + * again. + * + * Results: + * 0 for success, host-specific error code for failure. + * + * Side effects: + * May increase the host timer interrupt rate, etc. + * + *---------------------------------------------------------------------- + */ + +int +Vmx86_SetHostClockRate(VMDriver *vm, // IN: VM instance pointer + unsigned rate) // IN: rate in Hz +{ + unsigned newGlobalRate; + VMDriver *cur; + int retval = 0; + + if (!vm) { + Log("Resetting last set host clock rate of %d\n", globalFastClockRate); + HostIF_FastClockLock(0); + retval = HostIF_SetFastClockRate(globalFastClockRate); + HostIF_FastClockUnlock(0); + + return retval; + } + + /* Quick test before locks are acquired. 
*/ + if (vm->fastClockRate == rate) { + return retval; + } + + HostIF_FastClockLock(2); + if (vm->fastClockRate == rate) { + HostIF_FastClockUnlock(2); + + return retval; + } + + /* + * Loop through all vms to find new max rate. + */ + newGlobalRate = rate; + HostIF_GlobalLock(19); + for (cur = vmDriverList; cur != NULL; cur = cur->nextDriver) { + if (cur != vm && cur->fastClockRate > newGlobalRate) { + newGlobalRate = cur->fastClockRate; + } + } + HostIF_GlobalUnlock(19); + + if (newGlobalRate != globalFastClockRate) { + retval = HostIF_SetFastClockRate(newGlobalRate); + if (!retval) { + globalFastClockRate = newGlobalRate; + } + } + if (!retval) { + vm->fastClockRate = rate; + } + HostIF_FastClockUnlock(2); + + return retval; +} + + +/* + *---------------------------------------------------------------------- + * + * Vmx86_MonTimerIPI -- + * + * Check for VCPUs that are in the monitor and need an IPI to fire + * their next MonTimer callback. Should be called once per fast + * timer interrupt if the fast timer is in use. + * + * Results: + * None. + * + * Side effects: + * May send IPIs. + * + *---------------------------------------------------------------------- + */ + +void +Vmx86_MonTimerIPI(void) +{ + VMDriver *vm; + VmAbsoluteTS pNow, expiry; + + /* + * Loop through all vms -- needs the global lock to protect vmDriverList. + */ + + HostIF_GlobalLock(21); + + pNow = Vmx86_GetPseudoTSC(); + + for (vm = vmDriverList; vm != NULL; vm = vm->nextDriver) { + Vcpuid v; + VCPUSet expiredVCPUs; + VCPUSet_Empty(&expiredVCPUs); + + for (v = 0; v < vm->numVCPUs; v++) { + VMCrossPage *crosspage = vm->crosspage[v]; + + if (!crosspage) { + continue; // VCPU is not initialized yet + } + expiry = crosspage->crosspageData.monTimerExpiry; + if (expiry != 0 && expiry <= pNow) { + VCPUSet_Include(&expiredVCPUs, v); + } + } + if (!VCPUSet_IsEmpty(&expiredVCPUs) && + HostIF_IPI(vm, &expiredVCPUs) == IPI_BROADCAST) { + // no point in doing a broadcast for more than one VM. + break; + } + } + HostIF_GlobalUnlock(21); +} + + +/* + *---------------------------------------------------------------------- + * + * Vmx86_GetNumVMs -- + * + * Return the number of VMs. + * + * Results: + * The number of VMs. + * + * Side effects: + * None. + * + *---------------------------------------------------------------------- + */ + +int32 +Vmx86_GetNumVMs(void) +{ + return vmCount; +} + + +int32 +Vmx86_GetTotalMemUsage(void) +{ + VMDriver *vm; + int totalmem = 0; + + HostIF_GlobalLock(15); + vm = vmDriverList; + + for (vm = vmDriverList; vm != NULL; vm = vm->nextDriver) { + /* + * The VM lock is not strictly necessary as the vm will + * stay on the list until we release the global lock and + * because of order in which "admitted" and "mainMemSize" + * are set when each VM is admitted. + */ + + if (vm->memInfo.admitted) { + totalmem += PAGES_2_MBYTES(ROUNDUP(vm->memInfo.mainMemSize, + MBYTES_2_PAGES(1))); + } + } + + HostIF_GlobalUnlock(15); + + return totalmem; +} + + +static INLINE unsigned +Vmx86MinAllocationFunc(unsigned nonpaged, + unsigned anonymous, + unsigned mainmem, + unsigned memPct) +{ + return RatioOf(memPct, mainmem, 100) + nonpaged + anonymous; +} + + +/* + *---------------------------------------------------------------------- + * + * Vmx86MinAllocation -- + * + * Computes the minimum number of pages that must be allocated to a + * specific vm. The minAllocation for a vm is defined as + * some percentage of guest memory plus 100% of nonpagable (overhead) + * memory. 
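+ *
+ *      Worked example (illustrative numbers, assuming RatioOf(a, b, c)
+ *      computes b * a / c): memPct = 50, mainMemSize = 262144 pages
+ *      (1 GB), nonpaged = 10000 and anonymous = 5000 give
+ *      262144 * 50 / 100 + 10000 + 5000 = 146072 pages.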
+ * + * Results: + * The minAllocation for this vm. + * + * + * Side effects: + * Analyzes the vm info, requiring the vm lock. + * + *---------------------------------------------------------------------- + */ + +static INLINE unsigned +Vmx86MinAllocation(VMDriver *vm, + unsigned memPct) +{ + ASSERT(HostIF_VMLockIsHeld(vm)); + + return Vmx86MinAllocationFunc(vm->memInfo.nonpaged, vm->memInfo.anonymous, + vm->memInfo.mainMemSize, memPct); +} + + +/* + *---------------------------------------------------------------------- + * + * Vmx86CalculateGlobalMinAllocation -- + * + * Computes the sum of minimum allocations of each vm assuming a given + * percentage of guest memory must fit within host RAM. + * + * Results: + * Number of pages that must fit within host ram for a given overcommit + * level. + * + * + * Side effects: + * None. The actual minAllocations of each vm are NOT updated during + * this computation. + * + *---------------------------------------------------------------------- + */ + +static unsigned +Vmx86CalculateGlobalMinAllocation(unsigned memPct) +{ + VMDriver *vm; + unsigned minAllocation = 0; + + ASSERT(HostIF_GlobalLockIsHeld()); + /* Pages of other vms required to fit inside the hard limit. */ + for (vm = vmDriverList; vm; vm = vm->nextDriver) { + HostIF_VMLock(vm, 2); + if (vm->memInfo.admitted) { + minAllocation += Vmx86MinAllocation(vm, memPct); + } + HostIF_VMUnlock(vm, 2); + } + + return minAllocation; +} + + +/* + *---------------------------------------------------------------------- + * + * Vmx86UpdateMinAllocations -- + * + * Updates the minimum allocation for each vm based on the global + * overcommitment percentage. + * + * Results: + * minAllocations for vms are changed. + * + * + * Side effects: + * None. + * + *---------------------------------------------------------------------- + */ + +static INLINE_SINGLE_CALLER void +Vmx86UpdateMinAllocations(unsigned memPct) // IN: +{ + VMDriver *vm; + + ASSERT(HostIF_GlobalLockIsHeld()); + /* Pages of other vms required to fit inside the hard limit. */ + for (vm = vmDriverList; vm; vm = vm->nextDriver) { + HostIF_VMLock(vm, 3); + if (vm->memInfo.admitted) { + vm->memInfo.minAllocation = Vmx86MinAllocation(vm, memPct); + } + HostIF_VMUnlock(vm, 3); + } +} + + +/* + *---------------------------------------------------------------------- + * + * Vmx86_SetConfiguredLockedPagesLimit -- + * + * Set the user defined limit on the number of pages that can + * be locked. This limit can be raised at any time but not lowered. + * This avoids having a user lower the limit as vms are running and + * inadvertently cause the vms to crash because of memory starvation. + * + * Results: + * Returns TRUE on success and FALSE on failure to set the limit + * + * Side effects: + * Hard limit may be changed. + * + *---------------------------------------------------------------------- + */ + +Bool +Vmx86_SetConfiguredLockedPagesLimit(unsigned limit) // IN: +{ + Bool retval = FALSE; + + HostIF_GlobalLock(4); + if (limit >= lockedPageLimit.configured) { + lockedPageLimit.configured = limit; + retval = TRUE; + } + HostIF_GlobalUnlock(4); + + return retval; +} + + +/* + *---------------------------------------------------------------------- + * + * Vmx86_SetDynamicLockedPageLimit -- + * + * Set the dynamic locked page limit. This limit is determined by + * authd in response to host pressure. It can be both raised and + * lowered at any time. + * + * Results: + * None. + * + * Side effects: + * Hard limit may be changed. 
+ * + *---------------------------------------------------------------------- + */ + +void +Vmx86_SetDynamicLockedPagesLimit(unsigned limit) // IN: +{ + HostIF_GlobalLock(11); + lockedPageLimit.dynamic = limit; + HostIF_GlobalUnlock(11); +} + + +/* + *---------------------------------------------------------------------- + * + * Vmx86_LockPage -- + * + * Lock a page. + * + * Results: + * A PAGE_LOCK_* status code and the MPN of the locked page on success. + * + * Side effects: + * Number of global and per-VM locked pages increased. + * + *---------------------------------------------------------------------- + */ + +int +Vmx86_LockPage(VMDriver *vm, // IN: VMDriver + VA64 uAddr, // IN: VA of the page to lock + Bool allowMultipleMPNsPerVA, // IN: allow locking many pages with the same VA + MPN *mpn) // OUT +{ + int retval; + + /* Atomically check and reserve locked memory */ + if (!Vmx86ReserveFreePages(vm, 1, FALSE)) { + return PAGE_LOCK_LIMIT_EXCEEDED; + } + + HostIF_VMLock(vm, 4); + retval = HostIF_LockPage(vm, uAddr, allowMultipleMPNsPerVA, mpn); + HostIF_VMUnlock(vm, 4); + + if (retval != PAGE_LOCK_SUCCESS) { + Vmx86UnreserveFreePages(vm, 1); + } + + return retval; +} + + +/* + *---------------------------------------------------------------------- + * + * Vmx86_UnlockPage -- + * + * Unlock a page. + * + * Results: + * A PAGE_UNLOCK_* status code. + * + * Side effects: + * Number of global and per-VM locked pages decreased. + * + *---------------------------------------------------------------------- + */ + +int +Vmx86_UnlockPage(VMDriver *vm, // IN + VA64 uAddr) // IN +{ + int retval; + + HostIF_VMLock(vm, 5); + retval = HostIF_UnlockPage(vm, uAddr); + HostIF_VMUnlock(vm, 5); + + if (retval == PAGE_UNLOCK_SUCCESS) { + Vmx86UnreserveFreePages(vm, 1); + } + + return retval; +} + + +/* + *---------------------------------------------------------------------- + * + * Vmx86_UnlockPageByMPN -- + * + * Unlock a page. + * + * Results: + * A PAGE_UNLOCK_* status code. + * + * Side effects: + * Number of global and per-VM locked pages decreased. + * + *---------------------------------------------------------------------- + */ + +int +Vmx86_UnlockPageByMPN(VMDriver *vm, // IN: VMDriver + MPN mpn, // IN: the page to unlock + VA64 uAddr) // IN: optional valid VA for this MPN +{ + int retval; + + HostIF_VMLock(vm, 6); + retval = HostIF_UnlockPageByMPN(vm, mpn, uAddr); + HostIF_VMUnlock(vm, 6); + + if (retval == PAGE_UNLOCK_SUCCESS) { + Vmx86UnreserveFreePages(vm, 1); + } + + return retval; +} + + +/* + *----------------------------------------------------------------------------- + * + * Vmx86_AllocLockedPages -- + * + * Allocate physical locked pages from the kernel. + * + * Initially the pages are not mapped to any user or kernel + * address space. + * + * Results: + * Non-negative value on partial/full completion: actual number of + * allocated MPNs. MPNs of the allocated pages are copied to the + * caller's buffer at 'addr'. + * + * Negative system specific error code on error (NTSTATUS on Windows, + * etc.) + * + * Side effects: + * Number of global and per-VM locked pages is increased. + * + *----------------------------------------------------------------------------- + */ + +int +Vmx86_AllocLockedPages(VMDriver *vm, // IN: VMDriver + VA64 addr, // OUT: VA of an array for + // allocated MPNs. + unsigned numPages, // IN: number of pages to allocate + Bool kernelMPNBuffer, // IN: is the MPN buffer in kernel + // or user address space? + Bool ignoreLimits) // IN: should limits be ignored? 
+{ + int allocatedPages; + + if (!Vmx86ReserveFreePages(vm, numPages, ignoreLimits)) { + // XXX What kind of system-specific error code is that? --hpreg + return PAGE_LOCK_LIMIT_EXCEEDED; + } + + HostIF_VMLock(vm, 7); + allocatedPages = HostIF_AllocLockedPages(vm, addr, numPages, + kernelMPNBuffer); + HostIF_VMUnlock(vm, 7); + + if (allocatedPages < 0) { + Vmx86UnreserveFreePages(vm, numPages); + } else if (allocatedPages < numPages) { + Vmx86UnreserveFreePages(vm, numPages - allocatedPages); + } + + return allocatedPages; +} + + +/* + *---------------------------------------------------------------------- + * + * Vmx86_FreeLockedPages -- + * + * Frees physical locked pages from the kernel previosly allocated + * by Vmx86_AllocLockedPages(). + * + * Results: + * 0 on success, + * non-0 system specific error code on error (NTSTATUS on Windows, etc.) + * + * Side effects: + * Number of global and per-VM locked pages is decreased. + * + *---------------------------------------------------------------------- + */ + +int +Vmx86_FreeLockedPages(VMDriver *vm, // IN: VM instance pointer + VA64 addr, // IN: user or kernel array of MPNs to free + unsigned numPages, // IN: number of pages to free + Bool kernelMPNBuffer) // IN: is the MPN buffer in kernel or user address space? +{ + int ret; + + HostIF_VMLock(vm, 8); + ret = HostIF_FreeLockedPages(vm, addr, numPages, kernelMPNBuffer); + HostIF_VMUnlock(vm, 8); + + if (ret == 0) { + Vmx86UnreserveFreePages(vm, numPages); + } + + return ret; +} + + +/* + *---------------------------------------------------------------------- + * + * Vmx86_GetNextAnonPage -- + * + * Queries the driver to retrieve the list of anonymous pages. + * A supplied value of INVALID_MPN will start the query from + * the head of the list. Callers supply the previously received + * mpn to retrieve the next in the chain. Note: There is no + * guarantee of coherency. + * + * Results: + * A valid mpn or INVALID_MPN if the list has been exhausted. + * + * Side effects: + * None. + * + *---------------------------------------------------------------------- + */ + +MPN +Vmx86_GetNextAnonPage(VMDriver *vm, // IN: VM instance pointer + MPN mpn) // IN: MPN +{ + MPN ret; + + HostIF_VMLock(vm, 22); + ret = HostIF_GetNextAnonPage(vm, mpn); + HostIF_VMUnlock(vm, 22); + + return ret; +} + + +/* + *---------------------------------------------------------------------- + * + * Vmx86_GetLockedPageList -- + * + * puts MPNs of pages that were allocated by HostIF_AllocLockedPages() + * into user mode buffer. + * + * Results: + * non-negative number of the MPNs in the buffer on success. + * negative error code on error. + * + * Side effects: + * none + * + *---------------------------------------------------------------------- + */ + +int +Vmx86_GetLockedPageList(VMDriver *vm, // IN: VM instance pointer + VA64 uAddr, // OUT: user mode buffer for MPNs + unsigned int numPages) // IN: size of the buffer in MPNs +{ + int ret; + + HostIF_VMLock(vm, 9); + ret = HostIF_GetLockedPageList(vm, uAddr, numPages); + HostIF_VMUnlock(vm, 9); + + return ret; +} + + +/* + *---------------------------------------------------------------------- + * + * Vmx86_GetMemInfo -- + * + * Return the info about all VMs. + * + * Results: + * TRUE if all info was successfully copied. + * + * Side effects: + * VMGetMemInfoArgs is filled in. If the supplied curVM is null + * then only the baseline information will be returned. 
Calling + * with a null curVM may return results for maxLockedPages + * that differ from those when the vm is passed if huge pages + * are in use. + * + *---------------------------------------------------------------------- + */ + +Bool +Vmx86_GetMemInfo(VMDriver *curVM, + Bool curVMOnly, + VMMemInfoArgs *outArgs, + int outArgsLength) +{ + VMDriver *vm; + int outSize; + int wantedVMs; + + HostIF_GlobalLock(7); + + if (curVMOnly) { + wantedVMs = 1; + } else { + wantedVMs = vmCount; + } + + outSize = VM_GET_MEM_INFO_SIZE(wantedVMs); + if (outSize > outArgsLength) { + HostIF_GlobalUnlock(7); + + return FALSE; + } + + outArgs->numVMs = wantedVMs; + outArgs->numLockedPages = numLockedPages; + outArgs->maxLockedPages = Vmx86LockedPageLimit(curVM); + outArgs->lockedPageLimit = lockedPageLimit; + outArgs->globalMinAllocation = Vmx86CalculateGlobalMinAllocation(minVmMemPct); + outArgs->minVmMemPct = minVmMemPct; + outArgs->callerIndex = (uint32)-1; + outArgs->currentTime = HostIF_ReadUptime() / HostIF_UptimeFrequency(); + + if (curVM == NULL) { + HostIF_GlobalUnlock(7); + + return TRUE; + } + + curVM->memInfo.timestamp = outArgs->currentTime; + if (wantedVMs == 1) { + outArgs->memInfo[0] = curVM->memInfo; + outArgs->callerIndex = 0; + } else { + int i; + for (i = 0, vm = vmDriverList; + vm != NULL && i < vmCount; + i++, vm = vm->nextDriver) { + if (vm == curVM) { + outArgs->callerIndex = i; + } + HostIF_VMLock(vm, 10); + outArgs->memInfo[i] = vm->memInfo; + HostIF_VMUnlock(vm, 10); + } + } + + HostIF_GlobalUnlock(7); + if (outArgs->callerIndex == -1) { + return FALSE; + } + return TRUE; +} + + +/* + *---------------------------------------------------------------------- + * + * Vmx86SetMemoryUsage -- + * + * Updates the paged, nonpaged, and anonymous memory reserved memory + * values for the vm. + * + * Results: + * None + * + * Side effects: + * None + * + *---------------------------------------------------------------------- + */ + +static void +Vmx86SetMemoryUsage(VMDriver *curVM, // IN/OUT + unsigned paged, // IN + unsigned nonpaged, // IN + unsigned anonymous, // IN + unsigned aminVmMemPct) // IN +{ + ASSERT(HostIF_VMLockIsHeld(curVM)); + curVM->memInfo.paged = paged; + curVM->memInfo.nonpaged = nonpaged; + curVM->memInfo.anonymous = anonymous; + curVM->memInfo.minAllocation = Vmx86MinAllocation(curVM, aminVmMemPct); + curVM->memInfo.maxAllocation = curVM->memInfo.mainMemSize + nonpaged + + anonymous; +} + + +/* + *---------------------------------------------------------------------- + * + * Vmx86_Admit -- + * + * Set the memory management information about this VM and handles + * admission control. We allow vm to power on if there is room for + * the minimum allocation for all running vms in memory. Note that + * the hard memory limit can change dynamically in windows so we + * don't have guarantees due to admission control. + * + * Results: + * Returns global information about the memory state in args as well + * as a value indicating whether or not the virtual machine was + * started. + * + * Side effects: + * None + * + *---------------------------------------------------------------------- + */ + +void +Vmx86_Admit(VMDriver *curVM, // IN + VMMemInfoArgs *args) // IN/OUT +{ + Bool allowAdmissionCheck = FALSE; + unsigned int globalMinAllocation; + + HostIF_GlobalLock(9); + + /* + * Update the overcommitment level and minimums for all vms if they can + * fit under new minimum limit. If they do not fit, do nothing. 
And of + * course if existing VMs cannot fit under limit, likelihood that new VM + * will fit in is zero. + */ + + globalMinAllocation = Vmx86CalculateGlobalMinAllocation(args->minVmMemPct); + if (globalMinAllocation <= Vmx86LockedPageLimit(NULL)) { + allowAdmissionCheck = TRUE; + minVmMemPct = args->minVmMemPct; + Vmx86UpdateMinAllocations(args->minVmMemPct); + } + + HostIF_VMLock(curVM, 12); + + curVM->memInfo.shares = args->memInfo->shares; + curVM->memInfo.touchedPct = 100; + curVM->memInfo.dirtiedPct = 100; + curVM->memInfo.mainMemSize = args->memInfo->mainMemSize; + curVM->memInfo.perVMOverhead = args->memInfo->perVMOverhead; + + /* + * Always set the allocations required for the current configuration + * so that the user will know how bad situation really is with the + * suggested percentage. + */ + + curVM->memInfo.admitted = FALSE; + Vmx86SetMemoryUsage(curVM, args->memInfo->paged, args->memInfo->nonpaged, + args->memInfo->anonymous, args->minVmMemPct); + if (allowAdmissionCheck && + globalMinAllocation + curVM->memInfo.minAllocation <= + Vmx86LockedPageLimit(curVM)) { + curVM->memInfo.admitted = TRUE; + } + +#if defined _WIN32 + if (curVM->memInfo.admitted) { + unsigned int allocatedPages, nonpaged; + signed int pages; + MPN *mpns; + + /* + * More admission control: Get enough memory for the nonpaged portion + * of the VM. Drop locks for this long operation. + * XXX Timeout? + */ + + HostIF_VMUnlock(curVM, 12); + HostIF_GlobalUnlock(9); + +#define ALLOCATE_CHUNK_SIZE 64 + allocatedPages = 0; + nonpaged = args->memInfo->nonpaged + args->memInfo->anonymous; + mpns = HostIF_AllocKernelMem(nonpaged * sizeof *mpns, FALSE); + if (mpns == NULL) { + goto undoAdmission; + } + while (allocatedPages < nonpaged) { + pages = Vmx86_AllocLockedPages(curVM, + PtrToVA64(mpns + allocatedPages), + MIN(ALLOCATE_CHUNK_SIZE, nonpaged - allocatedPages), + TRUE, + FALSE); + if (pages <= 0) { + break; + } + allocatedPages += pages; + } + + /* + * Free the allocated pages. + * XXX Do not free the pages but hand them directly to the admitted VM. + */ + + for (pages = 0; pages < allocatedPages; pages += ALLOCATE_CHUNK_SIZE) { + Vmx86_FreeLockedPages(curVM, PtrToVA64(mpns + pages), + MIN(ALLOCATE_CHUNK_SIZE, allocatedPages - pages), TRUE); + } + HostIF_FreeKernelMem(mpns); +#undef ALLOCATE_CHUNK_SIZE + +undoAdmission: + if (allocatedPages != nonpaged) { + curVM->memInfo.admitted = FALSE; // undo admission + } + + HostIF_GlobalLock(9); + HostIF_VMLock(curVM, 12); + } +#endif + + /* Return global state to the caller. 
*/ + args->memInfo[0] = curVM->memInfo; + args->numVMs = vmCount; + args->numLockedPages = numLockedPages; + args->maxLockedPages = Vmx86LockedPageLimit(curVM); + args->lockedPageLimit = lockedPageLimit; + args->globalMinAllocation = globalMinAllocation; + HostIF_VMUnlock(curVM, 12); + HostIF_GlobalUnlock(9); +} + + +Bool +Vmx86_Readmit(VMDriver *curVM, OvhdMem_Deltas *delta) +{ + unsigned globalMinAllocation, newMinAllocation; + Bool retval = FALSE; + int paged; + int nonpaged; + int anonymous; + + HostIF_GlobalLock(31); + globalMinAllocation = Vmx86CalculateGlobalMinAllocation(minVmMemPct); + HostIF_VMLock(curVM, 31); + paged = curVM->memInfo.paged + delta->paged; + nonpaged = curVM->memInfo.nonpaged + delta->nonpaged; + anonymous = curVM->memInfo.anonymous + delta->anonymous; + + if (nonpaged >= 0 && paged >= 0 && anonymous >= 0) { + globalMinAllocation -= Vmx86MinAllocation(curVM, minVmMemPct); + newMinAllocation = Vmx86MinAllocationFunc(nonpaged, anonymous, + curVM->memInfo.mainMemSize, + minVmMemPct); + if (globalMinAllocation + newMinAllocation <= Vmx86LockedPageLimit(curVM) || + (delta->paged <= 0 && delta->nonpaged <= 0 && delta->anonymous <= 0)) { + Vmx86SetMemoryUsage(curVM, paged, nonpaged, anonymous, minVmMemPct); + retval = TRUE; + } + } + HostIF_VMUnlock(curVM, 31); + HostIF_GlobalUnlock(31); + + return retval; +} + + +/* + *---------------------------------------------------------------------- + * + * Vmx86_UpdateMemInfo -- + * + * Updates information about this VM with the new data supplied in + * a patch. + * + * Results: + * Sets the memory usage by this vm based on its memSample data. + * + * Side effects: + * None + * + *---------------------------------------------------------------------- + */ + +void +Vmx86_UpdateMemInfo(VMDriver *curVM, + const VMMemMgmtInfoPatch *patch) +{ + ASSERT(patch->touchedPct <= 100 && patch->dirtiedPct <= 100); + HostIF_VMLock(curVM, 13); + curVM->memInfo.touchedPct = AsPercent(patch->touchedPct); + curVM->memInfo.dirtiedPct = AsPercent(patch->dirtiedPct); + curVM->memInfo.hugePageBytes = patch->hugePageBytes; + HostIF_VMUnlock(curVM, 13); +} + + +/* + *---------------------------------------------------------------------- + * + * Vmx86_PAEEnabled -- + * + * Is PAE enabled? + * + * Results: + * TRUE if PAE enabled. + * + * Side effects: + * None. + * + *---------------------------------------------------------------------- + */ + +Bool +Vmx86_PAEEnabled(void) +{ + uintptr_t cr4; + + GET_CR4(cr4); + + return (cr4 & CR4_PAE) != 0; +} + + +/* + *---------------------------------------------------------------------- + * + * Vmx86_VMXEnabled -- + * + * Test the VMXE bit as an easy proxy for whether VMX operation + * is enabled. + * + * Results: + * TRUE if the CPU supports VT and CR4.VMXE is set. + * + * Side effects: + * None. + * + *---------------------------------------------------------------------- + */ + +Bool +Vmx86_VMXEnabled(void) +{ + if (VT_CapableCPU()) { + uintptr_t cr4; + + GET_CR4(cr4); + + return (cr4 & CR4_VMXE) != 0; + } else { + return FALSE; + } +} + + +/* + *----------------------------------------------------------------------------- + * + * Vmx86LookupVMByUserIDLocked -- + * + * Lookup a VM by userID. The caller must hold the global lock. + * + * Returns: + * On success: Pointer to the driver's VM instance. + * On failure: NULL (not found). 
+ * + * Side effects: + * None + * + *----------------------------------------------------------------------------- + */ + +static VMDriver * +Vmx86LookupVMByUserIDLocked(int userID) // IN +{ + VMDriver *vm; + + ASSERT(HostIF_GlobalLockIsHeld()); + + for (vm = vmDriverList; vm != NULL; vm = vm->nextDriver) { + if (vm->userID == userID) { + return vm; + } + } + + return NULL; +} + + +/* + *----------------------------------------------------------------------------- + * + * Vmx86_LookupVMByUserID -- + * + * Lookup a VM by userID. + * + * Returns: + * On success: Pointer to the driver's VM instance. + * On failure: NULL (not found). + * + * Side effects: + * None + * + *----------------------------------------------------------------------------- + */ + +VMDriver * +Vmx86_LookupVMByUserID(int userID) // IN +{ + VMDriver *vm; + + HostIF_GlobalLock(10); + vm = Vmx86LookupVMByUserIDLocked(userID); + HostIF_GlobalUnlock(10); + + return vm; +} + + +/* + *---------------------------------------------------------------------- + * + * Vmx86_FastSuspResSetOtherFlag -- + * + * Sets the value of other VM's fastSuspResFlag. + * + * Returns: + * TRUE if VM was found and flag was set successfully. + * FALSE if VM was not found. + * + * Side effects: + * The value we set the flag to is this VM's userID. + * + *---------------------------------------------------------------------- + */ + +Bool +Vmx86_FastSuspResSetOtherFlag(VMDriver *vm, // IN + int otherVmUserId) // IN +{ + VMDriver *otherVM; + + HostIF_GlobalLock(35); + otherVM = Vmx86LookupVMByUserIDLocked(otherVmUserId); + if (otherVM != NULL) { + ASSERT(otherVM->fastSuspResFlag == 0); + otherVM->fastSuspResFlag = vm->userID; + } else { + Warning("otherVmUserId (%d) is invalid", otherVmUserId); + } + HostIF_GlobalUnlock(35); + + return otherVM != NULL; +} + + +/* + *---------------------------------------------------------------------- + * + * Vmx86_FastSuspResGetMyFlag -- + * + * Gets the value of fastSuspResFlag. If blockWait is true, this + * function will not return until the flag is non-zero, or until + * timeout. + * + * Returns: + * The value of the flag which, if non-zero, should be the userID of + * the vm that set it. + * + * Side effects: + * The flag is reset to zero once read. + * + *---------------------------------------------------------------------- + */ + +int +Vmx86_FastSuspResGetMyFlag(VMDriver *vm, // IN + Bool blockWait) // IN +{ + int retval = 0; + int ntries = 1; + const int waitInterval = 10; /* Wait 10ms at a time. */ + const int maxWaitTime = 100000; /* Wait maximum of 100 seconds. */ + + if (blockWait) { + ntries = maxWaitTime / waitInterval; + } + + while (ntries--) { + HostIF_GlobalLock(6); + retval = vm->fastSuspResFlag; + vm->fastSuspResFlag = 0; + HostIF_GlobalUnlock(6); + if (retval || !ntries) { + break; + } + HostIF_Wait(waitInterval); + } + + return retval; +} + + +/* + *----------------------------------------------------------------------------- + * + * Vmx86EnableHVOnCPU -- + * + * Enable HV on the current CPU, if possible. + * + * Results: + * None. + * + * Side effects: + * HV will be enabled, if possible. 
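+ *
+ * Note: on AMD this clears SVME_DISABLE and sets SVM_LOCK in MSR_VM_CR
+ * when CPUID reports SVM lock support; on Intel it sets the VMXE and
+ * LOCK bits in MSR_FEATCTL if that MSR is not already locked.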
+ * + *----------------------------------------------------------------------------- + */ + +static void +Vmx86EnableHVOnCPU(void) +{ + if (SVM_CapableCPU()) { + uint64 vmCR = __GET_MSR(MSR_VM_CR); + if (!SVM_LockedFromFeatures(vmCR)) { + CPUIDRegs regs; + __GET_CPUID(0x8000000A, ®s); + if (CPUID_GET(0x8000000A, EDX, SVM_LOCK, regs.edx) != 0) { + __SET_MSR(MSR_VM_CR, (vmCR & ~MSR_VM_CR_SVME_DISABLE) | + MSR_VM_CR_SVM_LOCK); + } + } + } + if (VT_CapableCPU()) { + uint64 featCtl = __GET_MSR(MSR_FEATCTL); + if (!VT_LockedFromFeatures(featCtl)) { + __SET_MSR(MSR_FEATCTL, featCtl | MSR_FEATCTL_LOCK | MSR_FEATCTL_VMXE); + } + } +} + + +/* + *----------------------------------------------------------------------------- + * + * Vmx86RefClockInCycles -- + * + * Convert the reference clock (HostIF_Uptime) to cycle units. + * + *----------------------------------------------------------------------------- + */ + +static INLINE uint64 +Vmx86RefClockInCycles(uint64 uptime) +{ + return Mul64x3264(uptime, + pseudoTSC.refClockToPTSC.ratio.mult, + pseudoTSC.refClockToPTSC.ratio.shift); +} + + +/* + *----------------------------------------------------------------------------- + * + * Vmx86RefClockToPTSC -- + * + * Convert from the reference clock (HostIF_Uptime) time to pseudo TSC. + * + *----------------------------------------------------------------------------- + */ + +static INLINE uint64 +Vmx86RefClockToPTSC(uint64 uptime) +{ + return Vmx86RefClockInCycles(uptime) + + Atomic_Read64(&pseudoTSC.refClockToPTSC.add); +} + + +/* + *----------------------------------------------------------------------------- + * + * Vmx86_InitPseudoTSC -- + * + * Initialize the pseudo TSC state if it is not already initialized. + * If another vmx has initialized the pseudo TSC, then we continue to + * use the parameters specified by the first vmx. + * + * Results: + * None + * + * Side effects: + * - Updates tscHz, the frequency of the PTSC in Hz. That frequency may + * differ from the value passed in if another VM is already running. + * - Updates the refClkToTSC parameters to be consistent with the tscHz + * value that's in use. + * + *----------------------------------------------------------------------------- + */ + +void +Vmx86_InitPseudoTSC(PTSCInitParams *params) // IN/OUT +{ + VmTimeStart startTime; + uint64 tsc, uptime; + + HostIF_GlobalLock(36); + + if (!pseudoTSC.initialized) { + pseudoTSC.hz = params->tscHz; + pseudoTSC.refClockToPTSC.ratio.mult = params->refClockToPTSC.mult; + pseudoTSC.refClockToPTSC.ratio.shift = params->refClockToPTSC.shift; + + Vmx86_ReadTSCAndUptime(&startTime); + tsc = startTime.count; + uptime = startTime.time; + + /* Start Pseudo TSC at initialPTSC (usually 0). */ + pseudoTSC.tscOffset = params->initialPTSC - tsc; + Atomic_Write64(&pseudoTSC.refClockToPTSC.add, + params->initialPTSC - Vmx86RefClockInCycles(uptime)); + + /* forceRefClock gets priority. */ + pseudoTSC.useRefClock = params->forceRefClock; + pseudoTSC.neverSwitchToRefClock = params->forceTSC; + pseudoTSC.hwTSCsSynced = params->hwTSCsSynced; + Log("PTSC: initialized at %"FMT64"u Hz using %s, TSCs are %ssynchronized.\n", + pseudoTSC.hz, pseudoTSC.useRefClock ? "reference clock" : "TSC", + pseudoTSC.hwTSCsSynced ? "" : "not "); + + pseudoTSC.initialized = TRUE; + } + /* + * Allow the calling vmx to respect ptsc.noTSC=TRUE config option + * even if another vmx is already running (pseudoTSC was already + * initialized). Useful for testing. 
+ */ + if (params->forceRefClock) { + Vmx86_SetPseudoTSCUseRefClock(); + } + params->refClockToPTSC.mult = pseudoTSC.refClockToPTSC.ratio.mult; + params->refClockToPTSC.shift = pseudoTSC.refClockToPTSC.ratio.shift; + params->refClockToPTSC.add = Atomic_Read64(&pseudoTSC.refClockToPTSC.add); + params->tscOffset = pseudoTSC.tscOffset; + params->tscHz = pseudoTSC.hz; + params->hwTSCsSynced = pseudoTSC.hwTSCsSynced; + + HostIF_GlobalUnlock(36); +} + + +/* + *----------------------------------------------------------------------------- + * + * Vmx86_GetPseudoTSC -- + * + * Read the pseudo TSC. We prefer to implement the pseudo TSC using + * TSC. On systems where the TSC varies its rate (e.g. Pentium M), + * stops advancing when the core is in deep sleep (e.g. Core 2 Duo), + * or the TSCs can get out of sync across cores (e.g. Opteron due to + * halt clock ramping, Core 2 Duo due to independent core deep sleep + * states; though WinXP does handle the Core 2 Duo out of sync case; + * and on IBM x-Series NUMA machines), we use a reference clock + * (HostIF_ReadUptime()) as the basis for pseudo TSC. + * + * Note that we depend on HostIF_ReadUptime being a high resolution + * timer that is synchronized across all cores. + * + * Results: + * Current value of the PTSC. + * + *----------------------------------------------------------------------------- + */ + +uint64 +Vmx86_GetPseudoTSC(void) +{ + if (Vmx86_PseudoTSCUsesRefClock()) { + return Vmx86RefClockToPTSC(HostIF_ReadUptime()); + } + return RDTSC() + pseudoTSC.tscOffset; +} + + +/* + *----------------------------------------------------------------------------- + * + * Vmx86_CheckPseudoTSC -- + * + * Periodically called by userspace to check whether the TSC is + * reliable, using the reference clock as the trusted time source. + * If the TSC is unreliable, switch the basis of the PTSC from the + * TSC to the reference clock. + * + * Also, recompute the "add" component of the reference clock to PTSC + * conversion, to periodically eliminate the drift between the two + * clocks. That way, if the PTSC switches from using the TSC to the + * reference clock, PTSC will remain (roughly) continuous. See PR + * 547055. + * + * Note that we might be executing concurrently with other threads, + * but it doesn't matter since we only ever go from using the TSC to + * using the reference clock, never the other direction. + * + * Results: + * TRUE if the PTSC is implemented by the reference clock. + * FALSE if the PTSC is implemented by the TSC. + * + * Side effects: + * May switch the basis of the PTSC from the TSC to the reference clock. + * + *----------------------------------------------------------------------------- + */ + +Bool +Vmx86_CheckPseudoTSC(uint64 *lastTSC, // IN/OUT: last/current value of the TSC + uint64 *lastRC) // IN/OUT: last/current value of the reference clock +{ + VmTimeStart curTime; + + Vmx86_ReadTSCAndUptime(&curTime); + + if (pseudoTSC.initialized && *lastTSC && !Vmx86_PseudoTSCUsesRefClock()) { + uint64 tsc, refClkTS, refClkLastTS; + uint64 tscDiff, refClkDiff; + + tsc = curTime.count; + + refClkTS = Vmx86RefClockInCycles(curTime.time); + refClkLastTS = Vmx86RefClockInCycles(*lastRC); + + tscDiff = tsc - *lastTSC; + refClkDiff = refClkTS - refClkLastTS; + + if (((int64)tscDiff < 0) || + (tscDiff * 100 < refClkDiff * 95) || + (tscDiff * 95 > refClkDiff * 100)) { + /* + * TSC went backwards or drifted from the reference clock by + * more than 5% over the last poll period. 
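+ *
+ * Example (illustrative numbers): with refClkDiff = 1,000,000 cycles
+ * over the poll period, a tscDiff below 950,000 or above roughly
+ * 1,052,632 trips the check, i.e. the TSC is kept only while
+ * 0.95 * refClkDiff <= tscDiff <= (100/95) * refClkDiff.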
+ */ + Vmx86_SetPseudoTSCUseRefClock(); + } else { + uint64 ptscFromTSC = tsc + pseudoTSC.tscOffset; + Atomic_Write64(&pseudoTSC.refClockToPTSC.add, ptscFromTSC - refClkTS); + } + } + *lastTSC = curTime.count; + *lastRC = curTime.time; + + return Vmx86_PseudoTSCUsesRefClock(); +} + + +typedef struct { + Atomic_uint32 index; + MSRQuery *query; +} Vmx86GetMSRData; + + +/* + *----------------------------------------------------------------------------- + * + * Vmx86GetMSR -- + * + * Collect MSR value on the current logical CPU. + * + * Function must not block (it is invoked from interrupt context). + * Only VT MSRs are supported on VT-capable processors. + * + * Results: + * None. + * + * Side effects: + * 'data->index' is atomically incremented by one. + * + *----------------------------------------------------------------------------- + */ + +static void +Vmx86GetMSR(void *clientData) // IN/OUT: A Vmx86GetMSRData * +{ + Vmx86GetMSRData *data = (Vmx86GetMSRData *)clientData; + MSRQuery *query; + uint32 index; + int err; + + ASSERT(data); + query = data->query; + ASSERT(query); + + index = Atomic_ReadInc32(&data->index); + if (index >= query->numLogicalCPUs) { + return; + } + + query->logicalCPUs[index].tag = HostIF_GetCurrentPCPU(); + + /* + * We treat BIOS_SIGN_ID (microcode version) specially on Intel, + * where the preferred read sequence involves a macro. + */ + + if (CPUID_GetVendor() == CPUID_VENDOR_INTEL && + query->msrNum == MSR_BIOS_SIGN_ID) { + /* safe to read: MSR_BIOS_SIGN_ID architectural since Pentium Pro */ + query->logicalCPUs[index].msrVal = INTEL_MICROCODE_VERSION(); + err = 0; + } else { + /* + * Try to enable HV any time these MSRs are queried. We have seen + * buggy formware that forgets to re-enable HV after waking from + * deep sleep. [PR 1020692] + */ + if (query->msrNum == MSR_FEATCTL || query->msrNum == MSR_VM_CR) { + Vmx86EnableHVOnCPU(); + } + err = HostIF_SafeRDMSR(query->msrNum, &query->logicalCPUs[index].msrVal); + } + + query->logicalCPUs[index].implemented = (err == 0) ? 1 : 0; +} + + +/* + *----------------------------------------------------------------------------- + * + * Vmx86_GetAllMSRs -- + * + * Collect MSR value on all logical CPUs. + * + * The caller is responsible for ensuring that the requested MSR is valid + * on all logical CPUs. + * + * 'query->numLogicalCPUs' is the size of the 'query->logicalCPUs' output + * array. + * + * Results: + * On success: TRUE. 'query->logicalCPUs' is filled and + * 'query->numLogicalCPUs' is adjusted accordingly. + * On failure: FALSE. Happens if 'query->numLogicalCPUs' was too small. + * + * Side effects: + * None + * + *----------------------------------------------------------------------------- + */ + +Bool +Vmx86_GetAllMSRs(MSRQuery *query) // IN/OUT +{ + Vmx86GetMSRData data; + + Atomic_Write32(&data.index, 0); + data.query = query; + + HostIF_CallOnEachCPU(Vmx86GetMSR, &data); + + /* + * At this point, Atomic_Read32(&data.index) is the number of logical CPUs + * who replied. + */ + + if (Atomic_Read32(&data.index) > query->numLogicalCPUs) { + return FALSE; + } + + ASSERT(Atomic_Read32(&data.index) <= query->numLogicalCPUs); + query->numLogicalCPUs = Atomic_Read32(&data.index); + + return TRUE; +} + + +/* + *---------------------------------------------------------------------- + * + * Vmx86_YieldToSet -- + * + * Yield the CPU until a vCPU from the requested set has run. + * + * usecs is the total spin time in monitor. 
Very low numbers + * indicate we detected there was a vCPU thread that was not + * in the monitor, so we didn't spin. In that case, simply + * nudge the threads we want and return. + * + * Results: + * The current CPU yields whenever possible. + * + * Side effects: + * None. + * + *---------------------------------------------------------------------- + */ + +void +Vmx86_YieldToSet(VMDriver *vm, // IN: + Vcpuid currVcpu, // IN: + const VCPUSet *req, // IN: + uint32 usecs, // IN: + Bool skew) // IN: +{ + VCPUSet vcpus; + + if (VCPUSet_IsEmpty(req)) { + return; + } + +#ifdef __APPLE__ + if (skew) { + /* Mac scheduler yield does fine in the skew case */ + (void)thread_block(THREAD_CONTINUE_NULL); + return; + } +#endif + + /* Crosscalls should spin a few times before blocking */ + if (!skew && usecs < CROSSCALL_SPIN_SHORT_US) { + HostIF_WakeUpYielders(vm, currVcpu); + return; + } + + if (HostIF_PrepareWaitForThreads(vm, currVcpu)) { + return; + } + + VCPUSet_Empty(&vcpus); + FOR_EACH_VCPU_IN_SET(req, vcpuid) { + if (vcpuid == currVcpu) { + continue; + } + /* + * First assume the vCPU we want to have wake up the current vCPU + * is out of the monitor, so set its wakeup bit corresponding to + * the current vCPU. It may or may not actually be on the vmmon side. + */ + + VCPUSet_AtomicInclude(&vm->crosscallWaitSet[vcpuid], currVcpu); + + /* + * Now that the bit is set, check whether the vCPU is in vmmon. If + * it was previously in vmmon, and then took a trip to the monitor + * and back before we got here, then the wakeup has already been sent. + * If it is in the monitor, either it started in vmmon and sent the + * wakeup, or it was there the entire time. In either case we can + * clear the bit. This is safe because the bit is directed solely + * at the current vCPU. + */ + + if (vm->currentHostCpu[vcpuid] != INVALID_PCPU) { + VCPUSet_AtomicRemove(&vm->crosscallWaitSet[vcpuid], currVcpu); + } else { + if (VCPUSet_AtomicIsMember(&vm->crosscallWaitSet[vcpuid], currVcpu)) { + VCPUSet_Include(&vcpus, vcpuid); + } + } + } ROF_EACH_VCPU_IN_SET(); + + /* + * Wake up any threads that had previously yielded the processor to + * let this one run. + */ + + HostIF_WakeUpYielders(vm, currVcpu); + + /* + * If this thread has other threads to wait for, and no other threads + * are waiting for this thread, block until one of the threads we're + * waiting for has run. + */ + + if (!VCPUSet_IsEmpty(&vcpus) && + VCPUSet_IsEmpty(&vm->crosscallWaitSet[currVcpu])) { + HostIF_WaitForThreads(vm, currVcpu); + } + + /* + * Tell other vcpus that they no longer have to wake this one. + * This is optional, the other threads will eventually clear their + * bits anyway. + */ + + FOR_EACH_VCPU_IN_SET(&vcpus, vcpuid) { + VCPUSet_AtomicRemove(&vm->crosscallWaitSet[vcpuid], currVcpu); + } ROF_EACH_VCPU_IN_SET(); + + HostIF_CancelWaitForThreads(vm, currVcpu); +} + + +/* + *---------------------------------------------------------------------- + * + * Vmx86PerfCtrInUse -- + * + * Determine which performance counters are already in use by the + * host on the current PCPU. A performance counter is considered + * in use if its event select enable bit is set or if this method + * is unable to count events with the performance counter. + * + * Results: + * Return TRUE if counter is in use. + * + * Side effects: + * None. 
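+ *
+ * Probe outline (informal summary of the code below): save the
+ * control and count MSRs, enable the counter for kernel-mode
+ * counting (retired instructions in the general-purpose case), spin
+ * through a short delay loop, then restore both MSRs.  If the count
+ * did not advance, the counter is reported as in use.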
+ *---------------------------------------------------------------------- + */ +static Bool +Vmx86PerfCtrInUse(Bool isGen, unsigned pmcNum, unsigned ctrlMSR, + unsigned cntMSR, Bool hasPGC) +{ + volatile unsigned delay; + uint64 origPGC = hasPGC ? __GET_MSR(PERFCTR_CORE_GLOBAL_CTRL_ADDR) : 0; + uint64 pmcCtrl; + uint64 pmcCount, count; + uint64 ctrlEna, pgcEna; + + pmcCtrl = __GET_MSR(ctrlMSR); + if (isGen) { + ASSERT(pmcNum < 32); + if ((pmcCtrl & PERFCTR_CPU_ENABLE) != 0) { + return TRUE; + } + ctrlEna = PERFCTR_CPU_ENABLE | PERFCTR_CPU_KERNEL_MODE | + PERFCTR_CORE_INST_RETIRED; + pgcEna = CONST64U(1) << pmcNum; + } else { + ASSERT(pmcNum < 3); + if ((pmcCtrl & PERFCTR_CORE_FIXED_ENABLE_MASKn(pmcNum)) != 0) { + return TRUE; + } + ctrlEna = pmcCtrl | PERFCTR_CORE_FIXED_KERNEL_MASKn(pmcNum); + pgcEna = CONST64U(1) << (pmcNum + 32); + } + pmcCount = __GET_MSR(cntMSR); + /* Enable the counter. */ + __SET_MSR(ctrlMSR, ctrlEna); + if (hasPGC) { + __SET_MSR(PERFCTR_CORE_GLOBAL_CTRL_ADDR, pgcEna | origPGC); + } + /* Retire some instructions and wait a few cycles. */ + for (delay = 0; delay < 100; delay++) ; + /* Disable the counter. */ + if (hasPGC) { + __SET_MSR(PERFCTR_CORE_GLOBAL_CTRL_ADDR, origPGC); + } + count = __GET_MSR(cntMSR); + __SET_MSR(ctrlMSR, pmcCtrl); + __SET_MSR(cntMSR, pmcCount); + return count == pmcCount; +} + + +/* + *---------------------------------------------------------------------- + * + * Vmx86GetUnavailPerfCtrsOnCPU -- + * + * Determine which performance counters are already in use by the + * host on the current PCPU. + * + * Results: + * A bitset representing unavailable performance counter. + * Bits 0-31 represent general purpose counters, and bits 32-63 + * represent fixed counters. + * + * Side effects: + * None. + * + *---------------------------------------------------------------------- + */ + +static void +Vmx86GetUnavailPerfCtrsOnCPU(void *data) +{ + CPUIDRegs regs; + unsigned i, numGen = 0, numFix = 0, stride = 1; + uint32 selBase = 0; + uint32 ctrBase = 0; + Bool hasPGC = FALSE; + Atomic_uint64 *ctrs = (Atomic_uint64 *)data; + uintptr_t flags; + if (CPUID_GetVendor() == CPUID_VENDOR_INTEL) { + unsigned version; + if (__GET_EAX_FROM_CPUID(0) < 0xA) { + return; + } + __GET_CPUID(0xA, ®s); + version = CPUID_GET(0xA, EAX, PMC_VERSION, regs.eax); + if (version == 0) { + return; + } + numGen = CPUID_GET(0xA, EAX, PMC_NUM_GEN, regs.eax); + if (version >= 2) { + numFix = CPUID_GET(0xA, EDX, PMC_NUM_FIXED, regs.edx); + hasPGC = TRUE; + } + selBase = PERFCTR_CORE_PERFEVTSEL0_ADDR; + ctrBase = PERFCTR_CORE_PERFCTR0_ADDR; + } else if (CPUID_GetVendor() == CPUID_VENDOR_AMD) { + if (CPUID_FAMILY_IS_BULLDOZER(__GET_EAX_FROM_CPUID(1))) { + numGen = 6; + selBase = PERFCTR_BD_BASE_ADDR + PERFCTR_BD_EVENTSEL; + ctrBase = PERFCTR_BD_BASE_ADDR + PERFCTR_BD_CTR; + stride = 2; + } else { + numGen = 4; + selBase = PERFCTR_AMD_PERFEVTSEL0_ADDR; + ctrBase = PERFCTR_AMD_PERFCTR0_ADDR; + } + } + ASSERT(numGen <= 32 && numFix <= 32); + + /* + * Vmx86PerfCtrInUse modifies performance counters to determine if + * if they are usable, disable interrupts to avoid racing with + * interrupt handlers. 
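+ * The SAVE_FLAGS / CLEAR_INTERRUPTS / RESTORE_FLAGS bracket below
+ * keeps the probe sequence atomic with respect to local interrupts
+ * on this PCPU.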
+ */ + SAVE_FLAGS(flags); + CLEAR_INTERRUPTS(); + for (i = 0; i < numGen; i++) { + if (Vmx86PerfCtrInUse(TRUE, i, selBase + i * stride, + ctrBase + i * stride, hasPGC)) { + Atomic_SetBit64(ctrs, i); + } + } + if (numFix > 0) { + for (i = 0; i < numFix; i++) { + if (Vmx86PerfCtrInUse(FALSE, i, PERFCTR_CORE_FIXED_CTR_CTRL_ADDR, + PERFCTR_CORE_FIXED_CTR0_ADDR + i, hasPGC)) { + Atomic_SetBit64(ctrs, i + 32); + } + } + } + RESTORE_FLAGS(flags); +} + + +/* + *---------------------------------------------------------------------- + * + * Vmx86_GetUnavailablePerfCtrs -- + * + * Determine which performance counters are already in use by the + * host on across all PCPUs, and therefore unavailable for use by + * the monitor. A performance counter is considered in use if its + * event select enable bit on any PCPU is set. + * + * Results: + * A bitset representing unavailable performance counter. + * Bits 0-31 represent general purpose counters, and bits 32-63 + * represent fixed counters. + * + * Side effects: + * None. + * + *---------------------------------------------------------------------- + */ + +uint64 +Vmx86_GetUnavailablePerfCtrs(void) +{ + Atomic_uint64 unavailCtrs; + Atomic_Write64(&unavailCtrs, 0); + HostIF_CallOnEachCPU(Vmx86GetUnavailPerfCtrsOnCPU, &unavailCtrs); + return Atomic_Read64(&unavailCtrs); +} + diff --git a/vmmon-only/common/vmx86.h b/vmmon-only/common/vmx86.h new file mode 100644 index 00000000..71ea01b9 --- /dev/null +++ b/vmmon-only/common/vmx86.h @@ -0,0 +1,201 @@ +/********************************************************* + * Copyright (C) 1998-2013,2015-2016 VMware, Inc. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation version 2 and no later version. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY + * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * for more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write to the Free Software Foundation, Inc., + * 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + * + *********************************************************/ + +/* + * vmx86.h - Platform independent data and interface for supporting + * the vmx86 device driver. + */ + +#ifndef VMX86_H +#define VMX86_H + +#define INCLUDE_ALLOW_VMMON +#define INCLUDE_ALLOW_VMCORE +#include "includeCheck.h" + +#include "x86apic.h" +#include "x86msr.h" +#include "modulecall.h" +#include "vcpuid.h" +#include "initblock.h" +#include "iocontrols.h" +#include "numa_defs.h" +#include "rateconv.h" +#include "vmmem_shared.h" +#include "apic.h" + +typedef struct PseudoTSCOffsetInfo { + uint32 inVMMCnt; /* Number of vcpus executing in the VMM. */ + uint32 vcpuid; /* Index into VMDriver.ptscOffsets. */ +} PseudoTSCOffsetInfo; + +typedef struct TSCDelta { + Atomic_uint64 min; + Atomic_uint64 max; +} TSCDelta; + +/* + * VMDriver - the main data structure for the driver side of a + * virtual machine. + */ + +typedef struct VMDriver { + /* Unique (in the driver), strictly positive, VM ID used by userland. */ + int userID; + + struct VMDriver *nextDriver; /* Next on list of all VMDrivers */ + + Vcpuid numVCPUs; /* Number of vcpus in VM. */ + struct VMHost *vmhost; /* Host-specific fields. 
*/ + + /* Pointers to the crossover pages shared with the monitor. */ + struct VMCrossPage *crosspage[MAX_INITBLOCK_CPUS]; + volatile uint32 currentHostCpu[MAX_INITBLOCK_CPUS]; + VCPUSet crosscallWaitSet[MAX_INITBLOCK_CPUS]; + APICDescriptor hostAPIC; + + struct MemTrack *memtracker; /* Memory tracker pointer */ + Bool checkFuncFailed; + struct PerfCounter *perfCounter; + VMMemMgmtInfo memInfo; + unsigned fastClockRate;/* Protected by FastClockLock. */ + int fastSuspResFlag; + + volatile PseudoTSCOffsetInfo ptscOffsetInfo; /* Volatile per PR 699101#29. */ + Atomic_uint64 ptscLatest; + int64 ptscOffsets[MAX_INITBLOCK_CPUS]; +} VMDriver; + +typedef struct VmTimeStart { + uint64 count; + uint64 time; +} VmTimeStart; + +typedef struct RefClockParams { + RateConv_Ratio ratio; + Atomic_uint64 add; +} RefClockParams; + +typedef struct PseudoTSC { + RefClockParams refClockToPTSC; + int64 tscOffset; + uint64 hz; + volatile Bool useRefClock; + Bool neverSwitchToRefClock; + Bool hwTSCsSynced; + volatile Bool initialized; +} PseudoTSC; + +extern PseudoTSC pseudoTSC; + +#define MAX_LOCKED_PAGES (-1) + +extern VMDriver *Vmx86_CreateVM(void); +extern int Vmx86_ReleaseVM(VMDriver *vm); +extern int Vmx86_InitVM(VMDriver *vm, InitBlock *initParams); +extern int Vmx86_LateInitVM(VMDriver *vm); +extern int Vmx86_RunVM(VMDriver *vm, Vcpuid vcpuid); +extern void Vmx86_YieldToSet(VMDriver *vm, Vcpuid currVcpu, const VCPUSet *req, + uint32 usecs, Bool skew); +extern void Vmx86_ReadTSCAndUptime(VmTimeStart *st); +extern uint32 Vmx86_ComputekHz(uint64 cycles, uint64 uptime); +extern uint32 Vmx86_GetkHzEstimate(VmTimeStart *st); +extern int Vmx86_SetHostClockRate(VMDriver *vm, unsigned rate); +extern int Vmx86_LockPage(VMDriver *vm, + VA64 uAddr, + Bool allowMultipleMPNsPerVA, + MPN *mpn); +extern int Vmx86_UnlockPage(VMDriver *vm, VA64 uAddr); +extern int Vmx86_UnlockPageByMPN(VMDriver *vm, MPN mpn, VA64 uAddr); +extern MPN Vmx86_GetRecycledPage(VMDriver *vm); +extern int Vmx86_ReleaseAnonPage(VMDriver *vm, MPN mpn); +extern int Vmx86_AllocLockedPages(VMDriver *vm, VA64 addr, + unsigned numPages, Bool kernelMPNBuffer, + Bool ignoreLimits); +extern int Vmx86_FreeLockedPages(VMDriver *vm, VA64 addr, + unsigned numPages, Bool kernelMPNBuffer); +extern MPN Vmx86_GetNextAnonPage(VMDriver *vm, MPN mpn); +extern int Vmx86_GetLockedPageList(VMDriver *vm, VA64 uAddr, + unsigned int numPages); + +extern int32 Vmx86_GetNumVMs(void); +extern int32 Vmx86_GetTotalMemUsage(void); +extern Bool Vmx86_SetConfiguredLockedPagesLimit(unsigned limit); +extern void Vmx86_SetDynamicLockedPagesLimit(unsigned limit); +extern Bool Vmx86_GetMemInfo(VMDriver *curVM, + Bool curVMOnly, + VMMemInfoArgs *args, + int outArgsLength); +extern void Vmx86_Admit(VMDriver *curVM, VMMemInfoArgs *args); +extern Bool Vmx86_Readmit(VMDriver *curVM, OvhdMem_Deltas *delta); +extern void Vmx86_UpdateMemInfo(VMDriver *curVM, + const VMMemMgmtInfoPatch *patch); +extern void Vmx86_Add2MonPageTable(VMDriver *vm, VPN vpn, MPN mpn, + Bool readOnly); +extern Bool Vmx86_PAEEnabled(void); +extern Bool Vmx86_VMXEnabled(void); +extern Bool Vmx86_GetAllMSRs(MSRQuery *query); +extern void Vmx86_MonTimerIPI(void); +extern void Vmx86_InitIDList(void); +extern VMDriver *Vmx86_LookupVMByUserID(int userID); +extern Bool Vmx86_FastSuspResSetOtherFlag(VMDriver *vm, int otherVmUserId); +extern int Vmx86_FastSuspResGetMyFlag(VMDriver *vm, Bool blockWait); +extern void Vmx86_Open(void); +extern void Vmx86_Close(void); + +static INLINE Bool +Vmx86_HwTSCsSynced(void) +{ + return 
pseudoTSC.hwTSCsSynced; +} + +static INLINE Bool +Vmx86_PseudoTSCUsesRefClock(void) +{ + return pseudoTSC.useRefClock; +} + +static INLINE Bool +Vmx86_SetPseudoTSCUseRefClock(void) +{ + if (!pseudoTSC.useRefClock && !pseudoTSC.neverSwitchToRefClock) { + pseudoTSC.useRefClock = TRUE; + return TRUE; + } + return FALSE; +} + +static INLINE uint64 +Vmx86_GetPseudoTSCHz(void) +{ + return pseudoTSC.hz; +} + +static INLINE uint64 +Vmx86_GetPseudoTSCOffset(void) +{ + return pseudoTSC.tscOffset; +} + +extern void Vmx86_InitPseudoTSC(PTSCInitParams *params); +extern Bool Vmx86_CheckPseudoTSC(uint64 *lastTSC, uint64 *lastRC); +extern uint64 Vmx86_GetPseudoTSC(void); + +extern uint64 Vmx86_GetUnavailablePerfCtrs(void); + +#endif diff --git a/vmmon-only/include/address_defs.h b/vmmon-only/include/address_defs.h new file mode 100644 index 00000000..f7ba0645 --- /dev/null +++ b/vmmon-only/include/address_defs.h @@ -0,0 +1,95 @@ +/********************************************************* + * Copyright (C) 2014 VMware, Inc. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation version 2 and no later version. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY + * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * for more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write to the Free Software Foundation, Inc., + * 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + * + *********************************************************/ + +/* + * address_defs.h -- + * + * Macros for virtual/physical/machine address/page conversions, page types. + */ + +#ifndef _ADDRESS_DEFS_H_ +#define _ADDRESS_DEFS_H_ + +#define INCLUDE_ALLOW_USERLEVEL +#define INCLUDE_ALLOW_MODULE +#define INCLUDE_ALLOW_VMK_MODULE +#define INCLUDE_ALLOW_VMKERNEL +#define INCLUDE_ALLOW_DISTRIBUTE +#define INCLUDE_ALLOW_VMCORE +#define INCLUDE_ALLOW_VMMON +#include "includeCheck.h" + +#include "vm_basic_defs.h" // For PAGE_SHIFT + +/* + * Virtual, physical, machine address and page conversion macros + */ + +#define VA_2_VPN(_va) ((_va) >> PAGE_SHIFT) +#define PTR_2_VPN(_ptr) VA_2_VPN((VA)(_ptr)) +#define VPN_2_VA(_vpn) ((_vpn) << PAGE_SHIFT) +#define VPN_2_PTR(_vpn) ((void *)VPN_2_VA(_vpn)) + +/* + * Notice that we don't cast PA_2_PPN's argument to an unsigned type, because + * we would lose compile-time checks for pointer operands and byte-sized + * operands. If you use a signed quantity for _pa, ones may be shifted into the + * high bits of your ppn. + */ + +#define PA_2_PPN(_pa) ((_pa) >> PAGE_SHIFT) +#define PPN_2_PA(_ppn) ((PA)(_ppn) << PAGE_SHIFT) + +static INLINE MA MPN_2_MA(MPN mpn) { return (MA)mpn << PAGE_SHIFT; } +static INLINE MPN MA_2_MPN(MA ma) { return (MPN)(ma >> PAGE_SHIFT); } + +static INLINE IOA IOPN_2_IOA(IOPN iopn) { return (IOA)(iopn << PAGE_SHIFT); } +static INLINE IOPN IOA_2_IOPN(IOA ioa) { return (IOPN)(ioa >> PAGE_SHIFT); } + +typedef char PageArray[PAGE_SIZE]; + +/* + *---------------------------------------------------------------------- + * + * IsGoodMPN -- + * + * Is the given MPN valid? + * + * Results: + * Return TRUE if "mpn" looks plausible. We could make this stricter on + * a per-architecture basis. + * + * Side effects: + * None. 
+ * + *---------------------------------------------------------------------- + */ + +static INLINE Bool +IsGoodMPN(MPN mpn) +{ + return mpn <= MAX_MPN; +} + +static INLINE Bool +IsGoodMPNOrMemref(MPN mpn) +{ + return IsGoodMPN(mpn) || mpn == MEMREF_MPN; +} + +#endif diff --git a/vmmon-only/include/basic_initblock.h b/vmmon-only/include/basic_initblock.h new file mode 100644 index 00000000..438e9b61 --- /dev/null +++ b/vmmon-only/include/basic_initblock.h @@ -0,0 +1,60 @@ +/********************************************************* + * Copyright (C) 2006 VMware, Inc. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation version 2 and no later version. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY + * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * for more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write to the Free Software Foundation, Inc., + * 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + * + *********************************************************/ + +/* + * basic_initblock.h -- + * + * VM initialization block. + */ + +#ifndef _BASIC_INITBLOCK_H_ +#define _BASIC_INITBLOCK_H_ + + +#define INCLUDE_ALLOW_USERLEVEL + +#define INCLUDE_ALLOW_MODULE +#define INCLUDE_ALLOW_VMMON +#define INCLUDE_ALLOW_VMKERNEL +#define INCLUDE_ALLOW_VMK_MODULE +#define INCLUDE_ALLOW_DISTRIBUTE +#define INCLUDE_ALLOW_VMCORE +#include "includeCheck.h" + + +#include "vcpuid.h" + + +#define MAX_INITBLOCK_CPUS 128 + + +typedef +#include "vmware_pack_begin.h" +struct InitBlock { + uint32 magicNumber; /* Magic number (INIT_BLOCK_MAGIC) */ + Vcpuid numVCPUs; + VA64 crosspage[MAX_INITBLOCK_CPUS]; + uint32 vmInitFailurePeriod; + LA64 crossGDTHKLA; + MPN crossGDTMPNs[5]; // CROSSGDT_NUMPAGES +} +#include "vmware_pack_end.h" +InitBlock; + + +#endif // _BASIC_INITBLOCK_H_ diff --git a/vmmon-only/include/circList.h b/vmmon-only/include/circList.h new file mode 100644 index 00000000..ac4f48f1 --- /dev/null +++ b/vmmon-only/include/circList.h @@ -0,0 +1,428 @@ +/********************************************************* + * Copyright (C) 1998-2015 VMware, Inc. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation version 2 and no later version. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY + * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * for more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write to the Free Software Foundation, Inc., + * 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + * + *********************************************************/ + +/* + * circList.h -- + * + * macros, prototypes and struct definitions for double-linked + * circular lists. 
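+ *
+ * Illustrative usage, assuming a hypothetical Foo type that embeds a
+ * ListItem (neither Foo nor fooList is part of this header):
+ *
+ *    typedef struct Foo { int value; ListItem links; } Foo;
+ *
+ *    LIST_ITEM_DEF(fooList);              // empty list: NULL head
+ *    LIST_QUEUE(&foo->links, &fooList);   // append foo at the back
+ *
+ *    ListItem *li;
+ *    LIST_SCAN(li, fooList) {
+ *       Foo *f = LIST_CONTAINER(li, Foo, links);
+ *       ...
+ *    }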
+ */ + +#ifndef _CIRCLIST_H_ +#define _CIRCLIST_H_ + +#define INCLUDE_ALLOW_USERLEVEL +#define INCLUDE_ALLOW_VMMON +#define INCLUDE_ALLOW_VMCORE +#define INCLUDE_ALLOW_MODULE +#define INCLUDE_ALLOW_VMKERNEL +#include "includeCheck.h" +#include "vmware.h" + +typedef struct ListItem { + struct ListItem *prev; + struct ListItem *next; +} ListItem; + +/* A list with no elements is a null pointer. */ +#define LIST_ITEM_DEF(name) \ + ListItem * name = NULL + +#define LIST_EMPTY(l) ((l) == NULL) + +/* initialize list item */ +#define INIT_LIST_ITEM(p) \ + do { \ + (p)->prev = (p)->next = (p); \ + } while (0) + +/* check if initialized */ +#define IS_LIST_ITEM_INITIALIZED(li) \ + (((li) == (li)->prev) && ((li) == (li)->next)) + +/* return first element in the list */ +#define LIST_FIRST(l) (l) +#define LIST_FIRST_CHK(l) (l) + +/* return last element in the list */ +#define LIST_LAST(l) ((l)->prev) +#define LIST_LAST_CHK(l) (LIST_EMPTY(l) ? NULL : LIST_LAST(l)) + +/* + * LIST_CONTAINER - get the struct for this entry (like list_entry) + * @ptr: the &struct ListItem pointer. + * @type: the type of the struct this is embedded in. + * @member: the name of the list struct within the struct. + */ +#define LIST_CONTAINER(ptr, type, member) \ + VMW_CONTAINER_OF(ptr, type, member) + +/* + * delete item from the list + */ +#define LIST_DEL DelListItem + +/* + * link two lists together + */ +#define LIST_SPLICE SpliceLists + +/* + * Split a list into two lists + */ +#define LIST_SPLIT SplitLists + +/* + * Add item to front of stack. List pointer points to new head. + */ +#define LIST_PUSH PushListItem + +/* + * Add item at back of queue. List pointer only changes if list was empty. + */ +#define LIST_QUEUE QueueListItem + +/* + * Get the list size. + */ +#define LIST_SIZE GetListSize + +/* + * LIST_SCAN_FROM scans the list from "from" up until "until". + * The loop variable p should not be destroyed in the process. + * "from" is an element in the list where to start scanning. + * "until" is the element where search should stop. + * member is the field to use for the search - either "next" or "prev". + */ +#define LIST_SCAN_FROM(p, from, until, member) \ + for (p = (from); (p) != NULL; \ + (p) = (((p)->member == (until)) ? NULL : (p)->member)) + +/* scan the entire list (non-destructively) */ +#define LIST_SCAN(p, l) \ + LIST_SCAN_FROM(p, LIST_FIRST(l), LIST_FIRST(l), next) + + +/* scan a list backward from last element to first (non-destructively) */ +#define LIST_SCAN_BACK(p, l) \ + LIST_SCAN_FROM(p, LIST_LAST_CHK(l), LIST_LAST(l), prev) + +/* scan the entire list where loop element may be destroyed */ +#define LIST_SCAN_SAFE(p, pn, l) \ + if (!LIST_EMPTY(l)) \ + for (p = (l), (pn) = NextListItem(p, l); (p) != NULL; \ + (p) = (pn), (pn) = NextListItem(p, l)) + +/* scan the entire list backwards where loop element may be destroyed */ +#define LIST_SCAN_BACK_SAFE(p, pn, l) \ + if (!LIST_EMPTY(l)) \ + for (p = LIST_LAST(l), (pn) = PrevListItem(p, l); (p) != NULL; \ + (p) = (pn), (pn) = PrevListItem(p, l)) + + +/* function definitions */ + +/* + *---------------------------------------------------------------------- + * + * NextListItem -- + * + * Returns the next member of a doubly linked list, or NULL if last. + * Assumes: p is member of the list headed by head. + * + * Result: + * If head or p is NULL, return NULL. Otherwise, + * next list member (or null if last). + * + * Side effects: + * None. 
+ * + *---------------------------------------------------------------------- + */ + +static INLINE ListItem * +NextListItem(ListItem *p, // IN + ListItem *head) // IN +{ + if (head == NULL || p == NULL) { + return NULL; + } + /* both p and head are non-null */ + p = p->next; + return p == head ? NULL : p; +} + + +/* + *---------------------------------------------------------------------- + * + * PrevListItem -- + * + * Returns the prev member of a doubly linked list, or NULL if first. + * Assumes: p is member of the list headed by head. + * + * Result: + * If head or prev is NULL, return NULL. Otherwise, + * prev list member (or null if first). + * + * Side effects: + * None. + * + *---------------------------------------------------------------------- + */ + +static INLINE ListItem * +PrevListItem(ListItem *p, // IN + ListItem *head) // IN +{ + if (head == NULL || p == NULL) { + return NULL; + } + /* both p and head are non-null */ + return p == head ? NULL : p->prev; +} + + +/* + *---------------------------------------------------------------------- + * + * DelListItem -- + * + * Deletes a member of a doubly linked list, possibly modifies the + * list header itself. + * Assumes neither p nor headp is null and p is a member of *headp. + * + * Result: + * None + * + * Side effects: + * Modifies *headp. + * + *---------------------------------------------------------------------- + */ + +static INLINE void +DelListItem(ListItem *p, // IN + ListItem **headp) // IN/OUT +{ + ListItem *next; + + ASSERT(p); + ASSERT(headp); + + next = p->next; + if (p == next) { + *headp = NULL; + } else { + next->prev = p->prev; + p->prev->next = next; + if (*headp == p) { + *headp = next; + } + } +} + + +/* + *---------------------------------------------------------------------- + * + * QueueListItem -- + * + * Adds a new member to the back of a doubly linked list (queue) + * Assumes neither p nor headp is null and p is not a member of *headp. + * + * Result: + * None + * + * Side effects: + * Modifies *headp. + * + *---------------------------------------------------------------------- + */ + +static INLINE void +QueueListItem(ListItem *p, // IN + ListItem **headp) // IN/OUT +{ + ListItem *head; + + head = *headp; + if (LIST_EMPTY(head)) { + INIT_LIST_ITEM(p); + *headp = p; + } else { + p->prev = head->prev; + p->next = head; + p->prev->next = p; + head->prev = p; + } +} + + +/* + *---------------------------------------------------------------------- + * + * PushListItem -- + * + * Adds a new member to the front of a doubly linked list (stack) + * Assumes neither p nor headp is null and p is not a member of *headp. + * + * Result: + * None + * + * Side effects: + * Modifies *headp. + * + *---------------------------------------------------------------------- + */ + +static INLINE void +PushListItem(ListItem *p, // IN + ListItem **headp) // IN/OUT +{ + QueueListItem(p, headp); + *headp = p; +} + + +/* + *---------------------------------------------------------------------- + * + * SpliceLists -- + * + * Make a single list {l1 l2} from {l1} and {l2} and return it. + * It is okay for one or both lists to be NULL. + * No checking is done. It is assumed that l1 and l2 are two + * distinct lists. + * + * Result: + * A list { l1 l2 }. + * + * Side effects: + * Modifies l1 and l2 list pointers. 
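+ *
+ * For example (informal): splicing l1 = { a b } and l2 = { c d }
+ * returns the list { a b c d }, whose head is a.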
+ * + *---------------------------------------------------------------------- + */ + +static INLINE ListItem * +SpliceLists(ListItem *l1, // IN + ListItem *l2) // IN +{ + ListItem *l1Last, *l2Last; + + if (LIST_EMPTY(l1)) { + return l2; + } + + if (LIST_EMPTY(l2)) { + return l1; + } + + l1Last = l1->prev; /* last elem of l1 */ + l2Last = l2->prev; /* last elem of l2 */ + + /* + * l1 -> ... -> l1Last l2 -> ... l2Last + */ + l1Last->next = l2; + l2->prev = l1Last; + + l1->prev = l2Last; + l2Last->next = l1; + + return l1; +} + + +/* + *---------------------------------------------------------------------- + * + * SplitLists -- + * + * Make a list l = {l1 l2} into two separate lists {l1} and {l2}, where: + * l = { ... x -> p -> ... } split into: + * l1 = { ... -> x } + * l2 = { p -> ... } + * Assumes neither p nor l is null and p is a member of l. + * If p is the first element of l, then l1 will be NULL. + * + * Result: + * None. + * + * Side effects: + * Sets *l1p and *l2p to the resulting two lists. + * Modifies l's pointers. + * + *---------------------------------------------------------------------- + */ + +static INLINE void +SplitLists(ListItem *p, // IN + ListItem *l, // IN + ListItem **l1p, // OUT + ListItem **l2p) // OUT +{ + ListItem *last; + + if (p == LIST_FIRST(l)) { /* first element */ + *l1p = NULL; + *l2p = l; + return; + } + + last = l->prev; + + *l1p = l; + p->prev->next = l; + l->prev = p->prev; + + *l2p = p; + p->prev = last; + last->next = p; +} + + +/* + *---------------------------------------------------------------------- + * + * GetListSize -- + * + * Return the number of items in the list. + * + * Result: + * The number of items in the list. + * + * Side effects: + * None. + * + *---------------------------------------------------------------------- + */ + +static INLINE int +GetListSize(ListItem *head) // IN +{ + ListItem *li; + int ret = 0; + + LIST_SCAN(li, head) { + ret++; + } + return ret; +} + +#endif /* _CIRCLIST_H_ */ diff --git a/vmmon-only/include/community_source.h b/vmmon-only/include/community_source.h new file mode 100644 index 00000000..2d32d8fa --- /dev/null +++ b/vmmon-only/include/community_source.h @@ -0,0 +1,67 @@ +/********************************************************* + * Copyright (C) 2009-2015 VMware, Inc. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation version 2 and no later version. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY + * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * for more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write to the Free Software Foundation, Inc., + * 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + * + *********************************************************/ + +/* + * community_source.h -- + * + * Macros for excluding source code from community. 
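+ *
+ * Illustrative use (hypothetical statement, not from this tree):
+ * EXCLUDE_COMMUNITY_SOURCE(InternalDebugHook();) compiles the call
+ * away when COMMUNITY_SOURCE is defined and expands to it unchanged
+ * otherwise.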
+ */ + +#ifndef _COMMUNITY_SOURCE_H_ +#define _COMMUNITY_SOURCE_H_ + +#define INCLUDE_ALLOW_USERLEVEL + +#define INCLUDE_ALLOW_MODULE +#define INCLUDE_ALLOW_VMMON +#define INCLUDE_ALLOW_VMKERNEL +#define INCLUDE_ALLOW_VMKDRIVERS +#define INCLUDE_ALLOW_VMK_MODULE +#define INCLUDE_ALLOW_DISTRIBUTE +#define INCLUDE_ALLOW_VMCORE +#include "includeCheck.h" + +/* + * Convenience macro for COMMUNITY_SOURCE + */ +#undef EXCLUDE_COMMUNITY_SOURCE +#ifdef COMMUNITY_SOURCE + #define EXCLUDE_COMMUNITY_SOURCE(x) +#else + #define EXCLUDE_COMMUNITY_SOURCE(x) x +#endif + +#undef COMMUNITY_SOURCE_AMD_SECRET +#if !defined(COMMUNITY_SOURCE) || defined(AMD_SOURCE) +/* + * It's ok to include AMD_SECRET source code for non-Community Source, + * or for drops directed at AMD. + */ + #define COMMUNITY_SOURCE_AMD_SECRET +#endif + +#undef COMMUNITY_SOURCE_INTEL_SECRET +#if !defined(COMMUNITY_SOURCE) || defined(INTEL_SOURCE) +/* + * It's ok to include INTEL_SECRET source code for non-Community Source, + * or for drops directed at Intel. + */ + #define COMMUNITY_SOURCE_INTEL_SECRET +#endif + +#endif diff --git a/vmmon-only/include/compat_autoconf.h b/vmmon-only/include/compat_autoconf.h new file mode 100644 index 00000000..26064aa8 --- /dev/null +++ b/vmmon-only/include/compat_autoconf.h @@ -0,0 +1,41 @@ +/********************************************************* + * Copyright (C) 2009 VMware, Inc. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation version 2 and no later version. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY + * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * for more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write to the Free Software Foundation, Inc., + * 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + * + *********************************************************/ + +#ifndef __COMPAT_AUTOCONF_H__ +# define __COMPAT_AUTOCONF_H__ + +#define INCLUDE_ALLOW_VMMON +#define INCLUDE_ALLOW_MODULE +#define INCLUDE_ALLOW_VMCORE +#define INCLUDE_ALLOW_DISTRIBUTE +#define INCLUDE_ALLOW_VMKDRIVERS +#include "includeCheck.h" + + +#ifndef LINUX_VERSION_CODE +# error "Include compat_version.h before compat_autoconf.h" +#endif + +/* autoconf.h moved from linux/autoconf.h to generated/autoconf.h in 2.6.33-rc1. */ +#if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 33) +# include +#else +# include +#endif + +#endif /* __COMPAT_AUTOCONF_H__ */ diff --git a/vmmon-only/include/compat_cred.h b/vmmon-only/include/compat_cred.h new file mode 100644 index 00000000..95a7baa7 --- /dev/null +++ b/vmmon-only/include/compat_cred.h @@ -0,0 +1,47 @@ +/********************************************************* + * Copyright (C) 2002 VMware, Inc. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation version 2 and no later version. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY + * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * for more details. 
+ * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write to the Free Software Foundation, Inc., + * 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + * + *********************************************************/ + +#ifndef __COMPAT_CRED_H__ +# define __COMPAT_CRED_H__ + + +/* + * Include linux/cred.h via linux/sched.h - it is not nice, but + * as cpp does not have #ifexist... + */ +#include + +#if !defined(current_fsuid) && LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 29) +#define current_uid() (current->uid) +#define current_euid() (current->euid) +#define current_fsuid() (current->fsuid) +#define current_gid() (current->gid) +#define current_egid() (current->egid) +#define current_fsgid() (current->fsgid) +#endif + +#if !defined(cap_set_full) +/* cap_set_full was removed in kernel version 3.0-rc4. */ +#define cap_set_full(_c) do { (_c) = CAP_FULL_SET; } while (0) +#endif + +#if !defined(GLOBAL_ROOT_UID) +#define GLOBAL_ROOT_UID (0) +#endif + +#endif /* __COMPAT_CRED_H__ */ diff --git a/vmmon-only/include/compat_highmem.h b/vmmon-only/include/compat_highmem.h new file mode 100644 index 00000000..263380d6 --- /dev/null +++ b/vmmon-only/include/compat_highmem.h @@ -0,0 +1,32 @@ +/********************************************************* + * Copyright (C) 2012 VMware, Inc. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation version 2 and no later version. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY + * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * for more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write to the Free Software Foundation, Inc., + * 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + * + *********************************************************/ + +#ifndef __COMPAT_HIGHMEM_H__ +# define __COMPAT_HIGHMEM_H__ + +#include + +#if LINUX_VERSION_CODE >= KERNEL_VERSION(3, 5, 0) +# define compat_kmap_atomic(_page) kmap_atomic(_page) +# define compat_kunmap_atomic(_page) kunmap_atomic(_page) +#else +# define compat_kmap_atomic(_page) kmap_atomic((_page), KM_USER0) +# define compat_kunmap_atomic(_page) kunmap_atomic((_page), KM_USER0) +#endif + +#endif /* __COMPAT_HIGHMEM_H__ */ diff --git a/vmmon-only/include/compat_interrupt.h b/vmmon-only/include/compat_interrupt.h new file mode 100644 index 00000000..1d72a4bb --- /dev/null +++ b/vmmon-only/include/compat_interrupt.h @@ -0,0 +1,55 @@ +/********************************************************* + * Copyright (C) 2003 VMware, Inc. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation version 2 and no later version. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY + * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * for more details. 
+ * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write to the Free Software Foundation, Inc., + * 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + * + *********************************************************/ + +#ifndef __COMPAT_INTERRUPT_H__ +# define __COMPAT_INTERRUPT_H__ + + +#include + +#if LINUX_VERSION_CODE < KERNEL_VERSION(2, 5, 69) +/* + * We cannot just define irqreturn_t, as some 2.4.x kernels have + * typedef void irqreturn_t; for "increasing" backward compatibility. + */ +typedef void compat_irqreturn_t; +#define COMPAT_IRQ_NONE +#define COMPAT_IRQ_HANDLED +#define COMPAT_IRQ_RETVAL(x) +#else +typedef irqreturn_t compat_irqreturn_t; +#define COMPAT_IRQ_NONE IRQ_NONE +#define COMPAT_IRQ_HANDLED IRQ_HANDLED +#define COMPAT_IRQ_RETVAL(x) IRQ_RETVAL(x) +#endif + +#if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 18) +#define COMPAT_IRQF_DISABLED SA_INTERRUPT +#define COMPAT_IRQF_SHARED SA_SHIRQ +#else +#define COMPAT_IRQF_DISABLED IRQF_DISABLED +#define COMPAT_IRQF_SHARED IRQF_SHARED +#endif + +#if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 20) +#define COMPAT_IRQ_HANDLER_ARGS(irq, devp) (int irq, void *devp, struct pt_regs *regs) +#else +#define COMPAT_IRQ_HANDLER_ARGS(irq, devp) (int irq, void *devp) +#endif + +#endif /* __COMPAT_INTERRUPT_H__ */ diff --git a/vmmon-only/include/compat_ioport.h b/vmmon-only/include/compat_ioport.h new file mode 100644 index 00000000..bacdb50b --- /dev/null +++ b/vmmon-only/include/compat_ioport.h @@ -0,0 +1,63 @@ +/********************************************************* + * Copyright (C) 2003 VMware, Inc. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation version 2 and no later version. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY + * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * for more details. 
+ * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write to the Free Software Foundation, Inc., + * 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + * + *********************************************************/ + +#ifndef __COMPAT_IOPORT_H__ +# define __COMPAT_IOPORT_H__ + + +#include + +#if LINUX_VERSION_CODE < KERNEL_VERSION(2, 4, 0) +static inline void * +compat_request_region(unsigned long start, unsigned long len, const char *name) +{ + if (check_region(start, len)) { + return NULL; + } + request_region(start, len, name); + return (void*)1; +} +#else +#define compat_request_region(start, len, name) request_region(start, len, name) +#endif + +#if LINUX_VERSION_CODE < KERNEL_VERSION(2, 3, 7) +/* mmap io support starts from 2.3.7, fail the call for kernel prior to that */ +static inline void * +compat_request_mem_region(unsigned long start, unsigned long len, const char *name) +{ + return NULL; +} + +static inline void +compat_release_mem_region(unsigned long start, unsigned long len) +{ + return; +} +#else +#define compat_request_mem_region(start, len, name) request_mem_region(start, len, name) +#define compat_release_mem_region(start, len) release_mem_region(start, len) +#endif + +/* these two macro defs are needed by compat_pci_request_region */ +#if LINUX_VERSION_CODE < KERNEL_VERSION(2, 3, 15) +# define IORESOURCE_IO 0x00000100 +# define IORESOURCE_MEM 0x00000200 +#endif + +#endif /* __COMPAT_IOPORT_H__ */ diff --git a/vmmon-only/include/compat_kernel.h b/vmmon-only/include/compat_kernel.h new file mode 100644 index 00000000..04ba2d19 --- /dev/null +++ b/vmmon-only/include/compat_kernel.h @@ -0,0 +1,42 @@ +/********************************************************* + * Copyright (C) 2004 VMware, Inc. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation version 2 and no later version. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY + * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * for more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write to the Free Software Foundation, Inc., + * 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + * + *********************************************************/ + +#ifndef __COMPAT_KERNEL_H__ +# define __COMPAT_KERNEL_H__ + +#include +#include + +/* + * container_of was introduced in 2.5.28 but it's easier to check like this. + */ +#ifndef container_of +#define container_of(ptr, type, member) ({ \ + const typeof( ((type *)0)->member ) *__mptr = (ptr); \ + (type *)( (char *)__mptr - offsetof(type,member) );}) +#endif + +/* + * vsnprintf became available in 2.4.10. For older kernels, just fall back on + * vsprintf. + */ +#if LINUX_VERSION_CODE < KERNEL_VERSION(2, 4, 10) +#define vsnprintf(str, size, fmt, args) vsprintf(str, fmt, args) +#endif + +#endif /* __COMPAT_KERNEL_H__ */ diff --git a/vmmon-only/include/compat_module.h b/vmmon-only/include/compat_module.h new file mode 100644 index 00000000..2af73722 --- /dev/null +++ b/vmmon-only/include/compat_module.h @@ -0,0 +1,83 @@ +/********************************************************* + * Copyright (C) 2007 VMware, Inc. All rights reserved. 
+ * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation version 2 and no later version. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY + * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * for more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write to the Free Software Foundation, Inc., + * 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + * + *********************************************************/ + +/* + * compat_module.h -- + */ + +#ifndef __COMPAT_MODULE_H__ +# define __COMPAT_MODULE_H__ + + +#include + + +/* + * Modules wishing to use the GPL license are required to include a + * MODULE_LICENSE definition in their module source as of 2.4.10. + */ +#ifndef MODULE_LICENSE +#define MODULE_LICENSE(license) +#endif + +/* + * To make use of our own home-brewed MODULE_INFO, we need macros to + * concatenate two expressions to "__mod_", and and to convert an + * expression into a string. I'm sure we've got these in our codebase, + * but I'd rather not introduce such a dependency in a compat header. + */ +#ifndef __module_cat +#define __module_cat_1(a, b) __mod_ ## a ## b +#define __module_cat(a, b) __module_cat_1(a, b) +#endif + +#ifndef __stringify +#define __stringify_1(x) #x +#define __stringify(x) __stringify_1(x) +#endif + +/* + * MODULE_INFO was born in 2.5.69. + */ +#ifndef MODULE_INFO +#define MODULE_INFO(tag, info) \ +static const char __module_cat(tag, __LINE__)[] \ + __attribute__((section(".modinfo"), unused)) = __stringify(tag) "=" info +#endif + +/* + * MODULE_VERSION was born in 2.6.4. The earlier form appends a long "\0xxx" + * string to the module's version, but that was removed in 2.6.10, so we'll + * ignore it in our wrapper. + */ +#ifndef MODULE_VERSION +#define MODULE_VERSION(_version) MODULE_INFO(version, _version) +#endif + +/* + * Linux kernel < 2.6.31 takes 'int' for 'bool' module parameters. + * Linux kernel >= 3.3.0 takes 'bool' for 'bool' module parameters. + * Kernels between the two take either. So flip switch at 3.0.0. + */ +#if LINUX_VERSION_CODE >= KERNEL_VERSION(3, 0, 0) + typedef bool compat_mod_param_bool; +#else + typedef int compat_mod_param_bool; +#endif + +#endif /* __COMPAT_MODULE_H__ */ diff --git a/vmmon-only/include/compat_page.h b/vmmon-only/include/compat_page.h new file mode 100644 index 00000000..0bf0e251 --- /dev/null +++ b/vmmon-only/include/compat_page.h @@ -0,0 +1,75 @@ +/********************************************************* + * Copyright (C) 2002 VMware, Inc. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation version 2 and no later version. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY + * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * for more details. 
+ * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write to the Free Software Foundation, Inc., + * 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + * + *********************************************************/ + +#ifndef __COMPAT_PAGE_H__ +# define __COMPAT_PAGE_H__ + + +#include +#include + + +/* The pfn_to_page() API appeared in 2.5.14 and changed to function during 2.6.x */ +#if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 0) && !defined(pfn_to_page) +# define pfn_to_page(_pfn) (mem_map + (_pfn)) +# define page_to_pfn(_page) ((_page) - mem_map) +#endif + + +/* The virt_to_page() API appeared in 2.4.0 --hpreg */ +#if LINUX_VERSION_CODE < KERNEL_VERSION(2, 4, 0) && !defined(virt_to_page) +# define virt_to_page(_kvAddr) pfn_to_page(MAP_NR(_kvAddr)) +#endif + + +/* + * The get_order() API appeared at some point in 2.3.x, and was then backported + * in 2.2.17-21mdk and in the stock 2.2.18. Because we can only detect its + * definition through makefile tricks, we provide our own for now --hpreg + */ +static inline int +compat_get_order(unsigned long size) // IN +{ + int order; + + size = (size - 1) >> (PAGE_SHIFT - 1); + order = -1; + do { + size >>= 1; + order++; + } while (size); + + return order; +} + +/* + * BUG() was added to in 2.2.18, and was moved to + * in 2.5.58. + * + * XXX: Technically, this belongs in some sort of "compat_asm_page.h" file, but + * since our compatibility wrappers don't distinguish between and + * , putting it here is reasonable. + */ +#ifndef BUG +#define BUG() do { \ + printk("kernel BUG at %s:%d!\n", __FILE__, __LINE__); \ + __asm__ __volatile__(".byte 0x0f,0x0b"); \ +} while (0) +#endif + +#endif /* __COMPAT_PAGE_H__ */ diff --git a/vmmon-only/include/compat_pci.h b/vmmon-only/include/compat_pci.h new file mode 100644 index 00000000..d1f897a7 --- /dev/null +++ b/vmmon-only/include/compat_pci.h @@ -0,0 +1,72 @@ +/********************************************************* + * Copyright (C) 1999 VMware, Inc. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation version 2 and no later version. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY + * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * for more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write to the Free Software Foundation, Inc., + * 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + * + *********************************************************/ + +/* + * compat_pci.h: PCI compatibility wrappers. + */ + +#ifndef __COMPAT_PCI_H__ +#define __COMPAT_PCI_H__ + +#include "compat_ioport.h" +#include + +#ifndef DMA_BIT_MASK +# define DMA_BIT_MASK(n) DMA_##n##BIT_MASK +#endif + +/* + * Power Management related compat wrappers. 
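+ *
+ * Illustrative use in a hypothetical legacy suspend handler:
+ *
+ *    compat_pci_save_state(pdev);
+ *    pci_disable_device(pdev);
+ *    pci_set_power_state(pdev, compat_pci_choose_state(pdev, state));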
+ */ +#if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 10) +# define compat_pci_save_state(pdev) pci_save_state((pdev), NULL) +# define compat_pci_restore_state(pdev) pci_restore_state((pdev), NULL) +#else +# define compat_pci_save_state(pdev) pci_save_state((pdev)) +# define compat_pci_restore_state(pdev) pci_restore_state((pdev)) +#endif + +#if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 11) +# define pm_message_t u32 +# define compat_pci_choose_state(pdev, state) (state) +# define PCI_D0 0 +# define PCI_D3hot 3 +#else +# define compat_pci_choose_state(pdev, state) pci_choose_state((pdev), (state)) +#endif + +/* 2.6.14 changed the PCI shutdown callback */ +#if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 14) +# define COMPAT_PCI_SHUTDOWN(func) .driver = { .shutdown = (func), } +# define COMPAT_PCI_DECLARE_SHUTDOWN(func, var) (func)(struct device *(var)) +# define COMPAT_PCI_TO_DEV(dev) (to_pci_dev(dev)) +#else +# define COMPAT_PCI_SHUTDOWN(func) .shutdown = (func) +# define COMPAT_PCI_DECLARE_SHUTDOWN(func, var) (func)(struct pci_dev *(var)) +# define COMPAT_PCI_TO_DEV(dev) (dev) +#endif + +/* 2.6.26 introduced the device_set_wakeup_enable() function */ +#if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 26) +# define compat_device_set_wakeup_enable(dev, val) do {} while(0) +#else +# define compat_device_set_wakeup_enable(dev, val) \ + device_set_wakeup_enable(dev, val) +#endif + +#endif /* __COMPAT_PCI_H__ */ diff --git a/vmmon-only/include/compat_pgtable.h b/vmmon-only/include/compat_pgtable.h new file mode 100644 index 00000000..dedc25ad --- /dev/null +++ b/vmmon-only/include/compat_pgtable.h @@ -0,0 +1,139 @@ +/********************************************************* + * Copyright (C) 2002 VMware, Inc. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation version 2 and no later version. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY + * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * for more details. 
+ * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write to the Free Software Foundation, Inc., + * 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + * + *********************************************************/ + +#ifndef __COMPAT_PGTABLE_H__ +# define __COMPAT_PGTABLE_H__ + + +#if defined(CONFIG_PARAVIRT) && defined(CONFIG_HIGHPTE) +# if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 21) +# include +# undef paravirt_map_pt_hook +# define paravirt_map_pt_hook(type, va, pfn) do {} while (0) +# endif +#endif +#include + + +/* pte_page() API modified in 2.3.23 to return a struct page * --hpreg */ +#if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 3, 23) +# define compat_pte_page pte_page +#else +# include "compat_page.h" + +# define compat_pte_page(_pte) virt_to_page(pte_page(_pte)) +#endif + + +/* Appeared in 2.5.5 --hpreg */ +#ifndef pte_offset_map +/* Appeared in SuSE 8.0's 2.4.18 --hpreg */ +# ifdef pte_offset_atomic +# define pte_offset_map pte_offset_atomic +# define pte_unmap pte_kunmap +# else +# define pte_offset_map pte_offset +# define pte_unmap(_pte) +# endif +#endif + + +/* Appeared in 2.5.74-mmX --petr */ +#ifndef pmd_offset_map +# define pmd_offset_map(pgd, address) pmd_offset(pgd, address) +# define pmd_unmap(pmd) +#endif + + +/* + * Appeared in 2.6.10-rc2-mm1. Older kernels did L4 page tables as + * part of pgd_offset, or they did not have L4 page tables at all. + * In 2.6.11 pml4 -> pgd -> pmd -> pte hierarchy was replaced by + * pgd -> pud -> pmd -> pte hierarchy. + */ +#ifdef PUD_MASK +# define compat_pgd_offset(mm, address) pgd_offset(mm, address) +# define compat_pgd_present(pgd) pgd_present(pgd) +# define compat_pud_offset(pgd, address) pud_offset(pgd, address) +# define compat_pud_present(pud) pud_present(pud) +typedef pgd_t compat_pgd_t; +typedef pud_t compat_pud_t; +#elif defined(pml4_offset) +# define compat_pgd_offset(mm, address) pml4_offset(mm, address) +# define compat_pgd_present(pml4) pml4_present(pml4) +# define compat_pud_offset(pml4, address) pml4_pgd_offset(pml4, address) +# define compat_pud_present(pgd) pgd_present(pgd) +typedef pml4_t compat_pgd_t; +typedef pgd_t compat_pud_t; +#else +# define compat_pgd_offset(mm, address) pgd_offset(mm, address) +# define compat_pgd_present(pgd) pgd_present(pgd) +# define compat_pud_offset(pgd, address) (pgd) +# define compat_pud_present(pud) (1) +typedef pgd_t compat_pgd_t; +typedef pgd_t compat_pud_t; +#endif + + +#define compat_pgd_offset_k(mm, address) pgd_offset_k(address) + + +/* Introduced somewhere in 2.6.0, + backported to some 2.4 RedHat kernels */ +#if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 0) && !defined(pte_pfn) +# define pte_pfn(pte) page_to_pfn(compat_pte_page(pte)) +#endif + + +/* A page_table_lock field is added to struct mm_struct in 2.3.10 --hpreg */ +#if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 3, 10) +# define compat_get_page_table_lock(_mm) (&(_mm)->page_table_lock) +#else +# define compat_get_page_table_lock(_mm) NULL +#endif + + +/* + * Define VM_PAGE_KERNEL_EXEC for vmapping executable pages. + * + * On ia32 PAGE_KERNEL_EXEC was introduced in 2.6.8.1. Unfortunately it accesses + * __PAGE_KERNEL_EXEC which is not exported for modules. So we use + * __PAGE_KERNEL and just cut _PAGE_NX bit from it. + * + * For ia32 kernels before 2.6.8.1 we use PAGE_KERNEL directly, these kernels + * do not have noexec support. 
+ * + * On x86-64 situation is a bit better: they always supported noexec, but + * before 2.6.8.1 flag was named PAGE_KERNEL_EXECUTABLE, and it was renamed + * to PAGE_KERNEL_EXEC when ia32 got noexec too (see above). + */ +#ifdef CONFIG_X86 +#ifdef _PAGE_NX +#define VM_PAGE_KERNEL_EXEC __pgprot(__PAGE_KERNEL & ~_PAGE_NX) +#else +#define VM_PAGE_KERNEL_EXEC PAGE_KERNEL +#endif +#else +#ifdef PAGE_KERNEL_EXECUTABLE +#define VM_PAGE_KERNEL_EXEC PAGE_KERNEL_EXECUTABLE +#else +#define VM_PAGE_KERNEL_EXEC PAGE_KERNEL_EXEC +#endif +#endif + + +#endif /* __COMPAT_PGTABLE_H__ */ diff --git a/vmmon-only/include/compat_sched.h b/vmmon-only/include/compat_sched.h new file mode 100644 index 00000000..3f3304bd --- /dev/null +++ b/vmmon-only/include/compat_sched.h @@ -0,0 +1,293 @@ +/********************************************************* + * Copyright (C) 2002 VMware, Inc. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation version 2 and no later version. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY + * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * for more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write to the Free Software Foundation, Inc., + * 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + * + *********************************************************/ + +#ifndef __COMPAT_SCHED_H__ +# define __COMPAT_SCHED_H__ + + +#include + +/* CLONE_KERNEL available in 2.5.35 and higher. */ +#ifndef CLONE_KERNEL +#define CLONE_KERNEL CLONE_FILES | CLONE_FS | CLONE_SIGHAND +#endif + +/* TASK_COMM_LEN become available in 2.6.11. */ +#ifndef TASK_COMM_LEN +#define TASK_COMM_LEN 16 +#endif + +/* The capable() API appeared in 2.1.92 --hpreg */ +#if LINUX_VERSION_CODE < KERNEL_VERSION(2, 1, 92) +# define capable(_capability) suser() +#endif + +#if LINUX_VERSION_CODE < KERNEL_VERSION(2, 2, 0) +# define need_resched() need_resched +#elif LINUX_VERSION_CODE < KERNEL_VERSION(2, 5, 3) +# define need_resched() (current->need_resched) +#endif + +#if LINUX_VERSION_CODE < KERNEL_VERSION(2, 5, 3) +# define cond_resched() (need_resched() ? schedule() : (void) 0) +#endif + +/* Oh well. We need yield... Happy us! */ +#if LINUX_VERSION_CODE < KERNEL_VERSION(2, 4, 20) +# ifdef __x86_64__ +# define compat_yield() there_is_nothing_like_yield() +# else +# include +# include + +/* + * Used by _syscallX macros. Note that this is global variable, so + * do not rely on its contents too much. As exit() is only function + * we use, and we never check return value from exit(), we have + * no problem... + */ +extern int errno; + +/* + * compat_exit() provides an access to the exit() function. It must + * be named compat_exit(), as exit() (with different signature) is + * provided by x86-64, arm and other (but not by i386). + */ +# define __NR_compat_yield __NR_sched_yield +static inline _syscall0(int, compat_yield); +# endif +#else +# define compat_yield() yield() +#endif + + +/* + * Since 2.5.34 there are two methods to enumerate tasks: + * for_each_process(p) { ... } which enumerates only tasks and + * do_each_thread(g,t) { ... } while_each_thread(g,t) which enumerates + * also threads even if they share same pid. 
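+ *
+ * Typical traversal (sketch; callers also need the usual tasklist
+ * locking or RCU protection):
+ *
+ *    struct task_struct *g, *t;
+ *    do_each_thread(g, t) {
+ *       ...
+ *    } while_each_thread(g, t);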
+ */ +#ifndef for_each_process +# define for_each_process(p) for_each_task(p) +#endif + +#ifndef do_each_thread +# define do_each_thread(g, t) for_each_task(g) { t = g; do +# define while_each_thread(g, t) while (0) } +#endif + + +/* + * Lock for signal mask is moving target... + */ +#if LINUX_VERSION_CODE < KERNEL_VERSION(2, 5, 40) && defined(CLONE_PID) +/* 2.4.x without NPTL patches or early 2.5.x */ +#define compat_sigmask_lock sigmask_lock +#define compat_dequeue_signal_current(siginfo_ptr) \ + dequeue_signal(&current->blocked, (siginfo_ptr)) +#elif LINUX_VERSION_CODE < KERNEL_VERSION(2, 5, 60) && !defined(INIT_SIGHAND) +/* RedHat's 2.4.x with first version of NPTL support, or 2.5.40 to 2.5.59 */ +#define compat_sigmask_lock sig->siglock +#define compat_dequeue_signal_current(siginfo_ptr) \ + dequeue_signal(&current->blocked, (siginfo_ptr)) +#else +/* RedHat's 2.4.x with second version of NPTL support, or 2.5.60+. */ +#define compat_sigmask_lock sighand->siglock +#if LINUX_VERSION_CODE < KERNEL_VERSION(2, 5, 0) +#define compat_dequeue_signal_current(siginfo_ptr) \ + dequeue_signal(&current->blocked, (siginfo_ptr)) +#else +#define compat_dequeue_signal_current(siginfo_ptr) \ + dequeue_signal(current, &current->blocked, (siginfo_ptr)) +#endif +#endif + +/* + * recalc_sigpending() had task argument in the past + */ +#if LINUX_VERSION_CODE < KERNEL_VERSION(2, 5, 29) && defined(CLONE_PID) +/* 2.4.x without NPTL patches or early 2.5.x */ +#define compat_recalc_sigpending() recalc_sigpending(current) +#else +/* RedHat's 2.4.x with NPTL support, or 2.5.29+ */ +#define compat_recalc_sigpending() recalc_sigpending() +#endif + + +/* + * reparent_to_init() was introduced in 2.4.8. In 2.5.38 (or possibly + * earlier, but later than 2.5.31) a call to it was added into + * daemonize(), so compat_daemonize no longer needs to call it. + * + * In 2.4.x kernels reparent_to_init() forgets to do correct refcounting + * on current->user. It is better to count one too many than one too few... + */ +#if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 4, 8) && LINUX_VERSION_CODE < KERNEL_VERSION(2, 5, 38) +#define compat_reparent_to_init() do { \ + reparent_to_init(); \ + atomic_inc(&current->user->__count); \ + } while (0) +#else +#define compat_reparent_to_init() do {} while (0) +#endif + + +/* + * daemonize appeared in 2.2.18. Except 2.2.17-4-RH7.0, which has it too. + * Fortunately 2.2.17-4-RH7.0 uses versioned symbols, so we can check + * its existence with defined(). + */ +#if (LINUX_VERSION_CODE < KERNEL_VERSION(2, 2, 18)) && !defined(daemonize) +static inline void daemonize(void) { + struct fs_struct *fs; + + exit_mm(current); + current->session = 1; + current->pgrp = 1; + exit_fs(current); + fs = init_task.fs; + current->fs = fs; + atomic_inc(&fs->count); +} +#endif + + +/* + * flush_signals acquires sighand->siglock since 2.5.61... Verify RH's kernels! + */ +#if LINUX_VERSION_CODE < KERNEL_VERSION(2, 5, 61) +#define compat_flush_signals(task) do { \ + spin_lock_irq(&task->compat_sigmask_lock); \ + flush_signals(task); \ + spin_unlock_irq(&task->compat_sigmask_lock); \ + } while (0) +#else +#define compat_flush_signals(task) flush_signals(task) +#endif + +#if LINUX_VERSION_CODE < KERNEL_VERSION(2, 5, 61) +#define compat_allow_signal(signr) do { \ + spin_lock_irq(&current->compat_sigmask_lock); \ + sigdelset(&current->blocked, signr); \ + compat_recalc_sigpending(); \ + spin_unlock_irq(&current->compat_sigmask_lock); \ + } while (0) +#else +#define compat_allow_signal(signr) allow_signal(signr) +#endif + +/* + * daemonize can set process name since 2.5.61.
Prior to 2.5.61, daemonize + * didn't block signals on our behalf. + */ +#if LINUX_VERSION_CODE < KERNEL_VERSION(2, 5, 61) +#define compat_daemonize(x...) \ +({ \ + /* Beware! No snprintf here, so verify arguments! */ \ + sprintf(current->comm, x); \ + \ + /* Block all signals. */ \ + spin_lock_irq(&current->compat_sigmask_lock); \ + sigfillset(&current->blocked); \ + compat_recalc_sigpending(); \ + spin_unlock_irq(&current->compat_sigmask_lock); \ + compat_flush_signals(current); \ + \ + daemonize(); \ + compat_reparent_to_init(); \ +}) +#else +#define compat_daemonize(x...) daemonize(x) +#endif + + +/* + * try to freeze a process. For kernels 2.6.11 or newer, we know how to choose + * the interface. The problem is that the oldest interface, introduced in + * 2.5.18, was backported to 2.4.x kernels. So if we're older than 2.6.11, + * we'll decide what to do based on whether or not swsusp was configured + * for the kernel. For kernels 2.6.20 and newer, we'll also need to include + * freezer.h since the try_to_freeze definition was pulled out of sched.h. + */ +#if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 20) +#include <linux/freezer.h> +#endif +#if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 13) || defined(VMW_TL10S64_WORKAROUND) +#define compat_try_to_freeze() try_to_freeze() +#elif LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 11) +#define compat_try_to_freeze() try_to_freeze(PF_FREEZE) +#elif defined(CONFIG_SOFTWARE_SUSPEND) || defined(CONFIG_SOFTWARE_SUSPEND2) +#include "compat_mm.h" +#include <linux/errno.h> +#include <linux/suspend.h> +static inline int compat_try_to_freeze(void) { + if (current->flags & PF_FREEZE) { + refrigerator(PF_FREEZE); + return 1; + } else { + return 0; + } +} +#else +static inline int compat_try_to_freeze(void) { return 0; } +#endif + +/* + * As of 2.6.23-rc1, kernel threads are no longer freezable by + * default. Instead, kernel threads that need to be frozen must opt-in + * by calling set_freezable() as soon as the thread is created. + */ + +#if LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 22) +#define compat_set_freezable() do { set_freezable(); } while (0) +#else +#define compat_set_freezable() do {} while (0) +#endif + +/* + * Around 2.6.27 kernel stopped sending signals to kernel + * threads being frozen, instead threads have to check + * freezing() or use wait_event_freezable(). Unfortunately + * wait_event_freezable() completely hides the fact that + * thread was frozen from calling code and sometimes we do + * want to know that. + */ +#ifdef PF_FREEZER_NOSIG +#define compat_wait_check_freezing() freezing(current) +#else +#define compat_wait_check_freezing() (0) +#endif + +/* + * Since 2.6.27-rc2 kill_proc() is gone... Replacement (GPL-only!) + * API is available since 2.6.19. Use them from 2.6.27-rc1 up. + */ +#if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 27) +typedef int compat_pid; +#define compat_find_get_pid(pid) (pid) +#define compat_put_pid(pid) do { } while (0) +#define compat_kill_pid(pid, sig, flag) kill_proc(pid, sig, flag) +#else +typedef struct pid * compat_pid; +#define compat_find_get_pid(pid) find_get_pid(pid) +#define compat_put_pid(pid) put_pid(pid) +#define compat_kill_pid(pid, sig, flag) kill_pid(pid, sig, flag) +#endif + + +#endif /* __COMPAT_SCHED_H__ */ diff --git a/vmmon-only/include/compat_semaphore.h b/vmmon-only/include/compat_semaphore.h new file mode 100644 index 00000000..f5527b9c --- /dev/null +++ b/vmmon-only/include/compat_semaphore.h @@ -0,0 +1,49 @@ +/********************************************************* + * Copyright (C) 2002 VMware, Inc. All rights reserved.
+ * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation version 2 and no later version. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY + * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * for more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write to the Free Software Foundation, Inc., + * 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + * + *********************************************************/ + +#ifndef __COMPAT_SEMAPHORE_H__ +# define __COMPAT_SEMAPHORE_H__ + + +/* <= 2.6.25 have asm only, 2.6.26 has both, and 2.6.27-rc2+ has linux only. */ +#if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 27) +# include <asm/semaphore.h> +#else +# include <linux/semaphore.h> +#endif + + +/* +* The init_MUTEX_LOCKED() API appeared in 2.2.18, and is also in +* 2.2.17-21mdk --hpreg +*/ + +#if LINUX_VERSION_CODE < KERNEL_VERSION(2, 2, 18) + #ifndef init_MUTEX_LOCKED + #define init_MUTEX_LOCKED(_sem) *(_sem) = MUTEX_LOCKED + #endif + #ifndef DECLARE_MUTEX + #define DECLARE_MUTEX(name) struct semaphore name = MUTEX + #endif + #ifndef DECLARE_MUTEX_LOCKED + #define DECLARE_MUTEX_LOCKED(name) struct semaphore name = MUTEX_LOCKED + #endif +#endif + + +#endif /* __COMPAT_SEMAPHORE_H__ */ diff --git a/vmmon-only/include/compat_spinlock.h b/vmmon-only/include/compat_spinlock.h new file mode 100644 index 00000000..b8987a5a --- /dev/null +++ b/vmmon-only/include/compat_spinlock.h @@ -0,0 +1,48 @@ +/********************************************************* + * Copyright (C) 2005 VMware, Inc. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation version 2 and no later version. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY + * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * for more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write to the Free Software Foundation, Inc., + * 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + * + *********************************************************/ + +#ifndef __COMPAT_SPINLOCK_H__ +# define __COMPAT_SPINLOCK_H__ + +#include <linux/spinlock.h> + +/* + * Preempt support was added during 2.5.x development cycle, and later + * it was backported to 2.4.x. In 2.4.x backport these definitions + * live in linux/spinlock.h, that's why we put them here (in 2.6.x they + * are defined in linux/preempt.h which is included by linux/spinlock.h).
+ */ +#ifdef CONFIG_PREEMPT +#define compat_preempt_disable() preempt_disable() +#define compat_preempt_enable() preempt_enable() +#else +#define compat_preempt_disable() do { } while (0) +#define compat_preempt_enable() do { } while (0) +#endif + +/* Some older kernels - 2.6.10 and earlier - lack DEFINE_SPINLOCK */ +#ifndef DEFINE_SPINLOCK +#define DEFINE_SPINLOCK(x) spinlock_t x = SPIN_LOCK_UNLOCKED +#endif + +/* Same goes for DEFINE_RWLOCK */ +#ifndef DEFINE_RWLOCK +#define DEFINE_RWLOCK(x) rwlock_t x = RW_LOCK_UNLOCKED +#endif + +#endif /* __COMPAT_SPINLOCK_H__ */ diff --git a/vmmon-only/include/compat_version.h b/vmmon-only/include/compat_version.h new file mode 100644 index 00000000..56d021cf --- /dev/null +++ b/vmmon-only/include/compat_version.h @@ -0,0 +1,131 @@ +/********************************************************* + * Copyright (C) 1998 VMware, Inc. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation version 2 and no later version. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY + * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * for more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write to the Free Software Foundation, Inc., + * 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + * + *********************************************************/ + +#ifndef __COMPAT_VERSION_H__ +# define __COMPAT_VERSION_H__ + +#define INCLUDE_ALLOW_VMMON +#define INCLUDE_ALLOW_MODULE +#define INCLUDE_ALLOW_VMCORE +#define INCLUDE_ALLOW_DISTRIBUTE +#define INCLUDE_ALLOW_VMKDRIVERS +#include "includeCheck.h" + + +#ifndef __linux__ +# error "linux-version.h" +#endif + + +#include <linux/version.h> + +#ifndef KERNEL_VERSION +# error KERNEL_VERSION macro is not defined, environment is busted +#endif + + +/* + * Distinguish relevant classes of Linux kernels. + * + * The convention is that version X defines all + * the KERNEL_Y symbols where Y <= X. + * + * XXX Do not add more definitions here.
This way of doing things does not + * scale, and we are going to phase it out soon --hpreg + */ + +#if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 1, 0) +# define KERNEL_2_1 +#endif + +#if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 2, 0) +# define KERNEL_2_2 +#endif + +#if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 3, 1) +# define KERNEL_2_3_1 +#endif + +#if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 3, 15) +/* new networking */ +# define KERNEL_2_3_15 +#endif + +#if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 3, 25) +/* new procfs */ +# define KERNEL_2_3_25 +#endif + +#if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 3, 29) +/* even newer procfs */ +# define KERNEL_2_3_29 +#endif + +#if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 3, 43) +/* softnet changes */ +# define KERNEL_2_3_43 +#endif + +#if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 3, 47) +/* more softnet changes */ +# define KERNEL_2_3_47 +#endif + +#if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 3, 99) +/* name in netdevice struct is array and not pointer */ +# define KERNEL_2_3_99 +#endif + +#if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 4, 0) +/* New 'owner' member at the beginning of struct file_operations */ +# define KERNEL_2_4_0 +#endif + +#if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 4, 8) +/* New netif_rx_ni() --hpreg */ +# define KERNEL_2_4_8 +#endif + +#if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 5, 2) +/* New kdev_t, major()/minor() API --hpreg */ +# define KERNEL_2_5_2 +#endif + +#if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 5, 5) +/* New sk_alloc(), pte_offset_map()/pte_unmap() --hpreg */ +# define KERNEL_2_5_5 +#endif + +/* Linux kernel 3.0 can be called 2.6.40, and 3.1 can be 2.6.41... + * Use COMPAT_LINUX_VERSION_CHECK_LT iff you need to compare running kernel to + * versions 3.0 and above. + * + */ +#if LINUX_VERSION_CODE >= KERNEL_VERSION(3, 0, 0) + /* Straight forward comparison if kernel version is 3.0.0 and beyond */ +# define COMPAT_LINUX_VERSION_CHECK_LT(a, b, c) LINUX_VERSION_CODE < KERNEL_VERSION (a, b, c) +#elif LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 40) + /* Use b of the check to calculate corresponding c of kernel + * version to compare */ +# define COMPAT_LINUX_VERSION_CHECK_LT(a, b, c) LINUX_VERSION_CODE < KERNEL_VERSION (2, 6, (b + 40)) +#else + /* This is anyways lesser than any 3.x versions */ +# define COMPAT_LINUX_VERSION_CHECK_LT(a, b, c) 1 +#endif + +#endif /* __COMPAT_VERSION_H__ */ diff --git a/vmmon-only/include/contextinfo.h b/vmmon-only/include/contextinfo.h new file mode 100644 index 00000000..0e6dcbb2 --- /dev/null +++ b/vmmon-only/include/contextinfo.h @@ -0,0 +1,78 @@ +/********************************************************* + * Copyright (C) 2005-2014 VMware, Inc. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation version 2 and no later version. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY + * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * for more details. 
+ * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write to the Free Software Foundation, Inc., + * 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + * + *********************************************************/ + + +/* + * contextinfo.h + * + * Context structures shared across all products + */ + +#ifndef _CONTEXTINFO_H +#define _CONTEXTINFO_H + +#define INCLUDE_ALLOW_VMCORE +#define INCLUDE_ALLOW_VMKERNEL +#define INCLUDE_ALLOW_VMMON +#include "includeCheck.h" + +#include "x86desc.h" + +#pragma pack(push, 1) +typedef struct Context64 { + uint64 cr3; + uint64 rax; + uint64 rcx; + uint64 rdx; + uint64 rbx; + uint64 rsi; + uint64 rdi; + uint64 rbp; + uint64 rsp; + uint64 r8; + uint64 r9; + uint64 r10; + uint64 r11; + uint64 r12; + uint64 r13; + uint64 r14; + uint64 r15; + uint32 cs; + uint32 ds; + uint32 ss; + uint32 es; + uint32 fs; + uint32 gs; + uint64 rip; + uint64 eflags; + uint16 ldt; + uint16 _pad[3]; +} Context64; +#pragma pack(pop) + +#pragma pack(push, 1) +typedef struct ContextInfo64 { + DTRWords64 gdtr; + DTRWords64 idtr; + Context64 context; + uint16 tr; + uint16 _pad0; +} ContextInfo64; +#pragma pack(pop) + +#endif diff --git a/vmmon-only/include/cpu_defs.h b/vmmon-only/include/cpu_defs.h new file mode 100644 index 00000000..03f7b702 --- /dev/null +++ b/vmmon-only/include/cpu_defs.h @@ -0,0 +1,49 @@ +/********************************************************* + * Copyright (C) 2009 VMware, Inc. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation version 2 and no later version. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY + * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * for more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write to the Free Software Foundation, Inc., + * 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + * + *********************************************************/ + +/* + * cpu_defs.h -- + * + * CPU-related definitions shared between vmkernel and user-space. + */ + +#ifndef CPU_DEFS_H +#define CPU_DEFS_H + +#define INCLUDE_ALLOW_VMKERNEL +#define INCLUDE_ALLOW_DISTRIBUTE +#define INCLUDE_ALLOW_VMKDRIVERS +#define INCLUDE_ALLOW_VMK_MODULE +#define INCLUDE_ALLOW_MODULE +#define INCLUDE_ALLOW_USERLEVEL +#define INCLUDE_ALLOW_VMCORE +#define INCLUDE_ALLOW_VMMON + +#define INCLUDE_ALLOW_VMX +#include "includeCheck.h" + +#include "vm_basic_types.h" + +typedef uint32 PCPU; +#define INVALID_PCPU ((PCPU) -1) + +#define MAX_PCPUS 576 +#define MAX_PCPUS_BITS 10 // MAX_PCPUS <= (1 << MAX_PCPUS_BITS) +#define MAX_PCPUS_MASK ((1 << MAX_PCPUS_BITS) - 1) + +#endif diff --git a/vmmon-only/include/cpuid_info.h b/vmmon-only/include/cpuid_info.h new file mode 100644 index 00000000..1d3ba307 --- /dev/null +++ b/vmmon-only/include/cpuid_info.h @@ -0,0 +1,95 @@ +/********************************************************* + * Copyright (C) 1998-2015 VMware, Inc. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation version 2 and no later version. 
+ * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY + * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * for more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write to the Free Software Foundation, Inc., + * 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + * + *********************************************************/ + +#ifndef _CPUID_INFO_H +#define _CPUID_INFO_H + +#define INCLUDE_ALLOW_USERLEVEL +#define INCLUDE_ALLOW_VMMON +#define INCLUDE_ALLOW_VMCORE +#define INCLUDE_ALLOW_VMKERNEL + +#include "includeCheck.h" + +#include "vm_basic_asm.h" +#include "x86cpuid_asm.h" + + +typedef struct CPUID0 { + int numEntries; + char name[16]; // 4 extra bytes to null terminate +} CPUID0; + +typedef struct CPUID1 { + uint32 version; + uint32 ebx; + uint32 ecxFeatures; + uint32 edxFeatures; +} CPUID1; + +typedef struct CPUID80 { + uint32 numEntries; + uint32 ebx; + uint32 ecx; + uint32 edx; +} CPUID80; + +typedef struct CPUID81 { + uint32 eax; + uint32 ebx; + uint32 ecxFeatures; + uint32 edxFeatures; +} CPUID81; + +typedef struct CPUIDSummary { + CPUID0 id0; + CPUID1 id1; + CPUIDRegs ida; + CPUID80 id80; + CPUID81 id81; + CPUIDRegs id88, id8a; +} CPUIDSummary; + + +/* + *---------------------------------------------------------------------- + * + * CPUIDSummary_RegsFromCpuid0 -- + * + * Fills in the given CPUIDRegs struct with the values from the CPUID0 struct. + * + * Results: + * Returns the CPUIDRegs pointer passed in. + * + * Side effects: + * None. + * + *---------------------------------------------------------------------- + */ +static INLINE CPUIDRegs* +CPUIDSummary_RegsFromCpuid0(CPUID0* id0In, + CPUIDRegs* id0Out) +{ + id0Out->eax = id0In->numEntries; + id0Out->ebx = *(uint32 *) (id0In->name + 0); + id0Out->edx = *(uint32 *) (id0In->name + 4); + id0Out->ecx = *(uint32 *) (id0In->name + 8); + return id0Out; +} + +#endif diff --git a/vmmon-only/include/crossgdt.h b/vmmon-only/include/crossgdt.h new file mode 100644 index 00000000..ad823eaa --- /dev/null +++ b/vmmon-only/include/crossgdt.h @@ -0,0 +1,79 @@ +/********************************************************* + * Copyright (C) 2006-2014 VMware, Inc. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation version 2 and no later version. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY + * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * for more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write to the Free Software Foundation, Inc., + * 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + * + *********************************************************/ + +/* + * crossgdt.h -- + * + * This GDT is used for switching between monitor and host context. + * It contains the host and monitor basic segment descriptors. + * There is only one in the whole host system, shared by all VMs. + * It is allocated when the first VCPU is started and freed when the + * driver is unloaded. 
+ */ + +#ifndef _CROSSGDT_H_ +#define _CROSSGDT_H_ + +#define INCLUDE_ALLOW_USERLEVEL + +#define INCLUDE_ALLOW_VMMON +#define INCLUDE_ALLOW_VMCORE +#include "includeCheck.h" + +#include "vm_basic_defs.h" // PAGE_SIZE +#include "x86types.h" // Descriptor + +typedef struct CrossGDT { + Descriptor gdtes[0x5000 / sizeof (Descriptor)]; // 0x5000 > GDT_LIMIT +} CrossGDT; + +#define CROSSGDT_NUMPAGES ((sizeof (CrossGDT) + PAGE_SIZE - 1) / PAGE_SIZE) + +/* + * Out of the 5 pages, only the first and last are really used. + * + * All we need to map are the first and last pages. This mask tells + * the setup code which pages it can put stuff in and it tells the + * mapping and invalidation code which pages are mapped and unmapped. + */ +#define CROSSGDT_PAGEMASK 0x11 + +/* + * These segments are placed in the first crossGDT page. We assume + * they do not overlap any host segments (checked by + * Task_InitCrossGDT). The only real requirement is that they (and + * the host CS/SS) be physically contiguous with the start of the + * crossGDT so they will remain valid when paging is turned off. + * + * As of this writing (Nov 2006), host GDT limits: + * Linux 64 bit: 80 (yes 80, not 7F) + * 32 bit: FF + * MacOS 64 bit: 8F + * 32 bit: 8F + * Windows 64 bit: 6F + * 32 bit: 3FF + */ +#define CROSSGDT_LOWSEG (0x0FD0 / 8) // all host segs must be below this +#define CROSSGDT_64BITCS (0x0FD0 / 8) // 64-bit code segment +#define CROSSGDT_64BITSS (0x0FD8 / 8) // 64-bit data segment +#define CROSSGDT_FLAT32CS (0x0FE0 / 8) // 32-bit flat code seg +#define CROSSGDT_FLAT32SS (0x0FE8 / 8) // 32-bit flat data seg +#define CROSSGDT_MON32CS (0x0FF0 / 8) // 32-bit FFC00000 base code seg +#define CROSSGDT_MON32SS (0x0FF8 / 8) // 32-bit FFC00000 base data seg + +#endif diff --git a/vmmon-only/include/driver-config.h b/vmmon-only/include/driver-config.h new file mode 100644 index 00000000..fbf45d3c --- /dev/null +++ b/vmmon-only/include/driver-config.h @@ -0,0 +1,79 @@ +/********************************************************* + * Copyright (C) 1998 VMware, Inc. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation version 2 and no later version. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY + * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * for more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write to the Free Software Foundation, Inc., + * 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + * + *********************************************************/ + +/* + * Sets the proper defines from the Linux header files + * + * This file must be included before the inclusion of any kernel header file, + * with the exception of linux/autoconf.h and linux/version.h --hpreg + */ + +#ifndef __VMX_CONFIG_H__ +#define __VMX_CONFIG_H__ + +#define INCLUDE_ALLOW_VMCORE +#define INCLUDE_ALLOW_VMMON +#define INCLUDE_ALLOW_MODULE +#define INCLUDE_ALLOW_DISTRIBUTE +#define INCLUDE_ALLOW_VMKDRIVERS +#include "includeCheck.h" + +#include "compat_version.h" +#include "compat_autoconf.h" + +/* + * We rely on Kernel Module support. Check here. + */ +#ifndef CONFIG_MODULES +# error "No Module support in this kernel. 
Please configure with CONFIG_MODULES" +#endif + +/* + * 2.2 kernels still use __SMP__ (derived from CONFIG_SMP + * in the main Makefile), so we do it here. + */ + +#ifdef CONFIG_SMP +# define __SMP__ 1 +#endif + +#if defined(CONFIG_MODVERSIONS) && defined(KERNEL_2_1) +# if LINUX_VERSION_CODE < KERNEL_VERSION(2,5,60) +/* + * MODVERSIONS might be already defined when using kernel's Makefiles. + */ +# ifndef MODVERSIONS +# define MODVERSIONS +# endif +# include <linux/modversions.h> +# endif +#endif + +#if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 24) +/* + * Force the uintptr_t definition to come from linux/types.h instead of vm_basic_types.h. + */ +# include <linux/types.h> +# define _STDINT_H 1 +#endif + +#ifndef __KERNEL__ +# define __KERNEL__ +#endif + +#endif diff --git a/vmmon-only/include/hashFunc.h b/vmmon-only/include/hashFunc.h new file mode 100644 index 00000000..fec72614 --- /dev/null +++ b/vmmon-only/include/hashFunc.h @@ -0,0 +1,345 @@ +/********************************************************* + * Copyright (C) 2001,2016 VMware, Inc. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation version 2 and no later version. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY + * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * for more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write to the Free Software Foundation, Inc., + * 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + * + *********************************************************/ + +#ifndef _HASH_FUNC_H +#define _HASH_FUNC_H + +#define INCLUDE_ALLOW_USERLEVEL +#define INCLUDE_ALLOW_VMMON +#define INCLUDE_ALLOW_VMCORE +#define INCLUDE_ALLOW_VMKERNEL + +#include "includeCheck.h" + +/* + * hashFunc.h -- + * + * Hash functions defined as simple wrappers around code placed + * in the public domain by Bob Jenkins. His original code can + * be used freely for any purpose, including commercial (see the + * original comments in the enclosed code). + */ + +#include "vm_basic_types.h" +#include "vm_basic_defs.h" +#include "vm_assert.h" + +/* + * operations + */ + +extern uint64 HashFunc_HashBytes(const uint8 *key, uint32 nBytes); +extern uint64 HashFunc_HashMoreBytes(const uint8 *key, uint32 nBytes, + uint64 initialValue); +extern uint64 HashFunc_HashBytesSlow(const uint8 *key, uint32 nBytes); +extern uint64 HashFunc_HashQuads(const uint64 *key, uint32 nQuads); +extern uint64 HashFunc_HashPage(const void *addr); + +/* +-------------------------------------------------------------------- +Hashes 32-bit key and returns a value that can fit into numBits bits. +-------------------------------------------------------------------- +*/ + +static INLINE uint32 +HashFunc_TruncHash32(uint32 key, unsigned numBits) +{ + const uint32 hashConst = 0x9e3779b9; /* the golden ratio */ + ASSERT(numBits > 0 && numBits <= 32); + return (key * hashConst) >> (32 - numBits); +} + +/* +-------------------------------------------------------------------- +Hashes 64-bit key and returns a value that can fit into numBits bits.
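+For example (illustrative usage only, not part of the original sources): +HashFunc_TruncHash64(key, 10) returns a value in [0, 1023], i.e. an index +suitable for a 1024-entry (hashsize(10)) table.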
+-------------------------------------------------------------------- +*/ + +static INLINE uint32 +HashFunc_TruncHash64(uint64 key, unsigned numBits) +{ + return HashFunc_TruncHash32((uint32)(key ^ (key >> 32)), numBits); +} + + +/* + * Original Bob Jenkins public-domain code with minor modifications + * (static functions, 32-bit length args, disabled self-test code). + */ + + +typedef uint64 ub8; /* unsigned 8-byte quantities */ +typedef uint32 ub4; /* unsigned 4-byte quantities */ +typedef uint8 ub1; + +#define hashsize(n) ((ub8)1<<(n)) +#define hashmask(n) (hashsize(n)-1) + +/* +-------------------------------------------------------------------- +mix -- mix 3 64-bit values reversibly. +mix() takes 48 machine instructions, but only 24 cycles on a superscalar + machine (like Intel's new MMX architecture). It requires 4 64-bit + registers for 4::2 parallelism. +All 1-bit deltas, all 2-bit deltas, all deltas composed of top bits of + (a,b,c), and all deltas of bottom bits were tested. All deltas were + tested both on random keys and on keys that were nearly all zero. + These deltas all cause every bit of c to change between 1/3 and 2/3 + of the time (well, only 113/400 to 287/400 of the time for some + 2-bit delta). These deltas all cause at least 80 bits to change + among (a,b,c) when the mix is run either forward or backward (yes it + is reversible). +This implies that a hash using mix64 has no funnels. There may be + characteristics with 3-bit deltas or bigger, I didn't test for + those. +-------------------------------------------------------------------- +*/ +#define mix64(a,b,c) \ +{ \ + a -= b; a -= c; a ^= (c>>43); \ + b -= c; b -= a; b ^= (a<<9); \ + c -= a; c -= b; c ^= (b>>8); \ + a -= b; a -= c; a ^= (c>>38); \ + b -= c; b -= a; b ^= (a<<23); \ + c -= a; c -= b; c ^= (b>>5); \ + a -= b; a -= c; a ^= (c>>35); \ + b -= c; b -= a; b ^= (a<<49); \ + c -= a; c -= b; c ^= (b>>11); \ + a -= b; a -= c; a ^= (c>>12); \ + b -= c; b -= a; b ^= (a<<18); \ + c -= a; c -= b; c ^= (b>>22); \ +} + +/* +-------------------------------------------------------------------- +hash() -- hash a variable-length key into a 64-bit value + k : the key (the unaligned variable-length array of bytes) + len : the length of the key, counting by bytes + level : can be any 8-byte value +Returns a 64-bit value. Every bit of the key affects every bit of +the return value. No funnels. Every 1-bit and 2-bit delta achieves +avalanche. About 41+5len instructions. + +The best hash table sizes are powers of 2. There is no need to do +mod a prime (mod is sooo slow!). If you need less than 64 bits, +use a bitmask. For example, if you need only 10 bits, do + h = (h & hashmask(10)); +In which case, the hash table should have hashsize(10) elements. 
+ +If you are hashing n strings (ub1 **)k, do it like this: + for (i=0, h=0; i<n; ++i) h = hash(k[i], len[i], h); +-------------------------------------------------------------------- +*/ + +static INLINE ub8 hash(register const ub1 *k, /* the key */ + register ub4 length, /* the length of the key */ + register ub8 level) /* the previous hash, or an arbitrary value */ +{ + register ub8 a,b,c; + ub4 len; + + /* Set up the internal state */ + len = length; + a = b = level; /* the previous hash value */ + c = CONST64U(0x9e3779b97f4a7c13); /* the golden ratio; an arbitrary value */ + + /*---------------------------------------- handle most of the key */ + while (len >= 24) + { + a += (k[0] +((ub8)k[ 1]<< 8)+((ub8)k[ 2]<<16)+((ub8)k[ 3]<<24) + +((ub8)k[4 ]<<32)+((ub8)k[ 5]<<40)+((ub8)k[ 6]<<48)+((ub8)k[ 7]<<56)); + b += (k[8] +((ub8)k[ 9]<< 8)+((ub8)k[10]<<16)+((ub8)k[11]<<24) + +((ub8)k[12]<<32)+((ub8)k[13]<<40)+((ub8)k[14]<<48)+((ub8)k[15]<<56)); + c += (k[16] +((ub8)k[17]<< 8)+((ub8)k[18]<<16)+((ub8)k[19]<<24) + +((ub8)k[20]<<32)+((ub8)k[21]<<40)+((ub8)k[22]<<48)+((ub8)k[23]<<56)); + mix64(a,b,c); + k += 24; len -= 24; + } + + /*------------------------------------- handle the last 23 bytes */ + c += length; + switch(len) /* all the case statements fall through */ + { + case 23: c+=((ub8)k[22]<<56); + case 22: c+=((ub8)k[21]<<48); + case 21: c+=((ub8)k[20]<<40); + case 20: c+=((ub8)k[19]<<32); + case 19: c+=((ub8)k[18]<<24); + case 18: c+=((ub8)k[17]<<16); + case 17: c+=((ub8)k[16]<<8); + /* the first byte of c is reserved for the length */ + case 16: b+=((ub8)k[15]<<56); + case 15: b+=((ub8)k[14]<<48); + case 14: b+=((ub8)k[13]<<40); + case 13: b+=((ub8)k[12]<<32); + case 12: b+=((ub8)k[11]<<24); + case 11: b+=((ub8)k[10]<<16); + case 10: b+=((ub8)k[ 9]<<8); + case 9: b+=((ub8)k[ 8]); + case 8: a+=((ub8)k[ 7]<<56); + case 7: a+=((ub8)k[ 6]<<48); + case 6: a+=((ub8)k[ 5]<<40); + case 5: a+=((ub8)k[ 4]<<32); + case 4: a+=((ub8)k[ 3]<<24); + case 3: a+=((ub8)k[ 2]<<16); + case 2: a+=((ub8)k[ 1]<<8); + case 1: a+=((ub8)k[ 0]); + /* case 0: nothing left to add */ + } + mix64(a,b,c); + /*-------------------------------------------- report the result */ + return c; +} + +/* +-------------------------------------------------------------------- + This works on all machines, is identical to hash() on little-endian + machines, and it is much faster than hash(), but it requires + -- that the key be an array of ub8's, and + -- that all your machines have the same endianness, and + -- that the length be the number of ub8's in the key +-------------------------------------------------------------------- +*/ +static INLINE ub8 hash2(register const ub8 *k, /* the key */ + register ub4 length, /* the length of the key */ + register ub8 level) /* the previous hash, or an arbitrary value */ +{ + register ub8 a,b,c; + ub4 len; + + /* Set up the internal state */ + len = length; + a = b = level; /* the previous hash value */ + c = CONST64U(0x9e3779b97f4a7c13); /* the golden ratio; an arbitrary value */ + + /*---------------------------------------- handle most of the key */ + while (len >= 3) + { + a += k[0]; + b += k[1]; + c += k[2]; + mix64(a,b,c); + k += 3; len -= 3; + } + + /*-------------------------------------- handle the last 2 ub8's */ + c += length; + switch(len) /* all the case statements fall through */ + { + /* c is reserved for the length */ + case 2: b+=k[1]; + case 1: a+=k[0]; + /* case 0: nothing left to add */ + } + mix64(a,b,c); + /*-------------------------------------------- report the result */ + return c; +} + +/* +-------------------------------------------------------------------- + This is identical to hash() on little-endian machines, and it is much + faster than hash(), but a little slower than hash2(), and it requires + -- that all your machines be little-endian, for example all Intel x86 + chips or all VAXen. It gives wrong results on big-endian machines.
+-------------------------------------------------------------------- +*/ + +static INLINE ub8 hash3(register const ub1 *k, /* the key */ + register ub4 length, /* the length of the key */ + register ub8 level) /* the previous hash, or an arbitrary value */ +{ + register ub8 a,b,c; + ub4 len; + + /* Set up the internal state */ + len = length; + a = b = level; /* the previous hash value */ + c = CONST64U(0x9e3779b97f4a7c13); /* the golden ratio; an arbitrary value */ + + /*---------------------------------------- handle most of the key */ + if (((size_t)k)&7) + { + while (len >= 24) + { + a += (k[0] +((ub8)k[ 1]<< 8)+((ub8)k[ 2]<<16)+((ub8)k[ 3]<<24) + +((ub8)k[4 ]<<32)+((ub8)k[ 5]<<40)+((ub8)k[ 6]<<48)+((ub8)k[ 7]<<56)); + b += (k[8] +((ub8)k[ 9]<< 8)+((ub8)k[10]<<16)+((ub8)k[11]<<24) + +((ub8)k[12]<<32)+((ub8)k[13]<<40)+((ub8)k[14]<<48)+((ub8)k[15]<<56)); + c += (k[16] +((ub8)k[17]<< 8)+((ub8)k[18]<<16)+((ub8)k[19]<<24) + +((ub8)k[20]<<32)+((ub8)k[21]<<40)+((ub8)k[22]<<48)+((ub8)k[23]<<56)); + mix64(a,b,c); + k += 24; len -= 24; + } + } + else + { + while (len >= 24) /* aligned */ + { + a += *(ub8 *)(k+0); + b += *(ub8 *)(k+8); + c += *(ub8 *)(k+16); + mix64(a,b,c); + k += 24; len -= 24; + } + } + + /*------------------------------------- handle the last 23 bytes */ + c += length; + switch(len) /* all the case statements fall through */ + { + case 23: c+=((ub8)k[22]<<56); + case 22: c+=((ub8)k[21]<<48); + case 21: c+=((ub8)k[20]<<40); + case 20: c+=((ub8)k[19]<<32); + case 19: c+=((ub8)k[18]<<24); + case 18: c+=((ub8)k[17]<<16); + case 17: c+=((ub8)k[16]<<8); + /* the first byte of c is reserved for the length */ + case 16: b+=((ub8)k[15]<<56); + case 15: b+=((ub8)k[14]<<48); + case 14: b+=((ub8)k[13]<<40); + case 13: b+=((ub8)k[12]<<32); + case 12: b+=((ub8)k[11]<<24); + case 11: b+=((ub8)k[10]<<16); + case 10: b+=((ub8)k[ 9]<<8); + case 9: b+=((ub8)k[ 8]); + case 8: a+=((ub8)k[ 7]<<56); + case 7: a+=((ub8)k[ 6]<<48); + case 6: a+=((ub8)k[ 5]<<40); + case 5: a+=((ub8)k[ 4]<<32); + case 4: a+=((ub8)k[ 3]<<24); + case 3: a+=((ub8)k[ 2]<<16); + case 2: a+=((ub8)k[ 1]<<8); + case 1: a+=((ub8)k[ 0]); + /* case 0: nothing left to add */ + } + mix64(a,b,c); + /*-------------------------------------------- report the result */ + return c; +} + +#endif //_HASH_FUNC_H diff --git a/vmmon-only/include/includeCheck.h b/vmmon-only/include/includeCheck.h new file mode 100644 index 00000000..08fd1e58 --- /dev/null +++ b/vmmon-only/include/includeCheck.h @@ -0,0 +1,17 @@ +/********************************************************* + * Copyright (C) 2008 VMware, Inc. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation version 2 and no later version. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY + * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * for more details. 
+ * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write to the Free Software Foundation, Inc., + * 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + * + *********************************************************/ diff --git a/vmmon-only/include/initblock.h b/vmmon-only/include/initblock.h new file mode 100644 index 00000000..e101e9ff --- /dev/null +++ b/vmmon-only/include/initblock.h @@ -0,0 +1,43 @@ +/********************************************************* + * Copyright (C) 1998 VMware, Inc. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation version 2 and no later version. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY + * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * for more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write to the Free Software Foundation, Inc., + * 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + * + *********************************************************/ + +#ifndef _INITBLOCK_H +#define _INITBLOCK_H + + +#define INCLUDE_ALLOW_VMX +#define INCLUDE_ALLOW_VMCORE +#define INCLUDE_ALLOW_VMMON +#include "includeCheck.h" + + +#include "basic_initblock.h" + + +#define MAX_LOGFILE_NAME_LENGTH 256 +#define MAX_MONITOR_REGIONS 8 +#define INIT_BLOCK_MAGIC (0x1789+14) + + +/* + * Option flags + */ +/* None yet */ + + +#endif diff --git a/vmmon-only/include/iocontrols.h b/vmmon-only/include/iocontrols.h new file mode 100644 index 00000000..6bce93ee --- /dev/null +++ b/vmmon-only/include/iocontrols.h @@ -0,0 +1,623 @@ +/********************************************************* + * Copyright (C) 1998-2015 VMware, Inc. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation version 2 and no later version. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY + * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * for more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write to the Free Software Foundation, Inc., + * 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + * + *********************************************************/ + + +/* + * iocontrols.h + * + * The driver io controls. + */ + +#ifndef _IOCONTROLS_H_ +#define _IOCONTROLS_H_ + +#define INCLUDE_ALLOW_USERLEVEL + +#define INCLUDE_ALLOW_VMMON +#define INCLUDE_ALLOW_VMCORE +#define INCLUDE_ALLOW_MODULE +#include "includeCheck.h" + +#ifdef VMX86_SERVER +#error iocontrols.h is for hosted vmmon, do not use on visor +#endif + +#include "basic_initblock.h" +#include "x86segdescrs.h" +#include "rateconv.h" +#include "overheadmem_types.h" +#include "pageLock_defs.h" +#include "numa_defs.h" + +/* + *----------------------------------------------------------------------------- + * + * VA64ToPtr -- + * + * Convert a VA64 to a pointer. 
+ * + * Usage of this function is strictly limited to these 2 cases: + * + * 1) In a VMX function which does an ioctl to vmmon, and receives a VMX + * pointer as a result. + * + * 2) In the vmmon code, for the functions which have a VA64 and need + * to call kernel APIs which take pointers. + * + * Results: + * Virtual address. + * + * Side effects: + * None + * + *----------------------------------------------------------------------------- + */ + +static INLINE void * +VA64ToPtr(VA64 va64) // IN +{ +#ifdef VM_64BIT + ASSERT_ON_COMPILE(sizeof (void *) == 8); +#else + ASSERT_ON_COMPILE(sizeof (void *) == 4); + // Check that nothing of value will be lost. + ASSERT(!(va64 >> 32)); +#endif + return (void *)(uintptr_t)va64; +} + + +/* + *----------------------------------------------------------------------------- + * + * PtrToVA64 -- + * + * Convert a pointer to a VA64. + * + * Usage of this function is strictly limited to these 2 cases: + * + * 1) In a VMX function which does an ioctl to vmmon, and passes in a VMX + * pointer. + * + * 2) In the vmmon code, for the functions which need to pass in a kernel + * pointer to functions which can take either a user or a kernel + * pointer in the same parameter. + * + * Results: + * Virtual address. + * + * Side effects: + * None + * + *----------------------------------------------------------------------------- + */ + +static INLINE VA64 +PtrToVA64(void const *ptr) // IN +{ + ASSERT_ON_COMPILE(sizeof ptr <= sizeof (VA64)); + return (VA64)(uintptr_t)ptr; +} + + +/* + * Driver version. + * + * Increment major version when you make an incompatible change. + * Compatibility goes both ways (old driver with new executable + * as well as new driver with old executable). + * + * Note: Vmcore compatibility is different from driver versioning. + * For vmcore puposes, the bora tree is conceptually split in two: + * vmcore, and rest-of-bora. The vmmon driver is largely outside + * vmcore and vmcore imports functionality from vmmon. Addition, + * deletion or modification of an iocontrol used only by rest-of-bora + * does not break vmcore compatibility. + * + * See bora/doc/vmcore details. + * + */ + +#define VMMON_VERSION (308 << 16 | 0) +#define VMMON_VERSION_MAJOR(v) ((uint32) (v) >> 16) +#define VMMON_VERSION_MINOR(v) ((uint16) (v)) + + +/* + * ENOMEM returned after MAX_VMS virtual machines created + */ + +#ifdef VMX86_SERVER +#define MAX_VMS 128 +#else +#define MAX_VMS 64 +#endif +/* + * MsgWaitForMultipleObjects doesn't scale well enough on Win32. + * Allocate with MAX_VMS so static buffers are large, but do + * admissions control with this value on Win32 until we check + * scalability (probably in authd). + */ +#ifdef _WIN32 +#define MAX_VMS_WIN32 64 +#endif + + +#if !__linux__ +/* + * On platforms other than Linux, IOCTLCMD_foo values are just numbers, and + * we build the IOCTL_VMX86_foo values around these using platform-specific + * format for encoding arguments and sizes. + */ +# define IOCTLCMD(_cmd) IOCTLCMD_ ## _cmd +#else // if __linux__ +/* + * Linux defines _IO* macros, but the core kernel code ignore the encoded + * ioctl value. It is up to individual drivers to decode the value (for + * example to look at the size of a structure to determine which version + * of a specific command should be used) or not (which is what we + * currently do, so right now the ioctl value for a given command is the + * command itself). + * + * Hence, we just define the IOCTL_VMX86_foo values directly, with no + * intermediate IOCTLCMD_ representation. 
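+ * + * For example (illustrative expansion only, no additional definitions): on + * Linux, IOCTLCMD(VERSION) expands directly to IOCTL_VMX86_VERSION, whereas + * on the other platforms it names the enum constant IOCTLCMD_VERSION, which + * the platform-specific wrappers below encode into the final ioctl value.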
+ */ +# define IOCTLCMD(_cmd) IOCTL_VMX86_ ## _cmd +#endif + + +enum IOCTLCmd { + /* + * We need to bracket the range of values used for ioctls, because x86_64 + * Linux forces us to explicitly register ioctl handlers by value for + * handling 32 bit ioctl syscalls. Hence FIRST and LAST. FIRST must be + * 2001 so that VERSION is 2001 for backwards compatibility. + */ +#if defined __linux__ || defined _WIN32 + /* Start at 2001 because legacy code did. */ + IOCTLCMD(FIRST) = 2001, +#else + /* Start at 0. */ + IOCTLCMD(FIRST), +#endif + IOCTLCMD(VERSION) = IOCTLCMD(FIRST), + IOCTLCMD(CREATE_VM), + IOCTLCMD(RELEASE_VM), + IOCTLCMD(GET_NUM_VMS), + IOCTLCMD(ALLOC_CROSSGDT), + IOCTLCMD(INIT_VM), + IOCTLCMD(INIT_CROSSGDT), + IOCTLCMD(RUN_VM), + IOCTLCMD(LOOK_UP_MPN), + IOCTLCMD(LOCK_PAGE), + IOCTLCMD(UNLOCK_PAGE), + IOCTLCMD(APIC_INIT), + IOCTLCMD(SET_HARD_LIMIT), + IOCTLCMD(GET_MEM_INFO), + IOCTLCMD(ADMIT), + IOCTLCMD(UPDATE_MEM_INFO), + IOCTLCMD(READMIT), + IOCTLCMD(PAE_ENABLED), + IOCTLCMD(GET_TOTAL_MEM_USAGE), + IOCTLCMD(GET_KHZ_ESTIMATE), + IOCTLCMD(SET_HOST_CLOCK_RATE), + IOCTLCMD(READ_PAGE), + IOCTLCMD(WRITE_PAGE), + IOCTLCMD(LOCK_PAGE_NEW), + IOCTLCMD(UNLOCK_PAGE_BY_MPN), + /* AWE calls */ + IOCTLCMD(ALLOC_LOCKED_PAGES), + IOCTLCMD(FREE_LOCKED_PAGES), + IOCTLCMD(GET_NEXT_ANON_PAGE), + IOCTLCMD(GET_LOCKED_PAGES_LIST), + + IOCTLCMD(GET_ALL_MSRS), + + IOCTLCMD(COUNT_PRESENT_PAGES), + + IOCTLCMD(GET_REFERENCE_CLOCK_HZ), + IOCTLCMD(INIT_PSEUDO_TSC), + IOCTLCMD(CHECK_PSEUDO_TSC), + IOCTLCMD(GET_PSEUDO_TSC), + + IOCTLCMD(SYNC_GET_TSCS), + + IOCTLCMD(GET_IPI_VECTORS), + IOCTLCMD(SEND_IPI), + + /* + * Keep host-specific calls at the end so they can be undefined + * without renumbering the common calls. + */ + +#if defined __linux__ + IOCTLCMD(SET_UID), // VMX86_DEVEL only +#endif + +#if defined __linux__ || defined __APPLE__ + IOCTLCMD(GET_ALL_CPUID), + IOCTLCMD(GET_KERNEL_CLOCK_RATE), +#endif + +#if defined _WIN32 || defined __APPLE__ + IOCTLCMD(ALLOC_CONTIG_PAGES), +#endif + +#if defined _WIN32 + IOCTLCMD(FREE_CONTIG_PAGES), + IOCTLCMD(HARD_LIMIT_MONITOR_STATUS), // Windows 2000 only + IOCTLCMD(BLUE_SCREEN), // USE_BLUE_SCREEN only + IOCTLCMD(CHANGE_HARD_LIMIT), + IOCTLCMD(GET_KERNEL_PROC_ADDRESS), + IOCTLCMD(READ_VA64), + IOCTLCMD(SET_MEMORY_PARAMS), + IOCTLCMD(REMEMBER_KHZ_ESTIMATE), + IOCTLCMD(REMAP_SCATTER_LIST), + IOCTLCMD(UNMAP_SCATTER_LIST), +#endif + +#if defined __APPLE__ + IOCTLCMD(ALLOC_LOW_PAGES), + IOCTLCMD(INIT_DRIVER), + IOCTLCMD(BLUEPILL), +#endif + + IOCTLCMD(SET_POLL_TIMEOUT_PTR), + + IOCTLCMD(FAST_SUSP_RES_SET_OTHER_FLAG), + IOCTLCMD(FAST_SUSP_RES_GET_MY_FLAG), + +#if defined __linux__ + IOCTLCMD(SET_HOST_CLOCK_PRIORITY), + IOCTLCMD(VMX_ENABLED), + IOCTLCMD(SET_HOST_SWAP_SIZE), +#endif + + IOCTLCMD(GET_UNAVAIL_PERF_CTRS), + // Must be last. + IOCTLCMD(LAST) +}; + + +#if defined _WIN32 +/* + * Windows ioctl definitions. + * + * We use the IRP Information field for the return value + * of IOCTLCMD_RUN_VM, to be faster since it is used a lot. 
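+ * + * For example (illustrative, using the macros defined below): + * IOCTL_VMX86_VERSION == CTL_CODE(FILE_DEVICE_VMX86, + * VMX86_IOCTL_BASE_INDEX + IOCTLCMD_VERSION, + * METHOD_BUFFERED, FILE_ANY_ACCESS)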
+ */ + +#define FILE_DEVICE_VMX86 0x8101 +#define VMX86_IOCTL_BASE_INDEX 0x801 +#define VMIOCTL_BUFFERED(name) \ + CTL_CODE(FILE_DEVICE_VMX86, \ + VMX86_IOCTL_BASE_INDEX + IOCTLCMD_ ## name, \ + METHOD_BUFFERED, \ + FILE_ANY_ACCESS) +#define VMIOCTL_NEITHER(name) \ + CTL_CODE(FILE_DEVICE_VMX86, \ + VMX86_IOCTL_BASE_INDEX + IOCTLCMD_ ## name, \ + METHOD_NEITHER, \ + FILE_ANY_ACCESS) + +#define IOCTL_VMX86_VERSION VMIOCTL_BUFFERED(VERSION) +#define IOCTL_VMX86_CREATE_VM VMIOCTL_BUFFERED(CREATE_VM) +#define IOCTL_VMX86_RELEASE_VM VMIOCTL_BUFFERED(RELEASE_VM) +#define IOCTL_VMX86_GET_NUM_VMS VMIOCTL_BUFFERED(GET_NUM_VMS) +#define IOCTL_VMX86_ALLOC_CROSSGDT VMIOCTL_BUFFERED(ALLOC_CROSSGDT) +#define IOCTL_VMX86_INIT_VM VMIOCTL_BUFFERED(INIT_VM) +#define IOCTL_VMX86_INIT_CROSSGDT VMIOCTL_BUFFERED(INIT_CROSSGDT) +#define IOCTL_VMX86_RUN_VM VMIOCTL_NEITHER(RUN_VM) +#define IOCTL_VMX86_SEND_IPI VMIOCTL_NEITHER(SEND_IPI) +#define IOCTL_VMX86_GET_IPI_VECTORS VMIOCTL_BUFFERED(GET_IPI_VECTORS) +#define IOCTL_VMX86_LOOK_UP_MPN VMIOCTL_BUFFERED(LOOK_UP_MPN) +#define IOCTL_VMX86_LOCK_PAGE VMIOCTL_BUFFERED(LOCK_PAGE) +#define IOCTL_VMX86_UNLOCK_PAGE VMIOCTL_BUFFERED(UNLOCK_PAGE) +#define IOCTL_VMX86_APIC_INIT VMIOCTL_BUFFERED(APIC_INIT) +#define IOCTL_VMX86_SET_HARD_LIMIT VMIOCTL_BUFFERED(SET_HARD_LIMIT) +#define IOCTL_VMX86_GET_MEM_INFO VMIOCTL_BUFFERED(GET_MEM_INFO) +#define IOCTL_VMX86_ADMIT VMIOCTL_BUFFERED(ADMIT) +#define IOCTL_VMX86_READMIT VMIOCTL_BUFFERED(READMIT) +#define IOCTL_VMX86_UPDATE_MEM_INFO VMIOCTL_BUFFERED(UPDATE_MEM_INFO) +#define IOCTL_VMX86_PAE_ENABLED VMIOCTL_BUFFERED(PAE_ENABLED) +#define IOCTL_VMX86_BEEP VMIOCTL_BUFFERED(BEEP) +#define IOCTL_VMX86_HARD_LIMIT_MONITOR_STATUS VMIOCTL_BUFFERED(HARD_LIMIT_MONITOR_STATUS) +#define IOCTL_VMX86_CHANGE_HARD_LIMIT VMIOCTL_BUFFERED(CHANGE_HARD_LIMIT) +#define IOCTL_VMX86_ALLOC_CONTIG_PAGES VMIOCTL_BUFFERED(ALLOC_CONTIG_PAGES) +#define IOCTL_VMX86_FREE_CONTIG_PAGES VMIOCTL_BUFFERED(FREE_CONTIG_PAGES) + +#define IOCTL_VMX86_GET_TOTAL_MEM_USAGE VMIOCTL_BUFFERED(GET_TOTAL_MEM_USAGE) +#define IOCTL_VMX86_GET_KHZ_ESTIMATE VMIOCTL_BUFFERED(GET_KHZ_ESTIMATE) +#define IOCTL_VMX86_SET_HOST_CLOCK_RATE VMIOCTL_BUFFERED(SET_HOST_CLOCK_RATE) +#define IOCTL_VMX86_SYNC_GET_TSCS VMIOCTL_BUFFERED(SYNC_GET_TSCS) +#define IOCTL_VMX86_READ_PAGE VMIOCTL_BUFFERED(READ_PAGE) +#define IOCTL_VMX86_WRITE_PAGE VMIOCTL_BUFFERED(WRITE_PAGE) +#define IOCTL_VMX86_LOCK_PAGE_NEW VMIOCTL_BUFFERED(LOCK_PAGE_NEW) +#define IOCTL_VMX86_UNLOCK_PAGE_BY_MPN VMIOCTL_BUFFERED(UNLOCK_PAGE_BY_MPN) +#define IOCTL_VMX86_ALLOC_LOCKED_PAGES VMIOCTL_BUFFERED(ALLOC_LOCKED_PAGES) +#define IOCTL_VMX86_FREE_LOCKED_PAGES VMIOCTL_BUFFERED(FREE_LOCKED_PAGES) +#define IOCTL_VMX86_GET_NEXT_ANON_PAGE VMIOCTL_BUFFERED(GET_NEXT_ANON_PAGE) +#define IOCTL_VMX86_GET_LOCKED_PAGES_LIST VMIOCTL_BUFFERED(GET_LOCKED_PAGES_LIST) + +#define IOCTL_VMX86_GET_KERNEL_PROC_ADDRESS VMIOCTL_BUFFERED(GET_KERNEL_PROC_ADDRESS) +#define IOCTL_VMX86_READ_VA64 VMIOCTL_BUFFERED(READ_VA64) +#define IOCTL_VMX86_SET_MEMORY_PARAMS VMIOCTL_BUFFERED(SET_MEMORY_PARAMS) + +#define IOCTL_VMX86_REMEMBER_KHZ_ESTIMATE VMIOCTL_BUFFERED(REMEMBER_KHZ_ESTIMATE) + +#define IOCTL_VMX86_GET_ALL_MSRS VMIOCTL_BUFFERED(GET_ALL_MSRS) +#define IOCTL_VMX86_COUNT_PRESENT_PAGES VMIOCTL_BUFFERED(COUNT_PRESENT_PAGES) + +#define IOCTL_VMX86_FAST_SUSP_RES_SET_OTHER_FLAG VMIOCTL_BUFFERED(FAST_SUSP_RES_SET_OTHER_FLAG) +#define IOCTL_VMX86_FAST_SUSP_RES_GET_MY_FLAG VMIOCTL_BUFFERED(FAST_SUSP_RES_GET_MY_FLAG) + +#define IOCTL_VMX86_GET_REFERENCE_CLOCK_HZ 
VMIOCTL_BUFFERED(GET_REFERENCE_CLOCK_HZ) +#define IOCTL_VMX86_INIT_PSEUDO_TSC VMIOCTL_BUFFERED(INIT_PSEUDO_TSC) +#define IOCTL_VMX86_CHECK_PSEUDO_TSC VMIOCTL_BUFFERED(CHECK_PSEUDO_TSC) +#define IOCTL_VMX86_GET_PSEUDO_TSC VMIOCTL_NEITHER(GET_PSEUDO_TSC) +#define IOCTL_VMX86_SET_HOST_CLOCK_PRIORITY VMIOCTL_BUFFERED(SET_HOST_CLOCK_PRIORITY) +#define IOCTL_VMX86_GET_UNAVAIL_PERF_CTRS VMIOCTL_NEITHER(GET_UNAVAIL_PERF_CTRS) +#define IOCTL_VMX86_REMAP_SCATTER_LIST VMIOCTL_BUFFERED(REMAP_SCATTER_LIST) +#define IOCTL_VMX86_UNMAP_SCATTER_LIST VMIOCTL_BUFFERED(UNMAP_SCATTER_LIST) +#endif + + +/* + * Flags sent into APICBASE ioctl + */ + +#define APIC_FLAG_DISABLE_NMI 0x00000001 +#define APIC_FLAG_PROBE 0x00000002 +#define APIC_FLAG_FORCE_ENABLE 0x00000004 + +typedef +#include "vmware_pack_begin.h" +struct VMLockPageRet { + MPN mpn; // OUT: MPN + int32 status; // OUT: PAGE_* status code +} +#include "vmware_pack_end.h" +VMLockPageRet; + +typedef +#include "vmware_pack_begin.h" +union { + VA64 uAddr; // IN: user address + VMLockPageRet ret; // OUT: status code and MPN +} +#include "vmware_pack_end.h" +VMLockPage; + + +typedef struct VMAPICInfo { + uint32 flags; +} VMAPICInfo; + +#define VMX86_DRIVER_VCPUID_OFFSET 1000 + + +/* + * We keep track of 3 different limits on the number of pages we can lock. + * The host limit is determined at driver load time (in windows only) to + * make sure we do not starve the host by locking too many pages. + * The static limit is user defined in the UI and the dynamic limit is + * set by authd's hardLimitMonitor code (windows only), which queries + * host load and adjusts the limit accordingly. We lock the minimum of + * all these values at any given time. + */ +typedef struct LockedPageLimit { + uint32 host; // driver calculated maximum for this host + uint32 configured; // user defined maximum pages to lock + uint32 dynamic; // authd hardLimitMonitor pages to lock +} LockedPageLimit; + +/* + * Sentinel VA for IOCTL_VMX86_SET_MEMORY_PARAMS, indicates + * NtQuerySystemInformation should be used to determine the host + * LockedPageLimit. + */ +#define MEMORY_PARAM_USE_SYSINFO_FOR_LOCKED_PAGE_LIMIT ((VA64)(int64)-1) + +/* + * Data structures for the GET_MEM_INFO and ADMIT ioctls. + * + * Be careful adding structs and fields to VMMemInfoArgs and its + * substructures. These are compiled into both the 32-bit and 64-bit + * vmmon drivers and the 32-bit and 64-bit vmx's and need to have + * the same size and layout in all four combinations. Note the + * use of padding below to ensure that this happens. 
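+ * + * For example (informal illustration, not an extra constraint): a + * GET_MEM_INFO request covering four running VMs is sized with + * VM_GET_MEM_INFO_SIZE(4), i.e. sizeof(VMMemInfoArgs) plus room for three + * VMMemMgmtInfo entries beyond the one embedded in the struct.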
+ */ + +typedef struct VMMemMgmtInfo { + uint32 minAllocation; // minimum pages for vm + uint32 maxAllocation; // maximum pages the vm could lock + uint32 shares; // proportional sharing weight + uint32 nonpaged; // overhead memory (guest, mmap) + uint32 paged; // vmx memory (malloc, statics) + uint32 anonymous; // vmm memory + uint32 mainMemSize; // guest main memory size + uint32 locked; // number of pages locked by this vm + uint32 perVMOverhead; // memory for vmx/vmmon overheads + Percent touchedPct; // % of guest memory being touched + Percent dirtiedPct; // % of guest memory being dirtied + Bool admitted; // admission control + uint8 _pad; // for alignment of 64-bit fields + uint64 hugePageBytes; // number of bytes occupied by huge pages + uint64 timestamp; // most recent poll of get mem info time +} VMMemMgmtInfo; + +typedef struct VMMemMgmtInfoPatch { + Percent touchedPct; // % of guest memory being touched + Percent dirtiedPct; // % of guest memory being dirtied + uint8 _pad[6]; + uint64 hugePageBytes; +} VMMemMgmtInfoPatch; + +/* + * See comment on padding and size/layout constraints above when + * when modifying VMMemInfoArgs or its components. + */ + +typedef struct VMMemInfoArgs { + uint64 currentTime; // Host time in secs of the call. + uint32 minVmMemPct; // % of vm that must fit in memory + uint32 globalMinAllocation;// pages that must fit in maxLockedPages + uint32 numLockedPages; // total locked pages by all vms + LockedPageLimit lockedPageLimit; // set of locked page limits + uint32 maxLockedPages; // effective limit on locked pages + uint32 callerIndex; // this vm's index memInfo array + uint32 numVMs; // number of running VMs + uint8 _pad[4]; + VMMemMgmtInfo memInfo[1]; +} VMMemInfoArgs; + +#define VM_GET_MEM_INFO_SIZE(numVMs) \ + (sizeof(VMMemInfoArgs) - sizeof(VMMemMgmtInfo) + (numVMs) * sizeof(VMMemMgmtInfo)) + +typedef struct VMMPNNext { + MPN inMPN; // IN + MPN outMPN; // OUT +} VMMPNNext; + +typedef struct VMMPNList { + uint32 mpnCount; // IN (and OUT on Mac OS) + Bool ignoreLimits; + uint8 _pad[3]; + VA64 mpnList; // IN: User VA of an array of 64-bit MPNs. +} VMMPNList; + +typedef struct VARange { + VA64 addr; + VA64 bv; + unsigned len; + uint32 pad; +} VARange; + +typedef struct VMMUnlockPageByMPN { + MPN mpn; + VA64 uAddr; /* IN: User VA of the page (optional). */ +} VMMUnlockPageByMPN; + +typedef struct VMMReadWritePage { + MPN mpn; // IN + VA64 uAddr; // IN: User VA of a PAGE_SIZE-large buffer. +} VMMReadWritePage; + +struct passthrough_iorange { + unsigned short ioBase; /* Base of range to pass through. */ + unsigned short numPorts; /* Length of range. */ +}; + +/* + * Data structure for the INIT_PSEUDO_TSC and CHECK_PSEUDO_TSC. + */ + +typedef struct PTSCInitParams { + RateConv_Params refClockToPTSC; + uint64 tscHz; + uint64 initialPTSC; + int64 tscOffset; + Bool forceRefClock; + Bool forceTSC; + Bool hwTSCsSynced; + uint8 _pad[5]; +} PTSCInitParams; + +typedef struct PTSCCheckParams { + uint64 lastTSC; + uint64 lastRC; + Bool usingRefClock; + uint8 _pad[7]; +} PTSCCheckParams; + +#ifndef VMX86_SERVER + +typedef struct IPIVectors { + /* + * Vector(s) the host uses for its own IPIs; we use this as a performance + * hint. + */ + uint8 hostIPIVectors[2]; + /* + * Vectors we have allocated or stolen for the monitor interrupts. + */ + uint8 monitorIPIVector; + uint8 hvIPIVector; +} IPIVectors; + +#endif + +/* + * This struct is passed to IOCTL_VMX86_INIT_CROSSGDT to fill in a crossGDT + * entry. 
+ */ +typedef struct InitCrossGDT { + uint32 index; // index in crossGDT to update (offset / 8) + Descriptor value; // value to set the crossGDT entry to +} InitCrossGDT; + +#if defined __linux__ + +/* + * Linux uses mmap(2) to allocate contiguous locked pages, and uses these + * macros to marshall real arguments to mmap's made-up 'offset' argument. + */ + +#define VMMON_MAP_MT_LOW4GB 0 +#define VMMON_MAP_MT_LOW16MB 1 +#define VMMON_MAP_MT_ANY 2 + +#define VMMON_MAP_OFFSET_SHIFT 0 +#define VMMON_MAP_OFFSET_MASK 0x00000FFF +#define VMMON_MAP_ORDER_SHIFT 12 +#define VMMON_MAP_ORDER_MASK 0xF +#define VMMON_MAP_MT_SHIFT 16 +#define VMMON_MAP_MT_MASK 0x7 +#define VMMON_MAP_RSVD_SHIFT 19 + +#define VMMON_MAP_RSVD(base) \ + ((base) >> VMMON_MAP_RSVD_SHIFT) +#define VMMON_MAP_MT(base) \ + (((base) >> VMMON_MAP_MT_SHIFT) & VMMON_MAP_MT_MASK) +#define VMMON_MAP_ORDER(base) \ + (((base) >> VMMON_MAP_ORDER_SHIFT) & VMMON_MAP_ORDER_MASK) +#define VMMON_MAP_OFFSET(base) \ + (((base) >> VMMON_MAP_OFFSET_SHIFT) & VMMON_MAP_OFFSET_MASK) + +#define VMMON_MAP_BASE(mt, order) (((mt) << VMMON_MAP_MT_SHIFT) | \ + ((order) << VMMON_MAP_ORDER_SHIFT)) + +#elif defined _WIN32 +/* + * Windows uses an ioctl to allocate contiguous locked pages. + */ + +typedef struct VMAllocContiguousMem { + VA64 mpnList; // IN: User VA of an array of 64-bit MPNs. + uint32 mpnCount; // IN + uint32 order; // IN + MPN maxMPN; // IN +} VMAllocContiguousMem; +#elif defined __APPLE__ +# include "iocontrolsMacos.h" +#endif + +/* Clean up helper macros */ +#undef IOCTLCMD + +#endif // ifndef _IOCONTROLS_H_ diff --git a/vmmon-only/include/memDefaults.h b/vmmon-only/include/memDefaults.h new file mode 100644 index 00000000..68611708 --- /dev/null +++ b/vmmon-only/include/memDefaults.h @@ -0,0 +1,154 @@ +/********************************************************* + * Copyright (C) 1998-2012 VMware, Inc. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation version 2 and no later version. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY + * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * for more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write to the Free Software Foundation, Inc., + * 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + * + *********************************************************/ + + +#ifndef _MEMDEFAULTS_H_ +#define _MEMDEFAULTS_H_ + +#define INCLUDE_ALLOW_USERLEVEL +#define INCLUDE_ALLOW_VMMON +#define INCLUDE_ALLOW_VMCORE +#define INCLUDE_ALLOW_MODULE + +#include "includeCheck.h" + +#include "vm_basic_math.h" +#include "vm_basic_defs.h" + +#define MEMDEFAULTS_MIN_HOST_PAGES MBYTES_2_PAGES(128) + + +/* + *----------------------------------------------------------------------------- + * + * MemDefaults_CalcMaxLockedPages -- + * + * Calculate the rough estimate of the maximum amount of memory + * that can be locked (total for the kernel, all VMs, and other apps), + * based on the size of host memory as supplied in pages. + * + * Results: + * The estimated maximum memory that can be locked in pages. 
+ * + * Side effects: + * None + * + *----------------------------------------------------------------------------- + */ + +static INLINE unsigned +MemDefaults_CalcMaxLockedPages(unsigned hostPages) // IN: +{ + unsigned reservedPages; + +#if defined(__APPLE__) + /* + * Reserve (25% of the host memory + 512 MB) or 4 GB, whichever is lower. + * 4 GB hosts perform poorly with less than 1.5 GB reserved, and large + * memory hosts (>= 16 GB) may want to use more than 75% for VMs. + */ + reservedPages = MIN((hostPages / 4) + MBYTES_2_PAGES(512), + GBYTES_2_PAGES(4)); +#elif defined(_WIN32) + { + unsigned int hostGig = PAGES_2_GBYTES(hostPages); + + if (hostGig <= 4) { + reservedPages = hostPages / 4; + } else if (hostGig >= 16) { + reservedPages = hostPages / 8; + } else { + /* + * Increment by 1/32 for each 4GB of host mem between 4 and 16. + * See PR779556. + */ + reservedPages = hostPages / 32 * (8 - hostGig / 4); + } + } +#else // Linux + reservedPages = hostPages / 8; +#endif + + reservedPages = MAX(reservedPages, MEMDEFAULTS_MIN_HOST_PAGES); + + return hostPages > reservedPages ? hostPages - reservedPages : 0; +} + + +/* + *----------------------------------------------------------------------------- + * + * MemDefaults_CalcMaxLockedMBs -- + * + * Calculate the rough estimate of the maximum amount of memory + * that can be locked based on the size of host memory as supplied + * in MBytes. + * + * Results: + * The estimated maximum memory that can be locked in MBytes. + * + * Side effects: + * None + * + *----------------------------------------------------------------------------- + */ + +static INLINE uint32 +MemDefaults_CalcMaxLockedMBs(uint32 hostMem) // IN: +{ + return PAGES_2_MBYTES( + MemDefaults_CalcMaxLockedPages(MBYTES_2_PAGES(hostMem))); +} + + +/* + *----------------------------------------------------------------------------- + * + * MemDefaults_CalcMinReservedMBs -- + * + * Provide a lower bound on the user as to the minimum amount + * of memory to lock based on the size of host memory. This + * threshold might be crossed as a result of the user limiting + * the amount of memory consumed by all VMs. + * + * Results: + * The minimum locked memory requirement in MBytes. + * + * Side effects: + * None + * + *----------------------------------------------------------------------------- + */ + +static INLINE uint32 +MemDefaults_CalcMinReservedMBs(uint32 hostMem) // IN: +{ + if (hostMem < 512) { + return 32; + } else if (hostMem < 1024) { + return 64; + } else { + return 128; + } +} + + +void MemDefaults_GetReservedMemory(uint32 *host, uint32 *min, + uint32 *max, uint32 *recommended); + +#endif diff --git a/vmmon-only/include/modulecall.h b/vmmon-only/include/modulecall.h new file mode 100644 index 00000000..3f6f444b --- /dev/null +++ b/vmmon-only/include/modulecall.h @@ -0,0 +1,495 @@ +/********************************************************* + * Copyright (C) 1998-2015 VMware, Inc. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation version 2 and no later version. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY + * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * for more details. 
+ * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write to the Free Software Foundation, Inc., + * 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + * + *********************************************************/ + + +/* + * modulecall.h + * + * Monitor <--> Module (kernel driver) interface + */ + +#ifndef _MODULECALL_H +#define _MODULECALL_H + +#define INCLUDE_ALLOW_VMCORE +#define INCLUDE_ALLOW_VMMON +#include "includeCheck.h" + +#include "x86types.h" +#include "x86desc.h" +#include "ptsc.h" +#include "vcpuid.h" +#include "vcpuset.h" +#include "vmm_constants.h" +#include "contextinfo.h" +#include "rateconv.h" +#include "modulecallstructs.h" +#include "mon_assert.h" + +#define NUM_EXCEPTIONS 20 /* EXC_DE ... EXC_XF. */ + +#define MODULECALL_TABLE \ + MC(INTR) \ + MC(SEMAWAIT) \ + MC(SEMASIGNAL) \ + MC(SEMAFORCEWAKEUP) \ + MC(IPI) /* Hit thread with IPI. */ \ + MC(USERRETURN) /* Return codes for user calls. */ \ + MC(GET_RECYCLED_PAGES) \ + MC(RELEASE_ANON_PAGES) \ + MC(LOOKUP_MPN) \ + MC(COSCHED) \ + MC(ALLOC_VMX_PAGE) \ + MC(ALLOC_TMP_GDT) \ + MC(PIN_MPN) + +/* + *---------------------------------------------------------------------- + * + * ModuleCallType -- + * + * Enumeration of support calls done by the module. + * + * If anything changes in the enum, please update kstatModuleCallPtrs + * for stats purposes. + * + *---------------------------------------------------------------------- + */ + +typedef enum ModuleCallType { + MODULECALL_NONE = 100, +#define MC(_modulecall) MODULECALL_##_modulecall, + MODULECALL_TABLE +#undef MC + MODULECALL_LAST // Number of entries. Must be the last one +} ModuleCallType; + +#define MODULECALL_USERCALL_NONE 300 + +/* + * Define VMX86_UCCOST in the makefiles (Local.mk, + * typically) if you want a special build whose only purpose + * is to measure the overhead of a user call and its + * breakdown. + * + * WINDOWS NOTE: I don't know how to pass VMX86_UCCOST to + * the driver build on Windows. It must be defined by hand. + * + * ESX Note: we don't have a crosspage in which to store these + * timestamps. Such a feature would perhaps be nice (if we + * ever tire of the argument that esx does so few usercalls + * that speed doesn't matter). + */ + +#if defined(VMX86_UCCOST) && !defined(VMX86_SERVER) +#define UCTIMESTAMP(cp, stamp) \ + do { (cp)->ucTimeStamps[UCCOST_ ## stamp] = RDTSC(); } while (0) +#else +#define UCTIMESTAMP(cp, stamp) +#endif + +#ifdef VMX86_SERVER +typedef struct UCCostResults { + uint32 vmksti; + uint32 vmkcli; + uint32 ucnop; +} UCCostResults; +#else + +typedef struct UCCostResults { + uint32 htom; + uint32 mtoh; + uint32 ucnop; +} UCCostResults; + +typedef enum UCCostStamp { +#define UC(x) UCCOST_ ## x, +#include "uccostTable.h" + UCCOST_MAX +} UCCostStamp; +#endif // VMX86_SERVER + +/* + * Header for the wsBody64.S worldswitch code file. 
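+ *
+ * The hostToVmm and vmm64ToHost fields below are byte offsets from the
+ * start of this header, so (as a sketch, with 'ws' being a hypothetical
+ * WSModule pointer) the host-to-VMM entry point would be located as
+ *
+ *   uint8 *entry = (uint8 *)ws + ws->hostToVmm;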
+ */ +typedef struct WSModule { + uint32 vmmonVersion; // VMMON_VERSION when assembled as part of monitor + uint16 moduleSize; // size of whole wsBody64 module + uint16 hostToVmm; // offset from beginning of header to hostToVmm + uint16 vmm64ToHost; + uint16 _pad[3]; + + uint8 code[1024]; // big enough for MAX('.wsBody64', 'wsBody64Log') +} WSModule; + +typedef +#include "vmware_pack_begin.h" +struct SwitchNMIOffsets { + uint16 db; // offset to start of #DB handler + uint16 nmi; // offset to start of #NMI handler + uint16 df; // offset to start of #DF handler + uint16 ud; // offset to start of #UD handler + uint16 gp; // offset to start of #GP handler + uint16 pf; // offset to start of #PF handler + uint16 mce; // offset to start of #MCE handler +} +#include "vmware_pack_end.h" +SwitchNMIOffsets; + +/* + * This is a header for the switchNMI.S module. It contains code for + * exceptions occurring during worldswitch. The code gets copied to + * the crosspage by initialization. + */ +typedef +#include "vmware_pack_begin.h" +struct SwitchNMI { // see switchNMI.S + uint16 switchNMISize; + SwitchNMIOffsets host; // offsets to handlers + volatile Bool wsException[NUM_EXCEPTIONS]; // EXC_DE ... EXC_XF + // TRUE -> fault occurred in + // worldswitch + uint64 wsUD2; // IP of ud2 instruction + // 0ULL == unset + // other == worldswitch IP + uint8 codeBlock[768]; // Enough for + // max('.switchNMI', + // '.switchNMILog'). +} +#include "vmware_pack_end.h" +SwitchNMI; + +#define SHADOW_DR(cpData, n) (cpData)->shadowDR[n].ureg64 + + +/*---------------------------------------------------------------------- + * + * MAX_SWITCH_PT_PATCHES + * + * This is the maximum number of patches that must be placed into + * the monitor page tables so that two pages of the host GDT and the + * crosspage can be accessed during worldswitch. + * + *---------------------------------------------------------------------- + */ +#define MAX_SWITCH_PT_PATCHES 3 + +/*---------------------------------------------------------------------- + * + * WS_INTR_STRESS + * + * When set to non-zero, world switch code will enable single-step + * debugging across much of the switch path in both directions. The + * #DB handler detects single-stepping and induces a simulated NMI per + * instruction. This verifies that interrupts and exceptions are safe + * across the switch path, even if an NMI were raised during handling + * of another exception. + * + * When set to zero, normal worldswitch operation occurs. + * + * See the worldswitch assembly code for details. + * + *---------------------------------------------------------------------- + */ +#define WS_INTR_STRESS 0 + + +/*---------------------------------------------------------------------- + * + * VMM64PageTablePatch + * + * Describes an entry in the monitor page table which needs to be + * patched during the back-to-host worldswitch. + * + * o A patch can appear at any place in the page table, and so four + * items are required to uniquely describe the patch: + * + * o level + * + * This is the level in the page table to which the patch must + * be applied: L4, L3, L2, L1. This information is used to + * determine the base of the region of memory which must be + * patched. The level value corresponds to the following + * regions in monitor memory: + * + * MMU_ROOT_64 + * MMU_L3_64 + * MMU_L2_64 + * MON_PAGE_TABLE_64 + * + * The value zero is reserved to indicate an empty spot in the + * array of patches. 
+ * + * o level offset + * + * The monitor memory regions corresponding to the page table + * levels may be more than one page in length, so a 'page + * offset' is required to know the starting address of the page + * table page which must be patched in 'level'. + * + * o page index + * + * The 'index' value specifies the element in the page which + * should be patched. + * + * o pte + * + * This is the PTE value which will be patched into the monitor + * page table. + * + *---------------------------------------------------------------------- + */ +typedef +#include "vmware_pack_begin.h" +struct VMM64PageTablePatch { +#define PTP_EMPTY (0U) /* Unused array entry. (must be 0) */ +#define PTP_LEVEL_L1 (1U) /* leaf level */ +#define PTP_LEVEL_L2 (2U) +#define PTP_LEVEL_L3 (3U) +#define PTP_LEVEL_L4 (4U) /* root level */ + uint16 level; /* [0, 4] (maximal size: 3 bits) */ + uint16 page; /* Index of 'page' in 'level'. */ + uint32 index; /* Index of 'pte' in 'page'. */ + VM_PDPTE pte; /* PTE. */ +} +#include "vmware_pack_end.h" +VMM64PageTablePatch; + +#define MODULECALL_NUM_ARGS 4 + +/* + *---------------------------------------------------------------------- + * + * VMCrossPageData -- + * + * Data which is stored on the VMCrossPage. + * + *---------------------------------------------------------------------- + */ +typedef +#include "vmware_pack_begin.h" +struct VMCrossPageData { + /* + * A tiny stack upon which interrupt and exception handlers in the switch + * path temporarily run. Keep the end 16-byte aligned. This stack must + * be large enough for the sum of: + * + * - 1 #DB exception frame (5 * uint64) + * - 1 #NMI exception frame (5 * uint64) + * - 1 #MCE exception frame (5 * uint64) + * - the largest stack use instantaneously possible by #MCE handling code + * - the largest stack use instantaneously possible by #NMI handling code + * - the largest stack use instantaneously possible by #DB handling code + * - one high-water uint32 used to detect stack overflows when debugging + * - remaining pad bytes to align to 16 bytes + * + * 184 bytes is slightly more than enough as of 2015/03/17 -- fjacobs. + */ + uint32 tinyStack[46]; + + uint64 hostCR3; + uint32 crosspageMA; + + uint8 hostDRSaved; // Host DR spilled to hostDR[x]. + uint8 hostDRInHW; // 0 -> shadowDR in h/w, 1 -> hostDR in h/w. + // contains host-sized DB,NMI,MCE entries + uint16 hostSS; + uint64 hostRSP; + uint64 hostDR[8]; + uint64 hostRBX; + uint64 hostRSI; + uint64 hostRDI; + uint64 hostRBP; + uint64 hostR12; + uint64 hostR13; + uint64 hostR14; + uint64 hostR15; + LA64 hostCrossPageLA; // where host has crosspage mapped + uint16 hostInitial64CS; + uint16 _pad0[3]; + + uint64 wsCR0; + uint64 wsCR4; + + DTR64 crossGDTHKLADesc; // always uses host kernel linear address + uint16 _pad1[3]; + DTR64 mon64GDTR; + uint16 mon64ES; + uint16 mon64SS; + uint16 mon64DS; + uint64 mon64CR3; + uint64 mon64RBX; + uint64 mon64RSP; + uint64 mon64RBP; + uint64 mon64RSI; + uint64 mon64RDI; + uint64 mon64R12; + uint64 mon64R13; + uint64 mon64R14; + uint64 mon64R15; + uint64 mon64RIP; + Task64 monTask64; /* vmm64's task */ + + VMM64PageTablePatch vmm64PTP[MAX_SWITCH_PT_PATCHES]; /* page table patch */ + LA64 vmm64CrossPageLA; + LA64 vmm64CrossGDTLA; // where crossGDT mapped by PT patch + // 64-bit host: host kernel linear + // address + + /* + * The monitor may requests up to two actions when returning to the + * host. The moduleCallType field and args encode a request for + * some action in the driver. 
The userCallType field (together + * with the RPC block) encodes a user call request. The two + * requests are independent. The user call is executed first, with + * the exception of MODULECALL_INTR which has a special effect. + */ + ModuleCallType moduleCallType; + uint32 retval; + uint64 args[MODULECALL_NUM_ARGS]; + int userCallType; + uint32 pcpuNum; /* Used as extra module call arg within vmmon. */ + + VCPUSet yieldVCPUs; + +#if !defined(VMX86_SERVER) + uint64 ucTimeStamps[UCCOST_MAX]; +#endif + + /* + * The values in the shadow debug registers must match those in the + * hardware debug register immediately after a task switch in + * either direction. They are used to minimize moves to and from + * the debug registers. + */ + SharedUReg64 shadowDR[8]; + uint8 shadowDRInHW; // bit n set iff %DRn == shadowDR[n] + + SwitchedMSRState switchedMSRState; + uint8 _pad2[7]; + + /* + * Adjustment for machines where the hardware TSC does not run + * constantly (laptops) or is out of sync between different PCPUs. + * Updated as needed by vmmon. See VMK_SharedData for the ESX + * analog, which is updated by the vmkernel. + */ + RateConv_ParamsVolatile pseudoTSCConv; + VmAbsoluteTS worldSwitchPTSC; // PTSC value immediately before + // last worldswitch. + + VmAbsoluteTS timerIntrTS; // PTSC of timer interrupt while in the vmm + VmAbsoluteTS hstTimerExpiry; // PTSC of host timer interrupt + VmAbsoluteTS monTimerExpiry; // PTSC of next MonTimer callback + + Bool activateVMX; // TRUE -> activate Intel VMX extensions + Bool activateSVM; // TRUE -> activate AMD SVM extensions + Bool retryWorldSwitch; // TRUE -> return to host on host->vmm switch + /* + * TRUE if moduleCall was interrupted by signal. Only + * vmmon uses this field to remember that it should + * restart RunVM call, nobody else should look at it. + */ + Bool moduleCallInterrupted; + uint8 _pad3[4]; + + DTR64 switchHostIDTR; // baseLA = switchHostIDT's host knl LA + uint16 _pad4[3]; + DTR64 switchMon64IDTR; // has baseLA = switchMon64IDT's monitor LA + // contains 64-bit DB,NMI,MCE entries + uint16 _pad5[3]; + + /* + * Descriptors and interrupt tables for switchNMI handlers. Each + * IDT has only enough space for the hardware exceptions; they are + * sized to accommodate 64-bit descriptors. + */ + uint8 switchHostIDT [sizeof(Gate64) * NUM_EXCEPTIONS]; // hostCS:hostVA + uint8 switchMon64IDT[sizeof(Gate64) * NUM_EXCEPTIONS]; // 64-bit monCS:monVA +} +#include "vmware_pack_end.h" +VMCrossPageData; + +/* + *---------------------------------------------------------------------- + * + * VMCrossPageCode -- + * + * Code which is stored on the VMCrossPage. + * + *---------------------------------------------------------------------- + */ +typedef +#include "vmware_pack_begin.h" +struct VMCrossPageCode { + WSModule worldswitch; + SwitchNMI faultHandler; +} +#include "vmware_pack_end.h" +VMCrossPageCode; + + +/* + *---------------------------------------------------------------------- + * + * VMCrossPage -- + * + * Data structure shared between the monitor and the module + * that is used for crossing between the two. + * Accessible as vm->cross (kernel module) and CROSS_PAGE + * (monitor) + * + * Exactly one page long + * + *---------------------------------------------------------------------- + */ + +typedef +#include "vmware_pack_begin.h" +struct VMCrossPage { + uint32 version; /* 4 bytes. Must be at offset zero. */ + uint32 crosspage_size; /* 4 bytes. Must be at offset 4. 
*/ + VMCrossPageData crosspageData; + uint8 _pad[PAGE_SIZE - (sizeof(uint32) /* version */ + + sizeof(uint32) /* crosspage_size */ + + sizeof(VMCrossPageData) + + sizeof(VMCrossPageCode))]; + VMCrossPageCode crosspageCode; +} +#include "vmware_pack_end.h" +VMCrossPage; + +#define CROSSPAGE_VERSION_BASE 0xbf1 /* increment by 1 */ +#define CROSSPAGE_VERSION ((CROSSPAGE_VERSION_BASE << 1) + WS_INTR_STRESS) + +#if !defined(VMX86_SERVER) && defined(VMM) +#define CROSS_PAGE ((VMCrossPage * const) VPN_2_VA(CROSS_PAGE_START)) +#define VMM_SWITCH_SHARED_DATA ((VMCrossPageData *)&CROSS_PAGE->crosspageData) +#endif + +#define NULLPAGE_LINEAR_START (MONITOR_LINEAR_START + \ + PAGE_SIZE * CPL0_GUARD_PAGE_START) + +#define MX_WAITINTERRUPTED 3 +#define MX_WAITTIMEDOUT 2 +#define MX_WAITNORMAL 1 // Must equal one; see linux module code. +#define MX_WAITERROR 0 // Use MX_ISWAITERROR() to test for error. + +// Any zero or negative value denotes error. +#define MX_ISWAITERROR(e) ((e) <= MX_WAITERROR) +#endif diff --git a/vmmon-only/include/modulecallstructs.h b/vmmon-only/include/modulecallstructs.h new file mode 100644 index 00000000..7943a2ba --- /dev/null +++ b/vmmon-only/include/modulecallstructs.h @@ -0,0 +1,99 @@ +/********************************************************* + * Copyright (C) 2006,2009-2011,2013-2014 VMware, Inc. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation version 2 and no later version. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY + * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * for more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write to the Free Software Foundation, Inc., + * 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + * + *********************************************************/ + +/* + * modulecallstructs.h -- + * + * + * Data structures that need to be included in modulecall.h + * as well as the vmkernel. + * + */ + +#ifndef _MODULECALLSTRUCTS_H_ +#define _MODULECALLSTRUCTS_H_ + +#define INCLUDE_ALLOW_VMMON +#define INCLUDE_ALLOW_VMKERNEL +#define INCLUDE_ALLOW_VMCORE + +#include "includeCheck.h" + +#include "vm_basic_types.h" + +/* + * Flags indicating switched MSR status. + * + * UNUSED - Not used by the monitor (yet). [This is a value, not a flag.] + * USED - Hardware MSR is used by the monitor. + * RESTORED - Monitor value is restored on world switch into the VMM. + * SHADOWED - Live monitor value is always shadowed in the SwitchedMSRState. + * + */ + +#define SWITCHED_MSR_FLAG_UNUSED 0 +#define SWITCHED_MSR_FLAG_USED 1 +#define SWITCHED_MSR_FLAG_RESTORED 2 +#define SWITCHED_MSR_FLAG_SHADOWED 4 + +/* + * Note: If you add an msr to this list, please also ensure that + * hardware support for the msr is properly indicated in + * both the monitor (MonMSRIsSupported) and in the vmkernel + * (world switch msrSupported array). + */ +#define SWITCHED_MSRS \ + SWMSR(MSR_SYSENTER_CS) \ + SWMSR(MSR_SYSENTER_EIP) \ + SWMSR(MSR_SYSENTER_ESP) \ + SWMSR(MSR_STAR) \ + SWMSR(MSR_LSTAR) \ + SWMSR(MSR_CSTAR) \ + SWMSR(MSR_SFMASK) \ + SWMSR(MSR_TSC_AUX) \ + SWMSR(MSR_BD_TSC_RATIO) + +/* + * Data structures for dealing with the context-switched MSRs that need + * to be specially handled. 
While the MSR definitions themselves + * are part of the x86 architecture, our handling of them (and hence + * these data structures) is an implementation detail. + */ + + +typedef enum SwitchedMSR { +#define SWMSR(msr) SWITCHED_##msr, + SWITCHED_MSRS +#undef SWMSR + NUM_SWITCHED_MSRS +} SwitchedMSR; + +/* + * Switched MSR values for each [vp]CPU. + */ +typedef struct SwitchedMSRValues { + uint64 a[NUM_SWITCHED_MSRS]; +} SwitchedMSRValues; + +typedef struct SwitchedMSRState { + SwitchedMSRValues smv; + uint8 flags[NUM_SWITCHED_MSRS]; + uint32 _pad; +} SwitchedMSRState; + +#endif diff --git a/vmmon-only/include/mon_assert.h b/vmmon-only/include/mon_assert.h new file mode 100644 index 00000000..b86832ca --- /dev/null +++ b/vmmon-only/include/mon_assert.h @@ -0,0 +1,207 @@ +/********************************************************* + * Copyright (C) 2007-2014 VMware, Inc. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation version 2 and no later version. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY + * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * for more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write to the Free Software Foundation, Inc., + * 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + * + *********************************************************/ + +#ifndef _MON_ASSERT_H_ +#define _MON_ASSERT_H_ + +#define INCLUDE_ALLOW_USERLEVEL + +#define INCLUDE_ALLOW_VMMON +#define INCLUDE_ALLOW_VMCORE +#include "includeCheck.h" + +#include "vm_assert.h" +#include "vm_basic_asm.h" + +/* + * Monitor Source Location + * + * The monitor encodes source locations -- file name & line number -- + * in just 32 bits; the process is arcane enough that it deserves a + * little discussion. + * + * o The ASSERT family of macros are expanded in the monitor to take + * an 'Assert_MonSrcLoc' rather than the standard ', + * ' couplet. + * + * o The ', ' couplet is encoded into + * Assert_MonSrcLoc, which is an unsigned 32-bit integer. + * + * o The upper 16-bits of Assert_MonSrcLoc are the line number. + * + * Source lines above 65535 will be silently masked to 16-bits. + * + * o The lower 16-bits of Assert_MonSrcLoc are the offset to the file + * name from the start of the file name table. + * + * This, of course, implies that the size of the table containing + * the file names cannot exceed 64K. + * + * o If we use '__FILE__' directly, gcc will coalesce all equivalent + * strings into a single occurrence (in '.rodata'). + * + * Using the full pathname for __FILE__ is undesirable because + * different source trees frequently have different path name + * lengths, and this causes the 'monitor-modular-size' script to + * report differences in '.rodata'. + * + * o To avoid differences in '.rodata', each __FILE__ is put into + * its own section. The monitor's linker (not ld) will use the + * name of the section to recover the name of the source file. + * + * o At run time, prior to loading, when our linker is creating an + * executable image of the monitor and extensions, all the file + * names are extracted from these sections, the '${VMTREE}' prefix + * is removed, and the resulting table of shortened file names is + * added to '.rodata'. 
+ * + * Further, during linkage, each relocation to the original + * section containing the path name is modified so that the low + * 16-bits contain an offset from '__vmm_pathnames_start' rather + * than the base of the original containing section. + * + * Only three types of relocations to the assertion strings are + * supported (32-bit PC-relative and 32-bit/64-bit absolute) because that + * is all the compiler has been seen to generate. + */ + +#define ALL_ASSERT_TYPES \ + ADEF(AssertType_AssertPanic, _AssertPanicFmt), \ + ADEF(AssertType_AssertAssert, _AssertAssertFmt), \ + ADEF(AssertType_AssertVerify, _AssertVerifyFmt), \ + ADEF(AssertType_AssertNotImplemented, _AssertNotImplementedFmt), \ + ADEF(AssertType_AssertNotReached, _AssertNotReachedFmt), \ + ADEF(AssertType_AssertPanicBug, _AssertPanicFmt " bugNr=%d"), \ + ADEF(AssertType_AssertAssertBug, _AssertAssertFmt " bugNr=%d"), \ + ADEF(AssertType_AssertVerifyBug, _AssertVerifyFmt " bugNr=%d"), \ + ADEF(AssertType_AssertNotImplementedBug, \ + _AssertNotImplementedFmt " bugNr=%d"), \ + ADEF(AssertType_AssertNotReachedBug, _AssertNotReachedFmt " bugNr=%d"),\ + ADEF(AssertType_AssertNotTested, _AssertNotTestedFmt) + +typedef uint32 Assert_MonSrcLoc; + +#define ADEF(type, fmt) type +typedef enum Assert_Type { + ALL_ASSERT_TYPES +} Assert_Type; +#undef ADEF + +typedef struct Assert_Info { + VA faultAddr; + struct { + Assert_Type type:4; + int bugNr:28; + } misc; + Assert_MonSrcLoc loc; +} Assert_Info; + +/* + * The portion of the __attribute__ line after __FILE__ is there so that + * the .assert_pathname_* sections are not marked as ALLOC, since we only + * need them in the vmx and do not need them loaded. + */ +#define __VMM__FILE__SECTION \ + __attribute__((section (".assert_pathname_" __FILE__ ",\"\"#"))) +#define __VMM__FILE__ ({ \ + static __VMM__FILE__SECTION const char file[] = ""; \ + file; \ + }) + +#define ASSERT_MONSRCFILEOFFSET(loc) LOWORD(loc) +#define ASSERT_MONSRCLINE(loc) HIWORD(loc) + +#define ASSERT_NULL_MONSRCLOC 0 // there is never line 0 + +#ifdef VMM // { +#ifdef MONITOR_APP // { + +#define ASSERT_MONSRCLOC() ASSERT_NULL_MONSRCLOC + +#else // } { + +#define ASSERT_MONSRCLOC() ({ \ + const uintptr_t offset = ((__LINE__ << 16) + \ + (uintptr_t)__VMM__FILE__); \ + const Assert_MonSrcLoc loc = offset; \ + loc; \ +}) + +extern const char __vmm_pathnames_start; +#define ASSERT_MONSRCFILE(loc) \ + (&__vmm_pathnames_start + ASSERT_MONSRCFILEOFFSET(loc)) + + +/* + * Assertion information is collected in a non-loadable section + * named .assert_info. Each record in this section contains + * a VMM address, an assertion type, an optional bug number, and + * the MonSrcLoc described previously. The VMM address is a key + * used by the VMX to look up the information associated with + * a particular assertion failure. + * + * Assertion failures are fired by executing a ud2 instruction. + * + * For assertions which always result in a terminal user RPC, we use + * __builtin_trap to generate the ud2, so that gcc knows that the + * subsequent code is unreachable. For assertions which are + * recoverable (e.g any assertion triggered on the BackToHost path), + * we generate the ud2 manually, so that gcc will treat the subsequent + * code as reachable. + * + * The memory barriers work around a gcc bug that results from having + * to continue past an assertion. 
Without these barriers, gcc has been + * seen to hoist code into the failing arm of the assertion, where it + * can then tell that, because of the assertion failure, the code ends + * up accessing an array out of bounds. + */ + +#define ASSERT_RECORDINFO(assembly, assertType, bugNr) \ + __asm__ __volatile__(".pushsection .assert_info;" \ + ".quad 0f;" \ + ".long %c[type] + (%c[bug] << 4);" \ + ".long (%c[line] << 16) + %c[file];" \ + ".popsection;" \ + "0: " assembly : : \ + [line] "i" (__LINE__), \ + [file] "i" (__VMM__FILE__), \ + [type] "i" (assertType), \ + [bug] "i" (bugNr)) + +#define _ASSERT_PANIC(name) \ + ({COMPILER_MEM_BARRIER(); \ + ASSERT_RECORDINFO("ud2", AssertType_##name, 0);}) + +#define _ASSERT_PANIC_NORETURN(name) \ + ({COMPILER_MEM_BARRIER(); \ + ASSERT_RECORDINFO("", AssertType_##name, 0); \ + __builtin_trap();}) + +#define _ASSERT_PANIC_BUG(bug, name) \ + ({COMPILER_MEM_BARRIER(); \ + ASSERT_RECORDINFO("ud2", AssertType_##name##Bug, bug);}) + +#define _ASSERT_PANIC_BUG_NORETURN(bug, name) \ + ({COMPILER_MEM_BARRIER(); \ + ASSERT_RECORDINFO("", AssertType_##name##Bug, bug); \ + __builtin_trap();}) + +#endif // MONITOR_APP } +#endif // VMM } + +#endif diff --git a/vmmon-only/include/monitorAction_exported.h b/vmmon-only/include/monitorAction_exported.h new file mode 100644 index 00000000..28b57c2c --- /dev/null +++ b/vmmon-only/include/monitorAction_exported.h @@ -0,0 +1,156 @@ +/********************************************************* + * Copyright (C) 2010-2013 VMware, Inc. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation version 2 and no later version. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY + * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * for more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write to the Free Software Foundation, Inc., + * 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + * + *********************************************************/ + +#ifndef _MONITORACTION_EXPORTED_H_ +#define _MONITORACTION_EXPORTED_H_ + +#define INCLUDE_ALLOW_VMX +#define INCLUDE_ALLOW_VMCORE +#define INCLUDE_ALLOW_USERLEVEL +#define INCLUDE_ALLOW_MODULE +#define INCLUDE_ALLOW_VMMON +#define INCLUDE_ALLOW_VMKERNEL +#define INCLUDE_ALLOW_DISTRIBUTE +#include "includeCheck.h" + +#include "vm_assert.h" +#include "vm_atomic.h" +#include "vm_basic_types.h" + +/* + * Please bump the version number if your change will break the + * compatability to the drivers. + */ +#define ACTION_EXPORTED_VERSION 2 + +#define ACTION_WORD_SIZE (sizeof(uint64) * 8) +#define ACTION_NUM_WORDS (2) +#define ACTION_NUM_IDS (ACTION_NUM_WORDS * ACTION_WORD_SIZE) + +#define MONACTION_INVALID MAX_UINT32 + +typedef uint32 MonitorIdemAction; + +/* + * Representation of a set of actions. + */ +typedef struct MonitorActionSet { + volatile uint64 word[ACTION_NUM_WORDS]; +} MonitorActionSet; + +#ifndef __cplusplus +typedef enum MonitorActionSetName MonitorActionSetName; +#endif + +/* + * Summary of action and interrupt states. 
+ */ +typedef struct MonitorActionIntr { + MonitorActionSet pendingSet; + volatile Bool action; + Bool intr; + Bool nmi; + Bool db; + uint32 _pad; +} MonitorActionIntr; + +/* + *------------------------------------------------------------------------ + * MonitorActionSet_AtomicInclude -- + * + * This function atomically adds an action to an action set. + * + * Results: + * TRUE if the action being added did not exist in the action set. + * FALSE otherwise. + * + * Side effects: + * The given action set will be updated. + *------------------------------------------------------------------------ + */ +static INLINE Bool +MonitorActionSet_AtomicInclude(MonitorActionSet *set, const uint32 actionID) +{ + Atomic_uint64 *atomicSet = + Atomic_VolatileToAtomic64(&set->word[actionID / ACTION_WORD_SIZE]); + uint64 mask = (uint64)1 << (actionID % ACTION_WORD_SIZE); + uint64 oldWord; + uint64 newWord; + + ASSERT_ON_COMPILE((ACTION_WORD_SIZE & (ACTION_WORD_SIZE - 1)) == 0); +#ifdef VMX86_DEBUG + /* If ASSERT is not desirable, do explicit check. Please see PR 567811. */ +#ifdef MODULE + if (UNLIKELY(actionID / ACTION_WORD_SIZE >= ACTION_NUM_WORDS)) { + return FALSE; + } +#else + ASSERT(actionID / ACTION_WORD_SIZE < ACTION_NUM_WORDS); +#endif // MODULE +#endif // VMX86_DEBUG + do { + oldWord = Atomic_Read64(atomicSet); + newWord = oldWord | mask; + } while (!Atomic_CMPXCHG64(atomicSet, &oldWord, &newWord)); + return (oldWord & mask) == 0; +} + + +/* + *---------------------------------------------------------------------------- + * MonitorAction_SetBits -- + * + * The core logic for posting an action. Update the set of pending + * actions of the target VCPU in the shared area to mark the action + * as present. Make sure the bit is set in the pendingSet first to + * avoid a race with the drain loop. + * + * It's the responsibility of the callers to ensure that the change + * to actionIntr->action is globally visible before any IPI is sent + * (the change to pendingSet is pushed out by the cmpxchg in + * MonitorActionSet_AtomicInclude). + * + * Results: + * TRUE if the action being posted was not pending before. + * FALSE otherwise (other threads could have posted the same action). + * + * Side effects: + * None. + *---------------------------------------------------------------------------- + */ +static INLINE Bool +MonitorAction_SetBits(MonitorActionIntr *actionIntr, MonitorIdemAction actionID) +{ + /* Careful if optimizing this: see PR70016. */ + Bool newAction = + MonitorActionSet_AtomicInclude(&actionIntr->pendingSet, actionID); + actionIntr->action = TRUE; + return newAction; +} + +/* + * C1 states entered by monitor while waiting for an action + */ +typedef enum { + VMM_C1_STATE_INVALID = 0, + VMM_C1_STATE_HLT, + VMM_C1_STATE_MWAIT, + VMM_C1_STATE_PAUSE +} vmmC1StateType; + +#endif // _MONITORACTION_EXPORTED_H_ diff --git a/vmmon-only/include/numa_defs.h b/vmmon-only/include/numa_defs.h new file mode 100644 index 00000000..f14a8b54 --- /dev/null +++ b/vmmon-only/include/numa_defs.h @@ -0,0 +1,72 @@ +/********************************************************* + * Copyright (C) 2006 VMware, Inc. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation version 2 and no later version. 
+ * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY + * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * for more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write to the Free Software Foundation, Inc., + * 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + * + *********************************************************/ + +/* + * numa_defs.h -- + * This is the internal header file for the NUMA module. + */ + +#ifndef _NUMA_DEFS_H +#define _NUMA_DEFS_H + +#define INCLUDE_ALLOW_VMX +#define INCLUDE_ALLOW_VMCORE +#define INCLUDE_ALLOW_VMMON + +#define INCLUDE_ALLOW_USERLEVEL +#define INCLUDE_ALLOW_MODULE +#define INCLUDE_ALLOW_VMKERNEL + +#include "includeCheck.h" +#include "vm_basic_types.h" +#include "vm_basic_defs.h" +#include "cpu_defs.h" + +/* Machine NUMA nodes */ +typedef uint32 NUMA_Node; +typedef uint32 NUMA_NodeMask; +typedef uint8 NUMA_MemRangeID; + +/* + * Constants + */ +#define NUMA_MAX_NODES 32 +#define NUMA_MAX_CPUS_PER_NODE (vmx86_server ? MAX_PCPUS : 32) +#define NUMA_MAX_MEM_RANGES 64 +#define INVALID_NUMANODE ((NUMA_Node)-1) +#define NUMA_NODE_MASK_ANY ((NUMA_NodeMask)-1) +#define NUMA_NODE_MASK_NONE ((NUMA_NodeMask)0) + + +/* + * Structures + */ +typedef struct { + MPN startMPN; + MPN endMPN; + NUMA_Node id; + Bool isReliable; + Bool isVolatile; +} NUMA_MemRange; + +typedef struct NUMA_MemRangesList { + uint64 numMemRanges; + NUMA_MemRange memRange[NUMA_MAX_MEM_RANGES]; +} NUMA_MemRangesList; + +#endif // _NUMA_DEFS_H diff --git a/vmmon-only/include/overheadmem_types.h b/vmmon-only/include/overheadmem_types.h new file mode 100644 index 00000000..a07bcab6 --- /dev/null +++ b/vmmon-only/include/overheadmem_types.h @@ -0,0 +1,131 @@ +/********************************************************* + * Copyright (C) 2001-2013 VMware, Inc. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation version 2 and no later version. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY + * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * for more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write to the Free Software Foundation, Inc., + * 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + * + *********************************************************/ + +/* + * overheadmem_types.h + * + * Types for tracking memory overheads. + */ + +#ifndef _OVERHEADMEM_TYPES_H +#define _OVERHEADMEM_TYPES_H + +#define INCLUDE_ALLOW_USERLEVEL + +#define INCLUDE_ALLOW_VMMON +#define INCLUDE_ALLOW_VMCORE +#define INCLUDE_ALLOW_MODULE +#include "includeCheck.h" + +#include "vm_basic_types.h" + +/* + * There are 4 types of memory we lock on the host. Memory can be Mem_Mapped in + * the vmx, anonymous memory for use by monitor is not mapped permanently in any + * address space, guest memory regions other than main memory (can be + * locked/unlocked on hosted but not on ESX), and main memory which can be + * locked/unlocked in hosted and esx. + * + * In addition, the vmx may malloc memory or declare (large) static structures. 
+ * Neither of these is locked on hosted platforms and the hostOS may swap it. + * Therefore, on hosted platforms we do not track this memory and instead + * include a working set component (sched.mem.hosted.perVMOverheadMBs). + * On ESX, this memory must be accounted for so we account it to user + * (nonpaged) overhead. At present, the accounting is extremely coarse, + * and only aggregate sizes are hard-coded (see PR363997). + */ +typedef enum OvhdMemType { + OvhdMem_memmap, + OvhdMem_anon, + OvhdMem_guest, + OvhdMem_mainmem, + OvhdMem_malloc, + OvhdMem_static, + OvhdMem_text, + NumOvhdMemTypes +} OvhdMemType; + +#define OvhdMemMask(type) (1 << type) + +#define OVHDMEM_NONE 0x0 +#define OVHDMEM_MEMMAP 0x1 // OvhdMemMask(OvhdMem_memmap) +#define OVHDMEM_ANON 0x2 // OvhdMemMask(OvhdMem_anon) +#define OVHDMEM_GUEST 0x4 // OvhdMemMask(OvhdMem_guest) +#define OVHDMEM_MAINMEM 0x8 // OvhdMemMask(OvhdMem_mainmem) +#define OVHDMEM_MALLOC 0x10 // OvhdMemMask(OvhdMem_malloc) +#define OVHDMEM_STATIC 0x20 // OvhdMemMask(OvhdMem_static) +#define OVHDMEM_TEXT 0x40 // OvhdMemMask(OvhdMem_text) +#define OVHDMEM_ALL_USER (OVHDMEM_MEMMAP | OVHDMEM_GUEST | OVHDMEM_MAINMEM | \ + OVHDMEM_MALLOC | OVHDMEM_STATIC | OVHDMEM_TEXT) +#define OVHDMEM_ALL (OVHDMEM_ALL_USER | OVHDMEM_ANON) + +/* ... and four categories of memory sources. */ +typedef enum OvhdMemCategory { + OvhdMemCat_paged, + OvhdMemCat_nonpaged, + OvhdMemCat_excluded, + OvhdMemCat_anonymous, + NumOvhdMemCategories +} OvhdMemCategory; + +#define OVHDMEM_PAGED (OVHDMEM_MALLOC | OVHDMEM_STATIC) +#define OVHDMEM_NONPAGED (OVHDMEM_GUEST | OVHDMEM_MEMMAP) +#define OVHDMEM_EXCLUDED (OVHDMEM_MAINMEM | OVHDMEM_TEXT) + +#if ((OVHDMEM_PAGED & OVHDMEM_NONPAGED) != 0) || \ + ((OVHDMEM_NONPAGED & OVHDMEM_EXCLUDED) != 0) || \ + ((OVHDMEM_PAGED & OVHDMEM_EXCLUDED) != 0) || \ + ((OVHDMEM_PAGED | OVHDMEM_NONPAGED | OVHDMEM_EXCLUDED | OVHDMEM_ANON) != \ + OVHDMEM_ALL) +#error Overheadmem categories do not form a partition of the overheads +#endif + +/* Categories of overhead for 32-bit and 64-bit mode. */ +typedef struct OvhdMem_Overheads { + uint32 paged; + uint32 nonpaged; + uint32 anonymous; + uint32 text; +} OvhdMem_Overheads; + +typedef struct OvhdMem_Deltas { + int32 paged; + int32 nonpaged; + int32 anonymous; +} OvhdMem_Deltas; + + +/* Types for tracking vmx (user) overheads. */ + +#define OVHDMEM_MAX_NAME_LEN 36 + +/* Types for tracking vmm overheads. */ + +typedef struct OvhdMemUsage { + uint32 reserved; // pages + uint32 used; // pages +} OvhdMemUsage; + +typedef struct OvhdMemNode { + OvhdMemUsage usage; // allocated and rsvd bytes for source + OvhdMemUsage maxUsage; // max allocated and rsvd bytes for source + char name[OVHDMEM_MAX_NAME_LEN]; // name of overhead source + OvhdMemType type; // how/where memory for source is managed +} OvhdMemNode; + +#endif diff --git a/vmmon-only/include/pageLock_defs.h b/vmmon-only/include/pageLock_defs.h new file mode 100644 index 00000000..c3b5eb98 --- /dev/null +++ b/vmmon-only/include/pageLock_defs.h @@ -0,0 +1,105 @@ +/********************************************************* + * Copyright (C) 2015 VMware, Inc. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation version 2 and no later version. 
+ * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY + * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * for more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write to the Free Software Foundation, Inc., + * 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + * + *********************************************************/ + + +/* + * pageLock_defs.h + * + * Page lock status codes, used by vmmon. + */ + +#ifndef _PAGELOCK_DEFS_H_ +#define _PAGELOCK_DEFS_H_ + +#define INCLUDE_ALLOW_USERLEVEL +#define INCLUDE_ALLOW_VMMON +#define INCLUDE_ALLOW_VMCORE +#define INCLUDE_ALLOW_MODULE +#include "includeCheck.h" + +/* + * Return codes from page locking, unlocking, and MPN lookup. + * They share an error code space because they call one another + * internally. + * + * PAGE_LOCK_FAILED The host refused to lock a page. + * PAGE_LOCK_LIMIT_EXCEEDED We have reached the limit of locked + * pages for all VMs + * PAGE_LOCK_TOUCH_FAILED Failed to touch page after lock. + * PAGE_LOCK_IN_TRANSITION The page is locked but marked by Windows + * as nonpresent in CPU PTE and in transition + * in Windows PFN. + * + * PAGE_LOCK_SYS_ERROR System call error. + * PAGE_LOCK_ALREADY_LOCKED Page already locked. + * PAGE_LOCK_MEMTRACKER_ERROR MemTracker fails. + * PAGE_LOCK_PHYSTRACKER_ERROR PhysTracker fails. + * PAGE_LOCK_MDL_ERROR Mdl error on Windows. + * + * PAGE_UNLOCK_NO_ERROR Unlock successful (must be 0). + * PAGE_UNLOCK_NOT_TRACKED Not in memtracker. + * PAGE_UNLOCK_NO_MPN Tracked but no MPN. + * PAGE_UNLOCK_NOT_LOCKED Not locked. + * PAGE_UNLOCK_TOUCH_FAILED Failed to touch page. + * PAGE_UNLOCK_MISMATCHED_TYPE Tracked but was locked by different API + * + * PAGE_LOOKUP_INVALID_ADDR Consistency checking. + * PAGE_LOOKUP_BAD_HIGH_ADDR Consistency checking. + * PAGE_LOOKUP_ZERO_ADDR Consistency checking. + * PAGE_LOOKUP_SMALL_ADDR Consistency checking. + * + * All error values must be negative values less than -4096 to avoid + * conflicts with errno values on Linux. 
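+ *
+ * A caller that only needs to distinguish transient ("soft") lock
+ * failures from hard errors might, for example, test the status with
+ * the PAGE_LOCK_SOFT_FAILURE() macro defined at the end of this file:
+ *
+ *   if (PAGE_LOCK_SOFT_FAILURE(status)) {
+ *      // e.g. PAGE_LOCK_LIMIT_EXCEEDED; possibly worth retrying later
+ *   }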
+ * + * -- edward + */ + +#define PAGE_LOCK_SUCCESS 0 +#define PAGE_LOCK_FAILED (-10001) +#define PAGE_LOCK_LIMIT_EXCEEDED (-10002) +#define PAGE_LOCK_TOUCH_FAILED (-10003) +#define PAGE_LOCK_IN_TRANSITION (-10004) + +#define PAGE_LOCK_SYS_ERROR (-10010) +#define PAGE_LOCK_ALREADY_LOCKED (-10011) +#define PAGE_LOCK_MEMTRACKER_ERROR (-10012) +#define PAGE_LOCK_PHYSTRACKER_ERROR (-10013) +#define PAGE_LOCK_MDL_ERROR (-10014) + +#define PAGE_UNLOCK_SUCCESS 0 +#define PAGE_UNLOCK_NOT_TRACKED (-10100) +#define PAGE_UNLOCK_NO_MPN (-10101) +#define PAGE_UNLOCK_NOT_LOCKED (-10102) +#define PAGE_UNLOCK_TOUCH_FAILED (-10103) +#define PAGE_UNLOCK_MISMATCHED_TYPE (-10104) + +#define PAGE_LOOKUP_SUCCESS 0 +#define PAGE_LOOKUP_INVALID_ADDR (-10200) +#define PAGE_LOOKUP_BAD_HIGH_ADDR (-10201) +#define PAGE_LOOKUP_ZERO_ADDR (-10202) +#define PAGE_LOOKUP_SMALL_ADDR (-10203) +#define PAGE_LOOKUP_SYS_ERROR (-10204) +#define PAGE_LOOKUP_NOT_TRACKED (-10) // added to another code +#define PAGE_LOOKUP_NO_MPN (-20) // added to another code +#define PAGE_LOOKUP_NOT_LOCKED (-30) // added to another code +#define PAGE_LOOKUP_NO_VM (-40) // added to another code + +#define PAGE_LOCK_SOFT_FAILURE(status) (status <= PAGE_LOCK_FAILED && \ + status > PAGE_LOCK_SYS_ERROR) + +#endif // ifndef _PAGELOCK_DEFS_H_ diff --git a/vmmon-only/include/pagelist.h b/vmmon-only/include/pagelist.h new file mode 100644 index 00000000..46ed1424 --- /dev/null +++ b/vmmon-only/include/pagelist.h @@ -0,0 +1,101 @@ +/********************************************************* + * Copyright (C) 2010-2014 VMware, Inc. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation version 2 and no later version. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY + * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * for more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write to the Free Software Foundation, Inc., + * 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + * + *********************************************************/ + +/* + * pagelist.h -- + * + * Definitions of operations on BPNs used in communicating page info + * between VMKernel/VMX and VMM. + */ + +#ifndef _PAGELIST_H +#define _PAGELIST_H + +#define INCLUDE_ALLOW_USERLEVEL +#define INCLUDE_ALLOW_VMKERNEL +#define INCLUDE_ALLOW_VMMON +#define INCLUDE_ALLOW_VMCORE +#define INCLUDE_ALLOW_MODULE +#include "includeCheck.h" + +#include "vm_assert.h" +#include "vmcore_types.h" + +/* + * Sets of pages are passed between the monitor and the platform to be + * shared, invalidated, remapped, or swapped. + * + * A set is sized so that it fits in a 4KB page. 
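+ *
+ * As a usage sketch (pageList, i and bpn being hypothetical locals), a
+ * producer might build entries with the helpers defined below and void
+ * any entry whose BPN already appears earlier in the list:
+ *
+ *   pageList[i] = PageList_CreateEntry(bpn);
+ *   if (PageList_IsBPNDup(pageList, i, bpn)) {
+ *      PageList_VoidEntry(&pageList[i]);
+ *   }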
+ */ + +#pragma pack(push, 1) +typedef struct PageListEntry { + CompressedBPN cbpn; + Bool voided; + uint8 _pad[2]; +} PageListEntry; +#pragma pack(pop) + +#define PAGELIST_MAX (PAGE_SIZE / sizeof(PageListEntry)) + +static INLINE PageListEntry +PageList_CreateEntry(BPN bpn) +{ + PageListEntry ple; + CompressedBPN_Write(&ple.cbpn, bpn); + ple.voided = FALSE; + return ple; +} + +static INLINE BPN +PageList_BPN(const PageListEntry *ple) +{ + return CompressedBPN_Read(&ple->cbpn); +} + +static INLINE Bool +PageList_IsVoid(const PageListEntry *ple) +{ + ASSERT(ple->voided == TRUE || ple->voided == FALSE); + return ple->voided; +} + +static INLINE void +PageList_VoidEntry(PageListEntry *ple) +{ + ple->voided = TRUE; +} + + +/* + * This function inspects the set of BPN between entry [0,i) in the page list + * and returns TRUE if any of them matches the provided BPN. + */ +static INLINE Bool +PageList_IsBPNDup(const PageListEntry *pageList, unsigned i, BPN bpn) +{ + unsigned k; + for (k = 0; k < i; k++) { + if (PageList_BPN(&pageList[k]) == bpn) { + return TRUE; + } + } + return FALSE; +} + +#endif diff --git a/vmmon-only/include/pcip_defs.h b/vmmon-only/include/pcip_defs.h new file mode 100644 index 00000000..a00520bb --- /dev/null +++ b/vmmon-only/include/pcip_defs.h @@ -0,0 +1,122 @@ +/********************************************************* + * Copyright (C) 2009-2014 VMware, Inc. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation version 2 and no later version. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY + * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * for more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write to the Free Software Foundation, Inc., + * 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + * + *********************************************************/ + +/* + * pcip_defs.h -- + * + * PCI passthru definitions shared by the vmx, monitor, vmkernel, and + * vmmon. Not all PCI passthru definitions are found here: the shared + * bits mainly pertain to interrupt proxying. 
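+ *
+ * The vector index space below reserves one slot for IOAPIC routing and
+ * one for MSI ahead of the MSI-X block, which is why PCIP_MAX_VECTORS is
+ * PCIP_MAX_MSIX_VECTORS + 2; the index of MSI-X vector n is presumably
+ *
+ *   PCIP_INDEX_MSIXOFF + n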
+ */ + +#ifndef _PCIP_DEFS_H +#define _PCIP_DEFS_H + +#define INCLUDE_ALLOW_MODULE +#define INCLUDE_ALLOW_USERLEVEL +#define INCLUDE_ALLOW_VMCORE +#define INCLUDE_ALLOW_VMKERNEL +#define INCLUDE_ALLOW_VMMON +#include "includeCheck.h" + +#include "monitorAction_exported.h" + +#include "bitvector.h" + +#define MAX_INTERRUPTS 256 // max interrupts a device could use +#define PCIP_MAX_MSIX_VECTORS 128 +#define PCIP_MAX_VECTORS (PCIP_MAX_MSIX_VECTORS + 2) + +typedef struct PCIPVecBV { + BitVector bv; + uint32 reserved[PCIP_MAX_VECTORS / sizeof (uint32) - 1]; +} PCIPVecBV; + +typedef enum PCIPassthruVectorIndex { + PCIP_INDEX_IOAPIC, + PCIP_INDEX_MSI, + PCIP_INDEX_MSIXOFF, + PCIP_INDEX_INVALID = PCIP_INDEX_MSIXOFF + PCIP_MAX_MSIX_VECTORS, +} PCIPassthruVectorIndex; + +typedef enum PCIPassthru_IntrType { + PCIPASSTHRU_INTR_NONE = 0x00, + PCIPASSTHRU_INTR_IOAPIC = 0x01, + PCIPASSTHRU_INTR_MSI = 0x02, + PCIPASSTHRU_INTR_MSIX = 0x04, +} PCIPassthru_IntrType; + +typedef struct FPTIntrProxyInfo { + uint32 adapterIndex; + uint32 vectorIndex; + MonitorIdemAction actionID; + uint32 _pad0; + + /* + * These addresses are only needed for hosted platforms, where our kernel + * modules lack a SharedArea API. + */ +#ifndef VMX86_SERVER + VA64 notifyUVA; + VA64 vectorIndexUVA; +#endif +} FPTIntrProxyInfo; + +typedef struct UPTIntrProxyInfo { + uint32 adapterIndex; + MonitorIdemAction actionID; +} UPTIntrProxyInfo; + +typedef union PCIPassthru_IntrProxyInfo { + FPTIntrProxyInfo fpt; + UPTIntrProxyInfo upt; +} PCIPassthru_IntrProxyInfo; + +typedef enum PCIPassthru_ErrorType { + PCIPASSTHRU_ERROR_NONE = 0x00, + PCIPASSTHRU_ERROR_AER = 0x01, + PCIPASSTHRU_ERROR_PAGE_FAULT = 0x02, +} PCIPassthru_ErrorType; + +typedef struct PCIPassthru_PageFaultInfo { + uint64 ioAddr; + uint64 machAddr; + uint8 faultReason; + uint8 unused[7]; +} PCIPassthru_PageFaultInfo; + +typedef struct PCIPassthru_AERInfo { + uint64 count; +} PCIPassthru_AERInfo; + +typedef union PCIPassthru_ErrorInfo { + PCIPassthru_PageFaultInfo pageFaultInfo; + PCIPassthru_AERInfo aerInfo; +} PCIPassthru_ErrorInfo; + +typedef +#include "vmware_pack_begin.h" +struct PCIPassthru_ErrorMsg { + uint32 sbdf; + PCIPassthru_ErrorType errorType; + PCIPassthru_ErrorInfo errorInfo; +} +#include "vmware_pack_end.h" +PCIPassthru_ErrorMsg; + +#endif // _PCIP_DEFS_H diff --git a/vmmon-only/include/perfctr_generic.h b/vmmon-only/include/perfctr_generic.h new file mode 100644 index 00000000..82a068ba --- /dev/null +++ b/vmmon-only/include/perfctr_generic.h @@ -0,0 +1,72 @@ +/********************************************************* + * Copyright (C) 1998-2012,2014 VMware, Inc. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation version 2 and no later version. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY + * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * for more details. 
+ * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write to the Free Software Foundation, Inc., + * 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + * + *********************************************************/ + +/* + * perfctr_generic.h -- + * + */ + +#ifndef _PERFCTR_GENERIC_H_ +#define _PERFCTR_GENERIC_H_ + +#define INCLUDE_ALLOW_USERLEVEL + +#define INCLUDE_ALLOW_VMKERNEL +#define INCLUDE_ALLOW_MODULE +#define INCLUDE_ALLOW_VMCORE +#define INCLUDE_ALLOW_VMMON + +#include "includeCheck.h" +#include "vm_basic_types.h" + +/* + * nmiNo -- vmm peer is not attempting to do nmi profiling this run + * nmiYes -- vmm peer is doing nmi profiling and nmis are currently enabled + * nmiStopped -- vmm peer is doing nmi profiling, but nmis are temporarily + * disabled for safety reasons. + */ +typedef enum {nmiNo = 0, nmiYes, nmiStopped} NMIStatus; +typedef struct NMIShared { /* shared with vmx and vmkernel */ + NMIStatus vmmStatus; + int32 nmiErrorCode; + int64 nmiErrorData; +} NMIShared; + +/* + * CrossProf: structures for unified profiling of vmm, vmx, and + * vmkernel. Per-vcpu. + */ + +#define CALLSTACK_CROSSPROF_PAGES 1 + +typedef struct { + /* + * This structure is per-vcpu. The raw data is a packed vector + * of MonitorCallStackSample, a variable-length structure. + */ + + /* raw data - packed vec of MonitorCallStackSample, variable length */ + uint8 crossProfSampleBuffer[PAGES_2_BYTES(CALLSTACK_CROSSPROF_PAGES)]; + + uint32 crossProfSampleBytes; + uint32 crossProfNumDroppedSamples; /* For when buffer fills up */ + Bool enabled; /* Can be false in stats build if monitor.callstack=FALSE */ + uint8 _pad[3]; +} CrossProfShared; + +#endif // ifndef _PERFCTR_GENERIC_H_ diff --git a/vmmon-only/include/pgtbl.h b/vmmon-only/include/pgtbl.h new file mode 100644 index 00000000..39ef4e19 --- /dev/null +++ b/vmmon-only/include/pgtbl.h @@ -0,0 +1,382 @@ +/********************************************************* + * Copyright (C) 2002,2014 VMware, Inc. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation version 2 and no later version. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY + * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * for more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write to the Free Software Foundation, Inc., + * 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + * + *********************************************************/ + +#ifndef __PGTBL_H__ +# define __PGTBL_H__ + + +#include + +#include "compat_pgtable.h" +#include "compat_spinlock.h" +#include "compat_page.h" + +/* + *----------------------------------------------------------------------------- + * + * PgtblPte2MPN -- + * + * Returns the page structure associated to a Page Table Entry. 
+ * + * This function is not allowed to schedule() because it can be called while + * holding a spinlock --hpreg + * + * Results: + * INVALID_MPN on failure + * mpn on success + * + * Side effects: + * None + * + *----------------------------------------------------------------------------- + */ + +static INLINE MPN +PgtblPte2MPN(pte_t *pte) // IN +{ + MPN mpn; + if (pte_present(*pte) == 0) { + return INVALID_MPN; + } + mpn = pte_pfn(*pte); + if (mpn >= INVALID_MPN) { + return INVALID_MPN; + } + return mpn; +} + + +/* + *----------------------------------------------------------------------------- + * + * PgtblPte2Page -- + * + * Returns the page structure associated to a Page Table Entry. + * + * This function is not allowed to schedule() because it can be called while + * holding a spinlock --hpreg + * + * Results: + * The page structure if the page table entry points to a physical page + * NULL if the page table entry does not point to a physical page + * + * Side effects: + * None + * + *----------------------------------------------------------------------------- + */ + +static INLINE struct page * +PgtblPte2Page(pte_t *pte) // IN +{ + if (pte_present(*pte) == 0) { + return NULL; + } + + return compat_pte_page(*pte); +} + + +/* + *----------------------------------------------------------------------------- + * + * PgtblPGD2PTELocked -- + * + * Walks through the hardware page tables to try to find the pte + * associated to a virtual address. + * + * Results: + * pte. Caller must call pte_unmap if valid pte returned. + * + * Side effects: + * None + * + *----------------------------------------------------------------------------- + */ + +static INLINE pte_t * +PgtblPGD2PTELocked(compat_pgd_t *pgd, // IN: PGD to start with + VA addr) // IN: Address in the virtual address + // space of that process +{ + compat_pud_t *pud; + pmd_t *pmd; + pte_t *pte; + + if (compat_pgd_present(*pgd) == 0) { + return NULL; + } + + pud = compat_pud_offset(pgd, addr); + if (compat_pud_present(*pud) == 0) { + return NULL; + } + + pmd = pmd_offset_map(pud, addr); + if (pmd_present(*pmd) == 0) { + pmd_unmap(pmd); + return NULL; + } + + pte = pte_offset_map(pmd, addr); + pmd_unmap(pmd); + return pte; +} + + +/* + *----------------------------------------------------------------------------- + * + * PgtblVa2PTELocked -- + * + * Walks through the hardware page tables to try to find the pte + * associated to a virtual address. + * + * Results: + * pte. Caller must call pte_unmap if valid pte returned. + * + * Side effects: + * None + * + *----------------------------------------------------------------------------- + */ + +static INLINE pte_t * +PgtblVa2PTELocked(struct mm_struct *mm, // IN: Mm structure of a process + VA addr) // IN: Address in the virtual address + // space of that process +{ + return PgtblPGD2PTELocked(compat_pgd_offset(mm, addr), addr); +} + + +/* + *----------------------------------------------------------------------------- + * + * PgtblVa2MPNLocked -- + * + * Retrieve MPN for a given va. + * + * Caller must call pte_unmap if valid pte returned. 
The mm->page_table_lock + * must be held, so this function is not allowed to schedule() --hpreg + * + * Results: + * INVALID_MPN on failure + * mpn on success + * + * Side effects: + * None + * + *----------------------------------------------------------------------------- + */ + +static INLINE MPN +PgtblVa2MPNLocked(struct mm_struct *mm, // IN: Mm structure of a process + VA addr) // IN: Address in the virtual address +{ + pte_t *pte; + + pte = PgtblVa2PTELocked(mm, addr); + if (pte != NULL) { + MPN mpn = PgtblPte2MPN(pte); + pte_unmap(pte); + return mpn; + } + return INVALID_MPN; +} + + +#if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 0) +/* + *----------------------------------------------------------------------------- + * + * PgtblKVa2MPNLocked -- + * + * Retrieve MPN for a given kernel va. + * + * Caller must call pte_unmap if valid pte returned. The mm->page_table_lock + * must be held, so this function is not allowed to schedule() --hpreg + * + * Results: + * INVALID_MPN on failure + * mpn on success + * + * Side effects: + * None + * + *----------------------------------------------------------------------------- + */ + +static INLINE MPN +PgtblKVa2MPNLocked(struct mm_struct *mm, // IN: Mm structure of a caller + VA addr) // IN: Address in the virtual address +{ + pte_t *pte; + + pte = PgtblPGD2PTELocked(compat_pgd_offset_k(mm, addr), addr); + if (pte != NULL) { + MPN mpn = PgtblPte2MPN(pte); + pte_unmap(pte); + return mpn; + } + return INVALID_MPN; +} +#endif + + +/* + *----------------------------------------------------------------------------- + * + * PgtblVa2PageLocked -- + * + * Return the "page" struct for a given va. + * + * Results: + * struct page or NULL. The mm->page_table_lock must be held, so this + * function is not allowed to schedule() --hpreg + * + * Side effects: + * None + * + *----------------------------------------------------------------------------- + */ + +static INLINE struct page * +PgtblVa2PageLocked(struct mm_struct *mm, // IN: Mm structure of a process + VA addr) // IN: Address in the virtual address +{ + pte_t *pte; + + pte = PgtblVa2PTELocked(mm, addr); + if (pte != NULL) { + struct page *page = PgtblPte2Page(pte); + pte_unmap(pte); + return page; + } else { + return NULL; + } +} + + +/* + *----------------------------------------------------------------------------- + * + * PgtblVa2MPN -- + * + * Walks through the hardware page tables of the current process to try to + * find the page structure associated to a virtual address. + * + * Results: + * Same as PgtblVa2MPNLocked() + * + * Side effects: + * None + * + *----------------------------------------------------------------------------- + */ + +static INLINE MPN +PgtblVa2MPN(VA addr) // IN +{ + struct mm_struct *mm; + MPN mpn; + + /* current->mm is NULL for kernel threads, so use active_mm. */ + mm = current->active_mm; + if (compat_get_page_table_lock(mm)) { + spin_lock(compat_get_page_table_lock(mm)); + } + mpn = PgtblVa2MPNLocked(mm, addr); + if (compat_get_page_table_lock(mm)) { + spin_unlock(compat_get_page_table_lock(mm)); + } + return mpn; +} + + +#if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 0) +/* + *----------------------------------------------------------------------------- + * + * PgtblKVa2MPN -- + * + * Walks through the hardware page tables of the current process to try to + * find the page structure associated to a virtual address. 
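PgtblVa2MPN(), defined above, takes mm->page_table_lock when the kernel provides one and then performs the locked walk. A minimal usage sketch; the wrapper name and its error handling are illustrative only:

static MPN
ExampleUserVaToMPN(VA uva)
{
   MPN mpn = PgtblVa2MPN(uva);   /* walks current->active_mm under the lock */

   if (mpn == INVALID_MPN) {
      /* Page not present; a real caller would fault it in and retry. */
      return INVALID_MPN;
   }
   return mpn;
}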
+ * + * Results: + * Same as PgtblVa2MPNLocked() + * + * Side effects: + * None + * + *----------------------------------------------------------------------------- + */ + +static INLINE MPN +PgtblKVa2MPN(VA addr) // IN +{ + struct mm_struct *mm = current->active_mm; + MPN mpn; + + if (compat_get_page_table_lock(mm)) { + spin_lock(compat_get_page_table_lock(mm)); + } + mpn = PgtblKVa2MPNLocked(mm, addr); + if (compat_get_page_table_lock(mm)) { + spin_unlock(compat_get_page_table_lock(mm)); + } + return mpn; +} +#endif + + +/* + *----------------------------------------------------------------------------- + * + * PgtblVa2Page -- + * + * Walks through the hardware page tables of the current process to try to + * find the page structure associated to a virtual address. + * + * Results: + * Same as PgtblVa2PageLocked() + * + * Side effects: + * None + * + *----------------------------------------------------------------------------- + */ + +static INLINE struct page * +PgtblVa2Page(VA addr) // IN +{ + struct mm_struct *mm = current->active_mm; + struct page *page; + + if (compat_get_page_table_lock(mm)) { + spin_lock(compat_get_page_table_lock(mm)); + } + page = PgtblVa2PageLocked(mm, addr); + if (compat_get_page_table_lock(mm)) { + spin_unlock(compat_get_page_table_lock(mm)); + } + return page; +} + + +#endif /* __PGTBL_H__ */ diff --git a/vmmon-only/include/pshare_ext.h b/vmmon-only/include/pshare_ext.h new file mode 100644 index 00000000..ab42c3d1 --- /dev/null +++ b/vmmon-only/include/pshare_ext.h @@ -0,0 +1,64 @@ +/********************************************************* + * Copyright (C) 2001,2014 VMware, Inc. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation version 2 and no later version. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY + * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * for more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write to the Free Software Foundation, Inc., + * 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + * + *********************************************************/ + +/* + * pshare_ext.h -- + * + * VMKernel/VMMon <-> VMM transparent page sharing info. 
+ */ + +#ifndef _PSHARE_EXT_H +#define _PSHARE_EXT_H + +#define INCLUDE_ALLOW_USERLEVEL + +#define INCLUDE_ALLOW_VMKERNEL +#define INCLUDE_ALLOW_VMCORE +#define INCLUDE_ALLOW_MODULE +#include "includeCheck.h" + +#include "pagelist.h" +#include "vm_basic_types.h" +#include "vm_assert.h" + +/* + * constants + */ + +#define PSHARE_PAGELIST_MAX (PAGELIST_MAX) +#define PSHARE_P2M_BUFFER_MPNS_MAX (16) +#define PSHARE_P2M_BUFFER_MPNS_DEFAULT (4) +#define PSHARE_P2M_BUFFER_SLOTS_PER_MPN (PAGE_SIZE / sizeof(PShare_P2MUpdate)) + +#define PSHARE_POISON_MARKER (CONST64U(0xAAAAAAAAAAAAAAAA)) + +#define PSHARE_SALT_UNSET 0 +#define PSHARE_SALT_DEFAULT 1 + +MY_ASSERTS(PSHARE_EXT, + ASSERT_ON_COMPILE(PSHARE_PAGELIST_MAX <= PAGELIST_MAX);) + +/* + * types + */ + +typedef struct PShare_P2MUpdate { + BPN bpn; + MPN mpn; +} PShare_P2MUpdate; +#endif diff --git a/vmmon-only/include/ptsc.h b/vmmon-only/include/ptsc.h new file mode 100644 index 00000000..dfa0328a --- /dev/null +++ b/vmmon-only/include/ptsc.h @@ -0,0 +1,263 @@ +/********************************************************* + * Copyright (C) 1998-2014 VMware, Inc. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation version 2 and no later version. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY + * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * for more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write to the Free Software Foundation, Inc., + * 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + * + *********************************************************/ + +/* + * ptsc.h -- + * + * Pseudo TSC + */ + +#ifndef _PTSC_H_ +#define _PTSC_H_ + +#define INCLUDE_ALLOW_VMX + +#define INCLUDE_ALLOW_VMCORE +#define INCLUDE_ALLOW_VMMON +#define INCLUDE_ALLOW_VMKERNEL +#include "includeCheck.h" + +#include "rateconv.h" + +/* + * RDTSC and PTSC_Get. + * + * RDTSC reads the hardware timestamp counter on the current physical + * CPU. In general, the TSC is *not* a globally consistent timer that + * runs at a constant rate. Any code that still assumes it is should + * be corrected; see PR 20499. + * + * PTSC_Get returns a pseudo-TSC that runs at approximately the + * maximum speed of physical CPU 0's TSC and is approximately globally + * consistent. It is available both at userlevel and in the monitor, + * with different implementations. In the vmkernel, Timer_PseudoTSC + * provides similar functionality. + * + */ + +/* TS stands for "timestamp", which is in units of "cycles" */ +typedef uint64 VmAbsoluteTS; // a particular point in time (in cycles) +typedef int64 VmRelativeTS; // a signed delta in cycles +typedef uint64 VmIntervalTS; // an unsigned delta in cycles +typedef uint64 VmAbsoluteUS; // a particular point in time (in us) +typedef int64 VmRelativeUS; // a signed delta in us +typedef uint64 VmIntervalUS; // an unsigned delta in us + +/* + * Compare two VmAbsoluteTS's using comparison operator op, allowing + * for wrap. The assumption is that differences should not be more + * than 2**63, so a larger difference is taken as negative. 
+ */ +#define COMPARE_TS(ts1, op, ts2) (((int64) ((ts1) - (ts2))) op 0) + +#define MAX_ABSOLUTE_TS \ + ((VmAbsoluteTS) CONST64U(0xffffffffffffffff)) + +/* + * Largest possible unambiguous difference between two VmAbsoluteTS's + * according to COMPARE_TS's method of comparison. + */ +#define MAX_RELATIVE_TS \ + ((VmRelativeTS) CONST64(0x7fffffffffffffff)) + +#define MAX_ABSOLUTE_US \ + ((VmAbsoluteUS) CONST64U(0xffffffffffffffff)) + +typedef struct PTSCInfo { + Bool hwTSCsSynced; + Bool hwTSCsAdjusted; + uint8 _pad[6]; + int64 hz; + uint32 kHz; + uint32 mHz; + + RateConv_Ratio usToCycles; + RateConv_Ratio cyclesToUs; + RateConv_Ratio msToCycles; + RateConv_Ratio cyclesToNs; +} PTSCInfo; + +extern PTSCInfo ptscInfo; + +Bool PTSC_Init(uint64 tscHz); +VmAbsoluteTS PTSC_InitialCount(const char *module, + const char *option, + VmIntervalTS freq, + VmAbsoluteTS defaultCnt); +Bool PTSC_HasPerfectlySynchronizedTSCs(void); + +static INLINE int64 +PTSC_Hz(void) +{ + ASSERT(ptscInfo.hz); + return ptscInfo.hz; +} + +static INLINE uint32 +PTSC_KHz(void) +{ + ASSERT(ptscInfo.kHz); + return ptscInfo.kHz; +} + +static INLINE uint32 +PTSC_MHz(void) +{ + ASSERT(ptscInfo.mHz); + return ptscInfo.mHz; +} + +#if defined(VM_X86_64) || defined(VM_ARM_64) + +/* + * Conversions to/from cycles. Note that the conversions operate on + * signed values, so be careful when taking the difference of two + * VmAbsoluteTS (which is unsigned) that that value is not out of range + * of the signed type. + */ + +static INLINE VmRelativeTS +PTSC_USToCycles(int64 us) +{ + return Muls64x32s64(us, ptscInfo.usToCycles.mult, ptscInfo.usToCycles.shift); +} + +static INLINE VmRelativeTS +PTSC_MSToCycles(int64 ms) +{ + return Muls64x32s64(ms, ptscInfo.msToCycles.mult, ptscInfo.msToCycles.shift); +} + +static INLINE int64 +PTSC_CyclesToNS(VmRelativeTS ts) +{ + return Muls64x32s64(ts, ptscInfo.cyclesToNs.mult, ptscInfo.cyclesToNs.shift); +} + +static INLINE int64 +PTSC_CyclesToUS(VmRelativeTS ts) +{ + return Muls64x32s64(ts, ptscInfo.cyclesToUs.mult, ptscInfo.cyclesToUs.shift); +} + +#else + +/* 32-bit Muls64x32s64 too big to justify inlining. */ +VmRelativeTS PTSC_USToCycles(int64 us); +VmRelativeTS PTSC_MSToCycles(int64 ms); +int64 PTSC_CyclesToNS(VmRelativeTS ts); +int64 PTSC_CyclesToUS(VmRelativeTS ts); + +#endif + +#if defined(VMX86_SERVER) && defined(VMX86_VMX) + +/* + * ESX with userworld VMX + */ +#include "x86types.h" +#include "user_layout.h" + +#ifdef VM_ARM_64 +static INLINE VmAbsoluteTS +PTSC_Get(void) +{ + extern __thread User_ThreadData vmkUserTdata; + + register RateConv_Params params; + register uint64 pseudoTSC; + + /* + * On ARM64 the generic timer guarantees that the counters are synchronous + * and independent of CPU frequency. + */ + params = vmkUserTdata.pseudoTSCConv; + pseudoTSC = RDTSC(); + + /* + * assert that conversion rate is `identity'. 
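COMPARE_TS() above compares timestamps through a signed 64-bit difference, so the result stays correct across counter wrap as long as the two values are within 2**63 cycles of each other. A small sketch combining it with PTSC_USToCycles(); both helper names are invented:

static INLINE VmAbsoluteTS
ExampleDeadlineInUS(VmAbsoluteTS now, int64 us)
{
   return now + PTSC_USToCycles(us);        /* microseconds -> cycles */
}

static INLINE Bool
ExampleDeadlineReached(VmAbsoluteTS now, VmAbsoluteTS deadline)
{
   return COMPARE_TS(now, >=, deadline);    /* (int64)(now - deadline) >= 0 */
}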
+ * we'll have to revisit that when/if this assumption changes + * on some platform + */ + ASSERT((params.mult == 1 && params.shift == 0) || + (params.mult == 0x80000000 && params.shift == 31)); + + pseudoTSC += params.add; + + return pseudoTSC; +} +#else +static INLINE VmAbsoluteTS +PTSC_Get(void) +{ + extern __thread User_ThreadData vmkUserTdata; + VmAbsoluteTS ptsc; + + if (vmkUserTdata.magic != USER_THREADDATA_MAGIC) { + return 0; + } + ptsc = vmkUserTdata.u.pseudoTSCGet(&vmkUserTdata); + ASSERT((int64)ptsc >= 0); + return ptsc; +} +#endif +#else + +/* + * Monitor and hosted VMX + */ + +VmAbsoluteTS PTSC_Get(void); + +#endif + +/* + *----------------------------------------------------------------------------- + * + * PTSC_HasSynchronizedTSCs -- + * + * Returns TRUE iff the platform TSCs are known to be synchronized. + * + *----------------------------------------------------------------------------- + */ + +static INLINE Bool +PTSC_HasSynchronizedTSCs(void) +{ + return ptscInfo.hwTSCsSynced; +} + + +/* + *----------------------------------------------------------------------------- + * + * PTSC_HostAdjustedTSCs -- + * + * Returns TRUE if the platform may have adjusted TSCs in an attempt + * to sync them up. + * + *----------------------------------------------------------------------------- + */ + +static INLINE Bool +PTSC_HostAdjustedTSCs(void) +{ + return ptscInfo.hwTSCsAdjusted; +} + +#endif /* ifndef _PTSC_H_ */ diff --git a/vmmon-only/include/rateconv.h b/vmmon-only/include/rateconv.h new file mode 100644 index 00000000..14f0195f --- /dev/null +++ b/vmmon-only/include/rateconv.h @@ -0,0 +1,118 @@ +/********************************************************* + * Copyright (C) 2003 VMware, Inc. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation version 2 and no later version. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY + * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * for more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write to the Free Software Foundation, Inc., + * 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + * + *********************************************************/ + +/* + * + * rateconv.h -- + * + * Parameters and functions for linear rate conversion of 64 bit + * counters: + * + * y = ((x * mult) >> shift) + add. + * + */ + +#ifndef _VM_RATECONV_H_ +#define _VM_RATECONV_H_ + +#define INCLUDE_ALLOW_USERLEVEL + +#define INCLUDE_ALLOW_MODULE +#define INCLUDE_ALLOW_VMKERNEL +#define INCLUDE_ALLOW_VMK_MODULE +#define INCLUDE_ALLOW_DISTRIBUTE +#define INCLUDE_ALLOW_VMCORE +#define INCLUDE_ALLOW_VMMON +#include "includeCheck.h" + +#include "vm_basic_types.h" +#include "vm_basic_asm.h" +#include "vm_assert.h" +#include "vm_atomic.h" + +/* RateConv_Params is part of vmx<->vmmon interface (INIT_PSEUDO_TSC ioctl) */ +typedef struct RateConv_Params { + uint32 mult; /* mult == 1 implies shift == 0. */ + uint32 shift; + int64 add; +} RateConv_Params; + +typedef struct RateConv_ParamsVolatile { + RateConv_Params p; + Bool changed; + uint8 pad[7]; +} RateConv_ParamsVolatile; + +typedef struct RateConv_Ratio { + uint32 mult; + uint32 shift; +} RateConv_Ratio; + +#define RATE_CONV_IDENTITY { 1, 0, 0 } /* Out = in. 
*/ + +Bool RateConv_ComputeParams(uint64 inHz, uint64 inBase, + uint64 outHz, uint64 outBase, + RateConv_Params *conv); +void RateConv_LogParams(const char *prefix, + uint64 inHz, uint64 inBase, + uint64 outHz, uint64 outBase, + const RateConv_Params *conv); +Bool RateConv_ComputeRatio(uint64 inHz, uint64 outHz, + RateConv_Ratio *ratio); +void RateConv_LogRatio(const char *prefix, + uint64 inHz, uint64 outHz, + const RateConv_Ratio *ratio); + + +/* + *---------------------------------------------------------------------- + * + * RateConv_Unsigned -- + * + * Apply rate conversion to an unsigned argument: + * y = ((x * mult) >> shift) + add. + * + *---------------------------------------------------------------------- + */ + +static INLINE uint64 +RateConv_Unsigned(const RateConv_Params *conv, uint64 x) +{ + return Mul64x3264(x, conv->mult, conv->shift) + conv->add; +} + + +/* + *---------------------------------------------------------------------- + * + * RateConv_Signed -- + * + * Apply rate conversion to a signed argument: + * y = ((x * mult) >> shift) + add. + * + *---------------------------------------------------------------------- + */ + +static INLINE int64 +RateConv_Signed(const RateConv_Params *conv, int64 x) +{ + return Muls64x32s64(x, conv->mult, conv->shift) + conv->add; +} + + +#endif // _VM_RATECONV_H_ diff --git a/vmmon-only/include/uccostTable.h b/vmmon-only/include/uccostTable.h new file mode 100644 index 00000000..87366788 --- /dev/null +++ b/vmmon-only/include/uccostTable.h @@ -0,0 +1,34 @@ +/********************************************************* + * Copyright (C) 1998-2014 VMware, Inc. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation version 2 and no later version. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY + * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * for more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write to the Free Software Foundation, Inc., + * 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + * + *********************************************************/ + +#define INCLUDE_ALLOW_VMCORE +#define INCLUDE_ALLOW_VMMON +#include "includeCheck.h" + +UC(CALL_START) +UC(BEGIN_BACK_TO_HOST) +UC(SWITCHED_TO_MODULE) +UC(VMX_SELECT_RETURN) +UC(VMX_HANDLER_START) +UC(VMX_SIGNAL) +UC(MODULE_SIGNAL) +UC(SWITCHING_TO_MONITOR) +UC(DONE_BACK_TO_HOST) +UC(CALL_END) + +#undef UC diff --git a/vmmon-only/include/usercalldefs.h b/vmmon-only/include/usercalldefs.h new file mode 100644 index 00000000..9b32ee79 --- /dev/null +++ b/vmmon-only/include/usercalldefs.h @@ -0,0 +1,35 @@ +/********************************************************* + * Copyright (C) 2008 VMware, Inc. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation version 2 and no later version. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY + * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * for more details. 
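uccostTable.h above is an X-macro table: it expands whatever UC() the includer defines, once per checkpoint, and then undefines UC. A hedged sketch of the usual consumption pattern; the enum and name prefix below are assumptions, not taken from this patch:

#define UC(x) UCCOST_ ## x,
typedef enum UCCostStampName {
#include "uccostTable.h"
   UCCOST_MAX
} UCCostStampName;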
+ * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write to the Free Software Foundation, Inc., + * 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + * + *********************************************************/ + +#ifndef USERCALLDEFS_H +#define USERCALLDEFS_H + +#ifdef linux +#include +#define USERCALL_RESTART (-ERESTARTNOINTR) +#else +#include +#define USERCALL_RESTART (USERCALL_NOP) +#endif +/* + * -1 to -4096 are reserved for syscall errors on Linux. -1 is reserved for + * failing DeviceIoControl on Windows. + */ +#define USERCALL_VMX86ALLOCERR (-8192) + +#endif diff --git a/vmmon-only/include/vcpuid.h b/vmmon-only/include/vcpuid.h new file mode 100644 index 00000000..c12dcef0 --- /dev/null +++ b/vmmon-only/include/vcpuid.h @@ -0,0 +1,65 @@ +/********************************************************* + * Copyright (C) 1998-2014 VMware, Inc. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation version 2 and no later version. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY + * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * for more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write to the Free Software Foundation, Inc., + * 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + * + *********************************************************/ + +/* + * + * vcpuid.h -- + * + * Monitor's VCPU ID. + */ + +#ifndef _VCPUID_H_ +#define _VCPUID_H_ + +#define INCLUDE_ALLOW_USERLEVEL + +#define INCLUDE_ALLOW_MODULE +#define INCLUDE_ALLOW_VMMON +#define INCLUDE_ALLOW_VMKERNEL +#define INCLUDE_ALLOW_VMK_MODULE +#define INCLUDE_ALLOW_DISTRIBUTE +#define INCLUDE_ALLOW_VMCORE +#include "includeCheck.h" + +#include "vm_basic_types.h" + + +typedef uint32 Vcpuid; // VCPU number + +#define VCPUID_INVALID (~0U) + +#define BOOT_VCPU_ID 0 +#define IS_BOOT_VCPUID(vcpuid) ((vcpuid) == BOOT_VCPU_ID) + +#define MAX_VCPUS 128 + +#define MAX_CORES_PER_SOCKET 64 + +#ifdef VMM +#include "vcpuset.h" + +/* In VMM, CurVcpuid() is available everywhere. */ +extern const Vcpuid curVcpuid; +extern const VCPUSet curVcpuidSet; +#define CurVcpuid() (curVcpuid) +#define CurVcpuidSet() (&curVcpuidSet) +#define IS_BOOT_VCPU() IS_BOOT_VCPUID(CurVcpuid()) + +#endif /* VMM */ + +#endif // ifndef _VCPUID_H_ diff --git a/vmmon-only/include/vcpuset.h b/vmmon-only/include/vcpuset.h new file mode 100644 index 00000000..4bbe3a08 --- /dev/null +++ b/vmmon-only/include/vcpuset.h @@ -0,0 +1,808 @@ +/********************************************************* + * Copyright (C) 2002-2014 VMware, Inc. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation version 2 and no later version. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY + * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * for more details. 
+ * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write to the Free Software Foundation, Inc., + * 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + * + *********************************************************/ + +/* + * vcpuset.h -- + * + * ADT for a set of VCPUs. Implemented as an array of bitmasks. + * + */ + +#ifndef _VCPUSET_H_ +#define _VCPUSET_H_ + + +#define INCLUDE_ALLOW_VMX +#define INCLUDE_ALLOW_MODULE +#define INCLUDE_ALLOW_VMMON +#define INCLUDE_ALLOW_VMKERNEL +#define INCLUDE_ALLOW_USERLEVEL +#define INCLUDE_ALLOW_VMCORE +#include "includeCheck.h" + +#include "vm_basic_asm.h" +#include "vm_atomic.h" +#include "vcpuid.h" +#include "vcpuset_types.h" + +#if defined VMX86_VMX +# include "str.h" /* Str_Snprintf */ +# define VCS_SNPRINTF Str_Snprintf +#elif defined MONITOR_APP +# include /* libc snprintf */ +# if defined WIN32 +# define VCS_SNPRINTF _snprintf +# else +# define VCS_SNPRINTF snprintf +# endif +#elif defined VMM || defined VMKERNEL +# include "vm_libc.h" /* vmcore snprintf */ +# define VCS_SNPRINTF snprintf +#endif + +#ifdef VMX86_VMX +#include "vmx.h" +#endif + + +/* + * A buffer for logging a VCPUSet must fit a maximally-populated set. To + * balance brevity and readability, sets are formatted for printing like long + * hexadecimal numbers, with a '.' at every 64-VCPU subset boundary. The + * highest-numbered VCPU in the set is printed first, followed by all slots + * for lower-numbered VCPUs, populated or not. Leading zeroes are not printed. + * + * Examples, assuming a VCS_SUBSET_COUNT of 2: + * An empty set: "0x0\0" + * A full set: "0xffffffffffffffff.ffffffffffffffff\0" + * A set with only VCPU 50: "0x4000000000000\0" + * A set with only VCPU 80: "0x10000.0000000000000000\0" + */ +#define VCS_BUF_SIZE (2 + /* "0x" */ \ + (VCS_SUBSET_COUNT * VCS_SUBSET_WIDTH / 4) + /* (hex) */ \ + (VCS_SUBSET_COUNT - 1) + /* '.' */ \ + 1) /* NULL */ + +extern VCPUSet vcpuSetFull; + +#define FOR_EACH_VCPU_IN_SET(_vcpuSet, _v) \ + do { \ + Vcpuid _v; \ + VCPUSet __vcs; \ + VCPUSet_Copy(&__vcs, _vcpuSet); \ + while ((_v = VCPUSet_FindFirst(&__vcs)) != VCPUID_INVALID) { \ + VCPUSet_Remove(&__vcs, _v); + +#define ROF_EACH_VCPU_IN_SET() \ + } \ + } while (0) + + +#define FOR_EACH_SUBSET_IN_SET(_setIndex) \ + do { \ + int _setIndex; \ + for (_setIndex = 0; _setIndex < VCS_SUBSET_COUNT; _setIndex++) { + +#define ROF_EACH_SUBSET_IN_SET() \ + } \ + } while (0) + + +#define FOR_EACH_SUBSET_IN_SET_COUNTDOWN(_setIndex) \ + do { \ + int _setIndex; \ + for (_setIndex = VCS_SUBSET_COUNT - 1; _setIndex >= 0; _setIndex--) { + +#define ROF_EACH_SUBSET_IN_SET_COUNTDOWN() \ + } \ + } while (0) + + +#define FOR_EACH_POPULATED_SUBSET_IN_SET(_setIndex) \ + do { \ + int _setIndex; \ + int _maxSubsets = VCS_VCPUID_SUBSET_IDX(NumVCPUs() - 1); \ + for (_setIndex = 0; _setIndex <= _maxSubsets; _setIndex++) { + +#define ROF_EACH_POPULATED_SUBSET_IN_SET() \ + } \ + } while (0) + + +/* + *---------------------------------------------------------------------- + * + * VCPUSet_Empty -- + * + * Clear all bits in a VCPUSet. + * + *---------------------------------------------------------------------- + */ + +static INLINE void +VCPUSet_Empty(VCPUSet *vcs) +{ + FOR_EACH_SUBSET_IN_SET(idx) { + vcs->subset[idx] = 0; + } ROF_EACH_SUBSET_IN_SET(); +} + + +/* + *---------------------------------------------------------------------- + * + * VCPUSet_IsEmpty -- + * + * Return TRUE iff a VCPUSet has no bits set. 
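The FOR_EACH_VCPU_IN_SET / ROF_EACH_VCPU_IN_SET pair above iterates over a private copy of the set, removing each member as it is visited, so the original set is untouched. A minimal usage sketch; the function name is invented:

static INLINE int
ExampleCountMembers(const VCPUSet *vcs)
{
   int n = 0;

   FOR_EACH_VCPU_IN_SET(vcs, v) {
      n++;                          /* 'v' is the Vcpuid of the current member */
   } ROF_EACH_VCPU_IN_SET();
   return n;
}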
+ * + *---------------------------------------------------------------------- + */ + +static INLINE Bool +VCPUSet_IsEmpty(const VCPUSet *vcs) +{ + FOR_EACH_SUBSET_IN_SET(idx) { + if (vcs->subset[idx] != 0) { + return FALSE; + } + } ROF_EACH_SUBSET_IN_SET(); + return TRUE; +} + + +/* + *---------------------------------------------------------------------- + * + * VCPUSet_Full -- + * + * Returns a pointer to a VCPUSet containing all valid VCPUs. + * + *---------------------------------------------------------------------- + */ +static INLINE const VCPUSet * +VCPUSet_Full(void) +{ + /* + * Read too early, we may get the wrong notion of how many + * vcpus the VM has. Cf. pr286243 and pr289186. + */ +#if defined (VMX86_VMX) + ASSERT(NumVCPUs() != 0 && !VCPUSet_IsEmpty(&vcpuSetFull)); +#endif + return &vcpuSetFull; +} + + +/* + *---------------------------------------------------------------------- + * + * VCPUSet_Copy -- + * + * Copy one VCPUSet's contents to another. + * + *---------------------------------------------------------------------- + */ + +static INLINE void +VCPUSet_Copy(VCPUSet *dest, const VCPUSet *src) +{ + FOR_EACH_SUBSET_IN_SET(idx) { + dest->subset[idx] = src->subset[idx]; + } ROF_EACH_SUBSET_IN_SET(); +} + + +/* + *---------------------------------------------------------------------- + * + * VCPUSet_Equals -- + * + * Compare two VCPUSets, return TRUE iff their contents match. + * + *---------------------------------------------------------------------- + */ + +static INLINE Bool +VCPUSet_Equals(const VCPUSet *vcs1, const VCPUSet *vcs2) +{ + FOR_EACH_SUBSET_IN_SET(idx) { + if (vcs1->subset[idx] != vcs2->subset[idx]) { + return FALSE; + } + } ROF_EACH_SUBSET_IN_SET(); + return TRUE; +} + + +/* + *---------------------------------------------------------------------- + * + * VCPUSet_IsMember -- + * + * Return TRUE iff the given Vcpuid is present in a VCPUSet. + * + *---------------------------------------------------------------------- + */ + +static INLINE Bool +VCPUSet_IsMember(const VCPUSet *vcs, Vcpuid v) +{ + ASSERT(v < MAX_VCPUS); + return (vcs->subset[VCS_VCPUID_SUBSET_IDX(v)] & + VCS_VCPUID_SUBSET_BIT(v)) != 0; +} + + +/* + *---------------------------------------------------------------------- + * + * VCPUSet_AtomicIsMember -- + * + * Return TRUE iff the given Vcpuid is present in a VCPUSet. + * + *---------------------------------------------------------------------- + */ + +static INLINE Bool +VCPUSet_AtomicIsMember(VCPUSet *vcs, Vcpuid v) +{ + volatile uint64 *subset = &vcs->subset[VCS_VCPUID_SUBSET_IDX(v)]; + ASSERT(v < MAX_VCPUS); + return (Atomic_Read64(Atomic_VolatileToAtomic64(subset)) & + VCS_VCPUID_SUBSET_BIT(v)) != 0; +} + + +/* + *---------------------------------------------------------------------- + * + * VCPUSet_FindFirst -- + * VCPUSet_FindLast -- + * + * Find the first (lowest-numbered) or last (highest-numbered) + * Vcpuid in a VCPUSet. + * + * Results: + * Vcpuid if at least one is present in a set. + * VCPUID_INVALID if the set is empty. 
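VCPUSet_IsMember() above selects subset[v >> 6] and tests bit (v & 63); with VCS_SUBSET_COUNT == 2 this covers VCPUs 0-127. As a worked example, VCPU 80 maps to subset[1], bit 16, matching the "0x10000.0000000000000000" case in the log-format comment earlier. The same test written out by hand, with an invented function name:

static INLINE Bool
ExampleVcpu80IsMember(const VCPUSet *vcs)
{
   return (vcs->subset[VCS_VCPUID_SUBSET_IDX(80)] &   /* subset[1] */
           VCS_VCPUID_SUBSET_BIT(80)) != 0;           /* bit 16    */
}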
+ * + *---------------------------------------------------------------------- + */ + +static INLINE Vcpuid +VCPUSet_FindFirst(const VCPUSet *vcs) +{ + FOR_EACH_SUBSET_IN_SET(idx) { + uint64 subset = vcs->subset[idx]; + if (subset != 0) { + return lssb64_0(subset) + (idx << VCS_SUBSET_SHIFT); + } + } ROF_EACH_SUBSET_IN_SET(); + return VCPUID_INVALID; +} + +static INLINE Vcpuid +VCPUSet_FindLast(const VCPUSet *vcs) +{ + FOR_EACH_SUBSET_IN_SET_COUNTDOWN(idx) { + uint64 subset = vcs->subset[idx]; + if (subset != 0) { + return mssb64_0(subset) + (idx << VCS_SUBSET_SHIFT); + } + } ROF_EACH_SUBSET_IN_SET_COUNTDOWN(); + return VCPUID_INVALID; +} + + +/* + *---------------------------------------------------------------------- + * + * VCPUSet_Remove -- + * VCPUSet_AtomicRemove -- + * + * Remove or atomically remove a single Vcpuid from a VCPUSet. + * + *---------------------------------------------------------------------- + */ + +static INLINE void +VCPUSet_Remove(VCPUSet *vcs, Vcpuid v) +{ + ASSERT(v < MAX_VCPUS); + vcs->subset[VCS_VCPUID_SUBSET_IDX(v)] &= ~VCS_VCPUID_SUBSET_BIT(v); +} + + +static INLINE void +VCPUSet_AtomicRemove(VCPUSet *vcs, Vcpuid v) +{ + volatile uint64 *subset = &vcs->subset[VCS_VCPUID_SUBSET_IDX(v)]; + ASSERT(v < MAX_VCPUS); + Atomic_And64(Atomic_VolatileToAtomic64(subset), ~VCS_VCPUID_SUBSET_BIT(v)); +} + + +/* + *---------------------------------------------------------------------- + * + * VCPUSet_IncludeSet -- + * VCPUSet_RemoveSet -- + * + * Add/remove all vcpus present in the set 'src' to/from the set 'dest'. + * + *---------------------------------------------------------------------- + */ + +static INLINE void +VCPUSet_IncludeSet(VCPUSet *dest, const VCPUSet *src) +{ + FOR_EACH_SUBSET_IN_SET(idx) { + dest->subset[idx] |= src->subset[idx]; + } ROF_EACH_SUBSET_IN_SET(); +} + + +static INLINE void +VCPUSet_RemoveSet(VCPUSet *dest, const VCPUSet *src) +{ + FOR_EACH_SUBSET_IN_SET(idx) { + dest->subset[idx] &= ~src->subset[idx]; + } ROF_EACH_SUBSET_IN_SET(); +} + + +/* + *---------------------------------------------------------------------- + * + * VCPUSet_Include -- + * VCPUSet_AtomicInclude -- + * + * Add or atomically add a single Vcpuid to a VCPUSet. + * + *---------------------------------------------------------------------- + */ + +static INLINE void +VCPUSet_Include(VCPUSet *vcs, Vcpuid v) +{ + ASSERT(v < MAX_VCPUS); + vcs->subset[VCS_VCPUID_SUBSET_IDX(v)] |= VCS_VCPUID_SUBSET_BIT(v); +} + + +static INLINE void +VCPUSet_AtomicInclude(VCPUSet *vcs, Vcpuid v) +{ + volatile uint64 *subset = &vcs->subset[VCS_VCPUID_SUBSET_IDX(v)]; + ASSERT(v < MAX_VCPUS); + Atomic_Or64(Atomic_VolatileToAtomic64(subset), VCS_VCPUID_SUBSET_BIT(v)); +} + + +#if defined(VMM) && !defined(MONITOR_APP) +/* + *---------------------------------------------------------------------- + * + * VCPUSet_PackCareful -- + * + * Pack a VCPUSet into the bytes at "ptr". + * + *---------------------------------------------------------------------- + */ + +static INLINE void +VCPUSet_PackCareful(unsigned numVCPUs, const VCPUSet *vcs, void *ptr) +{ + memcpy(ptr, vcs->subset, (numVCPUs + 7) / 8); +} + + +/* + *---------------------------------------------------------------------- + * + * VCPUSet_UnpackCareful -- + * + * Unpack a VCPUSet from the bytes at "src". 
+ * + *---------------------------------------------------------------------- + */ + +static INLINE void +VCPUSet_UnpackCareful(unsigned numVCPUs, VCPUSet *vcs, void *ptr) +{ + memcpy(vcs->subset, ptr, (numVCPUs + 7) / 8); +} +#endif /* VMM && !MONITOR_APP */ + + +/* + *---------------------------------------------------------------------- + * + * VCPUSet_PopulateRange -- + * + * Populates the given set with 'numVCPUs' VCPUs starting at 'firstVCPU'. + * + *---------------------------------------------------------------------- + */ + +static INLINE void +VCPUSet_PopulateRange(VCPUSet *vcs, unsigned firstVCPU, + unsigned numVCPUs) +{ + unsigned sub; + unsigned lastVCPU = firstVCPU + numVCPUs - 1; + unsigned firstSubset = firstVCPU / VCS_SUBSET_WIDTH; + unsigned lastSubset = lastVCPU / VCS_SUBSET_WIDTH; + unsigned lowMaskShift = firstVCPU % VCS_SUBSET_WIDTH; + unsigned highMaskShift = VCS_SUBSET_WIDTH - 1 - lastVCPU % VCS_SUBSET_WIDTH; + + ASSERT(firstSubset <= lastSubset && lastSubset < VCS_SUBSET_COUNT); + + VCPUSet_Empty(vcs); + for (sub = firstSubset; sub <= lastSubset; sub++) { + vcs->subset[sub] = CONST64U(-1); + } + vcs->subset[firstSubset] &= (CONST64U(-1) << lowMaskShift); + vcs->subset[lastSubset] &= (CONST64U(-1) >> highMaskShift); +} + + +/* + *---------------------------------------------------------------------- + * + * VCPUSet_Populate -- + * + * Populates the given set with the VCPUs in [0, numVCPUs). + * + *---------------------------------------------------------------------- + */ + +static INLINE void +VCPUSet_Populate(VCPUSet *vcs, unsigned numVCPUs) +{ + VCPUSet_PopulateRange(vcs, 0, numVCPUs); +} + + +/* + *---------------------------------------------------------------------- + * + * VCPUSet_Subset -- + * + * Return the specified subset of a VCPUSet. + * + *---------------------------------------------------------------------- + */ + +static INLINE uint64 +VCPUSet_Subset(const VCPUSet *vcs, + unsigned subset) +{ + ASSERT(subset < VCS_SUBSET_COUNT); + return vcs->subset[subset]; +} + + +/* + *---------------------------------------------------------------------- + * + * VCPUSet_SubsetPtr -- + * + * Return a pointer to the specified subset of a VCPUSet. + * + *---------------------------------------------------------------------- + */ + +static INLINE uint64 * +VCPUSet_SubsetPtr(VCPUSet *vcs, unsigned subset) +{ + ASSERT(subset < VCS_SUBSET_COUNT); + return &vcs->subset[subset]; +} + + +/* + *---------------------------------------------------------------------- + * + * VCPUSet_IsSupersetOrEqual -- + * + * Return TRUE iff vcs1 contains a superset of the VCPUs in vcs2 + * or vcs1 and vcs2 contain exactly the same VCPUs. + * + *---------------------------------------------------------------------- + */ + +static INLINE Bool +VCPUSet_IsSupersetOrEqual(const VCPUSet *vcs1, const VCPUSet *vcs2) +{ + FOR_EACH_SUBSET_IN_SET(idx) { + if (vcs2->subset[idx] & ~vcs1->subset[idx]) { + return FALSE; + } + } ROF_EACH_SUBSET_IN_SET(); + return TRUE; +} + + +/* + *---------------------------------------------------------------------- + * + * VCPUSet_IsSubsetOrEqual -- + * + * Return TRUE iff vcs1 contains a subset of the VCPUs in vcs2 + * or vcs1 and vcs2 contain exactly the same VCPUs. 
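VCPUSet_PopulateRange() above fills whole 64-bit subsets and then trims the first and last with boundary masks. A small self-check sketch under the two-subset layout; the function is illustrative only:

static void
ExamplePopulateRange(void)
{
   VCPUSet vcs;

   VCPUSet_PopulateRange(&vcs, 60, 8);                      /* VCPUs 60..67 */
   ASSERT(vcs.subset[0] == CONST64U(0xf000000000000000));   /* bits 60..63  */
   ASSERT(vcs.subset[1] == CONST64U(0x000000000000000f));   /* bits 0..3    */
}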
+ * + *---------------------------------------------------------------------- + */ + +static INLINE Bool +VCPUSet_IsSubsetOrEqual(const VCPUSet *vcs1, const VCPUSet *vcs2) +{ + return VCPUSet_IsSupersetOrEqual(vcs2, vcs1); +} + + +/* + *---------------------------------------------------------------------- + * + * VCPUSet_MakeSingleton -- + * + * Add a single Vcpuid to a VCPUSet and remove all others. + * + *---------------------------------------------------------------------- + */ + +static INLINE void +VCPUSet_MakeSingleton(VCPUSet *vcs, Vcpuid v) +{ + VCPUSet_Empty(vcs); + VCPUSet_Include(vcs, v); +} + + +/* + *---------------------------------------------------------------------- + * + * VCPUSet_IsSingleton -- + * + * Return TRUE iff vcs contains exactly one VCPU. + * + *---------------------------------------------------------------------- + */ + +static INLINE Bool +VCPUSet_IsSingleton(const VCPUSet *vcs) +{ + Bool foundOnce = FALSE; + FOR_EACH_SUBSET_IN_SET(idx) { + uint64 sub = vcs->subset[idx]; + if (sub != 0) { + if (foundOnce || (sub & (sub - 1)) != 0) { + return FALSE; + } + foundOnce = TRUE; + } + } ROF_EACH_SUBSET_IN_SET(); + return foundOnce; +} + + +/* + *---------------------------------------------------------------------- + * + * VCPUSet_IsFull -- + * + * Returns true iff vcs contains the set of all vcpus. + * + *---------------------------------------------------------------------- + */ +static INLINE Bool +VCPUSet_IsFull(const VCPUSet *vcs) +{ + return VCPUSet_Equals(vcs, VCPUSet_Full()); +} + + +/* + *---------------------------------------------------------------------- + * + * VCPUSet_AtomicReadWriteSubset -- + * + * For the nth aligned 64-VCPU subset of a VCPU set, atomically + * read then write. Return the contents read. Set 0 is VCPUs + * 0-63 and set 1 is VCPUs 64-127. + * + *---------------------------------------------------------------------- + */ + +static INLINE uint64 +VCPUSet_AtomicReadWriteSubset(VCPUSet *vcs, uint64 vcpus, + unsigned n) +{ + ASSERT(n < VCS_SUBSET_COUNT); + return Atomic_ReadWrite64(Atomic_VolatileToAtomic64(&vcs->subset[n]), + vcpus); +} + +/* + *---------------------------------------------------------------------- + * + * VCPUSet_Size -- + * + * Return the number of VCPUs in this set. + * + *---------------------------------------------------------------------- + */ +static INLINE int +VCPUSet_Size(const VCPUSet *vcs) +{ + int n = 0; + FOR_EACH_SUBSET_IN_SET(idx) { + uint64 bits = vcs->subset[idx]; + while (bits != 0) { + bits = bits & (bits - 1); + n++; + } + } ROF_EACH_SUBSET_IN_SET(); + return n; +} + + +/* + *---------------------------------------------------------------------- + * + * VCPUSet_UnionSubset -- + * + * Given an 64-bit value and a subset number, add the VCPUs + * represented to the set. + * + *---------------------------------------------------------------------- + */ + +static INLINE void +VCPUSet_UnionSubset(VCPUSet *vcs, uint64 vcpus, unsigned n) +{ + ASSERT(n < VCS_SUBSET_COUNT); + vcs->subset[n] |= vcpus; +} + + +/* + *---------------------------------------------------------------------- + * + * VCPUSet_SubtractSubset -- + * + * Given an 64-bit value and a subset number, remove the VCPUs + * represented in the subset from the set. 
+ * + *---------------------------------------------------------------------- + */ + +static INLINE void +VCPUSet_SubtractSubset(VCPUSet *vcs, uint64 vcpus, unsigned n) +{ + ASSERT(n < VCS_SUBSET_COUNT); + vcs->subset[n] &= ~vcpus; +} + + +/* + *---------------------------------------------------------------------- + * + * VCPUSet_AtomicUnionSubset -- + * + * Given a 64-bit value and a subset number, atomically add + * the VCPUs represented to the set. + * + *---------------------------------------------------------------------- + */ + +static INLINE void +VCPUSet_AtomicUnionSubset(VCPUSet *vcs, uint64 vcpus, unsigned n) +{ + uint64 *subsetPtr = &vcs->subset[n]; + ASSERT(n < VCS_SUBSET_COUNT); + Atomic_Or64(Atomic_VolatileToAtomic64(subsetPtr), vcpus); +} + + +/* + *---------------------------------------------------------------------- + * + * VCPUSet_Invert -- + * + * Makes all non-present valid VCPUs in a set present and all + * VCPUs present non-present. + * + *---------------------------------------------------------------------- + */ + +static INLINE void +VCPUSet_Invert(VCPUSet *vcs) +{ + VCPUSet temp; + VCPUSet_Copy(&temp, VCPUSet_Full()); + VCPUSet_RemoveSet(&temp, vcs); + VCPUSet_Copy(vcs, &temp); +} + + +/* + *---------------------------------------------------------------------- + * + * VCPUSet_Intersection + * + * Given two VCPUSets, populate the destination set with only the + * VCPUs common to both. + * + *---------------------------------------------------------------------- + */ + +static INLINE void +VCPUSet_Intersection(VCPUSet *dest, const VCPUSet *src) +{ + FOR_EACH_SUBSET_IN_SET(idx) { + dest->subset[idx] &= src->subset[idx]; + } ROF_EACH_SUBSET_IN_SET(); +} + + +/* + *---------------------------------------------------------------------- + * + * VCPUSet_LogFormat -- + * + * Given a buffer of at least VCS_BUF_SIZE to fill, write into it a + * string suitable for use in Log() or LOG(). + * Returns the buffer which was passed as an argument, after + * writing the string. + * + *---------------------------------------------------------------------- + */ + +#ifdef VCS_SNPRINTF +static INLINE char * +VCPUSet_LogFormat(char *buf, const int size, const VCPUSet *vcs) +{ + int offset = 0; + Vcpuid highest = VCPUSet_FindLast(vcs); + int idx = (highest == VCPUID_INVALID) ? 0 : highest / 8; + ASSERT(size >= VCS_BUF_SIZE); +#define VCS_LOGF(...) \ + { \ + int ret = VCS_SNPRINTF(buf + offset, size - offset, __VA_ARGS__); \ + ASSERT(0 <= ret && ret < size - offset); \ + offset += ret; \ + } + /* Print the leading value with no zero-extension. */ + VCS_LOGF("%#x", ((unsigned char *)vcs)[idx--]); + + while (idx >= 0) { + if ((idx + 1) % (VCS_SUBSET_WIDTH / 8) == 0) { + VCS_LOGF("."); + } + VCS_LOGF("%02x", ((unsigned char *)vcs)[idx]); + idx--; + } + return buf; +} +#undef VCS_LOGF +#endif + + +#endif /* _VCPUSET_H_ */ diff --git a/vmmon-only/include/vcpuset_types.h b/vmmon-only/include/vcpuset_types.h new file mode 100644 index 00000000..bbb1990d --- /dev/null +++ b/vmmon-only/include/vcpuset_types.h @@ -0,0 +1,58 @@ +/********************************************************* + * Copyright (C) 2002-2013 VMware, Inc. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation version 2 and no later version. 
+ * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY + * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * for more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write to the Free Software Foundation, Inc., + * 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + * + *********************************************************/ + +/* + * vcpuset_types.h -- + * + * ADT for a set of VCPUs. Implemented as an array of bitmasks. + * + */ + +#ifndef _VCPUSET_TYPES_H_ +#define _VCPUSET_TYPES_H_ + + +#define INCLUDE_ALLOW_VMX +#define INCLUDE_ALLOW_MODULE +#define INCLUDE_ALLOW_VMMON +#define INCLUDE_ALLOW_VMKERNEL +#define INCLUDE_ALLOW_USERLEVEL +#define INCLUDE_ALLOW_VMCORE +#include "includeCheck.h" + +#include "vm_basic_asm.h" +#include "vm_atomic.h" +#include "vcpuid.h" + +#define VCS_SUBSET_WIDTH 64 +#define VCS_SUBSET_SHIFT 6 +#define VCS_SUBSET_MASK ((CONST64U(1) << VCS_SUBSET_SHIFT) - 1) +#define VCS_SUBSET_COUNT 2 + +#define VCS_VCPUID_SUBSET_IDX(v) ((v) >> VCS_SUBSET_SHIFT) +#define VCS_VCPUID_SUBSET_BIT(v) (CONST64U(1) << ((v) & VCS_SUBSET_MASK)) + +/* + * If you update this type, you also need to update the SEND_IPI line in + * bora/public/iocontrolsMacosTable.h. + */ +typedef struct VCPUSet { + uint64 subset[VCS_SUBSET_COUNT]; +} VCPUSet; + +#endif diff --git a/vmmon-only/include/versioned_atomic.h b/vmmon-only/include/versioned_atomic.h new file mode 100644 index 00000000..7eb190d8 --- /dev/null +++ b/vmmon-only/include/versioned_atomic.h @@ -0,0 +1,170 @@ +/********************************************************* + * Copyright (C) 1998 VMware, Inc. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation version 2 and no later version. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY + * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * for more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write to the Free Software Foundation, Inc., + * 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + * + *********************************************************/ + +/* + * Versioned atomic synchronization: + * + * These synchronization macros allow single-writer/many-reader + * access to data, based on Leslie Lamport's paper "Concurrent + * Reading and Writing", Communications of the ACM, November 1977. + * + * many-writer/many-reader can be implemented on top of versioned + * atomics by using an additional spin lock to synchronize + * writers. This is preferable for cases where readers are expected to + * greatly outnumber writers. + * + * Multiple concurrent writers to the version variables are not + * allowed. Even if writers are working on lock-free or disjoint + * data, the version counters are not interlocked for read-modify-write. + * + * Recursive use of versioned atomics in writers is currently not supported. 
+ */ + +#ifndef _VERSIONED_ATOMIC_H +#define _VERSIONED_ATOMIC_H + +#define INCLUDE_ALLOW_USERLEVEL +#define INCLUDE_ALLOW_VMKERNEL +#define INCLUDE_ALLOW_VMCORE +#define INCLUDE_ALLOW_VMMON +#include "includeCheck.h" + +#include "vm_basic_asm.h" +#include "vm_assert.h" + +/* + * Users with attribute(packed) structs must ensure any + * VersionedAtomic members are marked as ALIGNED(4). Unfortunately + * the compiler cannot be trusted to align our substruct correctly + * (PR515329). If the enclosing struct is packed, the VersionedAtomic + * alignment requested below will be ignored! + */ + +typedef struct { + volatile uint32 v0; + volatile uint32 v1; +} ALIGNED(4) VersionedAtomic; + +/* + *----------------------------------------------------------------------------- + * + * VersionedAtomic_BeginWrite -- + * Called by a writer to indicate that the data protected by + * a given atomic version is about to change. Effectively locks out + * all readers until EndWrite is called. + * + * Results: + * . + * + * Side effects: + * + * + *----------------------------------------------------------------------------- + */ +static INLINE void +VersionedAtomic_BeginWrite(VersionedAtomic *versions) +{ + /* + * As long as the variable is on natural boundary it is guaranteed + * to be atomic, so we don't need to write from left to right as in + * Lamport's original algorithm. + */ + ASSERT(((size_t)(&versions->v0) & (sizeof(versions->v0) - 1)) == 0); + /* If recursive use is needed demand for support in PR514764 */ + ASSERT(versions->v1 == versions->v0); + versions->v0++; + COMPILER_MEM_BARRIER(); +} + +/* + *----------------------------------------------------------------------------- + * + * VersionedAtomic_EndWrite -- + * Called by a writer after it is done updating shared data. Lets + * pending and new readers proceed on shared data. + * + * Results: + * . + * + * Side effects: + * + * + *----------------------------------------------------------------------------- + */ +static INLINE void +VersionedAtomic_EndWrite(VersionedAtomic *versions) +{ + ASSERT(((size_t)(&versions->v1) & (sizeof(versions->v1) - 1)) == 0); + ASSERT(versions->v1 + 1 == versions->v0); + COMPILER_MEM_BARRIER(); + versions->v1 = versions->v0; +} + +/* + *----------------------------------------------------------------------------- + * + * VersionedAtomic_BeginTryRead -- + * Called by a reader before it tried to read shared data. + * + * Results: + * Returns a version number to the reader. This version number + * is required to confirm validity of the read operation when reader + * calls EndTryRead. + * + * Side effects: + * + * + *----------------------------------------------------------------------------- + */ +static INLINE uint32 +VersionedAtomic_BeginTryRead(const VersionedAtomic *versions) +{ + uint32 readVersion; + + readVersion = versions->v1; + COMPILER_MEM_BARRIER(); + + return readVersion; +} + +/* + *----------------------------------------------------------------------------- + * + * VersionedAtomic_EndTryRead -- + * Called by a reader after it finishes reading shared data, to confirm + * validity of the data that was just read (IOW, to make sure that a + * writer did not intervene while the read was in progress). + * + * Results: + * TRUE if the data read between BeginTryRead() and this call is + * valid. FALSE otherwise. 
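VersionedAtomic_BeginTryRead()/EndTryRead() above give the usual single-writer, retry-on-conflict read protocol. A hedged sketch of a reader loop; the SharedClock type and its fields are invented for illustration. The single writer would bracket its updates to the same fields with VersionedAtomic_BeginWrite()/EndWrite() on the same VersionedAtomic.

typedef struct SharedClock {
   VersionedAtomic vers;
   uint64 sec;
   uint64 nsec;
} SharedClock;

static INLINE uint64
ExampleReadClockNS(const SharedClock *clk)
{
   uint32 version;
   uint64 ns;

   do {
      version = VersionedAtomic_BeginTryRead(&clk->vers);
      ns = clk->sec * CONST64U(1000000000) + clk->nsec;
   } while (!VersionedAtomic_EndTryRead(&clk->vers, version));
   return ns;
}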
+ * + * Side effects: + * + * + *----------------------------------------------------------------------------- + */ +static INLINE Bool +VersionedAtomic_EndTryRead(const VersionedAtomic *versions, + uint32 readVersion) +{ + COMPILER_MEM_BARRIER(); + return LIKELY(versions->v0 == readVersion); +} + +#endif //_VERSIONED_ATOMIC_H diff --git a/vmmon-only/include/vm_asm.h b/vmmon-only/include/vm_asm.h new file mode 100644 index 00000000..0eb9cee5 --- /dev/null +++ b/vmmon-only/include/vm_asm.h @@ -0,0 +1,93 @@ +/********************************************************* + * Copyright (C) 1998-2014 VMware, Inc. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation version 2 and no later version. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY + * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * for more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write to the Free Software Foundation, Inc., + * 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + * + *********************************************************/ + +/* + * vm_asm.h + * + * asm macros + */ + +#ifndef _VM_ASM_H_ +#define _VM_ASM_H_ + + +#define INCLUDE_ALLOW_MODULE +#define INCLUDE_ALLOW_VMK_MODULE +#define INCLUDE_ALLOW_VMKERNEL +#define INCLUDE_ALLOW_DISTRIBUTE +#define INCLUDE_ALLOW_VMCORE +#define INCLUDE_ALLOW_VMMON +#define INCLUDE_ALLOW_USERLEVEL +#include "includeCheck.h" + +#ifdef VM_ARM_64 +#include "vm_asm_arm64.h" +#else + +#include "vm_basic_asm.h" +#include "x86msr.h" + +#ifdef VM_X86_64 +#include "vm_asm_x86_64.h" +#else +#include "vm_asm_x86.h" +#endif + + +static INLINE void +SET_FS64(uint64 fs64) +{ + __SET_MSR(MSR_FSBASE, fs64); +} + + +static INLINE void +SET_GS64(uint64 gs64) +{ + __SET_MSR(MSR_GSBASE, gs64); +} + +static INLINE void +SET_KernelGS64(uint64 kgs64) +{ + __SET_MSR(MSR_KERNELGSBASE, kgs64); +} + + +static INLINE uint64 +GET_FS64(void) +{ + return __GET_MSR(MSR_FSBASE); +} + + +static INLINE uint64 +GET_GS64(void) +{ + return __GET_MSR(MSR_GSBASE); +} + + +static INLINE uint64 +GET_KernelGS64(void) +{ + return __GET_MSR(MSR_KERNELGSBASE); +} + +#endif // VM_ARM_64 +#endif diff --git a/vmmon-only/include/vm_asm_x86.h b/vmmon-only/include/vm_asm_x86.h new file mode 100644 index 00000000..2b37976a --- /dev/null +++ b/vmmon-only/include/vm_asm_x86.h @@ -0,0 +1,998 @@ +/********************************************************* + * Copyright (C) 1998-2014 VMware, Inc. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation version 2 and no later version. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY + * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * for more details. 
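SET_GS64()/GET_GS64() and friends above are thin wrappers around the segment-base MSRs. A hedged sketch of swapping in a new GS base and returning the old one; the helper is invented and assumes a context where writing MSR_GSBASE is permitted:

static INLINE uint64
ExampleSwapGSBase(uint64 newGsBase)
{
   uint64 old = GET_GS64();

   SET_GS64(newGsBase);
   return old;
}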
+ * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write to the Free Software Foundation, Inc., + * 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + * + *********************************************************/ + +/* + * vm_asm_x86.h + * + * IA32 asm macros + */ + +#ifndef _VM_ASM_X86_H_ +#define _VM_ASM_X86_H_ + + +#define INCLUDE_ALLOW_MODULE +#define INCLUDE_ALLOW_VMMON +#define INCLUDE_ALLOW_VMK_MODULE +#define INCLUDE_ALLOW_VMKERNEL +#define INCLUDE_ALLOW_DISTRIBUTE +#define INCLUDE_ALLOW_VMCORE +#define INCLUDE_ALLOW_USERLEVEL +#include "includeCheck.h" + +#include "x86types.h" +#include "x86desc.h" +#include "x86sel.h" +#include "x86_basic_defs.h" +#include "x86msr.h" + +#ifdef VM_X86_64 +#define _GETSET_DTR_TYPE DTR64 +#else +#define _GETSET_DTR_TYPE DTR32 +#endif + +#ifdef __GNUC__ + +#if defined __APPLE__ +/* PR 352418: GCC produces error if the non-Apple version is used */ +#define ASSERT_ON_COMPILE_SELECTOR_SIZE(expr) +#else +/* ASSERT_ON_COMPILE_SELECTOR_SIZE: + * + * - Selector must be 16-bits. + * - If a constant is used, it better be only 16-bits. + * - If it's not a constant, it must be Selector-sized. or less. + * + * Although aesthetically the following looks nicer, gcc is unable + * to produce a constant expression for it: + * + * ASSERT_ON_COMPILE(sizeof(Selector) == 2 && \ + * ((__builtin_constant_p(expr) ? ((expr) >> 16) == 0) \ + * : sizeof(expr) <= 2) + */ +#if (__GNUC__ >= 4) && (__GNUC_MINOR__ >= 1) +#define ASSERT_ON_COMPILE_SELECTOR_SIZE(expr) \ + ASSERT_ON_COMPILE(sizeof(Selector) == 2 && \ + ((__builtin_constant_p(expr) && ((expr) >> 16) == 0) || \ + sizeof(expr) <= 2)) +#else +/* gcc 3.3.3 is not able to produce a constant expression (PR 356383) */ +#define ASSERT_ON_COMPILE_SELECTOR_SIZE(expr) +#endif +#endif + + +/* + * [GS]ET_[GI]DT() are defined as macros wrapping a function + * so we can pass the argument implicitly by reference (requires + * a macro) and get type checking too (requires a function). + */ + +#define SET_GDT(var) _Set_GDT(&(var)) + +/* Checked against the Intel manual and GCC --hpreg */ +static INLINE void +_Set_GDT(_GETSET_DTR_TYPE *dtr) +{ + __asm__( + "lgdt %0" + : + : "m" (*dtr) + ); +} + +#define SET_IDT(var) _Set_IDT(&(var)) + +/* Checked against the Intel manual and GCC --hpreg */ +static INLINE void +_Set_IDT(_GETSET_DTR_TYPE *dtr) +{ + __asm__( + "lidt %0" + : + : "m" (*dtr) + ); +} + +#define GET_GDT(var) _Get_GDT(&(var)) + +/* + * Checked against the Intel manual and GCC --hpreg + * volatile because there's a hidden input (the [IG]DTR) that can change + * without the compiler knowing it. + */ +static INLINE void +_Get_GDT(_GETSET_DTR_TYPE *dtr) +{ + __asm__ __volatile__( + "sgdt %0" + : "=m" (*dtr) + ); +} + +#define GET_IDT(var) _Get_IDT(&(var)) + +/* + * Checked against the Intel manual and GCC --hpreg + * volatile because the [IG]DT can change without the compiler knowing it + * (when we use l[ig]dt). + */ +static INLINE void +_Get_IDT(_GETSET_DTR_TYPE *dtr) +{ + __asm__ __volatile__( + "sidt %0" + : "=m" (*dtr) + ); +} + + +#define SET_LDT(expr) \ + do { \ + const Selector _set_ldt_sel = (Selector)(expr); \ + ASSERT_ON_COMPILE_SELECTOR_SIZE(expr); \ + /* lldt reads from the GDT; don't sink any writes. 
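GET_GDT()/SET_GDT() above pass their argument by reference through a one-line wrapper so the descriptor-table register is stored into, or loaded from, a _GETSET_DTR_TYPE in memory. A minimal save/restore sketch; the function name and the elided middle step are illustrative only:

static INLINE void
ExampleSaveRestoreGDT(void)
{
   _GETSET_DTR_TYPE hostGdt;

   GET_GDT(hostGdt);        /* sgdt into hostGdt */
   /* ... run with a different GDT here ... */
   SET_GDT(hostGdt);        /* lgdt to restore the saved base/limit */
}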
*/ \ + COMPILER_MEM_BARRIER(); \ + /* Checked against the Intel manual and GCC --hpreg */ \ + __asm__("lldt %0" \ + : \ + : "rm" (_set_ldt_sel)); \ + } while (0) + + +/* Checked against the Intel manual and GCC --hpreg + * + * volatile because the LDT can change without the compiler knowing it + * (when we use lldt). + */ +static INLINE void +_GET_LDT(Selector * const result) +{ + __asm__ __volatile__("sldt %0" + : "=rm" (*result)); +} + + +#define GET_LDT(var) \ + do { \ + _GET_LDT(&(var)); \ + } while (0) + + +/* Checked against the Intel manual and GCC --thutt */ +#define _BUILD_SET_R(func, reg) \ + static INLINE void \ + func(uintptr_t r) \ + { \ + __asm__("mov %0, %%" #reg \ + : /* no outputs */ \ + : "r" (r) \ + : "memory"); \ + } + +/* Not yet checked against the Intel manual and GCC --slava + * + * 'volatile' because CRs and DRs can change without the compiler + * knowing it (when there is a page fault, when a breakpoint occurs, + * and moreover it seems there is no way to teach gcc that smsw + * clobbers cr0 for example). + * + * The parameter is a 'uintptr_t *' so that the size of the actual + * parameter must exactly match the size of the hardware register. + * This prevents the use of 32-bit variables when building 64-bit + * code. + */ +#define _BUILD_GET_R(func, reg) \ + static INLINE void \ + func(uintptr_t *result) \ + { \ + __asm__ __volatile__("mov %%" #reg ", %0" \ + : "=r" (*result)); \ + } + +_BUILD_SET_R(_SET_CR0, cr0) +_BUILD_SET_R(_SET_CR2, cr2) +_BUILD_SET_R(_SET_CR3, cr3) +_BUILD_SET_R(_SET_CR4, cr4) +_BUILD_SET_R(_SET_CR8, cr8) + +_BUILD_GET_R(_GET_CR0, cr0) +_BUILD_GET_R(_GET_CR2, cr2) +_BUILD_GET_R(_GET_CR3, cr3) +_BUILD_GET_R(_GET_CR4, cr4) +_BUILD_GET_R(_GET_CR8, cr8) + +#if defined __APPLE__ +/* Mac OS gcc 4 uses DBx instead of DRx register names. 
*/ +_BUILD_SET_R(_SET_DR0, db0) +_BUILD_SET_R(_SET_DR1, db1) +_BUILD_SET_R(_SET_DR2, db2) +_BUILD_SET_R(_SET_DR3, db3) +_BUILD_SET_R(_SET_DR6, db6) +_BUILD_SET_R(_SET_DR7, db7) + +_BUILD_GET_R(_GET_DR0, db0) +_BUILD_GET_R(_GET_DR1, db1) +_BUILD_GET_R(_GET_DR2, db2) +_BUILD_GET_R(_GET_DR3, db3) +_BUILD_GET_R(_GET_DR6, db6) +_BUILD_GET_R(_GET_DR7, db7) +#else +_BUILD_SET_R(_SET_DR0, dr0) +_BUILD_SET_R(_SET_DR1, dr1) +_BUILD_SET_R(_SET_DR2, dr2) +_BUILD_SET_R(_SET_DR3, dr3) +_BUILD_SET_R(_SET_DR6, dr6) +_BUILD_SET_R(_SET_DR7, dr7) + +_BUILD_GET_R(_GET_DR0, dr0) +_BUILD_GET_R(_GET_DR1, dr1) +_BUILD_GET_R(_GET_DR2, dr2) +_BUILD_GET_R(_GET_DR3, dr3) +_BUILD_GET_R(_GET_DR6, dr6) +_BUILD_GET_R(_GET_DR7, dr7) +#endif + +#define SET_CR_DR(regType, regNum, expr) \ + do { \ + /* Ensure no implicit truncation of 'expr' */ \ + ASSERT_ON_COMPILE(sizeof(expr) <= sizeof(uintptr_t)); \ + _SET_##regType##regNum(expr); \ + } while (0) + +#define GET_CR_DR(regType, regNum, var) \ + do { \ + _GET_##regType##regNum(&(var)); \ + } while (0) + +#define SET_CR0(expr) SET_CR_DR(CR, 0, expr) +#define SET_CR2(expr) SET_CR_DR(CR, 2, expr) +#define SET_CR3(expr) SET_CR_DR(CR, 3, expr) +#define SET_CR4(expr) SET_CR_DR(CR, 4, expr) +#define SET_CR8(expr) SET_CR_DR(CR, 8, expr) + +/* Undefine GET_CR0; it is defined in mach_asm.h for SLES cross-compile */ +#undef GET_CR0 +#define GET_CR0(var) GET_CR_DR(CR, 0, var) +#define GET_CR2(var) GET_CR_DR(CR, 2, var) +#define GET_CR3(var) GET_CR_DR(CR, 3, var) +#define GET_CR4(var) GET_CR_DR(CR, 4, var) +#define GET_CR8(var) GET_CR_DR(CR, 8, var) + +#define SET_DR0(expr) SET_CR_DR(DR, 0, expr) +#define SET_DR1(expr) SET_CR_DR(DR, 1, expr) +#define SET_DR2(expr) SET_CR_DR(DR, 2, expr) +#define SET_DR3(expr) SET_CR_DR(DR, 3, expr) +#define SET_DR6(expr) SET_CR_DR(DR, 6, expr) +#define SET_DR7(expr) SET_CR_DR(DR, 7, expr) + +#define GET_DR0(var) GET_CR_DR(DR, 0, var) +#define GET_DR1(var) GET_CR_DR(DR, 1, var) +#define GET_DR2(var) GET_CR_DR(DR, 2, var) +#define GET_DR3(var) GET_CR_DR(DR, 3, var) +#define GET_DR6(var) GET_CR_DR(DR, 6, var) +#define GET_DR7(var) GET_CR_DR(DR, 7, var) + +#define SET_SEGREG(reg, expr) \ + do { \ + const Selector _set_segreg_sel = (Selector)(expr); \ + ASSERT_ON_COMPILE_SELECTOR_SIZE(expr); \ + /* mov to Sreg reads from the [GL]DT; don't sink any writes. */ \ + COMPILER_MEM_BARRIER(); \ + /* Checked against the Intel manual and GCC --hpreg */ \ + __asm__("movw %0, %%" #reg \ + : \ + : "rm" (_set_segreg_sel)); \ + } while (0) + +#define SET_DS(expr) SET_SEGREG(ds, expr) +#define SET_ES(expr) SET_SEGREG(es, expr) +#define SET_FS(expr) SET_SEGREG(fs, expr) +#define SET_GS(expr) SET_SEGREG(gs, expr) +#define SET_SS(expr) SET_SEGREG(ss, expr) + +/* Checked against the Intel manual and GCC --hpreg + * + * volatile because the content of CS can change without the compiler + * knowing it (when we use call gates). + * + * XXX: The segment register getter functions have not been updated to + * have stricter type checking like many other functions in this + * file because they return a value, rather than taking an + * argument. Perhaps sometime in the future, a willing soul will + * change these accessors to take an argument and at the same + * time install better type checking. 
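+ *
+ *      For illustration only, a hypothetical caller shows the two styles
+ *      side by side (GET_TR and the segment setters already take their
+ *      argument, which is what enables the stricter checking):
+ *
+ *         Selector ds = GET_DS();    // value-returning getter, weak checking
+ *         Selector tr;
+ *         GET_TR(tr);                // macro writes into its argument
+ *         SET_DS(ds);                // setter checks the selector size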
+ */ +#define _BUILD_GET_SEG(func, reg) \ + static INLINE Selector \ + func(void) \ + { \ + Selector result; \ + __asm__ __volatile__("movw %%" #reg ", %0" \ + : "=rm" (result)); \ + return result; \ + } + +_BUILD_GET_SEG(GET_CS, cs) +_BUILD_GET_SEG(GET_DS, ds) +_BUILD_GET_SEG(GET_ES, es) +_BUILD_GET_SEG(GET_FS, fs) +_BUILD_GET_SEG(GET_GS, gs) +_BUILD_GET_SEG(GET_SS, ss) + + +#define SET_TR(expr) \ + do { \ + const Selector _set_tr_sel = (Selector)(expr); \ + ASSERT_ON_COMPILE_SELECTOR_SIZE(expr); \ + /* ltr reads from the GDT; don't sink any writes. */ \ + COMPILER_MEM_BARRIER(); \ + /* Checked against the Intel manual and GCC --hpreg */ \ + __asm__ __volatile__("ltr %0" \ + : \ + : "rm" (_set_tr_sel) : "memory"); \ + } while (0) + +/* Checked against the Intel manual and GCC --hpreg + + volatile because the content of TR can change without the compiler knowing + it (when we use task gates). */ +static INLINE void +_GET_TR(Selector * const result) +{ + __asm__ __volatile__("str %0" + : "=rm" (*result)); +} + +#define GET_TR(var) \ + do { \ + _GET_TR(&(var)); \ + } while (0) + + +/* Checked against the Intel manual and GCC --hpreg + + We use this to restore interrupts, so this cannot be reordered around + by gcc */ +static INLINE void +_Set_flags(uintptr_t f) +{ + __asm__ __volatile__( + "push %0" "\n\t" + "popf" + : + : "g" (f) + : "memory", "cc" + ); +} + + + +/* Checked against the Intel manual and GCC --hpreg + + volatile because gcc 2.7.2.3 doesn't know when eflags are modified (it + seems to ignore the "cc" clobber). gcc 2.95.2 is ok: it optimize 2 + successive calls only if there is no instructions in between. */ +static INLINE uintptr_t +_Get_flags(void) +{ + uintptr_t result; + + __asm__ __volatile__( + "pushf" "\n\t" + "pop %0" + : "=rm" (result) + : + : "memory" + ); + + return result; +} + +#define SAVE_FLAGS(var) do { \ + var = _Get_flags(); \ +} while (0) + +static INLINE Bool +HwInterruptsEnabled(uint32 eflags) +{ + return (eflags & EFLAGS_IF) != 0; +} + +/* Checked against the Intel manual and GCC --hpreg */ +static INLINE void +CLTS(void) +{ + __asm__ __volatile__ ("clts"); +} + + +/* Beginning of the section whose correctness has NOT been checked */ +#define FNCLEX() __asm__("fnclex" ::); + +/* TLB_INVALIDATE_xxx are not checked yet */ +#define TLB_INVALIDATE_PAGE(_addr) do { \ + __asm__ __volatile__("invlpg %0": :"m" (*(char *) (_addr)):"memory"); \ +} while (0) + +#define TLB_INVALIDATE_PAGE_OFF_FS(_addr) do { \ + __asm__ __volatile__("fs; invlpg %0": :"m" (*(char *) (_addr)):"memory"); \ +} while (0) + +#if ! defined(VMKBOOT) +#define RESTORE_FLAGS _Set_flags +#define ENABLE_INTERRUPTS() __asm__ __volatile__ ("sti": : :"memory") +#define CLEAR_INTERRUPTS() __asm__ __volatile__ ("cli": : :"memory") +#endif + +#define RAISE_INTERRUPT(_x) __asm__ __volatile__("int %0" :: "g" (_x)) +#define RETURN_FROM_INT() __asm__ __volatile__("iret" :: ) + +#if ! defined(VMKERNEL) && ! 
defined(VMKBOOT) +#define NO_INTERRUPTS_BEGIN() do { \ + uintptr_t _flags; \ + SAVE_FLAGS(_flags); \ + CLEAR_INTERRUPTS(); + +#define NO_INTERRUPTS_END() RESTORE_FLAGS(_flags); \ + } while(0) +#endif + +/* End of the section whose correctness has NOT been checked */ + +#elif defined _MSC_VER /* !__GNUC__ */ + +#define SET_IDT(_idt) _Set_IDT(&(_idt)) +#define SET_GDT(_gdt) _Set_GDT(&(_gdt)) +#define SET_TR(_tr) _Set_TR(_tr) +#define SET_LDT(_tr) _Set_LDT(_tr) + +#define GET_IDT(_idt) _Get_IDT(&(_idt)) +#define GET_GDT(_gdt) _Get_GDT(&(_gdt)) +#define GET_TR(_tr) do { _tr = _Get_TR(); } while (0) +#define GET_LDT(_tr) do { _tr = _Get_LDT(); } while (0) + +#define GET_CR0(_reg) __asm mov eax, cr0 __asm mov _reg, eax +#define SET_CR0(_reg) __asm mov eax, _reg __asm mov cr0, eax +#define GET_CR2(_reg) __asm mov eax, cr2 __asm mov _reg, eax +#define SET_CR2(_reg) __asm mov eax, _reg __asm mov cr2, eax +#define GET_CR3(_reg) __asm mov eax, cr3 __asm mov _reg, eax +#define SET_CR3(_reg) __asm mov eax, _reg __asm mov cr3, eax +/* + * MSC doesn't seem to like CR4 in __asm statements. We emit + * the opcode for MOV EAX,CR4 = 0xf020e0 and MOV CR4,EAX = 0xf022e0 + */ +#define GET_CR4(_reg) { \ + __asm _emit 0x0f __asm _emit 0x20 __asm _emit 0xe0 \ + __asm mov _reg, eax \ +} +#define SET_CR4(_reg) { \ + __asm mov eax, _reg \ + __asm _emit 0x0f __asm _emit 0x22 __asm _emit 0xe0 \ +} + + +#define GET_DR0(_reg) do { __asm mov eax,dr0 __asm mov _reg,eax } while (0) +#define SET_DR0(_reg) do { __asm mov eax,_reg __asm mov dr0,eax } while (0) +#define GET_DR1(_reg) do { __asm mov eax,dr1 __asm mov _reg,eax } while (0) +#define SET_DR1(_reg) do { __asm mov eax,_reg __asm mov dr1,eax } while (0) +#define GET_DR2(_reg) do { __asm mov eax,dr2 __asm mov _reg,eax } while (0) +#define SET_DR2(_reg) do { __asm mov eax,_reg __asm mov dr2,eax } while (0) +#define GET_DR3(_reg) do { __asm mov eax,dr3 __asm mov _reg,eax } while (0) +#define SET_DR3(_reg) do { __asm mov eax,_reg __asm mov dr3,eax } while (0) +#define GET_DR6(_reg) do { __asm mov eax,dr6 __asm mov _reg,eax } while (0) +#define SET_DR6(_reg) do { __asm mov eax,_reg __asm mov dr6,eax } while (0) +#define GET_DR7(_reg) do { __asm mov eax,dr7 __asm mov _reg,eax } while (0) +#define SET_DR7(_reg) do { __asm mov eax,_reg __asm mov dr7,eax } while (0) + + +#define CLTS() __asm clts + +#define FNCLEX() __asm fnclex + +#define TLB_INVALIDATE_PAGE(_addr) { \ + void *_a = (_addr); \ + __asm mov eax, _a __asm invlpg [eax] \ +} + +#define TLB_INVALIDATE_PAGE_OFF_FS(_addr) { \ + uint32 __a = (uint32) (_addr); \ + __asm mov eax, __a _asm invlpg fs:[eax] \ +} + + +#define ENABLE_INTERRUPTS() { __asm sti } +#define CLEAR_INTERRUPTS() { __asm cli } + +#define RAISE_INTERRUPT(_x) {__asm int _x } +#define RETURN_FROM_INT() {__asm iretd } + + +#define SAVE_FLAGS(x) { \ + __asm pushfd __asm pop eax __asm mov x, eax \ +} + +#define RESTORE_FLAGS(x) { \ + __asm push x __asm popfd\ +} + + + +static INLINE void SET_DS(Selector val) +{ + __asm mov ax, val + __asm mov ds, ax +} + +static INLINE void SET_ES(Selector val) +{ + __asm mov ax, val + __asm mov es, ax +} + +static INLINE void SET_FS(Selector val) +{ + __asm mov ax, val + __asm mov fs, ax +} + +static INLINE void SET_GS(Selector val) +{ + __asm mov ax, val + __asm mov gs, ax +} + +static INLINE void SET_SS(Selector val) +{ + __asm mov ax, val + __asm mov ss, ax +} + +static INLINE Selector GET_FS(void) +{ + Selector _v; + __asm mov _v,fs + return _v; +} + +static INLINE Selector GET_GS(void) +{ + Selector _v; + __asm mov 
_v,gs + return _v; +} + + +static INLINE Selector GET_DS(void) +{ + Selector _v; + __asm mov _v,ds + return _v; +} + +static INLINE Selector GET_ES(void) +{ + Selector _v; + __asm mov _v,es + return _v; +} + +static INLINE Selector GET_SS(void) +{ + Selector _v; + __asm mov _v,ss + return _v; +} + +static INLINE Selector GET_CS(void) +{ + Selector _v; + __asm mov _v,cs + return _v; +} + +#pragma warning( disable : 4035) + +static INLINE uint32 GET_WORD_FROM_FS(uint32 *_addr) { + __asm mov eax, _addr + __asm mov eax, fs:[eax] +} + +static INLINE uint16 GET_SHORT_FROM_FS(uint16 *_addr) { + __asm mov eax, _addr + __asm mov ax, fs:[eax] +} + +static INLINE uint8 GET_BYTE_FROM_FS(uint8 *_addr) { + __asm mov eax, _addr + __asm mov al, fs:[eax] +} + +#pragma warning (default: 4035) + +static INLINE void SET_WORD_FS(uint32 *_addr, uint32 _val) { + __asm mov eax, _addr + __asm mov ebx, _val + __asm mov fs:[eax], ebx +} + +static INLINE void SET_SHORT_FS(uint32 *_addr, uint16 _val) { + __asm mov eax, _addr + __asm mov bx, _val + __asm mov fs:[eax], bx +} + +static INLINE void SET_BYTE_FS(uint32 *_addr, uint8 _val) { + __asm mov eax, _addr + __asm mov bl, _val + __asm mov fs:[eax], bl +} + +static INLINE void _Set_GDT(_GETSET_DTR_TYPE *dtr) { + __asm mov eax, dtr + __asm lgdt [eax] +} + +static INLINE void _Set_IDT(_GETSET_DTR_TYPE *dtr) { + __asm mov eax, dtr + __asm lidt [eax] +} + +static INLINE void _Set_LDT(Selector val) +{ + __asm lldt val +} + +static INLINE void _Set_TR(Selector val) +{ + __asm ltr val +} + +static INLINE void _Get_GDT(_GETSET_DTR_TYPE *dtr) { + __asm mov eax, dtr + __asm sgdt [eax] +} + +static INLINE void _Get_IDT(_GETSET_DTR_TYPE *dtr) { + __asm mov eax, dtr + __asm sidt [eax] +} + +static INLINE Selector _Get_LDT(void) { + Selector sel; + __asm sldt sel + return sel; +} + +static INLINE Selector _Get_TR(void) { + Selector sel; + __asm str sel + return sel; +} + + +static INLINE void +MEMCOPY_TO_FS(VA to, + char * from, + unsigned long n) +{ + unsigned long i =0; + while (i+4 <=n) { + uint32 x = *(uint32*) (from + i); + uint32 _faddr = (uint32) (to+i); + __asm mov eax, _faddr + __asm mov ebx, x + __asm mov fs:[eax], ebx + i +=4; + } + while (i> 32); + uint32 loval = (uint32)val; + __asm push edx + __asm push ecx + __asm push eax + __asm mov eax, loval + __asm mov edx, hival + __asm mov ecx, input + __asm _emit 0x0f __asm _emit 0x30 + __asm pop eax + __asm pop ecx + __asm pop edx +} +#pragma warning (default: 4035) +#else +#error +#endif + +#ifdef __GNUC__ +static INLINE void __SET_MSR(int cx, uint64 val) +{ +#ifdef VM_X86_64 + __asm__ __volatile__( + "wrmsr" + : /* no outputs */ + : "a" ((uint32) val), "d" ((uint32)(val >> 32)), "c" (cx) + ); +#else + __asm__ __volatile__( + "wrmsr" + : /* no outputs */ + : "A" (val), + "c" (cx) + ); +#endif +} +#endif + + +/* + * RDMSR/WRMSR access the 64bit MSRs as two + * 32 bit quantities, whereas GET_MSR/SET_MSR + * above access the MSRs as one 64bit quantity. 
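+ *
+ * For illustration, a hypothetical caller can read the same MSR through
+ * both interfaces (MSR_GSBASE is just an example register):
+ *
+ *    uint32 lo, hi;
+ *    uint64 whole = __GET_MSR(MSR_GSBASE);   // one 64-bit quantity
+ *    RDMSR(MSR_GSBASE, lo, hi);              // same MSR as two 32-bit halves
+ *    ASSERT(whole == QWORD(hi, lo));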
+ */ +#ifdef __GNUC__ +#undef RDMSR +#undef WRMSR +#define RDMSR(msrNum, low, high) do { \ + __asm__ __volatile__( \ + "rdmsr" \ + : "=a" (low), "=d" (high) \ + : "c" (msrNum) \ + ); \ +} while (0) + +#define WRMSR(msrNum, low, high) do { \ + __asm__ __volatile__( \ + "wrmsr" \ + : /* no outputs */ \ + : "c" (msrNum), \ + "a" (low), \ + "d" (high) \ + ); \ +} while (0) + +static INLINE uint64 RDPMC(int cx) +{ +#ifdef VM_X86_64 + uint64 pmcLow; + uint64 pmcHigh; + + __asm__ __volatile__( + "rdpmc" + : "=a" (pmcLow), "=d" (pmcHigh) + : "c" (cx) + ); + + return pmcHigh << 32 | pmcLow; +#else + uint64 pmc; + + __asm__ __volatile__( + "rdpmc" + : "=A" (pmc) + : "c" (cx) + ); + + return pmc; +#endif +} +#elif defined _MSC_VER +#ifndef VM_X86_64 // XXX Switch to intrinsics with the new 32 and 64-bit compilers. + +static INLINE uint64 RDPMC(int counter) +{ + __asm mov ecx, counter + __asm rdpmc +} + +static INLINE void WRMSR(uint32 msrNum, uint32 lo, uint32 hi) +{ + __asm mov ecx, msrNum + __asm mov eax, lo + __asm mov edx, hi + __asm wrmsr +} +#endif +#endif + + +#if defined(__GNUC__) && (defined(VMM) || defined(VMKERNEL) || defined(FROBOS)) +static INLINE uint64 __XGETBV(int cx) +{ +#ifdef VM_X86_64 + uint64 lowval, hival; + __asm__ __volatile__( +#if __GNUC__ < 4 || __GNUC__ == 4 && __GNUC_MINOR__ == 1 + ".byte 0x0f, 0x01, 0xd0" +#else + "xgetbv" +#endif + : "=a" (lowval), "=d" (hival) + : "c" (cx) + ); + return hival << 32 | lowval; +#else + uint64 val; + __asm__ __volatile__( +#if __GNUC__ < 4 || __GNUC__ == 4 && __GNUC_MINOR__ == 1 + ".byte 0x0f, 0x01, 0xd0" +#else + "xgetbv" +#endif + : "=A" (val) + : "c" (cx) + ); + return val; +#endif +} + +static INLINE void __XSETBV(int cx, uint64 val) +{ + __asm__ __volatile__( +#if __GNUC__ < 4 || __GNUC__ == 4 && __GNUC_MINOR__ == 1 + ".byte 0x0f, 0x01, 0xd1" +#else + "xsetbv" +#endif + : /* no outputs */ + : "a" ((uint32)val), "d" ((uint32)(val >> 32)), "c" (cx) + ); +} + +static INLINE uint64 GET_XCR0(void) +{ + return __XGETBV(0); +} + +#define SET_XCR0(val) __XSETBV(0, val) + +static INLINE void SET_XCR0_IF_NEEDED(uint64 newVal, uint64 oldVal) +{ + ASSERT(oldVal == GET_XCR0()); + if (newVal != oldVal) { + SET_XCR0(newVal); + } +} +#endif + + +#define START_TRACING() { \ + uintptr_t flags; \ + SAVE_FLAGS(flags); \ + flags |= EFLAGS_TF; \ + RESTORE_FLAGS(flags); \ +} + +#define STOP_TRACING() { \ + uintptr_t flags; \ + SAVE_FLAGS(flags); \ + flags &= ~EFLAGS_TF; \ + RESTORE_FLAGS(flags); \ +} + + +static INLINE Bool +INTERRUPTS_ENABLED(void) +{ + uintptr_t flags; + SAVE_FLAGS(flags); + return ((flags & EFLAGS_IF) != 0); +} + +static INLINE void +SET_KERNEL_PER_CORE(uint64 val) +{ + __SET_MSR(MSR_GSBASE, val); +} + +#endif diff --git a/vmmon-only/include/vm_asm_x86_64.h b/vmmon-only/include/vm_asm_x86_64.h new file mode 100644 index 00000000..902cd81b --- /dev/null +++ b/vmmon-only/include/vm_asm_x86_64.h @@ -0,0 +1,136 @@ +/********************************************************* + * Copyright (C) 1998-2014 VMware, Inc. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation version 2 and no later version. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY + * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * for more details. 
+ * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write to the Free Software Foundation, Inc., + * 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + * + *********************************************************/ + +/* + * vm_asm_x86_64.h + * + * x86-64 asm macros + */ + +#ifndef _VM_ASM_X86_64_H_ +#define _VM_ASM_X86_64_H_ + + +#define INCLUDE_ALLOW_VMX +#define INCLUDE_ALLOW_MODULE +#define INCLUDE_ALLOW_VMCORE +#define INCLUDE_ALLOW_VMMON +#define INCLUDE_ALLOW_VMK_MODULE +#define INCLUDE_ALLOW_VMKERNEL +#define INCLUDE_ALLOW_DISTRIBUTE +#include "includeCheck.h" + +#ifdef __GNUC__ +#include "vm_asm_x86.h" +#endif + +/* + * This file contains inline assembly routines used by x86_64 code. + */ + +#ifdef __GNUC__ + + /* nop; prevents #error for no compiler definition from firing */ + +#elif defined _MSC_VER /* !__GNUC__ */ + +/* + * x86-64 windows doesn't support inline asm so we have to use these + * intrinsic functions defined in the compiler. Not all of these are well + * documented. There is an array in the compiler dll (c1.dll) which has + * an array of the names of all the intrinsics minus the leading + * underscore. Searching around in the ntddk.h file can also be helpful. + * + * The declarations for the intrinsic functions were taken from the DDK. + * Our declarations must match the ddk's otherwise the 64-bit c++ compiler + * will complain about second linkage of the intrinsic functions. + * We define the intrinsic using the basic types corresponding to the + * Windows typedefs. This avoids having to include windows header files + * to get to the windows types. + */ + +#ifdef _WIN64 +#ifdef __cplusplus +extern "C" { +#endif +unsigned __int64 __readmsr(unsigned long); +void __writemsr(unsigned long, unsigned __int64); +#pragma intrinsic(__readmsr, __writemsr) +#ifdef __cplusplus +} +#endif + + +static INLINE uint64 +RDPMC(int counter) +{ + return __readpmc(counter); +} + + +static INLINE void +WRMSR(uint32 msrNum, uint32 lo, uint32 hi) +{ + uint64 value = QWORD(hi, lo); + __writemsr(msrNum, value); +} + + +static INLINE uint64 +__GET_MSR(int input) +{ + return __readmsr((unsigned long)input); +} + + +static INLINE void +__SET_MSR(int cx, uint64 val) +{ + __writemsr((unsigned long)cx, (unsigned __int64)val); +} + +#endif + +#else +#error No compiler defined for get/set +#endif /* !__GNUC__ && !_MSC_VER */ + + +#ifdef __GNUC__ +static INLINE void +SWAPGS(void) +{ + __asm__ __volatile__("swapgs"); +} + + +static INLINE uint64 +RDTSCP_AuxOnly(void) +{ + uint64 tscLow, tscHigh, tscAux; + + __asm__ __volatile__( + "rdtscp" + : "=a" (tscLow), "=d" (tscHigh), "=c" (tscAux) + ); + + return tscAux; +} +#endif + +#endif diff --git a/vmmon-only/include/vm_assert.h b/vmmon-only/include/vm_assert.h new file mode 100644 index 00000000..b368e1da --- /dev/null +++ b/vmmon-only/include/vm_assert.h @@ -0,0 +1,336 @@ +/********************************************************* + * Copyright (C) 1998-2015 VMware, Inc. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation version 2 and no later version. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY + * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * for more details. 
+ * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write to the Free Software Foundation, Inc., + * 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + * + *********************************************************/ + +/* + * vm_assert.h -- + * + * The basic assertion facility for all VMware code. + * + * For proper use, see bora/doc/assert and + * http://vmweb.vmware.com/~mts/WebSite/guide/programming/asserts.html. + */ + +#ifndef _VM_ASSERT_H_ +#define _VM_ASSERT_H_ + +#define INCLUDE_ALLOW_USERLEVEL + +#define INCLUDE_ALLOW_MODULE +#define INCLUDE_ALLOW_VMMON +#define INCLUDE_ALLOW_VMKERNEL +#define INCLUDE_ALLOW_VMKDRIVERS +#define INCLUDE_ALLOW_VMK_MODULE +#define INCLUDE_ALLOW_DISTRIBUTE +#define INCLUDE_ALLOW_VMCORE +#include "includeCheck.h" + +// XXX not necessary except some places include vm_assert.h improperly +#include "vm_basic_types.h" + +#ifdef __cplusplus +extern "C" { +#endif + +/* + * Some bits of vmcore are used in VMKernel code and cannot have + * the VMKERNEL define due to other header dependencies. + */ +#if defined(VMKERNEL) && !defined(VMKPANIC) +#define VMKPANIC 1 +#endif + +/* + * Internal macros, functions, and strings + * + * The monitor wants to save space at call sites, so it has specialized + * functions for each situation. User level wants to save on implementation + * so it uses generic functions. + */ + +#if !defined VMM || defined MONITOR_APP // { + +#if defined (VMKPANIC) +#include "vmk_assert.h" +#else /* !VMKPANIC */ +#define _ASSERT_PANIC(name) \ + Panic(_##name##Fmt "\n", __FILE__, __LINE__) +#define _ASSERT_PANIC_BUG(bug, name) \ + Panic(_##name##Fmt " bugNr=%d\n", __FILE__, __LINE__, bug) +#define _ASSERT_PANIC_NORETURN(name) \ + Panic(_##name##Fmt "\n", __FILE__, __LINE__) +#define _ASSERT_PANIC_BUG_NORETURN(bug, name) \ + Panic(_##name##Fmt " bugNr=%d\n", __FILE__, __LINE__, bug) +#endif /* VMKPANIC */ + +#endif // } + + +// These strings don't have newline so that a bug can be tacked on. +#define _AssertPanicFmt "PANIC %s:%d" +#define _AssertAssertFmt "ASSERT %s:%d" +#define _AssertVerifyFmt "VERIFY %s:%d" +#define _AssertNotImplementedFmt "NOT_IMPLEMENTED %s:%d" +#define _AssertNotReachedFmt "NOT_REACHED %s:%d" +#define _AssertMemAllocFmt "MEM_ALLOC %s:%d" +#define _AssertNotTestedFmt "NOT_TESTED %s:%d" + + +/* + * Panic and log functions + */ + +void Log(const char *fmt, ...) PRINTF_DECL(1, 2); +void Warning(const char *fmt, ...) PRINTF_DECL(1, 2); +#if defined VMKPANIC +void Panic_SaveRegs(void); + +#ifdef VMX86_DEBUG +void Panic_NoSave(const char *fmt, ...) PRINTF_DECL(1, 2); +#else +NORETURN void Panic_NoSave(const char *fmt, ...) PRINTF_DECL(1, 2); +#endif + +NORETURN void Panic_NoSaveNoReturn(const char *fmt, ...) PRINTF_DECL(1, 2); + +#define Panic(fmt...) do { \ + Panic_SaveRegs(); \ + Panic_NoSave(fmt); \ +} while(0) + +#define Panic_NoReturn(fmt...) do { \ + Panic_SaveRegs(); \ + Panic_NoSaveNoReturn(fmt); \ +} while(0) + +#else +NORETURN void Panic(const char *fmt, ...) PRINTF_DECL(1, 2); +#endif + +void LogThrottled(uint32 *count, const char *fmt, ...) PRINTF_DECL(2, 3); +void WarningThrottled(uint32 *count, const char *fmt, ...) PRINTF_DECL(2, 3); + + +#ifndef ASSERT_IFNOT + /* + * PR 271512: When compiling with gcc, catch assignments inside an ASSERT. + * + * 'UNLIKELY' is defined with __builtin_expect, which does not warn when + * passed an assignment (gcc bug 36050). 
To get around this, we put 'cond' + * in an 'if' statement and make sure it never gets executed by putting + * that inside of 'if (0)'. We use gcc's statement expression syntax to + * make ASSERT an expression because some code uses it that way. + * + * Since statement expression syntax is a gcc extension and since it's + * not clear if this is a problem with other compilers, the ASSERT + * definition was not changed for them. Using a bare 'cond' with the + * ternary operator may provide a solution. + */ + + #ifdef __GNUC__ + #define ASSERT_IFNOT(cond, panic) \ + ({if (UNLIKELY(!(cond))) { panic; if (0) { if (cond) {;}}} (void)0;}) + #else + #define ASSERT_IFNOT(cond, panic) \ + (UNLIKELY(!(cond)) ? (panic) : (void)0) + #endif +#endif + + +/* + * Assert, panic, and log macros + * + * Some of these are redefined below undef !VMX86_DEBUG. + * ASSERT() is special cased because of interaction with Windows DDK. + */ + +#if defined VMX86_DEBUG +#undef ASSERT +#define ASSERT(cond) ASSERT_IFNOT(cond, _ASSERT_PANIC(AssertAssert)) +#define ASSERT_BUG(bug, cond) \ + ASSERT_IFNOT(cond, _ASSERT_PANIC_BUG(bug, AssertAssert)) +#endif + +#undef VERIFY +#define VERIFY(cond) \ + ASSERT_IFNOT(cond, _ASSERT_PANIC_NORETURN(AssertVerify)) +#define VERIFY_BUG(bug, cond) \ + ASSERT_IFNOT(cond, _ASSERT_PANIC_BUG_NORETURN(bug, AssertVerify)) + +#define PANIC() _ASSERT_PANIC(AssertPanic) +#define PANIC_BUG(bug) _ASSERT_PANIC_BUG(bug, AssertPanic) + +#define ASSERT_NOT_IMPLEMENTED(cond) \ + ASSERT_IFNOT(cond, NOT_IMPLEMENTED()) +#define ASSERT_NOT_IMPLEMENTED_BUG(bug, cond) \ + ASSERT_IFNOT(cond, NOT_IMPLEMENTED_BUG(bug)) + +#if defined VMKPANIC || defined VMM +#define NOT_IMPLEMENTED() _ASSERT_PANIC_NORETURN(AssertNotImplemented) +#else +#define NOT_IMPLEMENTED() _ASSERT_PANIC(AssertNotImplemented) +#endif + +#if defined VMM +#define NOT_IMPLEMENTED_BUG(bug) \ + _ASSERT_PANIC_BUG_NORETURN(bug, AssertNotImplemented) +#else +#define NOT_IMPLEMENTED_BUG(bug) _ASSERT_PANIC_BUG(bug, AssertNotImplemented) +#endif + +#if defined VMKPANIC || defined VMM +#define NOT_REACHED() _ASSERT_PANIC_NORETURN(AssertNotReached) +#else +#define NOT_REACHED() _ASSERT_PANIC(AssertNotReached) +#endif + +#define ASSERT_MEM_ALLOC(cond) \ + ASSERT_IFNOT(cond, _ASSERT_PANIC(AssertMemAlloc)) + +#ifdef VMX86_DEVEL +#define ASSERT_DEVEL(cond) ASSERT(cond) +#define NOT_TESTED() Warning(_AssertNotTestedFmt "\n", __FILE__, __LINE__) +#else +#define ASSERT_DEVEL(cond) ((void)0) +#define NOT_TESTED() Log(_AssertNotTestedFmt "\n", __FILE__, __LINE__) +#endif + +#define ASSERT_NO_INTERRUPTS() ASSERT(!INTERRUPTS_ENABLED()) +#define ASSERT_HAS_INTERRUPTS() ASSERT(INTERRUPTS_ENABLED()) + +#define ASSERT_NOT_TESTED(cond) (UNLIKELY(!(cond)) ? NOT_TESTED() : (void)0) +#define NOT_TESTED_ONCE() DO_ONCE(NOT_TESTED()) + +#define NOT_TESTED_1024() \ + do { \ + static uint16 count = 0; \ + if (UNLIKELY(count == 0)) { NOT_TESTED(); } \ + count = (count + 1) & 1023; \ + } while (0) + +#define LOG_ONCE(_s) DO_ONCE(Log _s) + + +/* + * Redefine macros that are only in debug versions + */ + +#if !defined VMX86_DEBUG // { + +#undef ASSERT +#define ASSERT(cond) ((void)0) +#define ASSERT_BUG(bug, cond) ((void)0) + +/* + * Expand NOT_REACHED() as appropriate for each situation. + * + * Mainly, we want the compiler to infer the same control-flow + * information as it would from Panic(). Otherwise, different + * compilation options will lead to different control-flow-derived + * errors, causing some make targets to fail while others succeed. 
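+ *
+ * A purely illustrative caller that relies on this inference (the
+ * function and its cases are made up):
+ *
+ *    static int ModeToIndex(int mode)
+ *    {
+ *       switch (mode) {
+ *       case 0: return 10;
+ *       case 1: return 20;
+ *       }
+ *       NOT_REACHED();  // keeps "control reaches end of non-void function"
+ *    }                  // diagnostics consistent across build types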
+ * + * VC++ has the __assume() built-in function which we don't trust + * (see bug 43485); gcc has no such construct; we just panic in + * userlevel code. The monitor doesn't want to pay the size penalty + * (measured at 212 bytes for the release vmm for a minimal infinite + * loop; panic would cost even more) so it does without and lives + * with the inconsistency. + */ + +#if defined VMKPANIC || defined VMM +#undef NOT_REACHED +#if defined __GNUC__ && (__GNUC__ > 4 || __GNUC__ == 4 && __GNUC_MINOR__ >= 5) +#define NOT_REACHED() (__builtin_unreachable()) +#else +#define NOT_REACHED() ((void)0) +#endif +#else +// keep debug definition +#endif + +#undef LOG_UNEXPECTED +#define LOG_UNEXPECTED(bug) ((void)0) + +#undef ASSERT_NOT_TESTED +#define ASSERT_NOT_TESTED(cond) ((void)0) +#undef NOT_TESTED +#define NOT_TESTED() ((void)0) +#undef NOT_TESTED_ONCE +#define NOT_TESTED_ONCE() ((void)0) +#undef NOT_TESTED_1024 +#define NOT_TESTED_1024() ((void)0) + +#endif // !VMX86_DEBUG } + + +/* + * Compile-time assertions. + * + * ASSERT_ON_COMPILE does not use the common + * switch (0) { case 0: case (e): ; } trick because some compilers (e.g. MSVC) + * generate code for it. + * + * The implementation uses both enum and typedef because the typedef alone is + * insufficient; gcc allows arrays to be declared with non-constant expressions + * (even in typedefs, where it makes no sense). + * + * NOTE: if GCC ever changes so that it ignores unused types altogether, this + * assert might not fire! We explicitly mark it as unused because GCC 4.8+ + * uses -Wunused-local-typedefs as part of -Wall, which means the typedef will + * generate a warning. + */ + +#if defined(_Static_assert) || defined(__cplusplus) || \ + !defined(__GNUC__) || __GNUC__ < 4 || (__GNUC__ == 4 && __GNUC_MINOR__ < 6) +#define ASSERT_ON_COMPILE(e) \ + do { \ + enum { AssertOnCompileMisused = ((e) ? 1 : -1) }; \ + UNUSED_TYPE(typedef char AssertOnCompileFailed[AssertOnCompileMisused]); \ + } while (0) +#else +#define ASSERT_ON_COMPILE(e) \ + do { \ + _Static_assert(e, #e); \ + } while (0); +#endif + +/* + * To put an ASSERT_ON_COMPILE() outside a function, wrap it + * in MY_ASSERTS(). The first parameter must be unique in + * each .c file where it appears. For example, + * + * MY_ASSERTS(FS3_INT, + * ASSERT_ON_COMPILE(sizeof(FS3_DiskLock) == 128); + * ASSERT_ON_COMPILE(sizeof(FS3_DiskLockReserved) == DISK_BLOCK_SIZE); + * ASSERT_ON_COMPILE(sizeof(FS3_DiskBlock) == DISK_BLOCK_SIZE); + * ASSERT_ON_COMPILE(sizeof(Hardware_DMIUUID) == 16); + * ) + * + * Caution: ASSERT() within MY_ASSERTS() is silently ignored. + * The same goes for anything else not evaluated at compile time. + */ + +#define MY_ASSERTS(name, assertions) \ + static INLINE void name(void) { \ + assertions \ + } + +#ifdef __cplusplus +} /* extern "C" */ +#endif + +#endif /* ifndef _VM_ASSERT_H_ */ diff --git a/vmmon-only/include/vm_atomic.h b/vmmon-only/include/vm_atomic.h new file mode 100644 index 00000000..07f32eab --- /dev/null +++ b/vmmon-only/include/vm_atomic.h @@ -0,0 +1,3896 @@ +/********************************************************* + * Copyright (C) 1998-2015 VMware, Inc. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation version 2 and no later version. 
+ * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY + * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * for more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write to the Free Software Foundation, Inc., + * 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + * + *********************************************************/ + +/* + * vm_atomic.h -- + * + * Atomic power + * + * Note: Only partially tested on ARM processors: Works for View Open + * Client, which shouldn't have threads, and ARMv8 processors. + * + * In ARM, GCC intrinsics (__sync*) compile but might not + * work, while MS intrinsics (_Interlocked*) do not compile. + */ + +#ifndef _ATOMIC_H_ +#define _ATOMIC_H_ + +#define INCLUDE_ALLOW_USERLEVEL + +#define INCLUDE_ALLOW_MODULE +#define INCLUDE_ALLOW_VMMON +#define INCLUDE_ALLOW_VMKDRIVERS +#define INCLUDE_ALLOW_VMK_MODULE +#define INCLUDE_ALLOW_VMKERNEL +#define INCLUDE_ALLOW_DISTRIBUTE +#define INCLUDE_ALLOW_VMCORE +#include "includeCheck.h" + +#include "vm_basic_types.h" +#include "vm_assert.h" + +/* + * In the Atomic_* definitions below, memory ordering and atomicity are somewhat + * conflated in an inconsistent manner. First, we have Atomic_{Read,Write}, + * which only guarantees single copy atomicity, i.e. that the read/write occurs + * in an atomic fashion, but have no implication on memory ordering. The second + * class of Atomics are all the non-unfenced operations excluding + * Atomic_{Read,Write}*, which both imply atomicity and act as a memory barrier, + * implying sequentially consistent ordering of the atomic operation with all + * loads/stores prior to and after it. + * + * Since on x86, the second class of operations are associated with LOCK + * semantics, assumptions have been made about the ordering these operations + * imply on surrounding code (see for example the vmkernel's RefCount + * implementation). As a result, on arm64 we have to provide these same + * guarantees. We do this by making use of DMB barriers both before and after + * the atomic ldrx/strx sequences. A barrier before and after is required to + * avoid having part of the atomic operation reordered with surrounding code, + * e.g. a store-load reordering of the strx with a following load outside the + * Atomic_ op. For the first class of operations, Atomic_{Read,Write}, we do not + * implement a barrier. + * + * This implementation of Atomic operations is suboptimal on arm64, since + * both atomicity and memory ordering are fused together. Ideally the Atomic + * operations would only imply atomicity, and an explicit memory barrier in the + * surrounding code used to enforce ordering where necessary. This would eschew + * the need for the DMBs. A middle ground can be implemented where we use the + * arm64 load-acquire/store-release exclusive instructions to implement Atomics. + * This would imply sequential consistency of the Atomic operations (but not + * with any of the surrounding non-atomic operations) without the need for a + * DMB. Using these without a DMB today can still result in problematic + * reordering by the processor with surrounding non-atomic operations, e.g. a + * store-load reordering with a stlxr. Future optimization for arm64 should + * consider the wider change required at the call sites to minimize DMBs. 
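+ *
+ * As a purely illustrative sketch, that middle ground would swap the
+ * DMB + ldxr/stxr pattern used below for acquire/release exclusives,
+ * along the lines of:
+ *
+ *    1: ldaxr %w0, [%2]        // load-acquire exclusive
+ *       add   %w0, %w0, %w3
+ *       stlxr %w1, %w0, [%2]   // store-release exclusive
+ *       cbnz  %w1, 1b
+ *
+ * This is not what the macros below implement; they keep the explicit DMBs.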
+ * + * For further details on x86 and ARM memory ordering see + * https://wiki.eng.vmware.com/ARM/MemoryOrdering. + */ + +#ifdef VM_ARM_64 +#include "vm_basic_asm_arm64.h" + +#define ARM_64_ATOMIC_16_OPV_PRIVATE(atm,modval,op) ({ \ + uint32 _failed; \ + uint16 _sample; \ + __asm__ __volatile__ ( \ + "1: ldxrh %w0, [%2] \n\t" \ + #op " %w0, %w0, %w3 \n\t" \ + " stxrh %w1, %w0, [%2] \n\t" \ + " cbnz %w1, 1b \n\t" \ + : "=&r" (_sample), \ + "=&r" (_failed) \ + : "r" (atm), \ + "r" (modval) \ + : "memory"); \ +}) + +#define ARM_64_ATOMIC_16_OPO_PRIVATE(atm,modval,op) ({ \ + uint32 _failed; \ + uint16 _newval; \ + uint16 _oldval; \ + __asm__ __volatile__ ( \ + "1: ldxrh %w0, [%3] \n\t" \ + #op " %w1, %w0, %w4 \n\t" \ + " stxrh %w2, %w1, [%3] \n\t" \ + " cbnz %w2, 1b \n\t" \ + : "=&r" (_oldval), \ + "=&r" (_newval), \ + "=&r" (_failed) \ + : "r" (atm), \ + "r" (modval) \ + : "memory"); \ + _oldval; \ +}) + +#define ARM_64_ATOMIC_32_OPV_PRIVATE(atm,modval,op) ({ \ + uint32 _failed; \ + uint32 _sample; \ + __asm__ __volatile__ ( \ + "1: ldxr %w0, [%2] \n\t" \ + #op " %w0, %w0, %w3 \n\t" \ + " stxr %w1, %w0, [%2] \n\t" \ + " cbnz %w1, 1b \n\t" \ + : "=&r" (_sample), \ + "=&r" (_failed) \ + : "r" (atm), \ + "r" (modval) \ + : "memory"); \ +}) + +#define ARM_64_ATOMIC_32_OPO_PRIVATE(atm,modval,op) ({ \ + uint32 _failed; \ + uint32 _newval; \ + uint32 _oldval; \ + __asm__ __volatile__ ( \ + "1: ldxr %w0, [%3] \n\t" \ + #op " %w1, %w0, %w4 \n\t" \ + " stxr %w2, %w1, [%3] \n\t" \ + " cbnz %w2, 1b \n\t" \ + : "=&r" (_oldval), \ + "=&r" (_newval), \ + "=&r" (_failed) \ + : "r" (atm), \ + "r" (modval) \ + : "memory"); \ + _oldval; \ +}) + +#define ARM_64_ATOMIC_64_OPV_PRIVATE(atm,modval,op) ({ \ + uint32 _failed; \ + uint64 _sample; \ + __asm__ __volatile__ ( \ + "1: ldxr %x0, [%2] \n\t" \ + #op " %x0, %x0, %x3 \n\t" \ + " stxr %w1, %x0, [%2] \n\t" \ + " cbnz %w1, 1b \n\t" \ + : "=&r" (_sample), \ + "=&r" (_failed) \ + : "r" (atm), \ + "r" (modval) \ + : "memory"); \ +}) + +#define ARM_64_ATOMIC_64_OPO_PRIVATE(atm,modval,op) ({ \ + uint32 _failed; \ + uint64 _newval; \ + uint64 _oldval; \ + __asm__ __volatile__ ( \ + "1: ldxr %x0, [%3] \n\t" \ + #op " %x1, %x0, %x4 \n\t" \ + " stxr %w2, %x1, [%3] \n\t" \ + " cbnz %w2, 1b \n\t" \ + : "=&r" (_oldval), \ + "=&r" (_newval), \ + "=&r" (_failed) \ + : "r" (atm), \ + "r" (modval) \ + : "memory"); \ + _oldval; \ +}) + +#define ARM_64_ATOMIC_16_OPV_PRIVATE_FENCED(atm,modval,op) ({ \ + DMB(); \ + ARM_64_ATOMIC_16_OPV_PRIVATE(atm,modval,op); \ + DMB(); \ +}) + +#define ARM_64_ATOMIC_16_OPO_PRIVATE_FENCED(atm,modval,op) ({ \ + uint16 _oldval_fenced; \ + DMB(); \ + _oldval_fenced = ARM_64_ATOMIC_16_OPO_PRIVATE(atm,modval,op); \ + DMB(); \ + _oldval_fenced; \ +}) + +#define ARM_64_ATOMIC_32_OPV_PRIVATE_FENCED(atm,modval,op) ({ \ + DMB(); \ + ARM_64_ATOMIC_32_OPV_PRIVATE(atm,modval,op); \ + DMB(); \ +}) + +#define ARM_64_ATOMIC_32_OPO_PRIVATE_FENCED(atm,modval,op) ({ \ + uint32 _oldval_fenced; \ + DMB(); \ + _oldval_fenced = ARM_64_ATOMIC_32_OPO_PRIVATE(atm,modval,op); \ + DMB(); \ + _oldval_fenced; \ +}) + +#define ARM_64_ATOMIC_64_OPV_PRIVATE_FENCED(atm,modval,op) ({ \ + DMB(); \ + ARM_64_ATOMIC_64_OPV_PRIVATE(atm,modval,op); \ + DMB(); \ +}) + +#define ARM_64_ATOMIC_64_OPO_PRIVATE_FENCED(atm,modval,op) ({ \ + uint64 _oldval_fenced; \ + DMB(); \ + _oldval_fenced = ARM_64_ATOMIC_64_OPO_PRIVATE(atm,modval,op); \ + DMB(); \ + _oldval_fenced; \ +}) + +#endif /* VM_ARM_64 */ + + +/* Basic atomic types: 16, 32 and 64 bits */ +typedef struct Atomic_uint16 { + volatile uint16 
value; +} Atomic_uint16 ALIGNED(2); + +typedef struct Atomic_uint32 { + volatile uint32 value; +} Atomic_uint32 ALIGNED(4); + +typedef struct Atomic_uint64 { + volatile uint64 value; +} Atomic_uint64 ALIGNED(8); + +/* + * Prototypes for msft atomics. These are defined & inlined by the + * compiler so no function definition is needed. The prototypes are + * needed for C++. Since amd64 compiler doesn't support inline asm we + * have to use these. Unfortunately, we still have to use some inline asm + * for the 32 bit code since the and/or/xor implementations didn't show up + * until XP or 2k3. + * + * The declarations for the intrinsic functions were taken from ntddk.h + * in the DDK. The declarations must match otherwise the 64-bit C++ + * compiler will complain about second linkage of the intrinsic functions. + * We define the intrinsic using the basic types corresponding to the + * Windows typedefs. This avoids having to include windows header files + * to get to the windows types. + */ +#if defined(_MSC_VER) && _MSC_VER >= 1310 && !defined(BORA_NO_WIN32_INTRINS) +#ifdef __cplusplus +extern "C" { +#endif +long _InterlockedExchange(long volatile*, long); +long _InterlockedCompareExchange(long volatile*, long, long); +long _InterlockedExchangeAdd(long volatile*, long); +long _InterlockedDecrement(long volatile*); +long _InterlockedIncrement(long volatile*); +void _ReadWriteBarrier(void); +#pragma intrinsic(_InterlockedExchange, _InterlockedCompareExchange) +#pragma intrinsic(_InterlockedExchangeAdd, _InterlockedDecrement) +#pragma intrinsic(_InterlockedIncrement, _ReadWriteBarrier) + +# if _MSC_VER >= 1600 +char _InterlockedExchange8(char volatile *, char); +char _InterlockedCompareExchange8(char volatile *, char, char); +#pragma intrinsic(_InterlockedCompareExchange8, _InterlockedCompareExchange8) +#endif + +#if defined(VM_X86_64) +long _InterlockedAnd(long volatile*, long); +__int64 _InterlockedAnd64(__int64 volatile*, __int64); +long _InterlockedOr(long volatile*, long); +__int64 _InterlockedOr64(__int64 volatile*, __int64); +long _InterlockedXor(long volatile*, long); +__int64 _InterlockedXor64(__int64 volatile*, __int64); +__int64 _InterlockedExchangeAdd64(__int64 volatile*, __int64); +__int64 _InterlockedIncrement64(__int64 volatile*); +__int64 _InterlockedDecrement64(__int64 volatile*); +__int64 _InterlockedExchange64(__int64 volatile*, __int64); +__int64 _InterlockedCompareExchange64(__int64 volatile*, __int64, __int64); +#if !defined(_WIN64) +#pragma intrinsic(_InterlockedAnd, _InterlockedAnd64) +#pragma intrinsic(_InterlockedOr, _InterlockedOr64) +#pragma intrinsic(_InterlockedXor, _InterlockedXor64) +#pragma intrinsic(_InterlockedExchangeAdd64, _InterlockedIncrement64) +#pragma intrinsic(_InterlockedDecrement64, _InterlockedExchange64) +#pragma intrinsic(_InterlockedCompareExchange64) +#endif /* !_WIN64 */ +#endif /* __x86_64__ */ + +#ifdef __cplusplus +} +#endif +#endif /* _MSC_VER */ + +#if defined(__arm__) +/* + * LDREX without STREX or CLREX may cause problems in environments where the + * context switch may not clear the reference monitor - according ARM manual + * the reference monitor should be cleared after a context switch, but some + * may not like Linux kernel's non-preemptive context switch path. So use of + * ARM routines in kernel code may not be safe. 
+ */ +# if defined(__ARM_ARCH_7__) || defined(__ARM_ARCH_7A__) || \ + defined(__ARM_ARCH_7R__)|| defined(__ARM_ARCH_7M__) +# define VM_ARM_V7 +# ifdef __KERNEL__ +# warning LDREX/STREX may not be safe in linux kernel, since it \ + does not issue CLREX on context switch (as of 2011-09-29). +# endif +# else +# error Only ARMv7 extends the synchronization primitives ldrex/strex. \ + For the lower ARM version, please implement the atomic functions \ + by kernel APIs. +# endif +#endif + +/* Data Memory Barrier */ +#ifdef VM_ARM_V7 +#define dmb() __asm__ __volatile__("dmb" : : : "memory") +#endif + + +/* Convert a volatile uint32 to Atomic_uint32. */ +static INLINE Atomic_uint32 * +Atomic_VolatileToAtomic(volatile uint32 *var) // IN: +{ + return (Atomic_uint32 *)var; +} + +/* Convert a volatile uint64 to Atomic_uint64. */ +static INLINE Atomic_uint64 * +Atomic_VolatileToAtomic64(volatile uint64 *var) // IN: +{ + return (Atomic_uint64 *)var; +} + +/* + *----------------------------------------------------------------------------- + * + * Atomic_Init, Atomic_SetFence, AtomicUseFence -- + * + * Determine whether an lfence intruction is executed after + * every locked instruction. + * + * Certain AMD processors have a bug (see bug 107024) that + * requires an lfence after every locked instruction. + * + * The global variable AtomicUseFence controls whether lfence + * is used (see AtomicEpilogue). + * + * Atomic_SetFence sets AtomicUseFence to the given value. + * + * Atomic_Init computes and sets AtomicUseFence for x86. + * It does not take into account the number of processors. + * + * The rationale for all this complexity is that Atomic_Init + * is the easy-to-use interface. It can be called a number + * of times cheaply, and does not depend on other libraries. + * However, because the number of CPUs is difficult to compute, + * it does without it and always assumes there are more than one. + * + * For programs that care or have special requirements, + * Atomic_SetFence can be called directly, in addition to Atomic_Init. + * It overrides the effect of Atomic_Init, and can be called + * before, after, or between calls to Atomic_Init. + * + *----------------------------------------------------------------------------- + */ + +// The freebsd assembler doesn't know the lfence instruction +#if defined(__GNUC__) && \ + __GNUC__ >= 3 && \ + (defined(__VMKERNEL__) || !defined(__FreeBSD__)) && \ + (!defined(MODULE) || defined(__VMKERNEL_MODULE__)) && \ + !defined(__APPLE__) && \ + (defined(__i386__) || defined(__x86_64__)) /* PR136775 */ +#define ATOMIC_USE_FENCE +#endif + +/* + * Starting with vSphere 2014, we no longer support ESX on AMD Rev F. + * Thus, we can eliminate all dynamic checks for whether to enable + * the Errata 147 work-around when compiling many of our binaries. + * However, we use an opt-in approach here rather than assuming all + * parts of our builds are safe. For example, the "fdm" binary from + * a new build may time travel back to hosts running older versions + * of ESX on Rev F, so "fdm" continues to require the ability to + * dynamically enable the errata work-around. With vSphere 2017, + * this will no longer be required as the oldest version of ESX that + * VC 2017 will support is ESX 2014 (which won't run on Rev F). + * + * Modules may explicitly define MAY_NEED_AMD_REVF_WORKAROUND as 0 prior to + * inclusion of vm_atomic.h when they are safe on AMD Rev F with the elided + * lfence. 
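+ *
+ * For example, such a module would simply compile with:
+ *
+ *    #define MAY_NEED_AMD_REVF_WORKAROUND 0
+ *    #include "vm_atomic.h"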
+ */ +#if !defined(MAY_NEED_AMD_REVF_WORKAROUND) +#if (!defined(VMX86_SERVER) || \ + (!defined(VMX86_VMX) && !defined(VMKERNEL) && \ + !defined(VMM) && !defined(VMCORE))) +#define MAY_NEED_AMD_REVF_WORKAROUND 1 +#else +#define MAY_NEED_AMD_REVF_WORKAROUND 0 +#endif +#endif + +#if MAY_NEED_AMD_REVF_WORKAROUND +#if defined(VMATOMIC_IMPORT_DLLDATA) +VMX86_EXTERN_DATA Bool AtomicUseFence; +#else +EXTERN Bool AtomicUseFence; +#endif +EXTERN Bool atomicFenceInitialized; +#else /* MAY_NEED_AMD_REVF_WORKAROUND */ +#define AtomicUseFence FALSE +#define atomicFenceInitialized TRUE +#endif /* MAY_NEED_AMD_REVF_WORKAROUND */ + + +void AtomicInitFence(void); + +static INLINE void +Atomic_Init(void) +{ +#ifdef ATOMIC_USE_FENCE + if (MAY_NEED_AMD_REVF_WORKAROUND && !atomicFenceInitialized) { + AtomicInitFence(); + } +#endif +} + +static INLINE void +Atomic_SetFence(Bool fenceAfterLock) // IN: +{ + (void)fenceAfterLock; /* Work around unused parameter. */ +#if MAY_NEED_AMD_REVF_WORKAROUND + AtomicUseFence = fenceAfterLock; + atomicFenceInitialized = TRUE; +#endif +} + + +/* Conditionally execute fence after interlocked instruction. */ +static INLINE void +AtomicEpilogue(void) +{ +#if MAY_NEED_AMD_REVF_WORKAROUND && defined(ATOMIC_USE_FENCE) +#ifdef VMM + /* The monitor conditionally patches out the lfence when not needed.*/ + /* Construct a MonitorPatchTextEntry in the .patchtext section. */ + asm volatile ("1:\n\t" + "lfence\n\t" + "2:\n\t" + ".pushsection .patchtext\n\t" + ".quad 1b\n\t" + ".quad 2b\n\t" + ".quad 0\n\t" + ".popsection\n\t" ::: "memory"); +#else + if (UNLIKELY(AtomicUseFence)) { + asm volatile ("lfence" ::: "memory"); + } +#endif +#endif +} + + +/* + * All the assembly code is tricky and written conservatively. + * For example, to make sure gcc won't introduce copies, + * we force the addressing mode like this: + * + * "xchgl %0, (%1)" + * : "=r" (val) + * : "r" (&var->value), + * "0" (val) + * : "memory" + * + * - edward + * + * Actually - turns out that gcc never generates memory aliases (it + * still does generate register aliases though), so we can be a bit + * more agressive with the memory constraints. The code above can be + * modified like this: + * + * "xchgl %0, %1" + * : "=r" (val), + * "=m" (var->value), + * : "0" (val), + * "1" (var->value) + * + * The advantages are that gcc can use whatever addressing mode it + * likes to access the memory value, and that we dont have to use a + * way-too-generic "memory" clobber as there is now an explicit + * declaration that var->value is modified. + * + * see also /usr/include/asm/atomic.h to convince yourself this is a + * valid optimization. + * + * - walken + */ + +#if defined(_MSC_VER) && _MSC_VER < 1600 && defined(__x86_64__) +Bool VMWInterlockedExchangeBool(Bool volatile *ptr, + Bool val); + +Bool VMWInterlockedCompareExchangeBool(Bool volatile *ptr, + Bool newVal, + Bool oldVal); +#endif + +typedef struct Atomic_Bool { + volatile Bool value; +} Atomic_Bool; + +/* The ARM team can come along and add the code real soon now */ +extern Bool AtomicUndefinedOnARM(void); + +/* + *----------------------------------------------------------------------------- + * + * Atomic_ReadBool -- + * + * Read the value of the specified object atomically. + * + * Results: + * The value of the atomic variable. + * + * Side effects: + * None. 
+ * + *----------------------------------------------------------------------------- + */ + +static INLINE Bool +Atomic_ReadBool(Atomic_Bool const *var) // IN: +{ + Bool val; + +#if defined(__GNUC__) && (defined(VM_ARM_32) || defined(VM_ARM_64)) + val = AtomicUndefinedOnARM(); +#elif defined(__GNUC__) && (defined(__i386__) || defined(__x86_64__)) + __asm__ __volatile__( + "movb %1, %0" + : "=q" (val) + : "m" (var->value) + ); +#elif defined(_MSC_VER) + val = var->value; +#else +#error No compiler defined for Atomic_ReadBool +#endif + + return val; +} + + +/* + *----------------------------------------------------------------------------- + * + * Atomic_ReadWriteBool -- + * + * Read followed by write. + * + * Results: + * The value of the atomic variable before the write. + * + * Side effects: + * None + * + *----------------------------------------------------------------------------- + */ + +static INLINE Bool +Atomic_ReadWriteBool(Atomic_Bool *var, // IN/OUT: + Bool val) // IN: +{ +#if defined(__GNUC__) && (defined(VM_ARM_32) || defined(VM_ARM_64)) + return AtomicUndefinedOnARM(); +#elif defined(__GNUC__) && (defined(__i386__) || defined(__x86_64__)) + __asm__ __volatile__( + "xchgb %0, %1" + : "=q" (val), + "+m" (var->value) + : "0" (val) + ); + AtomicEpilogue(); + return val; +#elif defined(_MSC_VER) && _MSC_VER >= 1600 + return _InterlockedExchange8(&var->value, val); +#elif defined(_MSC_VER) && defined(__i386__) +#pragma warning(push) +#pragma warning(disable : 4035) // disable no-return warning + { + __asm movzx eax, val + __asm mov ebx, var + __asm xchg [ebx]Atomic_Bool.value, al + } +#pragma warning(pop) +#elif defined(_MSC_VER) && defined(__x86_64__) + return VMWInterlockedExchangeBool(&var->value, val); +#else +#error No compiler defined for Atomic_ReadBool +#endif +} + + +/* + *----------------------------------------------------------------------------- + * + * Atomic_WriteBool -- + * + * Write the specified value to the specified object atomically. + * + * Results: + * None. + * + * Side effects: + * None. + * + *----------------------------------------------------------------------------- + */ + +static INLINE void +Atomic_WriteBool(Atomic_Bool *var, // IN/OUT: + Bool val) // IN: +{ +#if defined(__GNUC__) && (defined(VM_ARM_32) || defined(VM_ARM_64)) + AtomicUndefinedOnARM(); +#elif defined(__GNUC__) && (defined(__i386__) || defined(__x86_64__)) + __asm__ __volatile__( + "movb %1, %0" + : "=m" (var->value) + : "qn" (val) + ); +#elif defined(_MSC_VER) + var->value = val; +#else +#error No compiler defined for Atomic_WriteBool +#endif +} + + +/* + *----------------------------------------------------------------------------- + * + * Atomic_ReadIfEqualWriteBool -- + * + * Compare exchange: Read variable, if equal to oldVal, write newVal. + * + * Results: + * The value of the atomic variable before the write. + * + * Side effects: + * The variable may be modified. 
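+ *
+ *      For example, an illustrative try-lock can be built on this
+ *      primitive ("lck" is a hypothetical Atomic_Bool):
+ *
+ *         if (Atomic_ReadIfEqualWriteBool(&lck, FALSE, TRUE) == FALSE) {
+ *            // we observed FALSE and wrote TRUE, so the "lock" is ours
+ *         }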
+ * + *----------------------------------------------------------------------------- + */ + +static INLINE Bool +Atomic_ReadIfEqualWriteBool(Atomic_Bool *var, // IN/OUT: + Bool oldVal, // IN: + Bool newVal) // IN: +{ +#if defined(__GNUC__) && (defined(VM_ARM_32) || defined(VM_ARM_64)) + return AtomicUndefinedOnARM(); +#elif defined(__GNUC__) && (defined(__i386__) || defined(__x86_64__)) + Bool val; + + __asm__ __volatile__( + "lock; cmpxchgb %2, %1" + : "=a" (val), + "+m" (var->value) + : "q" (newVal), + "0" (oldVal) + : "cc" + ); + AtomicEpilogue(); + return val; +#elif defined(_MSC_VER) && _MSC_VER >= 1600 + return _InterlockedCompareExchange8(&var->value, newVal, oldVal); +#elif defined(_MSC_VER) && defined(__i386__) +#pragma warning(push) +#pragma warning(disable : 4035) // disable no-return warning + { + __asm mov al, oldVal + __asm mov ebx, var + __asm mov cl, newVal + __asm lock cmpxchg [ebx]Atomic_Bool.value, cl + __asm movzx eax, al + // eax is the return value, this is documented to work - edward + } +#pragma warning(pop) +#elif defined(_MSC_VER) && defined(__x86_64__) + return VMWInterlockedCompareExchangeBool(&var->value, newVal, oldVal); +#else +#error No compiler defined for Atomic_ReadIfEqualWriteBool +#endif +} + + +/* + *----------------------------------------------------------------------------- + * + * Atomic_Read -- + * + * Read + * + * Results: + * The value of the atomic variable. + * + * Side effects: + * None. + * + *----------------------------------------------------------------------------- + */ + +static INLINE uint32 +Atomic_Read(Atomic_uint32 const *var) // IN +{ + uint32 value; + +#if defined(VMM) + ASSERT(((uintptr_t)var % 4) == 0); +#endif + +#if defined(__GNUC__) + /* + * Use inline assembler to force using a single load instruction to + * ensure that the compiler doesn't split a transfer operation into multiple + * instructions. + */ + +#if defined(VM_ARM_32) + __asm__ __volatile__( + "ldr %0, [%1]" + : "=r" (value) + : "r" (&var->value) + ); +#elif defined(VM_ARM_64) + __asm__ __volatile__ ( + "ldr %w0, [%1]" + : "=r" (value) + : "r" (&var->value) + ); +#else + __asm__ __volatile__( + "mov %1, %0" + : "=r" (value) + : "m" (var->value) + ); +#endif +#elif defined(_MSC_VER) + /* + * Microsoft docs guarantee simple reads and writes to properly + * aligned 32-bit variables use only a single instruction. + * http://msdn.microsoft.com/en-us/library/ms684122%28VS.85%29.aspx + */ + + value = var->value; +#else +#error No compiler defined for Atomic_Read +#endif + + return value; +} +#define Atomic_Read32 Atomic_Read + + +/* + *----------------------------------------------------------------------------- + * + * Atomic_ReadWrite -- + * + * Read followed by write + * + * Results: + * The value of the atomic variable before the write. 
+ * + * Side effects: + * None + * + *----------------------------------------------------------------------------- + */ + +static INLINE uint32 +Atomic_ReadWrite(Atomic_uint32 *var, // IN/OUT + uint32 val) // IN +{ +#if defined(__GNUC__) +#ifdef VM_ARM_V7 + register volatile uint32 retVal; + register volatile uint32 res; + + dmb(); + + __asm__ __volatile__( + "1: ldrex %[retVal], [%[var]] \n\t" + "strex %[res], %[val], [%[var]] \n\t" + "teq %[res], #0 \n\t" + "bne 1b" + : [retVal] "=&r" (retVal), [res] "=&r" (res) + : [var] "r" (&var->value), [val] "r" (val) + : "cc" + ); + + dmb(); + + return retVal; +#elif defined(VM_ARM_64) + register uint32 retVal; + register uint32 failed; + + __asm__ __volatile__( + " dmb sy \n\t" + "1: ldxr %w0, [%2] \n\t" + " stxr %w1, %w3, [%2] \n\t" + " cbnz %w1, 1b \n\t" + " dmb sy \n\t" + : "=&r" (retVal), "=&r" (failed) + : "r" (&var->value), "r" (val) + : "memory" + ); + + return retVal; +#else /* VM_X86_ANY */ + /* Checked against the Intel manual and GCC --walken */ + __asm__ __volatile__( + "xchgl %0, %1" + : "=r" (val), + "+m" (var->value) + : "0" (val) + ); + AtomicEpilogue(); + return val; +#endif /* VM_X86_ANY */ +#elif defined _MSC_VER +#if _MSC_VER >= 1310 + return _InterlockedExchange((long *)&var->value, (long)val); +#else +#pragma warning(push) +#pragma warning(disable : 4035) // disable no-return warning + { + __asm mov eax, val + __asm mov ebx, var + __asm xchg [ebx]Atomic_uint32.value, eax + // eax is the return value, this is documented to work - edward + } +#pragma warning(pop) +#endif // _MSC_VER >= 1310 +#else +#error No compiler defined for Atomic_ReadWrite +#endif // __GNUC__ +} +#define Atomic_ReadWrite32 Atomic_ReadWrite + + +/* + *----------------------------------------------------------------------------- + * + * Atomic_Write -- + * + * Write + * + * Results: + * None. + * + * Side effects: + * None. + * + *----------------------------------------------------------------------------- + */ + +static INLINE void +Atomic_Write(Atomic_uint32 *var, // OUT + uint32 val) // IN +{ +#if defined(VMM) + ASSERT(((uintptr_t)var % 4) == 0); +#endif + +#if defined(__GNUC__) +#if defined(VM_ARM_ANY) + /* + * A3.4.1 ARM DDI 0406C: + * + * When a processor writes using any instruction other than a + * Store-Exclusive: + * + * - if the write is to a physical address that is not covered by its local + * monitor the write does not affect the state of the local monitor + * - if the write is to a physical address that is covered by its local + * monitor it is IMPLEMENTATION DEFINED whether the write affects the + * state of the local monitor. + * + * A3.4.5 ARM DDI 0406C: + * + * If two STREX instructions are executed without an intervening LDREX the + * second STREX returns a status value of 1. This means that: + * + * - ARM recommends that, in a given thread of execution, every STREX has a + * preceding LDREX associated with it + * - it is not necessary for every LDREX to have a subsequent STREX. + */ + + Atomic_ReadWrite(var, val); +#else + /* + * Use inline assembler to force using a single store instruction to + * ensure that the compiler doesn't split a transfer operation into multiple + * instructions. + */ + + __asm__ __volatile__( + "mov %1, %0" + : "=m" (var->value) + : "r" (val) + ); +#endif +#elif defined(_MSC_VER) + /* + * Microsoft docs guarantee simple reads and writes to properly + * aligned 32-bit variables use only a single instruction. 
+ * http://msdn.microsoft.com/en-us/library/ms684122%28VS.85%29.aspx + */ + + var->value = val; +#else +#error No compiler defined for Atomic_Write +#endif +} +#define Atomic_Write32 Atomic_Write + + +/* + *----------------------------------------------------------------------------- + * + * Atomic_ReadIfEqualWrite -- + * + * Compare exchange: Read variable, if equal to oldVal, write newVal + * + * Results: + * The value of the atomic variable before the write. + * + * Side effects: + * The variable may be modified. + * + *----------------------------------------------------------------------------- + */ + +static INLINE uint32 +Atomic_ReadIfEqualWrite(Atomic_uint32 *var, // IN/OUT + uint32 oldVal, // IN + uint32 newVal) // IN +{ +#if defined(__GNUC__) +#ifdef VM_ARM_V7 + register uint32 retVal; + register uint32 res; + + dmb(); + + __asm__ __volatile__( + "1: ldrex %[retVal], [%[var]] \n\t" + "mov %[res], #0 \n\t" + "teq %[retVal], %[oldVal] \n\t" + "strexeq %[res], %[newVal], [%[var]] \n\t" + "teq %[res], #0 \n\t" + "bne 1b" + : [retVal] "=&r" (retVal), [res] "=&r" (res) + : [var] "r" (&var->value), [oldVal] "r" (oldVal), [newVal] "r" (newVal) + : "cc" + ); + + dmb(); + + return retVal; +#elif defined(VM_ARM_64) + register uint32 retVal; + register uint32 failed; + + __asm__ __volatile__ ( + " dmb sy \n\t" + "1: ldxr %w0, [%2] \n\t" + " cmp %w0, %w3 \n\t" + " b.ne 2f \n\t" + " stxr %w1, %w4, [%2] \n\t" + " cbnz %w1, 1b \n\t" + "2: clrex \n\t" + " dmb sy \n\t" + : "=&r" (retVal), "=&r" (failed) + : "r" (&var->value), "r" (oldVal), "r" (newVal) + : "cc", "memory"); + + return retVal; +#else /* VM_X86_ANY */ + uint32 val; + + /* Checked against the Intel manual and GCC --walken */ + __asm__ __volatile__( + "lock; cmpxchgl %2, %1" + : "=a" (val), + "+m" (var->value) + : "r" (newVal), + "0" (oldVal) + : "cc" + ); + AtomicEpilogue(); + return val; +#endif /* VM_X86_ANY */ +#elif defined _MSC_VER +#if _MSC_VER >= 1310 + return _InterlockedCompareExchange((long *)&var->value, + (long)newVal, + (long)oldVal); +#else +#pragma warning(push) +#pragma warning(disable : 4035) // disable no-return warning + { + __asm mov eax, oldVal + __asm mov ebx, var + __asm mov ecx, newVal + __asm lock cmpxchg [ebx]Atomic_uint32.value, ecx + // eax is the return value, this is documented to work - edward + } +#pragma warning(pop) +#endif +#else +#error No compiler defined for Atomic_ReadIfEqualWrite +#endif +} +#define Atomic_ReadIfEqualWrite32 Atomic_ReadIfEqualWrite + + +#if defined(VM_64BIT) || defined(VM_ARM_V7) +/* + *----------------------------------------------------------------------------- + * + * Atomic_ReadIfEqualWrite64 -- + * + * Compare exchange: Read variable, if equal to oldVal, write newVal + * + * Results: + * The value of the atomic variable before the write. + * + * Side effects: + * The variable may be modified. 
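+ *
+ *      Usage sketch (illustrative; 'maxSeen' and 'candidate' are hypothetical
+ *      caller-side names). The returned old value tells the caller whether
+ *      the compare-exchange took effect, which allows arbitrary
+ *      read-modify-write loops, e.g. an atomic maximum:
+ *
+ *         uint64 cur;
+ *         do {
+ *            cur = Atomic_Read64(&maxSeen);
+ *            if (candidate <= cur) {
+ *               break;                      // Nothing to update.
+ *            }
+ *         } while (Atomic_ReadIfEqualWrite64(&maxSeen, cur, candidate) != cur);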
+ * + *----------------------------------------------------------------------------- + */ + +static INLINE uint64 +Atomic_ReadIfEqualWrite64(Atomic_uint64 *var, // IN/OUT + uint64 oldVal, // IN + uint64 newVal) // IN +{ +#if defined(__GNUC__) +#ifdef VM_ARM_V7 + register uint64 retVal; + register uint32 res; + + dmb(); + + /* + * Under Apple LLVM version 5.0 (clang-500.2.76) (based on LLVM 3.3svn) + * There will be a warning: + * "value size does not match register size specified by the constraint + * and modifier [-Wasm-operand-widths]" + * on the lines: + * : [var] "r" (&var->value), [oldVal] "r" (oldVal), [newVal] "r" (newVal) + * ^ + * : [var] "r" (&var->value), [oldVal] "r" (oldVal), [newVal] "r" (newVal) + * ^ + * + * Furthermore, using a 32-bits register to store a + * 64-bits value of an variable looks risky. + */ +#if defined(__APPLE__) && __clang__ == 1 && __clang_major__ >= 5 +#pragma clang diagnostic push +#pragma clang diagnostic ignored "-Wasm-operand-widths" +#endif + __asm__ __volatile__( + "1: ldrexd %[retVal], %H[retVal], [%[var]] \n\t" + "mov %[res], #0 \n\t" + "teq %[retVal], %[oldVal] \n\t" + "teqeq %H[retVal], %H[oldVal] \n\t" + "strexdeq %[res], %[newVal], %H[newVal], [%[var]] \n\t" + "teq %[res], #0 \n\t" + "bne 1b" + : [retVal] "=&r" (retVal), [res] "=&r" (res) + : [var] "r" (&var->value), [oldVal] "r" (oldVal), [newVal] "r" (newVal) + : "cc" + ); +#if defined(__APPLE__) && __clang__ == 1 && __clang_major__ >= 5 +#pragma clang diagnostic pop +#endif // defined(__APPLE__) && __clang__ == 1 && __clang_major__ >= 5 + dmb(); + + return retVal; +#elif defined(VM_ARM_64) + register uint64 retVal; + register uint32 failed; + + __asm__ __volatile__ ( + " dmb sy \n\t" + "1: ldxr %x0, [%2] \n\t" + " cmp %x0, %x3 \n\t" + " b.ne 2f \n\t" + " stxr %w1, %x4, [%2] \n\t" + " cbnz %w1, 1b \n\t" + "2: clrex \n\t" + " dmb sy \n\t" + : "=&r" (retVal), "=&r" (failed) + : "r" (&var->value), "r" (oldVal), "r" (newVal) + : "cc", "memory"); + + return retVal; +#else /* VM_X86_64 */ + uint64 val; + + /* Checked against the AMD manual and GCC --hpreg */ + __asm__ __volatile__( + "lock; cmpxchgq %2, %1" + : "=a" (val), + "+m" (var->value) + : "r" (newVal), + "0" (oldVal) + : "cc" + ); + AtomicEpilogue(); + return val; +#endif //VM_ARM_V7 +#elif defined _MSC_VER + return _InterlockedCompareExchange64((__int64 *)&var->value, + (__int64)newVal, + (__int64)oldVal); +#else +#error No compiler defined for Atomic_ReadIfEqualWrite64 +#endif +} +#endif + + +/* + *----------------------------------------------------------------------------- + * + * Atomic_And -- + * + * Atomic read, bitwise AND with a value, write. 
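+ *
+ *      For example (names are illustrative, not defined here), a caller can
+ *      atomically clear status bits:
+ *
+ *         Atomic_And(&flags, ~STATUS_BUSY);   // 'flags' is an Atomic_uint32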
+ * + * Results: + * None + * + * Side effects: + * None + * + *----------------------------------------------------------------------------- + */ + +static INLINE void +Atomic_And(Atomic_uint32 *var, // IN/OUT + uint32 val) // IN +{ +#if defined(__GNUC__) +#ifdef VM_ARM_V7 + register volatile uint32 res; + register volatile uint32 tmp; + + dmb(); + + __asm__ __volatile__( + "1: ldrex %[tmp], [%[var]] \n\t" + "and %[tmp], %[tmp], %[val] \n\t" + "strex %[res], %[tmp], [%[var]] \n\t" + "teq %[res], #0 \n\t" + "bne 1b" + : [res] "=&r" (res), [tmp] "=&r" (tmp) + : [var] "r" (&var->value), [val] "r" (val) + : "cc" + ); + + dmb(); +#elif defined(VM_ARM_64) + ARM_64_ATOMIC_32_OPV_PRIVATE_FENCED(var, val, and); +#else /* VM_X86_ANY */ + /* Checked against the Intel manual and GCC --walken */ + __asm__ __volatile__( + "lock; andl %1, %0" + : "+m" (var->value) + : "ri" (val) + : "cc" + ); + AtomicEpilogue(); +#endif /* VM_X86_ANY */ +#elif defined _MSC_VER +#if defined(__x86_64__) + _InterlockedAnd((long *)&var->value, (long)val); +#else + __asm mov eax, val + __asm mov ebx, var + __asm lock and [ebx]Atomic_uint32.value, eax +#endif +#else +#error No compiler defined for Atomic_And +#endif +} +#define Atomic_And32 Atomic_And + + +/* + *----------------------------------------------------------------------------- + * + * Atomic_Or -- + * + * Atomic read, bitwise OR with a value, write. + * + * Results: + * None + * + * Side effects: + * None + * + *----------------------------------------------------------------------------- + */ + +static INLINE void +Atomic_Or(Atomic_uint32 *var, // IN/OUT + uint32 val) // IN +{ +#if defined(__GNUC__) +#ifdef VM_ARM_V7 + register volatile uint32 res; + register volatile uint32 tmp; + + dmb(); + + __asm__ __volatile__( + "1: ldrex %[tmp], [%[var]] \n\t" + "orr %[tmp], %[tmp], %[val] \n\t" + "strex %[res], %[tmp], [%[var]] \n\t" + "teq %[res], #0 \n\t" + "bne 1b" + : [res] "=&r" (res), [tmp] "=&r" (tmp) + : [var] "r" (&var->value), [val] "r" (val) + : "cc" + ); + + dmb(); +#elif defined(VM_ARM_64) + ARM_64_ATOMIC_32_OPV_PRIVATE_FENCED(var, val, orr); +#else /* VM_X86_ANY */ + /* Checked against the Intel manual and GCC --walken */ + __asm__ __volatile__( + "lock; orl %1, %0" + : "+m" (var->value) + : "ri" (val) + : "cc" + ); + AtomicEpilogue(); +#endif /* VM_X86_ANY */ +#elif defined _MSC_VER +#if defined(__x86_64__) + _InterlockedOr((long *)&var->value, (long)val); +#else + __asm mov eax, val + __asm mov ebx, var + __asm lock or [ebx]Atomic_uint32.value, eax +#endif +#else +#error No compiler defined for Atomic_Or +#endif +} +#define Atomic_Or32 Atomic_Or + + +/* + *----------------------------------------------------------------------------- + * + * Atomic_Xor -- + * + * Atomic read, bitwise XOR with a value, write. 
+ * + * Results: + * None + * + * Side effects: + * None + * + *----------------------------------------------------------------------------- + */ + +static INLINE void +Atomic_Xor(Atomic_uint32 *var, // IN/OUT + uint32 val) // IN +{ +#if defined(__GNUC__) +#ifdef VM_ARM_V7 + register volatile uint32 res; + register volatile uint32 tmp; + + dmb(); + + __asm__ __volatile__( + "1: ldrex %[tmp], [%[var]] \n\t" + "eor %[tmp], %[tmp], %[val] \n\t" + "strex %[res], %[tmp], [%[var]] \n\t" + "teq %[res], #0 \n\t" + "bne 1b" + : [res] "=&r" (res), [tmp] "=&r" (tmp) + : [var] "r" (&var->value), [val] "r" (val) + : "cc" + ); + + dmb(); +#elif defined(VM_ARM_64) + ARM_64_ATOMIC_32_OPV_PRIVATE_FENCED(var, val, eor); +#else /* VM_X86_ANY */ + /* Checked against the Intel manual and GCC --walken */ + __asm__ __volatile__( + "lock; xorl %1, %0" + : "+m" (var->value) + : "ri" (val) + : "cc" + ); + AtomicEpilogue(); +#endif /* VM_X86_ANY */ +#elif defined _MSC_VER +#if defined(__x86_64__) + _InterlockedXor((long *)&var->value, (long)val); +#else + __asm mov eax, val + __asm mov ebx, var + __asm lock xor [ebx]Atomic_uint32.value, eax +#endif +#else +#error No compiler defined for Atomic_Xor +#endif +} +#define Atomic_Xor32 Atomic_Xor + + +#if defined(VM_64BIT) +/* + *----------------------------------------------------------------------------- + * + * Atomic_Xor64 -- + * + * Atomic read, bitwise XOR with a value, write. + * + * Results: + * None + * + * Side effects: + * None + * + *----------------------------------------------------------------------------- + */ + +static INLINE void +Atomic_Xor64(Atomic_uint64 *var, // IN/OUT + uint64 val) // IN +{ +#if defined(__GNUC__) +#if defined(VM_ARM_64) + ARM_64_ATOMIC_64_OPV_PRIVATE_FENCED(var, val, eor); +#else /* VM_X86_64 */ + /* Checked against the AMD manual and GCC --hpreg */ + __asm__ __volatile__( + "lock; xorq %1, %0" + : "+m" (var->value) + : "re" (val) + : "cc" + ); + AtomicEpilogue(); +#endif +#elif defined _MSC_VER + _InterlockedXor64((__int64 *)&var->value, (__int64)val); +#else +#error No compiler defined for Atomic_Xor64 +#endif +} +#endif + + +/* + *----------------------------------------------------------------------------- + * + * Atomic_Add -- + * + * Atomic read, add a value, write. 
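+ *
+ *      For example (illustrative names only), accumulating a byte count
+ *      written from several threads:
+ *
+ *         Atomic_Add(&bytesQueued, pktLen);   // 'bytesQueued' is Atomic_uint32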
+ * + * Results: + * None + * + * Side effects: + * None + * + *----------------------------------------------------------------------------- + */ + +static INLINE void +Atomic_Add(Atomic_uint32 *var, // IN/OUT + uint32 val) // IN +{ +#if defined(__GNUC__) +#ifdef VM_ARM_V7 + register volatile uint32 res; + register volatile uint32 tmp; + + dmb(); + + __asm__ __volatile__( + "1: ldrex %[tmp], [%[var]] \n\t" + "add %[tmp], %[tmp], %[val] \n\t" + "strex %[res], %[tmp], [%[var]] \n\t" + "teq %[res], #0 \n\t" + "bne 1b" + : [res] "=&r" (res), [tmp] "=&r" (tmp) + : [var] "r" (&var->value), [val] "r" (val) + : "cc" + ); + + dmb(); +#elif defined(VM_ARM_64) + ARM_64_ATOMIC_32_OPV_PRIVATE_FENCED(var, val, add); +#else /* VM_X86_ANY */ + /* Checked against the Intel manual and GCC --walken */ + __asm__ __volatile__( + "lock; addl %1, %0" + : "+m" (var->value) + : "ri" (val) + : "cc" + ); + AtomicEpilogue(); +#endif /* VM_X86_ANY */ +#elif defined _MSC_VER +#if _MSC_VER >= 1310 + _InterlockedExchangeAdd((long *)&var->value, (long)val); +#else + __asm mov eax, val + __asm mov ebx, var + __asm lock add [ebx]Atomic_uint32.value, eax +#endif +#else +#error No compiler defined for Atomic_Add +#endif +} +#define Atomic_Add32 Atomic_Add + + +/* + *----------------------------------------------------------------------------- + * + * Atomic_Sub -- + * + * Atomic read, subtract a value, write. + * + * Results: + * None + * + * Side effects: + * None + * + *----------------------------------------------------------------------------- + */ + +static INLINE void +Atomic_Sub(Atomic_uint32 *var, // IN/OUT + uint32 val) // IN +{ +#if defined(__GNUC__) +#ifdef VM_ARM_V7 + register volatile uint32 res; + register volatile uint32 tmp; + + dmb(); + + __asm__ __volatile__( + "1: ldrex %[tmp], [%[var]] \n\t" + "sub %[tmp], %[tmp], %[val] \n\t" + "strex %[res], %[tmp], [%[var]] \n\t" + "teq %[res], #0 \n\t" + "bne 1b" + : [res] "=&r" (res), [tmp] "=&r" (tmp) + : [var] "r" (&var->value), [val] "r" (val) + : "cc" + ); + + dmb(); +#elif defined(VM_ARM_64) + ARM_64_ATOMIC_32_OPV_PRIVATE_FENCED(var, val, sub); +#else /* VM_X86_ANY */ + /* Checked against the Intel manual and GCC --walken */ + __asm__ __volatile__( + "lock; subl %1, %0" + : "+m" (var->value) + : "ri" (val) + : "cc" + ); + AtomicEpilogue(); +#endif /* VM_X86_ANY */ +#elif defined _MSC_VER +#if _MSC_VER >= 1310 + _InterlockedExchangeAdd((long *)&var->value, (long)-val); +#else + __asm mov eax, val + __asm mov ebx, var + __asm lock sub [ebx]Atomic_uint32.value, eax +#endif +#else +#error No compiler defined for Atomic_Sub +#endif +} +#define Atomic_Sub32 Atomic_Sub + + +/* + *----------------------------------------------------------------------------- + * + * Atomic_Inc -- + * + * Atomic read, increment, write. 
+ * + * Results: + * None + * + * Side effects: + * None + * + *----------------------------------------------------------------------------- + */ + +static INLINE void +Atomic_Inc(Atomic_uint32 *var) // IN/OUT +{ +#ifdef __GNUC__ +#if defined(VM_ARM_ANY) + Atomic_Add(var, 1); +#else /* VM_X86_ANY */ + /* Checked against the Intel manual and GCC --walken */ + __asm__ __volatile__( + "lock; incl %0" + : "+m" (var->value) + : + : "cc" + ); + AtomicEpilogue(); +#endif /* VM_X86_ANY */ +#elif defined _MSC_VER +#if _MSC_VER >= 1310 + _InterlockedIncrement((long *)&var->value); +#else + __asm mov ebx, var + __asm lock inc [ebx]Atomic_uint32.value +#endif +#else +#error No compiler defined for Atomic_Inc +#endif +} +#define Atomic_Inc32 Atomic_Inc + + +/* + *----------------------------------------------------------------------------- + * + * Atomic_Dec -- + * + * Atomic read, decrement, write. + * + * Results: + * None + * + * Side effects: + * None + * + *----------------------------------------------------------------------------- + */ + +static INLINE void +Atomic_Dec(Atomic_uint32 *var) // IN/OUT +{ +#ifdef __GNUC__ +#if defined(VM_ARM_ANY) + Atomic_Sub(var, 1); +#else /* VM_X86_ANY */ + /* Checked against the Intel manual and GCC --walken */ + __asm__ __volatile__( + "lock; decl %0" + : "+m" (var->value) + : + : "cc" + ); + AtomicEpilogue(); +#endif /* VM_X86_ANY */ +#elif defined _MSC_VER +#if _MSC_VER >= 1310 + _InterlockedDecrement((long *)&var->value); +#else + __asm mov ebx, var + __asm lock dec [ebx]Atomic_uint32.value +#endif +#else +#error No compiler defined for Atomic_Dec +#endif +} +#define Atomic_Dec32 Atomic_Dec + + +/* + * Note that the technique below can be used to implement ReadX(), where X is + * an arbitrary mathematical function. + */ + + +/* + *----------------------------------------------------------------------------- + * + * Atomic_ReadOr32 -- + * + * Atomic read (returned), bitwise OR with a value, write. + * + * Results: + * The value of the variable before the operation. + * + * Side effects: + * None + * + *----------------------------------------------------------------------------- + */ + +static INLINE uint32 +Atomic_ReadOr32(Atomic_uint32 *var, // IN/OUT + uint32 val) // IN +{ + uint32 res; + + do { + res = Atomic_Read(var); + } while (res != Atomic_ReadIfEqualWrite(var, res, res | val)); + + return res; +} + + +/* + *----------------------------------------------------------------------------- + * + * Atomic_ReadAnd32 -- + * + * Atomic read (returned), bitwise And with a value, write. + * + * Results: + * The value of the variable before the operation. + * + * Side effects: + * None + * + *----------------------------------------------------------------------------- + */ + +static INLINE uint32 +Atomic_ReadAnd32(Atomic_uint32 *var, // IN/OUT + uint32 val) // IN +{ + uint32 res; + + do { + res = Atomic_Read(var); + } while (res != Atomic_ReadIfEqualWrite(var, res, res & val)); + + return res; +} + + +#if defined(VM_64BIT) +/* + *----------------------------------------------------------------------------- + * + * Atomic_ReadOr64 -- + * + * Atomic read (returned), bitwise OR with a value, write. + * + * Results: + * The value of the variable before the operation. 
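+ *
+ *      Usage sketch (illustrative; 'eventMask' and EVENT_PENDING are
+ *      hypothetical caller-side names). Since the pre-OR value is returned,
+ *      the caller can tell whether it was the one that set a bit:
+ *
+ *         if ((Atomic_ReadOr64(&eventMask, EVENT_PENDING) & EVENT_PENDING) == 0) {
+ *            // The bit was clear before this call; schedule the handler once.
+ *         }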
+ * + * Side effects: + * None + * + *----------------------------------------------------------------------------- + */ + +static INLINE uint64 +Atomic_ReadOr64(Atomic_uint64 *var, // IN/OUT + uint64 val) // IN +{ + uint64 res; + + do { + res = var->value; + } while (res != Atomic_ReadIfEqualWrite64(var, res, res | val)); + + return res; +} + + +/* + *----------------------------------------------------------------------------- + * + * Atomic_ReadAnd64 -- + * + * Atomic read (returned), bitwise AND with a value, write. + * + * Results: + * The value of the variable before the operation. + * + * Side effects: + * None + * + *----------------------------------------------------------------------------- + */ + +static INLINE uint64 +Atomic_ReadAnd64(Atomic_uint64 *var, // IN/OUT + uint64 val) // IN +{ + uint64 res; + + do { + res = var->value; + } while (res != Atomic_ReadIfEqualWrite64(var, res, res & val)); + + return res; +} +#endif /* defined(VM_64BIT) */ + + +/* + *----------------------------------------------------------------------------- + * + * Atomic_ReadAdd32 -- + * + * Atomic read (returned), add a value, write. + * + * If you have to implement ReadAdd32() on an architecture other than + * x86 or x86-64, you might want to consider doing something similar to + * Atomic_ReadOr32(). + * + * Results: + * The value of the variable before the operation. + * + * Side effects: + * None + * + *----------------------------------------------------------------------------- + */ + +static INLINE uint32 +Atomic_ReadAdd32(Atomic_uint32 *var, // IN/OUT + uint32 val) // IN +{ +#if defined(__GNUC__) +#ifdef VM_ARM_V7 + register volatile uint32 res; + register volatile uint32 retVal; + register volatile uint32 tmp; + + dmb(); + + __asm__ __volatile__( + "1: ldrex %[retVal], [%[var]] \n\t" + "add %[tmp], %[val], %[retVal] \n\t" + "strex %[res], %[tmp], [%[var]] \n\t" + "teq %[res], #0 \n\t" + "bne 1b" + : [tmp] "=&r" (tmp), [res] "=&r" (res), [retVal] "=&r" (retVal) + : [var] "r" (&var->value), [val] "r" (val) + : "cc" + ); + + dmb(); + + return retVal; +#elif defined(VM_ARM_64) + return ARM_64_ATOMIC_32_OPO_PRIVATE_FENCED(var, val, add); +#else /* VM_X86_ANY */ + /* Checked against the Intel manual and GCC --walken */ + __asm__ __volatile__( + "lock; xaddl %0, %1" + : "=r" (val), + "+m" (var->value) + : "0" (val) + : "cc" + ); + AtomicEpilogue(); + return val; +#endif /* VM_X86_ANY */ +#elif defined _MSC_VER +#if _MSC_VER >= 1310 + return _InterlockedExchangeAdd((long *)&var->value, (long)val); +#else +#pragma warning(push) +#pragma warning(disable : 4035) // disable no-return warning + { + __asm mov eax, val + __asm mov ebx, var + __asm lock xadd [ebx]Atomic_uint32.value, eax + } +#pragma warning(pop) +#endif +#else +#error No compiler defined for Atomic_ReadAdd32 +#endif +} + + +/* + *----------------------------------------------------------------------------- + * + * Atomic_ReadInc32 -- + * + * Atomic read (returned), increment, write. + * + * Results: + * The value of the variable before the operation. + * + * Side effects: + * None + * + *----------------------------------------------------------------------------- + */ + +static INLINE uint32 +Atomic_ReadInc32(Atomic_uint32 *var) // IN/OUT +{ + return Atomic_ReadAdd32(var, 1); +} + + +/* + *----------------------------------------------------------------------------- + * + * Atomic_ReadDec32 -- + * + * Atomic read (returned), decrement, write. + * + * Results: + * The value of the variable before the operation. 
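+ *
+ *      Usage sketch (illustrative; 'obj', 'refCount' and ObjDestroy() are
+ *      hypothetical). Returning the pre-decrement value gives the usual
+ *      reference-release idiom:
+ *
+ *         if (Atomic_ReadDec32(&obj->refCount) == 1) {
+ *            ObjDestroy(obj);   // Count just dropped from 1 to 0.
+ *         }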
+ * + * Side effects: + * None + * + *----------------------------------------------------------------------------- + */ + +static INLINE uint32 +Atomic_ReadDec32(Atomic_uint32 *var) // IN/OUT +{ + return Atomic_ReadAdd32(var, (uint32)-1); +} + + +#if defined VMKERNEL || defined VMM +#if !defined(VM_ARM_64) +/* + *----------------------------------------------------------------------------- + * + * CMPXCHG1B -- + * + * Compare and exchange a single byte. + * + * Results: + * The value read from ptr. + * + * Side effects: + * None + * + *----------------------------------------------------------------------------- + */ +static INLINE uint8 +CMPXCHG1B(volatile uint8 *ptr, // IN/OUT + uint8 oldVal, // IN + uint8 newVal) // IN +{ + uint8 val; + __asm__ __volatile__("lock; cmpxchgb %b2, %1" + : "=a" (val), + "+m" (*ptr) + : "r" (newVal), + "0" (oldVal) + : "cc"); + return val; +} +#endif /* !defined(VM_ARM_64) */ +#endif + + +/* + * Usage of this helper struct is strictly reserved to the following + * function. --hpreg + */ +typedef struct { + uint32 lowValue; + uint32 highValue; +} S_uint64; + + +/* + *----------------------------------------------------------------------------- + * + * Atomic_CMPXCHG64 -- + * + * Compare exchange: Read variable, if equal to oldVal, write newVal + * + * XXX: Ensure that if this function is to be inlined by gcc, it is + * compiled with -fno-strict-aliasing. Otherwise it will break. + * Unfortunately we know that gcc 2.95.3 (used to build the FreeBSD 3.2 + * Tools) does not honor -fno-strict-aliasing. As a workaround, we avoid + * inlining the function entirely for versions of gcc under 3.0. + * + * Results: + * TRUE if equal, FALSE if not equal + * + * Side effects: + * None + * + *----------------------------------------------------------------------------- + */ + +#if defined(__GNUC__) && __GNUC__ < 3 +static Bool +#else +static INLINE Bool +#endif +Atomic_CMPXCHG64(Atomic_uint64 *var, // IN/OUT + uint64 const *oldVal, // IN + uint64 const *newVal) // IN +{ +#if defined(__GNUC__) +#if defined(VM_ARM_ANY) + return (Atomic_ReadIfEqualWrite64(var, *oldVal, *newVal) == *oldVal); +#else /* VM_X86_ANY */ + + Bool equal; + /* Checked against the Intel manual and GCC --walken */ +#if defined(__x86_64__) + uint64 dummy; + __asm__ __volatile__( + "lock; cmpxchgq %3, %0" "\n\t" + "sete %1" + : "+m" (*var), + "=qm" (equal), + "=a" (dummy) + : "r" (*newVal), + "2" (*oldVal) + : "cc" + ); +#else /* 32-bit version for non-ARM */ + int dummy1, dummy2; +# if defined __PIC__ + /* + * Rules for __asm__ statements in __PIC__ code + * -------------------------------------------- + * + * The compiler uses %ebx for __PIC__ code, so an __asm__ statement cannot + * clobber %ebx. The __asm__ statement can temporarily modify %ebx, but _for + * each parameter that is used while %ebx is temporarily modified_: + * + * 1) The constraint cannot be "m", because the memory location the compiler + * chooses could then be relative to %ebx. + * + * 2) The constraint cannot be a register class which contains %ebx (such as + * "r" or "q"), because the register the compiler chooses could then be + * %ebx. (This happens when compiling the Fusion UI with gcc 4.2.1, Apple + * build 5577.) + * + * 3) Using register classes even for other values is problematic, as gcc + * can decide e.g. %ecx == %edi == 0 (as compile-time constants) and + * ends up using one register for two things. Which breaks xchg's ability + * to temporarily put the PIC pointer somewhere else. 
PR772455 + * + * For that reason alone, the __asm__ statement should keep the regions + * where it temporarily modifies %ebx as small as possible, and should + * prefer specific register assignments. + */ +# if __GNUC__ < 3 // Part of #188541 - for RHL 6.2 etc. + __asm__ __volatile__( + "xchg %%ebx, %6" "\n\t" + "mov 4(%%ebx), %%ecx" "\n\t" + "mov (%%ebx), %%ebx" "\n\t" + "lock; cmpxchg8b (%3)" "\n\t" + "xchg %%ebx, %6" "\n\t" + "sete %0" + : "=a" (equal), + "=d" (dummy2), + "=D" (dummy1) + : /* + * See the "Rules for __asm__ statements in __PIC__ code" above: %3 + * must use a register class which does not contain %ebx. + */ + "S" (var), + "0" (((S_uint64 const *)oldVal)->lowValue), + "1" (((S_uint64 const *)oldVal)->highValue), + "D" (newVal) + : "ecx", "cc", "memory" + ); +# else + __asm__ __volatile__( + "xchgl %%ebx, %6" "\n\t" + "lock; cmpxchg8b (%3)" "\n\t" + "xchgl %%ebx, %6" "\n\t" + "sete %0" + : "=qm" (equal), + "=a" (dummy1), + "=d" (dummy2) + : /* + * See the "Rules for __asm__ statements in __PIC__ code" above: %3 + * must use a register class which does not contain %ebx. + * "a"/"c"/"d" are already used, so we are left with either "S" or "D". + * + * Note that this assembly uses ALL GP registers (with %esp reserved for + * stack, %ebp reserved for frame, %ebx reserved for PIC). + */ + "S" (var), + "1" (((S_uint64 const *)oldVal)->lowValue), + "2" (((S_uint64 const *)oldVal)->highValue), + "D" (((S_uint64 const *)newVal)->lowValue), + "c" (((S_uint64 const *)newVal)->highValue) + : "cc", "memory" + ); +# endif +# else + __asm__ __volatile__( + "lock; cmpxchg8b %0" "\n\t" + "sete %1" + : "+m" (*var), + "=qm" (equal), + "=a" (dummy1), + "=d" (dummy2) + : "2" (((S_uint64 const *)oldVal)->lowValue), + "3" (((S_uint64 const *)oldVal)->highValue), + "b" (((S_uint64 const *)newVal)->lowValue), + "c" (((S_uint64 const *)newVal)->highValue) + : "cc" + ); +# endif +#endif + AtomicEpilogue(); + return equal; +#endif //VM_ARM_V7 +#elif defined _MSC_VER +#if defined(__x86_64__) + return (__int64)*oldVal == _InterlockedCompareExchange64((__int64 *)&var->value, + (__int64)*newVal, + (__int64)*oldVal); +#else +#pragma warning(push) +#pragma warning(disable : 4035) // disable no-return warning + { + __asm mov esi, var + __asm mov edx, oldVal + __asm mov ecx, newVal + __asm mov eax, [edx]S_uint64.lowValue + __asm mov edx, [edx]S_uint64.highValue + __asm mov ebx, [ecx]S_uint64.lowValue + __asm mov ecx, [ecx]S_uint64.highValue + __asm lock cmpxchg8b [esi] + __asm sete al + __asm movzx eax, al + // eax is the return value, this is documented to work - edward + } +#pragma warning(pop) +#endif +#else +#error No compiler defined for Atomic_CMPXCHG64 +#endif // !GNUC +} + + +/* + *----------------------------------------------------------------------------- + * + * Atomic_CMPXCHG32 -- + * + * Compare exchange: Read variable, if equal to oldVal, write newVal + * + * Results: + * TRUE if equal, FALSE if not equal + * + * Side effects: + * None + * + *----------------------------------------------------------------------------- + */ + +static INLINE Bool +Atomic_CMPXCHG32(Atomic_uint32 *var, // IN/OUT + uint32 oldVal, // IN + uint32 newVal) // IN +{ +#if defined(__GNUC__) +#if defined(VM_ARM_ANY) + return (Atomic_ReadIfEqualWrite(var, oldVal, newVal) == oldVal); +#else /* VM_X86_ANY */ + Bool equal; + uint32 dummy; + + __asm__ __volatile__( + "lock; cmpxchgl %3, %0" "\n\t" + "sete %1" + : "+m" (*var), + "=qm" (equal), + "=a" (dummy) + : "r" (newVal), + "2" (oldVal) + : "cc" + ); + AtomicEpilogue(); + 
return equal; +#endif /* VM_X86_ANY */ +#else // defined(__GNUC__) + return (Atomic_ReadIfEqualWrite(var, oldVal, newVal) == oldVal); +#endif // !defined(__GNUC__) +} + + +/* + *----------------------------------------------------------------------------- + * + * Atomic_Read64 -- + * + * Read and return. + * + * Results: + * The value of the atomic variable. + * + * Side effects: + * None. + * + *----------------------------------------------------------------------------- + */ + +static INLINE uint64 +Atomic_Read64(Atomic_uint64 const *var) // IN +{ +#if defined(__GNUC__) && defined(__x86_64__) + uint64 value; + +#ifdef VMM + ASSERT((uintptr_t)var % 8 == 0); +#endif + /* + * Use asm to ensure we emit a single load. + */ + __asm__ __volatile__( + "movq %1, %0" + : "=r" (value) + : "m" (var->value) + ); + return value; +#elif defined(__GNUC__) && defined(__i386__) + uint64 value; + /* + * Since cmpxchg8b will replace the contents of EDX:EAX with the + * value in memory if there is no match, we need only execute the + * instruction once in order to atomically read 64 bits from + * memory. The only constraint is that ECX:EBX must have the same + * value as EDX:EAX so that if the comparison succeeds. We + * intentionally don't tell gcc that we are using ebx and ecx as we + * don't modify them and do not care what value they store. + */ + __asm__ __volatile__( + "mov %%ebx, %%eax" "\n\t" + "mov %%ecx, %%edx" "\n\t" + "lock; cmpxchg8b %1" + : "=&A" (value) + : "m" (*var) + : "cc" + ); + AtomicEpilogue(); + return value; +#elif defined (_MSC_VER) && defined(__x86_64__) + /* + * Microsoft docs guarantee "Simple reads and writes to properly + * aligned 64-bit variables are atomic on 64-bit Windows." + * http://msdn.microsoft.com/en-us/library/ms684122%28VS.85%29.aspx + * + * XXX Verify that value is properly aligned. Bug 61315. + */ + return var->value; +#elif defined (_MSC_VER) && defined(__i386__) +# pragma warning(push) +# pragma warning(disable : 4035) // disable no-return warning + { + __asm mov ecx, var + __asm mov edx, ecx + __asm mov eax, ebx + __asm lock cmpxchg8b [ecx] + // edx:eax is the return value; this is documented to work. --mann + } +# pragma warning(pop) +#elif defined(__GNUC__) && defined (VM_ARM_V7) + uint64 value; + + __asm__ __volatile__( + "ldrexd %[value], %H[value], [%[var]] \n\t" + : [value] "=&r" (value) + : [var] "r" (&var->value) + ); + + return value; +#elif defined(__GNUC__) && defined(VM_ARM_64) + uint64 value; + + __asm__ __volatile__( + "ldr %0, [%1]" + : "=r" (value) + : "r" (&var->value)); + + return value; +#endif /* defined(__GNUC__) && defined(VM_ARM_64) */ +} + + +/* + *---------------------------------------------------------------------- + * + * Atomic_ReadUnaligned64 -- + * + * Atomically read a 64 bit integer, possibly misaligned. + * This function can be *very* expensive, costing over 50 kcycles + * on Nehalem. + * + * Note that "var" needs to be writable, even though it will not + * be modified. + * + * Results: + * The value of the atomic variable. 
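+ *
+ *      Usage sketch (illustrative; 'hdr' and its 'seq' field are hypothetical
+ *      and possibly misaligned, e.g. inside a packed structure):
+ *
+ *         uint64 seq = Atomic_ReadUnaligned64((Atomic_uint64 const *)&hdr->seq);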
+ * + * Side effects: + * None + * + *---------------------------------------------------------------------- + */ + +#if defined(VM_64BIT) +static INLINE uint64 +Atomic_ReadUnaligned64(Atomic_uint64 const *var) // IN: +{ + return Atomic_ReadIfEqualWrite64((Atomic_uint64*)var, 0, 0); +} +#endif + + +/* + *---------------------------------------------------------------------- + * + * Atomic_ReadAdd64 -- + * + * Atomically adds a 64-bit integer to another + * + * Results: + * Returns the old value just prior to the addition + * + * Side effects: + * None + * + *---------------------------------------------------------------------- + */ + +static INLINE uint64 +Atomic_ReadAdd64(Atomic_uint64 *var, // IN/OUT + uint64 val) // IN +{ +#if defined(VM_ARM_64) + return ARM_64_ATOMIC_64_OPO_PRIVATE_FENCED(var, val, add); +#elif defined(__x86_64__) + +#if defined(__GNUC__) + __asm__ __volatile__( + "lock; xaddq %0, %1" + : "=r" (val), + "+m" (var->value) + : "0" (val) + : "cc" + ); + AtomicEpilogue(); + return val; +#elif defined _MSC_VER + return _InterlockedExchangeAdd64((__int64 *)&var->value, (__int64)val); +#else +#error No compiler defined for Atomic_ReadAdd64 +#endif + +#else + uint64 oldVal; + uint64 newVal; + + do { + oldVal = var->value; + newVal = oldVal + val; + } while (!Atomic_CMPXCHG64(var, &oldVal, &newVal)); + + return oldVal; +#endif +} + + +/* + *---------------------------------------------------------------------- + * + * Atomic_ReadSub64 -- + * + * Atomically subtracts a 64-bit integer to another + * + * Results: + * Returns the old value just prior to the subtraction + * + * Side effects: + * None + * + *---------------------------------------------------------------------- + */ + +static INLINE uint64 +Atomic_ReadSub64(Atomic_uint64 *var, // IN/OUT + uint64 val) // IN +{ + return Atomic_ReadAdd64(var, -val); +} + + +/* + *---------------------------------------------------------------------- + * + * Atomic_ReadInc64 -- + * + * Atomically increments a 64-bit integer + * + * Results: + * Returns the old value just prior to incrementing + * + * Side effects: + * None + * + *---------------------------------------------------------------------- + */ + +static INLINE uint64 +Atomic_ReadInc64(Atomic_uint64 *var) // IN/OUT +{ + return Atomic_ReadAdd64(var, 1); +} + + +/* + *---------------------------------------------------------------------- + * + * Atomic_ReadDec64 -- + * + * Atomically decrements a 64-bit integer + * + * Results: + * Returns the old value just prior to decrementing + * + * Side effects: + * None + * + *---------------------------------------------------------------------- + */ + +static INLINE uint64 +Atomic_ReadDec64(Atomic_uint64 *var) // IN/OUT +{ + return Atomic_ReadAdd64(var, CONST64U(-1)); +} + + +/* + *----------------------------------------------------------------------------- + * + * Atomic_Add64 -- + * + * Atomic read, add a value, write. + * + * Results: + * None + * + * Side effects: + * None + * + *----------------------------------------------------------------------------- + */ + +static INLINE void +Atomic_Add64(Atomic_uint64 *var, // IN/OUT + uint64 val) // IN +{ +#if !defined(VM_64BIT) + Atomic_ReadAdd64(var, val); /* Return value is unused. 
*/ +#elif defined(__GNUC__) +#if defined(VM_ARM_64) + ARM_64_ATOMIC_64_OPV_PRIVATE_FENCED(var, val, add); +#else /* defined(VM_X86_64) */ + /* Checked against the AMD manual and GCC --hpreg */ + __asm__ __volatile__( + "lock; addq %1, %0" + : "+m" (var->value) + : "re" (val) + : "cc" + ); + AtomicEpilogue(); +#endif +#elif defined _MSC_VER + _InterlockedExchangeAdd64((__int64 *)&var->value, (__int64)val); +#else +#error No compiler defined for Atomic_Add64 +#endif +} + + +/* + *----------------------------------------------------------------------------- + * + * Atomic_Sub64 -- + * + * Atomic read, subtract a value, write. + * + * Results: + * None + * + * Side effects: + * None + * + *----------------------------------------------------------------------------- + */ + +static INLINE void +Atomic_Sub64(Atomic_uint64 *var, // IN/OUT + uint64 val) // IN +{ +#if !defined(__x86_64__) + Atomic_ReadSub64(var, val); /* Return value is unused. */ +#elif defined(__GNUC__) +#if defined(VM_ARM_64) + ARM_64_ATOMIC_64_OPV_PRIVATE_FENCED(var, val, sub); +#else /* VM_X86_64 */ + /* Checked against the AMD manual and GCC --hpreg */ + __asm__ __volatile__( + "lock; subq %1, %0" + : "+m" (var->value) + : "re" (val) + : "cc" + ); + AtomicEpilogue(); +#endif +#elif defined _MSC_VER + _InterlockedExchangeAdd64((__int64 *)&var->value, (__int64)-val); +#else +#error No compiler defined for Atomic_Sub64 +#endif +} + + +/* + *----------------------------------------------------------------------------- + * + * Atomic_Inc64 -- + * + * Atomic read, increment, write. + * + * Results: + * None + * + * Side effects: + * None + * + *----------------------------------------------------------------------------- + */ + +static INLINE void +Atomic_Inc64(Atomic_uint64 *var) // IN/OUT +{ +#if !defined(__x86_64__) + Atomic_ReadInc64(var); /* Return value is unused. */ +#elif defined(__GNUC__) + /* Checked against the AMD manual and GCC --hpreg */ + __asm__ __volatile__( + "lock; incq %0" + : "+m" (var->value) + : + : "cc" + ); + AtomicEpilogue(); +#elif defined _MSC_VER + _InterlockedIncrement64((__int64 *)&var->value); +#else +#error No compiler defined for Atomic_Inc64 +#endif +} + + +/* + *----------------------------------------------------------------------------- + * + * Atomic_Dec64 -- + * + * Atomic read, decrement, write. + * + * Results: + * None + * + * Side effects: + * None + * + *----------------------------------------------------------------------------- + */ + +static INLINE void +Atomic_Dec64(Atomic_uint64 *var) // IN/OUT +{ +#if !defined(__x86_64__) + Atomic_ReadDec64(var); /* Return value is unused. */ +#elif defined(__GNUC__) + /* Checked against the AMD manual and GCC --hpreg */ + __asm__ __volatile__( + "lock; decq %0" + : "+m" (var->value) + : + : "cc" + ); + AtomicEpilogue(); +#elif defined _MSC_VER + _InterlockedDecrement64((__int64 *)&var->value); +#else +#error No compiler defined for Atomic_Dec64 +#endif +} + + +/* + *----------------------------------------------------------------------------- + * + * Atomic_ReadWrite64 -- + * + * Read followed by write + * + * Results: + * The value of the atomic variable before the write. 
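+ *
+ *      Usage sketch (illustrative; 'pendingBytes' is a hypothetical
+ *      Atomic_uint64 owned by the caller). The exchange drains a counter
+ *      and returns what had accumulated:
+ *
+ *         uint64 drained = Atomic_ReadWrite64(&pendingBytes, 0);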
+ * + * Side effects: + * None + * + *----------------------------------------------------------------------------- + */ + +static INLINE uint64 +Atomic_ReadWrite64(Atomic_uint64 *var, // IN/OUT + uint64 val) // IN +{ +#if defined(__x86_64__) +#if defined(__GNUC__) + /* Checked against the AMD manual and GCC --hpreg */ + __asm__ __volatile__( + "xchgq %0, %1" + : "=r" (val), + "+m" (var->value) + : "0" (val) + ); + AtomicEpilogue(); + return val; +#elif defined _MSC_VER + return _InterlockedExchange64((__int64 *)&var->value, (__int64)val); +#else +#error No compiler defined for Atomic_ReadWrite64 +#endif +#else + uint64 oldVal; + + do { + oldVal = var->value; + } while (!Atomic_CMPXCHG64(var, &oldVal, &val)); + + return oldVal; +#endif +} + + +/* + *----------------------------------------------------------------------------- + * + * Atomic_Write64 -- + * + * Write + * + * Results: + * None. + * + * Side effects: + * None + * + *----------------------------------------------------------------------------- + */ + +static INLINE void +Atomic_Write64(Atomic_uint64 *var, // OUT + uint64 val) // IN +{ +#if defined(__x86_64__) +#if defined(__GNUC__) + +#ifdef VMM + ASSERT((uintptr_t)var % 8 == 0); +#endif + /* + * There is no move instruction for 64-bit immediate to memory, so unless + * the immediate value fits in 32-bit (i.e. can be sign-extended), GCC + * breaks the assignment into two movl instructions. The code below forces + * GCC to load the immediate value into a register first. + */ + + __asm__ __volatile__( + "movq %1, %0" + : "=m" (var->value) + : "r" (val) + ); +#elif defined _MSC_VER + /* + * Microsoft docs guarantee "Simple reads and writes to properly aligned + * 64-bit variables are atomic on 64-bit Windows." + * http://msdn.microsoft.com/en-us/library/ms684122%28VS.85%29.aspx + * + * XXX Verify that value is properly aligned. Bug 61315. + */ + + var->value = val; +#else +#error No compiler defined for Atomic_Write64 +#endif +#else /* defined(__x86_64__) */ + (void)Atomic_ReadWrite64(var, val); +#endif +} + + +/* + *----------------------------------------------------------------------------- + * + * Atomic_Or64 -- + * + * Atomic read, bitwise OR with a 64-bit value, write. + * + * Results: + * None + * + * Side effects: + * None + * + *----------------------------------------------------------------------------- + */ + +static INLINE void +Atomic_Or64(Atomic_uint64 *var, // IN/OUT + uint64 val) // IN +{ +#if defined(__x86_64__) +#if defined(__GNUC__) + /* Checked against the AMD manual and GCC --hpreg */ + __asm__ __volatile__( + "lock; orq %1, %0" + : "+m" (var->value) + : "re" (val) + : "cc" + ); + AtomicEpilogue(); +#elif defined _MSC_VER + _InterlockedOr64((__int64 *)&var->value, (__int64)val); +#else +#error No compiler defined for Atomic_Or64 +#endif +#else // __x86_64__ + uint64 oldVal; + uint64 newVal; + do { + oldVal = var->value; + newVal = oldVal | val; + } while (!Atomic_CMPXCHG64(var, &oldVal, &newVal)); +#endif +} + + +/* + *----------------------------------------------------------------------------- + * + * Atomic_And64 -- + * + * Atomic read, bitwise AND with a 64-bit value, write. 
+ * + * Results: + * None + * + * Side effects: + * None + * + *----------------------------------------------------------------------------- + */ + +static INLINE void +Atomic_And64(Atomic_uint64 *var, // IN/OUT + uint64 val) // IN +{ +#if defined(__x86_64__) +#if defined(__GNUC__) + /* Checked against the AMD manual and GCC --hpreg */ + __asm__ __volatile__( + "lock; andq %1, %0" + : "+m" (var->value) + : "re" (val) + : "cc" + ); + AtomicEpilogue(); +#elif defined _MSC_VER + _InterlockedAnd64((__int64 *)&var->value, (__int64)val); +#else +#error No compiler defined for Atomic_And64 +#endif +#else // __x86_64__ + uint64 oldVal; + uint64 newVal; + do { + oldVal = var->value; + newVal = oldVal & val; + } while (!Atomic_CMPXCHG64(var, &oldVal, &newVal)); +#endif +} + + +/* + *----------------------------------------------------------------------------- + * + * Atomic_SetBit64 -- + * + * Atomically set the bit 'bit' in var. Bit must be between 0 and 63. + * + * Results: + * None + * + * Side effects: + * None + * + *----------------------------------------------------------------------------- + */ + +static INLINE void +Atomic_SetBit64(Atomic_uint64 *var, // IN/OUT + uint64 bit) // IN +{ +#if defined(__x86_64__) +#if defined(__GNUC__) + ASSERT(bit <= 63); + __asm__ __volatile__( + "lock; bts %1, %0" + : "+m" (var->value) + : "ri" (bit) + : "cc" + ); + AtomicEpilogue(); +#elif defined _MSC_VER + uint64 oldVal; + uint64 newVal; + ASSERT(bit <= 63); + do { + oldVal = var->value; + newVal = oldVal | (CONST64U(1) << bit); + } while (!Atomic_CMPXCHG64(var, &oldVal, &newVal)); +#else +#error No compiler defined for Atomic_SetBit64 +#endif +#else // __x86_64__ + uint64 oldVal; + uint64 newVal; + ASSERT(bit <= 63); + do { + oldVal = var->value; + newVal = oldVal | (CONST64U(1) << bit); + } while (!Atomic_CMPXCHG64(var, &oldVal, &newVal)); +#endif +} + + +/* + *----------------------------------------------------------------------------- + * + * Atomic_ClearBit64 -- + * + * Atomically clear the bit 'bit' in var. Bit must be between 0 and 63. + * + * Results: + * None + * + * Side effects: + * None + * + *----------------------------------------------------------------------------- + */ + +static INLINE void +Atomic_ClearBit64(Atomic_uint64 *var, // IN/OUT + uint64 bit) // IN +{ +#if defined(__x86_64__) +#if defined(__GNUC__) + ASSERT(bit <= 63); + __asm__ __volatile__( + "lock; btr %1, %0" + : "+m" (var->value) + : "ri" (bit) + : "cc" + ); + AtomicEpilogue(); +#elif defined _MSC_VER + uint64 oldVal; + uint64 newVal; + ASSERT(bit <= 63); + do { + oldVal = var->value; + newVal = oldVal & ~(CONST64U(1) << bit); + } while (!Atomic_CMPXCHG64(var, &oldVal, &newVal)); +#else +#error No compiler defined for Atomic_ClearBit64 +#endif +#else // __x86_64__ + uint64 oldVal; + uint64 newVal; + ASSERT(bit <= 63); + do { + oldVal = var->value; + newVal = oldVal & ~(CONST64U(1) << bit); + } while (!Atomic_CMPXCHG64(var, &oldVal, &newVal)); +#endif +} + + +/* + *----------------------------------------------------------------------------- + * + * Atomic_TestBit64 -- + * + * Read the bit 'bit' in var. Bit must be between 0 and 63. + * + * Results: + * TRUE if the tested bit was set; else FALSE. 
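+ *
+ *      Usage sketch (illustrative; 'cpuMask' is a hypothetical Atomic_uint64
+ *      used as a 64-entry bitmap):
+ *
+ *         Atomic_SetBit64(&cpuMask, 5);
+ *         if (Atomic_TestBit64(&cpuMask, 5)) {
+ *            // Bit 5 was set at the time of the read; it may change later.
+ *         }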
+ * + * Side effects: + * None + * + *----------------------------------------------------------------------------- + */ + +static INLINE Bool +Atomic_TestBit64(Atomic_uint64 *var, // IN + uint64 bit) // IN +{ +#if defined(__x86_64__) +#if defined(__GNUC__) + Bool out = FALSE; + ASSERT(bit <= 63); + __asm__ __volatile__( + "bt %2, %1; setc %0" + : "=rm"(out) + : "m" (var->value), + "rJ" (bit) + : "cc" + ); + return out; +#elif defined _MSC_VER + ASSERT(bit <= 63); + return (var->value & (CONST64U(1) << bit)) != 0; +#else +#error No compiler defined for Atomic_TestBit64 +#endif +#else // __x86_64__ + ASSERT(bit <= 63); + return (var->value & (CONST64U(1) << bit)) != 0; +#endif +} + + +#if defined(__GNUC__) +/* + *----------------------------------------------------------------------------- + * + * Atomic_Read16 -- + * + * Read and return. + * + * Results: + * The value of the atomic variable. + * + * Side effects: + * None. + * + *----------------------------------------------------------------------------- + */ + +static INLINE uint16 +Atomic_Read16(Atomic_uint16 const *var) // IN +{ + uint16 value; + +#ifdef VMM + ASSERT((uintptr_t)var % 2 == 0); +#endif + +#if defined(__GNUC__) +#if (defined(__x86_64__) || defined(__i386__)) + + __asm__ __volatile__( + "movw %1, %0" + : "=r" (value) + : "m" (var->value) + ); +#elif defined(VM_ARM_V7) + NOT_TESTED(); + + __asm__ __volatile__( + "ldrh %0, [%1]" + : "=r" (value) + : "r" (&var->value) + ); +#elif defined(VM_ARM_64) + __asm__ __volatile__ ( + "ldrh %w0, [%1]" + : "=r" (value) + : "r" (&var->value) + ); + +#else +#error No 16-bits atomics. +#endif +#endif + + return value; +} + + +/* + *----------------------------------------------------------------------------- + * + * Atomic_ReadWrite16 -- + * + * Read followed by write + * + * Results: + * The value of the atomic variable before the write. + * + * Side effects: + * None + * + *----------------------------------------------------------------------------- + */ + +static INLINE uint16 +Atomic_ReadWrite16(Atomic_uint16 *var, // IN/OUT: + uint16 val) // IN: +{ +#if defined(__GNUC__) +#if (defined(__x86_64__) || defined(__i386__)) + __asm__ __volatile__( + "xchgw %0, %1" + : "=r" (val), + "+m" (var->value) + : "0" (val) + ); + AtomicEpilogue(); + return val; +#elif defined(VM_ARM_V7) + register volatile uint16 retVal; + register volatile uint16 res; + + NOT_TESTED(); + + dmb(); + + __asm__ __volatile__( + "1: ldrexh %[retVal], [%[var]] \n\t" + "strexh %[res], %[val], [%[var]] \n\t" + "teq %[res], #0 \n\t" + "bne 1b" + : [retVal] "=&r" (retVal), [res] "=&r" (res) + : [var] "r" (&var->value), [val] "r" (val) + : "cc" + ); + + dmb(); + + return retVal; +#elif defined(VM_ARM_64) + register uint16 retVal; + register uint16 failed; + + __asm__ __volatile__( + " dmb sy \n\t" + "1: ldxrh %w0, [%2] \n\t" + " stxrh %w1, %w3, [%2] \n\t" + " cbnz %w1, 1b \n\t" + " dmb sy \n\t" + : "=&r" (retVal), "=&r" (failed) + : "r" (&var->value), "r" (val) + : "memory" + ); + + return retVal; +#else +#error No 16-bits atomics. +#endif +#endif +} + + +/* + *----------------------------------------------------------------------------- + * + * Atomic_Write16 -- + * + * Write + * + * Results: + * None. 
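+ *
+ *      Usage sketch (illustrative; 'linkState' is a hypothetical
+ *      Atomic_uint16 declared by the caller):
+ *
+ *         Atomic_Write16(&linkState, 1);
+ *         uint16 s = Atomic_Read16(&linkState);   // s == 1 absent other writers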
+ * + * Side effects: + * None + * + *----------------------------------------------------------------------------- + */ + +static INLINE void +Atomic_Write16(Atomic_uint16 *var, // OUT: + uint16 val) // IN: +{ +#ifdef VMM + ASSERT((uintptr_t)var % 2 == 0); +#endif + +#if defined(__GNUC__) +#if (defined(__x86_64__) || defined(__i386__)) + + __asm__ __volatile__( + "movw %1, %0" + : "=m" (var->value) + : "r" (val) + ); +#elif defined(VM_ARM_ANY) + Atomic_ReadWrite16(var, val); +#else +#error No 16-bits atomics. +#endif +#endif +} + + +/* + *----------------------------------------------------------------------------- + * + * Atomic_ReadIfEqualWrite16 -- + * + * Compare exchange: Read variable, if equal to oldVal, write newVal + * + * Results: + * The value of the atomic variable before the write. + * + * Side effects: + * The variable may be modified. + * + *----------------------------------------------------------------------------- + */ + +static INLINE uint16 +Atomic_ReadIfEqualWrite16(Atomic_uint16 *var, // IN/OUT + uint16 oldVal, // IN + uint16 newVal) // IN +{ +#if defined(__GNUC__) +#if (defined(__x86_64__) || defined(__i386__)) + uint16 val; + + __asm__ __volatile__( + "lock; cmpxchgw %2, %1" + : "=a" (val), + "+m" (var->value) + : "r" (newVal), + "0" (oldVal) + : "cc" + ); + AtomicEpilogue(); + return val; +#elif defined(VM_ARM_V7) + register uint16 retVal; + register uint16 res; + + NOT_TESTED(); + + dmb(); + + __asm__ __volatile__( + "1: ldrexh %[retVal], [%[var]] \n\t" + "mov %[res], #0 \n\t" + "teq %[retVal], %[oldVal] \n\t" + "strexheq %[res], %[newVal], [%[var]] \n\t" + "teq %[res], #0 \n\t" + "bne 1b" + : [retVal] "=&r" (retVal), [res] "=&r" (res) + : [var] "r" (&var->value), [oldVal] "r" (oldVal), [newVal] "r" (newVal) + : "cc" + ); + + dmb(); + + return retVal; +#elif defined(VM_ARM_64) + register uint16 retVal; + register uint16 failed; + + __asm__ __volatile__ ( + " dmb sy \n\t" + "1: ldxrh %w0, [%2] \n\t" + " cmp %w0, %w3 \n\t" + " b.ne 2f \n\t" + " stxrh %w1, %w4, [%2] \n\t" + " cbnz %w1, 1b \n\t" + "2: clrex \n\t" + " dmb sy \n\t" + : "=&r" (retVal), "=&r" (failed) + : "r" (&var->value), "r" (oldVal), "r" (newVal) + : "cc", "memory"); + + return retVal; +#else +#error No 16-bits atomics. +#endif +#endif +} + + +/* + *----------------------------------------------------------------------------- + * + * Atomic_And16 -- + * + * Atomic read, bitwise AND with a 16-bit value, write. + * + * Results: + * None + * + * Side effects: + * None + * + *----------------------------------------------------------------------------- + */ + +static INLINE void +Atomic_And16(Atomic_uint16 *var, // IN/OUT + uint16 val) // IN +{ +#if defined(__GNUC__) +#if (defined(__x86_64__) || defined(__i386__)) + __asm__ __volatile__( + "lock; andw %1, %0" + : "+m" (var->value) + : "re" (val) + : "cc" + ); + AtomicEpilogue(); +#elif defined(VM_ARM_V7) + register volatile uint16 res; + register volatile uint16 tmp; + + NOT_TESTED(); + + dmb(); + + __asm__ __volatile__( + "1: ldrexh %[tmp], [%[var]] \n\t" + "and %[tmp], %[tmp], %[val] \n\t" + "strexh %[res], %[tmp], [%[var]] \n\t" + "teq %[res], #0 \n\t" + "bne 1b" + : [res] "=&r" (res), [tmp] "=&r" (tmp) + : [var] "r" (&var->value), [val] "r" (val) + : "cc" + ); + + dmb(); +#elif defined(VM_ARM_64) + ARM_64_ATOMIC_16_OPV_PRIVATE_FENCED(var, val, and); +#else +#error No 16-bits atomics. 
+#endif +#endif +} + + +/* + *----------------------------------------------------------------------------- + * + * Atomic_Or16 -- + * + * Atomic read, bitwise OR with a 16-bit value, write. + * + * Results: + * None + * + * Side effects: + * None + * + *----------------------------------------------------------------------------- + */ + +static INLINE void +Atomic_Or16(Atomic_uint16 *var, // IN/OUT + uint16 val) // IN +{ +#if defined(__GNUC__) +#if (defined(__x86_64__) || defined(__i386__)) + __asm__ __volatile__( + "lock; orw %1, %0" + : "+m" (var->value) + : "re" (val) + : "cc" + ); + AtomicEpilogue(); +#elif defined(VM_ARM_V7) + register volatile uint16 res; + register volatile uint16 tmp; + + NOT_TESTED(); + + dmb(); + + __asm__ __volatile__( + "1: ldrexh %[tmp], [%[var]] \n\t" + "orr %[tmp], %[tmp], %[val] \n\t" + "strexh %[res], %[tmp], [%[var]] \n\t" + "teq %[res], #0 \n\t" + "bne 1b" + : [res] "=&r" (res), [tmp] "=&r" (tmp) + : [var] "r" (&var->value), [val] "r" (val) + : "cc" + ); + + dmb(); +#elif defined(VM_ARM_64) + ARM_64_ATOMIC_16_OPV_PRIVATE_FENCED(var, val, orr); +#else +#error No 16-bits atomics. +#endif +#endif +} + +/* + *----------------------------------------------------------------------------- + * + * Atomic_Xor16 -- + * + * Atomic read, bitwise XOR with a value, write. + * + * Results: + * None + * + * Side effects: + * None + * + *----------------------------------------------------------------------------- + */ + +static INLINE void +Atomic_Xor16(Atomic_uint16 *var, // IN/OUT + uint16 val) // IN +{ +#if defined(__GNUC__) +#if (defined(__x86_64__) || defined(__i386__)) + __asm__ __volatile__( + "lock; xorw %1, %0" + : "+m" (var->value) + : "re" (val) + : "cc" + ); + AtomicEpilogue(); +#elif defined(VM_ARM_V7) + register volatile uint16 res; + register volatile uint16 tmp; + + NOT_TESTED(); + + dmb(); + + __asm__ __volatile__( + "1: ldrexh %[tmp], [%[var]] \n\t" + "eor %[tmp], %[tmp], %[val] \n\t" + "strexh %[res], %[tmp], [%[var]] \n\t" + "teq %[res], #0 \n\t" + "bne 1b" + : [res] "=&r" (res), [tmp] "=&r" (tmp) + : [var] "r" (&var->value), [val] "r" (val) + : "cc" + ); + + dmb(); +#elif defined(VM_ARM_64) + ARM_64_ATOMIC_16_OPV_PRIVATE_FENCED(var, val, eor); +#else +#error No 16-bits atomics. +#endif +#endif +} + + +/* + *----------------------------------------------------------------------------- + * + * Atomic_Add16 -- + * + * Atomic read, add a value, write. + * + * Results: + * None + * + * Side effects: + * None + * + *----------------------------------------------------------------------------- + */ + +static INLINE void +Atomic_Add16(Atomic_uint16 *var, // IN/OUT + uint16 val) // IN +{ +#if defined(__GNUC__) +#if (defined(__x86_64__) || defined(__i386__)) + __asm__ __volatile__( + "lock; addw %1, %0" + : "+m" (var->value) + : "re" (val) + : "cc" + ); + AtomicEpilogue(); +#elif defined(VM_ARM_V7) + register volatile uint16 res; + register volatile uint16 tmp; + + NOT_TESTED(); + + dmb(); + + __asm__ __volatile__( + "1: ldrexh %[tmp], [%[var]] \n\t" + "add %[tmp], %[tmp], %[val] \n\t" + "strexh %[res], %[tmp], [%[var]] \n\t" + "teq %[res], #0 \n\t" + "bne 1b" + : [res] "=&r" (res), [tmp] "=&r" (tmp) + : [var] "r" (&var->value), [val] "r" (val) + : "cc" + ); + + dmb(); +#elif defined(VM_ARM_64) + ARM_64_ATOMIC_16_OPV_PRIVATE_FENCED(var, val, add); +#else +#error No 16-bits atomics. +#endif +#endif +} + + +/* + *----------------------------------------------------------------------------- + * + * Atomic_Sub16 -- + * + * Atomic read, subtract a value, write. 
+ * + * Results: + * None + * + * Side effects: + * None + * + *----------------------------------------------------------------------------- + */ + +static INLINE void +Atomic_Sub16(Atomic_uint16 *var, // IN/OUT + uint16 val) // IN +{ +#if defined(__GNUC__) +#if (defined(__x86_64__) || defined(__i386__)) + __asm__ __volatile__( + "lock; subw %1, %0" + : "+m" (var->value) + : "re" (val) + : "cc" + ); + AtomicEpilogue(); +#elif defined(VM_ARM_V7) + register volatile uint16 res; + register volatile uint16 tmp; + + NOT_TESTED(); + + dmb(); + + __asm__ __volatile__( + "1: ldrexh %[tmp], [%[var]] \n\t" + "sub %[tmp], %[tmp], %[val] \n\t" + "strexh %[res], %[tmp], [%[var]] \n\t" + "teq %[res], #0 \n\t" + "bne 1b" + : [res] "=&r" (res), [tmp] "=&r" (tmp) + : [var] "r" (&var->value), [val] "r" (val) + : "cc" + ); + + dmb(); +#elif defined(VM_ARM_64) + ARM_64_ATOMIC_16_OPV_PRIVATE_FENCED(var, val, sub); +#else +#error No 16-bits atomics. +#endif +#endif +} + + +/* + *----------------------------------------------------------------------------- + * + * Atomic_Inc16 -- + * + * Atomic read, increment, write. + * + * Results: + * None + * + * Side effects: + * None + * + *----------------------------------------------------------------------------- + */ + +static INLINE void +Atomic_Inc16(Atomic_uint16 *var) // IN/OUT +{ +#if defined(__GNUC__) +#if (defined(__x86_64__) || defined(__i386__)) + __asm__ __volatile__( + "lock; incw %0" + : "+m" (var->value) + : + : "cc" + ); + AtomicEpilogue(); +#elif defined(VM_ARM_ANY) + Atomic_Add16(var, 1); +#else +#error No 16-bits atomics. +#endif +#endif +} + + +/* + *----------------------------------------------------------------------------- + * + * Atomic_Dec16 -- + * + * Atomic read, decrement, write. + * + * Results: + * None + * + * Side effects: + * None + * + *----------------------------------------------------------------------------- + */ + +static INLINE void +Atomic_Dec16(Atomic_uint16 *var) // IN/OUT +{ +#if defined(__GNUC__) +#if (defined(__x86_64__) || defined(__i386__)) + __asm__ __volatile__( + "lock; decw %0" + : "+m" (var->value) + : + : "cc" + ); + AtomicEpilogue(); +#elif defined(VM_ARM_ANY) + Atomic_Sub16(var, 1); +#else +#error No 16-bits atomics. +#endif +#endif +} + + +/* + *----------------------------------------------------------------------------- + * + * Atomic_ReadOr16 -- + * + * Atomic read (returned), bitwise OR with a value, write. + * + * Results: + * The value of the variable before the operation. 
+ * + * Side effects: + * None + * + *----------------------------------------------------------------------------- + */ + +static INLINE uint16 +Atomic_ReadOr16(Atomic_uint16 *var, // IN/OUT + uint16 val) // IN +{ + uint16 res; + + do { + res = var->value; + } while (res != Atomic_ReadIfEqualWrite16(var, res, res | val)); + + return res; +} + + +/* + *---------------------------------------------------------------------- + * + * Atomic_ReadAdd16 -- + * + * Atomically adds a 16-bit integer to another + * + * Results: + * Returns the old value just prior to the addition + * + * Side effects: + * None + * + *---------------------------------------------------------------------- + */ + +static INLINE uint16 +Atomic_ReadAdd16(Atomic_uint16 *var, // IN/OUT + uint16 val) // IN: +{ +#if defined(__GNUC__) +#if (defined(__x86_64__) || defined(__i386__)) + __asm__ __volatile__( + "lock; xaddw %0, %1" + : "=r" (val), + "+m" (var->value) + : "0" (val) + : "cc" + ); + AtomicEpilogue(); + return val; +#elif defined(VM_ARM_V7) + register volatile uint16 res; + register volatile uint16 retVal; + register volatile uint16 tmp; + + NOT_TESTED(); + + dmb(); + + __asm__ __volatile__( + "1: ldrexh %[retVal], [%[var]] \n\t" + "add %[tmp], %[val], %[retVal] \n\t" + "strexh %[res], %[tmp], [%[var]] \n\t" + "teq %[res], #0 \n\t" + "bne 1b" + : [tmp] "=&r" (tmp), [res] "=&r" (res), [retVal] "=&r" (retVal) + : [var] "r" (&var->value), [val] "r" (val) + : "cc" + ); + + dmb(); + + return retVal; +#elif defined(VM_ARM_64) + return ARM_64_ATOMIC_16_OPO_PRIVATE_FENCED(var, val, add); +#else +#error No 16-bits atomics. +#endif +#endif +} + + +/* + *---------------------------------------------------------------------- + * + * Atomic_ReadInc16 -- + * + * Atomically increments a 64-bit integer + * + * Results: + * Returns the old value just prior to incrementing + * + * Side effects: + * None + * + *---------------------------------------------------------------------- + */ + +static INLINE uint16 +Atomic_ReadInc16(Atomic_uint16 *var) // IN/OUT +{ + return Atomic_ReadAdd16(var, 1); +} + +/* + + *---------------------------------------------------------------------- + * + * Atomic_ReadDec16 -- + * + * Atomically decrements a 64-bit integer + * + * Results: + * Returns the old value just prior to decrementing + * + * Side effects: + * None + * + *---------------------------------------------------------------------- + */ + +static INLINE uint16 +Atomic_ReadDec16(Atomic_uint16 *var) // IN/OUT +{ + return Atomic_ReadAdd16(var, -1); +} +#endif + +/* + * Template code for the Atomic_ type and its operators. + * + * The cast argument is an intermediate type cast to make some + * compilers stop complaining about casting uint32 <-> void *, + * even though we only do it in the 32-bit case so they are always + * the same size. So for val of type uint32, instead of + * (void *)val, we have (void *)(uintptr_t)val. + * The specific problem case is the Windows ddk compiler + * (as used by the SVGA driver). -- edward + * + * NOTE: See the comment in vm_assert.h for why we need UNUSED_TYPE in + * AtomicAssertOnCompile(), and why we need to be very careful doing so. + */ + +#define MAKE_ATOMIC_TYPE(name, size, in, out, cast) \ + typedef Atomic_uint ## size Atomic_ ## name; \ + \ + \ + static INLINE void \ + AtomicAssertOnCompile ## name(void) \ + { \ + enum { AssertOnCompileMisused = 8 * sizeof (in) == size \ + && 8 * sizeof (out) == size \ + && 8 * sizeof (cast) == size \ + ? 
+                                      ? 1 : -1 }; \
+      UNUSED_TYPE(typedef char AssertOnCompileFailed[AssertOnCompileMisused]);\
+   } \
+ \
+ \
+   static INLINE out \
+   Atomic_Read ## name(Atomic_ ## name const *var) \
+   { \
+      return (out)(cast)Atomic_Read ## size(var); \
+   } \
+ \
+ \
+   static INLINE void \
+   Atomic_Write ## name(Atomic_ ## name *var, \
+                        in val) \
+   { \
+      Atomic_Write ## size(var, (uint ## size)(cast)val); \
+   } \
+ \
+ \
+   static INLINE out \
+   Atomic_ReadWrite ## name(Atomic_ ## name *var, \
+                            in val) \
+   { \
+      return (out)(cast)Atomic_ReadWrite ## size(var, \
+                                                 (uint ## size)(cast)val); \
+   } \
+ \
+ \
+   static INLINE out \
+   Atomic_ReadIfEqualWrite ## name(Atomic_ ## name *var, \
+                                   in oldVal, \
+                                   in newVal) \
+   { \
+      return (out)(cast)Atomic_ReadIfEqualWrite ## size(var, \
+         (uint ## size)(cast)oldVal, (uint ## size)(cast)newVal); \
+   } \
+ \
+ \
+   static INLINE void \
+   Atomic_And ## name(Atomic_ ## name *var, \
+                      in val) \
+   { \
+      Atomic_And ## size(var, (uint ## size)(cast)val); \
+   } \
+ \
+ \
+   static INLINE void \
+   Atomic_Or ## name(Atomic_ ## name *var, \
+                     in val) \
+   { \
+      Atomic_Or ## size(var, (uint ## size)(cast)val); \
+   } \
+ \
+ \
+   static INLINE void \
+   Atomic_Xor ## name(Atomic_ ## name *var, \
+                      in val) \
+   { \
+      Atomic_Xor ## size(var, (uint ## size)(cast)val); \
+   } \
+ \
+ \
+   static INLINE void \
+   Atomic_Add ## name(Atomic_ ## name *var, \
+                      in val) \
+   { \
+      Atomic_Add ## size(var, (uint ## size)(cast)val); \
+   } \
+ \
+ \
+   static INLINE void \
+   Atomic_Sub ## name(Atomic_ ## name *var, \
+                      in val) \
+   { \
+      Atomic_Sub ## size(var, (uint ## size)(cast)val); \
+   } \
+ \
+ \
+   static INLINE void \
+   Atomic_Inc ## name(Atomic_ ## name *var) \
+   { \
+      Atomic_Inc ## size(var); \
+   } \
+ \
+ \
+   static INLINE void \
+   Atomic_Dec ## name(Atomic_ ## name *var) \
+   { \
+      Atomic_Dec ## size(var); \
+   } \
+ \
+ \
+   static INLINE out \
+   Atomic_ReadOr ## name(Atomic_ ## name *var, \
+                         in val) \
+   { \
+      return (out)(cast)Atomic_ReadOr ## size(var, (uint ## size)(cast)val); \
+   } \
+ \
+ \
+   static INLINE out \
+   Atomic_ReadAdd ## name(Atomic_ ## name *var, \
+                          in val) \
+   { \
+      return (out)(cast)Atomic_ReadAdd ## size(var, (uint ## size)(cast)val); \
+   } \
+ \
+ \
+   static INLINE out \
+   Atomic_ReadInc ## name(Atomic_ ## name *var) \
+   { \
+      return (out)(cast)Atomic_ReadInc ## size(var); \
+   } \
+ \
+ \
+   static INLINE out \
+   Atomic_ReadDec ## name(Atomic_ ## name *var) \
+   { \
+      return (out)(cast)Atomic_ReadDec ## size(var); \
+   }
+
+
+/*
+ * Since we use a macro to generate these definitions, it is hard to look for
+ * them. So DO NOT REMOVE THIS COMMENT and keep it up-to-date. --hpreg
+ *
+ * Atomic_Ptr
+ * Atomic_ReadPtr --
+ * Atomic_WritePtr --
+ * Atomic_ReadWritePtr --
+ * Atomic_ReadIfEqualWritePtr --
+ * Atomic_AndPtr --
+ * Atomic_OrPtr --
+ * Atomic_XorPtr --
+ * Atomic_AddPtr --
+ * Atomic_SubPtr --
+ * Atomic_IncPtr --
+ * Atomic_DecPtr --
+ * Atomic_ReadOrPtr --
+ * Atomic_ReadAddPtr --
+ * Atomic_ReadIncPtr --
+ * Atomic_ReadDecPtr --
+ *
+ * Atomic_Int
+ * Atomic_ReadInt --
+ * Atomic_WriteInt --
+ * Atomic_ReadWriteInt --
+ * Atomic_ReadIfEqualWriteInt --
+ * Atomic_AndInt --
+ * Atomic_OrInt --
+ * Atomic_XorInt --
+ * Atomic_AddInt --
+ * Atomic_SubInt --
+ * Atomic_IncInt --
+ * Atomic_DecInt --
+ * Atomic_ReadOrInt --
+ * Atomic_ReadAddInt --
+ * Atomic_ReadIncInt --
+ * Atomic_ReadDecInt --
+ */
+#if defined(VM_64BIT)
+MAKE_ATOMIC_TYPE(Ptr, 64, void const *, void *, uintptr_t)
+#else
+MAKE_ATOMIC_TYPE(Ptr, 32, void const *, void *, uintptr_t)
+#endif
+MAKE_ATOMIC_TYPE(Int, 32, int, int, int)
+
+
+/* Prevent the compiler from re-ordering memory references. */
+#ifdef __GNUC__
+#define ATOMIC_COMPILER_BARRIER() __asm__ __volatile__ ("": : :"memory")
+#elif defined(_MSC_VER)
+#define ATOMIC_COMPILER_BARRIER() _ReadWriteBarrier()
+#else
+#error No compiler defined for ATOMIC_COMPILER_BARRIER
+#endif
+
+
+/*
+ *-----------------------------------------------------------------------------
+ *
+ * Atomic_MFence --
+ *
+ *      Implements mfence in terms of a lock xor. The reason for implementing
+ *      our own mfence is that not all of our supported cpus have an assembly
+ *      mfence (P3, Athlon). We put it here to avoid duplicating code which is
+ *      also why it is prefixed with "Atomic_". Moreover, this implementation
+ *      performs slightly better than 'mfence' on modern CPUs (See PR 817059).
+ *
+ * Results:
+ *      None.
+ *
+ * Side effects:
+ *      Cause loads and stores prior to this to be globally
+ *      visible.
+ *
+ *-----------------------------------------------------------------------------
+ */
+
+static INLINE void
+Atomic_MFence(void)
+{
+   Atomic_uint32 fence;
+   ATOMIC_COMPILER_BARRIER();
+   Atomic_Xor(&fence, 0x1);
+   ATOMIC_COMPILER_BARRIER();
+}
+
+#ifdef ATOMIC_COMPILER_BARRIER
+#undef ATOMIC_COMPILER_BARRIER
+#endif /* ATOMIC_COMPILER_BARRIER */
+
+#endif // ifndef _ATOMIC_H_
diff --git a/vmmon-only/include/vm_basic_asm.h b/vmmon-only/include/vm_basic_asm.h
new file mode 100644
index 00000000..0e4adae6
--- /dev/null
+++ b/vmmon-only/include/vm_basic_asm.h
@@ -0,0 +1,1258 @@
+/*********************************************************
+ * Copyright (C) 2003-2015 VMware, Inc. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the
+ * Free Software Foundation version 2 and no later version.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+ * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
+ *
+ *********************************************************/
+
+/*
+ * vm_basic_asm.h
+ *
+ *      Basic asm macros
+ */
+
+#ifndef _VM_BASIC_ASM_H_
+#define _VM_BASIC_ASM_H_
+
+#define INCLUDE_ALLOW_USERLEVEL
+
+#define INCLUDE_ALLOW_MODULE
+#define INCLUDE_ALLOW_VMMON
+#define INCLUDE_ALLOW_VMK_MODULE
+#define INCLUDE_ALLOW_VMKERNEL
+#define INCLUDE_ALLOW_DISTRIBUTE
+#define INCLUDE_ALLOW_VMCORE
+#include "includeCheck.h"
+
+#include "vm_basic_types.h"
+
+#if defined VM_X86_64
+#include "vm_basic_asm_x86_common.h"
+#include "vm_basic_asm_x86_64.h"
+#elif defined VM_X86_32
+#include "vm_basic_asm_x86_common.h"
+#include "vm_basic_asm_x86.h"
+#elif defined VM_ARM_64
+#include "arm64_basic_defs.h"
+#include "vm_basic_asm_arm64.h"
+#else
+#define MUL64_NO_ASM 1
+#include "mul64.h"
+#endif
+
+/*
+ * Locate most and least significant bit set functions. Use our own name
+ * space to avoid namespace collisions. The new names follow a pattern,
+ *