diff --git a/NaCl b/NaCl index 30eb103f0b..b89312d07a 160000 --- a/NaCl +++ b/NaCl @@ -1 +1 @@ -Subproject commit 30eb103f0b8cc1c8aa9500570020b576bb9e0aa7 +Subproject commit b89312d07abb4cf4986b799e564417be0dfd2484 diff --git a/api/arch.hpp b/api/arch.hpp index d20fb1c667..8b5a7d26c5 100644 --- a/api/arch.hpp +++ b/api/arch.hpp @@ -23,6 +23,8 @@ #include #include #include +#include +#include extern void __arch_init(); extern void __arch_poweroff(); @@ -37,7 +39,9 @@ extern void __arch_preempt_forever(void(*)()); inline void __arch_hw_barrier() noexcept; inline void __sw_barrier() noexcept; -extern int64_t __arch_time_now() noexcept; + +extern uint64_t __arch_system_time() noexcept; +extern timespec __arch_wall_clock() noexcept; inline uint64_t __arch_cpu_cycles() noexcept; @@ -55,4 +59,12 @@ inline void __sw_barrier() noexcept #error "Unsupported arch specified" #endif +// retrieve system information +struct arch_system_info_t +{ + std::string uuid; + uint64_t physical_memory; +}; +const arch_system_info_t& __arch_system_info() noexcept; + #endif diff --git a/api/hw/ioport.hpp b/api/hw/ioport.hpp index 5708a2d802..887cb0d520 100644 --- a/api/hw/ioport.hpp +++ b/api/hw/ioport.hpp @@ -74,6 +74,33 @@ namespace hw { asm volatile ("outw %%ax,%%dx"::"a" (data), "d"(port)); #else #error "outw() not implemented for selected arch" +#endif + } + + /** Receive a double-word from port. + @param port : The port number to receive from + */ + static inline uint32_t inl(int port) + { + uint32_t ret; +#if defined(ARCH_x86) + //asm volatile ("xorl %eax,%eax"); + asm volatile ("inl %%dx,%%eax":"=a" (ret):"d"(port)); +#else +#error "inw() not implemented for selected arch" +#endif + return ret; + } + + /** Send a double-word to port. + @param port : The port to send to + @param data : Double-word of data + */ + static inline void outl(int port, uint32_t data) { +#if defined(ARCH_x86) + asm volatile ("outl %%eax,%%dx"::"a" (data), "d"(port)); +#else +#error "outw() not implemented for selected arch" #endif } diff --git a/api/kernel/os.hpp b/api/kernel/os.hpp index 0d7e19404a..cb0b4eb32f 100644 --- a/api/kernel/os.hpp +++ b/api/kernel/os.hpp @@ -60,27 +60,26 @@ class OS { static const char* cmdline_args() noexcept; /** Clock cycles since boot. */ - static uint64_t cycles_since_boot() { - return __arch_cpu_cycles(); - } - /** micro seconds since boot */ - static int64_t micros_since_boot() noexcept; + static uint64_t cycles_since_boot() noexcept; + + /** Nanoseconds since boot converted from cycles */ + static uint64_t nanos_since_boot() noexcept; /** Timestamp for when OS was booted */ - static RTC::timestamp_t boot_timestamp(); + static RTC::timestamp_t boot_timestamp() noexcept; /** Uptime in whole seconds. */ - static RTC::timestamp_t uptime(); + static RTC::timestamp_t uptime() noexcept; /** Time spent sleeping (halt) in cycles */ static uint64_t cycles_asleep() noexcept; - /** Time spent sleeping (halt) in micros */ - static uint64_t micros_asleep() noexcept; + /** Time spent sleeping (halt) in nanoseconds */ + static uint64_t nanos_asleep() noexcept; - static MHz cpu_freq() noexcept - { return cpu_mhz_; } + static auto cpu_freq() noexcept + { return cpu_khz_; } /** * Reboot operating system @@ -236,6 +235,8 @@ class OS { /** Initialize common subsystems, call Service::start */ static void post_start(); + static void install_cpu_frequency(MHz); + private: /** Process multiboot info. Called by 'start' if multibooted **/ static void multiboot(uint32_t boot_addr); @@ -254,9 +255,8 @@ class OS { static bool boot_sequence_passed_; static bool m_is_live_updated; static bool m_block_drivers_ready; - static MHz cpu_mhz_; + static KHz cpu_khz_; - static RTC::timestamp_t booted_at_; static uintptr_t liveupdate_loc_; static std::string version_str_; static std::string arch_str_; @@ -295,4 +295,22 @@ inline OS::Span_mods OS::modules() return nullptr; } +inline uint64_t OS::cycles_since_boot() noexcept +{ + return __arch_cpu_cycles(); +} +inline uint64_t OS::nanos_since_boot() noexcept +{ + return (cycles_since_boot() * 1e6) / cpu_freq().count(); +} + +inline RTC::timestamp_t OS::boot_timestamp() noexcept +{ + return RTC::boot_timestamp(); +} +inline RTC::timestamp_t OS::uptime() noexcept +{ + return RTC::time_since_boot(); +} + #endif //< KERNEL_OS_HPP diff --git a/api/kernel/rtc.hpp b/api/kernel/rtc.hpp index 054052d5dc..a304891899 100644 --- a/api/kernel/rtc.hpp +++ b/api/kernel/rtc.hpp @@ -26,10 +26,16 @@ class RTC { public: - using timestamp_t = int64_t; + using timestamp_t = uint64_t; + /// a 64-bit nanosecond timestamp of the current time + static timestamp_t nanos_now() { + return __arch_system_time(); + } /// returns a 64-bit unix timestamp of the current time - static timestamp_t now() { return __arch_time_now(); } + static timestamp_t now() { + return __arch_wall_clock().tv_sec; + } /// returns a 64-bit unix timestamp for when the OS was booted static timestamp_t boot_timestamp() { diff --git a/api/kernel/timers.hpp b/api/kernel/timers.hpp index 40a036e279..d44f74d6d9 100644 --- a/api/kernel/timers.hpp +++ b/api/kernel/timers.hpp @@ -29,7 +29,7 @@ class Timers { public: using id_t = int32_t; - using duration_t = std::chrono::microseconds; + using duration_t = std::chrono::nanoseconds; using handler_t = delegate; static constexpr id_t UNUSED_ID = -1; diff --git a/api/net/super_stack.hpp b/api/net/super_stack.hpp index 58eba4e7c2..b7d4af7841 100644 --- a/api/net/super_stack.hpp +++ b/api/net/super_stack.hpp @@ -62,9 +62,35 @@ class Super_stack { template static Inet& get(int N, int sub); + /** + * @brief Get a stack by MAC addr. + * Throws if no NIC with the given MAC exists. + * + * @param[in] mac The mac + * + * @tparam IPV IP version + * + * @return A stack + */ + template + static Inet& get(const std::string& mac); + template Inet& create(hw::Nic& nic, int N, int sub); + /** + * @brief Create a stack on the given Nic, + * occupying the first free index. + * + * @param nic The nic + * + * @tparam IPV IP version + * + * @return A stack + */ + template + Inet& create(hw::Nic& nic); + IP4_stacks& ip4_stacks() { return ip4_stacks_; } diff --git a/api/profile b/api/profile index 3eb15b42a8..8c2cdfa0c7 100644 --- a/api/profile +++ b/api/profile @@ -118,7 +118,7 @@ class ScopedProfiler * -------------------------------------------------------------------------------- * */ - static std::string get_statistics(); + static std::string get_statistics(bool sorted = true); private: uint64_t tick_start; @@ -132,6 +132,7 @@ class ScopedProfiler std::string function_name; unsigned num_samples; uint64_t cycles_average; + uint64_t nanos_start; }; diff --git a/diskimagebuild/filetree.cpp b/diskimagebuild/filetree.cpp index 0719ba25ad..6b846bf0de 100644 --- a/diskimagebuild/filetree.cpp +++ b/diskimagebuild/filetree.cpp @@ -20,7 +20,10 @@ File::File(const char* path) rewind(f); this->data = std::unique_ptr (new char[size], std::default_delete ()); - fread(this->data.get(), this->size, 1, f); + size_t actual = fread(this->data.get(), this->size, 1, f); + if (actual != 1) { + throw std::runtime_error("diskbuilder: Could not read from file " + std::string(path)); + } fclose(f); } Dir::Dir(const char* path) @@ -80,14 +83,23 @@ void FileSys::add_dir(Dir& dvec) strcat(cwd_buffer, dvec.name.c_str()); //printf("*** Entering %s...\n", cwd_buffer); - chdir(cwd_buffer); + int res = chdir(cwd_buffer); + // throw immediately when unable to read directory + if (res < 0) { + fprintf(stderr, "Unable to enter directory %s\n", cwd_buffer); + throw std::runtime_error("Unable to enter directory " + std::string(cwd_buffer)); + } auto* dir = opendir(cwd_buffer); - if (dir == nullptr) - { - printf("Could not open directory:\n-> %s\n", cwd_buffer); - return; + // throw immediately when unable to open directory + if (dir == nullptr) { + fprintf(stderr, "Unable to open directory %s\n", cwd_buffer); + throw std::runtime_error("Unable to open directory " + std::string(cwd_buffer)); } + + std::vector sub_dirs; + std::vector sub_files; + struct dirent* ent; while ((ent = readdir(dir)) != nullptr) { @@ -95,19 +107,38 @@ void FileSys::add_dir(Dir& dvec) if (name == ".." || name == ".") continue; if (ent->d_type == DT_DIR) { - auto& d = dvec.add_dir(ent->d_name); - add_dir(d); + sub_dirs.push_back(std::move(name)); } else { - try { - dvec.add_file(ent->d_name); - } catch (std::exception& e) { - fprintf(stderr, "%s\n", e.what()); - } + sub_files.push_back(std::move(name)); } } + // close directory before adding more folders and files + res = closedir(dir); + if (res < 0) { + throw std::runtime_error("diskbuilder: Failed to close directory"); + } + + // add sub directories + for (const auto& dirname : sub_dirs) { + auto& d = dvec.add_dir(dirname.c_str()); + add_dir(d); + } + // add files in current directory + for (const auto& filename : sub_files) + { + try { + dvec.add_file(filename.c_str()); + } catch (std::exception& e) { + fprintf(stderr, "%s\n", e.what()); + } + } + // pop work dir - chdir(pwd_buffer); + res = chdir(pwd_buffer); + if (res < 0) { + throw std::runtime_error("diskbuilder: Failed to return back to parent directory"); + } } void FileSys::gather(const char* path) diff --git a/examples/IRCd/CMakeLists.txt b/examples/IRCd/CMakeLists.txt index 79a7e15145..2a337de871 100644 --- a/examples/IRCd/CMakeLists.txt +++ b/examples/IRCd/CMakeLists.txt @@ -40,6 +40,7 @@ set(LOCAL_INCLUDES "") set(DRIVERS virtionet vmxnet3 + #boot_logger ) set (PLUGINS diff --git a/examples/IRCd/service.cpp b/examples/IRCd/service.cpp index 1ef87ac284..ebd1bb3a11 100644 --- a/examples/IRCd/service.cpp +++ b/examples/IRCd/service.cpp @@ -18,17 +18,19 @@ #include #include #include +#include #define USE_STACK_SAMPLING #define PERIOD_SECS 4 #include "ircd/ircd.hpp" static std::unique_ptr ircd = nullptr; +using namespace std::chrono; void Service::start() { // run a small self-test to verify parser is sane extern void selftest(); selftest(); - + ircd = IrcServer::from_config(); ircd->set_motd([] () -> const std::string& { @@ -136,6 +138,8 @@ void Service::ready() //StackSampler::set_mode(StackSampler::MODE_CALLER); #endif - using namespace std::chrono; Timers::periodic(seconds(1), seconds(PERIOD_SECS), print_stats); + + // profiler statistics + printf("%s\n", ScopedProfiler::get_statistics(false).c_str()); } diff --git a/examples/IRCd/vm.json b/examples/IRCd/vm.json index faaff41136..65a4826ba7 100644 --- a/examples/IRCd/vm.json +++ b/examples/IRCd/vm.json @@ -1,5 +1,5 @@ { "image" : "IRCd", - "net" : [{"device" : "vmxnet3"}], + "net" : [{"device" : "virtio"}], "mem" : 1024 } diff --git a/examples/STREAM/service.cpp b/examples/STREAM/service.cpp index 7d104903d0..5233ce86d7 100644 --- a/examples/STREAM/service.cpp +++ b/examples/STREAM/service.cpp @@ -20,7 +20,7 @@ double mysecond() { - return OS::micros_since_boot() / 1000000.f; + return OS::nanos_since_boot() / 1.0e9; } void Service::start() diff --git a/examples/TCP_perf/service.cpp b/examples/TCP_perf/service.cpp index 5caa9588eb..9f53226bad 100644 --- a/examples/TCP_perf/service.cpp +++ b/examples/TCP_perf/service.cpp @@ -64,13 +64,13 @@ void start_measure() packets_tx = Statman::get().get_by_name("eth0.ethernet.packets_tx").get_uint64(); printf(" DACK: %lli ms WSIZE: %u WS: %u CALC_WIN: %u TS: %s\n", dack.count(), winsize, wscale, winsize << wscale, timestamps ? "ON" : "OFF"); - ts = OS::micros_since_boot(); + ts = OS::nanos_since_boot(); activity_before.reset(); } void stop_measure() { - auto diff = OS::micros_since_boot() - ts; + auto diff = OS::nanos_since_boot() - ts; activity_after.reset(); StackSampler::print(15); @@ -79,7 +79,7 @@ void stop_measure() packets_rx = Statman::get().get_by_name("eth0.ethernet.packets_rx").get_uint64() - packets_rx; packets_tx = Statman::get().get_by_name("eth0.ethernet.packets_tx").get_uint64() - packets_tx; printf("Packets RX [%llu] TX [%llu]\n", packets_rx, packets_tx); - double durs = ((double)diff) / 1000 / 1000; + double durs = (double) diff / 1000000000ULL; double mbits = (received/(1024*1024)*8) / durs; printf("Duration: %.2fs - Payload: %lld/%u MB - %.2f MBit/s\n", durs, received/(1024*1024), SIZE/(1024*1024), mbits); diff --git a/examples/acorn/README.md b/examples/acorn/README.md index e50dad39cd..3552429bca 100644 --- a/examples/acorn/README.md +++ b/examples/acorn/README.md @@ -10,7 +10,7 @@ mkdir build cd build cmake .. make -../run.sh acorn +boot acorn ``` ## Features diff --git a/examples/scoped_profiler/README.md b/examples/scoped_profiler/README.md index 6db1589200..c879424884 100644 --- a/examples/scoped_profiler/README.md +++ b/examples/scoped_profiler/README.md @@ -21,7 +21,7 @@ mkdir build cd build cmake .. make -../run.sh scoped_profiler_example +boot scoped_profiler_example ``` Make something happen in the OS and then use wget or curl to `GET /profile` to see statistics: diff --git a/examples/snake/README.md b/examples/snake/README.md index d8b507cd1f..085ef61ed5 100644 --- a/examples/snake/README.md +++ b/examples/snake/README.md @@ -7,7 +7,7 @@ mkdir build cd build cmake .. make -../run.sh snake_example +boot snake_example ``` Use arrow keys to change the snakes direction. Press spacebar to restart the game. diff --git a/examples/snake/snake.hpp b/examples/snake/snake.hpp index af522cc9d7..366136d5a7 100644 --- a/examples/snake/snake.hpp +++ b/examples/snake/snake.hpp @@ -176,7 +176,7 @@ void Snake::game_loop() Timers::oneshot( std::chrono::milliseconds(_head_dir.x() == 0 ? 120 : 70), - [this](auto) { game_loop(); } + [this](auto) { this->game_loop(); } ); } diff --git a/examples/syslog/README.md b/examples/syslog/README.md index 65026f1d49..c2485b87cd 100644 --- a/examples/syslog/README.md +++ b/examples/syslog/README.md @@ -21,5 +21,5 @@ mkdir build cd build cmake .. make -../run.sh syslog_plugin_example +boot syslog_plugin_example ``` diff --git a/examples/tcp/README.md b/examples/tcp/README.md index 97c10c4916..d70a5ab7bd 100644 --- a/examples/tcp/README.md +++ b/examples/tcp/README.md @@ -5,5 +5,5 @@ mkdir build cd build cmake .. make -../run.sh tcp_example +boot tcp_example ``` diff --git a/lib/uplink/CMakeLists.txt b/lib/uplink/CMakeLists.txt index 71864d64b9..540e309304 100644 --- a/lib/uplink/CMakeLists.txt +++ b/lib/uplink/CMakeLists.txt @@ -20,6 +20,7 @@ set(SOURCES transport.cpp ws_uplink.cpp register_plugin.cpp + config.cpp ) add_library(${LIBRARY_NAME} STATIC ${SOURCES}) diff --git a/lib/uplink/config.cpp b/lib/uplink/config.cpp new file mode 100644 index 0000000000..aba91150c3 --- /dev/null +++ b/lib/uplink/config.cpp @@ -0,0 +1,102 @@ +// This file is a part of the IncludeOS unikernel - www.includeos.org +// +// Copyright 2018 Oslo and Akershus University College of Applied Sciences +// and Alfred Bratterud +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include +#include "config.hpp" +#include "common.hpp" +#include + +#ifndef RAPIDJSON_HAS_STDSTRING + #define RAPIDJSON_HAS_STDSTRING 1 +#endif + +#ifndef RAPIDJSON_THROWPARSEEXCEPTION + #define RAPIDJSON_THROWPARSEEXCEPTION 1 +#endif + +#include +#include + +namespace uplink { + + Config Config::read() + { + MYINFO("Reading uplink config"); + + const auto& json = ::Config::get(); + + Expects(not json.empty() && "Config is empty"); + + using namespace rapidjson; + Document doc; + doc.Parse(json.data()); + + Expects(doc.IsObject() && "Malformed config"); + + Expects(doc.HasMember("uplink") && "Missing member \"uplink\""); + + auto& cfg = doc["uplink"]; + + Expects(cfg.HasMember("url") && cfg.HasMember("token") && "Missing url or/and token"); + + Config config_; + + config_.url = cfg["url"].GetString(); + config_.token = cfg["token"].GetString(); + + // Decide stack/interface + if(cfg.HasMember("index")) + { + auto& index = cfg["index"]; + + if(index.IsNumber()) + { + config_.inet = &net::Super_stack::get(index.GetInt()); + } + else + { + config_.inet = &net::Super_stack::get(index.GetString()); + } + } + // If not given, pick the first stack + else + { + config_.inet = &net::Super_stack::get(0); + } + + // Reboot on panic (optional) + if(cfg.HasMember("reboot")) + { + config_.reboot = cfg["reboot"].GetBool(); + } + + // Log over websocket (optional) + if(cfg.HasMember("ws_logging")) + { + config_.ws_logging = cfg["ws_logging"].GetBool(); + } + + // Serialize conntrack + if(cfg.HasMember("serialize_ct")) + { + config_.serialize_ct = cfg["serialize_ct"].GetBool(); + } + + return config_; + } + +} diff --git a/lib/uplink/config.hpp b/lib/uplink/config.hpp new file mode 100644 index 0000000000..83f0186adc --- /dev/null +++ b/lib/uplink/config.hpp @@ -0,0 +1,42 @@ +// This file is a part of the IncludeOS unikernel - www.includeos.org +// +// Copyright 2018 Oslo and Akershus University College of Applied Sciences +// and Alfred Bratterud +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#pragma once +#ifndef UPLINK_CONFIG_HPP +#define UPLINK_CONFIG_HPP + +#include +#include +#include + +namespace uplink { + + struct Config + { + net::Inet* inet; + std::string url; + std::string token; + bool reboot = true; + bool ws_logging = true; + bool serialize_ct = false; + + static Config read(); + }; + +} + +#endif diff --git a/lib/uplink/register_plugin.cpp b/lib/uplink/register_plugin.cpp index 26035064b7..1048ed60f6 100644 --- a/lib/uplink/register_plugin.cpp +++ b/lib/uplink/register_plugin.cpp @@ -35,9 +35,9 @@ void setup_uplink() MYINFO("Setting up WS uplink"); try { - auto& en0 = net::Super_stack::get(0); + auto config = Config::read(); - uplink = std::make_unique(en0); + uplink = std::make_unique(std::move(config)); OS::on_panic(uplink::on_panic); diff --git a/lib/uplink/starbase/vm.json b/lib/uplink/starbase/vm.json new file mode 100644 index 0000000000..4c03590b5d --- /dev/null +++ b/lib/uplink/starbase/vm.json @@ -0,0 +1,7 @@ +{ + "net" : [ + {"device" : "vmxnet3", "mac" : "c0:01:0a:00:00:2a"}, + {"device" : "vmxnet3", "mac" : "c0:01:0a:00:00:2b"}, + {"device" : "vmxnet3", "mac" : "c0:01:0a:00:00:2c"} + ] +} diff --git a/lib/uplink/ws_uplink.cpp b/lib/uplink/ws_uplink.cpp index f53835e318..14e6420164 100644 --- a/lib/uplink/ws_uplink.cpp +++ b/lib/uplink/ws_uplink.cpp @@ -39,20 +39,28 @@ #include "log.hpp" namespace uplink { - - const std::string WS_uplink::UPLINK_CFG_FILE{"config.json"}; constexpr std::chrono::seconds WS_uplink::heartbeat_interval; - WS_uplink::WS_uplink(net::Inet& inet) - : inet_{inet}, id_{inet.link_addr().to_string()}, + WS_uplink::WS_uplink(Config config) + : config_{std::move(config)}, + inet_{*config_.inet}, + id_{inet_.link_addr().to_string()}, parser_({this, &WS_uplink::handle_transport}), heartbeat_timer({this, &WS_uplink::on_heartbeat_timer}) { + if(liu::LiveUpdate::is_resumable() && OS::is_live_updated()) + { + MYINFO("Found resumable state, try restoring..."); + liu::LiveUpdate::resume("uplink", {this, &WS_uplink::restore}); + + if(liu::LiveUpdate::partition_exists("conntrack")) + liu::LiveUpdate::resume("conntrack", {this, &WS_uplink::restore_conntrack}); + } + Log::get().set_flush_handler({this, &WS_uplink::send_log}); liu::LiveUpdate::register_partition("uplink", {this, &WS_uplink::store}); - read_config(); CHECK(config_.reboot, "Reboot on panic"); CHECK(config_.serialize_ct, "Serialize Conntrack"); @@ -61,7 +69,7 @@ namespace uplink { if(inet_.is_configured()) { - start(inet); + start(inet_); } // if not, register on config event else @@ -78,15 +86,6 @@ namespace uplink { Expects(inet.ip_addr() != 0 && "Network interface not configured"); Expects(not config_.url.empty()); - if(liu::LiveUpdate::is_resumable() && OS::is_live_updated()) - { - MYINFO("Found resumable state, try restoring..."); - liu::LiveUpdate::resume("uplink", {this, &WS_uplink::restore}); - - if(liu::LiveUpdate::partition_exists("conntrack")) - liu::LiveUpdate::resume("conntrack", {this, &WS_uplink::restore_conntrack}); - } - client_ = std::make_unique(inet.tcp(), http::Client::Request_handler{this, &WS_uplink::inject_token}); @@ -95,16 +94,22 @@ namespace uplink { void WS_uplink::store(liu::Storage& store, const liu::buffer_t*) { - liu::Storage::uid id = 0; - // BINARY HASH - store.add_string(id++, binary_hash_); + store.add_string(0, binary_hash_); + // nanos timestamp of when update begins + store.add (1, OS::nanos_since_boot()); } void WS_uplink::restore(liu::Restore& store) { // BINARY HASH binary_hash_ = store.as_string(); store.go_next(); + + // calculate update cycles taken + uint64_t prev_nanos = store.as_type (); store.go_next(); + this->update_time_taken = OS::nanos_since_boot() - prev_nanos; + + INFO2("Update took %.3f millis", this->update_time_taken / 1.0e6); } std::string WS_uplink::auth_data() const @@ -320,54 +325,6 @@ namespace uplink { }); } - void WS_uplink::read_config() - { - MYINFO("Reading uplink config"); - - const auto& cfg = ::Config::get(); - - Expects(not cfg.empty() && "Config is empty"); - - parse_config({cfg.data(), cfg.size()}); - } - - void WS_uplink::parse_config(const std::string& json) - { - using namespace rapidjson; - Document doc; - doc.Parse(json.data()); - - Expects(doc.IsObject() && "Malformed config"); - - Expects(doc.HasMember("uplink") && "Missing member \"uplink\""); - - auto& cfg = doc["uplink"]; - - Expects(cfg.HasMember("url") && cfg.HasMember("token") && "Missing url or/and token"); - - config_.url = cfg["url"].GetString(); - config_.token = cfg["token"].GetString(); - - // Reboot on panic (optional) - if(cfg.HasMember("reboot")) - { - config_.reboot = cfg["reboot"].GetBool(); - } - - // Log over websocket (optional) - if(cfg.HasMember("ws_logging")) - { - config_.ws_logging = cfg["ws_logging"].GetBool(); - } - - // Serialize conntrack - if(cfg.HasMember("serialize_ct")) - { - config_.serialize_ct = cfg["serialize_ct"].GetBool(); - } - - } - template void serialize_stack(Writer& writer, const Stack_ptr& stack) { @@ -411,6 +368,10 @@ namespace uplink { writer.StartObject(); + const auto& sysinfo = __arch_system_info(); + writer.Key("uuid"); + writer.String(sysinfo.uuid); + writer.Key("version"); writer.String(OS::version()); @@ -423,9 +384,18 @@ namespace uplink { writer.String(binary_hash_); } + if(update_time_taken > 0) + { + writer.Key("update_time_taken"); + writer.Uint64(update_time_taken); + } + writer.Key("arch"); writer.String(OS::arch()); + writer.Key("physical_ram"); + writer.Uint64(sysinfo.physical_memory); + // CPU Features auto features = CPUID::detect_features_str(); writer.Key("cpu_features"); diff --git a/lib/uplink/ws_uplink.hpp b/lib/uplink/ws_uplink.hpp index 6688f08c55..90e6d02d67 100644 --- a/lib/uplink/ws_uplink.hpp +++ b/lib/uplink/ws_uplink.hpp @@ -20,6 +20,7 @@ #define UPLINK_WS_UPLINK_HPP #include "transport.hpp" +#include "config.hpp" #include #include @@ -27,26 +28,16 @@ #include #include #include +#include namespace uplink { class WS_uplink { public: - static const std::string UPLINK_CFG_FILE; - static constexpr auto heartbeat_interval = 10s; static constexpr auto heartbeat_retries = 3; - struct Config - { - std::string url; - std::string token; - bool reboot = true; - bool ws_logging = true; - bool serialize_ct = false; - }; - - WS_uplink(net::Inet&); + WS_uplink(Config config); void start(net::Inet&); @@ -78,6 +69,8 @@ class WS_uplink { void panic(const char* why); private: + Config config_; + net::Inet& inet_; std::unique_ptr client_; net::WebSocket_ptr ws_; @@ -85,8 +78,6 @@ class WS_uplink { std::string token_; std::string binary_hash_; - Config config_; - Transport_parser parser_; Timer retry_timer; @@ -96,7 +87,9 @@ class WS_uplink { std::vector logbuf_; Timer heartbeat_timer; - int64_t last_ping; + RTC::timestamp_t last_ping; + + RTC::timestamp_t update_time_taken = 0; void inject_token(http::Request& req, http::Client::Options&, const http::Client::Host) { @@ -123,10 +116,6 @@ class WS_uplink { void parse_transport(net::WebSocket::Message_ptr msg); - void read_config(); - - void parse_config(const std::string& cfg); - void store(liu::Storage& store, const liu::buffer_t*); void restore(liu::Restore& store); diff --git a/linux/src/arch.cpp b/linux/src/arch.cpp index a0c8e181db..300b7591e4 100644 --- a/linux/src/arch.cpp +++ b/linux/src/arch.cpp @@ -17,7 +17,7 @@ bool OS::is_panicking() noexcept void __arch_subscribe_irq(uint8_t) {} -int64_t __arch_time_now() noexcept { +uint64_t __arch_system_time() noexcept { return time(0); } diff --git a/linux/src/os.cpp b/linux/src/os.cpp index 399b8db688..795d32a777 100644 --- a/linux/src/os.cpp +++ b/linux/src/os.cpp @@ -4,11 +4,18 @@ #include #include #include -int64_t OS::micros_since_boot() noexcept +#include +uint64_t __arch_system_time() noexcept { - struct timeval tv; - gettimeofday(&tv,NULL); - return tv.tv_sec*(uint64_t)1000000+tv.tv_usec; + struct timespec tv; + clock_gettime(CLOCK_REALTIME, &tv); + return tv.tv_sec*(uint64_t)1000000000ull+tv.tv_nsec; +} +timespec __arch_wall_clock() noexcept +{ + struct timespec tv; + clock_gettime(CLOCK_REALTIME, &tv); + return tv; } void OS::event_loop() @@ -69,9 +76,6 @@ uintptr_t OS::heap_usage() noexcept { #include #include RTC::timestamp_t RTC::booted_at = time(0); -RTC::timestamp_t OS::boot_timestamp() { - return RTC::boot_timestamp(); -} #include int SMP::cpu_id() noexcept { @@ -103,14 +107,14 @@ extern "C" void alarm_handler(int sig) { (void) sig; } -static void begin_timer(std::chrono::microseconds usec) +static void begin_timer(std::chrono::nanoseconds usec) { using namespace std::chrono; auto secs = duration_cast (usec); struct itimerspec it; it.it_value.tv_sec = secs.count(); - it.it_value.tv_nsec = 1000 * (usec.count() - secs.count() * 1000000); + it.it_value.tv_nsec = usec.count() - secs.count() * 1000000000ull; timer_settime(timer_id, 0, &it, nullptr); } diff --git a/linux/userspace/CMakeLists.txt b/linux/userspace/CMakeLists.txt index 72e3275b79..69bef9499f 100644 --- a/linux/userspace/CMakeLists.txt +++ b/linux/userspace/CMakeLists.txt @@ -50,7 +50,7 @@ set(NET_SOURCES ) if (CUSTOM_BOTAN) set(NET_SOURCES ${NET_SOURCES} - "${IOS}/src/net/http/secure_server.cpp") + "${IOS}/src/net/https/botan_server.cpp") endif() set(OS_SOURCES diff --git a/seed/service/CMakeLists.txt b/seed/service/CMakeLists.txt index c199102f0e..e898549165 100644 --- a/seed/service/CMakeLists.txt +++ b/seed/service/CMakeLists.txt @@ -24,6 +24,8 @@ set(DRIVERS # virtionet # Virtio networking # virtioblock # Virtio block device # ... Others from IncludeOS/src/drivers + + #boot_logger # Enable lots of logging from boot stage ) set(PLUGINS diff --git a/seed/service/service.cpp b/seed/service/service.cpp index eff58d76d2..eff95fd30b 100644 --- a/seed/service/service.cpp +++ b/seed/service/service.cpp @@ -22,4 +22,5 @@ void Service::start(const std::string& args) { printf("Hello world - OS included!\n"); printf("Args = %s\n", args.c_str()); + printf("Try giving the service less memory, eg. 5MB in vm.json\n"); } diff --git a/seed/service/vm.json b/seed/service/vm.json new file mode 100644 index 0000000000..023195a1eb --- /dev/null +++ b/seed/service/vm.json @@ -0,0 +1,4 @@ +{ + "net" : [], + "mem" : 64 +} diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 75d9a24d50..3cf3e1fbcf 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -76,7 +76,6 @@ set_source_files_properties(crt/cxx_abi.cpp PROPERTIES COMPILE_FLAGS "-fno-sanit add_subdirectory(arch/${ARCH}) add_subdirectory(platform/x86_pc) add_subdirectory(platform/x86_nano) -add_subdirectory(platform/x86_linux) if(WITH_SOLO5) add_subdirectory(platform/x86_solo5) endif(WITH_SOLO5) diff --git a/src/arch/x86_64/arch_start.asm b/src/arch/x86_64/arch_start.asm index cec61dfd91..10a98cbb51 100644 --- a/src/arch/x86_64/arch_start.asm +++ b/src/arch/x86_64/arch_start.asm @@ -20,11 +20,12 @@ extern kernel_start extern __multiboot_magic extern __multiboot_addr -%define PAGE_SIZE 0x1000 %define P4_TAB 0x1000 -%define P3_TAB 0x2000 ;; - 0x5fff -%define P2_TAB 0x100000 -%define STACK_LOCATION 0x9ffff0 +%define P3_TAB 0x2000 +%define P2_TAB 0x3000 ;; - 0x7fff +%define NUM_P3_ENTRIES 5 +%define NUM_P2_ENTRIES 2560 +%define STACK_LOCATION 0x9D3F0 [BITS 32] __arch_start: @@ -36,7 +37,8 @@ __arch_start: ;; address for Page Map Level 4 mov edi, P4_TAB mov cr3, edi - mov ecx, 0x3000 / 0x4 + ;; clear out P4 and P3 + mov ecx, 0x2000 / 0x4 xor eax, eax ; Nullify the A-register. rep stosd @@ -44,32 +46,33 @@ __arch_start: mov edi, P4_TAB mov DWORD [edi], P3_TAB | 0x3 ;; present+write - ;; create 4x page directory pointer table entries + ;; create 1GB mappings + mov ecx, NUM_P3_ENTRIES mov edi, P3_TAB - mov ebx, P2_TAB | 0x3 ;; present + write - mov DWORD [edi], ebx + mov eax, P2_TAB | 0x3 ;; present + write + mov ebx, 0x0 + +.p3_loop: + mov DWORD [edi], eax ;; Low word + mov DWORD [edi+4], ebx ;; High word + add eax, 1 << 12 ;; page increments + adc ebx, 0 ;; increment high word when CF set add edi, 8 - add ebx, 0x1000 - mov DWORD [edi], ebx - add edi, 8 - add ebx, 0x1000 - mov DWORD [edi], ebx - add edi, 8 - add ebx, 0x1000 - mov DWORD [edi], ebx + loop .p3_loop - ;; create page directory entries - mov ecx, 512*4 ;; num entries + ;; create 2MB mappings + mov ecx, NUM_P2_ENTRIES mov edi, P2_TAB - - ;; start at address 0x0 - mov ebx, 0x0 | 0x3 | 1 << 7 ;; present+write + huge -.ptd_loop: - mov DWORD [edi], ebx ;; Assign the physical adress to lower 32-bits - mov DWORD [edi+4], 0x0 ;; Zero out the rest of the 64-bit word - add ebx, 1 << 21 ;; 2MB increments + mov eax, 0x0 | 0x3 | 1 << 7 ;; present + write + huge + mov ebx, 0x0 + +.p2_loop: + mov DWORD [edi], eax ;; Low word + mov DWORD [edi+4], ebx ;; High word + add eax, 1 << 21 ;; 2MB increments + adc ebx, 0 ;; increment high word when CF set add edi, 8 - loop .ptd_loop + loop .p2_loop ;; enable PAE mov eax, cr4 diff --git a/src/arch/x86_64/linker.ld b/src/arch/x86_64/linker.ld index c40f0ac27b..7d2ccaaba9 100644 --- a/src/arch/x86_64/linker.ld +++ b/src/arch/x86_64/linker.ld @@ -20,7 +20,7 @@ ENTRY(_start) SECTIONS { - PROVIDE ( _ELF_START_ = . + 0xA00000); + PROVIDE ( _ELF_START_ = . + 0x100000); PROVIDE ( _LOAD_START_ = _ELF_START_); /* For convenience w. multiboot */ . = _ELF_START_; diff --git a/src/chainload/service.cpp b/src/chainload/service.cpp index ff3c726faa..2015882c2f 100644 --- a/src/chainload/service.cpp +++ b/src/chainload/service.cpp @@ -20,6 +20,7 @@ #include #include #include +#include extern bool os_enable_boot_logging; @@ -40,8 +41,10 @@ void promote_mod_to_kernel() Expects (bootinfo->mods_count); auto* mod = (multiboot_module_t*)bootinfo->mods_addr; - // Set command line param to mod param - bootinfo->cmdline = mod->cmdline; + // Move commandline to a relatively safe area + const uintptr_t RELATIVELY_SAFE_AREA = 0x8000; + strcpy((char*) RELATIVELY_SAFE_AREA, (const char*) mod->cmdline); + bootinfo->cmdline = RELATIVELY_SAFE_AREA; // Subtract one module (bootinfo->mods_count)--; diff --git a/src/drivers/CMakeLists.txt b/src/drivers/CMakeLists.txt index 5e2be9517f..b9c982af49 100644 --- a/src/drivers/CMakeLists.txt +++ b/src/drivers/CMakeLists.txt @@ -24,6 +24,9 @@ add_dependencies(virtionet PrecompiledLibraries) add_library(vmxnet3 STATIC vmxnet3.cpp) add_dependencies(vmxnet3 PrecompiledLibraries) +add_library(e1000 STATIC e1000.cpp) +add_dependencies(e1000 PrecompiledLibraries) + add_library(ip4_reassembly STATIC "ip4_reassembly.cpp") add_dependencies(ip4_reassembly PrecompiledLibraries) @@ -45,7 +48,7 @@ add_dependencies(vga_output PrecompiledLibraries) install(TARGETS ide_readwrite ide_readonly ide_writeonly virtionet virtioblk - vmxnet3 + vmxnet3 e1000 ip4_reassembly heap_debugging boot_logger disk_logger disklog_reader diff --git a/src/drivers/disk_logger.cpp b/src/drivers/disk_logger.cpp index 2580002149..53b72b20c9 100644 --- a/src/drivers/disk_logger.cpp +++ b/src/drivers/disk_logger.cpp @@ -59,7 +59,7 @@ static void disk_logger_write(const char* data, size_t len) header.timestamp = RTC::now(); } else { - header.timestamp = OS::micros_since_boot() / 1000000; + header.timestamp = OS::nanos_since_boot() / 1000000000ull; } __builtin_memcpy(logbuffer->data(), &header, sizeof(log_structure)); diff --git a/src/drivers/e1000.cpp b/src/drivers/e1000.cpp new file mode 100644 index 0000000000..2f2a8a7fdc --- /dev/null +++ b/src/drivers/e1000.cpp @@ -0,0 +1,325 @@ +// This file is a part of the IncludeOS unikernel - www.includeos.org +// +// Copyright 2015 Oslo and Akershus University College of Applied Sciences +// and Alfred Bratterud +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "e1000.hpp" +#include "e1000_defs.hpp" +#include +#include +#include +#include +#include +#include +// loosely based on OSdev article http://wiki.osdev.org/Intel_Ethernet_i217 + +static int deferred_event = 0; +static std::vector deferred_devices; + +e1000::e1000(hw::PCI_Device& d) : + Link(Link_protocol{{this, &e1000::transmit}, mac()}, bufstore_), + m_pcidev(d), bufstore_{1024, 2048} +{ + INFO("e1000", "Intel Pro/1000 Ethernet Adapter (rev=%#x)", d.rev_id()); + + // legacy IRQ from PCI + uint32_t value = d.read_dword(PCI::CONFIG_INTR); + this->m_irq = value & 0xFF; + assert(this->m_irq != 0xFF); + + Events::get().subscribe(this->m_irq, {this, &e1000::event_handler}); + __arch_enable_legacy_irq(this->m_irq); + INFO2("Subscribed on IRQ %u", this->m_irq); + + if (deferred_event == 0) + { + deferred_event = Events::get().subscribe(&e1000::do_deferred_xmit); + } + + // shared-memory & I/O address + this->shm_base = d.get_bar(0); + this->io_base = d.iobase(); + + // initialize + write_cmd(REG_CTRL, (1 << 26)); + + this->link_up(); + + // have to clear out the multicast filter, otherwise shit breaks + for(int i = 0; i < 128; i++) + write_cmd(0x5200 + i*4, 0); + for(int i = 0; i < 64; i++) + write_cmd(0x4000 + i*4, 0); + + /* Disables flow control */ + write_cmd(0x0028, 0); + write_cmd(0x002c, 0); + write_cmd(0x0030, 0); + write_cmd(0x0170, 0); + + this->intr_enable(); + + // initialize RX + for (int i = 0; i < NUM_RX_DESC; i++) { + rx.desc[i].addr = (uint64_t) new_rx_packet(); + rx.desc[i].status = 0; + } + + uint64_t rx_desc_ptr = (uint64_t) rx.desc; + write_cmd(REG_RXDESCLO, rx_desc_ptr); + write_cmd(REG_RXDESCHI, 0); + write_cmd(REG_RXDESCLEN, NUM_RX_DESC * sizeof(rx_desc)); + write_cmd(REG_RXDESCHEAD, 0); + write_cmd(REG_RXDESCTAIL, NUM_RX_DESC-1); + +#define BROADCAST_ENABLE 0x8000 +#define STRIP_ETH_CRC 0x4000000 +#define RX_BUFFER_2048 0x0 + uint32_t rx_flags = RX_BUFFER_2048 | STRIP_ETH_CRC | BROADCAST_ENABLE + | (1 << 5) | (0 << 8) | (0 << 4) | (0 << 3) | ( 1 << 2); + write_cmd(REG_RCTRL, rx_flags); + + // initialize TX + memset(tx.desc, 0, sizeof(tx.desc)); + for (int i = 0; i < NUM_TX_DESC; i++) { + tx.desc[i].status = 0x1; // done + } + + uint64_t tx_desc_ptr = (uint64_t) tx.desc; + write_cmd(REG_TXDESCLO, tx_desc_ptr); + write_cmd(REG_TXDESCHI, 0); + write_cmd(REG_TXDESCLEN, NUM_TX_DESC * sizeof(tx_desc)); + write_cmd(REG_TXDESCHEAD, 0); + write_cmd(REG_TXDESCTAIL, NUM_TX_DESC); + + write_cmd(REG_TCTRL, (1 << 1) | (1 << 3)); + + // get MAC address + this->retrieve_hw_addr(); + + // GO! + uint32_t flags = read_cmd(REG_RCTRL); + write_cmd(REG_RCTRL, flags | RCTL_EN); + // verify device status + assert(read_cmd(REG_STATUS) == 0x80080783); +} + +uint32_t e1000::read_cmd(uint16_t cmd) +{ + //hw::outl(this->io_base, cmd); + //return hw::inl(this->io_base + 4); + return *(volatile uint32_t*) (this->shm_base + cmd); +} +void e1000::write_cmd(uint16_t cmd, uint32_t val) +{ + //hw::outl(this->io_base, cmd); + //hw::outl(this->io_base + 4, val); + *(volatile uint32_t*) (this->shm_base + cmd) = val; +} + +void e1000::retrieve_hw_addr() +{ + auto* mac_src = (const char*) (this->shm_base + 0x5400); + memcpy(&this->hw_addr, mac_src, sizeof(hw_addr)); + INFO2("MAC address: %s", hw_addr.to_string().c_str()); +} + +void e1000::link_up() +{ + uint32_t flags = read_cmd(REG_CTRL); + write_cmd(REG_CTRL, flags | ECTRL_SLU); + + int success = (read_cmd(REG_STATUS) & (1 << 1)) != 0; + INFO("e1000", "Link up: %s", (success) ? "true" : "false"); +} + +void e1000::intr_enable() +{ + write_cmd(REG_IMASK, 0x1F6DC); + write_cmd(REG_IMASK, 0xFF & ~4); + read_cmd(0xC0); +} + +net::Packet_ptr +e1000::recv_packet(uint8_t* data, uint16_t size) +{ + auto* ptr = (net::Packet*) (data - DRIVER_OFFSET - sizeof(net::Packet)); + new (ptr) net::Packet( + DRIVER_OFFSET, + size, + DRIVER_OFFSET + packet_len(), + &bufstore()); + return net::Packet_ptr(ptr); +} +net::Packet_ptr +e1000::create_packet(int link_offset) +{ + auto buffer = bufstore().get_buffer(); + auto* ptr = (net::Packet*) buffer.addr; + new (ptr) net::Packet( + DRIVER_OFFSET + link_offset, + 0, + DRIVER_OFFSET + packet_len(), + buffer.bufstore); + return net::Packet_ptr(ptr); +} +uintptr_t e1000::new_rx_packet() +{ + auto* pkt = bufstore().get_buffer().addr; + return (uintptr_t) &pkt[sizeof(net::Packet) + DRIVER_OFFSET]; +} + +void e1000::event_handler() +{ + uint32_t status = read_cmd(0xC0); + // see: e1000_regs.h + //printf("e1000: event %x received\n", status); + + // empty transmit queue + if (status & 0x02) + { + //printf("tx queue empty!\n"); + if (sendq) { + transmit(std::move(sendq)); + } + if (can_transmit()) { + transmit_queue_available_event(NUM_TX_DESC); + } + } + // link status change + if (status & 0x04) + { + this->link_up(); + } + if (status & 0x40) + { + printf("rx overrun!\n"); + } + // rx timer interrupt + if (status & 0x80) + { + recv_handler(); + } +} + +void e1000::recv_handler() +{ + uint16_t old_idx = 0xffff; + + while (rx.desc[rx.current].status & 1) + { + auto& tk = rx.desc[rx.current]; + auto* buf = (uint8_t*) tk.addr; + + //printf("e1000: recv %u bytes\n", tk.length); + auto pkt = recv_packet(buf, tk.length); + Link_layer::receive(std::move(pkt)); + + // give new buffer + tk.addr = (uint64_t) this->new_rx_packet(); + tk.status = 0; + // go to next index + old_idx = rx.current; + rx.current = (rx.current + 1) % NUM_RX_DESC; + } + if (old_idx != 0xffff) + write_cmd(REG_RXDESCTAIL, old_idx); +} + +void e1000::transmit(net::Packet_ptr pckt) +{ + if (sendq == nullptr) + sendq = std::move(pckt); + else + sendq->chain(std::move(pckt)); + // send as much as possible from sendq + while (sendq != nullptr && can_transmit()) + { + auto next = sendq->detach_tail(); + // transmit released buffer + auto* packet = sendq.release(); + transmit_data(packet->buf() + DRIVER_OFFSET, packet->size()); + // next is the new sendq + sendq = std::move(next); + } +} +bool e1000::can_transmit() +{ + return (tx.desc[tx.current].status & 0xFF) == 0x1; +} +void e1000::transmit_data(uint8_t* data, uint16_t length) +{ + auto& tk = tx.desc[tx.current]; + assert(tk.status == 0x1 && "Descriptor must be done"); + + if (tk.addr != 0x0) { + auto* packet = (net::Packet*) (tk.addr - DRIVER_OFFSET - sizeof(net::Packet)); + delete packet; // call deleter on Packet to release it + } + //printf("e1000: xmit %p -> %u bytes\n", data, length); + tk.addr = (uint64_t) data; + tk.length = length; + tk.cmd = (1 << 3) | 0x3; + tk.status = 0; + + tx.current = (tx.current + 1) % NUM_TX_DESC; + if (tx.deferred == false) + { + tx.deferred = true; + deferred_devices.push_back(this); + Events::get().trigger_event(deferred_event); + } +} +void e1000::xmit_kick() +{ + write_cmd(REG_TXDESCTAIL, tx.current); + tx.deferred = false; +} +void e1000::do_deferred_xmit() +{ + for (auto& dev : deferred_devices) + dev->xmit_kick(); + deferred_devices.clear(); +} + +void e1000::flush() +{ + this->transmit(std::move(sendq)); +} +void e1000::poll() +{ + this->recv_handler(); +} +void e1000::deactivate() +{ + uint32_t flags = read_cmd(REG_RCTRL); + write_cmd(REG_RCTRL, flags & ~RCTL_EN); +} +void e1000::move_to_this_cpu() +{ + // TODO: implement me +} + +#include +__attribute__((constructor)) +static void register_func() +{ + PCI_manager::register_nic(PCI::VENDOR_INTEL, 0x109A, &e1000::new_instance); + PCI_manager::register_nic(PCI::VENDOR_INTEL, 0x100E, &e1000::new_instance); + PCI_manager::register_nic(PCI::VENDOR_INTEL, 0x100F, &e1000::new_instance); + PCI_manager::register_nic(PCI::VENDOR_INTEL, 0x153A, &e1000::new_instance); + PCI_manager::register_nic(PCI::VENDOR_INTEL, 0x1539, &e1000::new_instance); + PCI_manager::register_nic(PCI::VENDOR_INTEL, 0x10EA, &e1000::new_instance); +} diff --git a/src/drivers/e1000.hpp b/src/drivers/e1000.hpp new file mode 100644 index 0000000000..ecf1443d5e --- /dev/null +++ b/src/drivers/e1000.hpp @@ -0,0 +1,138 @@ +// This file is a part of the IncludeOS unikernel - www.includeos.org +// +// Copyright 2015 Oslo and Akershus University College of Applied Sciences +// and Alfred Bratterud +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include +#include +#include +#include + +class e1000 : public net::Link_layer +{ +public: + using Link = net::Link_layer; + using Link_protocol = Link::Protocol; + static const int DRIVER_OFFSET = 2; + static const int NUM_TX_DESC = 64; + static const int NUM_RX_DESC = 128; + + static std::unique_ptr new_instance(hw::PCI_Device& d) + { return std::make_unique(d); } + + const char* driver_name() const override { + return "e1000"; + } + + const MAC::Addr& mac() const noexcept override { + return this->hw_addr; + } + + uint16_t MTU() const noexcept override { + return 1500; + } + + uint16_t packet_len() const noexcept { + return sizeof(net::ethernet::Header) + MTU(); + } + + net::downstream create_physical_downstream() override + { return {this, &e1000::transmit}; } + + net::Packet_ptr create_packet(int) override; + + size_t frame_offset_device() override + { return DRIVER_OFFSET; }; + + /** Linklayer input. Hooks into IP-stack bottom, w.DOWNSTREAM data.*/ + void transmit(net::Packet_ptr pckt); + + /** Constructor. @param pcidev an initialized PCI device. */ + e1000(hw::PCI_Device& pcidev); + + /** Space available in the transmit queue, in packets */ + size_t transmit_queue_available() override { + return 1; + } + + void flush() override; + + void deactivate() override; + + void move_to_this_cpu() override; + + void poll() override; + +private: + void intr_enable(); + void intr_disable(); + void link_up(); + void retrieve_hw_addr(); + + uint32_t read_cmd(uint16_t cmd); + void write_cmd(uint16_t cmd, uint32_t val); + + net::Packet_ptr recv_packet(uint8_t*, uint16_t); + uintptr_t new_rx_packet(); + void event_handler(); + void recv_handler(); + bool can_transmit(); + void transmit_data(uint8_t*, uint16_t); + void xmit_kick(); + static void do_deferred_xmit(); + + hw::PCI_Device& m_pcidev; + std::vector irqs; + uint16_t io_base; + uintptr_t shm_base; + MAC::Addr hw_addr; + + uint8_t m_irq; + + struct rx_desc + { + uint64_t addr; + uint16_t length; + uint16_t checksum; + uint8_t status; + uint8_t errors; + uint16_t special; + } __attribute__((packed, aligned(16))); + struct tx_desc + { + uint64_t addr; + uint16_t length; + uint8_t cso; + uint8_t cmd; + uint8_t status; + uint8_t css; + uint16_t special; + } __attribute__((packed, aligned(16))); + + struct rx_t { + rx_desc desc[NUM_RX_DESC]; + uint16_t current = 0; + } rx; + + struct tx_t { + tx_desc desc[NUM_TX_DESC]; + uint16_t current = 0; + bool deferred = false; + } tx; + + // sendq as packet chain + net::Packet_ptr sendq = nullptr; + net::BufferStore bufstore_; +}; diff --git a/src/drivers/e1000_defs.hpp b/src/drivers/e1000_defs.hpp new file mode 100644 index 0000000000..ab51f3a6a6 --- /dev/null +++ b/src/drivers/e1000_defs.hpp @@ -0,0 +1,107 @@ +// This file is a part of the IncludeOS unikernel - www.includeos.org +// +// Copyright 2015 Oslo and Akershus University College of Applied Sciences +// and Alfred Bratterud +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#pragma once +// see: http://wiki.osdev.org/Intel_Ethernet_i217 + +#define REG_CTRL 0x0000 +#define REG_STATUS 0x0008 +#define REG_EEPROM 0x0014 +#define REG_CTRL_EXT 0x0018 +#define REG_IMASK 0x00D0 +#define REG_RCTRL 0x0100 +#define REG_RXDESCLO 0x2800 +#define REG_RXDESCHI 0x2804 +#define REG_RXDESCLEN 0x2808 +#define REG_RXDESCHEAD 0x2810 +#define REG_RXDESCTAIL 0x2818 + +#define REG_TCTRL 0x0400 +#define REG_TXDESCLO 0x3800 +#define REG_TXDESCHI 0x3804 +#define REG_TXDESCLEN 0x3808 +#define REG_TXDESCHEAD 0x3810 +#define REG_TXDESCTAIL 0x3818 + + +#define REG_RDTR 0x2820 // RX Delay Timer Register +#define REG_RXDCTL 0x3828 // RX Descriptor Control +#define REG_RADV 0x282C // RX Int. Absolute Delay Timer +#define REG_RSRPD 0x2C00 // RX Small Packet Detect Interrupt + + + +#define REG_TIPG 0x0410 // Transmit Inter Packet Gap +#define ECTRL_SLU 0x40 //set link up + + +#define RCTL_EN (1 << 1) // Receiver Enable +#define RCTL_SBP (1 << 2) // Store Bad Packets +#define RCTL_UPE (1 << 3) // Unicast Promiscuous Enabled +#define RCTL_MPE (1 << 4) // Multicast Promiscuous Enabled +#define RCTL_LPE (1 << 5) // Long Packet Reception Enable +#define RCTL_LBM_NONE (0 << 6) // No Loopback +#define RCTL_LBM_PHY (3 << 6) // PHY or external SerDesc loopback +#define RTCL_RDMTS_HALF (0 << 8) // Free Buffer Threshold is 1/2 of RDLEN +#define RTCL_RDMTS_QUARTER (1 << 8) // Free Buffer Threshold is 1/4 of RDLEN +#define RTCL_RDMTS_EIGHTH (2 << 8) // Free Buffer Threshold is 1/8 of RDLEN +#define RCTL_MO_36 (0 << 12) // Multicast Offset - bits 47:36 +#define RCTL_MO_35 (1 << 12) // Multicast Offset - bits 46:35 +#define RCTL_MO_34 (2 << 12) // Multicast Offset - bits 45:34 +#define RCTL_MO_32 (3 << 12) // Multicast Offset - bits 43:32 +#define RCTL_BAM (1 << 15) // Broadcast Accept Mode +#define RCTL_VFE (1 << 18) // VLAN Filter Enable +#define RCTL_CFIEN (1 << 19) // Canonical Form Indicator Enable +#define RCTL_CFI (1 << 20) // Canonical Form Indicator Bit Value +#define RCTL_DPF (1 << 22) // Discard Pause Frames +#define RCTL_PMCF (1 << 23) // Pass MAC Control Frames +#define RCTL_SECRC (1 << 26) // Strip Ethernet CRC + +// Buffer Sizes +#define RCTL_BSIZE_256 (3 << 16) +#define RCTL_BSIZE_512 (2 << 16) +#define RCTL_BSIZE_1024 (1 << 16) +#define RCTL_BSIZE_2048 (0 << 16) +#define RCTL_BSIZE_4096 ((3 << 16) | (1 << 25)) +#define RCTL_BSIZE_8192 ((2 << 16) | (1 << 25)) +#define RCTL_BSIZE_16384 ((1 << 16) | (1 << 25)) + + +// Transmit Command + +#define CMD_EOP (1 << 0) // End of Packet +#define CMD_IFCS (1 << 1) // Insert FCS +#define CMD_IC (1 << 2) // Insert Checksum +#define CMD_RS (1 << 3) // Report Status +#define CMD_RPS (1 << 4) // Report Packet Sent +#define CMD_VLE (1 << 6) // VLAN Packet Enable +#define CMD_IDE (1 << 7) // Interrupt Delay Enable + + +// TCTL Register + +#define TCTL_EN (1 << 1) // Transmit Enable +#define TCTL_PSP (1 << 3) // Pad Short Packets +#define TCTL_CT_SHIFT 4 // Collision Threshold +#define TCTL_COLD_SHIFT 12 // Collision Distance +#define TCTL_SWXOFF (1 << 22) // Software XOFF Transmission +#define TCTL_RTLC (1 << 24) // Re-transmit on Late Collision + +#define TSTA_DD (1 << 0) // Descriptor Done +#define TSTA_EC (1 << 1) // Excess Collisions +#define TSTA_LC (1 << 2) // Late Collision +#define LSTA_TU (1 << 3) // Transmit Underrun diff --git a/src/hw/msi.cpp b/src/hw/msi.cpp index e17907eb25..aeae78b87d 100644 --- a/src/hw/msi.cpp +++ b/src/hw/msi.cpp @@ -122,10 +122,10 @@ namespace hw // get number of vectors we can get notifications from this->vector_cnt = (func & MSIX_TBL_SIZE) + 1; - if (vector_cnt > 32) { + if (vector_cnt > 2048) { printf("table addr: %p pba addr: %p vectors: %u\n", (void*) table_addr, (void*) pba_addr, vectors()); - assert(vectors() <= 32 && "Unreasonably many MSI-X vectors"); + assert(vectors() <= 2048 && "Unreasonably many MSI-X vectors"); } // reset all entries diff --git a/src/kernel/cpuid.cpp b/src/kernel/cpuid.cpp index 728fea656a..6de96cfd4c 100644 --- a/src/kernel/cpuid.cpp +++ b/src/kernel/cpuid.cpp @@ -280,18 +280,18 @@ bool CPUID::is_amd_cpu() noexcept { auto result = cpuid(0, 0); return - memcmp(reinterpret_cast(&result.EBX), "htuA", 4) == 0 - && memcmp(reinterpret_cast(&result.EDX), "itne", 4) == 0 - && memcmp(reinterpret_cast(&result.ECX), "DMAc", 4) == 0; + memcmp((char*) &result.EBX, "Auth", 4) == 0 + && memcmp((char*) &result.EDX, "enti", 4) == 0 + && memcmp((char*) &result.ECX, "cAMD", 4) == 0; } bool CPUID::is_intel_cpu() noexcept { auto result = cpuid(0, 0); return - memcmp(reinterpret_cast(&result.EBX), "Genu", 4) == 0 - && memcmp(reinterpret_cast(&result.EDX), "ineI", 4) == 0 - && memcmp(reinterpret_cast(&result.ECX), "ntel", 4) == 0; + memcmp((char*) &result.EBX, "Genu", 4) == 0 + && memcmp((char*) &result.EDX, "ineI", 4) == 0 + && memcmp((char*) &result.ECX, "ntel", 4) == 0; } bool CPUID::has_feature(Feature f) diff --git a/src/kernel/multiboot.cpp b/src/kernel/multiboot.cpp index 76a541c9dd..85e24ca16a 100644 --- a/src/kernel/multiboot.cpp +++ b/src/kernel/multiboot.cpp @@ -114,9 +114,9 @@ void OS::multiboot(uint32_t boot_addr) } if (bootinfo_->flags & MULTIBOOT_INFO_CMDLINE) { - INFO2("* Booted with parameters @ 0x%x: %s", bootinfo_->cmdline, - reinterpret_cast(bootinfo_->cmdline)); - OS::cmdline = reinterpret_cast(bootinfo_->cmdline); + const auto* cmdline = (const char*) (uintptr_t) bootinfo_->cmdline; + INFO2("* Booted with parameters @ %p: %s", cmdline, cmdline); + OS::cmdline = strdup(cmdline); } if (bootinfo_->flags & MULTIBOOT_INFO_MEM_MAP) { diff --git a/src/kernel/os.cpp b/src/kernel/os.cpp index d6ffb38fc2..3b9027e84e 100644 --- a/src/kernel/os.cpp +++ b/src/kernel/os.cpp @@ -48,7 +48,7 @@ bool OS::power_ = true; bool OS::boot_sequence_passed_ = false; bool OS::m_is_live_updated = false; bool OS::m_block_drivers_ready = false; -MHz OS::cpu_mhz_ {-1}; +KHz OS::cpu_khz_ {-1}; uintptr_t OS::liveupdate_loc_ = 0; uintptr_t OS::memory_end_ = 0; uintptr_t OS::heap_max_ = (uintptr_t) -1; diff --git a/src/kernel/rtc.cpp b/src/kernel/rtc.cpp index 91d9718de8..d32764e9fb 100644 --- a/src/kernel/rtc.cpp +++ b/src/kernel/rtc.cpp @@ -5,5 +5,5 @@ RTC::timestamp_t RTC::booted_at; void RTC::init() { // set boot timestamp - booted_at = __arch_time_now(); + booted_at = now(); } diff --git a/src/kernel/scoped_profiler.cpp b/src/kernel/scoped_profiler.cpp index 236251c20e..0dcfebaa0e 100644 --- a/src/kernel/scoped_profiler.cpp +++ b/src/kernel/scoped_profiler.cpp @@ -20,6 +20,7 @@ #include #include #include +#include #include #include #include @@ -33,12 +34,12 @@ void ScopedProfiler::record() // Select which guard to use (this is only done once) if (UNLIKELY(guard == Guard::NOT_SELECTED)) { - if (CPUID::is_intel_cpu() && CPUID::has_feature(CPUID::Feature::SSE2)) + if (CPUID::is_intel_cpu()) { debug2("ScopedProfiler selected guard LFENCE\n"); guard = Guard::LFENCE; } - else if (CPUID::is_amd_cpu() && CPUID::has_feature(CPUID::Feature::SSE2)) + else if (CPUID::is_amd_cpu()) { debug2("ScopedProfiler selected guard MFENCE\n"); guard = Guard::MFENCE; @@ -89,10 +90,16 @@ ScopedProfiler::~ScopedProfiler() : "=A" (tick)); } - auto cycles = tick - tick_start; + uint64_t nanos_start = RTC::nanos_now(); + + static uint64_t base_nanos = 0; + if (base_nanos == 0) base_nanos = nanos_start; + nanos_start -= base_nanos; + + uint64_t cycles = tick - tick_start; auto function_address = __builtin_return_address(0); - // Find an entry that matches this function_address, or an unused entry + // Find an entry that matches this function_address for (auto& entry : entries) { if (entry.function_address == function_address) @@ -100,13 +107,16 @@ ScopedProfiler::~ScopedProfiler() // Update the entry entry.cycles_average = ((entry.cycles_average * entry.num_samples) + cycles) / (entry.num_samples + 1); entry.num_samples += 1; - return; } - else if (entry.function_address == 0) + } + // Find an unused entry + for (auto& entry : entries) + { + if (entry.function_address == 0) { // Use this unused entry - char symbol_buffer[1024]; + char symbol_buffer[4096]; const auto symbols = Elf::safe_resolve_symbol(function_address, symbol_buffer, sizeof(symbol_buffer)); @@ -114,6 +124,7 @@ ScopedProfiler::~ScopedProfiler() entry.function_address = function_address; entry.function_name = symbols.name; entry.cycles_average = cycles; + entry.nanos_start = nanos_start; entry.num_samples = 1; return; } @@ -124,12 +135,12 @@ ScopedProfiler::~ScopedProfiler() printf("[WARNING] There are too many ScopedProfilers in use\n"); } -std::string ScopedProfiler::get_statistics() +std::string ScopedProfiler::get_statistics(bool sorted) { std::ostringstream ss; // Add header - ss << " CPU time (average) | Samples | Function Name \n"; + ss << " First seen | CPU time (avg) | Samples | Function Name \n"; ss << "--------------------------------------------------------------------------------\n"; // Calculate the number of used entries @@ -145,22 +156,29 @@ std::string ScopedProfiler::get_statistics() if (num_entries > 0) { - // Sort on cycles_average (higher value first) - // Make sure to keep unused entries last (only sort used entries) - std::sort(entries.begin(), entries.begin() + num_entries, [](const Entry& a, const Entry& b) + if (sorted) { - return a.cycles_average > b.cycles_average; - }); + // Sort on cycles_average (higher value first) + // Make sure to keep unused entries last (only sort used entries) + std::sort(entries.begin(), entries.begin() + num_entries, [](const Entry& a, const Entry& b) + { + return a.cycles_average > b.cycles_average; + }); + } // Add each entry ss.setf(std::ios_base::fixed); for (auto i = 0u; i < num_entries; i++) { const auto& entry = entries[i]; - double div = OS::cpu_freq().count() * 1000.0; - ss.width(16); - ss << entry.cycles_average / div << " ms | "; + double timst = entry.nanos_start / 1.0e6; + ss.width(10); + ss << timst << " ms | "; + + double micros = entry.cycles_average / OS::cpu_freq().count(); + ss.width(10); + ss << micros / 1000.0 << " ms | "; ss.width(7); ss << entry.num_samples << " | "; diff --git a/src/kernel/syscalls.cpp b/src/kernel/syscalls.cpp index a37e987f99..e5554520ab 100644 --- a/src/kernel/syscalls.cpp +++ b/src/kernel/syscalls.cpp @@ -23,7 +23,6 @@ #include #include #include -#include #include #include @@ -98,12 +97,6 @@ int wait(int*) { return -1; } -int gettimeofday(struct timeval* p, void*) { - p->tv_sec = RTC::now(); - p->tv_usec = 0; - return 0; -} - int kill(pid_t pid, int sig) THROW { SMP::global_lock(); printf("!!! Kill PID: %i, SIG: %i - %s ", pid, sig, strsignal(sig)); @@ -243,15 +236,21 @@ typedef int clockid_t; #define CLOCK_REALTIME 0 #endif #endif -// Basic second-resolution implementation - using CMOS directly for now. + int clock_gettime(clockid_t clk_id, struct timespec* tp) { if (clk_id == CLOCK_REALTIME) { - tp->tv_sec = RTC::now(); - tp->tv_nsec = 0; + *tp = __arch_wall_clock(); return 0; } + printf("hmm clock_gettime called, -1\n"); return -1; } +int gettimeofday(struct timeval* p, void*) { + auto tval = __arch_wall_clock(); + p->tv_sec = tval.tv_sec; + p->tv_usec = tval.tv_nsec / 1000; + return 0; +} extern "C" void _init_syscalls(); void _init_syscalls() diff --git a/src/kernel/timers.cpp b/src/kernel/timers.cpp index e597152c50..653901880d 100644 --- a/src/kernel/timers.cpp +++ b/src/kernel/timers.cpp @@ -2,6 +2,7 @@ #include #include +#include #include #include #include @@ -14,9 +15,9 @@ typedef Timers::handler_t handler_t; /// time functions /// -static inline std::chrono::microseconds now() noexcept +static inline auto now() noexcept { - return microseconds(OS::micros_since_boot()); + return nanoseconds(RTC::nanos_now()); } /// internal timer /// diff --git a/src/net/buffer_store.cpp b/src/net/buffer_store.cpp index e0d8564df7..180f5097ed 100644 --- a/src/net/buffer_store.cpp +++ b/src/net/buffer_store.cpp @@ -116,9 +116,9 @@ namespace net { parent = parent->next_; if (!parent->available_.empty()) return parent; } + BSD_BUF(" Allocating %lu new buffers (%lu total)\n", + local_buffers(), total_buffers() + local_buffers()); parent->next_ = new BufferStore(local_buffers(), bufsize()); - BSD_BUF(" Allocating %lu new buffers (%lu total)", - local_buffers(), total_buffers()); return parent->next_; #else return nullptr; diff --git a/src/net/super_stack.cpp b/src/net/super_stack.cpp index fb3e798088..1ea25c41e6 100644 --- a/src/net/super_stack.cpp +++ b/src/net/super_stack.cpp @@ -17,6 +17,7 @@ #include #include +#include namespace net { @@ -52,6 +53,36 @@ Inet& Super_stack::create(hw::Nic& nic, int N, int sub) return *stacks[sub]; } +// Specialization for IP4 +template <> +Inet& Super_stack::create(hw::Nic& nic) +{ + INFO("Network", "Creating stack for %s on %s", + nic.driver_name(), nic.device_name().c_str()); + + auto inet_ = [&nic]()->auto { + switch(nic.proto()) { + case hw::Nic::Proto::ETH: + return std::make_unique(nic); + default: + throw Super_stack_err{"Nic not supported"}; + } + }(); + + // Just take the first free one.. + for(auto& stacks : inet().ip4_stacks_) + { + auto& stack = stacks[0]; + if(stack == nullptr) { + stack = std::move(inet_); + return *stack; + } + } + + // we should never reach this point + throw Super_stack_err{"There wasn't a free slot to create stack on Nic"}; +} + // Specialization for IP4 template <> Inet& Super_stack::get(int N) @@ -89,6 +120,36 @@ Inet& Super_stack::get(int N, int sub) + std::to_string(N) + "," + std::to_string(sub) + "]"}; } +// Specialization for IP4 +template <> +Inet& Super_stack::get(const std::string& mac) +{ + MAC::Addr link_addr{mac.c_str()}; + // Look for the stack with the same NIC + for(auto& stacks : inet().ip4_stacks_) + { + auto& stack = stacks[0]; + if(stack == nullptr) + continue; + if(stack->link_addr() == link_addr) + return *stack; + } + + // If no stack, find the NIC + auto& devs = hw::Devices::devices(); + auto it = devs.begin(); + for(; it != devs.end(); it++) { + if((*it)->mac() == link_addr) + break; + } + // If no NIC, no point looking more + if(it == devs.end()) + throw Stack_not_found{"No NIC found with MAC address " + mac}; + + // If not found, create + return inet().create(*(*it)); +} + Super_stack::Super_stack() { if (hw::Devices::devices().empty()) diff --git a/src/net/tcp/tcp.cpp b/src/net/tcp/tcp.cpp index f6aa660929..0793a67380 100644 --- a/src/net/tcp/tcp.cpp +++ b/src/net/tcp/tcp.cpp @@ -22,7 +22,7 @@ #include #include // checksum #include -#include // micros_since_boot (get_ts_value) +#include // nanos_since_boot (get_ts_value) using namespace std; using namespace net; @@ -365,7 +365,7 @@ seq_t TCP::generate_iss() { uint32_t TCP::get_ts_value() const { - return ((OS::micros_since_boot() >> 10) & 0xffffffff); + return ((OS::nanos_since_boot() / 1000000000ull) & 0xffffffff); } void TCP::drop(const tcp::Packet&) { diff --git a/src/platform/kvm/bsd_pvclock.hpp b/src/platform/kvm/bsd_pvclock.hpp new file mode 100644 index 0000000000..c074eeff58 --- /dev/null +++ b/src/platform/kvm/bsd_pvclock.hpp @@ -0,0 +1,80 @@ +/*- + * Copyright (c) 2009 Adrian Chadd + * Copyright (c) 2012 Spectra Logic Corporation + * Copyright (c) 2014 Bryan Venteicher + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +/* +* Scale a 64-bit delta by scaling and multiplying by a 32-bit fraction, +* yielding a 64-bit result. +*/ +static inline uint64_t +pvclock_scale_delta(uint64_t delta, uint32_t mul_frac, int shift) +{ + uint64_t product; + + if (shift < 0) + delta >>= -shift; + else + delta <<= shift; + +#if defined(__i386__) + { + uint32_t tmp1, tmp2; + + /** + * For i386, the formula looks like: + * + * lower = (mul_frac * (delta & UINT_MAX)) >> 32 + * upper = mul_frac * (delta >> 32) + * product = lower + upper + */ + __asm__ ( + "mul %5 ; " + "mov %4,%%eax ; " + "mov %%edx,%4 ; " + "mul %5 ; " + "xor %5,%5 ; " + "add %4,%%eax ; " + "adc %5,%%edx ; " + : "=A" (product), "=r" (tmp1), "=r" (tmp2) + : "a" ((uint32_t)delta), "1" ((uint32_t)(delta >> 32)), + "2" (mul_frac) ); + } +#elif defined(__amd64__) + { + unsigned long tmp; + + __asm__ ( + "mulq %[mul_frac] ; shrd $32, %[hi], %[lo]" + : [lo]"=a" (product), [hi]"=d" (tmp) + : "0" (delta), [mul_frac]"rm"((uint64_t)mul_frac)); + } +#else +#error "pvclock: unsupported x86 architecture?" +#endif + + return (product); +} diff --git a/src/platform/kvm/kvmclock.cpp b/src/platform/kvm/kvmclock.cpp index e69de29bb2..0744ecfd5e 100644 --- a/src/platform/kvm/kvmclock.cpp +++ b/src/platform/kvm/kvmclock.cpp @@ -0,0 +1,88 @@ +#include "kvmclock.hpp" +#include "../x86_pc/cpu.hpp" +#include +#include +#include +#include + +#define MSR_KVM_WALL_CLOCK_NEW 0x4b564d00 +#define MSR_KVM_SYSTEM_TIME_NEW 0x4b564d01 +using namespace x86; + +struct alignas(4096) pvclock_vcpu_time_info { + uint32_t version; + uint32_t pad0; + uint64_t tsc_timestamp; + uint64_t system_time; + uint32_t tsc_to_system_mul; + signed char tsc_shift; + unsigned char flags; + unsigned char pad[2]; +}__attribute__((packed)); +static SMP_ARRAY vcpu_time; + +struct alignas(4096) pvclock_wall_clock { + uint32_t version; + uint32_t sec; + uint32_t nsec; +}__attribute__((packed)); +static pvclock_wall_clock kvm_wall_clock; + +void KVM_clock::init() +{ + auto wall_addr = (uintptr_t) &kvm_wall_clock; + CPU::write_msr(MSR_KVM_WALL_CLOCK_NEW, wall_addr); + auto vcpu_addr = (uintptr_t) &PER_CPU(vcpu_time); + CPU::write_msr(MSR_KVM_SYSTEM_TIME_NEW, vcpu_addr | 1); +} + +KHz KVM_clock::get_tsc_khz() +{ + uint64_t khz = 1000000ULL << 32; + khz /= PER_CPU(vcpu_time).tsc_to_system_mul; + + if (PER_CPU(vcpu_time).tsc_shift < 0) + khz <<= -PER_CPU(vcpu_time).tsc_shift; + else + khz >>= PER_CPU(vcpu_time).tsc_shift; + return KHz(khz); +} + +#include "bsd_pvclock.hpp" +uint64_t KVM_clock::system_time() +{ + auto& vcpu = PER_CPU(vcpu_time); + uint32_t version = 0; + uint64_t time_ns = 0; + do + { + version = vcpu.version; + asm("mfence" ::: "memory"); + // nanosecond offset based on TSC + uint64_t delta = (__arch_cpu_cycles() - vcpu.tsc_timestamp); + time_ns = pvclock_scale_delta(delta, vcpu.tsc_to_system_mul, vcpu.tsc_shift); + // base system time + time_ns += vcpu.system_time; + asm("mfence" ::: "memory"); + } + while ((vcpu.version & 0x1) || (version != vcpu.version)); + return time_ns; +} + +timespec KVM_clock::wall_clock() +{ + uint32_t version = 0; + timespec tval; + do + { + version = kvm_wall_clock.version; + asm("mfence" ::: "memory"); + tval.tv_sec = kvm_wall_clock.sec; + tval.tv_nsec = kvm_wall_clock.nsec; + asm("mfence" ::: "memory"); + } + while ((kvm_wall_clock.version & 1) + || (kvm_wall_clock.version != version)); + + return tval; +} diff --git a/src/platform/kvm/kvmclock.hpp b/src/platform/kvm/kvmclock.hpp index e69de29bb2..9f758f1ff3 100644 --- a/src/platform/kvm/kvmclock.hpp +++ b/src/platform/kvm/kvmclock.hpp @@ -0,0 +1,12 @@ +#pragma once +#include +#include +#include + +struct KVM_clock +{ + static void init(); + static uint64_t system_time(); + static timespec wall_clock(); + static KHz get_tsc_khz(); +}; diff --git a/src/platform/kvm/pv_eoi.cpp b/src/platform/kvm/pv_eoi.cpp index e69de29bb2..1ac79d4f62 100644 --- a/src/platform/kvm/pv_eoi.cpp +++ b/src/platform/kvm/pv_eoi.cpp @@ -0,0 +1,54 @@ +#include +#include +#include "../x86_pc/cpu.hpp" +#include + +// *** manual *** +// http://choon.net/forum/read.php?21,1123399 +// https://www.kernel.org/doc/Documentation/virtual/kvm/cpuid.txt + +#define KVM_MSR_ENABLED 1 +#define MSR_KVM_PV_EOI_EN 0x4b564d04 +#define KVM_PV_EOI_BIT 0 +#define KVM_PV_EOI_MASK (0x1 << KVM_PV_EOI_BIT) +#define KVM_PV_EOI_ENABLED KVM_PV_EOI_MASK + +// current selected EOI method +extern void (*current_eoi_mechanism)(); +extern void (*real_eoi_mechanism)(); +extern void (*current_intr_handler)(); + +__attribute__ ((aligned(4))) +static volatile unsigned long kvm_exitless_eoi = 0; + +extern "C" +void kvm_pv_eoi() +{ + if (kvm_exitless_eoi) { + kprintf("avoidable eoi\n"); + } + uint8_t reg; + asm("btr %2, %0; setc %1" : "+m"(kvm_exitless_eoi), "=rm"(reg) : "r"(0)); + if (reg) { + kprintf("avoidable eoi\n"); + return; + } + else { + //kprintf("unavoidable eoi\n"); + } + // fallback to normal x2APIC EOI + real_eoi_mechanism(); +} +void kvm_pv_eoi_init() +{ + uint64_t addr = (uint64_t) &kvm_exitless_eoi; + addr |= KVM_MSR_ENABLED; + x86::CPU::write_msr(MSR_KVM_PV_EOI_EN, addr); + // verify that the feature was enabled + uint64_t res = x86::CPU::read_msr(MSR_KVM_PV_EOI_EN); + if (res & 1) { + INFO("KVM", "Paravirtual EOI enabled"); + // set new EOI handler + current_eoi_mechanism = kvm_pv_eoi; + } +} diff --git a/src/platform/x86_linux/CMakeLists.txt b/src/platform/x86_linux/CMakeLists.txt deleted file mode 100644 index 515cc6c95d..0000000000 --- a/src/platform/x86_linux/CMakeLists.txt +++ /dev/null @@ -1,15 +0,0 @@ -# -# x86 userspace Linux platform -# -set(PLATFORM_OBJECTS - os.cpp - serial1.cpp - platform.cpp - kernel_start.cpp - sanity_checks.cpp - ) - -add_library(x86_linux STATIC ${PLATFORM_OBJECTS}) -add_dependencies(x86_linux PrecompiledLibraries) -set_target_properties(x86_linux PROPERTIES LINKER_LANGUAGE CXX) -install(TARGETS x86_linux DESTINATION includeos/${ARCH}/platform) diff --git a/src/platform/x86_linux/kernel_start.cpp b/src/platform/x86_linux/kernel_start.cpp deleted file mode 100644 index 298ee83233..0000000000 --- a/src/platform/x86_linux/kernel_start.cpp +++ /dev/null @@ -1,67 +0,0 @@ -// This file is a part of the IncludeOS unikernel - www.includeos.org -// -// Copyright 2015-2016 Oslo and Akershus University College of Applied Sciences -// and Alfred Bratterud -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#include -#include - -extern "C" { - void __init_serial1(); - void __init_sanity_checks(); - void kernel_sanity_checks(); - uintptr_t _multiboot_free_begin(uintptr_t boot_addr); - uintptr_t _move_symbols(uintptr_t loc); - void _init_bss(); - void _init_heap(uintptr_t); - void _init_c_runtime(); - void _init_syscalls(); -} - -extern "C" -void kernel_start(uintptr_t magic, uintptr_t addr) -{ - // Determine where free memory starts - extern char _end; - uintptr_t free_mem_begin = reinterpret_cast(&_end); - - if (magic == MULTIBOOT_BOOTLOADER_MAGIC) { - free_mem_begin = _multiboot_free_begin(addr); - } - - // Preserve symbols from the ELF binary - free_mem_begin += _move_symbols(free_mem_begin); - - // Initialize zero-initialized vars - _init_bss(); - - // Initialize heap - _init_heap(free_mem_begin); - - // Initialize stack-unwinder, call global constructors etc. - _init_c_runtime(); - - // Initialize system calls - _init_syscalls(); - - // Initialize early OS, platform and devices - OS::start(magic, addr); - - // Initialize common subsystems and call Service::start - OS::post_start(); - - // Starting event loop from here allows us to profile OS::start - OS::event_loop(); -} diff --git a/src/platform/x86_linux/os.cpp b/src/platform/x86_linux/os.cpp deleted file mode 100644 index 7d844ed489..0000000000 --- a/src/platform/x86_linux/os.cpp +++ /dev/null @@ -1,190 +0,0 @@ -// This file is a part of the IncludeOS unikernel - www.includeos.org -// -// Copyright 2015-2017 Oslo and Akershus University College of Applied Sciences -// and Alfred Bratterud -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -//#define DEBUG -#define MYINFO(X,...) INFO("Kernel", X, ##__VA_ARGS__) - -#include -#include -#include -#include -#include -#include -#include -#include - -//#define ENABLE_PROFILERS -#ifdef ENABLE_PROFILERS -#include -#define PROFILE(name) ScopedProfiler __CONCAT(sp, __COUNTER__){name}; -#else -#define PROFILE(name) /* name */ -#endif - -extern "C" void* get_cpu_esp(); -extern "C" void __libc_init_array(); -extern uintptr_t heap_begin; -extern uintptr_t heap_end; -extern uintptr_t _start; -extern uintptr_t _end; -extern uintptr_t _ELF_START_; -extern uintptr_t _TEXT_START_; -extern uintptr_t _LOAD_START_; -extern uintptr_t _ELF_END_; - -struct alignas(SMP_ALIGN) OS_CPU { - uint64_t cycles_hlt = 0; -}; -static SMP_ARRAY os_per_cpu; - -int64_t OS::micros_since_boot() noexcept { - return cycles_since_boot() / cpu_freq().count(); -} - -RTC::timestamp_t OS::boot_timestamp() -{ - return RTC::boot_timestamp(); -} - -RTC::timestamp_t OS::uptime() -{ - return RTC::time_since_boot(); -} - -uint64_t OS::cycles_asleep() noexcept { - return PER_CPU(os_per_cpu).cycles_hlt; -} -uint64_t OS::micros_asleep() noexcept { - return PER_CPU(os_per_cpu).cycles_hlt / cpu_freq().count(); -} - -__attribute__((noinline)) -void OS::halt() -{ - uint64_t cycles_before = __arch_cpu_cycles(); - asm volatile("hlt"); - - // add a global symbol here so we can quickly discard - // event loop from stack sampling - asm volatile( - ".global _irq_cb_return_location;\n" - "_irq_cb_return_location:" ); - - // Count sleep cycles - PER_CPU(os_per_cpu).cycles_hlt += __arch_cpu_cycles() - cycles_before; -} - -void OS::default_stdout(const char* str, const size_t len) -{ - __serial_print(str, len); -} - -void OS::start(uint32_t boot_magic, uint32_t boot_addr) -{ - OS::cmdline = Service::binary_name(); - // Initialize stdout handlers - OS::add_stdout(&OS::default_stdout); - - PROFILE("OS::start"); - // Print a fancy header - CAPTION("#include // Literally"); - - MYINFO("Stack: %p", get_cpu_esp()); - MYINFO("Boot magic: 0x%x, addr: 0x%x", boot_magic, boot_addr); - - /// STATMAN /// - PROFILE("Statman"); - /// initialize on page 7, 3 pages in size - Statman::get().init(0x6000, 0x3000); - - // Call global ctors - PROFILE("Global constructors"); - __libc_init_array(); - - // BOOT METHOD // - PROFILE("Multiboot / legacy"); - OS::memory_end_ = 0; - // Detect memory limits etc. depending on boot type - if (boot_magic == MULTIBOOT_BOOTLOADER_MAGIC) { - OS::multiboot(boot_addr); - } else { - - if (is_softreset_magic(boot_magic) && boot_addr != 0) - OS::resume_softreset(boot_addr); - - OS::legacy_boot(); - } - assert(OS::memory_end_ != 0); - // Give the rest of physical memory to heap - OS::heap_max_ = OS::memory_end_; - - PROFILE("Memory map"); - // Assign memory ranges used by the kernel - auto& memmap = memory_map(); - MYINFO("Assigning fixed memory ranges (Memory map)"); - - memmap.assign_range({0x6000, 0x8fff, "Statman", "Statistics"}); -#if defined(ARCH_x86_64) - memmap.assign_range({0x100000, 0x8fffff, "Pagetables", "System page tables"}); - memmap.assign_range({0x900000, 0x9fffff, "Stack", "System main stack"}); -#elif defined(ARCH_i686) - memmap.assign_range({0xA000, 0x9fbff, "Stack", "System main stack"}); -#endif - memmap.assign_range({(uintptr_t)&_LOAD_START_, (uintptr_t)&_end - 1, - "ELF", "Your service binary including OS"}); - - assert(::heap_begin != 0x0 and OS::heap_max_ != 0x0); - // @note for security we don't want to expose this - memmap.assign_range({(uintptr_t)&_end, ::heap_begin - 1, - "Pre-heap", "Heap randomization area"}); - - uintptr_t span_max = std::numeric_limits::max(); - uintptr_t heap_range_max_ = std::min(span_max, OS::heap_max_); - - MYINFO("Assigning heap"); - memmap.assign_range({::heap_begin, heap_range_max_, - "Heap", "Dynamic memory", heap_usage }); - - MYINFO("Printing memory map"); - for (const auto &i : memmap) - INFO2("* %s",i.second.to_string().c_str()); - - - PROFILE("Platform init"); - extern void __platform_init(); - __platform_init(); - - PROFILE("RTC init"); - // Realtime/monotonic clock - RTC::init(); -} - -void OS::event_loop() -{ - Events::get(0).process_events(); - do { - OS::halt(); - Events::get(0).process_events(); - } while (power_); - - MYINFO("Stopping service"); - Service::stop(); - - MYINFO("Powering off"); - extern void __arch_poweroff(); - __arch_poweroff(); -} diff --git a/src/platform/x86_linux/platform.cpp b/src/platform/x86_linux/platform.cpp deleted file mode 100644 index 5d77084700..0000000000 --- a/src/platform/x86_linux/platform.cpp +++ /dev/null @@ -1,72 +0,0 @@ -// This file is a part of the IncludeOS unikernel - www.includeos.org -// -// Copyright 2015 Oslo and Akershus University College of Applied Sciences -// and Alfred Bratterud -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#include -#include -#include -#define MYINFO(X,...) INFO("x86", X, ##__VA_ARGS__) - -extern "C" char* get_cpu_esp(); -extern "C" void* get_cpu_ebp(); -#define _SENTINEL_VALUE_ 0x123456789ABCDEF - -namespace tls { - extern size_t get_tls_size(); - extern void fill_tls_data(char*); -} -struct alignas(64) smp_table -{ - // thread self-pointer - void* tls_data; // 0x0 - // per-cpu cpuid (and more) - int cpuid; - int reserved; - -#ifdef ARCH_x86_64 - uintptr_t pad[3]; // 64-bit padding - uintptr_t guard; // _SENTINEL_VALUE_ -#else - uint32_t pad[2]; - uintptr_t guard; // _SENTINEL_VALUE_ -#endif - /** put more here **/ -}; -#ifdef ARCH_x86_64 -// FS:0x28 on Linux is storing a special sentinel stack-guard value -static_assert(offsetof(smp_table, guard) == 0x28, "Linux stack sentinel"); -#endif - -void __platform_init() -{ - INFO("Linux", "Initialize event manager"); - Events::get(0).init_local(); - -} - -void __arch_enable_legacy_irq(uint8_t) {} -void __arch_disable_legacy_irq(uint8_t) {} - -void __arch_poweroff() -{ - // exit(0) syscall - __builtin_unreachable(); -} -void __arch_reboot() -{ - // exit(0) syscall - __builtin_unreachable(); -} diff --git a/src/platform/x86_linux/sanity_checks.cpp b/src/platform/x86_linux/sanity_checks.cpp deleted file mode 100644 index 959471f473..0000000000 --- a/src/platform/x86_linux/sanity_checks.cpp +++ /dev/null @@ -1,83 +0,0 @@ -// This file is a part of the IncludeOS unikernel - www.includeos.org -// -// Copyright 2015 Oslo and Akershus University College of Applied Sciences -// and Alfred Bratterud -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#include -#include -#include -#include -#include -#include -#include - -// NOTE: crc_to MUST NOT be initialized to zero -static uint32_t crc_ro = CRC32_BEGIN(); -const auto* LOW_CHECK_SIZE = (volatile int*) 0x200; - -// Global constructors -static int gconstr_value = 0; -__attribute__((constructor)) -static void self_test_gconstr() { - gconstr_value = 1; -} - -static uint32_t generate_ro_crc() noexcept -{ - extern char _TEXT_START_; - extern char _TEXT_END_; - extern char _RODATA_START_; - extern char _RODATA_END_; - return crc32_fast(&_TEXT_START_, &_RODATA_END_ - &_TEXT_START_); -} - -extern "C" -void __init_sanity_checks() noexcept -{ - // zero low memory - for (volatile int* lowmem = NULL; lowmem < LOW_CHECK_SIZE; lowmem++) - *lowmem = 0; - // generate checksum for read-only portions of kernel - crc_ro = generate_ro_crc(); -} - -extern "C" -void kernel_sanity_checks() -{ - // verify checksum of read-only portions of kernel - uint32_t new_ro = generate_ro_crc(); - if (crc_ro != new_ro) { - kprintf("CRC mismatch %#x vs %#x\n", crc_ro, new_ro); - panic("Sanity checks: CRC of kernel read-only area failed"); - } - // verify that first page is zeroes only - for (volatile int* lowmem = NULL; lowmem < LOW_CHECK_SIZE; lowmem++) - if (UNLIKELY(*lowmem != 0)) { - kprintf("Memory at %p was not zeroed: %#x\n", lowmem, *lowmem); - panic("Sanity checks: Low-memory zero test"); - } - - // verify that Elf symbols were not overwritten - bool symbols_verified = Elf::verify_symbols(); - if (!symbols_verified) - panic("Sanity checks: Consistency of Elf symbols and string areas"); - - // global constructor self-test - if (gconstr_value != 1) { - kprintf("Sanity checks: Global constructors not working (or modified during run-time)!\n"); - panic("Sanity checks: Global constructors verification failed"); - } - -} diff --git a/src/platform/x86_linux/serial1.cpp b/src/platform/x86_linux/serial1.cpp deleted file mode 100644 index ef25e33b1f..0000000000 --- a/src/platform/x86_linux/serial1.cpp +++ /dev/null @@ -1,14 +0,0 @@ -#include -#include - -extern "C" -void __serial_print1(const char* cstr) -{ - size_t len = strlen(cstr); - write(0, cstr, len); -} -extern "C" -void __serial_print(const char* str, size_t len) -{ - write(0, str, len); -} diff --git a/src/platform/x86_linux/syscall.hpp b/src/platform/x86_linux/syscall.hpp deleted file mode 100644 index fb26bf9bf8..0000000000 --- a/src/platform/x86_linux/syscall.hpp +++ /dev/null @@ -1,48 +0,0 @@ -// This file is a part of the IncludeOS unikernel - www.includeos.org -// -// Copyright 2015-2016 Oslo and Akershus University College of Applied Sciences -// and Alfred Bratterud -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#pragma once -#include -#include - -#define SYS_WRITE 1 - - -#define SYSCALL_1(num, arg1) \ - asm ("movq %0, %%rdi" : : "r"(arg1)); \ - asm ("movq %0, %%rax" : : "i"(num)); \ - asm ("syscall; movl %0, %%eax" : "=r"(result)); - -#define SYSCALL_2(num, arg1, arg2) \ - asm ("movq %0, %%rdi" : : "r"(arg1)); \ - asm ("movq %0, %%rsi" : : "r"(arg2)); \ - asm ("movq %0, %%rax" : : "i"(num)); \ - asm ("syscall; movl %0, %%eax" : "=r"(result)); - -#define SYSCALL_3(num, arg1, arg2, arg3) \ - asm ("mov %0, %%rdi" : : "r"(arg1)); \ - asm ("mov %0, %%rsi" : : "r"(arg2)); \ - asm ("mov %0, %%rdx" : : "r"(arg3)); \ - asm ("mov %0, %%rax" : : "i"(num)); \ - asm ("syscall; movl %0, %%eax" : "=r"(result)); - -int syscall_write(unsigned int fd, const char* buf, size_t count) -{ - int result; - SYSCALL_3(SYS_WRITE, fd, buf, count); - return result; -} diff --git a/src/platform/x86_nano/platform.cpp b/src/platform/x86_nano/platform.cpp index 9ddc427ca0..3faa090a69 100644 --- a/src/platform/x86_nano/platform.cpp +++ b/src/platform/x86_nano/platform.cpp @@ -20,9 +20,12 @@ void __platform_init() } // not supported! -int64_t __arch_time_now() noexcept { +uint64_t __arch_system_time() noexcept { return 0; } +timespec __arch_wall_clock() noexcept { + return {0, 0}; +} // not supported! void OS::block() {} // default to serial diff --git a/src/platform/x86_pc/CMakeLists.txt b/src/platform/x86_pc/CMakeLists.txt index c59ba83ed4..21193e5231 100644 --- a/src/platform/x86_pc/CMakeLists.txt +++ b/src/platform/x86_pc/CMakeLists.txt @@ -21,6 +21,7 @@ set(X86_PC_OBJECTS pic.cpp softreset.cpp sanity_checks.cpp + smbios.cpp idt.cpp ### KVM features ### ../kvm/kvmclock.cpp diff --git a/src/platform/x86_pc/apic.cpp b/src/platform/x86_pc/apic.cpp index f0d4352f67..5e3b44f05d 100644 --- a/src/platform/x86_pc/apic.cpp +++ b/src/platform/x86_pc/apic.cpp @@ -23,10 +23,10 @@ #include "smp.hpp" #include #include -#include -#include #include #include +//#define ENABLE_KVM_PV_EOI +//#define ENABLE_DYNAMIC_EOI namespace x86 { @@ -38,10 +38,9 @@ namespace x86 extern "C" { // current selected EOI method - void (*current_eoi_mechanism)(); - void (*current_intr_handler)(); - // KVM para PV-EOI feature - void kvm_pv_eoi(); + void (*current_eoi_mechanism)() = nullptr; + void (*real_eoi_mechanism)() = nullptr; + void (*current_intr_handler)() = nullptr; // shortcut that avoids virtual call void x2apic_send_eoi() { x86::CPU::write_msr(x86::x2apic::BASE_MSR + x2APIC_EOI, 0); @@ -52,18 +51,28 @@ extern "C" { uint8_t vector = x86::APIC::get_isr(); //assert(vector >= IRQ_BASE && vector < 160); Events::get().trigger_event(vector - IRQ_BASE); +#ifdef ENABLE_DYNAMIC_EOI + assert(current_eoi_mechanism != nullptr); + current_eoi_mechanism(); +#else lapic_send_eoi(); +#endif } void x2apic_intr_handler() { uint8_t vector = x86::x2apic::static_get_isr(); //assert(vector >= IRQ_BASE && vector < 160); Events::get().trigger_event(vector - IRQ_BASE); +#ifdef ENABLE_DYNAMIC_EOI + assert(current_eoi_mechanism != nullptr); + current_eoi_mechanism(); +#else x2apic_send_eoi(); +#endif } } -void kvm_pv_eoi_init(); +extern void kvm_pv_eoi_init(); namespace x86 { @@ -75,26 +84,31 @@ namespace x86 if (CPUID::has_feature(CPUID::Feature::X2APIC)) { current_apic = &x2apic::get(); - current_eoi_mechanism = x2apic_send_eoi; + real_eoi_mechanism = x2apic_send_eoi; current_intr_handler = x2apic_intr_handler; } else { // an x86 PC without APIC is insane assert(CPUID::has_feature(CPUID::Feature::APIC) && "If this fails, the machine is insane"); current_apic = &xapic::get(); - current_eoi_mechanism = lapic_send_eoi; + real_eoi_mechanism = lapic_send_eoi; current_intr_handler = xapic_intr_handler; } + if (current_eoi_mechanism == nullptr) + current_eoi_mechanism = real_eoi_mechanism; + // enable xAPIC/x2APIC on this cpu current_apic->enable(); // initialize I/O APICs IOAPIC::init(ACPI::get_ioapics()); +#ifdef ENABLE_KVM_PV_EOI // use KVMs paravirt EOI if supported - //if (CPUID::kvm_feature(KVM_FEATURE_PV_EOI)) - // kvm_pv_eoi_init(); + if (CPUID::kvm_feature(KVM_FEATURE_PV_EOI)) + kvm_pv_eoi_init(); +#endif } void APIC::enable_irq(uint8_t irq) @@ -129,46 +143,3 @@ namespace x86 IOAPIC::disable(irq); } } - -// *** manual *** -// http://choon.net/forum/read.php?21,1123399 -// https://www.kernel.org/doc/Documentation/virtual/kvm/cpuid.txt - -#define KVM_MSR_ENABLED 1 -#define MSR_KVM_PV_EOI_EN 0x4b564d04 -#define KVM_PV_EOI_BIT 0 -#define KVM_PV_EOI_MASK (0x1 << KVM_PV_EOI_BIT) -#define KVM_PV_EOI_ENABLED KVM_PV_EOI_MASK -#define KVM_PV_EOI_DISABLED 0x0 - -__attribute__ ((aligned(4))) -static volatile unsigned long kvm_exitless_eoi = KVM_PV_EOI_DISABLED; - -void kvm_pv_eoi() -{ - uint8_t reg; - asm("btr %2, %0; setc %1" : "+m"(kvm_exitless_eoi), "=rm"(reg) : "r"(0)); - if (reg) { - kprintf("avoided\n"); - return; - } - // fallback to normal x2APIC EOI - x2apic_send_eoi(); -} -void kvm_pv_eoi_init() -{ - union { - uint32_t msr[2]; - uint64_t whole; - } guest; - guest.whole = (uint64_t) &kvm_exitless_eoi; - guest.whole |= KVM_MSR_ENABLED; - x86::CPU::write_msr(MSR_KVM_PV_EOI_EN, guest.msr[0], guest.msr[1]); - // verify that the feature was enabled - uint64_t res = x86::CPU::read_msr(MSR_KVM_PV_EOI_EN); - if (res & 1) { - kprintf("* KVM paravirtual EOI enabled\n"); - // set new EOI handler - current_eoi_mechanism = kvm_pv_eoi; - } -} diff --git a/src/platform/x86_pc/apic_timer.cpp b/src/platform/x86_pc/apic_timer.cpp index 509002722f..6ef6a31458 100644 --- a/src/platform/x86_pc/apic_timer.cpp +++ b/src/platform/x86_pc/apic_timer.cpp @@ -120,10 +120,10 @@ namespace x86 return ticks_per_micro != 0; } - void APIC_Timer::oneshot(std::chrono::microseconds micros) noexcept + void APIC_Timer::oneshot(std::chrono::nanoseconds nanos) noexcept { // prevent overflow - uint64_t ticks = micros.count() * ticks_per_micro; + uint64_t ticks = nanos.count() / 1000 * ticks_per_micro; if (ticks > 0xFFFFFFFF) ticks = 0xFFFFFFFF; // set initial counter diff --git a/src/platform/x86_pc/apic_timer.hpp b/src/platform/x86_pc/apic_timer.hpp index 1c37ef7436..d0c2d6d572 100644 --- a/src/platform/x86_pc/apic_timer.hpp +++ b/src/platform/x86_pc/apic_timer.hpp @@ -32,7 +32,7 @@ struct APIC_Timer static bool ready() noexcept; - static void oneshot(std::chrono::microseconds) noexcept; + static void oneshot(std::chrono::nanoseconds) noexcept; static void stop() noexcept; }; diff --git a/src/platform/x86_pc/clocks.cpp b/src/platform/x86_pc/clocks.cpp index a6cd211732..a8442ec7a4 100644 --- a/src/platform/x86_pc/clocks.cpp +++ b/src/platform/x86_pc/clocks.cpp @@ -24,27 +24,53 @@ #include #include -typedef delegate system_time_t; -static SMP_ARRAY vcpu_clock; +struct sysclock_t +{ + typedef delegate system_time_t; + typedef delegate wall_time_t; + typedef delegate tsc_khz_t; + system_time_t system_time = nullptr; + wall_time_t wall_time = nullptr; + tsc_khz_t tsc_khz = nullptr; +}; +static SMP_ARRAY vcpu_clock; namespace x86 { void Clocks::init() { - if (false) //CPUID::kvm_feature(KVM_FEATURE_CLOCKSOURCE2)) + if (CPUID::kvm_feature(KVM_FEATURE_CLOCKSOURCE2)) { - printf("--> KVM clock\n"); + KVM_clock::init(); + PER_CPU(vcpu_clock).system_time = {&KVM_clock::system_time}; + PER_CPU(vcpu_clock).wall_time = {&KVM_clock::wall_clock}; + PER_CPU(vcpu_clock).tsc_khz = {&KVM_clock::get_tsc_khz}; + if (SMP::cpu_id() == 0) INFO("x86", "KVM PV clocks initialized"); } else { // fallback with CMOS - if (SMP::cpu_id() == 0) CMOS_clock::init(); - PER_CPU(vcpu_clock) = {&CMOS_clock::system_time}; + PER_CPU(vcpu_clock).system_time = {&CMOS_clock::system_time}; + PER_CPU(vcpu_clock).wall_time = {&CMOS_clock::wall_clock}; + PER_CPU(vcpu_clock).tsc_khz = {&CMOS_clock::get_tsc_khz}; + if (SMP::cpu_id() == 0) { + CMOS_clock::init(); + INFO("x86", "CMOS clock initialized"); + } } } + + KHz Clocks::get_khz() + { + return PER_CPU(vcpu_clock).tsc_khz(); + } } -int64_t __arch_time_now() noexcept +uint64_t __arch_system_time() noexcept +{ + return PER_CPU(vcpu_clock).system_time(); +} +timespec __arch_wall_clock() noexcept { - return PER_CPU(vcpu_clock)(); + return PER_CPU(vcpu_clock).wall_time(); } diff --git a/src/platform/x86_pc/clocks.hpp b/src/platform/x86_pc/clocks.hpp index 7bb4a38740..5c324935cc 100644 --- a/src/platform/x86_pc/clocks.hpp +++ b/src/platform/x86_pc/clocks.hpp @@ -16,10 +16,13 @@ // limitations under the License. #pragma once +#include +#include namespace x86 { struct Clocks { static void init(); + static KHz get_khz(); }; } diff --git a/src/platform/x86_pc/cmos_clock.cpp b/src/platform/x86_pc/cmos_clock.cpp index ece48ff4d1..448a0a496f 100644 --- a/src/platform/x86_pc/cmos_clock.cpp +++ b/src/platform/x86_pc/cmos_clock.cpp @@ -20,10 +20,14 @@ #include #include #include +#include +#include "pit.hpp" + +extern "C" uint16_t _cpu_sampling_freq_divider_; namespace x86 { - static int64_t current_time; + static uint64_t current_time; static uint64_t current_ticks; void CMOS_clock::init() @@ -41,11 +45,33 @@ namespace x86 }); } - int64_t CMOS_clock::system_time() + uint64_t CMOS_clock::system_time() + { + auto ticks = OS::cycles_since_boot() - current_ticks; + auto diff = (double) ticks / Hz(OS::cpu_freq()).count(); + + return (current_time + diff) * 1000000000ull; + } + timespec CMOS_clock::wall_clock() { auto ticks = OS::cycles_since_boot() - current_ticks; - auto diff = ticks / Hz(MHz(OS::cpu_freq())).count(); + auto diff = (double) ticks / Hz(OS::cpu_freq()).count(); + + timespec tval; + tval.tv_sec = current_time + time_t(diff); + tval.tv_nsec = diff * 1000000000ull; + return tval; + } + + KHz CMOS_clock::get_tsc_khz() + { + // Estimate CPU frequency + INFO("CMOS", "Estimating CPU-frequency"); + INFO2("|"); + INFO2("+--(%d samples, %f sec. interval)", 18, + (x86::PIT::FREQUENCY / _cpu_sampling_freq_divider_).count()); + INFO2("|"); - return current_time + diff; + return KHz(MHz(PIT::get().estimate_CPU_frequency())); } } diff --git a/src/platform/x86_pc/cmos_clock.hpp b/src/platform/x86_pc/cmos_clock.hpp index db0a3870a2..574e866543 100644 --- a/src/platform/x86_pc/cmos_clock.hpp +++ b/src/platform/x86_pc/cmos_clock.hpp @@ -24,6 +24,8 @@ namespace x86 struct CMOS_clock { static void init(); - static int64_t system_time(); + static uint64_t system_time(); + static timespec wall_clock(); + static KHz get_tsc_khz(); }; } diff --git a/src/platform/x86_pc/os.cpp b/src/platform/x86_pc/os.cpp index 74bdcc1dce..c9494ec24d 100644 --- a/src/platform/x86_pc/os.cpp +++ b/src/platform/x86_pc/os.cpp @@ -52,25 +52,11 @@ struct alignas(SMP_ALIGN) OS_CPU { }; static SMP_ARRAY os_per_cpu; -int64_t OS::micros_since_boot() noexcept { - return cycles_since_boot() / cpu_freq().count(); -} - -RTC::timestamp_t OS::boot_timestamp() -{ - return RTC::boot_timestamp(); -} - -RTC::timestamp_t OS::uptime() -{ - return RTC::time_since_boot(); -} - uint64_t OS::cycles_asleep() noexcept { return PER_CPU(os_per_cpu).cycles_hlt; } -uint64_t OS::micros_asleep() noexcept { - return PER_CPU(os_per_cpu).cycles_hlt / cpu_freq().count(); +uint64_t OS::nanos_asleep() noexcept { + return (PER_CPU(os_per_cpu).cycles_hlt * 1e6) / cpu_freq().count(); } __attribute__((noinline)) @@ -107,11 +93,6 @@ void OS::start(uint32_t boot_magic, uint32_t boot_addr) MYINFO("Stack: %p", get_cpu_esp()); MYINFO("Boot magic: 0x%x, addr: 0x%x", boot_magic, boot_addr); - /// STATMAN /// - PROFILE("Statman"); - /// initialize on page 7, 3 pages in size - Statman::get().init(0x6000, 0x3000); - // Call global ctors PROFILE("Global constructors"); __libc_init_array(); @@ -133,18 +114,24 @@ void OS::start(uint32_t boot_magic, uint32_t boot_addr) // Give the rest of physical memory to heap OS::heap_max_ = OS::memory_end_; + /// STATMAN /// + PROFILE("Statman"); + /// initialize on page 9, 8 pages in size + Statman::get().init(0x8000, 0x8000); + PROFILE("Memory map"); // Assign memory ranges used by the kernel auto& memmap = memory_map(); MYINFO("Assigning fixed memory ranges (Memory map)"); - memmap.assign_range({0x6000, 0x8fff, "Statman", "Statistics"}); + memmap.assign_range({0x8000, 0xffff, "Statman", "Statistics"}); #if defined(ARCH_x86_64) - memmap.assign_range({0x100000, 0x8fffff, "Pagetables", "System page tables"}); - memmap.assign_range({0x900000, 0x9fffff, "Stack", "System main stack"}); + memmap.assign_range({0x1000, 0x6fff, "Pagetables", "System page tables"}); + memmap.assign_range({0x10000, 0x9d3ff, "Stack", "System main stack"}); #elif defined(ARCH_i686) - memmap.assign_range({0xA000, 0x9fbff, "Stack", "System main stack"}); + memmap.assign_range({0x10000, 0x9d3ff, "Stack", "System main stack"}); #endif + //memmap.assign_range({0x9d400, 0x9ffff, "Multiboot", "Multiboot reserved area"}); memmap.assign_range({(uintptr_t)&_LOAD_START_, (uintptr_t)&_end - 1, "ELF", "Your service binary including OS"}); @@ -164,7 +151,6 @@ void OS::start(uint32_t boot_magic, uint32_t boot_addr) for (const auto &i : memmap) INFO2("* %s",i.second.to_string().c_str()); - PROFILE("Platform init"); extern void __platform_init(); __platform_init(); diff --git a/src/platform/x86_pc/pit.cpp b/src/platform/x86_pc/pit.cpp index d1689236cd..6e908c1da8 100644 --- a/src/platform/x86_pc/pit.cpp +++ b/src/platform/x86_pc/pit.cpp @@ -20,6 +20,7 @@ #include #include #include +#include //#undef NO_DEBUG #define DEBUG #define DEBUG2 @@ -71,9 +72,9 @@ namespace x86 return freq; } - static inline milliseconds now() noexcept + static inline auto now() noexcept { - return duration_cast (microseconds(OS::micros_since_boot())); + return duration_cast (nanoseconds(RTC::nanos_now())); } void PIT::oneshot(milliseconds timeval, timeout_handler handler) @@ -87,7 +88,7 @@ namespace x86 if (forever) { get().forev_handler = handler; } else { - get().expiration = now() + timeval; + get().expiration = now() + duration_cast(timeval); get().handler = handler; } } diff --git a/src/platform/x86_pc/pit.hpp b/src/platform/x86_pc/pit.hpp index b30a622973..d3c6de9aaf 100644 --- a/src/platform/x86_pc/pit.hpp +++ b/src/platform/x86_pc/pit.hpp @@ -85,9 +85,9 @@ namespace x86 Mode current_mode_ = NONE; // Timer handler & expiration timestamp - timeout_handler handler = nullptr; - timeout_handler forev_handler = nullptr; - std::chrono::milliseconds expiration; + timeout_handler handler = nullptr; + timeout_handler forev_handler = nullptr; + std::chrono::nanoseconds expiration; // Access mode bits are bits 4- and 5 in the Mode register enum AccessMode { LATCH_COUNT = 0x0, LO_ONLY=0x10, HI_ONLY=0x20, LO_HI=0x30 }; diff --git a/src/platform/x86_pc/platform.cpp b/src/platform/x86_pc/platform.cpp index 65a90b947d..dd926c0099 100644 --- a/src/platform/x86_pc/platform.cpp +++ b/src/platform/x86_pc/platform.cpp @@ -21,7 +21,7 @@ #include "clocks.hpp" #include "gdt.hpp" #include "idt.hpp" -#include "pit.hpp" +#include "smbios.hpp" #include "smp.hpp" #include #include @@ -31,7 +31,6 @@ #include #define MYINFO(X,...) INFO("x86", X, ##__VA_ARGS__) -extern "C" uint16_t _cpu_sampling_freq_divider_; extern "C" char* get_cpu_esp(); extern "C" void* get_cpu_ebp(); #define _SENTINEL_VALUE_ 0x123456789ABCDEF @@ -73,6 +72,9 @@ void __platform_init() // read ACPI tables ACPI::init(); + // read SMBIOS tables + SMBIOS::init(); + // setup APIC, APIC timer, SMP etc. APIC::init(); @@ -84,7 +86,7 @@ void __platform_init() initialize_gdt_for_cpu(APIC::get().get_id()); #ifdef ARCH_x86_64 // setup Interrupt Stack Table - x86::ist_initialize_for_cpu(0, 0xA00000); + x86::ist_initialize_for_cpu(0, 0x9D3F0); #endif // IDT manager: Interrupt and exception handlers @@ -101,27 +103,20 @@ void __platform_init() MYINFO("Enabling interrupts"); asm volatile("sti"); - // Estimate CPU frequency - MYINFO("Estimating CPU-frequency"); - INFO2("|"); - INFO2("+--(%d samples, %f sec. interval)", 18, - (x86::PIT::FREQUENCY / _cpu_sampling_freq_divider_).count()); - INFO2("|"); + // Setup kernel clocks + MYINFO("Setting up kernel clock sources"); + Clocks::init(); if (OS::cpu_freq().count() <= 0.0) { - OS::cpu_mhz_ = MHz(PIT::get().estimate_CPU_frequency()); + OS::cpu_khz_ = Clocks::get_khz(); } - INFO2("+--> %f MHz", OS::cpu_freq().count()); + INFO2("+--> %f MHz", OS::cpu_freq().count() / 1000.0); // Note: CPU freq must be known before we can start timer system // Initialize APIC timers and timer systems // Deferred call to Service::ready() when calibration is complete APIC_Timer::calibrate(); - // Setup kernel clocks - MYINFO("Setting up kernel clock sources"); - Clocks::init(); - // Initialize storage devices PCI_manager::init(PCI::STORAGE); OS::m_block_drivers_ready = true; diff --git a/src/platform/x86_pc/smbios.cpp b/src/platform/x86_pc/smbios.cpp new file mode 100644 index 0000000000..b1458b63a9 --- /dev/null +++ b/src/platform/x86_pc/smbios.cpp @@ -0,0 +1,190 @@ +// This file is a part of the IncludeOS unikernel - www.includeos.org +// +// Copyright 2015 Oslo and Akershus University College of Applied Sciences +// and Alfred Bratterud +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "smbios.hpp" +#include +#include +#include +#include +#include + +namespace x86 +{ + arch_system_info_t SMBIOS::sysinfo; + + struct EntryPoint + { + char EntryPointString[4]; // "_SM_" + uint8_t checksum; // This value summed with all the values of the table, should be 0 (overflow) + uint8_t length; // Length of the Entry Point Table. Since version 2.1 of SMBIOS, this is 0x1F + uint8_t major; // Major Version of SMBIOS + uint8_t minor; // Minor Version of SMBIOS + uint16_t MaxStructureSize; // Maximum size of a SMBIOS Structure (we will se later) + uint8_t EntryPointRevision; //... + char FormattedArea[5]; //... + char EntryPointString2[5]; // "_DMI_" + uint8_t Checksum2; // Checksum for values from EntryPointString2 to the end of table + uint16_t TableLength; // Length of the Table containing all the structures + uint32_t TableAddress; // Address of the Table + uint16_t num_structures; // Number of structures in the table + uint8_t BCDRevision; //Unused + }; + + struct Header + { + uint8_t type; + uint8_t length; + uint16_t handle; + + char data[0]; + + const char* strings() const + { + return &this->data[this->length - sizeof(Header)]; + } + + const char* get_string(int idx) const + { + auto* str = strings(); + while (idx-- > 0) + { + str += strnlen(str, 64) + 1; + } + return str; + } + + const Header* next() const + { + auto* str = strings(); + while (true) + { + int len = strnlen(str, 64); + str += len + 1; + if (len == 0) return (Header*) str; + } + } + }; + + struct PhysMemArray : public Header + { + struct bits_t + { + uint8_t location; + uint8_t use; + uint8_t error_corr_mtd; + uint32_t capacity32; + uint16_t mem_err_info_handle; + uint16_t num_slots; + uint64_t capacity64; + + } __attribute__((packed)); + + const bits_t& info() const noexcept { + return *(bits_t*) &data[0]; + } + + uintptr_t capacity() const noexcept { + const auto& inf = info(); + return (inf.capacity32 == 0x80000000) + ? inf.capacity64 : inf.capacity32 * 1024; + } + + }; + + void SMBIOS::parse(const char* mem) + { + auto* table = (const EntryPoint*) mem; + INFO("SMBIOS", "Version %u.%u", table->major, table->minor); + + const int structs = table->num_structures; + auto* hdr = (const Header*) (uintptr_t) table->TableAddress; + for (int i = 0; i < structs; i++) + { + //printf("Table: %u\n", hdr->type); + switch (hdr->type) + { + case 0: // BIOS + INFO2("BIOS vendor: %s", hdr->get_string(hdr->data[0])); + INFO2("BIOS version: %s", hdr->get_string(hdr->data[1])); + break; + case 1: + INFO2("Manufacturer: %s", hdr->get_string(hdr->data[0])); + INFO2("Product name: %s", hdr->get_string(hdr->data[1])); + { + char uuid[33]; + snprintf(uuid, sizeof(uuid), + "%08x%08x-%08x%08x", + *(uint32_t*) &hdr->data[4], + *(uint32_t*) &hdr->data[8], + *(uint32_t*) &hdr->data[12], + *(uint32_t*) &hdr->data[16]); + sysinfo.uuid = std::string(uuid); + INFO2("System UUID: %s", sysinfo.uuid.c_str()); + } + break; + case 16: + { + const auto* array = (PhysMemArray*) hdr; + sysinfo.physical_memory = array->capacity(); + INFO2("Physical memory array with %lu MB capacity", + sysinfo.physical_memory / (1024*1024)); + } + break; + } + + hdr = hdr->next(); + if (hdr->type == 0) break; + } + + // salvage operation for when no memory array found + if (sysinfo.physical_memory == 0) { + sysinfo.physical_memory = OS::memory_end()+1; + } + } + + static uint8_t checksum(const char* addr, int length) + { + uint8_t sum = 0; + for (int i = 0; i < length; i++) sum += addr[i]; + return sum; + } + + void SMBIOS::init() + { + auto* mem = (const char*) 0xF0000; + while (mem < (const char*) 0x100000) + { + if (strncmp(mem, "_SM_", 4) == 0) + { + if (checksum(mem, mem[5]) == 0) + { + //printf("Found SMBIOS entry @ %p\n", mem); + SMBIOS::parse(mem); + return; + } + } + mem += 16; + } + assert(0 && "Failed to find SMBIOS headers\n"); + } + +} + +const arch_system_info_t& __arch_system_info() noexcept +{ + return x86::SMBIOS::system_info(); +} diff --git a/src/platform/x86_pc/smbios.hpp b/src/platform/x86_pc/smbios.hpp new file mode 100644 index 0000000000..710b0267b4 --- /dev/null +++ b/src/platform/x86_pc/smbios.hpp @@ -0,0 +1,38 @@ +// This file is a part of the IncludeOS unikernel - www.includeos.org +// +// Copyright 2015 Oslo and Akershus University College of Applied Sciences +// and Alfred Bratterud +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#pragma once +#include +#include +#include + +namespace x86 +{ + struct SMBIOS + { + static void init(); + + static inline + const arch_system_info_t& system_info() { + return sysinfo; + } + + private: + static void parse(const char*); + static arch_system_info_t sysinfo; + }; +} diff --git a/src/platform/x86_pc/softreset.cpp b/src/platform/x86_pc/softreset.cpp index e9597c7bb5..fb7b72c171 100644 --- a/src/platform/x86_pc/softreset.cpp +++ b/src/platform/x86_pc/softreset.cpp @@ -3,7 +3,7 @@ #include #define SOFT_RESET_MAGIC 0xFEE1DEAD -#define SOFT_RESET_LOCATION 0x7000 +#define SOFT_RESET_LOCATION 0x8200 namespace x86 { extern uint32_t apic_timer_get_ticks() noexcept; @@ -15,7 +15,7 @@ struct softreset_t uint32_t checksum; uint64_t liveupdate_loc; uint64_t high_mem; - MHz cpu_freq; + KHz cpu_freq; uint32_t apic_ticks; uint64_t extra; uint32_t extra_len; @@ -47,7 +47,7 @@ void OS::resume_softreset(intptr_t addr) /// restore known values OS::liveupdate_loc_ = data->liveupdate_loc; OS::memory_end_ = data->high_mem; - OS::cpu_mhz_ = data->cpu_freq; + OS::cpu_khz_ = data->cpu_freq; x86::apic_timer_set_ticks(data->apic_ticks); OS::m_is_live_updated = true; diff --git a/src/platform/x86_pc/start.asm b/src/platform/x86_pc/start.asm index e36b4f8303..313fbd238d 100644 --- a/src/platform/x86_pc/start.asm +++ b/src/platform/x86_pc/start.asm @@ -27,6 +27,10 @@ global __avx_enabled %define MB_MAGIC 0x1BADB002 %define MB_FLAGS 0x3 ;; ALIGN + MEMINFO +;; stack base address at EBDA border +;; NOTE: Multiboot can use 9d400 to 9ffff +%define STACK_LOCATION 0x9D3F0 + extern _MULTIBOOT_START_ extern _LOAD_START_ extern _LOAD_END_ @@ -76,9 +80,8 @@ rock_bottom: mov cx, 0x18 ;; GS segment mov gs, cx - ;; 32-bit stack base address at EBDA border - ;; NOTE: Multiboot can use 9fc00 to 9ffff - mov esp, 0x9FC00 + ;; 32-bit stack ptr + mov esp, STACK_LOCATION mov ebp, esp ;; enable SSE before we enter C/C++ land diff --git a/src/platform/x86_solo5/os.cpp b/src/platform/x86_solo5/os.cpp index d74ee791ee..ecc5fd9266 100644 --- a/src/platform/x86_solo5/os.cpp +++ b/src/platform/x86_solo5/os.cpp @@ -1,6 +1,5 @@ #include -#include #include #include #include @@ -35,8 +34,6 @@ extern uintptr_t _ELF_END_; #define PROFILE(name) /* name */ #endif -RTC::timestamp_t OS::booted_at_ {0}; - void solo5_poweroff() { __asm__ __volatile__("cli; hlt"); @@ -44,31 +41,25 @@ void solo5_poweroff() } // returns wall clock time in nanoseconds since the UNIX epoch -int64_t __arch_time_now() noexcept +uint64_t __arch_system_time() noexcept { return solo5_clock_wall(); } - -RTC::timestamp_t OS::boot_timestamp() +timespec __arch_wall_clock() noexcept { - return booted_at_; + uint64_t stamp = solo5_clock_wall(); + timespec result; + result.tv_sec = stamp / 1000000000ul; + result.tv_nsec = stamp % 1000000000ul; + return result; } // actually uses nanoseconds (but its just a number) uint64_t OS::cycles_asleep() noexcept { return os_cycles_hlt; } -uint64_t OS::micros_asleep() noexcept { - return os_cycles_hlt / 1000; -} - -// uptime in nanoseconds -RTC::timestamp_t OS::uptime() -{ - return solo5_clock_monotonic() - booted_at_; -} -int64_t OS::micros_since_boot() noexcept { - return uptime() / 1000; +uint64_t OS::nanos_asleep() noexcept { + return os_cycles_hlt; } void OS::default_stdout(const char* str, const size_t len) @@ -133,7 +124,7 @@ void OS::start(char* _cmdline, uintptr_t mem_size) extern void __platform_init(); __platform_init(); - MYINFO("Booted at monotonic_ns=%lld walltime_ns=%lld", + MYINFO("Booted at monotonic_ns=%ld walltime_ns=%ld", solo5_clock_monotonic(), solo5_clock_wall()); Solo5_manager::init(); @@ -141,14 +132,10 @@ void OS::start(char* _cmdline, uintptr_t mem_size) // We don't need a start or stop function in solo5. Timers::init( // timer start function - [] (std::chrono::microseconds) {}, + [] (auto) {}, // timer stop function [] () {}); - // Some tests are asserting there is at least one timer that is always ON - // (the RTC calibration timer). Let's fake some timer so those tests pass. - Timers::oneshot(std::chrono::hours(1000000), [] (auto) {}); - Timers::ready(); } diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt index 765f49d1e2..a7df102209 100644 --- a/test/CMakeLists.txt +++ b/test/CMakeLists.txt @@ -96,6 +96,7 @@ set(TEST_SOURCES ${TEST}/net/unit/path_mtu_discovery.cpp ${TEST}/net/unit/port_util_test.cpp ${TEST}/net/unit/socket.cpp + ${TEST}/net/unit/super_stack.cpp ${TEST}/net/unit/tcp_packet_test.cpp ${TEST}/net/unit/tcp_read_buffer_test.cpp ${TEST}/net/unit/tcp_write_queue.cpp diff --git a/test/kernel/integration/LiveUpdate/service.cpp b/test/kernel/integration/LiveUpdate/service.cpp index 1a6e18ca76..b84914cc7c 100644 --- a/test/kernel/integration/LiveUpdate/service.cpp +++ b/test/kernel/integration/LiveUpdate/service.cpp @@ -15,7 +15,7 @@ // See the License for the specific language governing permissions and // limitations under the License. -#include +#include #include #include #include "liu.hpp" @@ -27,7 +27,7 @@ void Service::start() { auto func = begin_test_boot(); - if (liu::LiveUpdate::is_resumable() == false) + if (OS::is_live_updated() == false) { auto& inet = net::Super_stack::get(0); inet.network_config({10,0,0,49}, {255,255,255,0}, {10,0,0,1}); diff --git a/test/kernel/integration/LiveUpdate/test_boot.cpp b/test/kernel/integration/LiveUpdate/test_boot.cpp index b940e22464..12f6adb999 100644 --- a/test/kernel/integration/LiveUpdate/test_boot.cpp +++ b/test/kernel/integration/LiveUpdate/test_boot.cpp @@ -1,23 +1,25 @@ #include +#include #include +#include using namespace liu; -static std::vector timestamps; +static std::vector timestamps; static buffer_t bloberino; static void boot_save(Storage& storage, const buffer_t* blob) { - timestamps.push_back(OS::micros_since_boot()); + timestamps.push_back(OS::nanos_since_boot()); storage.add_vector(0, timestamps); assert(blob != nullptr); storage.add_buffer(2, *blob); } static void boot_resume_all(Restore& thing) { - timestamps = thing.as_vector(); thing.go_next(); + timestamps = thing.as_vector(); thing.go_next(); // calculate time spent auto t1 = timestamps.back(); - auto t2 = OS::micros_since_boot(); + auto t2 = OS::nanos_since_boot(); // set final time timestamps.back() = t2 - t1; // retrieve old blob @@ -30,21 +32,29 @@ LiveUpdate::storage_func begin_test_boot() { if (LiveUpdate::resume("test", boot_resume_all)) { + // OS must be able to tell it was live updated each time + assert(OS::is_live_updated()); + if (timestamps.size() >= 30) { // calculate median by sorting std::sort(timestamps.begin(), timestamps.end()); auto median = timestamps[timestamps.size()/2]; // show information - printf("Median boot time over %lu samples: %ld micros\n", - timestamps.size(), median); + printf("Median boot time over %lu samples: %.2f millis\n", + timestamps.size(), median / 1000000.0); /* for (auto& stamp : timestamps) { printf("%lld\n", stamp); } */ - printf("SUCCESS\n"); - OS::shutdown(); + printf("Verifying that timers are started...\n"); + + using namespace std::chrono; + Timers::oneshot(5ms,[] (int) { + printf("SUCCESS\n"); + }); + return nullptr; } else { // immediately liveupdate diff --git a/test/kernel/integration/modules/CMakeLists.txt b/test/kernel/integration/modules/CMakeLists.txt index 8c99d05032..123cc2b3b9 100644 --- a/test/kernel/integration/modules/CMakeLists.txt +++ b/test/kernel/integration/modules/CMakeLists.txt @@ -14,7 +14,6 @@ MESSAGE(STATUS "CMake root: " $ENV{INCLUDEOS_PREFIX}) set(SERVICE_NAME "Kernel modules test") set(BINARY "test_mods") -set(MAX_MEM 128) set(SOURCES service.cpp hotswap.cpp ) diff --git a/test/kernel/integration/modules/hotswap.cpp b/test/kernel/integration/modules/hotswap.cpp index 6f9e719274..3fa1610740 100644 --- a/test/kernel/integration/modules/hotswap.cpp +++ b/test/kernel/integration/modules/hotswap.cpp @@ -21,15 +21,11 @@ * function copied to an otherwise unused place in memory so that we can * overwrite the currently running binary with a new one. */ -#include -asm(".org 0x200000"); - -extern "C" void* __multiboot_magic; -extern "C" void* __multiboot_addr; +asm(".org 0x2000"); extern "C" __attribute__((noreturn)) -void hotswap(const char* base, int len, char* dest, void* start, - uintptr_t magic, uintptr_t bootinfo) +void hotswap(const char* base, int len, char* dest, + void* start, void* magic, void* bootinfo) { // Copy binary to its destination for (int i = 0; i < len; i++) diff --git a/test/kernel/integration/modules/mod2/CMakeLists.txt b/test/kernel/integration/modules/mod2/CMakeLists.txt index 166ccb4791..75c78594df 100644 --- a/test/kernel/integration/modules/mod2/CMakeLists.txt +++ b/test/kernel/integration/modules/mod2/CMakeLists.txt @@ -19,9 +19,6 @@ set(SERVICE_NAME "IncludeOS seed") # Name of your service binary set(BINARY "seed") -# Maximum memory can be hard-coded into the binary -set(MAX_MEM 128) - # Source files to be linked with OS library parts to form bootable image set(SOURCES service.cpp # ...add more here diff --git a/test/kernel/integration/modules/service.cpp b/test/kernel/integration/modules/service.cpp index a47989ff13..9588e639d2 100644 --- a/test/kernel/integration/modules/service.cpp +++ b/test/kernel/integration/modules/service.cpp @@ -43,15 +43,15 @@ void Service::start(const std::string& args) mod.mod_start, mod.mod_end, mod.mod_end - mod.mod_start); // Verify module cmdlines - Expects(std::string((char*)mods[0].cmdline) == "../mod1.json"); - Expects(std::string((char*)mods[1].cmdline) == "../seed loaded as module"); - Expects(std::string((char*)mods[2].cmdline) == "../mod3.json"); + Expects(std::string((char*) mods[0].cmdline) == "../mod1.json"); + Expects(std::string((char*) mods[1].cmdline) == "../seed loaded as module"); + Expects(std::string((char*) mods[2].cmdline) == "../mod3.json"); // verify content of text modules - Expects(std::string((char*)mods[0].mod_start) + Expects(std::string((char*) mods[0].mod_start) == "{\"module1\" : \"JSON data\" }\n"); - Expects(std::string((char*)mods[2].mod_start) + Expects(std::string((char*) mods[2].mod_start) == "{\"module3\" : \"More JSON data, for mod2 service\" }\n"); multiboot_module_t binary = mods[1]; @@ -60,10 +60,10 @@ void Service::start(const std::string& args) Elf_binary elf ({(char*)binary.mod_start, (int)(binary.mod_end - binary.mod_start)}); - void* hotswap_addr = (void*)0x100000; + void* hotswap_addr = (void*)0x2000; MYINFO("Moving hotswap function (now at %p)", &hotswap); - memcpy(hotswap_addr, (void*)&hotswap, 1024); + memcpy(hotswap_addr, (void*)&hotswap, 2048); extern uintptr_t __multiboot_magic; extern uintptr_t __multiboot_addr; @@ -72,9 +72,9 @@ void Service::start(const std::string& args) auto load_offs = elf.program_headers()[0].p_offset; char* base = (char*)binary.mod_start + load_offs; - int len = (int)(binary.mod_end - binary.mod_start); - char* dest = (char*)0xA00000; - void* start = (void*)elf.entry(); + int len = int(binary.mod_end - binary.mod_start); + char* dest = (char*) elf.program_headers()[0].p_paddr; + void* start = (void*) elf.entry(); SHA1 sha; sha.update(base, len); @@ -87,8 +87,7 @@ void Service::start(const std::string& args) MYINFO("Disabling interrupts and calling hotswap..."); asm("cli"); - ((decltype(&hotswap))hotswap_addr)(base, len, dest, start, __multiboot_magic, __multiboot_addr); - + ((decltype(&hotswap))hotswap_addr)(base, len, dest, start, 0, 0); + //__multiboot_magic, __multiboot_addr); panic("Should have jumped\n"); - } diff --git a/test/kernel/integration/timers/timers.cpp b/test/kernel/integration/timers/timers.cpp index 000f85832a..0ca2125868 100644 --- a/test/kernel/integration/timers/timers.cpp +++ b/test/kernel/integration/timers/timers.cpp @@ -16,7 +16,7 @@ // limitations under the License. #include -#include +#include #include #include @@ -28,16 +28,17 @@ static int repeat2 = 0; void test_timers() { - INFO("Timers", "Testing one-shot timers"); - // RTC is using a timer to calibrate itself over large periods of time - assert(Timers::active() == 1); + INFO("Timers", "Testing kernel timers"); + // a calibration timer is active on bare metal and in emulated environments + static size_t BASE_TIMERS; + BASE_TIMERS = Timers::active(); // 30 sec. - Test End Timers::oneshot(30s, [] (auto) { printf("One-shots fired: %i \n", one_shots); CHECKSERT(one_shots == 5, "5 one-shot-timers fired"); CHECKSERT(repeat1 == 25 and repeat2 == 10, "1s. timer fired 25 times, 2s. timer fired 10 times"); - CHECKSERT(Timers::active() == 1, "This was the last active timer (except RTC)"); + CHECKSERT(Timers::active() == BASE_TIMERS+0, "No more active timers"); INFO("Timers", "SUCCESS"); }); @@ -103,9 +104,8 @@ void test_timers() // Make sure this timer iterator is valid Timers::stop(timer1s); - // The current timer does not count towards the total active, - // but RTC uses a timer to calibrate itself over time - CHECKSERT(Timers::active() == 2, "There are still 2 timers left"); + // The current timer does not count towards the total active + CHECKSERT(Timers::active() == BASE_TIMERS+1, "Only the last finish timer is left"); Timers::oneshot(1s, [] (auto) { diff --git a/test/lest_util/os_mock.cpp b/test/lest_util/os_mock.cpp index 0817b75f9f..12dd124512 100644 --- a/test/lest_util/os_mock.cpp +++ b/test/lest_util/os_mock.cpp @@ -33,7 +33,7 @@ void* aligned_alloc(size_t alignment, size_t size) { #include Statman& Statman::get() { static uintptr_t start {0}; - static const size_t memsize = 0x100000; + static const size_t memsize = 0x1000000; if (!start) { start = (uintptr_t) malloc(memsize); } @@ -65,9 +65,6 @@ void OS::start(unsigned, unsigned) {} void OS::default_stdout(const char*, size_t) {} void OS::event_loop() {} void OS::block() {} -int64_t OS::micros_since_boot() noexcept { - return 0; -} void OS::resume_softreset(intptr_t) {} bool OS::is_softreset_magic(uint32_t) { return true; @@ -141,9 +138,13 @@ void __arch_subscribe_irq(uint8_t) {} void __arch_enable_legacy_irq(uint8_t) {} void __arch_disable_legacy_irq(uint8_t) {} -int64_t __arch_time_now() noexcept { +uint64_t __arch_system_time() noexcept { return 0; } +#include +timespec __arch_wall_clock() noexcept { + return timespec{0, 0}; +} /// smp /// #include diff --git a/test/net/integration/gateway/nacl.txt b/test/net/integration/gateway/nacl.txt index c362ccbec9..688e2f76d2 100644 --- a/test/net/integration/gateway/nacl.txt +++ b/test/net/integration/gateway/nacl.txt @@ -1,5 +1,5 @@ Iface eth0 { - index: 0, + index: "c0:01:0a:00:00:2a", address: 10.0.1.1, netmask: 255.255.255.0, prerouting: [my_dnat, my_fw] diff --git a/test/net/unit/super_stack.cpp b/test/net/unit/super_stack.cpp new file mode 100644 index 0000000000..bab581698f --- /dev/null +++ b/test/net/unit/super_stack.cpp @@ -0,0 +1,99 @@ +// This file is a part of the IncludeOS unikernel - www.includeos.org +// +// Copyright 2018 Oslo and Akershus University College of Applied Sciences +// and Alfred Bratterud +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include + +#include +#include +#include +#include + +using namespace net; + +CASE("Super stack functionality") +{ + bool stack_not_found = false; + bool stack_err = false; + auto& nics = hw::Devices::devices(); + + // Add 3 nics + nics.push_back(std::make_unique()); + nics.push_back(std::make_unique()); + nics.push_back(std::make_unique()); + + // 3 stacks are preallocated + EXPECT(Super_stack::inet().ip4_stacks().size() == 3); + + // Retreiving the first stack creates an interface on the first nic + auto& stack1 = Super_stack::get(0); + EXPECT(&stack1.nic() == nics[0].get()); + + // Trying to get a stack that do not exists will throw + stack_not_found = false; + try { + Super_stack::get(3); + } catch(const Stack_not_found&) { + stack_not_found = true; + } + EXPECT(stack_not_found == true); + + // Getting by mac addr works + const MAC::Addr my_mac{0xff, 0xff, 0xff, 0xff, 0xff, 0xff}; + // hehe.. + reinterpret_cast(nics[0].get())->mac_ = my_mac; + auto& stack_by_mac = Super_stack::get(my_mac.to_string()); + EXPECT(&stack_by_mac.nic() == nics[0].get()); + + // Throws if mac addr isnt found + stack_not_found = false; + try { + Super_stack::get("FF:FF:FF:00:00:00"); + } catch(const Stack_not_found&) { + stack_not_found = true; + } + EXPECT(stack_not_found == true); + + // Creating substacks works alrite + Nic_mock my_nic; + auto& my_sub_stack = Super_stack::inet().create(my_nic, 2, 42); + EXPECT(&my_sub_stack == &Super_stack::get(2,42)); + + // Not allowed to create if already occupied tho + stack_err = false; + try { + Super_stack::inet().create(my_nic, 0, 0); + } catch(const Super_stack_err&) { + stack_err = true; + } + EXPECT(stack_err == true); + + // Also possible to create without assigning index, which means it takes the first free one + auto& custom_created_stack = Super_stack::inet().create(my_nic); + EXPECT(&custom_created_stack == &Super_stack::get(1)); + + // Not allowed to create if all indexes are occupied tho + Super_stack::get(2); // occupy the last free one + stack_err = false; + try { + Super_stack::inet().create(my_nic); + } catch(const Super_stack_err&) { + stack_err = true; + } + EXPECT(stack_err == true); + +} + diff --git a/vmrunner/vm.schema.json b/vmrunner/vm.schema.json index c2dff759ed..705317567d 100644 --- a/vmrunner/vm.schema.json +++ b/vmrunner/vm.schema.json @@ -99,6 +99,11 @@ "vga" : { "description" : "Enable VGA screen", "enum" : ["std", "cirrus", "vmware", "qxl", "xenfb", "tcx", "cg3", "virtio", "none"] + }, + + "vfio" : { + "description" : "VFIO PCI-passthrough on device", + "type" : "string" } } diff --git a/vmrunner/vmrunner.py b/vmrunner/vmrunner.py index 3454984532..6f4bcd9066 100644 --- a/vmrunner/vmrunner.py +++ b/vmrunner/vmrunner.py @@ -349,6 +349,9 @@ def boot(self, multiboot, kernel_args = "", image_name = None): if "bios" in self._config: kernel_args.extend(["-bios", self._config["bios"]]) + if "uuid" in self._config: + kernel_args.extend(["--uuid", str(self._config["uuid"])]) + if "smp" in self._config: kernel_args.extend(["-smp", str(self._config["smp"])]) @@ -376,13 +379,17 @@ def boot(self, multiboot, kernel_args = "", image_name = None): if "vga" in self._config: vga_arg = ["-vga", str(self._config["vga"])] + pci_arg = [] + if "vfio" in self._config: + pci_arg = ["-device", "vfio-pci,host=" + self._config["vfio"]] + # TODO: sudo is only required for tap networking and kvm. Check for those. command = ["sudo", "--preserve-env", "qemu-system-x86_64"] if self._kvm_present: command.extend(["--enable-kvm"]) command += kernel_args - command += disk_args + net_args + mem_arg + vga_arg + mod_args + command += disk_args + net_args + mem_arg + vga_arg + pci_arg + mod_args #command_str = " ".join(command) #command_str.encode('ascii','ignore')