| | |
| | | #include "center.h" |
| | | #include "bh_util.h" |
| | | #include "defs.h" |
| | | #include "failed_msg.h" |
| | | #include "shm.h" |
| | | #include <chrono> |
| | | #include <set> |
| | |
| | | |
| | | using namespace bhome_shm; |
| | | using namespace bhome_msg; |
| | | using namespace bhome::msg; |
| | | typedef BHCenter::MsgHandler Handler; |
| | | |
| | | namespace |
| | | { |
| | | typedef steady_clock::time_point TimePoint; |
| | | typedef steady_clock::duration Duration; |
| | | // Current reading of steady_clock. |
| | | inline TimePoint Now() |
| | | { |
| | |     return steady_clock::now(); |
| | | } |
| | | // Length of `d` in whole seconds (duration_cast to seconds truncates toward zero). |
| | | inline int64_t Seconds(const Duration &d) |
| | | { |
| | |     const auto whole_seconds = duration_cast<seconds>(d); |
| | |     return whole_seconds.count(); |
| | | } |
| | | |
| | | //TODO check proc_id |
| | | class NodeCenter |
| | |
| | | public: |
| | | typedef std::string ProcId; |
| | | typedef std::string Address; |
| | | typedef bhome::msg::ProcInfo ProcInfo; |
| | | typedef bhome_msg::ProcInfo ProcInfo; |
| | | typedef std::function<void(Address const &)> Cleaner; |
| | | |
| | | private: |
| | |
| | | }; |
| | | |
| | | struct ProcState { |
| | | TimePoint timestamp_; |
| | | int64_t timestamp_ = 0; |
| | | uint32_t flag_ = 0; // reserved |
| | | void UpdateState(TimePoint now, const Duration &offline_time, const Duration &kill_time) |
| | | void UpdateState(const int64_t now, const int64_t offline_time, const int64_t kill_time) |
| | | { |
| | | auto diff = now - timestamp_; |
| | | #ifndef NDEBUG |
| | | printf("diff: %ld\n", Seconds(diff)); |
| | | printf("state %p diff: %ld\n", this, diff); |
| | | #endif |
| | | if (diff < offline_time) { |
| | | flag_ = kStateNormal; |
| | |
| | | // Returns the mq_id of the first route entry (index 0) of `head`. |
| | | inline const std::string &SrcAddr(const BHMsgHead &head) |
| | | { |
| | |     const auto &first_route = head.route(0); |
| | |     return first_route.mq_id(); |
| | | } |
| | | // True when `addr` is a member of the address set `addrs`. |
| | | inline bool MatchAddr(std::set<Address> const &addrs, const Address &addr) |
| | | { |
| | |     return addrs.count(addr) > 0; |
| | | } |
| | | |
| | | NodeCenter(const std::string &id, const Cleaner &cleaner, const int64_t offline_time, const int64_t kill_time) : |
| | | id_(id), cleaner_(cleaner), offline_time_(offline_time), kill_time_(kill_time), last_check_time_(0) {} |
| | | |
| | | public: |
| | | typedef std::set<TopicDest> Clients; |
| | | |
| | | NodeCenter(const std::string &id, const Cleaner &cleaner, const Duration &offline_time, const Duration &kill_time) : |
| | | id_(id), cleaner_(cleaner), offline_time_(offline_time), kill_time_(kill_time), last_check_time_(Now()) {} |
| | | const std::string &id() const { return id_; } // no need to lock. |
| | | NodeCenter(const std::string &id, const Cleaner &cleaner, const steady_clock::duration offline_time, const steady_clock::duration kill_time) : |
| | | NodeCenter(id, cleaner, duration_cast<seconds>(offline_time).count(), duration_cast<seconds>(kill_time).count()) {} |
| | | |
| | | //TODO maybe just return serialized string. |
| | | // center name, not related to shm. |
| | | const std::string &id() const { return id_; } |
| | | |
| | | MsgCommonReply Register(const BHMsgHead &head, MsgRegister &msg) |
| | | { |
| | | if (msg.proc().proc_id() != head.proc_id()) { |
| | |
| | | } |
| | | |
| | | try { |
| | | Node node(new NodeInfo); |
| | | node->addrs_.insert(SrcAddr(head)); |
| | | for (auto &addr : msg.addrs()) { |
| | | node->addrs_.insert(addr.mq_id()); |
| | | auto UpdateRegInfo = [&](Node &node) { |
| | | node->addrs_.insert(SrcAddr(head)); |
| | | for (auto &addr : msg.addrs()) { |
| | | node->addrs_.insert(addr.mq_id()); |
| | | } |
| | | node->proc_.Swap(msg.mutable_proc()); |
| | | node->state_.timestamp_ = head.timestamp(); |
| | | node->state_.UpdateState(NowSec(), offline_time_, kill_time_); |
| | | }; |
| | | |
| | | auto pos = nodes_.find(head.proc_id()); |
| | | if (pos != nodes_.end()) { // new client |
| | | Node &node = pos->second; |
| | | if (node->addrs_.find(SrcAddr(head)) == node->addrs_.end()) { |
| | | // node restarted, release old mq. |
| | | RemoveNode(node); |
| | | node.reset(new NodeInfo); |
| | | } |
| | | UpdateRegInfo(node); |
| | | } else { |
| | | Node node(new NodeInfo); |
| | | UpdateRegInfo(node); |
| | | nodes_[node->proc_.proc_id()] = node; |
| | | } |
| | | node->proc_.Swap(msg.mutable_proc()); |
| | | node->state_.timestamp_ = Now(); |
| | | node->state_.flag_ = kStateNormal; |
| | | nodes_[node->proc_.proc_id()] = node; |
| | | return MakeReply(eSuccess); |
| | | } catch (...) { |
| | | return MakeReply(eError, "register node error."); |
| | |
| | | if (pos == nodes_.end()) { |
| | | return MakeReply<Reply>(eNotRegistered, "Node is not registered."); |
| | | } else { |
| | | auto node = pos->second; |
| | | auto &node = pos->second; |
| | | if (!MatchAddr(node->addrs_, SrcAddr(head))) { |
| | | return MakeReply<Reply>(eAddressNotMatch, "Node address error."); |
| | | } else if (head.type() == kMsgTypeHeartbeat && CanHeartbeat(*node)) { |
| | |
| | | { |
| | | return HandleMsg(head, [&](Node node) { |
| | | NodeInfo &ni = *node; |
| | | auto now = Now(); |
| | | ni.state_.timestamp_ = now; |
| | | ni.state_.flag_ = kStateNormal; |
| | | ni.state_.timestamp_ = head.timestamp(); |
| | | ni.state_.UpdateState(NowSec(), offline_time_, kill_time_); |
| | | |
| | | auto &info = msg.proc(); |
| | | if (!info.public_info().empty()) { |
| | |
| | | private: |
| | | void CheckNodes() |
| | | { |
| | | auto now = Now(); |
| | | if (Seconds(now - last_check_time_) < 1) { return; } |
| | | |
| | | auto now = NowSec(); |
| | | if (now - last_check_time_ < 1) { return; } |
| | | last_check_time_ = now; |
| | | |
| | | auto it = nodes_.begin(); |
| | |
| | | auto &cli = *it->second; |
| | | cli.state_.UpdateState(now, offline_time_, kill_time_); |
| | | if (cli.state_.flag_ == kStateKillme) { |
| | | if (cleaner_) { |
| | | for (auto &addr : cli.addrs_) { |
| | | cleaner_(addr); |
| | | } |
| | | } |
| | | RemoveNode(it->second); |
| | | it = nodes_.erase(it); |
| | | } else { |
| | | ++it; |
| | |
| | | auto node = weak.lock(); |
| | | return node && Valid(*node); |
| | | } |
| | | void CheckAllNodes(); //TODO, call it in timer. |
| | | std::string id_; // center proc id; |
| | | // Detaches `node` from the center: erases its entries from service_map_ and |
| | | // subscribe_map_ (dropping topics left with no clients), hands every address in |
| | | // node->addrs_ to cleaner_, then clears node->addrs_. |
| | | void RemoveNode(Node &node) |
| | | { |
| | |     // For each (address, topics) record of the node, erase the matching |
| | |     // TopicDest from every listed topic's client set in rec_map. |
| | |     auto EraseMapRec = [&node](auto &rec_map, auto &node_rec) { |
| | |         for (auto &addr_topics : node_rec) { |
| | |             TopicDest dest{addr_topics.first, node}; |
| | |             for (auto &topic : addr_topics.second) { |
| | |                 auto pos = rec_map.find(topic); |
| | |                 if (pos != rec_map.end()) { |
| | |                     pos->second.erase(dest); |
| | |                     if (pos->second.empty()) { |
| | |                         rec_map.erase(pos); |
| | |                     } |
| | |                 } |
| | |             } |
| | |         } |
| | |     }; |
| | |     EraseMapRec(service_map_, node->services_); |
| | |     EraseMapRec(subscribe_map_, node->subscriptions_); |
| | | |
| | |     // cleaner_ is a std::function and may be empty; invoking an empty one throws |
| | |     // std::bad_function_call, so only call it when it holds a target. |
| | |     if (cleaner_) { |
| | |         for (auto &addr : node->addrs_) { |
| | |             cleaner_(addr); |
| | |         } |
| | |     } |
| | |     node->addrs_.clear(); |
| | | } |
| | | std::string id_; // center proc id; |
| | | |
| | | std::unordered_map<Topic, Clients> service_map_; |
| | | std::unordered_map<Topic, Clients> subscribe_map_; |
| | | std::unordered_map<ProcId, Node> nodes_; |
| | | Cleaner cleaner_; // remove mqs. |
| | | Duration offline_time_; |
| | | Duration kill_time_; |
| | | TimePoint last_check_time_; |
| | | int64_t offline_time_; |
| | | int64_t kill_time_; |
| | | int64_t last_check_time_; |
| | | }; |
| | | |
| | | template <class Body, class OnMsg, class Replyer> |
| | |
| | | |
| | | Handler Combine(const Handler &h1, const Handler &h2) |
| | | { |
| | | return [h1, h2](ShmSocket &socket, bhome_msg::MsgI &msg, bhome::msg::BHMsgHead &head) { |
| | | return [h1, h2](ShmSocket &socket, bhome_msg::MsgI &msg, bhome_msg::BHMsgHead &head) { |
| | | return h1(socket, msg, head) || h2(socket, msg, head); |
| | | }; |
| | | } |
| | |
| | | |
| | | bool AddCenter(const std::string &id, const NodeCenter::Cleaner &cleaner) |
| | | { |
| | | auto center_ptr = std::make_shared<Synced<NodeCenter>>(id, cleaner, 60s, 60s * 3); |
| | | auto center_failed_q = std::make_shared<FailedMsgQ>(); |
| | | auto MakeReplyer = [](ShmSocket &socket, BHMsgHead &head, const std::string &proc_id, FailedMsgQ &failq, const int timeout_ms = 0) { |
| | | auto center_ptr = std::make_shared<Synced<NodeCenter>>(id, cleaner, 60s, 60s * 2); |
| | | auto MakeReplyer = [](ShmSocket &socket, BHMsgHead &head, const std::string &proc_id) { |
| | | return [&](auto &&rep_body) { |
| | | auto reply_head(InitMsgHead(GetType(rep_body), proc_id, head.msg_id())); |
| | | MsgI msg; |
| | | if (msg.Make(socket.shm(), reply_head, rep_body)) { |
| | | auto &remote = head.route(0).mq_id(); |
| | | bool r = socket.Send(remote.data(), msg, timeout_ms); |
| | | if (!r) { |
| | | failq.Push(remote, msg, 60s); // for later retry. |
| | | } |
| | | } |
| | | auto &remote = head.route(0).mq_id(); |
| | | socket.Send(remote.data(), reply_head, rep_body); |
| | | }; |
| | | }; |
| | | |
| | | auto OnCenterIdle = [center_ptr, center_failed_q](ShmSocket &socket) { |
| | | auto OnCenterIdle = [center_ptr](ShmSocket &socket) { |
| | | auto ¢er = *center_ptr; |
| | | center_failed_q->TrySend(socket); |
| | | center->OnTimer(); |
| | | }; |
| | | |
| | | auto OnCenter = [=](ShmSocket &socket, MsgI &msg, BHMsgHead &head) -> bool { |
| | | auto ¢er = *center_ptr; |
| | | auto replyer = MakeReplyer(socket, head, center->id(), *center_failed_q); |
| | | auto replyer = MakeReplyer(socket, head, center->id()); |
| | | switch (head.type()) { |
| | | CASE_ON_MSG_TYPE(Register); |
| | | CASE_ON_MSG_TYPE(Heartbeat); |
| | |
| | | } |
| | | }; |
| | | |
| | | auto bus_failed_q = std::make_shared<FailedMsgQ>(); |
| | | auto OnBusIdle = [=](ShmSocket &socket) { bus_failed_q->TrySend(socket); }; |
| | | auto OnBusIdle = [=](ShmSocket &socket) {}; |
| | | auto OnPubSub = [=](ShmSocket &socket, MsgI &msg, BHMsgHead &head) -> bool { |
| | | auto ¢er = *center_ptr; |
| | | auto replyer = MakeReplyer(socket, head, center->id(), *bus_failed_q); |
| | | auto replyer = MakeReplyer(socket, head, center->id()); |
| | | auto OnPublish = [&]() { |
| | | MsgPublish pub; |
| | | NodeCenter::Clients clients; |
| | |
| | | auto &cli = *it; |
| | | auto node = cli.weak_node_.lock(); |
| | | if (node) { |
| | | if (!socket.Send(cli.mq_.data(), msg, 0)) { |
| | | bus_failed_q->Push(cli.mq_, msg, 60s); |
| | | } |
| | | // should also make sure that mq is not killed before msg expires. |
| | | // it would be ok if (kill_time - offline_time) is longer than expire time. |
| | | socket.Send(cli.mq_.data(), msg); |
| | | ++it; |
| | | } else { |
| | | it = clients.erase(it); |
| | |
| | | |
| | | } // namespace |
| | | |
| | | // Returns the default SharedMemory object; it is a function-local static, so it |
| | | // is constructed on the first call and the same instance is returned afterwards. |
| | | SharedMemory &BHomeShm() |
| | | { |
| | |     // Second constructor argument; presumably the segment size in bytes -- confirm against SharedMemory. |
| | |     constexpr int kDefaultShmSize = 1024 * 1024 * 64; |
| | |     static SharedMemory default_shm("bhome_default_shm_v0", kDefaultShmSize); |
| | |     return default_shm; |
| | | } |
| | | |
| | | BHCenter::CenterRecords &BHCenter::Centers() |
| | | { |
| | | static CenterRecords rec; |
| | |
| | | printf("remove mq : %s\n", r ? "ok" : "failed"); |
| | | }; |
| | | |
| | | AddCenter("#center", gc); |
| | | AddCenter("#bhome_center", gc); |
| | | |
| | | for (auto &kv : Centers()) { |
| | | auto &info = kv.second; |
| | | sockets_[info.name_] = std::make_shared<ShmSocket>(shm, *(MQId *) info.mqid_.data(), info.mq_len_); |
| | | } |
| | | } |
| | | |
| | | // Default constructor: delegates to the other BHCenter constructor, passing BHomeShm(). |
| | | BHCenter::BHCenter() : BHCenter(BHomeShm()) {} |
| | | |
| | | bool BHCenter::Start() |
| | | { |