From e54b8e58780c7d9f37b06cc4e1dc88badb2129c9 Mon Sep 17 00:00:00 2001
From: lichao <lichao@aiotlink.com>
Date: Tue, 18 May 2021 17:02:21 +0800
Subject: [PATCH] remove sync recv, node cache msgs for sync recv.

---
 box/center.cpp |  571 ++++++++++++++++++++++++++++++++++++++++++++------------
 1 files changed, 447 insertions(+), 124 deletions(-)

diff --git a/box/center.cpp b/box/center.cpp
index cde865f..9ecd04b 100644
--- a/box/center.cpp
+++ b/box/center.cpp
@@ -18,34 +18,137 @@
 #include "center.h"
 #include "bh_util.h"
 #include "defs.h"
-#include "failed_msg.h"
+#include "log.h"
 #include "shm.h"
 #include <chrono>
-#include <set>
+#include <unordered_map>
 
 using namespace std::chrono;
 using namespace std::chrono_literals;
 
 using namespace bhome_shm;
 using namespace bhome_msg;
-using namespace bhome::msg;
 typedef BHCenter::MsgHandler Handler;
 
 namespace
 {
-typedef steady_clock::time_point TimePoint;
-typedef steady_clock::duration Duration;
-inline TimePoint Now() { return steady_clock::now(); };
-inline int64_t Seconds(const Duration &d) { return duration_cast<seconds>(d).count(); };
+
+typedef std::string ProcId;
+typedef size_t ProcIndex; // max local procs.
+const int kMaxProcs = 65536;
+
+// Registry of every proc that has called ProcInit: it only grows, entries are never removed.
+// Nodes refer to a proc by the returned index (instead of its MQId) in msg allocation
+// requests to save some bits; the table is capped at kMaxProcs entries.
+class ProcRecords
+{
+public:
+	struct ProcRec {
+		ProcId proc_;  // proc id; left empty in the placeholder Get() returns for unknown indexes.
+		MQId ssn_ = 0; // session id passed to the latest Put() for this proc.
+	};
+	// reserve the full table up front, so that Put() never reallocates procs_.
+	ProcRecords() { procs_.reserve(kMaxProcs); }
+	// Record proc_id with session ssn, or refresh ssn when proc_id is already known.
+	// Returns the index of proc_id, or ProcIndex(-1) when it is new and the table is full.
+	ProcIndex Put(const ProcId &proc_id, const MQId ssn)
+	{
+		auto pos = proc_index_.find(proc_id);
+		if (pos != proc_index_.end()) { // known proc: must work even when the table is full.
+			procs_[pos->second].ssn_ = ssn;
+			return pos->second;
+		}
+		if (procs_.size() >= static_cast<size_t>(kMaxProcs)) {
+			return static_cast<ProcIndex>(-1);
+		}
+		procs_.emplace_back(ProcRec{proc_id, ssn}); // append first, so an index never outruns procs_.
+		return proc_index_.emplace(proc_id, procs_.size() - 1).first->second;
+	}
+	// Returns the record stored at index, or an empty record (empty proc_, ssn_ 0) when out of range.
+	const ProcRec &Get(const ProcIndex index) const
+	{
+		static const ProcRec empty_rec{};
+		return (index < procs_.size()) ? procs_[index] : empty_rec;
+	}
+private:
+	std::unordered_map<ProcId, size_t> proc_index_; // proc id -> position in procs_.
+	std::vector<ProcRec> procs_;                    // records, addressed by ProcIndex.
+};
+
+// Records msgs by id (msg id -> shm offset) so they can be freed by FreeMsg() or by the AutoRemove() sweep.
+class MsgRecords
+{
+	typedef int64_t MsgId;
+	typedef int64_t Offset;
+
+public:
+	// Track msg under its id; an id that is already recorded keeps the offset stored first.
+	void RecordMsg(const MsgI &msg) { msgs_.emplace(msg.id(), msg.Offset()); }
+	// Free the msg recorded under id and drop its record; ids without a record are only logged.
+	void FreeMsg(MsgId id)
+	{
+		auto pos = msgs_.find(id);
+		if (pos != msgs_.end()) {
+			ShmMsg(pos->second).Free();
+			msgs_.erase(pos);
+		} else {
+			LOG_TRACE() << "ignore late free request.";
+		}
+	}
+	// Sweep the records, rate-limited through time_to_clean_ and bounded per run by `limit`.
+	// With age = now - timestamp() (in NowSec() units): age < 10 keeps the msg, age >= 10 frees it
+	// when Count() is 0, and age > 60 frees it whatever Count() is.
+	void AutoRemove()
+	{
+		auto now = NowSec();
+		if (now < time_to_clean_) { return; }
+		time_to_clean_ = now + 1;
+		// std::max<int64_t> avoids depending on size_t being `unsigned long`.
+		int64_t limit = std::max<int64_t>(10000, msgs_.size() / 10);
+		int64_t released = 0;
+		auto it = msgs_.begin();
+		while (it != msgs_.end() && --limit > 0) {
+			ShmMsg msg(it->second);
+			const int64_t age = now - msg.timestamp();
+			if (age >= 10 && (msg.Count() == 0 || age > 60)) {
+				msg.Free();
+				it = msgs_.erase(it);
+				++released;
+			} else {
+				++it; // always move on: a kept msg must not stall the sweep.
+			}
+		}
+		if (released > 0) {
+			LOG_DEBUG() << "~~~~~~~~~~~~~~~~ auto release msgs: " << released;
+		}
+	}
+	size_t size() const { return msgs_.size(); }
+	// Log the number of records, every record (trace level) and the sum of their Count().
+	void DebugPrint() const
+	{
+		LOG_DEBUG() << "msgs : " << size();
+		int i = 0;
+		int total_count = 0;
+		for (auto &kv : msgs_) {
+			MsgI msg(kv.second);
+			total_count += msg.Count();
+			LOG_TRACE() << "  " << i++ << ": msg id: " << kv.first << ", offset: " << kv.second << ", count: " << msg.Count() << ", size: " << msg.Size();
+		}
+		LOG_DEBUG() << "total count: " << total_count;
+	}
+
+private:
+	std::unordered_map<MsgId, Offset> msgs_; // msg id -> offset, as given by MsgI::Offset().
+	int64_t time_to_clean_ = 0;              // earliest NowSec() value at which the next sweep runs.
+};
 
 //TODO check proc_id
 class NodeCenter
 {
 public:
-	typedef std::string ProcId;
-	typedef std::string Address;
-	typedef bhome::msg::ProcInfo ProcInfo;
-	typedef std::function<void(Address const &)> Cleaner;
+	typedef MQId Address;
+	typedef bhome_msg::ProcInfo ProcInfo;
+	typedef std::function<void(Address const)> Cleaner;
 
 private:
 	enum {
@@ -56,14 +159,17 @@
 	};
 
 	struct ProcState {
-		TimePoint timestamp_;
+		int64_t timestamp_ = 0;
 		uint32_t flag_ = 0; // reserved
-		void UpdateState(TimePoint now, const Duration &offline_time, const Duration &kill_time)
+		void PutOffline(const int64_t offline_time) // back-date timestamp_ by offline_time and flag the state as offline.
+		{
+			timestamp_ = NowSec() - offline_time;
+			flag_ = kStateOffline;
+		}
+		void UpdateState(const int64_t now, const int64_t offline_time, const int64_t kill_time)
 		{
 			auto diff = now - timestamp_;
-#ifndef NDEBUG
-			printf("diff: %ld\n", Seconds(diff));
-#endif
+			LOG_DEBUG() << "state " << this << " diff: " << diff;
 			if (diff < offline_time) {
 				flag_ = kStateNormal;
 			} else if (diff < kill_time) {
@@ -76,31 +182,184 @@
 	typedef std::unordered_map<Address, std::set<Topic>> AddressTopics;
 
 	struct NodeInfo {
-		ProcState state_;             // state
-		std::set<Address> addrs_;     // registered mqs
-		ProcInfo proc_;               //
-		AddressTopics services_;      // address: topics
-		AddressTopics subscriptions_; // address: topics
+		ProcState state_;               // state
+		std::map<MQId, int64_t> addrs_; // registered mqs
+		ProcInfo proc_;                 //
+		AddressTopics services_;        // address: topics
+		AddressTopics subscriptions_;   // address: topics
 	};
 	typedef std::shared_ptr<NodeInfo> Node;
 	typedef std::weak_ptr<NodeInfo> WeakNode;
 
 	struct TopicDest {
-		Address mq_;
+		MQId mq_id_;
+		int64_t mq_abs_addr_;
 		WeakNode weak_node_;
-		bool operator<(const TopicDest &a) const { return mq_ < a.mq_; }
+		bool operator<(const TopicDest &a) const { return mq_id_ < a.mq_id_; }
 	};
-	inline const std::string &SrcAddr(const BHMsgHead &head) { return head.route(0).mq_id(); }
-	inline bool MatchAddr(std::set<Address> const &addrs, const Address &addr) { return addrs.find(addr) != addrs.end(); }
+	inline MQId SrcAddr(const BHMsgHead &head) { return head.route(0).mq_id(); } // mq id of the first route entry of head.
+	inline int64_t SrcAbsAddr(const BHMsgHead &head) { return head.route(0).abs_addr(); } // abs_addr of the first route entry of head.
+	inline bool MatchAddr(std::map<Address, int64_t> const &addrs, const Address &addr) { return addrs.count(addr) != 0; } // true when addr is a key of addrs.
+
+	NodeCenter(const std::string &id, const Cleaner &cleaner, const int64_t offline_time, const int64_t kill_time) : // timeouts as plain counts, compared against NowSec() values.
+	    id_(id), cleaner_(cleaner), offline_time_(offline_time), kill_time_(kill_time), last_check_time_(0) {}
 
 public:
 	typedef std::set<TopicDest> Clients;
 
-	NodeCenter(const std::string &id, const Cleaner &cleaner, const Duration &offline_time, const Duration &kill_time) :
-	    id_(id), cleaner_(cleaner), offline_time_(offline_time), kill_time_(kill_time), last_check_time_(Now()) {}
-	const std::string &id() const { return id_; } // no need to lock.
+	NodeCenter(const std::string &id, const Cleaner &cleaner, const steady_clock::duration offline_time, const steady_clock::duration kill_time) : // converts both durations to whole seconds and delegates.
+	    NodeCenter(id, cleaner, duration_cast<seconds>(offline_time).count(), duration_cast<seconds>(kill_time).count()) {}
 
-	//TODO maybe just return serialized string.
+	// center name, not related to shm.
+	const std::string &id() const { return id_; }
+	// Node init: val carries the session id (ssn); sets up the ShmSocket for ssn, sends a MsgProcInit and
+	// records the node in nodes_. Returns the init-reply cmd, OR-ed with AbsAddr() << 4 when a node was added.
+	int64_t OnNodeInit(ShmSocket &socket, const int64_t val)
+	{
+		LOG_FUNCTION;
+		SharedMemory &shm = socket.shm();
+		MQId ssn = (val >> 4) & MaskBits(56);
+		// must be 64-bit: an `int` would truncate the address OR-ed into the reply below.
+		int64_t reply = EncodeCmd(eCmdNodeInitReply);
+		if (nodes_.find(ssn) != nodes_.end()) {
+			return reply; // ignore if exists.
+		}
+
+		auto UpdateRegInfo = [&](Node &node) {
+			node->state_.timestamp_ = NowSec() - offline_time_;
+			node->state_.UpdateState(NowSec(), offline_time_, kill_time_);
+			// create sockets.
+			try {
+				ShmSocket tmp(shm, true, ssn, 16);
+				node->addrs_.emplace(ssn, tmp.AbsAddr());
+				return true;
+			} catch (...) {
+				return false;
+			}
+		};
+
+		auto PrepareProcInit = [&](Node &node) {
+			ShmMsg init_msg;
+			DEFER1(init_msg.Release());
+			MsgProcInit body;
+			auto head = InitMsgHead(GetType(body), id(), ssn);
+			return init_msg.Make(GetAllocSize(CalcAllocIndex(900))) &&
+			       init_msg.Fill(ShmMsg::Serialize(head, body)) &&
+			       SendAllocMsg(socket, {ssn, node->addrs_[ssn]}, init_msg);
+		};
+
+		Node node(new NodeInfo);
+		if (UpdateRegInfo(node) && PrepareProcInit(node)) {
+			reply |= (node->addrs_[ssn] << 4);
+			nodes_[ssn] = node;
+			LOG_INFO() << "new node ssn (" << ssn << ") init";
+		} else {
+			ShmSocket::Remove(shm, ssn);
+		}
+		return reply;
+	}
+	void RecordMsg(const MsgI &msg) // calls reset_managed(true) on msg and records it in msgs_.
+	{
+		msg.reset_managed(true);
+		msgs_.RecordMsg(msg);
+	}
+
+	bool SendAllocReply(ShmSocket &socket, const MQInfo &dest, const int64_t reply, const MsgI &msg) // record msg, then send the raw reply value to dest.
+	{
+		RecordMsg(msg);
+		auto onExpireFree = [this, msg](const SendQ::Data &) { msgs_.FreeMsg(msg.id()); }; // callback handed to Send(); presumably run when the send expires - confirm in ShmSocket.
+		return socket.Send(dest, reply, onExpireFree);
+	}
+	bool SendAllocMsg(ShmSocket &socket, const MQInfo &dest, const MsgI &msg) // record msg, then send the msg itself to dest; returns the result of Send().
+	{
+		RecordMsg(msg);
+		return socket.Send(dest, msg);
+	}
+
+	// Handle a msg allocation request and answer the mq that asked for it.
+	// Request layout (low to high bits): 4bit cmd+flag, 28bit request id, 16bit proc index,
+	// 4bit socket index, 8bit size index.
+	// Reply: (offset of the new msg << 32) | (request id << 4) | cmd; the offset part stays 0 on failure.
+	void OnAlloc(ShmSocket &socket, const int64_t val)
+	{
+		int64_t msg_id = (val >> 4) & MaskBits(28);
+		int proc_index = (val >> 32) & MaskBits(16);
+		int socket_index = ((val) >> 48) & MaskBits(4);
+		const auto &proc_rec = procs_.Get(proc_index); // by reference: no need to copy the record.
+		if (proc_rec.proc_.empty()) {
+			return; // no proc recorded under this index.
+		}
+
+		// the destination mq must be one of the mqs registered for the proc's session.
+		auto node_pos = nodes_.find(proc_rec.ssn_);
+		if (node_pos == nodes_.end()) {
+			return;
+		}
+		const auto &addrs = node_pos->second->addrs_;
+		auto mq_pos = addrs.find(proc_rec.ssn_ + socket_index);
+		if (mq_pos == addrs.end()) {
+			return;
+		}
+		MQInfo dest = {mq_pos->first, mq_pos->second};
+
+		// the size field is an index that GetAllocSize() turns into the allocation size.
+		auto size = GetAllocSize((val >> 52) & MaskBits(8));
+		MsgI new_msg;
+		if (new_msg.Make(size)) {
+			// SendAllocReply() records the msg before sending its offset to the node.
+			int64_t reply = (new_msg.Offset() << 32) | (msg_id << 4) | EncodeCmd(eCmdAllocReply0);
+			SendAllocReply(socket, dest, reply, new_msg);
+		} else {
+			int64_t reply = (msg_id << 4) | EncodeCmd(eCmdAllocReply0); // send empty, ack failure.
+			socket.Send(dest, reply);
+		}
+	}
+
+	void OnFree(ShmSocket &socket, const int64_t val) // free the recorded msg whose id is (val >> 4) masked with MaskBits(31); socket is unused.
+	{
+		int64_t msg_id = (val >> 4) & MaskBits(31);
+		msgs_.FreeMsg(msg_id);
+	}
+
+	bool OnCommand(ShmSocket &socket, const int64_t val) // dispatch a raw command value; false when the command is not handled here.
+	{
+		assert(IsCmd(val));
+		const int cmd = DecodeCmd(val);
+		if (cmd == eCmdAllocRequest0) {
+			OnAlloc(socket, val);
+		} else if (cmd == eCmdFree) {
+			OnFree(socket, val);
+		}
+		return cmd == eCmdAllocRequest0 || cmd == eCmdFree;
+	}
+
+	MsgProcInitReply ProcInit(const BHMsgHead &head, MsgProcInit &msg) // give the proc an index and create the extra mqs it asked for.
+	{
+		LOG_DEBUG() << "center got proc init.";
+		auto pos = nodes_.find(head.ssn_id());
+		if (pos == nodes_.end()) {
+			return MakeReply<MsgProcInitReply>(eNotFound, "Node Not Initialised");
+		}
+		auto index = procs_.Put(head.proc_id(), head.ssn_id()); // ProcIndex(-1): no index could be assigned.
+		if (index == static_cast<ProcIndex>(-1)) { return MakeReply<MsgProcInitReply>(eError, "Too many procs."); }
+		auto reply(MakeReply<MsgProcInitReply>(eSuccess));
+		reply.set_proc_index(index);
+		auto &node = pos->second;
+		try {
+			for (int i = 0; i < msg.extra_mq_num(); ++i) {
+				ShmSocket tmp(BHomeShm(), true, head.ssn_id() + i + 1, 16); // extra mq ids follow the session id.
+				node->addrs_.emplace(tmp.id(), tmp.AbsAddr());
+				auto addr = reply.add_extra_mqs();
+				addr->set_mq_id(tmp.id());
+				addr->set_abs_addr(tmp.AbsAddr());
+			}
+			return reply;
+		} catch (...) {
+			LOG_ERROR() << "proc init create mq error";
+			return MakeReply<MsgProcInitReply>(eError, "Create mq failed.");
+		}
+	}
+
 	MsgCommonReply Register(const BHMsgHead &head, MsgRegister &msg)
 	{
 		if (msg.proc().proc_id() != head.proc_id()) {
@@ -108,15 +367,36 @@
 		}
 
 		try {
-			Node node(new NodeInfo);
-			node->addrs_.insert(SrcAddr(head));
-			for (auto &addr : msg.addrs()) {
-				node->addrs_.insert(addr.mq_id());
+			MQId ssn = head.ssn_id();
+			// when node restart, ssn will change,
+			// and old node will be removed after timeout.
+			auto UpdateRegInfo = [&](Node &node) {
+				node->proc_.Swap(msg.mutable_proc());
+				node->state_.timestamp_ = head.timestamp();
+				node->state_.UpdateState(NowSec(), offline_time_, kill_time_);
+			};
+
+			auto pos = nodes_.find(ssn);
+			if (pos == nodes_.end()) {
+				return MakeReply(eInvalidInput, "invalid session.");
 			}
-			node->proc_.Swap(msg.mutable_proc());
-			node->state_.timestamp_ = Now();
-			node->state_.flag_ = kStateNormal;
-			nodes_[node->proc_.proc_id()] = node;
+
+			// update proc info
+			Node &node = pos->second;
+			UpdateRegInfo(node);
+			LOG_DEBUG() << "node (" << head.proc_id() << ") ssn (" << ssn << ")";
+
+			auto old = online_node_addr_map_.find(head.proc_id());
+			if (old != online_node_addr_map_.end()) { // old session
+				auto &old_ssn = old->second;
+				if (old_ssn != ssn) {
+					nodes_[old_ssn]->state_.PutOffline(offline_time_);
+					LOG_DEBUG() << "put node (" << nodes_[old_ssn]->proc_.proc_id() << ") ssn (" << old->second << ") offline";
+					old_ssn = ssn;
+				}
+			} else {
+				online_node_addr_map_.emplace(head.proc_id(), ssn);
+			}
 			return MakeReply(eSuccess);
 		} catch (...) {
 			return MakeReply(eError, "register node error.");
@@ -127,11 +407,11 @@
 	Reply HandleMsg(const BHMsgHead &head, Func const &op)
 	{
 		try {
-			auto pos = nodes_.find(head.proc_id());
+			auto pos = nodes_.find(head.ssn_id());
 			if (pos == nodes_.end()) {
 				return MakeReply<Reply>(eNotRegistered, "Node is not registered.");
 			} else {
-				auto node = pos->second;
+				auto &node = pos->second;
 				if (!MatchAddr(node->addrs_, SrcAddr(head))) {
 					return MakeReply<Reply>(eAddressNotMatch, "Node address error.");
 				} else if (head.type() == kMsgTypeHeartbeat && CanHeartbeat(*node)) {
@@ -153,16 +433,30 @@
 		return HandleMsg<MsgCommonReply, Func>(head, op);
 	}
 
+	MsgCommonReply Unregister(const BHMsgHead &head, MsgUnregister &msg)
+	{
+		// the node is only put offline here; it is not erased from nodes_ by this call.
+		auto put_offline = [&](Node node) -> MsgCommonReply {
+			node->state_.PutOffline(offline_time_);
+			return MakeReply(eSuccess);
+		};
+		return HandleMsg(head, put_offline);
+	}
+
 	MsgCommonReply RegisterRPC(const BHMsgHead &head, MsgRegisterRPC &msg)
 	{
 		return HandleMsg(
 		    head, [&](Node node) -> MsgCommonReply {
-			    auto &src = SrcAddr(head);
+			    auto src = SrcAddr(head);
 			    auto &topics = msg.topics().topic_list();
 			    node->services_[src].insert(topics.begin(), topics.end());
-			    TopicDest dest = {src, node};
+			    TopicDest dest = {src, SrcAbsAddr(head), node};
 			    for (auto &topic : topics) {
 				    service_map_[topic].insert(dest);
+			    }
+			    LOG_DEBUG() << "node " << node->proc_.proc_id() << " ssn " << node->addrs_.begin()->first << " serve " << topics.size() << " topics:\n";
+			    for (auto &topic : topics) {
+				    LOG_DEBUG() << "\t" << topic;
 			    }
 			    return MakeReply(eSuccess);
 		    });
@@ -172,9 +466,8 @@
 	{
 		return HandleMsg(head, [&](Node node) {
 			NodeInfo &ni = *node;
-			auto now = Now();
-			ni.state_.timestamp_ = now;
-			ni.state_.flag_ = kStateNormal;
+			ni.state_.timestamp_ = head.timestamp();
+			ni.state_.UpdateState(NowSec(), offline_time_, kill_time_);
 
 			auto &info = msg.proc();
 			if (!info.public_info().empty()) {
@@ -194,20 +487,18 @@
 		auto query = [&](Node self) -> MsgQueryTopicReply {
 			auto pos = service_map_.find(req.topic());
 			if (pos != service_map_.end() && !pos->second.empty()) {
-				// now just find first one.
-				const TopicDest &dest = *(pos->second.begin());
-				Node dest_node(dest.weak_node_.lock());
-				if (!dest_node) {
-					service_map_.erase(pos);
-					return MakeReply<Reply>(eOffline, "topic server offline.");
-				} else if (!Valid(*dest_node)) {
-					return MakeReply<Reply>(eNoRespond, "topic server not responding.");
-				} else {
-					MsgQueryTopicReply reply = MakeReply<Reply>(eSuccess);
-					reply.mutable_address()->set_mq_id(dest.mq_);
-					return reply;
+				auto &clients = pos->second;
+				Reply reply = MakeReply<Reply>(eSuccess);
+				for (auto &dest : clients) {
+					Node dest_node(dest.weak_node_.lock());
+					if (dest_node && Valid(*dest_node)) {
+						auto node_addr = reply.add_node_address();
+						node_addr->set_proc_id(dest_node->proc_.proc_id());
+						node_addr->mutable_addr()->set_mq_id(dest.mq_id_);
+						node_addr->mutable_addr()->set_abs_addr(dest.mq_abs_addr_);
+					}
 				}
-
+				return reply;
 			} else {
 				return MakeReply<Reply>(eNotFound, "topic server not found.");
 			}
@@ -219,10 +510,10 @@
 	MsgCommonReply Subscribe(const BHMsgHead &head, const MsgSubscribe &msg)
 	{
 		return HandleMsg(head, [&](Node node) {
-			auto &src = SrcAddr(head);
+			auto src = SrcAddr(head);
 			auto &topics = msg.topics().topic_list();
 			node->subscriptions_[src].insert(topics.begin(), topics.end());
-			TopicDest dest = {src, node};
+			TopicDest dest = {src, SrcAbsAddr(head), node};
 			for (auto &topic : topics) {
 				subscribe_map_[topic].insert(dest);
 			}
@@ -232,7 +523,7 @@
 	MsgCommonReply Unsubscribe(const BHMsgHead &head, const MsgUnsubscribe &msg)
 	{
 		return HandleMsg(head, [&](Node node) {
-			auto &src = SrcAddr(head);
+			auto src = SrcAddr(head);
 			auto pos = node->subscriptions_.find(src);
 
 			auto RemoveSubTopicDestRecord = [this](const Topic &topic, const TopicDest &dest) {
@@ -245,7 +536,7 @@
 			};
 
 			if (pos != node->subscriptions_.end()) {
-				const TopicDest &dest = {src, node};
+				const TopicDest &dest = {src, SrcAbsAddr(head), node};
 				auto &topics = msg.topics().topic_list();
 				// clear node sub records;
 				for (auto &topic : topics) {
@@ -302,14 +593,14 @@
 	void OnTimer()
 	{
 		CheckNodes();
+		msgs_.AutoRemove();
 	}
 
 private:
 	void CheckNodes()
 	{
-		auto now = Now();
-		if (Seconds(now - last_check_time_) < 1) { return; }
-
+		auto now = NowSec();
+		if (now <= last_check_time_) { return; }
 		last_check_time_ = now;
 
 		auto it = nodes_.begin();
@@ -317,16 +608,13 @@
 			auto &cli = *it->second;
 			cli.state_.UpdateState(now, offline_time_, kill_time_);
 			if (cli.state_.flag_ == kStateKillme) {
-				if (cleaner_) {
-					for (auto &addr : cli.addrs_) {
-						cleaner_(addr);
-					}
-				}
+				RemoveNode(it->second);
 				it = nodes_.erase(it);
 			} else {
 				++it;
 			}
 		}
+		msgs_.DebugPrint();
 	}
 	bool CanHeartbeat(const NodeInfo &node)
 	{
@@ -341,16 +629,52 @@
 		auto node = weak.lock();
 		return node && Valid(*node);
 	}
-	void CheckAllNodes(); //TODO, call it in timer.
-	std::string id_;      // center proc id;
+	// Forget a node: drop its service/subscription records and its online record, pass its mqs to cleaner_, clear addrs_.
+	void RemoveNode(Node &node)
+	{
+		auto EraseMapRec = [&node](auto &rec_map, auto &node_rec) {
+			for (auto &addr_topics : node_rec) {
+				TopicDest dest{addr_topics.first, 0, node}; // abs_addr is not used.
+				for (auto &topic : addr_topics.second) {
+					auto pos = rec_map.find(topic);
+					if (pos != rec_map.end()) {
+						pos->second.erase(dest);
+						if (pos->second.empty()) {
+							rec_map.erase(pos);
+						}
+					}
+				}
+			}
+		};
+		EraseMapRec(service_map_, node->services_);
+		EraseMapRec(subscribe_map_, node->subscriptions_);
+		// remove the online record, but only while it still points at one of this node's mqs.
+		auto pos = online_node_addr_map_.find(node->proc_.proc_id());
+		if (pos != online_node_addr_map_.end() && node->addrs_.count(pos->second) != 0) {
+			online_node_addr_map_.erase(pos);
+		}
+
+		// cleaner_ is a std::function and may be empty; calling an empty one throws std::bad_function_call.
+		if (cleaner_) {
+			for (auto &addr : node->addrs_) {
+				cleaner_(addr.first);
+			}
+		}
+		node->addrs_.clear();
+	}
+	std::string id_; // center proc id;
 
 	std::unordered_map<Topic, Clients> service_map_;
 	std::unordered_map<Topic, Clients> subscribe_map_;
-	std::unordered_map<ProcId, Node> nodes_;
+	std::unordered_map<Address, Node> nodes_;
+	std::unordered_map<ProcId, Address> online_node_addr_map_;
+	ProcRecords procs_; // To get a short index for msg alloc.
+	MsgRecords msgs_;   // record all msgs alloced.
+
 	Cleaner cleaner_; // remove mqs.
-	Duration offline_time_;
-	Duration kill_time_;
-	TimePoint last_check_time_;
+	int64_t offline_time_;
+	int64_t kill_time_;
+	int64_t last_check_time_;
 };
 
 template <class Body, class OnMsg, class Replyer>
@@ -365,7 +689,7 @@
 
 Handler Combine(const Handler &h1, const Handler &h2)
 {
-	return [h1, h2](ShmSocket &socket, bhome_msg::MsgI &msg, bhome::msg::BHMsgHead &head) {
+	return [h1, h2](ShmSocket &socket, bhome_msg::MsgI &msg, bhome_msg::BHMsgHead &head) {
 		return h1(socket, msg, head) || h2(socket, msg, head);
 	};
 }
@@ -381,48 +705,58 @@
 		    msg, head, [&](auto &body) { return center->MsgTag(head, body); }, replyer); \
 		return true;
 
-bool AddCenter(const std::string &id, const NodeCenter::Cleaner &cleaner)
+auto MakeReplyer(ShmSocket &socket, BHMsgHead &head, Synced<NodeCenter> &center)
 {
-	auto center_ptr = std::make_shared<Synced<NodeCenter>>(id, cleaner, 60s, 60s * 3);
-	auto center_failed_q = std::make_shared<FailedMsgQ>();
-	auto MakeReplyer = [](ShmSocket &socket, BHMsgHead &head, const std::string &proc_id, FailedMsgQ &failq, const int timeout_ms = 0) {
-		return [&](auto &&rep_body) {
-			auto reply_head(InitMsgHead(GetType(rep_body), proc_id, head.msg_id()));
-			MsgI msg;
-			if (msg.Make(socket.shm(), reply_head, rep_body)) {
-				auto &remote = head.route(0).mq_id();
-				bool r = socket.Send(remote.data(), msg, timeout_ms);
-				if (!r) {
-					failq.Push(remote, msg, 60s); // for later retry.
-				}
-			}
-		};
+	return [&](auto &&rep_body) {
+		auto reply_head(InitMsgHead(GetType(rep_body), center->id(), head.ssn_id(), head.msg_id()));
+		MQInfo remote = {head.route(0).mq_id(), head.route(0).abs_addr()};
+		MsgI msg;
+		if (msg.Make(reply_head, rep_body)) {
+			DEFER1(msg.Release(););
+			center->SendAllocMsg(socket, remote, msg);
+		}
+	};
+}
+
+bool AddCenter(std::shared_ptr<Synced<NodeCenter>> center_ptr)
+{
+	// command
+	auto OnCommand = [center_ptr](ShmSocket &socket, ShmMsgQueue::RawData &cmd) -> bool {
+		auto &center = *center_ptr;
+		return IsCmd(cmd) && center->OnCommand(socket, cmd);
 	};
 
-	auto OnCenterIdle = [center_ptr, center_failed_q](ShmSocket &socket) {
+	// now we can talk.
+	auto OnCenterIdle = [center_ptr](ShmSocket &socket) {
 		auto &center = *center_ptr;
-		center_failed_q->TrySend(socket);
+		auto onInit = [&](const int64_t request) {
+			return center->OnNodeInit(socket, request);
+		};
+		BHCenterHandleInit(onInit);
 		center->OnTimer();
 	};
 
 	auto OnCenter = [=](ShmSocket &socket, MsgI &msg, BHMsgHead &head) -> bool {
 		auto &center = *center_ptr;
-		auto replyer = MakeReplyer(socket, head, center->id(), *center_failed_q);
+		auto replyer = MakeReplyer(socket, head, center);
 		switch (head.type()) {
+			CASE_ON_MSG_TYPE(ProcInit);
 			CASE_ON_MSG_TYPE(Register);
 			CASE_ON_MSG_TYPE(Heartbeat);
+			CASE_ON_MSG_TYPE(Unregister);
 
 			CASE_ON_MSG_TYPE(RegisterRPC);
 			CASE_ON_MSG_TYPE(QueryTopic);
 		default: return false;
 		}
 	};
+	BHCenter::Install("#center.main", OnCenter, OnCommand, OnCenterIdle, BHTopicCenterAddress(), 1000);
 
-	auto bus_failed_q = std::make_shared<FailedMsgQ>();
-	auto OnBusIdle = [=](ShmSocket &socket) { bus_failed_q->TrySend(socket); };
+	auto OnBusIdle = [=](ShmSocket &socket) {};
+	auto OnBusCmd = [=](ShmSocket &socket, ShmMsgQueue::RawData &val) { return false; };
 	auto OnPubSub = [=](ShmSocket &socket, MsgI &msg, BHMsgHead &head) -> bool {
 		auto &center = *center_ptr;
-		auto replyer = MakeReplyer(socket, head, center->id(), *bus_failed_q);
+		auto replyer = MakeReplyer(socket, head, center);
 		auto OnPublish = [&]() {
 			MsgPublish pub;
 			NodeCenter::Clients clients;
@@ -433,7 +767,6 @@
 				replyer(reply);
 			} else {
 				replyer(MakeReply(eSuccess));
-				if (!msg.EnableRefCount(socket.shm())) { return; } // no memory?
 				if (clients.empty()) { return; }
 
 				auto it = clients.begin();
@@ -441,9 +774,9 @@
 					auto &cli = *it;
 					auto node = cli.weak_node_.lock();
 					if (node) {
-						if (!socket.Send(cli.mq_.data(), msg, 0)) {
-							bus_failed_q->Push(cli.mq_, msg, 60s);
-						}
+						// should also make sure that mq is not killed before msg expires.
+						// it would be ok if (kill_time - offline_time) is longer than expire time.
+						socket.Send({cli.mq_id_, cli.mq_abs_addr_}, msg);
 						++it;
 					} else {
 						it = clients.erase(it);
@@ -459,8 +792,7 @@
 		}
 	};
 
-	BHCenter::Install("#center.reg", OnCenter, OnCenterIdle, BHTopicCenterAddress(), 1000);
-	BHCenter::Install("#center.bus", OnPubSub, OnBusIdle, BHTopicBusAddress(), 1000);
+	BHCenter::Install("#center.bus", OnPubSub, OnBusCmd, OnBusIdle, BHTopicBusAddress(), 1000);
 
 	return true;
 }
@@ -469,51 +801,42 @@
 
 } // namespace
 
-SharedMemory &BHomeShm()
-{
-	static SharedMemory shm("bhome_default_shm_v0", 1024 * 1024 * 64);
-	return shm;
-}
-
 BHCenter::CenterRecords &BHCenter::Centers()
 {
 	static CenterRecords rec;
 	return rec;
 }
 
-bool BHCenter::Install(const std::string &name, MsgHandler handler, IdleHandler idle, const std::string &mqid, const int mq_len)
+bool BHCenter::Install(const std::string &name, MsgHandler handler, RawHandler raw_handler, IdleHandler idle, const MQInfo &mq, const int mq_len)
 {
-	Centers()[name] = CenterInfo{name, handler, idle, mqid, mq_len};
+	Centers()[name] = CenterInfo{name, handler, raw_handler, idle, mq, mq_len};
 	return true;
-}
-bool BHCenter::Install(const std::string &name, MsgHandler handler, IdleHandler idle, const MQId &mqid, const int mq_len)
-{
-	return Install(name, handler, idle, std::string((const char *) &mqid, sizeof(mqid)), mq_len);
 }
 
 BHCenter::BHCenter(Socket::Shm &shm)
 {
-	auto gc = [&](const std::string &id) {
-		auto r = ShmSocket::Remove(shm, *(MQId *) id.data());
-		printf("remove mq : %s\n", r ? "ok" : "failed");
+	auto gc = [&](const MQId id) {
+		auto r = ShmSocket::Remove(shm, id);
+		if (r) {
+			LOG_DEBUG() << "remove mq " << id << " ok\n";
+		}
 	};
 
-	AddCenter("#center", gc);
+	auto nsec = seconds(NodeTimeoutSec());
+	auto center_ptr = std::make_shared<Synced<NodeCenter>>("#bhome_center", gc, nsec, nsec * 3); // *3 to allow other clients to finish sending msgs.
+	AddCenter(center_ptr);
 
 	for (auto &kv : Centers()) {
 		auto &info = kv.second;
-		sockets_[info.name_] = std::make_shared<ShmSocket>(shm, *(MQId *) info.mqid_.data(), info.mq_len_);
+		sockets_[info.name_] = std::make_shared<ShmSocket>(info.mq_.offset_, shm, info.mq_.id_);
 	}
 }
-
-BHCenter::BHCenter() :
-    BHCenter(BHomeShm()) {}
 
 bool BHCenter::Start()
 {
 	for (auto &kv : Centers()) {
 		auto &info = kv.second;
-		sockets_[info.name_]->Start(info.handler_, info.idle_);
+		sockets_[info.name_]->Start(1, info.handler_, info.raw_handler_, info.idle_);
 	}
 
 	return true;

--
Gitblit v1.8.0