From 65ef4d68321e56906920be75831b5e968f7abd7b Mon Sep 17 00:00:00 2001
From: lichao <lichao@aiotlink.com>
Date: 星期二, 13 四月 2021 09:34:05 +0800
Subject: [PATCH] add heartbeat; refactor.
---
.vscode/launch.json | 2
src/topic_node.h | 7 +
utest/utest.cpp | 33 ++++++++++
src/failed_msg.cpp | 1
src/shm_queue.cpp | 14 ++++
src/topic_node.cpp | 43 ++++++++++++--
src/center.cpp | 40 +++++++++---
7 files changed, 117 insertions(+), 23 deletions(-)
diff --git a/.vscode/launch.json b/.vscode/launch.json
index 12aa21d..b4e9631 100644
--- a/.vscode/launch.json
+++ b/.vscode/launch.json
@@ -11,7 +11,7 @@
"program": "${workspaceFolder}/debug/bin/utest",
"args": [
"-t",
- "SRTest"
+ "HeartbeatTest"
],
"stopAtEntry": false,
"cwd": "${workspaceFolder}",
diff --git a/src/center.cpp b/src/center.cpp
index 7865e57..cde865f 100644
--- a/src/center.cpp
+++ b/src/center.cpp
@@ -34,7 +34,9 @@
namespace
{
typedef steady_clock::time_point TimePoint;
+typedef steady_clock::duration Duration;
inline TimePoint Now() { return steady_clock::now(); };
+inline int64_t Seconds(const Duration &d) { return duration_cast<seconds>(d).count(); };
//TODO check proc_id
class NodeCenter
@@ -56,15 +58,15 @@
struct ProcState {
TimePoint timestamp_;
uint32_t flag_ = 0; // reserved
- void UpdateState(TimePoint now)
+ void UpdateState(TimePoint now, const Duration &offline_time, const Duration &kill_time)
{
- const auto kOfflineTime = 60 * 10s;
- const auto kKillTime = 60 * 20s;
-
auto diff = now - timestamp_;
- if (diff < kOfflineTime) {
+#ifndef NDEBUG
+ printf("diff: %ld\n", Seconds(diff));
+#endif
+ if (diff < offline_time) {
flag_ = kStateNormal;
- } else if (diff < kKillTime) {
+ } else if (diff < kill_time) {
flag_ = kStateOffline;
} else {
flag_ = kStateKillme;
@@ -94,8 +96,8 @@
public:
typedef std::set<TopicDest> Clients;
- NodeCenter(const std::string &id, const Cleaner &cleaner) :
- id_(id), cleaner_(cleaner) {}
+ NodeCenter(const std::string &id, const Cleaner &cleaner, const Duration &offline_time, const Duration &kill_time) :
+ id_(id), cleaner_(cleaner), offline_time_(offline_time), kill_time_(kill_time), last_check_time_(Now()) {}
const std::string &id() const { return id_; } // no need to lock.
//TODO maybe just return serialized string.
@@ -132,6 +134,8 @@
auto node = pos->second;
if (!MatchAddr(node->addrs_, SrcAddr(head))) {
return MakeReply<Reply>(eAddressNotMatch, "Node address error.");
+ } else if (head.type() == kMsgTypeHeartbeat && CanHeartbeat(*node)) {
+ return op(node);
} else if (!Valid(*node)) {
return MakeReply<Reply>(eNoRespond, "Node is not alive.");
} else {
@@ -168,7 +172,9 @@
{
return HandleMsg(head, [&](Node node) {
NodeInfo &ni = *node;
- ni.state_.timestamp_ = Now();
+ auto now = Now();
+ ni.state_.timestamp_ = now;
+ ni.state_.flag_ = kStateNormal;
auto &info = msg.proc();
if (!info.public_info().empty()) {
@@ -301,10 +307,15 @@
private:
void CheckNodes()
{
+ auto now = Now();
+ if (Seconds(now - last_check_time_) < 1) { return; }
+
+ last_check_time_ = now;
+
auto it = nodes_.begin();
while (it != nodes_.end()) {
auto &cli = *it->second;
- cli.state_.UpdateState(Now());
+ cli.state_.UpdateState(now, offline_time_, kill_time_);
if (cli.state_.flag_ == kStateKillme) {
if (cleaner_) {
for (auto &addr : cli.addrs_) {
@@ -316,6 +327,10 @@
++it;
}
}
+ }
+ bool CanHeartbeat(const NodeInfo &node)
+ {
+ return Valid(node) || node.state_.flag_ == kStateOffline;
}
bool Valid(const NodeInfo &node)
{
@@ -333,6 +348,9 @@
std::unordered_map<Topic, Clients> subscribe_map_;
std::unordered_map<ProcId, Node> nodes_;
Cleaner cleaner_; // remove mqs.
+ Duration offline_time_;
+ Duration kill_time_;
+ TimePoint last_check_time_;
};
template <class Body, class OnMsg, class Replyer>
@@ -365,7 +383,7 @@
bool AddCenter(const std::string &id, const NodeCenter::Cleaner &cleaner)
{
- auto center_ptr = std::make_shared<Synced<NodeCenter>>(id, cleaner);
+ auto center_ptr = std::make_shared<Synced<NodeCenter>>(id, cleaner, 60s, 60s * 3);
auto center_failed_q = std::make_shared<FailedMsgQ>();
auto MakeReplyer = [](ShmSocket &socket, BHMsgHead &head, const std::string &proc_id, FailedMsgQ &failq, const int timeout_ms = 0) {
return [&](auto &&rep_body) {
diff --git a/src/failed_msg.cpp b/src/failed_msg.cpp
index ab4658d..0b4c443 100644
--- a/src/failed_msg.cpp
+++ b/src/failed_msg.cpp
@@ -24,6 +24,7 @@
assert(valid_sock);
ShmSocket &sock = *static_cast<ShmSocket *>(valid_sock);
bool r = sock.Send(remote.data(), msg, 0);
+ //TODO check remote removed.
if (r && msg.IsCounted()) {
auto tmp = msg; // Release() is not const, but it's safe to release.
tmp.Release(sock.shm());
diff --git a/src/shm_queue.cpp b/src/shm_queue.cpp
index 652ed5b..8e4e56e 100644
--- a/src/shm_queue.cpp
+++ b/src/shm_queue.cpp
@@ -72,12 +72,22 @@
bool ShmMsgQueue::Send(SharedMemory &shm, const MQId &remote_id, const MsgI &msg, const int timeout_ms, OnSend const &onsend)
{
Queue *remote = Find(shm, MsgQIdToName(remote_id));
- return remote && remote->Write(msg, timeout_ms, [&onsend](const MsgI &msg) { onsend(); msg.AddRef(); });
+ if (remote) {
+ return remote->Write(msg, timeout_ms, [&onsend](const MsgI &msg) { onsend(); msg.AddRef(); });
+ } else {
+ // SetLestError(eNotFound);
+ return false;
+ }
}
bool ShmMsgQueue::Send(SharedMemory &shm, const MQId &remote_id, const MsgI &msg, const int timeout_ms)
{
Queue *remote = Find(shm, MsgQIdToName(remote_id));
- return remote && remote->Write(msg, timeout_ms, [](const MsgI &msg) { msg.AddRef(); });
+ if (remote) {
+ return remote->Write(msg, timeout_ms, [](const MsgI &msg) { msg.AddRef(); });
+ } else {
+ // SetLestError(eNotFound);
+ return false;
+ }
}
// Test shows that in the 2 cases:
diff --git a/src/topic_node.cpp b/src/topic_node.cpp
index 8cd5cc4..788c536 100644
--- a/src/topic_node.cpp
+++ b/src/topic_node.cpp
@@ -39,17 +39,21 @@
TopicNode::TopicNode(SharedMemory &shm) :
shm_(shm), sock_node_(shm), sock_request_(shm), sock_reply_(shm), sock_sub_(shm)
{
- SockNode().Start();
- SockClient().Start();
- SockServer().Start();
+ Start();
}
TopicNode::~TopicNode()
{
- StopAll();
+ Stop();
}
-void TopicNode::StopAll()
+void TopicNode::Start()
+{
+ SockNode().Start();
+ SockClient().Start();
+ SockServer().Start();
+}
+void TopicNode::Stop()
{
SockServer().Stop();
SockClient().Stop();
@@ -76,12 +80,39 @@
BHMsgHead reply_head;
bool r = sock.SendAndRecv(&BHTopicCenterAddress(), head, body, reply, reply_head, timeout_ms);
r = r && reply_head.type() == kMsgTypeCommonReply && reply.ParseBody(reply_body);
- if (r) {
+ if (r && IsSuccess(reply_body.errmsg().errcode())) {
info_ = body;
}
return r;
}
+bool TopicNode::Heartbeat(ProcInfo &proc, MsgCommonReply &reply_body, const int timeout_ms)
+{
+ auto &sock = SockNode();
+ MsgHeartbeat body;
+ *body.mutable_proc() = proc;
+
+ auto head(InitMsgHead(GetType(body), body.proc().proc_id()));
+ AddRoute(head, sock.id());
+
+ MsgI reply;
+ DEFER1(reply.Release(shm_););
+ BHMsgHead reply_head;
+ bool r = sock.SendAndRecv(&BHTopicCenterAddress(), head, body, reply, reply_head, timeout_ms);
+ r = r && reply_head.type() == kMsgTypeCommonReply && reply.ParseBody(reply_body);
+ if (r && IsSuccess(reply_body.errmsg().errcode())) {
+ // TODO update proc info
+ }
+ return r;
+}
+bool TopicNode::Heartbeat(const int timeout_ms)
+{
+ ProcInfo proc;
+ proc.set_proc_id(proc_id());
+ MsgCommonReply reply_body;
+ return Heartbeat(proc, reply_body, timeout_ms) && IsSuccess(reply_body.errmsg().errcode());
+}
+
bool TopicNode::ServerRegisterRPC(MsgTopicList &topics, MsgCommonReply &reply_body, const int timeout_ms)
{
//TODO check registered
diff --git a/src/topic_node.h b/src/topic_node.h
index 60497ad..d2cdcf9 100644
--- a/src/topic_node.h
+++ b/src/topic_node.h
@@ -37,9 +37,12 @@
TopicNode(SharedMemory &shm);
~TopicNode();
- void StopAll();
+ void Start();
+ void Stop();
// topic node
- bool Register(ProcInfo &body, MsgCommonReply &reply, const int timeout_ms);
+ bool Register(ProcInfo &proc, MsgCommonReply &reply_body, const int timeout_ms);
+ bool Heartbeat(ProcInfo &proc, MsgCommonReply &reply_body, const int timeout_ms);
+ bool Heartbeat(const int timeout_ms);
// topic rpc server
typedef std::function<bool(const std::string &topic, const std::string &data, std::string &reply)> OnRequest;
diff --git a/utest/utest.cpp b/utest/utest.cpp
index e0a9023..f127a8f 100644
--- a/utest/utest.cpp
+++ b/utest/utest.cpp
@@ -240,7 +240,7 @@
do {
std::this_thread::yield();
} while (count.load() < nreq);
- client.StopAll();
+ client.Stop();
printf("request %s %d done ", topic.c_str(), count.load());
};
@@ -282,6 +282,37 @@
servers.WaitAll();
}
+BOOST_AUTO_TEST_CASE(HeartbeatTest)
+{
+ const std::string shm_name("ShmHeartbeat");
+ ShmRemover auto_remove(shm_name);
+ SharedMemory shm(shm_name, 1024 * 1024 * 50);
+
+ BHCenter center(shm);
+ center.Start();
+
+ {
+
+ DemoNode node("demo_node", shm);
+ auto Check = [&]() {
+ bool r = node.Heartbeat(100);
+ printf("hearbeat ret : %s\n", r ? "ok" : "failed");
+ };
+ Check();
+ for (int i = 0; i < 3; ++i) {
+ std::this_thread::sleep_for(1s);
+ Check();
+ }
+ printf("sleep 4\n");
+ std::this_thread::sleep_for(4s);
+ for (int i = 0; i < 2; ++i) {
+ std::this_thread::sleep_for(1s);
+ Check();
+ }
+ }
+ printf("sleep 8\n");
+ std::this_thread::sleep_for(8s);
+}
inline int MyMin(int a, int b)
{
printf("MyMin\n");
--
Gitblit v1.8.0