1. HTTP协议格式
HTTP协议(超文本传输协议HyperText Transfer Protocol)是一种应用层的网络协议,传输层利用TCP协议。HTTP协议是Web应用客户端和服务端进行数据传输的基本协议
HTTP报文的各部分以\r\n分隔,依次为:一行请求行(Request Line)、多行头部字段(Headers)、一个空行(仅含\r\n,标志头部结束),以及数据(Content,长度由头部的Content-Length决定)

HTTP是一种无状态(stateless)协议,但可以引入Cookie等字段维护客户端状态;HTTP/1.1还可以通过keep-alive保持长连接,使多个请求复用同一条TCP连接。

2. HTTP请求解析
我在使用C++编写一个静态web服务器时,首先要考虑的是如何从TCP字节流中解析出请求信息
TCP是面向字节流的,也就是说传输时候数据像“水流一样”,是没有边界的,因此从TCP socket完成一次read()读取后,可能得到的是不完整的HTTP请求
因此先声明一个枚举类http::IOState表示当前解析HTTP请求的状态,OK:完成,PENDING:需要继续从客户端读,BAD:报文格式错误
// Outcome reported by the HTTP request parser after consuming input.
enum class IOState {
  OK,        // the current parsing step completed successfully
  PENDING,   // the buffered data is incomplete; read more from the client
  BAD,       // the message is malformed
  KEEP_ALIVE // NOTE(review): not returned by any parser function shown in
             // this file; presumably used by the connection layer -- confirm
};
声明Http Request结构体,存储关心的头部字段
// The subset of an HTTP request that the server keeps after parsing.
//
// Every std::string_view member is non-owning: the parser fills them with
// views into the buffer it was given, so that buffer must stay alive (and
// must not be reallocated) for as long as the Request is read.
struct Request {
  std::string_view url;               // request target from the request line
  std::string_view version;           // protocol version token
  std::string_view host;              // value of the Host header
  std::string_view method;            // request method token
  std::size_t content_length = 0;     // value of Content-Length; 0 if absent
  bool keep_alive = false;            // true when Connection is keep-alive
};
前面提到,服务端从客户端socket读HTTP请求时可能一次读不完,所以一般用一个char[]或std::string缓存客户端发送的数据。为了兼顾可读性与性能,选择视图类std::string_view作为解析器类RequestParser接口的形参类型
RequestParser暴露的接口为unwrap(返回请求解析结果)与update(解析传入的客户端接收缓冲区,即char[]或std::string的视图;如果返回PENDING,说明需要继续从客户端读取数据、更新缓冲区后再次调用update)。内部维护m_pivot作为客户端缓冲区上的偏移,随解析过程后移;同时记录当前正在解析报文的哪一部分(ProtocolState)
// Incremental HTTP request parser.
//
// Usage: call update() with a view of the client's receive buffer. While it
// returns IOState::PENDING, read more bytes into the buffer and call update()
// again with the enlarged buffer. m_pivot is an offset into that buffer, so
// every call must pass the buffer from its beginning, not just the new bytes.
//
// The parsed Request holds string_views into the buffer passed to update();
// the buffer must outlive any use of unwrap()'s result.
class RequestParser {
private:
  // Fields extracted so far.
  Request m_request{};
  // Offset of the first byte in the buffer that has not been consumed yet.
  size_t m_pivot{0};
  // Which section of the message is currently being parsed.
  enum class ProtocolState {
    ON_REQUEST_LINE,
    ON_HEADERS,
    ON_CONTENT
  } m_protocol_state{ProtocolState::ON_REQUEST_LINE};

  // Each helper consumes input starting at m_pivot and reports the outcome.
  IOState parse_request_line(std::string_view);
  IOState parse_header_line(std::string_view);
  IOState parse_content(std::string_view);
  // Extracts the next CRLF-terminated line and advances m_pivot past it.
  std::pair<IOState, std::string_view> next_line(std::string_view);

public:
  // Returns the request parsed so far. Const-qualified so it can also be
  // called through a const RequestParser.
  const Request &unwrap() const noexcept { return m_request; }
  // Continues parsing the given buffer from where the previous call stopped.
  IOState update(std::string_view);
};
首先实现从m_pivot开始解析出一行内容的next_line,行间的分隔是\r\n,而不是\n
// Extracts the line that starts at m_pivot and ends at the next "\r\n".
//
// Returns {OK, line} (line excludes the CRLF) and moves m_pivot just past the
// CRLF; {PENDING, {}} when no complete CRLF has been buffered yet; {BAD, {}}
// when a '\r' is followed by anything other than '\n'. m_pivot is only
// modified on OK.
auto RequestParser::next_line(std::string_view text)
    -> std::pair<IOState, std::string_view> {
  const auto cr = text.find('\r', m_pivot);

  // Either no CR at all, or the CR is the last buffered byte: the LF may
  // still arrive with the next read.
  if (cr == std::string_view::npos || cr + 1 == text.size())
    return {IOState::PENDING, std::string_view{}};

  // A bare CR inside a line is treated as a framing error.
  if (text[cr + 1] != '\n')
    return {IOState::BAD, std::string_view{}};

  const auto line_begin = m_pivot;
  m_pivot = cr + 2; // skip "\r\n"
  return {IOState::OK, text.substr(line_begin, cr - line_begin)};
}
HTTP请求各部分间的状态转移
// Drives the parser over `text`, starting from the saved section and offset.
//
// Request line and header lines are consumed one at a time; each helper
// advances m_protocol_state itself when its section is finished. Any result
// other than OK (PENDING or BAD) is returned to the caller immediately. Once
// the content section is reached, the result of parse_content() is returned
// as-is.
auto RequestParser::update(std::string_view text) -> IOState {
  for (;;) {
    if (m_protocol_state == ProtocolState::ON_CONTENT)
      return parse_content(text);

    const IOState step =
        (m_protocol_state == ProtocolState::ON_REQUEST_LINE)
            ? parse_request_line(text)
            : parse_header_line(text);
    if (step != IOState::OK)
      return step;
  }
}
请求行只需split
// Parses the request line "<method> <url> <version>" starting at m_pivot.
//
// Returns whatever next_line() reported if a full line is not available,
// BAD if the line does not contain three non-empty tokens separated by a
// single space/tab each, and OK otherwise. On OK the method, url and version
// views in m_request point into `text`, and the parser moves on to headers.
auto RequestParser::parse_request_line(std::string_view text) -> IOState {
  auto [state, line] = next_line(text);
  if (state != IOState::OK)
    return state;

  const auto end_of_method = line.find_first_of(" \t");
  if (end_of_method == std::string_view::npos)
    return IOState::BAD;

  // Search within `line`, not the whole buffer: offsets found in `text`
  // are relative to a different origin and may land beyond this line,
  // which would make the version length below wrap around.
  const auto end_of_url = line.find_first_of(" \t", end_of_method + 1);
  if (end_of_url == std::string_view::npos)
    return IOState::BAD;

  const auto method = line.substr(0, end_of_method);
  const auto url =
      line.substr(end_of_method + 1, end_of_url - end_of_method - 1);
  const auto version = line.substr(end_of_url + 1);

  // Reject lines such as " / HTTP/1.1" or "GET  /" where a token is missing.
  if (method.empty() || url.empty() || version.empty())
    return IOState::BAD;

  m_request.method = method;
  m_request.url = url;
  m_request.version = version;
  m_protocol_state = ProtocolState::ON_HEADERS;
  return IOState::OK;
}
头部各行都是key: value\r\n的格式(冒号前没有空格,冒号后的空白可有可无)
// Parses one header line "Name: value" starting at m_pivot.
//
// Returns whatever next_line() reported if a full line is not available.
// An empty line ends the header section and switches the parser to the
// content section. Returns BAD when the line has no ':' or an empty field
// name, or when Content-Length is not a plain non-negative decimal number
// that fits in size_t. Headers other than Connection, Content-Length and
// Host are accepted and ignored.
//
// Field names are compared case-insensitively and optional whitespace around
// the value is trimmed. m_request.host is a view into `text`.
auto RequestParser::parse_header_line(std::string_view text) -> IOState {
  constexpr static std::string_view CONNECTION_PREFIX = "Connection",
                                    CONTENT_LENGTH_PREFIX = "Content-Length",
                                    HOST_PREFIX = "Host",
                                    KEEP_ALIVE_VALUE = "keep-alive",
                                    WHITESPACE = " \t";

  // ASCII-only lowercase; header names and these tokens are ASCII.
  const auto to_lower = [](char c) -> char {
    return (c >= 'A' && c <= 'Z') ? static_cast<char>(c - 'A' + 'a') : c;
  };
  const auto iequals = [&to_lower](std::string_view lhs,
                                   std::string_view rhs) -> bool {
    if (lhs.size() != rhs.size())
      return false;
    for (size_t i = 0; i < lhs.size(); ++i) {
      if (to_lower(lhs[i]) != to_lower(rhs[i]))
        return false;
    }
    return true;
  };

  auto [state, line] = next_line(text);
  if (state != IOState::OK)
    return state;

  // end of headers
  if (line.empty()) {
    m_protocol_state = ProtocolState::ON_CONTENT;
    return IOState::OK;
  }

  const auto sep = line.find(':');
  if (sep == std::string_view::npos || sep == 0)
    return IOState::BAD;

  const auto field = line.substr(0, sep);

  // Trim optional whitespace on both sides; an empty value is legal.
  auto value = line.substr(sep + 1);
  const auto first = value.find_first_not_of(WHITESPACE);
  if (first == std::string_view::npos) {
    value = std::string_view{};
  } else {
    const auto last = value.find_last_not_of(WHITESPACE);
    value = value.substr(first, last - first + 1);
  }

  if (iequals(field, CONNECTION_PREFIX)) {
    // Only a lone "keep-alive" token is recognised; comma-separated option
    // lists are not split here.
    m_request.keep_alive = iequals(value, KEEP_ALIVE_VALUE);
  } else if (iequals(field, CONTENT_LENGTH_PREFIX)) {
    // Parse digits by hand: the view is not null-terminated, and a sign,
    // stray characters or overflow must be rejected rather than silently
    // converted into a bogus length.
    if (value.empty())
      return IOState::BAD;
    constexpr size_t MAX_LENGTH = static_cast<size_t>(-1);
    size_t length = 0;
    for (const char c : value) {
      if (c < '0' || c > '9')
        return IOState::BAD;
      const size_t digit = static_cast<size_t>(c - '0');
      if (length > (MAX_LENGTH - digit) / 10)
        return IOState::BAD;
      length = length * 10 + digit;
    }
    m_request.content_length = length;
  } else if (iequals(field, HOST_PREFIX)) {
    m_request.host = value;
  }
  return IOState::OK;
}
并不需要关心Content部分内部是否换行,只要读取的Content部分长度达到了Content-Length,就说明已经读入并解析了一个完整的HTTP请求
// Checks whether the whole message body has been buffered.
//
// When this is reached m_pivot is the offset just past the blank line that
// ends the headers, i.e. the first body byte. The body is complete once at
// least content_length bytes are available from that offset. Returns PENDING
// while bytes are still missing and OK otherwise (including when
// content_length is 0).
auto RequestParser::parse_content(std::string_view text) -> IOState {
  // Compute the available byte count by subtraction so that a very large
  // Content-Length cannot overflow a sum with m_pivot.
  const size_t available = text.size() > m_pivot ? text.size() - m_pivot : 0;
  if (available < m_request.content_length)
    return IOState::PENDING;
  return IOState::OK;
}