def parse_body_arguments(content_type, body, arguments, files, headers=None):
    """Parses a form request body.

    Supports ``application/x-www-form-urlencoded`` and
    ``multipart/form-data``.  The ``content_type`` parameter should be
    a string and ``body`` should be a byte string.  The ``arguments``
    and ``files`` parameters are dictionaries that will be updated
    with the parsed contents.

    ``headers`` is optional; when it contains a ``Content-Encoding``
    entry the body is not parsed at all (a warning is logged and the
    dictionaries are left untouched).  Bodies with any other content
    type are silently ignored.
    """
    # Only decoded (or never-encoded) bodies are supported.  Per the
    # original author's note, Tornado's `_GzipMessageDelegate` performs
    # `gzip` decoding before this function is called (see
    # `Http1Connection.read_response`); after handling a request it removes
    # the 'Content-Encoding' header and replaces it with
    # 'X-Consumed-Content-Encoding'.  A 'Content-Encoding' header that is
    # still present here therefore means the body is still encoded.
    if headers and 'Content-Encoding' in headers:
        gen_log.warning("Unsupported Content-Encoding: %s",
                        headers['Content-Encoding'])
        return

    if content_type.startswith("application/x-www-form-urlencoded"):
        try:
            uri_arguments = parse_qs_bytes(native_str(body),
                                           keep_blank_values=True)
        except Exception as e:
            # Best effort: a malformed body yields no arguments instead of
            # propagating the error to the caller.
            gen_log.warning('Invalid x-www-form-urlencoded body: %s', e)
            uri_arguments = {}
        for name, values in uri_arguments.items():
            if values:
                arguments.setdefault(name, []).extend(values)
    elif content_type.startswith("multipart/form-data"):
        # The boundary is carried as a ``boundary=...`` parameter of the
        # Content-Type value; the first non-empty one is used.
        fields = content_type.split(";")
        for field in fields:
            k, sep, v = field.strip().partition("=")
            if k == "boundary" and v:
                parse_multipart_form_data(utf8(v), body, arguments, files)
                break
        else:
            # Loop finished without ``break``: no usable boundary parameter.
            gen_log.warning("Invalid multipart/form-data")
application/x-www-form-urlencoded
这是浏览器原生表单 POST 默认的编码方式,GET 请求的数据默认也采用这种编码方式。差别在于:GET 时编码后的数据放在 URL 的查询字符串中一起发送给服务器,而 URL 的长度会受到浏览器和服务器实现的限制(例如旧版 IE 约为 2048 个字符,HTTP 协议本身并未规定上限);POST 时编码后的数据放在 Message-Body 中发送给服务器,协议上并没有限制长度。
从网上找了一个 POST 提交的请求数据,删除无关的消息头域后,格式如下:
1 2 3 4
POST http://www.example.com HTTP/1.1
Content-Type: application/x-www-form-urlencoded;charset=utf-8
try: from urllib.parse import parse_qs as _parse_qs # py3 except ImportError: from urlparse import parse_qs as _parse_qs # Python 2.6+ ……
if sys.version_info[0] < 3:
    # On Python 2 the stock ``parse_qs`` is used unchanged.
    parse_qs_bytes = _parse_qs
else:
    def parse_qs_bytes(qs, keep_blank_values=False, strict_parsing=False):
        """Parses a query string like urlparse.parse_qs, but returns the
        values as byte strings.

        ``qs`` may be a ``str`` or a byte string; byte strings are decoded
        as latin1 first so that every byte value survives the round trip.
        ``keep_blank_values`` and ``strict_parsing`` are passed through to
        ``parse_qs``.

        Keys still become type str (interpreted as latin1 in python3!)
        because it's too painful to keep them as byte strings in
        python3 and in practice they're nearly always ascii anyway.

        Returns a dict mapping each key to a list of byte-string values.
        """
        # latin1 maps bytes 0-255 one-to-one onto code points 0-255, so
        # decoding the input and re-encoding the parsed values is lossless.
        if isinstance(qs, bytes):
            qs = qs.decode('latin1')
        # This is gross, but python3 doesn't give us another way.
        # Latin1 is the universal donor of character encodings.
        result = _parse_qs(qs, keep_blank_values, strict_parsing,
                           encoding='latin1', errors='strict')
        encoded = {}
        for k, v in result.items():
            encoded[k] = [i.encode('latin1') for i in v]
        return encoded
defparse_multipart_form_data(boundary, data, arguments, files): """Parses a ``multipart/form-data`` body. The ``boundary`` and ``data`` parameters are both byte strings. The dictionaries given in the arguments and files parameters will be updated with the contents of the body. """ # The standard allows for the boundary to be quoted in the header, # although it's rare (it happens at least for google app engine # xmpp). I think we're also supposed to handle backslash-escapes # here but I'll save that until we see a client that uses them # in the wild. # # 兼容以引号包围的 ”boundary“ 字符串,示例中是没有引号的。 if boundary.startswith(b'"') and boundary.endswith(b'"'): boundary = boundary[1:-1]
# 消息体结束符 ”--boundary--“ final_boundary_index = data.rfind(b"--" + boundary + b"--") if final_boundary_index == -1: gen_log.warning("Invalid multipart/form-data: no final boundary") return
# 获取以 ”--boundary“ 分割的各部分列表 parts = data[:final_boundary_index].split(b"--" + boundary + b"\r\n") for part in parts: ifnot part: continue eoh = part.find(b"\r\n\r\n") if eoh == -1: gen_log.warning("multipart/form-data missing headers") continue headers = HTTPHeaders.parse(part[:eoh].decode("utf-8")) disp_header = headers.get("Content-Disposition", "") disposition, disp_params = _parse_header(disp_header) if disposition != "form-data"ornot part.endswith(b"\r\n"): gen_log.warning("Invalid multipart/form-data") continue
# b"\r\n\r\n" 的长度为 4,b"\r\n" 的长度为 2。 value = part[eoh + 4:-2] ifnot disp_params.get("name"): gen_log.warning("multipart/form-data value missing name") continue name = disp_params["name"]