From 7c6ac2d158171cf5c258c557175ce8d39e6fa308 Mon Sep 17 00:00:00 2001 From: Lie Ryan Date: Wed, 21 Jan 2015 21:27:13 +1100 Subject: [PATCH 1/2] Improve handling of media type escaping according to RFC 723x --- httpheader.py | 53 ++++++++++++++++++++++++++++++++++++++++++++++++--- 1 file changed, 50 insertions(+), 3 deletions(-) diff --git a/httpheader.py b/httpheader.py index e07bff4..9dda57c 100644 --- a/httpheader.py +++ b/httpheader.py @@ -98,7 +98,9 @@ LWS = ' \t\n\r' # linear white space CRLF = '\r\n' DIGIT = '0123456789' +ALPHA = 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ' HEX = '0123456789ABCDEFabcdef' +TCHAR = "!#$%&'*+-.^_`|~" + ALPHA + HEX # Try to get a set/frozenset implementation if possible try: @@ -117,7 +119,10 @@ LWS = frozenset([c for c in LWS]) CRLF = frozenset([c for c in CRLF]) DIGIT = frozenset([c for c in DIGIT]) + ALPHA = frozenset([c for c in ALPHA]) HEX = frozenset([c for c in HEX]) + TCHAR = frozenset([c for c in TCHAR]) + del c except NameError: # Python 2.3 or earlier, leave as simple strings @@ -249,6 +254,18 @@ def __str__(self): else: return '%s\n\tOccured near %s' % (self.args[0], repr(self.input_string[self.at_position:self.at_position+16])) +class EncodingError(ValueError): + """Exception class representing an error when constructing a string.""" + def __init__(self, args, input_string, at_position): + ValueError.__init__(self, args) + self.input_string = input_string + self.at_position = at_position + def __str__(self): + if self.at_position >= len(self.input_string): + return '%s\n\tOccured at end of string' % self.args[0] + else: + return '%s\n\tOccured near %s' % (self.args[0], repr(self.input_string[self.at_position:self.at_position+16])) + def is_token(s): """Determines if the string is a valid token.""" @@ -322,21 +339,51 @@ def parse_token(s, start=0): return parse_token_or_quoted_string(s, start, allow_quoted=False, allow_token=True) -def quote_string(s, always_quote=True): +def quote_string(s, 
always_quote=True, strict='error'): """Produces a quoted string according to HTTP 1.1 rules. If always_quote is False and if the string is also a valid token, then this function may return a string without quotes. + If strict is 'error' then nulls and control characters other than + space and horizontal tab are not allowed anywhere in the string and + will raise ValueError. If strict is 'escape', they will be backslash + escaped. If strict is 'remove', they will be removed from the string. + Otherwise, if strict is 'passtru' they will remain as-is in the + quoted string. + + Syntax of quoted-string: + + quoted-string = DQUOTE *( qdtext / quoted-pair ) DQUOTE + qdtext = OWS / %x21 / %x23-5B / %x5D-7E / obs-text + obs-text = %x80-FF """ need_quotes = False q = '' - for c in s: - if ord(c) < 32 or ord(c) > 127 or c in SEPARATORS: + for pos, c in enumerate(s): + if c in TCHAR: + # tchar (token characters) never need special handling + q += c + elif c == '\t' or c == ' ' or ord(c) == 0x21 or 0x23 <= ord(c) <= 0x5B or 0x5D <= ord(c) <= 0x7E: + # these are characters that are not in tchar but are permissible + # unescaped as qdtext + q += c + need_quotes = True + elif c == '\\' or c == '"': + # backslash and double quote are only valid in quoted-string + # but they are always escaped q += '\\' + c need_quotes = True + elif strict == 'error': + raise EncodingError("Invalid character in quoted-string", s, pos) + elif strict == 'escape': + q += '\\' + c + need_quotes = True + elif strict == 'remove': + pass else: q += c + need_quotes = True if need_quotes or always_quote: return '"' + q + '"' else: From 3d263189ef2894578a890b1b8fa554d0b1389d55 Mon Sep 17 00:00:00 2001 From: cnelson Date: Thu, 25 Feb 2016 10:49:11 -0800 Subject: [PATCH 2/2] python 3 support; version bumped to 1.2 --- httpheader.py | 8 ++++---- setup.py | 2 +- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/httpheader.py b/httpheader.py index e07bff4..5df7f70 100644 --- a/httpheader.py +++ 
b/httpheader.py @@ -1,6 +1,7 @@ #!/usr/bin/env python # -*- coding: utf-8 -*- # +from __future__ import print_function """ Utility functions to work with HTTP headers. This module provides some utility functions useful for parsing @@ -496,10 +497,9 @@ def _test_comments(): def _testrm( a, b, collapse ): b2 = remove_comments( a, collapse ) if b != b2: - print 'Comment test failed:' - print ' remove_comments( %s, collapse_spaces=%s ) -> %s' \ - % (repr(a), repr(collapse), repr(b2)) - print ' expected %s' % repr(b) + print('Comment test failed:') + print(' remove_comments( {0}, collapse_spaces={1} ) -> {2}'.format(repr(a), repr(collapse), repr(b2))) + print(' expected {0}'.format(repr(b))) return 1 return 0 failures = 0 diff --git a/setup.py b/setup.py index f808931..85fc4d6 100644 --- a/setup.py +++ b/setup.py @@ -1,7 +1,7 @@ # -*- coding: utf-8 -*- from distutils.core import setup name = "httpheader" -version = "1.1" +version = "1.2" setup( name=name, version=version, py_modules=[name],