')
+ def download_file(filename):
+ return send_from_directory(app.config['UPLOAD_FOLDER'],
+ filename, as_attachment=True)
+
+ .. admonition:: Sending files and Performance
+
+ It is strongly recommended to activate either ``X-Sendfile`` support in
+ your webserver or (if no authentication happens) to tell the webserver
+ to serve files for the given path on its own without calling into the
+ web application for improved performance.
+
+ .. versionadded:: 0.5
+
+ :param directory: the directory where all the files are stored.
+ :param filename: the filename relative to that directory to
+ download.
+ :param options: optional keyword arguments that are directly
+ forwarded to :func:`send_file`.
+ """
+ filename = safe_join(directory, filename)
+ if not os.path.isabs(filename):
+ filename = os.path.join(current_app.root_path, filename)
+ try:
+ if not os.path.isfile(filename):
+ raise NotFound()
+ except (TypeError, ValueError):
+ raise BadRequest()
+ options.setdefault('conditional', True)
+ return send_file(filename, **options)
+
+
+def get_root_path(import_name):
+ """Returns the path to a package or cwd if that cannot be found. This
+ returns the path of a package or the folder that contains a module.
+
+ Not to be confused with the package path returned by :func:`find_package`.
+ """
+ # Module already imported and has a file attribute. Use that first.
+ mod = sys.modules.get(import_name)
+ if mod is not None and hasattr(mod, '__file__'):
+ return os.path.dirname(os.path.abspath(mod.__file__))
+
+ # Next attempt: check the loader.
+ loader = pkgutil.get_loader(import_name)
+
+ # Loader does not exist or we're referring to an unloaded main module
+ # or a main module without path (interactive sessions), go with the
+ # current working directory.
+ if loader is None or import_name == '__main__':
+ return os.getcwd()
+
+ # For .egg, zipimporter does not have get_filename until Python 2.7.
+ # Some other loaders might exhibit the same behavior.
+ if hasattr(loader, 'get_filename'):
+ filepath = loader.get_filename(import_name)
+ else:
+ # Fall back to imports.
+ __import__(import_name)
+ mod = sys.modules[import_name]
+ filepath = getattr(mod, '__file__', None)
+
+ # If we don't have a filepath it might be because we are a
+ # namespace package. In this case we pick the root path from the
+ # first module that is contained in our package.
+ if filepath is None:
+ raise RuntimeError('No root path can be found for the provided '
+ 'module "%s". This can happen because the '
+ 'module came from an import hook that does '
+ 'not provide file name information or because '
+ 'it\'s a namespace package. In this case '
+ 'the root path needs to be explicitly '
+ 'provided.' % import_name)
+
+ # filepath is import_name.py for a module, or __init__.py for a package.
+ return os.path.dirname(os.path.abspath(filepath))
+
+
+def _matching_loader_thinks_module_is_package(loader, mod_name):
+ """Given the loader that loaded a module and the module this function
+ attempts to figure out if the given module is actually a package.
+ """
+ # If the loader can tell us if something is a package, we can
+ # directly ask the loader.
+ if hasattr(loader, 'is_package'):
+ return loader.is_package(mod_name)
+ # importlib's namespace loaders do not have this functionality but
+ # all the modules it loads are packages, so we can take advantage of
+ # this information.
+ elif (loader.__class__.__module__ == '_frozen_importlib' and
+ loader.__class__.__name__ == 'NamespaceLoader'):
+ return True
+ # Otherwise we need to fail with an error that explains what went
+ # wrong.
+ raise AttributeError(
+ ('%s.is_package() method is missing but is required by Flask of '
+ 'PEP 302 import hooks. If you do not use import hooks and '
+ 'you encounter this error please file a bug against Flask.') %
+ loader.__class__.__name__)
+
+
+def find_package(import_name):
+ """Finds a package and returns the prefix (or None if the package is
+ not installed) as well as the folder that contains the package or
+ module as a tuple. The package path returned is the module that would
+ have to be added to the pythonpath in order to make it possible to
+ import the module. The prefix is the path below which a UNIX like
+ folder structure exists (lib, share etc.).
+ """
+ root_mod_name = import_name.split('.')[0]
+ loader = pkgutil.get_loader(root_mod_name)
+ if loader is None or import_name == '__main__':
+ # import name is not found, or interactive/main module
+ package_path = os.getcwd()
+ else:
+ # For .egg, zipimporter does not have get_filename until Python 2.7.
+ if hasattr(loader, 'get_filename'):
+ filename = loader.get_filename(root_mod_name)
+ elif hasattr(loader, 'archive'):
+ # zipimporter's loader.archive points to the .egg or .zip
+ # archive filename is dropped in call to dirname below.
+ filename = loader.archive
+ else:
+ # At least one loader is missing both get_filename and archive:
+ # Google App Engine's HardenedModulesHook
+ #
+ # Fall back to imports.
+ __import__(import_name)
+ filename = sys.modules[import_name].__file__
+ package_path = os.path.abspath(os.path.dirname(filename))
+
+ # In case the root module is a package we need to chop of the
+ # rightmost part. This needs to go through a helper function
+ # because of python 3.3 namespace packages.
+ if _matching_loader_thinks_module_is_package(
+ loader, root_mod_name):
+ package_path = os.path.dirname(package_path)
+
+ site_parent, site_folder = os.path.split(package_path)
+ py_prefix = os.path.abspath(sys.prefix)
+ if package_path.startswith(py_prefix):
+ return py_prefix, package_path
+ elif site_folder.lower() == 'site-packages':
+ parent, folder = os.path.split(site_parent)
+ # Windows like installations
+ if folder.lower() == 'lib':
+ base_dir = parent
+ # UNIX like installations
+ elif os.path.basename(parent).lower() == 'lib':
+ base_dir = os.path.dirname(parent)
+ else:
+ base_dir = site_parent
+ return base_dir, package_path
+ return None, package_path
+
+
+class locked_cached_property(object):
+ """A decorator that converts a function into a lazy property. The
+ function wrapped is called the first time to retrieve the result
+ and then that calculated result is used the next time you access
+ the value. Works like the one in Werkzeug but has a lock for
+ thread safety.
+ """
+
+ def __init__(self, func, name=None, doc=None):
+ self.__name__ = name or func.__name__
+ self.__module__ = func.__module__
+ self.__doc__ = doc or func.__doc__
+ self.func = func
+ self.lock = RLock()
+
+ def __get__(self, obj, type=None):
+ if obj is None:
+ return self
+ with self.lock:
+ value = obj.__dict__.get(self.__name__, _missing)
+ if value is _missing:
+ value = self.func(obj)
+ obj.__dict__[self.__name__] = value
+ return value
+
+
+class _PackageBoundObject(object):
+ #: The name of the package or module that this app belongs to. Do not
+ #: change this once it is set by the constructor.
+ import_name = None
+
+ #: Location of the template files to be added to the template lookup.
+ #: ``None`` if templates should not be added.
+ template_folder = None
+
+ #: Absolute path to the package on the filesystem. Used to look up
+ #: resources contained in the package.
+ root_path = None
+
+ def __init__(self, import_name, template_folder=None, root_path=None):
+ self.import_name = import_name
+ self.template_folder = template_folder
+
+ if root_path is None:
+ root_path = get_root_path(self.import_name)
+
+ self.root_path = root_path
+ self._static_folder = None
+ self._static_url_path = None
+
+ def _get_static_folder(self):
+ if self._static_folder is not None:
+ return os.path.join(self.root_path, self._static_folder)
+
+ def _set_static_folder(self, value):
+ self._static_folder = value
+
+ static_folder = property(
+ _get_static_folder, _set_static_folder,
+ doc='The absolute path to the configured static folder.'
+ )
+ del _get_static_folder, _set_static_folder
+
+ def _get_static_url_path(self):
+ if self._static_url_path is not None:
+ return self._static_url_path
+
+ if self.static_folder is not None:
+ return '/' + os.path.basename(self.static_folder)
+
+ def _set_static_url_path(self, value):
+ self._static_url_path = value
+
+ static_url_path = property(
+ _get_static_url_path, _set_static_url_path,
+ doc='The URL prefix that the static route will be registered for.'
+ )
+ del _get_static_url_path, _set_static_url_path
+
+ @property
+ def has_static_folder(self):
+ """This is ``True`` if the package bound object's container has a
+ folder for static files.
+
+ .. versionadded:: 0.5
+ """
+ return self.static_folder is not None
+
+ @locked_cached_property
+ def jinja_loader(self):
+ """The Jinja loader for this package bound object.
+
+ .. versionadded:: 0.5
+ """
+ if self.template_folder is not None:
+ return FileSystemLoader(os.path.join(self.root_path,
+ self.template_folder))
+
+ def get_send_file_max_age(self, filename):
+ """Provides default cache_timeout for the :func:`send_file` functions.
+
+ By default, this function returns ``SEND_FILE_MAX_AGE_DEFAULT`` from
+ the configuration of :data:`~flask.current_app`.
+
+ Static file functions such as :func:`send_from_directory` use this
+ function, and :func:`send_file` calls this function on
+ :data:`~flask.current_app` when the given cache_timeout is ``None``. If a
+ cache_timeout is given in :func:`send_file`, that timeout is used;
+ otherwise, this method is called.
+
+ This allows subclasses to change the behavior when sending files based
+ on the filename. For example, to set the cache timeout for .js files
+ to 60 seconds::
+
+ class MyFlask(flask.Flask):
+ def get_send_file_max_age(self, name):
+ if name.lower().endswith('.js'):
+ return 60
+ return flask.Flask.get_send_file_max_age(self, name)
+
+ .. versionadded:: 0.9
+ """
+ return total_seconds(current_app.send_file_max_age_default)
+
+ def send_static_file(self, filename):
+ """Function used internally to send static files from the static
+ folder to the browser.
+
+ .. versionadded:: 0.5
+ """
+ if not self.has_static_folder:
+ raise RuntimeError('No static folder for this object')
+ # Ensure get_send_file_max_age is called in all cases.
+ # Here, we ensure get_send_file_max_age is called for Blueprints.
+ cache_timeout = self.get_send_file_max_age(filename)
+ return send_from_directory(self.static_folder, filename,
+ cache_timeout=cache_timeout)
+
+ def open_resource(self, resource, mode='rb'):
+ """Opens a resource from the application's resource folder. To see
+ how this works, consider the following folder structure::
+
+ /myapplication.py
+ /schema.sql
+ /static
+ /style.css
+ /templates
+ /layout.html
+ /index.html
+
+ If you want to open the :file:`schema.sql` file you would do the
+ following::
+
+ with app.open_resource('schema.sql') as f:
+ contents = f.read()
+ do_something_with(contents)
+
+ :param resource: the name of the resource. To access resources within
+ subfolders use forward slashes as separator.
+ :param mode: resource file opening mode, default is 'rb'.
+ """
+ if mode not in ('r', 'rb'):
+ raise ValueError('Resources can only be opened for reading')
+ return open(os.path.join(self.root_path, resource), mode)
+
+
+def total_seconds(td):
+ """Returns the total seconds from a timedelta object.
+
+ :param timedelta td: the timedelta to be converted in seconds
+
+ :returns: number of seconds
+ :rtype: int
+ """
+ return td.days * 60 * 60 * 24 + td.seconds
+
+
+def is_ip(value):
+ """Determine if the given string is an IP address.
+
+ Python 2 on Windows doesn't provide ``inet_pton``, so this only
+ checks IPv4 addresses in that environment.
+
+ :param value: value to check
+ :type value: str
+
+ :return: True if string is an IP address
+ :rtype: bool
+ """
+ if PY2 and os.name == 'nt':
+ try:
+ socket.inet_aton(value)
+ return True
+ except socket.error:
+ return False
+
+ for family in (socket.AF_INET, socket.AF_INET6):
+ try:
+ socket.inet_pton(family, value)
+ except socket.error:
+ pass
+ else:
+ return True
+
+ return False
diff --git a/Master/env_master/Lib/site-packages/flask/json/__init__.py b/Master/env_master/Lib/site-packages/flask/json/__init__.py
new file mode 100644
index 00000000..fbe6b92f
--- /dev/null
+++ b/Master/env_master/Lib/site-packages/flask/json/__init__.py
@@ -0,0 +1,327 @@
+# -*- coding: utf-8 -*-
+"""
+flask.json
+~~~~~~~~~~
+
+:copyright: © 2010 by the Pallets team.
+:license: BSD, see LICENSE for more details.
+"""
+import codecs
+import io
+import uuid
+from datetime import date, datetime
+from flask.globals import current_app, request
+from flask._compat import text_type, PY2
+
+from werkzeug.http import http_date
+from jinja2 import Markup
+
+# Use the same json implementation as itsdangerous on which we
+# depend anyways.
+from itsdangerous import json as _json
+
+
+# Figure out if simplejson escapes slashes. This behavior was changed
+# from one version to another without reason.
+_slash_escape = '\\/' not in _json.dumps('/')
+
+
+__all__ = ['dump', 'dumps', 'load', 'loads', 'htmlsafe_dump',
+ 'htmlsafe_dumps', 'JSONDecoder', 'JSONEncoder',
+ 'jsonify']
+
+
+def _wrap_reader_for_text(fp, encoding):
+ if isinstance(fp.read(0), bytes):
+ fp = io.TextIOWrapper(io.BufferedReader(fp), encoding)
+ return fp
+
+
+def _wrap_writer_for_text(fp, encoding):
+ try:
+ fp.write('')
+ except TypeError:
+ fp = io.TextIOWrapper(fp, encoding)
+ return fp
+
+
+class JSONEncoder(_json.JSONEncoder):
+ """The default Flask JSON encoder. This one extends the default simplejson
+ encoder by also supporting ``datetime`` objects, ``UUID`` as well as
+ ``Markup`` objects which are serialized as RFC 822 datetime strings (same
+ as the HTTP date format). In order to support more data types override the
+ :meth:`default` method.
+ """
+
+ def default(self, o):
+ """Implement this method in a subclass such that it returns a
+ serializable object for ``o``, or calls the base implementation (to
+ raise a :exc:`TypeError`).
+
+ For example, to support arbitrary iterators, you could implement
+ default like this::
+
+ def default(self, o):
+ try:
+ iterable = iter(o)
+ except TypeError:
+ pass
+ else:
+ return list(iterable)
+ return JSONEncoder.default(self, o)
+ """
+ if isinstance(o, datetime):
+ return http_date(o.utctimetuple())
+ if isinstance(o, date):
+ return http_date(o.timetuple())
+ if isinstance(o, uuid.UUID):
+ return str(o)
+ if hasattr(o, '__html__'):
+ return text_type(o.__html__())
+ return _json.JSONEncoder.default(self, o)
+
+
+class JSONDecoder(_json.JSONDecoder):
+ """The default JSON decoder. This one does not change the behavior from
+ the default simplejson decoder. Consult the :mod:`json` documentation
+ for more information. This decoder is not only used for the load
+ functions of this module but also :attr:`~flask.Request`.
+ """
+
+
+def _dump_arg_defaults(kwargs):
+ """Inject default arguments for dump functions."""
+ if current_app:
+ bp = current_app.blueprints.get(request.blueprint) if request else None
+ kwargs.setdefault(
+ 'cls',
+ bp.json_encoder if bp and bp.json_encoder
+ else current_app.json_encoder
+ )
+
+ if not current_app.config['JSON_AS_ASCII']:
+ kwargs.setdefault('ensure_ascii', False)
+
+ kwargs.setdefault('sort_keys', current_app.config['JSON_SORT_KEYS'])
+ else:
+ kwargs.setdefault('sort_keys', True)
+ kwargs.setdefault('cls', JSONEncoder)
+
+
+def _load_arg_defaults(kwargs):
+ """Inject default arguments for load functions."""
+ if current_app:
+ bp = current_app.blueprints.get(request.blueprint) if request else None
+ kwargs.setdefault(
+ 'cls',
+ bp.json_decoder if bp and bp.json_decoder
+ else current_app.json_decoder
+ )
+ else:
+ kwargs.setdefault('cls', JSONDecoder)
+
+
+def detect_encoding(data):
+ """Detect which UTF codec was used to encode the given bytes.
+
+ The latest JSON standard (:rfc:`8259`) suggests that only UTF-8 is
+ accepted. Older documents allowed 8, 16, or 32. 16 and 32 can be big
+ or little endian. Some editors or libraries may prepend a BOM.
+
+ :param data: Bytes in unknown UTF encoding.
+ :return: UTF encoding name
+ """
+ head = data[:4]
+
+ if head[:3] == codecs.BOM_UTF8:
+ return 'utf-8-sig'
+
+ if b'\x00' not in head:
+ return 'utf-8'
+
+ if head in (codecs.BOM_UTF32_BE, codecs.BOM_UTF32_LE):
+ return 'utf-32'
+
+ if head[:2] in (codecs.BOM_UTF16_BE, codecs.BOM_UTF16_LE):
+ return 'utf-16'
+
+ if len(head) == 4:
+ if head[:3] == b'\x00\x00\x00':
+ return 'utf-32-be'
+
+ if head[::2] == b'\x00\x00':
+ return 'utf-16-be'
+
+ if head[1:] == b'\x00\x00\x00':
+ return 'utf-32-le'
+
+ if head[1::2] == b'\x00\x00':
+ return 'utf-16-le'
+
+ if len(head) == 2:
+ return 'utf-16-be' if head.startswith(b'\x00') else 'utf-16-le'
+
+ return 'utf-8'
+
+
+def dumps(obj, **kwargs):
+ """Serialize ``obj`` to a JSON formatted ``str`` by using the application's
+ configured encoder (:attr:`~flask.Flask.json_encoder`) if there is an
+ application on the stack.
+
+ This function can return ``unicode`` strings or ascii-only bytestrings by
+ default which coerce into unicode strings automatically. That behavior by
+ default is controlled by the ``JSON_AS_ASCII`` configuration variable
+ and can be overridden by the simplejson ``ensure_ascii`` parameter.
+ """
+ _dump_arg_defaults(kwargs)
+ encoding = kwargs.pop('encoding', None)
+ rv = _json.dumps(obj, **kwargs)
+ if encoding is not None and isinstance(rv, text_type):
+ rv = rv.encode(encoding)
+ return rv
+
+
+def dump(obj, fp, **kwargs):
+ """Like :func:`dumps` but writes into a file object."""
+ _dump_arg_defaults(kwargs)
+ encoding = kwargs.pop('encoding', None)
+ if encoding is not None:
+ fp = _wrap_writer_for_text(fp, encoding)
+ _json.dump(obj, fp, **kwargs)
+
+
+def loads(s, **kwargs):
+ """Unserialize a JSON object from a string ``s`` by using the application's
+ configured decoder (:attr:`~flask.Flask.json_decoder`) if there is an
+ application on the stack.
+ """
+ _load_arg_defaults(kwargs)
+ if isinstance(s, bytes):
+ encoding = kwargs.pop('encoding', None)
+ if encoding is None:
+ encoding = detect_encoding(s)
+ s = s.decode(encoding)
+ return _json.loads(s, **kwargs)
+
+
+def load(fp, **kwargs):
+ """Like :func:`loads` but reads from a file object.
+ """
+ _load_arg_defaults(kwargs)
+ if not PY2:
+ fp = _wrap_reader_for_text(fp, kwargs.pop('encoding', None) or 'utf-8')
+ return _json.load(fp, **kwargs)
+
+
+def htmlsafe_dumps(obj, **kwargs):
+ """Works exactly like :func:`dumps` but is safe for use in ``')
+ # => <script> do_nasty_stuff() </script>
+ # sanitize_html('Click here for $100')
+ # => Click here for $100
+ def sanitize_token(self, token):
+
+ # accommodate filters which use token_type differently
+ token_type = token["type"]
+ if token_type in ("StartTag", "EndTag", "EmptyTag"):
+ name = token["name"]
+ namespace = token["namespace"]
+ if ((namespace, name) in self.allowed_elements or
+ (namespace is None and
+ (namespaces["html"], name) in self.allowed_elements)):
+ return self.allowed_token(token)
+ else:
+ return self.disallowed_token(token)
+ elif token_type == "Comment":
+ pass
+ else:
+ return token
+
+ def allowed_token(self, token):
+ if "data" in token:
+ attrs = token["data"]
+ attr_names = set(attrs.keys())
+
+ # Remove forbidden attributes
+ for to_remove in (attr_names - self.allowed_attributes):
+ del token["data"][to_remove]
+ attr_names.remove(to_remove)
+
+ # Remove attributes with disallowed URL values
+ for attr in (attr_names & self.attr_val_is_uri):
+ assert attr in attrs
+ # I don't have a clue where this regexp comes from or why it matches those
+ # characters, nor why we call unescape. I just know it's always been here.
+ # Should you be worried by this comment in a sanitizer? Yes. On the other hand, all
+ # this will do is remove *more* than it otherwise would.
+ val_unescaped = re.sub("[`\x00-\x20\x7f-\xa0\s]+", '',
+ unescape(attrs[attr])).lower()
+ # remove replacement characters from unescaped characters
+ val_unescaped = val_unescaped.replace("\ufffd", "")
+ try:
+ uri = urlparse.urlparse(val_unescaped)
+ except ValueError:
+ uri = None
+ del attrs[attr]
+ if uri and uri.scheme:
+ if uri.scheme not in self.allowed_protocols:
+ del attrs[attr]
+ if uri.scheme == 'data':
+ m = data_content_type.match(uri.path)
+ if not m:
+ del attrs[attr]
+ elif m.group('content_type') not in self.allowed_content_types:
+ del attrs[attr]
+
+ for attr in self.svg_attr_val_allows_ref:
+ if attr in attrs:
+ attrs[attr] = re.sub(r'url\s*\(\s*[^#\s][^)]+?\)',
+ ' ',
+ unescape(attrs[attr]))
+ if (token["name"] in self.svg_allow_local_href and
+ (namespaces['xlink'], 'href') in attrs and re.search('^\s*[^#\s].*',
+ attrs[(namespaces['xlink'], 'href')])):
+ del attrs[(namespaces['xlink'], 'href')]
+ if (None, 'style') in attrs:
+ attrs[(None, 'style')] = self.sanitize_css(attrs[(None, 'style')])
+ token["data"] = attrs
+ return token
+
+ def disallowed_token(self, token):
+ token_type = token["type"]
+ if token_type == "EndTag":
+ token["data"] = "%s>" % token["name"]
+ elif token["data"]:
+ assert token_type in ("StartTag", "EmptyTag")
+ attrs = []
+ for (ns, name), v in token["data"].items():
+ attrs.append(' %s="%s"' % (name if ns is None else "%s:%s" % (prefixes[ns], name), escape(v)))
+ token["data"] = "<%s%s>" % (token["name"], ''.join(attrs))
+ else:
+ token["data"] = "<%s>" % token["name"]
+ if token.get("selfClosing"):
+ token["data"] = token["data"][:-1] + "/>"
+
+ token["type"] = "Characters"
+
+ del token["name"]
+ return token
+
+ def sanitize_css(self, style):
+ # disallow urls
+ style = re.compile('url\s*\(\s*[^\s)]+?\s*\)\s*').sub(' ', style)
+
+ # gauntlet
+ if not re.match("""^([:,;#%.\sa-zA-Z0-9!]|\w-\w|'[\s\w]+'|"[\s\w]+"|\([\d,\s]+\))*$""", style):
+ return ''
+ if not re.match("^\s*([-\w]+\s*:[^:;]*(;\s*|$))*$", style):
+ return ''
+
+ clean = []
+ for prop, value in re.findall("([-\w]+)\s*:\s*([^:;]*)", style):
+ if not value:
+ continue
+ if prop.lower() in self.allowed_css_properties:
+ clean.append(prop + ': ' + value + ';')
+ elif prop.split('-')[0].lower() in ['background', 'border', 'margin',
+ 'padding']:
+ for keyword in value.split():
+ if keyword not in self.allowed_css_keywords and \
+ not re.match("^(#[0-9a-f]+|rgb\(\d+%?,\d*%?,?\d*%?\)?|\d{0,2}\.?\d{0,2}(cm|em|ex|in|mm|pc|pt|px|%|,|\))?)$", keyword): # noqa
+ break
+ else:
+ clean.append(prop + ': ' + value + ';')
+ elif prop.lower() in self.allowed_svg_properties:
+ clean.append(prop + ': ' + value + ';')
+
+ return ' '.join(clean)
diff --git a/Master/env_master/Lib/site-packages/pip/_vendor/html5lib/filters/whitespace.py b/Master/env_master/Lib/site-packages/pip/_vendor/html5lib/filters/whitespace.py
new file mode 100644
index 00000000..89210528
--- /dev/null
+++ b/Master/env_master/Lib/site-packages/pip/_vendor/html5lib/filters/whitespace.py
@@ -0,0 +1,38 @@
+from __future__ import absolute_import, division, unicode_literals
+
+import re
+
+from . import base
+from ..constants import rcdataElements, spaceCharacters
+spaceCharacters = "".join(spaceCharacters)
+
+SPACES_REGEX = re.compile("[%s]+" % spaceCharacters)
+
+
+class Filter(base.Filter):
+
+ spacePreserveElements = frozenset(["pre", "textarea"] + list(rcdataElements))
+
+ def __iter__(self):
+ preserve = 0
+ for token in base.Filter.__iter__(self):
+ type = token["type"]
+ if type == "StartTag" \
+ and (preserve or token["name"] in self.spacePreserveElements):
+ preserve += 1
+
+ elif type == "EndTag" and preserve:
+ preserve -= 1
+
+ elif not preserve and type == "SpaceCharacters" and token["data"]:
+ # Test on token["data"] above to not introduce spaces where there were not
+ token["data"] = " "
+
+ elif not preserve and type == "Characters":
+ token["data"] = collapse_spaces(token["data"])
+
+ yield token
+
+
+def collapse_spaces(text):
+ return SPACES_REGEX.sub(' ', text)
diff --git a/Master/env_master/Lib/site-packages/pip/_vendor/html5lib/html5parser.py b/Master/env_master/Lib/site-packages/pip/_vendor/html5lib/html5parser.py
new file mode 100644
index 00000000..f7043cb1
--- /dev/null
+++ b/Master/env_master/Lib/site-packages/pip/_vendor/html5lib/html5parser.py
@@ -0,0 +1,2733 @@
+from __future__ import absolute_import, division, unicode_literals
+from pip._vendor.six import with_metaclass, viewkeys, PY3
+
+import types
+
+try:
+ from collections import OrderedDict
+except ImportError:
+ from pip._vendor.ordereddict import OrderedDict
+
+from . import _inputstream
+from . import _tokenizer
+
+from . import treebuilders
+from .treebuilders.base import Marker
+
+from . import _utils
+from .constants import (
+ spaceCharacters, asciiUpper2Lower,
+ specialElements, headingElements, cdataElements, rcdataElements,
+ tokenTypes, tagTokenTypes,
+ namespaces,
+ htmlIntegrationPointElements, mathmlTextIntegrationPointElements,
+ adjustForeignAttributes as adjustForeignAttributesMap,
+ adjustMathMLAttributes, adjustSVGAttributes,
+ E,
+ ReparseException
+)
+
+
+def parse(doc, treebuilder="etree", namespaceHTMLElements=True, **kwargs):
+ """Parse a string or file-like object into a tree"""
+ tb = treebuilders.getTreeBuilder(treebuilder)
+ p = HTMLParser(tb, namespaceHTMLElements=namespaceHTMLElements)
+ return p.parse(doc, **kwargs)
+
+
+def parseFragment(doc, container="div", treebuilder="etree", namespaceHTMLElements=True, **kwargs):
+ tb = treebuilders.getTreeBuilder(treebuilder)
+ p = HTMLParser(tb, namespaceHTMLElements=namespaceHTMLElements)
+ return p.parseFragment(doc, container=container, **kwargs)
+
+
+def method_decorator_metaclass(function):
+ class Decorated(type):
+ def __new__(meta, classname, bases, classDict):
+ for attributeName, attribute in classDict.items():
+ if isinstance(attribute, types.FunctionType):
+ attribute = function(attribute)
+
+ classDict[attributeName] = attribute
+ return type.__new__(meta, classname, bases, classDict)
+ return Decorated
+
+
+class HTMLParser(object):
+ """HTML parser. Generates a tree structure from a stream of (possibly
+ malformed) HTML"""
+
+ def __init__(self, tree=None, strict=False, namespaceHTMLElements=True, debug=False):
+ """
+ strict - raise an exception when a parse error is encountered
+
+ tree - a treebuilder class controlling the type of tree that will be
+ returned. Built in treebuilders can be accessed through
+ html5lib.treebuilders.getTreeBuilder(treeType)
+ """
+
+ # Raise an exception on the first error encountered
+ self.strict = strict
+
+ if tree is None:
+ tree = treebuilders.getTreeBuilder("etree")
+ self.tree = tree(namespaceHTMLElements)
+ self.errors = []
+
+ self.phases = dict([(name, cls(self, self.tree)) for name, cls in
+ getPhases(debug).items()])
+
+ def _parse(self, stream, innerHTML=False, container="div", scripting=False, **kwargs):
+
+ self.innerHTMLMode = innerHTML
+ self.container = container
+ self.scripting = scripting
+ self.tokenizer = _tokenizer.HTMLTokenizer(stream, parser=self, **kwargs)
+ self.reset()
+
+ try:
+ self.mainLoop()
+ except ReparseException:
+ self.reset()
+ self.mainLoop()
+
+ def reset(self):
+ self.tree.reset()
+ self.firstStartTag = False
+ self.errors = []
+ self.log = [] # only used with debug mode
+ # "quirks" / "limited quirks" / "no quirks"
+ self.compatMode = "no quirks"
+
+ if self.innerHTMLMode:
+ self.innerHTML = self.container.lower()
+
+ if self.innerHTML in cdataElements:
+ self.tokenizer.state = self.tokenizer.rcdataState
+ elif self.innerHTML in rcdataElements:
+ self.tokenizer.state = self.tokenizer.rawtextState
+ elif self.innerHTML == 'plaintext':
+ self.tokenizer.state = self.tokenizer.plaintextState
+ else:
+ # state already is data state
+ # self.tokenizer.state = self.tokenizer.dataState
+ pass
+ self.phase = self.phases["beforeHtml"]
+ self.phase.insertHtmlElement()
+ self.resetInsertionMode()
+ else:
+ self.innerHTML = False # pylint:disable=redefined-variable-type
+ self.phase = self.phases["initial"]
+
+ self.lastPhase = None
+
+ self.beforeRCDataPhase = None
+
+ self.framesetOK = True
+
+ @property
+ def documentEncoding(self):
+ """The name of the character encoding
+ that was used to decode the input stream,
+ or :obj:`None` if that is not determined yet.
+
+ """
+ if not hasattr(self, 'tokenizer'):
+ return None
+ return self.tokenizer.stream.charEncoding[0].name
+
+ def isHTMLIntegrationPoint(self, element):
+ if (element.name == "annotation-xml" and
+ element.namespace == namespaces["mathml"]):
+ return ("encoding" in element.attributes and
+ element.attributes["encoding"].translate(
+ asciiUpper2Lower) in
+ ("text/html", "application/xhtml+xml"))
+ else:
+ return (element.namespace, element.name) in htmlIntegrationPointElements
+
+ def isMathMLTextIntegrationPoint(self, element):
+ return (element.namespace, element.name) in mathmlTextIntegrationPointElements
+
+ def mainLoop(self):
+ CharactersToken = tokenTypes["Characters"]
+ SpaceCharactersToken = tokenTypes["SpaceCharacters"]
+ StartTagToken = tokenTypes["StartTag"]
+ EndTagToken = tokenTypes["EndTag"]
+ CommentToken = tokenTypes["Comment"]
+ DoctypeToken = tokenTypes["Doctype"]
+ ParseErrorToken = tokenTypes["ParseError"]
+
+ for token in self.normalizedTokens():
+ prev_token = None
+ new_token = token
+ while new_token is not None:
+ prev_token = new_token
+ currentNode = self.tree.openElements[-1] if self.tree.openElements else None
+ currentNodeNamespace = currentNode.namespace if currentNode else None
+ currentNodeName = currentNode.name if currentNode else None
+
+ type = new_token["type"]
+
+ if type == ParseErrorToken:
+ self.parseError(new_token["data"], new_token.get("datavars", {}))
+ new_token = None
+ else:
+ if (len(self.tree.openElements) == 0 or
+ currentNodeNamespace == self.tree.defaultNamespace or
+ (self.isMathMLTextIntegrationPoint(currentNode) and
+ ((type == StartTagToken and
+ token["name"] not in frozenset(["mglyph", "malignmark"])) or
+ type in (CharactersToken, SpaceCharactersToken))) or
+ (currentNodeNamespace == namespaces["mathml"] and
+ currentNodeName == "annotation-xml" and
+ type == StartTagToken and
+ token["name"] == "svg") or
+ (self.isHTMLIntegrationPoint(currentNode) and
+ type in (StartTagToken, CharactersToken, SpaceCharactersToken))):
+ phase = self.phase
+ else:
+ phase = self.phases["inForeignContent"]
+
+ if type == CharactersToken:
+ new_token = phase.processCharacters(new_token)
+ elif type == SpaceCharactersToken:
+ new_token = phase.processSpaceCharacters(new_token)
+ elif type == StartTagToken:
+ new_token = phase.processStartTag(new_token)
+ elif type == EndTagToken:
+ new_token = phase.processEndTag(new_token)
+ elif type == CommentToken:
+ new_token = phase.processComment(new_token)
+ elif type == DoctypeToken:
+ new_token = phase.processDoctype(new_token)
+
+ if (type == StartTagToken and prev_token["selfClosing"] and
+ not prev_token["selfClosingAcknowledged"]):
+ self.parseError("non-void-element-with-trailing-solidus",
+ {"name": prev_token["name"]})
+
+ # When the loop finishes it's EOF
+ reprocess = True
+ phases = []
+ while reprocess:
+ phases.append(self.phase)
+ reprocess = self.phase.processEOF()
+ if reprocess:
+ assert self.phase not in phases
+
+ def normalizedTokens(self):
+ for token in self.tokenizer:
+ yield self.normalizeToken(token)
+
+ def parse(self, stream, *args, **kwargs):
+ """Parse a HTML document into a well-formed tree
+
+ stream - a filelike object or string containing the HTML to be parsed
+
+ The optional encoding parameter must be a string that indicates
+ the encoding. If specified, that encoding will be used,
+ regardless of any BOM or later declaration (such as in a meta
+ element)
+
+ scripting - treat noscript elements as if javascript was turned on
+ """
+ self._parse(stream, False, None, *args, **kwargs)
+ return self.tree.getDocument()
+
+ def parseFragment(self, stream, *args, **kwargs):
+ """Parse a HTML fragment into a well-formed tree fragment
+
+ container - name of the element we're setting the innerHTML property
+ if set to None, default to 'div'
+
+ stream - a filelike object or string containing the HTML to be parsed
+
+ The optional encoding parameter must be a string that indicates
+ the encoding. If specified, that encoding will be used,
+ regardless of any BOM or later declaration (such as in a meta
+ element)
+
+ scripting - treat noscript elements as if javascript was turned on
+ """
+ self._parse(stream, True, *args, **kwargs)
+ return self.tree.getFragment()
+
+ def parseError(self, errorcode="XXX-undefined-error", datavars=None):
+ # XXX The idea is to make errorcode mandatory.
+ if datavars is None:
+ datavars = {}
+ self.errors.append((self.tokenizer.stream.position(), errorcode, datavars))
+ if self.strict:
+ raise ParseError(E[errorcode] % datavars)
+
+ def normalizeToken(self, token):
+ """ HTML5 specific normalizations to the token stream """
+
+ if token["type"] == tokenTypes["StartTag"]:
+ raw = token["data"]
+ token["data"] = OrderedDict(raw)
+ if len(raw) > len(token["data"]):
+ # we had some duplicated attribute, fix so first wins
+ token["data"].update(raw[::-1])
+
+ return token
+
+ def adjustMathMLAttributes(self, token):
+ adjust_attributes(token, adjustMathMLAttributes)
+
+ def adjustSVGAttributes(self, token):
+ adjust_attributes(token, adjustSVGAttributes)
+
+ def adjustForeignAttributes(self, token):
+ adjust_attributes(token, adjustForeignAttributesMap)
+
+ def reparseTokenNormal(self, token):
+ # pylint:disable=unused-argument
+ self.parser.phase()
+
+ def resetInsertionMode(self):
+ # The name of this method is mostly historical. (It's also used in the
+ # specification.)
+ last = False
+ newModes = {
+ "select": "inSelect",
+ "td": "inCell",
+ "th": "inCell",
+ "tr": "inRow",
+ "tbody": "inTableBody",
+ "thead": "inTableBody",
+ "tfoot": "inTableBody",
+ "caption": "inCaption",
+ "colgroup": "inColumnGroup",
+ "table": "inTable",
+ "head": "inBody",
+ "body": "inBody",
+ "frameset": "inFrameset",
+ "html": "beforeHead"
+ }
+ for node in self.tree.openElements[::-1]:
+ nodeName = node.name
+ new_phase = None
+ if node == self.tree.openElements[0]:
+ assert self.innerHTML
+ last = True
+ nodeName = self.innerHTML
+ # Check for conditions that should only happen in the innerHTML
+ # case
+ if nodeName in ("select", "colgroup", "head", "html"):
+ assert self.innerHTML
+
+ if not last and node.namespace != self.tree.defaultNamespace:
+ continue
+
+ if nodeName in newModes:
+ new_phase = self.phases[newModes[nodeName]]
+ break
+ elif last:
+ new_phase = self.phases["inBody"]
+ break
+
+ self.phase = new_phase
+
+ def parseRCDataRawtext(self, token, contentType):
+ """Generic RCDATA/RAWTEXT Parsing algorithm
+ contentType - RCDATA or RAWTEXT
+ """
+ assert contentType in ("RAWTEXT", "RCDATA")
+
+ self.tree.insertElement(token)
+
+ if contentType == "RAWTEXT":
+ self.tokenizer.state = self.tokenizer.rawtextState
+ else:
+ self.tokenizer.state = self.tokenizer.rcdataState
+
+ self.originalPhase = self.phase
+
+ self.phase = self.phases["text"]
+
+
+@_utils.memoize
+def getPhases(debug):
+ def log(function):
+ """Logger that records which phase processes each token"""
+ type_names = dict((value, key) for key, value in
+ tokenTypes.items())
+
+ def wrapped(self, *args, **kwargs):
+ if function.__name__.startswith("process") and len(args) > 0:
+ token = args[0]
+ try:
+ info = {"type": type_names[token['type']]}
+ except:
+ raise
+ if token['type'] in tagTokenTypes:
+ info["name"] = token['name']
+
+ self.parser.log.append((self.parser.tokenizer.state.__name__,
+ self.parser.phase.__class__.__name__,
+ self.__class__.__name__,
+ function.__name__,
+ info))
+ return function(self, *args, **kwargs)
+ else:
+ return function(self, *args, **kwargs)
+ return wrapped
+
+ def getMetaclass(use_metaclass, metaclass_func):
+ if use_metaclass:
+ return method_decorator_metaclass(metaclass_func)
+ else:
+ return type
+
+ # pylint:disable=unused-argument
+ class Phase(with_metaclass(getMetaclass(debug, log))):
+ """Base class for helper object that implements each phase of processing
+ """
+
+ def __init__(self, parser, tree):
+ self.parser = parser
+ self.tree = tree
+
+ def processEOF(self):
+ raise NotImplementedError
+
+ def processComment(self, token):
+ # For most phases the following is correct. Where it's not it will be
+ # overridden.
+ self.tree.insertComment(token, self.tree.openElements[-1])
+
+ def processDoctype(self, token):
+ self.parser.parseError("unexpected-doctype")
+
+ def processCharacters(self, token):
+ self.tree.insertText(token["data"])
+
+ def processSpaceCharacters(self, token):
+ self.tree.insertText(token["data"])
+
+ def processStartTag(self, token):
+ return self.startTagHandler[token["name"]](token)
+
+ def startTagHtml(self, token):
+ if not self.parser.firstStartTag and token["name"] == "html":
+ self.parser.parseError("non-html-root")
+ # XXX Need a check here to see if the first start tag token emitted is
+ # this token... If it's not, invoke self.parser.parseError().
+ for attr, value in token["data"].items():
+ if attr not in self.tree.openElements[0].attributes:
+ self.tree.openElements[0].attributes[attr] = value
+ self.parser.firstStartTag = False
+
+ def processEndTag(self, token):
+ return self.endTagHandler[token["name"]](token)
+
+ class InitialPhase(Phase):
+ def processSpaceCharacters(self, token):
+ pass
+
+ def processComment(self, token):
+ self.tree.insertComment(token, self.tree.document)
+
+ def processDoctype(self, token):
+ name = token["name"]
+ publicId = token["publicId"]
+ systemId = token["systemId"]
+ correct = token["correct"]
+
+ if (name != "html" or publicId is not None or
+ systemId is not None and systemId != "about:legacy-compat"):
+ self.parser.parseError("unknown-doctype")
+
+ if publicId is None:
+ publicId = ""
+
+ self.tree.insertDoctype(token)
+
+ if publicId != "":
+ publicId = publicId.translate(asciiUpper2Lower)
+
+ if (not correct or token["name"] != "html" or
+ publicId.startswith(
+ ("+//silmaril//dtd html pro v0r11 19970101//",
+ "-//advasoft ltd//dtd html 3.0 aswedit + extensions//",
+ "-//as//dtd html 3.0 aswedit + extensions//",
+ "-//ietf//dtd html 2.0 level 1//",
+ "-//ietf//dtd html 2.0 level 2//",
+ "-//ietf//dtd html 2.0 strict level 1//",
+ "-//ietf//dtd html 2.0 strict level 2//",
+ "-//ietf//dtd html 2.0 strict//",
+ "-//ietf//dtd html 2.0//",
+ "-//ietf//dtd html 2.1e//",
+ "-//ietf//dtd html 3.0//",
+ "-//ietf//dtd html 3.2 final//",
+ "-//ietf//dtd html 3.2//",
+ "-//ietf//dtd html 3//",
+ "-//ietf//dtd html level 0//",
+ "-//ietf//dtd html level 1//",
+ "-//ietf//dtd html level 2//",
+ "-//ietf//dtd html level 3//",
+ "-//ietf//dtd html strict level 0//",
+ "-//ietf//dtd html strict level 1//",
+ "-//ietf//dtd html strict level 2//",
+ "-//ietf//dtd html strict level 3//",
+ "-//ietf//dtd html strict//",
+ "-//ietf//dtd html//",
+ "-//metrius//dtd metrius presentational//",
+ "-//microsoft//dtd internet explorer 2.0 html strict//",
+ "-//microsoft//dtd internet explorer 2.0 html//",
+ "-//microsoft//dtd internet explorer 2.0 tables//",
+ "-//microsoft//dtd internet explorer 3.0 html strict//",
+ "-//microsoft//dtd internet explorer 3.0 html//",
+ "-//microsoft//dtd internet explorer 3.0 tables//",
+ "-//netscape comm. corp.//dtd html//",
+ "-//netscape comm. corp.//dtd strict html//",
+ "-//o'reilly and associates//dtd html 2.0//",
+ "-//o'reilly and associates//dtd html extended 1.0//",
+ "-//o'reilly and associates//dtd html extended relaxed 1.0//",
+ "-//softquad software//dtd hotmetal pro 6.0::19990601::extensions to html 4.0//",
+ "-//softquad//dtd hotmetal pro 4.0::19971010::extensions to html 4.0//",
+ "-//spyglass//dtd html 2.0 extended//",
+ "-//sq//dtd html 2.0 hotmetal + extensions//",
+ "-//sun microsystems corp.//dtd hotjava html//",
+ "-//sun microsystems corp.//dtd hotjava strict html//",
+ "-//w3c//dtd html 3 1995-03-24//",
+ "-//w3c//dtd html 3.2 draft//",
+ "-//w3c//dtd html 3.2 final//",
+ "-//w3c//dtd html 3.2//",
+ "-//w3c//dtd html 3.2s draft//",
+ "-//w3c//dtd html 4.0 frameset//",
+ "-//w3c//dtd html 4.0 transitional//",
+ "-//w3c//dtd html experimental 19960712//",
+ "-//w3c//dtd html experimental 970421//",
+ "-//w3c//dtd w3 html//",
+ "-//w3o//dtd w3 html 3.0//",
+ "-//webtechs//dtd mozilla html 2.0//",
+ "-//webtechs//dtd mozilla html//")) or
+ publicId in ("-//w3o//dtd w3 html strict 3.0//en//",
+ "-/w3c/dtd html 4.0 transitional/en",
+ "html") or
+ publicId.startswith(
+ ("-//w3c//dtd html 4.01 frameset//",
+ "-//w3c//dtd html 4.01 transitional//")) and
+ systemId is None or
+ systemId and systemId.lower() == "http://www.ibm.com/data/dtd/v11/ibmxhtml1-transitional.dtd"):
+ self.parser.compatMode = "quirks"
+ elif (publicId.startswith(
+ ("-//w3c//dtd xhtml 1.0 frameset//",
+ "-//w3c//dtd xhtml 1.0 transitional//")) or
+ publicId.startswith(
+ ("-//w3c//dtd html 4.01 frameset//",
+ "-//w3c//dtd html 4.01 transitional//")) and
+ systemId is not None):
+ self.parser.compatMode = "limited quirks"
+
+ self.parser.phase = self.parser.phases["beforeHtml"]
+
+ def anythingElse(self):
+ self.parser.compatMode = "quirks"
+ self.parser.phase = self.parser.phases["beforeHtml"]
+
+ def processCharacters(self, token):
+ self.parser.parseError("expected-doctype-but-got-chars")
+ self.anythingElse()
+ return token
+
+ def processStartTag(self, token):
+ self.parser.parseError("expected-doctype-but-got-start-tag",
+ {"name": token["name"]})
+ self.anythingElse()
+ return token
+
+ def processEndTag(self, token):
+ self.parser.parseError("expected-doctype-but-got-end-tag",
+ {"name": token["name"]})
+ self.anythingElse()
+ return token
+
+ def processEOF(self):
+ self.parser.parseError("expected-doctype-but-got-eof")
+ self.anythingElse()
+ return True
+
+ class BeforeHtmlPhase(Phase):
+ # helper methods
+ def insertHtmlElement(self):
+ self.tree.insertRoot(impliedTagToken("html", "StartTag"))
+ self.parser.phase = self.parser.phases["beforeHead"]
+
+ # other
+ def processEOF(self):
+ self.insertHtmlElement()
+ return True
+
+ def processComment(self, token):
+ self.tree.insertComment(token, self.tree.document)
+
+ def processSpaceCharacters(self, token):
+ pass
+
+ def processCharacters(self, token):
+ self.insertHtmlElement()
+ return token
+
+ def processStartTag(self, token):
+ if token["name"] == "html":
+ self.parser.firstStartTag = True
+ self.insertHtmlElement()
+ return token
+
+ def processEndTag(self, token):
+ if token["name"] not in ("head", "body", "html", "br"):
+ self.parser.parseError("unexpected-end-tag-before-html",
+ {"name": token["name"]})
+ else:
+ self.insertHtmlElement()
+ return token
+
+ class BeforeHeadPhase(Phase):
+ def __init__(self, parser, tree):
+ Phase.__init__(self, parser, tree)
+
+ self.startTagHandler = _utils.MethodDispatcher([
+ ("html", self.startTagHtml),
+ ("head", self.startTagHead)
+ ])
+ self.startTagHandler.default = self.startTagOther
+
+ self.endTagHandler = _utils.MethodDispatcher([
+ (("head", "body", "html", "br"), self.endTagImplyHead)
+ ])
+ self.endTagHandler.default = self.endTagOther
+
+ def processEOF(self):
+ self.startTagHead(impliedTagToken("head", "StartTag"))
+ return True
+
+ def processSpaceCharacters(self, token):
+ pass
+
+ def processCharacters(self, token):
+ self.startTagHead(impliedTagToken("head", "StartTag"))
+ return token
+
+ def startTagHtml(self, token):
+ return self.parser.phases["inBody"].processStartTag(token)
+
+ def startTagHead(self, token):
+ self.tree.insertElement(token)
+ self.tree.headPointer = self.tree.openElements[-1]
+ self.parser.phase = self.parser.phases["inHead"]
+
+ def startTagOther(self, token):
+ self.startTagHead(impliedTagToken("head", "StartTag"))
+ return token
+
+ def endTagImplyHead(self, token):
+ self.startTagHead(impliedTagToken("head", "StartTag"))
+ return token
+
+ def endTagOther(self, token):
+ self.parser.parseError("end-tag-after-implied-root",
+ {"name": token["name"]})
+
+ class InHeadPhase(Phase):
+ def __init__(self, parser, tree):
+ Phase.__init__(self, parser, tree)
+
+ self.startTagHandler = _utils.MethodDispatcher([
+ ("html", self.startTagHtml),
+ ("title", self.startTagTitle),
+ (("noframes", "style"), self.startTagNoFramesStyle),
+ ("noscript", self.startTagNoscript),
+ ("script", self.startTagScript),
+ (("base", "basefont", "bgsound", "command", "link"),
+ self.startTagBaseLinkCommand),
+ ("meta", self.startTagMeta),
+ ("head", self.startTagHead)
+ ])
+ self.startTagHandler.default = self.startTagOther
+
+ self.endTagHandler = _utils.MethodDispatcher([
+ ("head", self.endTagHead),
+ (("br", "html", "body"), self.endTagHtmlBodyBr)
+ ])
+ self.endTagHandler.default = self.endTagOther
+
+ # the real thing
+ def processEOF(self):
+ self.anythingElse()
+ return True
+
+ def processCharacters(self, token):
+ self.anythingElse()
+ return token
+
+ def startTagHtml(self, token):
+ return self.parser.phases["inBody"].processStartTag(token)
+
+ def startTagHead(self, token):
+ self.parser.parseError("two-heads-are-not-better-than-one")
+
+ def startTagBaseLinkCommand(self, token):
+ self.tree.insertElement(token)
+ self.tree.openElements.pop()
+ token["selfClosingAcknowledged"] = True
+
+ def startTagMeta(self, token):
+ self.tree.insertElement(token)
+ self.tree.openElements.pop()
+ token["selfClosingAcknowledged"] = True
+
+ attributes = token["data"]
+ if self.parser.tokenizer.stream.charEncoding[1] == "tentative":
+ if "charset" in attributes:
+ self.parser.tokenizer.stream.changeEncoding(attributes["charset"])
+ elif ("content" in attributes and
+ "http-equiv" in attributes and
+ attributes["http-equiv"].lower() == "content-type"):
+ # Encoding it as UTF-8 here is a hack, as really we should pass
+ # the abstract Unicode string, and just use the
+ # ContentAttrParser on that, but using UTF-8 allows all chars
+ # to be encoded and as a ASCII-superset works.
+ data = _inputstream.EncodingBytes(attributes["content"].encode("utf-8"))
+ parser = _inputstream.ContentAttrParser(data)
+ codec = parser.parse()
+ self.parser.tokenizer.stream.changeEncoding(codec)
+
+ def startTagTitle(self, token):
+ self.parser.parseRCDataRawtext(token, "RCDATA")
+
+ def startTagNoFramesStyle(self, token):
+ # Need to decide whether to implement the scripting-disabled case
+ self.parser.parseRCDataRawtext(token, "RAWTEXT")
+
+ def startTagNoscript(self, token):
+ if self.parser.scripting:
+ self.parser.parseRCDataRawtext(token, "RAWTEXT")
+ else:
+ self.tree.insertElement(token)
+ self.parser.phase = self.parser.phases["inHeadNoscript"]
+
+ def startTagScript(self, token):
+ self.tree.insertElement(token)
+ self.parser.tokenizer.state = self.parser.tokenizer.scriptDataState
+ self.parser.originalPhase = self.parser.phase
+ self.parser.phase = self.parser.phases["text"]
+
+ def startTagOther(self, token):
+ self.anythingElse()
+ return token
+
+ def endTagHead(self, token):
+ node = self.parser.tree.openElements.pop()
+ assert node.name == "head", "Expected head got %s" % node.name
+ self.parser.phase = self.parser.phases["afterHead"]
+
+ def endTagHtmlBodyBr(self, token):
+ self.anythingElse()
+ return token
+
+ def endTagOther(self, token):
+ self.parser.parseError("unexpected-end-tag", {"name": token["name"]})
+
+ def anythingElse(self):
+ self.endTagHead(impliedTagToken("head"))
+
+ class InHeadNoscriptPhase(Phase):
+ def __init__(self, parser, tree):
+ Phase.__init__(self, parser, tree)
+
+ self.startTagHandler = _utils.MethodDispatcher([
+ ("html", self.startTagHtml),
+ (("basefont", "bgsound", "link", "meta", "noframes", "style"), self.startTagBaseLinkCommand),
+ (("head", "noscript"), self.startTagHeadNoscript),
+ ])
+ self.startTagHandler.default = self.startTagOther
+
+ self.endTagHandler = _utils.MethodDispatcher([
+ ("noscript", self.endTagNoscript),
+ ("br", self.endTagBr),
+ ])
+ self.endTagHandler.default = self.endTagOther
+
+ def processEOF(self):
+ self.parser.parseError("eof-in-head-noscript")
+ self.anythingElse()
+ return True
+
+ def processComment(self, token):
+ return self.parser.phases["inHead"].processComment(token)
+
+ def processCharacters(self, token):
+ self.parser.parseError("char-in-head-noscript")
+ self.anythingElse()
+ return token
+
+ def processSpaceCharacters(self, token):
+ return self.parser.phases["inHead"].processSpaceCharacters(token)
+
+ def startTagHtml(self, token):
+ return self.parser.phases["inBody"].processStartTag(token)
+
+ def startTagBaseLinkCommand(self, token):
+ return self.parser.phases["inHead"].processStartTag(token)
+
+ def startTagHeadNoscript(self, token):
+ self.parser.parseError("unexpected-start-tag", {"name": token["name"]})
+
+ def startTagOther(self, token):
+ self.parser.parseError("unexpected-inhead-noscript-tag", {"name": token["name"]})
+ self.anythingElse()
+ return token
+
+ def endTagNoscript(self, token):
+ node = self.parser.tree.openElements.pop()
+ assert node.name == "noscript", "Expected noscript got %s" % node.name
+ self.parser.phase = self.parser.phases["inHead"]
+
+ def endTagBr(self, token):
+ self.parser.parseError("unexpected-inhead-noscript-tag", {"name": token["name"]})
+ self.anythingElse()
+ return token
+
+ def endTagOther(self, token):
+ self.parser.parseError("unexpected-end-tag", {"name": token["name"]})
+
+ def anythingElse(self):
+ # Caller must raise parse error first!
+ self.endTagNoscript(impliedTagToken("noscript"))
+
+ class AfterHeadPhase(Phase):
+ def __init__(self, parser, tree):
+ Phase.__init__(self, parser, tree)
+
+ self.startTagHandler = _utils.MethodDispatcher([
+ ("html", self.startTagHtml),
+ ("body", self.startTagBody),
+ ("frameset", self.startTagFrameset),
+ (("base", "basefont", "bgsound", "link", "meta", "noframes", "script",
+ "style", "title"),
+ self.startTagFromHead),
+ ("head", self.startTagHead)
+ ])
+ self.startTagHandler.default = self.startTagOther
+ self.endTagHandler = _utils.MethodDispatcher([(("body", "html", "br"),
+ self.endTagHtmlBodyBr)])
+ self.endTagHandler.default = self.endTagOther
+
+ def processEOF(self):
+ self.anythingElse()
+ return True
+
+ def processCharacters(self, token):
+ self.anythingElse()
+ return token
+
+ def startTagHtml(self, token):
+ return self.parser.phases["inBody"].processStartTag(token)
+
+ def startTagBody(self, token):
+ self.parser.framesetOK = False
+ self.tree.insertElement(token)
+ self.parser.phase = self.parser.phases["inBody"]
+
+ def startTagFrameset(self, token):
+ self.tree.insertElement(token)
+ self.parser.phase = self.parser.phases["inFrameset"]
+
+ def startTagFromHead(self, token):
+ self.parser.parseError("unexpected-start-tag-out-of-my-head",
+ {"name": token["name"]})
+ self.tree.openElements.append(self.tree.headPointer)
+ self.parser.phases["inHead"].processStartTag(token)
+ for node in self.tree.openElements[::-1]:
+ if node.name == "head":
+ self.tree.openElements.remove(node)
+ break
+
+ def startTagHead(self, token):
+ self.parser.parseError("unexpected-start-tag", {"name": token["name"]})
+
+ def startTagOther(self, token):
+ self.anythingElse()
+ return token
+
+ def endTagHtmlBodyBr(self, token):
+ self.anythingElse()
+ return token
+
+ def endTagOther(self, token):
+ self.parser.parseError("unexpected-end-tag", {"name": token["name"]})
+
+ def anythingElse(self):
+ self.tree.insertElement(impliedTagToken("body", "StartTag"))
+ self.parser.phase = self.parser.phases["inBody"]
+ self.parser.framesetOK = True
+
+ class InBodyPhase(Phase):
+ # http://www.whatwg.org/specs/web-apps/current-work/#parsing-main-inbody
+ # the really-really-really-very crazy mode
+ def __init__(self, parser, tree):
+ Phase.__init__(self, parser, tree)
+
+ # Set this to the default handler
+ self.processSpaceCharacters = self.processSpaceCharactersNonPre
+
+ self.startTagHandler = _utils.MethodDispatcher([
+ ("html", self.startTagHtml),
+ (("base", "basefont", "bgsound", "command", "link", "meta",
+ "script", "style", "title"),
+ self.startTagProcessInHead),
+ ("body", self.startTagBody),
+ ("frameset", self.startTagFrameset),
+ (("address", "article", "aside", "blockquote", "center", "details",
+ "dir", "div", "dl", "fieldset", "figcaption", "figure",
+ "footer", "header", "hgroup", "main", "menu", "nav", "ol", "p",
+ "section", "summary", "ul"),
+ self.startTagCloseP),
+ (headingElements, self.startTagHeading),
+ (("pre", "listing"), self.startTagPreListing),
+ ("form", self.startTagForm),
+ (("li", "dd", "dt"), self.startTagListItem),
+ ("plaintext", self.startTagPlaintext),
+ ("a", self.startTagA),
+ (("b", "big", "code", "em", "font", "i", "s", "small", "strike",
+ "strong", "tt", "u"), self.startTagFormatting),
+ ("nobr", self.startTagNobr),
+ ("button", self.startTagButton),
+ (("applet", "marquee", "object"), self.startTagAppletMarqueeObject),
+ ("xmp", self.startTagXmp),
+ ("table", self.startTagTable),
+ (("area", "br", "embed", "img", "keygen", "wbr"),
+ self.startTagVoidFormatting),
+ (("param", "source", "track"), self.startTagParamSource),
+ ("input", self.startTagInput),
+ ("hr", self.startTagHr),
+ ("image", self.startTagImage),
+ ("isindex", self.startTagIsIndex),
+ ("textarea", self.startTagTextarea),
+ ("iframe", self.startTagIFrame),
+ ("noscript", self.startTagNoscript),
+ (("noembed", "noframes"), self.startTagRawtext),
+ ("select", self.startTagSelect),
+ (("rp", "rt"), self.startTagRpRt),
+ (("option", "optgroup"), self.startTagOpt),
+ (("math"), self.startTagMath),
+ (("svg"), self.startTagSvg),
+ (("caption", "col", "colgroup", "frame", "head",
+ "tbody", "td", "tfoot", "th", "thead",
+ "tr"), self.startTagMisplaced)
+ ])
+ self.startTagHandler.default = self.startTagOther
+
+ self.endTagHandler = _utils.MethodDispatcher([
+ ("body", self.endTagBody),
+ ("html", self.endTagHtml),
+ (("address", "article", "aside", "blockquote", "button", "center",
+ "details", "dialog", "dir", "div", "dl", "fieldset", "figcaption", "figure",
+ "footer", "header", "hgroup", "listing", "main", "menu", "nav", "ol", "pre",
+ "section", "summary", "ul"), self.endTagBlock),
+ ("form", self.endTagForm),
+ ("p", self.endTagP),
+ (("dd", "dt", "li"), self.endTagListItem),
+ (headingElements, self.endTagHeading),
+ (("a", "b", "big", "code", "em", "font", "i", "nobr", "s", "small",
+ "strike", "strong", "tt", "u"), self.endTagFormatting),
+ (("applet", "marquee", "object"), self.endTagAppletMarqueeObject),
+ ("br", self.endTagBr),
+ ])
+ self.endTagHandler.default = self.endTagOther
+
+ def isMatchingFormattingElement(self, node1, node2):
+ return (node1.name == node2.name and
+ node1.namespace == node2.namespace and
+ node1.attributes == node2.attributes)
+
+ # helper
+ def addFormattingElement(self, token):
+ self.tree.insertElement(token)
+ element = self.tree.openElements[-1]
+
+ matchingElements = []
+ for node in self.tree.activeFormattingElements[::-1]:
+ if node is Marker:
+ break
+ elif self.isMatchingFormattingElement(node, element):
+ matchingElements.append(node)
+
+ assert len(matchingElements) <= 3
+ if len(matchingElements) == 3:
+ self.tree.activeFormattingElements.remove(matchingElements[-1])
+ self.tree.activeFormattingElements.append(element)
+
+ # the real deal
+ def processEOF(self):
+ allowed_elements = frozenset(("dd", "dt", "li", "p", "tbody", "td",
+ "tfoot", "th", "thead", "tr", "body",
+ "html"))
+ for node in self.tree.openElements[::-1]:
+ if node.name not in allowed_elements:
+ self.parser.parseError("expected-closing-tag-but-got-eof")
+ break
+ # Stop parsing
+
+ def processSpaceCharactersDropNewline(self, token):
+ # Sometimes (start of , , and