diff --git a/castle/cms/archival.py b/castle/cms/archival.py index 4e47237be..05df870cf 100644 --- a/castle/cms/archival.py +++ b/castle/cms/archival.py @@ -199,7 +199,7 @@ def _get_vhm_base_url(public_url, site_path): parsed = urlparse(public_url) port = parsed.port if port is None: - port = 80 if parsed.scheme == 'http' else 403 + port = 80 if parsed.scheme == 'http' else 443 return '/VirtualHostBase/{scheme}/{hostname}:{port}{site_path}/VirtualHostRoot'.format( scheme=parsed.scheme, hostname=parsed.hostname, @@ -276,7 +276,7 @@ def get_vhm_url(self, url): final_path = '/' + '/'.join(parsed_path) return self.vhm_base + final_path - def __call__(self, url, use_vhm=True, require_public_url=True): + def __call__(self, url, use_vhm=True, require_public_url=True, root=None): url = normalize_url(url) if not url: return @@ -298,7 +298,7 @@ def __call__(self, url, use_vhm=True, require_public_url=True): if use_vhm: url = self.get_vhm_url(url) - resp = subrequest(url) + resp = subrequest(url, root=root) if resp.getStatus() == 404: return diff --git a/castle/cms/browser/content/__init__.py b/castle/cms/browser/content/__init__.py index 636473cc5..b43a88d7e 100644 --- a/castle/cms/browser/content/__init__.py +++ b/castle/cms/browser/content/__init__.py @@ -60,6 +60,8 @@ from zope.component.hooks import getSite from zope.container.interfaces import INameChooser from zope.interface.declarations import noLongerProvides +from zope.globalrequest import getRequest +from zope.globalrequest import setRequest try: @@ -805,6 +807,7 @@ def get_invalid_backend_urls_found(self, content): class QualityCheckContent(BrowserView): QUALITY_CHECK_URL = '/@@quality-check' + ERROR = 'ERROR' @property def formatted_url(self): @@ -816,26 +819,39 @@ def formatted_url(self): except Exception: return None - @property - def subrequest_results(self): - opener = SubrequestUrlOpener( - site=api.portal.get(), - check_blacklist=False, - ) - return opener(self.formatted_url, require_public_url=False) - 
@property
def contains_backend_urls(self):
    """Check the rendered page for backend urls via a subrequest.

    The page at ``self.formatted_url`` is fetched with a
    ``SubrequestUrlOpener`` and the returned html is scanned with
    ``BackendUrlUtils.get_invalid_backend_urls_found``.

    Returns:
        ``True`` when backend urls were found in the html, ``False`` when
        none were found, or ``self.ERROR`` when the check could not be
        completed (any exception, or no subrequest result).
    """
    app = api.portal.get()
    orig_req = getRequest()
    try:
        # we subvert the original request to remove any configured VHM meta that the
        # plone.subrequest.subrequest() method will use to construct the request it'll
        # end up using
        #
        # NOTE(review): copy() was reported as not working locally and clone() may be
        # the right call - confirm against the request class in use. The call now
        # lives inside the try block so that such a failure yields self.ERROR
        # instead of breaking the whole quality check.
        req_for_sub = orig_req.copy()
        req_for_sub['VIRTUAL_URL_PARTS'] = None
        setRequest(req_for_sub)

        opener = SubrequestUrlOpener(site=app, check_blacklist=False)
        # we pass in the app as the "root" object to get a base path from, since it'll just
        # be an empty path (or a single slash, which will be negligible)
        #
        # the subverted VIRTUAL_URL_PARTS and the portal root object as the root/context to
        # use combined will mean that the calculated VHM path for the configured frontend
        # can be used from within the context of a backend url
        subrequest_results = opener(self.formatted_url, require_public_url=False, root=app)
        if subrequest_results is None:
            # the opener can come back empty-handed (it has bare returns for an
            # empty url and for a 404 response), so there is nothing to inspect
            logger.warning('No subrequest result to check for backend urls')
            return self.ERROR

        subrequest_html = subrequest_results['data']
        backend_utils = BackendUrlUtils()
        backend_urls_found = backend_utils.get_invalid_backend_urls_found(subrequest_html)
        if backend_urls_found:
            logger.warning('There were backend urls found in the html')
            # lazy %-style args: formatted_url may be None, which must not turn a
            # positive finding into an error through string concatenation
            logger.info('Backend urls found: %r', backend_urls_found)
            logger.info('Data searched for: %s', self.formatted_url)
            return True
        return False
    except Exception:
        # best effort: the caller expects a boolean or the ERROR sentinel, but the
        # failure should still be visible in the logs
        logger.exception('Unable to check content for backend urls')
        return self.ERROR
    finally:
        # and we revert back to the original request
        setRequest(orig_req)
@@ -878,7 +894,7 @@ def __call__(self): 'headersOrdered': self.are_headers_ordered(html), 'html': html_parser.unescape(html), 'isTemplate': self.context in get_template_repository_info()['templates'], - # 'containsBackendUrls': self.contains_backend_urls, + 'containsBackendUrls': self.contains_backend_urls, # boolean or 'ERROR' })