From ad5a8902f308451c6dce659ad16aeed8fc801995 Mon Sep 17 00:00:00 2001 From: Chris Bowdon Date: Fri, 24 Jan 2020 17:11:25 +0000 Subject: [PATCH 1/7] Hack in fragile GitLab support Any domains that start with `gitlab` will use a handler that assumes an environment variable GITLAB_TOKEN which is your private access token for a v4 GitLab API. --- nbviewer/app.py | 2 + nbviewer/providers/__init__.py | 4 +- nbviewer/providers/gitlab/__init__.py | 1 + nbviewer/providers/gitlab/handlers.py | 107 ++++++++++++++++++++++++++ 4 files changed, 112 insertions(+), 2 deletions(-) create mode 100644 nbviewer/providers/gitlab/__init__.py create mode 100644 nbviewer/providers/gitlab/handlers.py diff --git a/nbviewer/app.py b/nbviewer/app.py index 14cef5d5..098a03c7 100644 --- a/nbviewer/app.py +++ b/nbviewer/app.py @@ -86,6 +86,7 @@ class NBViewer(Application): github_tree_handler = Unicode(default_value="nbviewer.providers.github.handlers.GitHubTreeHandler", help="The Tornado handler to use for viewing directory trees on GitHub").tag(config=True) gist_handler = Unicode(default_value="nbviewer.providers.gist.handlers.GistHandler", help="The Tornado handler to use for viewing notebooks stored as GitHub Gists").tag(config=True) user_gists_handler = Unicode(default_value="nbviewer.providers.gist.handlers.UserGistsHandler", help="The Tornado handler to use for viewing directory containing all of a user's Gists").tag(config=True) + gitlab_handler = Unicode(default_value="nbviewer.providers.gitlab.handlers.GitlabHandler", help="The Tornado handler to use for viewing notebooks in a GitLab instance").tag(config=True) client = Any().tag(config=True) @default('client') @@ -245,6 +246,7 @@ def init_tornado_application(self): local_handler=self.local_handler, gist_handler=self.gist_handler, user_gists_handler=self.user_gists_handler, + gitlab_handler=self.gitlab_handler, ) handler_kwargs = {'handler_names' : handler_names, 'handler_settings' : self.handler_settings} handlers = init_handlers(self.formats, options.providers, self.base_url, options.localfiles, **handler_kwargs) diff --git a/nbviewer/providers/__init__.py b/nbviewer/providers/__init__.py index d3338222..7baa72bb 100644 --- a/nbviewer/providers/__init__.py +++ b/nbviewer/providers/__init__.py @@ -6,10 +6,10 @@ #----------------------------------------------------------------------------- default_providers = ['nbviewer.providers.{}'.format(prov) - for prov in ['url', 'github', 'gist']] + for prov in ['url', 'github', 'gist', 'gitlab']] default_rewrites = ['nbviewer.providers.{}'.format(prov) - for prov in ['gist', 'github', 'dropbox', 'url']] + for prov in ['gitlab', 'gist', 'github', 'dropbox', 'url']] def provider_handlers(providers, **handler_kwargs): diff --git a/nbviewer/providers/gitlab/__init__.py b/nbviewer/providers/gitlab/__init__.py new file mode 100644 index 00000000..9c6b9483 --- /dev/null +++ b/nbviewer/providers/gitlab/__init__.py @@ -0,0 +1 @@ +from .handlers import default_handlers, uri_rewrites diff --git a/nbviewer/providers/gitlab/handlers.py b/nbviewer/providers/gitlab/handlers.py new file mode 100644 index 00000000..b43a322f --- /dev/null +++ b/nbviewer/providers/gitlab/handlers.py @@ -0,0 +1,107 @@ +import json +import os +from tornado import web +from tornado.log import app_log +from ..base import RenderingHandler, cached +from ...utils import response_text +from .. import _load_handler_from_location + + +class GitlabHandler(RenderingHandler): + + async def get_notebook_data(self, host, group, repo, blob, branch, path): + + token = os.environ.get("GITLAB_TOKEN") + + base_url = "https://{host}/api/v4".format(host=host) + + projects_url = ("{base_url}/projects?private_token={token}" + .format(base_url=base_url, token=token)) + + app_log.info("Fetching " + projects_url) + + try: + projects_response = await self.fetch(projects_url) + projects_text = response_text(projects_response) + projects = json.loads(projects_text) + + path_with_namespace = "{group}/{repo}".format(group=group, repo=repo) + + project = None + for p in projects: + if p["path_with_namespace"] == path_with_namespace: + project = p + break + else: + raise Exception("Project path not found: " + path_with_namespace) + + prj = project["id"] + tree_url = ("{base_url}/projects/{prj}/repository/tree?recursive=true&ref={branch}&per_page=1000&private_token={token}" + .format(base_url=base_url, + prj=prj, + branch=branch, + token=token)) + + app_log.info("Fetching " + tree_url) + + tree_response = await self.fetch(tree_url) + tree_text = response_text(tree_response) + tree = json.loads(tree_text) + + blob = None + for item in tree: + if item["path"] == path: + blob = item + break + else: + raise Exception("Blob not found: " + path) + + sha = blob["id"] + + raw_url = "{base_url}/projects/{prj}/repository/blobs/{sha}/raw?private_token={token}" + return raw_url.format(base_url=base_url, + prj=prj, + sha=sha, + token=token) + + except Exception as e: + app_log.error(e) + + + async def deliver_notebook(self, remote_url): + app_log.info("Fetching notebook: " + remote_url) + + response = await self.fetch(remote_url) + + try: + nbjson = response_text(response, encoding='utf-8') + except UnicodeDecodeError: + app_log.error("Notebook is not utf8: %s", remote_url, exc_info=True) + raise web.HTTPError(400) + + await self.finish_notebook(nbjson, + download_url=remote_url, + msg="file from url: " + remote_url, + public=False, + request=self.request) + + + + @cached + async def get(self, host, group, repo, blob, branch, path): + raw_url = await self.get_notebook_data(host, group, repo, blob, branch, path) + await self.deliver_notebook(raw_url) + +def uri_rewrites(rewrites=[]): + gitlab_rewrites = [ + (r'^https?://(gitlab\..*)$', r'/gitlab/{0}'), + (r'^/url[s]?/(gitlab\..*)$', r'/gitlab/{0}'), + (r'^/url[s]?/https?://(gitlab\..*)$', r'/gitlab/{0}'), + ] + return rewrites + gitlab_rewrites + +def default_handlers(handlers=[], **handler_names): + gitlab_handler = _load_handler_from_location(handler_names['gitlab_handler']) + return handlers + [ + (r'/gitlab/(?P[\w_\-.]+)/(?P[\w_\-.]+)/(?P[\w_\-]+)/(?Pblob)/(?P[\w_\-()]+)/(?P.*)', gitlab_handler, {}), + ] From 7d3d62a0e5d37020ed09c75d573b62987056792f Mon Sep 17 00:00:00 2001 From: Chris Bowdon Date: Mon, 27 Jan 2020 21:00:13 +0000 Subject: [PATCH 2/7] Extract Gitlab API calls into client class --- nbviewer/providers/gitlab/client.py | 71 +++++++++++++++++++++++++++ nbviewer/providers/gitlab/handlers.py | 35 ++----------- 2 files changed, 76 insertions(+), 30 deletions(-) create mode 100644 nbviewer/providers/gitlab/client.py diff --git a/nbviewer/providers/gitlab/client.py b/nbviewer/providers/gitlab/client.py new file mode 100644 index 00000000..0f8033ce --- /dev/null +++ b/nbviewer/providers/gitlab/client.py @@ -0,0 +1,71 @@ +import json +import os +from tornado.httpclient import AsyncHTTPClient, HTTPError +from tornado.log import app_log +from ...utils import response_text + + +class GitlabClient(object): + """Asynchronous client for a private GitLab instance using V4 REST API.""" + + def __init__(self, host, token=None, client=None): + """Init a GitlabClient. + + host: str + token: optional str + This needs a private access token - if not provided, uses + environment variable GITLAB_TOKEN + client: AsyncHTTPClient + """ + self.client = client or AsyncHTTPClient() + self.host = host + self.token = token or os.environ.get("GITLAB_TOKEN") + + @property + def api_url(self): + """The base URL of the REST API.""" + return "https://{host}/api/v4".format(host=self.host) + + async def _fetch_json(self, url): + """Fetch JSON content at URL.""" + app_log.info("Fetching " + url) + response = await self.client.fetch(url) + text = response_text(response) + content = json.loads(text) + return content + + async def projects(self): + """List all projects accessible on this GitLab instance.""" + projects_url = ("{base}/projects?private_token={token}" + .format(base=self.api_url, token=self.token)) + return await self._fetch_json(projects_url) + + async def tree(self, project_id, branch): + """List all files in the given branch and project. + + project_id: int + branch: str + """ + tree_url = ("{base}/projects/{project_id}/repository/tree" + "?recursive=true" + "&ref={branch}" + "&per_page=1000" + "&private_token={token}" + .format(base=self.api_url, + project_id=project_id, + branch=branch, + token=self.token)) + return await self._fetch_json(tree_url) + + def raw_file_url(self, project_id, blob_sha): + """URL of the raw file matching given blob SHA in project. + + project_id: int + blob_sha: str + """ + raw_url = ("{base}/projects/{project_id}" + "/repository/blobs/{blob_sha}/raw?private_token={token}") + return raw_url.format(base=self.api_url, + project_id=project_id, + blob_sha=blob_sha, + token=self.token) diff --git a/nbviewer/providers/gitlab/handlers.py b/nbviewer/providers/gitlab/handlers.py index b43a322f..9ce409bc 100644 --- a/nbviewer/providers/gitlab/handlers.py +++ b/nbviewer/providers/gitlab/handlers.py @@ -5,25 +5,17 @@ from ..base import RenderingHandler, cached from ...utils import response_text from .. import _load_handler_from_location +from .client import GitlabClient class GitlabHandler(RenderingHandler): async def get_notebook_data(self, host, group, repo, blob, branch, path): - token = os.environ.get("GITLAB_TOKEN") - - base_url = "https://{host}/api/v4".format(host=host) - - projects_url = ("{base_url}/projects?private_token={token}" - .format(base_url=base_url, token=token)) - - app_log.info("Fetching " + projects_url) + client = GitlabClient(host) try: - projects_response = await self.fetch(projects_url) - projects_text = response_text(projects_response) - projects = json.loads(projects_text) + projects = await client.projects() path_with_namespace = "{group}/{repo}".format(group=group, repo=repo) @@ -35,18 +27,7 @@ async def get_notebook_data(self, host, group, repo, blob, branch, path): else: raise Exception("Project path not found: " + path_with_namespace) - prj = project["id"] - tree_url = ("{base_url}/projects/{prj}/repository/tree?recursive=true&ref={branch}&per_page=1000&private_token={token}" - .format(base_url=base_url, - prj=prj, - branch=branch, - token=token)) - - app_log.info("Fetching " + tree_url) - - tree_response = await self.fetch(tree_url) - tree_text = response_text(tree_response) - tree = json.loads(tree_text) + tree = await client.tree(project["id"], branch) blob = None for item in tree: @@ -56,13 +37,7 @@ async def get_notebook_data(self, host, group, repo, blob, branch, path): else: raise Exception("Blob not found: " + path) - sha = blob["id"] - - raw_url = "{base_url}/projects/{prj}/repository/blobs/{sha}/raw?private_token={token}" - return raw_url.format(base_url=base_url, - prj=prj, - sha=sha, - token=token) + return client.raw_file_url(project["id"], blob["id"]) except Exception as e: app_log.error(e) From 39ce4b78f85e3390d1234fec22925e3258fca935 Mon Sep 17 00:00:00 2001 From: Chris Bowdon Date: Mon, 27 Jan 2020 21:04:31 +0000 Subject: [PATCH 3/7] Add copyright headers --- nbviewer/providers/gitlab/client.py | 7 +++++++ nbviewer/providers/gitlab/handlers.py | 7 +++++++ 2 files changed, 14 insertions(+) diff --git a/nbviewer/providers/gitlab/client.py b/nbviewer/providers/gitlab/client.py index 0f8033ce..817d3fa3 100644 --- a/nbviewer/providers/gitlab/client.py +++ b/nbviewer/providers/gitlab/client.py @@ -1,3 +1,10 @@ +#----------------------------------------------------------------------------- +# Copyright (C) 2020 The IPython Development Team +# +# Distributed under the terms of the BSD License. The full license is in +# the file COPYING, distributed as part of this software. +#----------------------------------------------------------------------------- + import json import os from tornado.httpclient import AsyncHTTPClient, HTTPError diff --git a/nbviewer/providers/gitlab/handlers.py b/nbviewer/providers/gitlab/handlers.py index 9ce409bc..cd4a918f 100644 --- a/nbviewer/providers/gitlab/handlers.py +++ b/nbviewer/providers/gitlab/handlers.py @@ -1,3 +1,10 @@ +#----------------------------------------------------------------------------- +# Copyright (C) 2020 The IPython Development Team +# +# Distributed under the terms of the BSD License. The full license is in +# the file COPYING, distributed as part of this software. +#----------------------------------------------------------------------------- + import json import os from tornado import web From 93b348423fb59687d0bddf6ae87e5c346fb6e3a1 Mon Sep 17 00:00:00 2001 From: Chris Bowdon Date: Mon, 27 Jan 2020 22:20:53 +0000 Subject: [PATCH 4/7] Slightly clarify GitLab handler code to prep for tree support We'll be using `path_type` to identify whether to render the notebook directly or render a list view. --- nbviewer/providers/gitlab/handlers.py | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/nbviewer/providers/gitlab/handlers.py b/nbviewer/providers/gitlab/handlers.py index cd4a918f..0a8f1bb4 100644 --- a/nbviewer/providers/gitlab/handlers.py +++ b/nbviewer/providers/gitlab/handlers.py @@ -17,7 +17,7 @@ class GitlabHandler(RenderingHandler): - async def get_notebook_data(self, host, group, repo, blob, branch, path): + async def get_notebook_data(self, host, group, repo, path_type, branch, path): client = GitlabClient(host) @@ -70,8 +70,8 @@ async def deliver_notebook(self, remote_url): @cached - async def get(self, host, group, repo, blob, branch, path): - raw_url = await self.get_notebook_data(host, group, repo, blob, branch, path) + async def get(self, host, group, repo, path_type, branch, path): + raw_url = await self.get_notebook_data(host, group, repo, path_type, branch, path) await self.deliver_notebook(raw_url) def uri_rewrites(rewrites=[]): @@ -85,5 +85,10 @@ def uri_rewrites(rewrites=[]): def default_handlers(handlers=[], **handler_names): gitlab_handler = _load_handler_from_location(handler_names['gitlab_handler']) return handlers + [ - (r'/gitlab/(?P[\w_\-.]+)/(?P[\w_\-.]+)/(?P[\w_\-]+)/(?Pblob)/(?P[\w_\-()]+)/(?P.*)', gitlab_handler, {}), + (r'/gitlab/(?P[\w_\-.]+)' + '/(?P[\w_\-.]+)' + '/(?P[\w_\-]+)' + '/(?Pblob|tree)' + '/(?P[\w_\-()]+)' + '/(?P.*)', gitlab_handler, {}), ] From 3cfa7369fd0413b304c9db96bad5244bd378371c Mon Sep 17 00:00:00 2001 From: Chris Bowdon Date: Mon, 27 Jan 2020 22:54:42 +0000 Subject: [PATCH 5/7] Make smarter use of GitLab API - Lookup blobs directly where possible - Fall back to searching project trees - Add logs for HTTP Errors - Remove info log messages with private tokens in URLs --- nbviewer/providers/gitlab/client.py | 60 ++++++++++++++++++------ nbviewer/providers/gitlab/handlers.py | 66 ++++++++++++++++----------- 2 files changed, 86 insertions(+), 40 deletions(-) diff --git a/nbviewer/providers/gitlab/client.py b/nbviewer/providers/gitlab/client.py index 817d3fa3..2eab6e7e 100644 --- a/nbviewer/providers/gitlab/client.py +++ b/nbviewer/providers/gitlab/client.py @@ -7,7 +7,8 @@ import json import os -from tornado.httpclient import AsyncHTTPClient, HTTPError +from urllib.parse import quote_plus +from tornado.httpclient import AsyncHTTPClient, HTTPClientError from tornado.log import app_log from ...utils import response_text @@ -35,41 +36,74 @@ def api_url(self): async def _fetch_json(self, url): """Fetch JSON content at URL.""" - app_log.info("Fetching " + url) - response = await self.client.fetch(url) - text = response_text(response) - content = json.loads(text) - return content + try: + response = await self.client.fetch(url) + text = response_text(response) + content = json.loads(text) + return content + except HTTPClientError as ex: + # log and raise because this can get lost in async + app_log.error(ex) + raise ex async def projects(self): """List all projects accessible on this GitLab instance.""" - projects_url = ("{base}/projects?private_token={token}" + projects_url = ("{base}/projects" + "?private_token={token}" + "&simple=true" .format(base=self.api_url, token=self.token)) return await self._fetch_json(projects_url) - async def tree(self, project_id, branch): + async def tree(self, project_id, branch="master", recursive=False): """List all files in the given branch and project. - project_id: int + project_id: int or str branch: str """ + if type(project_id) is str: + project_id = quote_plus(project_id) + tree_url = ("{base}/projects/{project_id}/repository/tree" - "?recursive=true" + "?private_token={token}" + "&recursive={recursive}" "&ref={branch}" "&per_page=1000" - "&private_token={token}" .format(base=self.api_url, project_id=project_id, - branch=branch, + recursive=str(recursive), + branch=quote_plus(branch), token=self.token)) return await self._fetch_json(tree_url) + async def fileinfo(self, project_id, filepath, branch="master"): + """Information for file in given branch and project. + + project_id: int or str + branch: str + filepath: str + """ + if type(project_id) is str: + project_id = quote_plus(project_id) + + file_url = ("{base}/projects/{project_id}/repository/files/{filepath}" + "?private_token={token}" + "&ref={branch}" + .format(base=self.api_url, + project_id=project_id, + branch=quote_plus(branch), + filepath=quote_plus(filepath), + token=self.token)) + return await self._fetch_json(file_url) + def raw_file_url(self, project_id, blob_sha): """URL of the raw file matching given blob SHA in project. - project_id: int + project_id: int or str blob_sha: str """ + if type(project_id) is str: + project_id = quote_plus(project_id) + raw_url = ("{base}/projects/{project_id}" "/repository/blobs/{blob_sha}/raw?private_token={token}") return raw_url.format(base=self.api_url, diff --git a/nbviewer/providers/gitlab/handlers.py b/nbviewer/providers/gitlab/handlers.py index 0a8f1bb4..ccab4f13 100644 --- a/nbviewer/providers/gitlab/handlers.py +++ b/nbviewer/providers/gitlab/handlers.py @@ -8,6 +8,7 @@ import json import os from tornado import web +from tornado.httpclient import HTTPClientError from tornado.log import app_log from ..base import RenderingHandler, cached from ...utils import response_text @@ -17,42 +18,55 @@ class GitlabHandler(RenderingHandler): - async def get_notebook_data(self, host, group, repo, path_type, branch, path): + async def lookup_notebook(self, client, path_with_namespace, branch, filepath): + """Attempt to find the notebook by searching project trees. + Used when an instance is misconfigured and paths are getting sanitised.""" + projects = await client.projects() - client = GitlabClient(host) - - try: - projects = await client.projects() + project = None + for p in projects: + if p["path_with_namespace"] == path_with_namespace: + project = p + break + else: + raise Exception("Project path not found: " + path_with_namespace) - path_with_namespace = "{group}/{repo}".format(group=group, repo=repo) + tree = await client.tree(project["id"], branch, recursive=True) - project = None - for p in projects: - if p["path_with_namespace"] == path_with_namespace: - project = p - break - else: - raise Exception("Project path not found: " + path_with_namespace) + blob = None + for item in tree: + if item["path"] == filepath: + blob = item + break + else: + raise Exception("Blob not found: " + filepath) - tree = await client.tree(project["id"], branch) + return client.raw_file_url(project["id"], blob["id"]) - blob = None - for item in tree: - if item["path"] == path: - blob = item - break - else: - raise Exception("Blob not found: " + path) + async def get_notebook_data(self, host, group, repo, path_type, branch, filepath): + client = GitlabClient(host) - return client.raw_file_url(project["id"], blob["id"]) + path_with_namespace = "{group}/{repo}".format(group=group, repo=repo) + try: + fileinfo = await client.fileinfo(path_with_namespace, filepath, branch) + return client.raw_file_url(path_with_namespace, fileinfo["blob_id"]) + except HTTPClientError as http_error: + if http_error.code == 404: + try: + # Sometimes the url-encoded paths get sanitized, so give this a try + app_log.warn("Unable to access {filepath} in {path_with_namespace} directly, attempting lookup" + .format(filepath=filepath, + path_with_namespace=path_with_namespace)) + return await self.lookup_notebook(client, path_with_namespace, branch, filepath) + except Exception as e: + app_log.error(e) + else: + app_log.error(http_error) except Exception as e: app_log.error(e) - async def deliver_notebook(self, remote_url): - app_log.info("Fetching notebook: " + remote_url) - response = await self.fetch(remote_url) try: @@ -67,8 +81,6 @@ async def deliver_notebook(self, remote_url): public=False, request=self.request) - - @cached async def get(self, host, group, repo, path_type, branch, path): raw_url = await self.get_notebook_data(host, group, repo, path_type, branch, path) From bcd7ae97dc2db7fb6fa2845710e6f2ec8f2358cd Mon Sep 17 00:00:00 2001 From: Chris Bowdon Date: Tue, 28 Jan 2020 17:37:55 +0000 Subject: [PATCH 6/7] First cut of tree support Adds support for rendering the directory view with breadcrumbs. Last modified time is not included yet. --- nbviewer/providers/gitlab/client.py | 15 ++++-- nbviewer/providers/gitlab/handlers.py | 78 ++++++++++++++++++++++++--- 2 files changed, 84 insertions(+), 9 deletions(-) diff --git a/nbviewer/providers/gitlab/client.py b/nbviewer/providers/gitlab/client.py index 2eab6e7e..00159bae 100644 --- a/nbviewer/providers/gitlab/client.py +++ b/nbviewer/providers/gitlab/client.py @@ -14,7 +14,9 @@ class GitlabClient(object): - """Asynchronous client for a private GitLab instance using V4 REST API.""" + """Asynchronous client for a private GitLab instance using V4 REST API. + + Please see https://docs.gitlab.com/ee/api/ for details.""" def __init__(self, host, token=None, client=None): """Init a GitlabClient. @@ -54,11 +56,13 @@ async def projects(self): .format(base=self.api_url, token=self.token)) return await self._fetch_json(projects_url) - async def tree(self, project_id, branch="master", recursive=False): + async def tree(self, project_id, branch="master", path=None, recursive=False): """List all files in the given branch and project. project_id: int or str - branch: str + branch: optional str + path: optional str (defaults to root) + recursive: optional bool """ if type(project_id) is str: project_id = quote_plus(project_id) @@ -73,6 +77,11 @@ async def tree(self, project_id, branch="master", recursive=False): recursive=str(recursive), branch=quote_plus(branch), token=self.token)) + + if path is not None: + tree_url = "{url}&path={path}".format(url=tree_url, + path=quote_plus(path)) + return await self._fetch_json(tree_url) async def fileinfo(self, project_id, filepath, branch="master"): diff --git a/nbviewer/providers/gitlab/handlers.py b/nbviewer/providers/gitlab/handlers.py index ccab4f13..dde0ec2d 100644 --- a/nbviewer/providers/gitlab/handlers.py +++ b/nbviewer/providers/gitlab/handlers.py @@ -43,9 +43,7 @@ async def lookup_notebook(self, client, path_with_namespace, branch, filepath): return client.raw_file_url(project["id"], blob["id"]) - async def get_notebook_data(self, host, group, repo, path_type, branch, filepath): - client = GitlabClient(host) - + async def get_notebook_data(self, client, group, repo, branch, filepath): path_with_namespace = "{group}/{repo}".format(group=group, repo=repo) try: @@ -66,9 +64,19 @@ async def get_notebook_data(self, host, group, repo, path_type, branch, filepath except Exception as e: app_log.error(e) - async def deliver_notebook(self, remote_url): + async def deliver_notebook(self, host, group, repo, branch, path, remote_url): response = await self.fetch(remote_url) + base_url = ("/gitlab/{host}/{group}/{repo}/tree/{branch}/" + .format(host=host, + group=group, + repo=repo, + branch=branch)) + + breadcrumbs = [{"url": base_url, "name": repo}] + dirpath = path.rsplit('/', 1)[0] + breadcrumbs.extend(self.breadcrumbs(dirpath, base_url)) + try: nbjson = response_text(response, encoding='utf-8') except UnicodeDecodeError: @@ -79,12 +87,70 @@ async def deliver_notebook(self, remote_url): download_url=remote_url, msg="file from url: " + remote_url, public=False, + breadcrumbs=breadcrumbs, request=self.request) + def render_dirview_template(self, entries, title, breadcrumbs): + return self.render_template('dirview.html', + entries=entries, + breadcrumbs=breadcrumbs, + title=title) + + async def show_dir(self, client, group, repo, branch, dirpath): + path_with_namespace = "{group}/{repo}".format(group=group, repo=repo) + tree = await client.tree(path_with_namespace, branch, dirpath) + + full_url = "/gitlab/{host}/{group}/{repo}/{path_type}/{branch}/{path}" + external_url = "https://{host}/{group}/{repo}/{path_type}/{branch}/{path}" + + base_url = ("/gitlab/{host}/{group}/{repo}/tree/{branch}/" + .format(host=client.host, + group=group, + repo=repo, + branch=branch)) + + breadcrumbs = [{"url": base_url, "name": repo}] + breadcrumbs.extend(self.breadcrumbs(dirpath, base_url)) + + entries = [] + for item in tree: + if item["type"] == "tree": + entry_class = "fa fa-folder-open" + url = item["path"] + elif item["type"] == "blob" and item["path"].endswith("ipynb"): + entry_class = "fa fa-book" + url = full_url.format(host=client.host, + group=group, + repo=repo, + path_type="blob", + branch=branch, + path=item["path"]) + else: + entry_class = "fa fa-share" + url = external_url.format(host=client.host, + group=group, + repo=repo, + path_type="blob", + branch=branch, + path=item["path"]) + + entries.append({"name": item["name"], + "url": url, + "class": entry_class}) + + html = self.render_dirview_template(entries=entries, + title=dirpath, + breadcrumbs=breadcrumbs) + await self.cache_and_finish(html) + @cached async def get(self, host, group, repo, path_type, branch, path): - raw_url = await self.get_notebook_data(host, group, repo, path_type, branch, path) - await self.deliver_notebook(raw_url) + client = GitlabClient(host) + if path_type == "blob": + raw_url = await self.get_notebook_data(client, group, repo, branch, path) + await self.deliver_notebook(host, group, repo, branch, path, raw_url) + else: + await self.show_dir(client, group, repo, branch, path) def uri_rewrites(rewrites=[]): gitlab_rewrites = [ From c4409b5dc07b2ff7a90d4558a1a0d00212dee615 Mon Sep 17 00:00:00 2001 From: Chris Bowdon Date: Mon, 24 Feb 2020 12:26:50 +0000 Subject: [PATCH 7/7] Search projects for efficiency Also prevents the fallback lookup method failing because we hit the number of project search results. --- nbviewer/providers/gitlab/client.py | 8 ++++++-- nbviewer/providers/gitlab/handlers.py | 8 +++++--- 2 files changed, 11 insertions(+), 5 deletions(-) diff --git a/nbviewer/providers/gitlab/client.py b/nbviewer/providers/gitlab/client.py index 00159bae..cf2759be 100644 --- a/nbviewer/providers/gitlab/client.py +++ b/nbviewer/providers/gitlab/client.py @@ -48,12 +48,16 @@ async def _fetch_json(self, url): app_log.error(ex) raise ex - async def projects(self): - """List all projects accessible on this GitLab instance.""" + async def projects(self, search=None): + """List projects accessible on this GitLab instance.""" projects_url = ("{base}/projects" "?private_token={token}" "&simple=true" .format(base=self.api_url, token=self.token)) + + if search is not None: + projects_url = projects_url + "&search={}".format(search) + return await self._fetch_json(projects_url) async def tree(self, project_id, branch="master", path=None, recursive=False): diff --git a/nbviewer/providers/gitlab/handlers.py b/nbviewer/providers/gitlab/handlers.py index dde0ec2d..5200db30 100644 --- a/nbviewer/providers/gitlab/handlers.py +++ b/nbviewer/providers/gitlab/handlers.py @@ -18,13 +18,15 @@ class GitlabHandler(RenderingHandler): - async def lookup_notebook(self, client, path_with_namespace, branch, filepath): + async def lookup_notebook(self, client, group, repo, branch, filepath): """Attempt to find the notebook by searching project trees. Used when an instance is misconfigured and paths are getting sanitised.""" - projects = await client.projects() + projects = await client.projects(search=repo) project = None + path_with_namespace = "{0}/{1}".format(group, repo) for p in projects: + print(p["path_with_namespace"]) if p["path_with_namespace"] == path_with_namespace: project = p break @@ -56,7 +58,7 @@ async def get_notebook_data(self, client, group, repo, branch, filepath): app_log.warn("Unable to access {filepath} in {path_with_namespace} directly, attempting lookup" .format(filepath=filepath, path_with_namespace=path_with_namespace)) - return await self.lookup_notebook(client, path_with_namespace, branch, filepath) + return await self.lookup_notebook(client, group, repo, branch, filepath) except Exception as e: app_log.error(e) else: