From a69010b6dded3e86e19b9c6f0b3bfa13123d96ea Mon Sep 17 00:00:00 2001
From: Saksham
Date: Fri, 10 Oct 2025 11:41:26 +0200
Subject: [PATCH 1/2] profiler: Add blueprint and template

---
 invenio_app_rdm/profiler.py                   | 303 ++++++++++++++++++
 .../templates/semantic-ui/profiler/index.html | 115 +++++++
 setup.cfg                                     |   6 +
 3 files changed, 424 insertions(+)
 create mode 100644 invenio_app_rdm/profiler.py
 create mode 100644 invenio_app_rdm/theme/templates/semantic-ui/profiler/index.html

diff --git a/invenio_app_rdm/profiler.py b/invenio_app_rdm/profiler.py
new file mode 100644
index 000000000..fe00126b9
--- /dev/null
+++ b/invenio_app_rdm/profiler.py
@@ -0,0 +1,303 @@
+# -*- coding: utf-8 -*-
+#
+# Copyright (C) 2023 CERN.
+#
+# ZenodoRDM is free software; you can redistribute it and/or modify it
+# under the terms of the MIT License; see LICENSE file for more details.
+"""Profiler module."""
+
+import re
+from datetime import datetime, timedelta
+from pathlib import Path
+
+import pyinstrument
+import sqlalchemy as sa
+import sqltap
+from flask import (
+    Blueprint,
+    abort,
+    current_app,
+    flash,
+    g,
+    make_response,
+    redirect,
+    render_template,
+    request,
+    session,
+    url_for,
+)
+from sqlalchemy.ext.declarative import declarative_base
+from sqlalchemy.orm import Session
+from sqlalchemy.pool import SingletonThreadPool
+from werkzeug.local import LocalProxy
+from werkzeug.utils import secure_filename
+
+#
+# SQLite DB model
+#
+Base = declarative_base()
+
+
+class SessionRequest(Base):
+    """Profiling session requests model."""
+
+    __tablename__ = "session_requests"
+
+    id = sa.Column(sa.Integer, primary_key=True)
+    ts = sa.Column(sa.DateTime)
+    context = sa.Column(sa.JSON)
+    base_report = sa.Column(sa.Text)
+    sql_report = sa.Column(sa.Text)
+
+
+#
+# Proxies
+#
+current_profiler = LocalProxy(lambda: current_app.extensions["profiler"])
+"""Proxy for the profiler extension."""
+
+
+#
+# Views
+#
+blueprint = Blueprint(
+    "profiler",
+    __name__,
+    url_prefix="/profiler",
+    template_folder="templates",
+)
+
+
+@blueprint.get("/")
+def index():
+    """Index view."""
+    return render_template(
+        "profiler/index.html",
+        active_session=current_profiler.active_session,
+        profiler_sessions=current_profiler.profiler_sessions,
+    )
+
+
+@blueprint.post("/start")
+def start_session():
+    """Start a profiling session."""
+    active_session = current_profiler.active_session
+    if active_session:
+        flash(
+            f"You already have a profiling session running with {active_session['id']}",
+            "error",
+        )
+        return redirect(url_for("profiler.index"), 302)
+
+    current_profiler.active_session = {
+        "id": secure_filename(request.form["id"]),
+        "base": request.form.get("base", type=bool),
+        "sql": request.form.get("sql", type=bool),
+    }
+    return redirect(url_for("profiler.index"), 303)
+
+
+@blueprint.post("/stop")
+def stop_session():
+    """Stop a profiling session."""
+    active_session = current_profiler.active_session
+    if not active_session:
+        flash("You don't have an active profiling session running", "error")
+        return redirect(url_for("profiler.index"), 302)
+    current_profiler.active_session = None
+    return redirect(url_for("profiler.index"), 303)
+
+
+@blueprint.post("/delete")
+def clear_sessions():
+    """Clear profiling sessions from storage."""
+    current_profiler.clear_sessions()
+    return redirect(url_for("profiler.index"), 302)
+
+
+@blueprint.get("/reports/<session_id>/<request_id>/<report_type>")
+def report_view(session_id, request_id, report_type):
+    """Serve a profiling HTML report."""
+    content = current_profiler.get_request_report(session_id, request_id, report_type)
+    if not content:
+        abort(404)
+    resp = make_response(content, 200)
+    resp.content_type = "text/html"
+    resp.charset = "utf-8"
+    return resp
+
+
+@blueprint.before_request
+def check_permission():
+    """Hook for permission check over all the profiler views."""
+    if not current_profiler.permission_func():
+        abort(403)
+
+
+#
+# Flask extension
+#
+class Profiler:
+    """Profiler Flask extension."""
+
+    def __init__(self, app=None):
+        """Extension initialization."""
+        if app:
+            self.init_app(app)
+
+    def init_app(self, app):
+        """Flask application initialization."""
+        self.init_config(app)
+        app.extensions["profiler"] = self
+        app.register_blueprint(blueprint)
+
+        @app.before_request
+        def _setup_profilers():
+            active_session = self.active_session
+            if active_session:
+                endpoint_ignored = any(
+                    re.match(e, request.endpoint)
+                    for e in current_app.config["PROFILER_IGNORED_ENDPOINTS"]
+                )
+                if endpoint_ignored:
+                    return
+                g.profiler_session_id = active_session["id"]
+                if active_session.get("base"):
+                    g.base_profiler = pyinstrument.Profiler()
+                    g.base_profiler.start()
+                if active_session.get("sql"):
+                    g.sql_profiler = sqltap.ProfilingSession()
+                    g.sql_profiler.start()
+
+        @app.after_request
+        def _store_profiler_reports(response):
+            reports = {}
+            if hasattr(g, "base_profiler"):
+                g.base_profiler.stop()
+                report_html = g.base_profiler.output_html()
+                reports["base"] = report_html
+            if hasattr(g, "sql_profiler"):
+                g.sql_profiler.stop()
+                report_html = sqltap.report(
+                    g.sql_profiler.collect(),
+                    report_format="html",
+                )
+                reports["sql"] = report_html
+            if reports:
+                self.store_session_request(reports)
+                self.refresh_active_session()
+            return response
+
+    def init_config(self, app):
+        """Initialize configuration."""
+        app.config.setdefault("PROFILER_STORAGE", Path(app.instance_path) / "profiler")
+        app.config.setdefault("PROFILER_ACTIVE_SESSION_LIFETIME", timedelta(minutes=60))
+        app.config.setdefault("PROFILER_ACTIVE_SESSION_REFRESH", timedelta(minutes=30))
+        app.config.setdefault("PROFILER_IGNORED_ENDPOINTS", ["static", r"profiler\..+"])
+        app.config.setdefault("PROFILER_PERMISSION", lambda: True)
+
+    @property
+    def active_session(self):
+        """Get currently active profiling session, stored in ``Flask.session``."""
+        value = session.get("profiler_session")
+        expires_at = (value or {}).get("expires_at")
+        if value and expires_at < datetime.utcnow():
+            # delete from session and return
+            session.pop("profiler_session")
+            return
+        return value
+
+    @active_session.setter
+    def active_session(self, value):
+        """Set currently active profiling session, stored in ``Flask.session``."""
+        if value:
+            value["expires_at"] = (
+                datetime.utcnow()
+                + current_app.config["PROFILER_ACTIVE_SESSION_LIFETIME"]
+            )
+        session["profiler_session"] = value
+
+    def refresh_active_session(self):
+        """Refresh the expiration of the active session."""
+        target_ts = (
+            datetime.utcnow() + current_app.config["PROFILER_ACTIVE_SESSION_REFRESH"]
+        )
+        active_session = self.active_session
+        if active_session and target_ts > active_session["expires_at"]:
+            # Re-assign through the setter so a fresh ``expires_at`` is stamped.
+            self.active_session = active_session
+
+    @property
+    def permission_func(self):
+        """Get permission check function from config."""
+        return current_app.config["PROFILER_PERMISSION"]
+
+    @property
+    def storage_dir(self):
+        """Profiling sessions storage directory path from config."""
+        return Path(current_app.config["PROFILER_STORAGE"])
+
+    def get_session_entries(self, session_id):
+        """Get profiling session request entries for a session."""
+        session = self._db_session(session_id)
+        return session.query(
+            SessionRequest.id,
+            SessionRequest.ts,
+            SessionRequest.context,
+            SessionRequest.base_report.is_not(None).label("has_base_report"),
+            SessionRequest.sql_report.is_not(None).label("has_sql_report"),
+        ).order_by(SessionRequest.ts.asc())
+
+    @property
+    def profiler_sessions(self):
+        """List profiler sessions information."""
+        if self.storage_dir.exists():
+            return {
+                sess_db.stem: self.get_session_entries(sess_db.stem).all()
+                for sess_db in self.storage_dir.iterdir()
+                if sess_db.is_file() and sess_db.suffix == ".db"
+            }
+        return {}
+
+    def clear_sessions(self):
+        """Delete all profiling session files from storage."""
+        for sess_db in self.storage_dir.glob("*.db"):  # empty if the dir is missing
+            if sess_db.is_file():
+                sess_db.unlink(missing_ok=True)
+
+    def get_request_report(self, session_id, request_id, report_type):
+        """Retrieve the raw HTML report of a request (``None`` if not available)."""
+        if report_type not in ("sql", "base"):
+            return None  # unknown report type: let the view answer 404
+        session = self._db_session(session_id)
+        column = getattr(SessionRequest, f"{report_type}_report")
+        query = session.query(column)
+        return query.filter(SessionRequest.id == request_id).scalar()
+
+    def _db_session(self, session_id=None):
+        """SQLAlchemy session for the SQLite file of a profiling session."""
+        db_path = self.storage_dir / f"{session_id or g.profiler_session_id}.db"
+        db_path.parent.mkdir(parents=True, exist_ok=True)
+        engine = sa.create_engine(f"sqlite:///{db_path}", poolclass=SingletonThreadPool)
+        Base.metadata.create_all(engine)
+        return Session(bind=engine)
+
+    def store_session_request(self, reports):
+        """Store profiling reports and context for a request in a session."""
+        session = self._db_session()
+        session.add(
+            SessionRequest(
+                ts=datetime.utcnow(),
+                context={
+                    "endpoint": request.endpoint,
+                    "url": request.url,
+                    "path": request.path,
+                    "method": request.method,
+                    "referrer": request.referrer,
+                    "headers": dict(request.headers),
+                },
+                base_report=reports.get("base"),
+                sql_report=reports.get("sql"),
+            )
+        )
+        session.commit()
diff --git a/invenio_app_rdm/theme/templates/semantic-ui/profiler/index.html b/invenio_app_rdm/theme/templates/semantic-ui/profiler/index.html
new file mode 100644
index 000000000..6d23012c1
--- /dev/null
+++ b/invenio_app_rdm/theme/templates/semantic-ui/profiler/index.html
@@ -0,0 +1,115 @@
+{%- extends config.BASE_TEMPLATE %}
+
+{% block title %}Profiler{% endblock title %}
+
+{% block page_body %}
+
+
+
+
+

Profiler

+ {% if active_session %} +
+
Session ID
+
{{ active_session.id }}
+
Expires at
+
{{ active_session.expires_at }}
+
+
+ +
+ + {% else %} +
+
+ + +
+
+
+ + +
+
+
+
+ + +
+
+ +
+ {% endif %} +
+ + {# Sessions list #} +
+

Sessions

+ {% if profiler_sessions %} +
+ +
+ {% endif %} +
+ {% for session_id, reports in profiler_sessions.items() %} +
+ {{ session_id }} +
+
+
+ {% for report in reports %} + {% set base_link %} + {%- if report.has_base_report -%} + Base + {%- endif -%} + {% endset %} + {% set sql_link %} + {%- if report.has_sql_report -%} + SQL + {%- endif -%} + {% endset %} +
+ +
+
{{ report.context.method }} {{ report.context.url }} + ({{ report.context.endpoint }})
+
+ Referer: {{ report.context.referrer }} + {{ base_link }} {{ sql_link }} +
+
+
+ {% endfor %} +
+
+ {% else %} +

No recorded profiling sessions in storage yet.

+ {% endfor %} +
+
+
+
+
+
+{% endblock page_body %}
+
+{% block javascript %}
+{{ super() }}
+
+{% endblock javascript %}
diff --git a/setup.cfg b/setup.cfg
index 5b8f9b621..56671850c 100644
--- a/setup.cfg
+++ b/setup.cfg
@@ -71,6 +71,8 @@ install_requires =
     invenio-pages>=7.0.0,<8.0.0
     invenio-audit-logs>=0.3.0,<1.0.0
     invenio-sitemap>=0.1.0,<2.0.0
+    pyinstrument>=5.0.0,<6
+    sqltap>=0.3.11,<1.0.0
 
 [options.extras_require]
 tests =
@@ -122,6 +124,10 @@ invenio_administration.views =
     invenio_app_rdm_records_list = invenio_app_rdm.administration.records:RecordAdminListView
     invenio_app_rdm_drafts_list = invenio_app_rdm.administration.records:DraftAdminListView
     invenio_app_rdm_audit_logs = invenio_app_rdm.administration.audit_logs:AuditLogListView
+invenio_base.apps =
+    profiler = invenio_app_rdm.profiler:Profiler
+invenio_base.api_apps =
+    profiler = invenio_app_rdm.profiler:Profiler
 invenio_base.finalize_app =
     invenio_app_rdm = invenio_app_rdm.ext:finalize_app
 

From 7d88dce0339280de694107b073247c44b0edf630 Mon Sep 17 00:00:00 2001
From: Saksham
Date: Mon, 13 Oct 2025 16:32:51 +0200
Subject: [PATCH 2/2] profiler: Make feature configurable

---
 invenio_app_rdm/config.py   |  4 ++++
 invenio_app_rdm/profiler.py | 47 ++++++++++++++++++++++++++++++++---------------
 setup.cfg                   |  5 +++--
 3 files changed, 39 insertions(+), 17 deletions(-)

diff --git a/invenio_app_rdm/config.py b/invenio_app_rdm/config.py
index 677ccc5ae..36ea65a02 100644
--- a/invenio_app_rdm/config.py
+++ b/invenio_app_rdm/config.py
@@ -1562,3 +1562,7 @@ def github_link_render(record):
     "is_open": {"facet": facets.is_open, "ui": {"field": "is_open"}},
 }
 """Available facets defined for this module."""
+
+# Profiler
+APP_RDM_PROFILER_ENABLED = False
+"""Enable the profiler."""
diff --git a/invenio_app_rdm/profiler.py b/invenio_app_rdm/profiler.py
index fe00126b9..67980139b 100644
--- a/invenio_app_rdm/profiler.py
+++ b/invenio_app_rdm/profiler.py
@@ -10,9 +10,7 @@
 from datetime import datetime, timedelta
 from pathlib import Path
 
-import pyinstrument
 import sqlalchemy as sa
-import sqltap
 from flask import (
     Blueprint,
     abort,
@@ -26,6 +24,7 @@
     session,
     url_for,
 )
+from invenio_administration.permissions import administration_permission
 from sqlalchemy.ext.declarative import declarative_base
 from sqlalchemy.orm import Session
 from sqlalchemy.pool import SingletonThreadPool
@@ -142,11 +141,18 @@ class Profiler:
 
     def __init__(self, app=None):
         """Extension initialization."""
-        if app:
+        # ``current_app`` needs an application context, which extension loading
+        # is not expected to provide, so the flag is read from ``app`` itself.
+        if app and app.config.get("APP_RDM_PROFILER_ENABLED", False):
             self.init_app(app)
 
     def init_app(self, app):
         """Flask application initialization."""
+        # ``pyinstrument`` and ``sqltap`` ship with the optional ``profiler``
+        # extra, so import them only once the profiler is actually set up.
+        import pyinstrument
+        import sqltap
+
         self.init_config(app)
         app.extensions["profiler"] = self
         app.register_blueprint(blueprint)
@@ -157,7 +163,7 @@ def _setup_profilers():
             if active_session:
                 endpoint_ignored = any(
-                    re.match(e, request.endpoint)
-                    for e in current_app.config["PROFILER_IGNORED_ENDPOINTS"]
+                    re.match(e, request.endpoint or "")  # endpoint is None on 404
+                    for e in current_app.config["APP_RDM_PROFILER_IGNORED_ENDPOINTS"]
                 )
                 if endpoint_ignored:
                     return
@@ -190,11 +196,21 @@ def _store_profiler_reports(response):
 
     def init_config(self, app):
         """Initialize configuration."""
-        app.config.setdefault("PROFILER_STORAGE", Path(app.instance_path) / "profiler")
-        app.config.setdefault("PROFILER_ACTIVE_SESSION_LIFETIME", timedelta(minutes=60))
-        app.config.setdefault("PROFILER_ACTIVE_SESSION_REFRESH", timedelta(minutes=30))
-        app.config.setdefault("PROFILER_IGNORED_ENDPOINTS", ["static", r"profiler\..+"])
-        app.config.setdefault("PROFILER_PERMISSION", lambda: True)
+        app.config.setdefault(
+            "APP_RDM_PROFILER_STORAGE", Path(app.instance_path) / "profiler"
+        )
+        app.config.setdefault(
+            "APP_RDM_PROFILER_ACTIVE_SESSION_LIFETIME", timedelta(minutes=60)
+        )
+        app.config.setdefault(
+            "APP_RDM_PROFILER_ACTIVE_SESSION_REFRESH", timedelta(minutes=30)
+        )
+        app.config.setdefault(
+            "APP_RDM_PROFILER_IGNORED_ENDPOINTS", ["static", r"profiler\..+"]
+        )
+        app.config.setdefault(
+            "APP_RDM_PROFILER_PERMISSION", lambda: administration_permission.can()
+        )
 
     @property
     def active_session(self):
@@ -213,12 +229,13 @@ def active_session(self, value):
         if value:
             value["expires_at"] = (
                 datetime.utcnow()
-                + current_app.config["PROFILER_ACTIVE_SESSION_LIFETIME"]
+                + current_app.config["APP_RDM_PROFILER_ACTIVE_SESSION_LIFETIME"]
             )
         session["profiler_session"] = value
 
     def refresh_active_session(self):
         """Refresh the expiration of the active session."""
         target_ts = (
-            datetime.utcnow() + current_app.config["PROFILER_ACTIVE_SESSION_REFRESH"]
+            datetime.utcnow()
+            + current_app.config["APP_RDM_PROFILER_ACTIVE_SESSION_REFRESH"]
         )
@@ -230,12 +247,12 @@ def refresh_active_session(self):
     @property
     def permission_func(self):
         """Get permission check function from config."""
-        return current_app.config["PROFILER_PERMISSION"]
+        return current_app.config["APP_RDM_PROFILER_PERMISSION"]
 
     @property
     def storage_dir(self):
         """Profiling sessions storage directory path from config."""
-        return Path(current_app.config["PROFILER_STORAGE"])
+        return Path(current_app.config["APP_RDM_PROFILER_STORAGE"])
 
     def get_session_entries(self, session_id):
         """Get profiling session request entries for a session."""
@@ -275,7 +292,7 @@ def get_request_report(self, session_id, request_id, report_type):
         return query.filter(SessionRequest.id == request_id).scalar()
 
     def _db_session(self, session_id=None):
-        """SQLAlchemy session for the SQLite file of a profiling session."""
+        """Sqlalchemy session for the SQLite file of a profiling session."""
         db_path = self.storage_dir / f"{session_id or g.profiler_session_id}.db"
         db_path.parent.mkdir(parents=True, exist_ok=True)
         engine = sa.create_engine(f"sqlite:///{db_path}", poolclass=SingletonThreadPool)
diff --git a/setup.cfg b/setup.cfg
index 56671850c..a092fb94c 100644
--- a/setup.cfg
+++ b/setup.cfg
@@ -71,8 +71,6 @@ install_requires =
     invenio-pages>=7.0.0,<8.0.0
     invenio-audit-logs>=0.3.0,<1.0.0
     invenio-sitemap>=0.1.0,<2.0.0
-    pyinstrument>=5.0.0,<6
-    sqltap>=0.3.11,<1.0.0
 
 [options.extras_require]
 tests =
@@ -88,6 +86,9 @@ opensearch2 =
     invenio-search[opensearch2]>=3.0.0,<4.0.0
 s3 =
     invenio-s3>=3.0.0,<4.0.0
+profiler =
+    pyinstrument>=5.0.0,<6
+    sqltap>=0.3.11,<1.0.0
 
 [options.entry_points]
 flask.commands =