Add zero-overhead variants, CPPTRACE_CATCH_ALT, and unprefixed aliases

jeremy-rifkin · jeremy-rifkin · commit adee091491b0 · 2024-08-20T20:53:29.000-05:00
diff --git a/CMakeLists.txt b/CMakeLists.txt
@@ -317,6 +317,10 @@ if(NOT CPPTRACE_STD_FORMAT)
   target_compile_definitions(${target_name} PUBLIC CPPTRACE_NO_STD_FORMAT)
 endif()
 
+if(CPPTRACE_UNPREFIXED_TRY_CATCH)
+  target_compile_definitions(${target_name} PUBLIC CPPTRACE_UNPREFIXED_TRY_CATCH)
+endif()
+
 if(CMAKE_CXX_COMPILER_ID STREQUAL "AppleClang")
   SET(CMAKE_C_ARCHIVE_FINISH   "<CMAKE_RANLIB> -no_warning_for_no_symbols -c <TARGET>")
   SET(CMAKE_CXX_ARCHIVE_FINISH "<CMAKE_RANLIB> -no_warning_for_no_symbols -c <TARGET>")
diff --git a/README.md b/README.md
@@ -29,6 +29,7 @@ Cpptrace also has a C API, docs [here](docs/c-api.md).
     - [Utilities](#utilities)
     - [Configuration](#configuration)
     - [Traces From All Exceptions](#traces-from-all-exceptions)
+      - [Removing the `CPPTRACE_` prefix](#removing-the-cpptrace_-prefix)
       - [How it works](#how-it-works)
       - [Performance](#performance)
     - [Traced Exception Objects](#traced-exception-objects)
@@ -371,14 +372,14 @@ namespace cpptrace {
 ### Traces From All Exceptions
 
 Cpptrace provides `CPPTRACE_TRY` and `CPPTRACE_CATCH` macros that allow a stack trace to be collected from the current
-thrown exception object, with no overhead in the non-throwing path:
+thrown exception object, with minimal or no overhead in the non-throwing path:
 
 ```cpp
 CPPTRACE_TRY {
     foo();
 } CPPTRACE_CATCH(const std::exception& e) {
-    std::cout<<"Exception: "<<e.what()<<std::endl;
-    std::cout<<cpptrace::from_current_exception().to_string(true)<<std::endl;
+    std::cerr<<"Exception: "<<e.what()<<std::endl;
+    cpptrace::from_current_exception().print();
 }
 ```
 
@@ -396,36 +397,121 @@ API functions:
 - `cpptrace::from_current_exception`: Returns a resolved `const stacktrace&` from the current exception. Invalidates
   references to traces returned by `cpptrace::raw_trace_from_current_exception`.
 
+There is a performance tradeoff with this functionality: Either the try-block can be zero overhead in the
+non-throwing path with potential expense in the throwing path, or the try-block can have very minimal overhead
+due to bookkeeping with guarantees about the expense of the throwing path. More details on this tradeoff
+[below](#performance). Cpptrace provides macros for both sides of this tradeoff:
+- `CPPTRACE_TRY`/`CPPTRACE_CATCH`: Minimal overhead in the non-throwing path (one `mov` on x86, and this may be
+  optimized out if the compiler is able)
+- `CPPTRACE_TRYZ`/`CPPTRACE_CATCHZ`: Zero overhead in the non-throwing path, potential extra cost in the throwing path
+
+Note: It's important to not mix the `Z` variants with the non-`Z` variants.
+
+Unfortunately the try/catch macros are needed to insert some magic to perform a trace during the unwinding search phase.
+In order to have multiple catch alternatives, either `CPPTRACE_CATCH_ALT` or a normal `catch` must be used:
+```cpp
+CPPTRACE_TRY {
+    foo();
+} CPPTRACE_CATCH(const std::exception&) { // <- First catch must be CPPTRACE_CATCH
+    // ...
+} CPPTRACE_CATCH_ALT(const std::exception&) { // <- Ok
+    // ...
+} catch(const std::exception&) { // <- Also Ok
+    // ...
+} CPPTRACE_CATCH(const std::exception&) { // <- Not Ok
+    // ...
+}
+```
+
+Note: The current exception is the exception most recently seen by a `CPPTRACE_CATCH` macro.
+
+```cpp
+CPPTRACE_TRY {
+    throw std::runtime_error("foo");
+} CPPTRACE_CATCH(const std::exception& e) {
+    cpptrace::from_current_exception().print(); // the trace for std::runtime_error("foo")
+    CPPTRACE_TRY {
+        throw std::runtime_error("bar");
+    } CPPTRACE_CATCH(const std::exception& e) {
+        cpptrace::from_current_exception().print(); // the trace for std::runtime_error("bar")
+    }
+    cpptrace::from_current_exception().print(); // the trace for std::runtime_error("bar"), again
+}
+```
+
+#### Removing the `CPPTRACE_` prefix
+
+`CPPTRACE_TRY` is a little cumbersome to type. To remove the `CPPTRACE_` prefix you can use the
+`CPPTRACE_UNPREFIXED_TRY_CATCH` cmake variable or define `CPPTRACE_UNPREFIXED_TRY_CATCH` for the preprocessor:
+
+```cpp
+TRY {
+    foo();
+} CATCH(const std::exception& e) {
+    std::cerr<<"Exception: "<<e.what()<<std::endl;
+    cpptrace::from_current_exception().print();
+}
+```
+
+This is not done by default for macro safety/hygiene reasons. If you do not want `TRY`/`CATCH` macros defined, as they
+are common macro names, you can easily modify the following snippet to provide your own aliases:
+
+```cpp
+#define TRY CPPTRACE_TRY
+#define CATCH(param) CPPTRACE_CATCH(param)
+#define TRYZ CPPTRACE_TRYZ
+#define CATCHZ(param) CPPTRACE_CATCHZ(param)
+#define CATCH_ALT(param) CPPTRACE_CATCH_ALT(param)
+```
+
 #### How it works
 
 C++ does not provide any language support for collecting stack traces when exceptions are thrown, however, exception
 handling under both the Itanium ABI and by SEH (used to implement C++ exceptions on windows) involves unwinding the
-stack twice, the first unwind searches for an appropriate `catch` handler, the second actually unwinds the stack and
+stack twice. The first unwind searches for an appropriate `catch` handler, the second actually unwinds the stack and
 calls destructors. Since the stack remains intact during the search phase it's possible to collect a stack trace with
-zero overhead when the `catch` is considered for matching the exception.
+zero overhead when the `catch` is considered for matching the exception. The try/catch macros for cpptrace set up a
+special try/catch construct that can collect a stack trace when considered during a search phase.
 
 N.b.: This mechanism is also discussed in [P2490R3][P2490R3].
 
 #### Performance
 
-`CPPTRACE_CATCH` internally generates lightweight raw traces when considered in the search phase. These are quite fast
-to generate and are only resolved when `cpptrace::from_current_exception` is called.
-
-Currently `CPPTRACE_CATCH` always generates a raw trace when considered as a candidate. That means that if there is a
-nesting of handlers, either directly in code or as a result of the current call stack, the current stack may be traced
-mutliple times until the appropriate handler is found.
-
-This should not matter for the vast majority applications given that performance very rarely is critical in throwing
-paths, how exception handling is usually used, and the shallowness of most call stacks. However, it's something to be
-aware of.
+The fundamental mechanism for this functionality is generating a trace when a catch block is considered during an
+exception handler search phase. Internally a lightweight raw trace is generated upon consideration, which is quite
+fast. This raw trace is only resolved when `cpptrace::from_current_exception` is called, or when the user manually
+resolves a trace from `cpptrace::raw_trace_from_current_exception`.
+
+It's tricky, however, from the library's standpoint to check if the catch will end up matching. The library could simply
+generate a trace every time a `CPPTRACE_CATCH` is considered, however, in a deep nesting of `catch` blocks, e.g. as a
+result of recursion, where a matching handler is not found quickly this could cause notable overhead due to tracing the
+stack multiple times. Thus, there is a performance tradeoff between a little bookkeeping to prevent duplicate tracing
+and biting the bullet, so to speak, in the throwing path and unwinding multiple times.
+
+> [!TIP]
+> The choice between the `Z` and non-`Z` (zero-overhead and non-zero-overhead) variants of the exception handlers should
+> not matter 99% of the time, however, both are provided in the rare case that it does.
+>
+> `CPPTRACE_TRY`/`CPPTRACE_CATCH` could only hurt performance if used in a hot loop where the compiler can't optimize
+> away the internal bookkeeping, otherwise the bookkeeping should be completely negligible.
+>
+> `CPPTRACE_TRYZ`/`CPPTRACE_CATCHZ` could only hurt performance when there is an exceptionally deep nesting of exception
+> handlers in a call stack before a matching handler.
+
+More information on performance considerations with the zero-overhead variant:
+
+Tracing the stack multiple times in throwing paths should not matter for the vast majority of applications given that:
+1. Performance very rarely is critical in throwing paths and exceptions should be exceptionally rare
+2. Exception handling is not usually used in such a way that you could have a deep nesting of handlers before finding a
+   matching handler
+3. Most call stacks are fairly shallow
 
 To put the scale of this performance consideration into perspective: In my benchmarking I have found generation of raw
 traces to take on the order of `100ns` per frame. Thus, even if there were 100 non-matching handlers before a matching
 handler in a 100-deep call stack the total time would still be on the order of one millisecond.
 
-It's possible to avoid this by adding some bookkeeping to the `CPPTRACE_TRY` block. With the tradeoff between
-zero-overhead try-catch in the happy path and a little extra overhead in the unhappy throwing path I decided to keep
-try-catch zero-overhead. Should this be a concern to anyone, I'm happy to facilitate both solutions.
+Nonetheless, I chose a default bookkeeping behavior for `CPPTRACE_TRY`/`CPPTRACE_CATCH` since it is safer with better
+performance guarantees for the most general possible set of users.
 
 ### Traced Exception Objects
 
diff --git a/cmake/OptionVariables.cmake b/cmake/OptionVariables.cmake
@@ -174,6 +174,7 @@ option(CPPTRACE_WERROR_BUILD "" OFF)
 option(CPPTRACE_POSITION_INDEPENDENT_CODE "" ON)
 option(CPPTRACE_SKIP_UNIT "" OFF)
 option(CPPTRACE_STD_FORMAT "" ON)
+option(CPPTRACE_UNPREFIXED_TRY_CATCH "" OFF)
 option(CPPTRACE_USE_EXTERNAL_GTEST "" OFF)
 set(CPPTRACE_ZSTD_REPO "https://github.com/facebook/zstd.git" CACHE STRING "")
 set(CPPTRACE_ZSTD_TAG "794ea1b0afca0f020f4e57b6732332231fb23c70" CACHE STRING "") # v1.5.6
diff --git a/include/cpptrace/from_current.hpp b/include/cpptrace/from_current.hpp
@@ -41,11 +41,19 @@ namespace cpptrace {
              exception_unwind_interceptor(1);
              return 0; // EXCEPTION_CONTINUE_SEARCH
          }
+         CPPTRACE_FORCE_NO_INLINE inline int unconditional_exception_filter() {
+             collect_current_trace(1);
+             return 0; // EXCEPTION_CONTINUE_SEARCH
+         }
         #else
          class CPPTRACE_EXPORT unwind_interceptor {
          public:
              virtual ~unwind_interceptor();
          };
+         class CPPTRACE_EXPORT unconditional_unwind_interceptor {
+         public:
+             virtual ~unconditional_unwind_interceptor();
+         };
 
          CPPTRACE_EXPORT void do_prepare_unwind_interceptor(char(*)(std::size_t));
 
@@ -81,6 +89,16 @@ namespace cpptrace {
              } __except(::cpptrace::detail::exception_filter()) {} \
          }(); \
      } catch(param)
+ #define CPPTRACE_TRYZ \
+     try { \
+         [&]() { \
+             __try { \
+                 [&]() {
+ #define CPPTRACE_CATCHZ(param) \
+                 }(); \
+             } __except(::cpptrace::detail::unconditional_exception_filter()) {} \
+         }(); \
+     } catch(param)
 #else
  #define CPPTRACE_TRY \
      try { \
@@ -92,6 +110,22 @@ namespace cpptrace {
  #define CPPTRACE_CATCH(param) \
          } catch(::cpptrace::detail::unwind_interceptor&) {} \
      } catch(param)
+ #define CPPTRACE_TRYZ \
+     try { \
+         try {
+ #define CPPTRACE_CATCHZ(param) \
+         } catch(::cpptrace::detail::unconditional_unwind_interceptor&) {} \
+     } catch(param)
+#endif
+
+#define CPPTRACE_CATCH_ALT(param) catch(param)
+
+#ifdef CPPTRACE_UNPREFIXED_TRY_CATCH
+ #define TRY CPPTRACE_TRY
+ #define CATCH(param) CPPTRACE_CATCH(param)
+ #define TRYZ CPPTRACE_TRYZ
+ #define CATCHZ(param) CPPTRACE_CATCHZ(param)
+ #define CATCH_ALT(param) CPPTRACE_CATCH_ALT(param)
 #endif
 
 #endif
diff --git a/src/from_current.cpp b/src/from_current.cpp
@@ -48,7 +48,16 @@ namespace cpptrace {
             return false;
         }
 
+        CPPTRACE_FORCE_NO_INLINE
+        bool unconditional_exception_unwind_interceptor(const std::type_info*, const std::type_info*, void**, unsigned) {
+            collect_current_trace(1);
+            return false;
+        }
+
+        using do_catch_fn = decltype(intercept_unwind);
+
         unwind_interceptor::~unwind_interceptor() = default;
+        unconditional_unwind_interceptor::~unconditional_unwind_interceptor() = default;
 
         #if IS_LIBSTDCXX
             constexpr size_t vtable_size = 11;
@@ -194,10 +203,7 @@ namespace cpptrace {
         }
         #endif
 
-        // allocated below, cleaned up by OS after exit
-        void* new_vtable_page = nullptr;
-
-        void perform_typeinfo_surgery(const std::type_info& info) {
+        void perform_typeinfo_surgery(const std::type_info& info, do_catch_fn* do_catch_function) {
             if(vtable_size == 0) { // set to zero if we don't know what standard library we're working with
                 return;
             }
@@ -243,12 +249,13 @@ namespace cpptrace {
             }
 
             // allocate a page for the new vtable so it can be made read-only later
-            new_vtable_page = allocate_page(page_size);
+            // the OS cleans this up, no cleanup done here for it
+            void* new_vtable_page = allocate_page(page_size);
             // make our own copy of the vtable
             memcpy(new_vtable_page, type_info_vtable_pointer, vtable_size * sizeof(void*));
             // ninja in the custom __do_catch interceptor
             auto new_vtable = static_cast<void**>(new_vtable_page);
-            new_vtable[6] = reinterpret_cast<void*>(intercept_unwind);
+            new_vtable[6] = reinterpret_cast<void*>(do_catch_function);
             // make the page read-only
             mprotect_page(new_vtable_page, page_size, memory_readonly);
 
@@ -273,7 +280,11 @@ namespace cpptrace {
             if(!did_prepare) {
                 cpptrace::detail::intercept_unwind_handler = intercept_unwind_handler;
                 try {
-                    perform_typeinfo_surgery(typeid(cpptrace::detail::unwind_interceptor));
+                    perform_typeinfo_surgery(typeid(cpptrace::detail::unwind_interceptor), intercept_unwind);
+                    perform_typeinfo_surgery(
+                        typeid(cpptrace::detail::unconditional_unwind_interceptor),
+                        unconditional_exception_unwind_interceptor
+                    );
                 } catch(std::exception& e) {
                     std::fprintf(
                         stderr,
diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt
@@ -84,6 +84,7 @@ if(NOT CPPTRACE_SKIP_UNIT)
     unit/object_trace.cpp
     unit/stacktrace.cpp
     unit/from_current.cpp
+    unit/from_current_z.cpp
     unit/traced_exception.cpp
   )
   target_compile_features(unittest PRIVATE cxx_std_20)
diff --git a/test/unit/from_current_z.cpp b/test/unit/from_current_z.cpp

Original file line number	Diff line number	Diff line change
`@@ -84,6 +84,7 @@ if(NOT CPPTRACE_SKIP_UNIT)`
`84`	`84`	`unit/object_trace.cpp`
`85`	`85`	`unit/stacktrace.cpp`
`86`	`86`	`unit/from_current.cpp`
	`87`	`+ unit/from_current_z.cpp`
`87`	`88`	`unit/traced_exception.cpp`
`88`	`89`	`)`
`89`	`90`	`target_compile_features(unittest PRIVATE cxx_std_20)`