Skip to content

Commit 4711761

Browse files
committed
[GR-13035] First version of the fastRCluster (FastR on GNU-R) package.
PullRequest: fastr/2063
2 parents bafca07 + df52dbc commit 4711761

File tree

10 files changed

+1131
-0
lines changed

10 files changed

+1131
-0
lines changed
Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,11 @@
1+
Package: fastRCluster
2+
Title: Run FastR, the fastest R engine, in GNU-R
3+
Version: 0.0.0.1
4+
Description: Provides a simple way to run FastR cluster via the parallel package from GNU-R.
5+
License: GPL-3
6+
Encoding: UTF-8
7+
Imports: parallel
8+
LazyData: true
9+
Author: Stepan Sindelar <[email protected]>
10+
Maintainer: Stepan Sindelar <[email protected]>
11+
RoxygenNote: 6.1.1

com.oracle.truffle.r.pkgs/fastRCluster/LICENSE.md

Lines changed: 595 additions & 0 deletions
Large diffs are not rendered by default.
Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,9 @@
1+
# Generated by roxygen2: do not edit by hand
2+
3+
S3method(print,fastRCluster)
4+
export(defaultGraalVMHome)
5+
export(fastRClusterInstallPackages)
6+
export(fastr)
7+
export(getGraalVMHome)
8+
export(installFastR)
9+
export(makeFastRCluster)
Lines changed: 296 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,296 @@
1+
#
2+
# Copyright (c) 2019, Oracle and/or its affiliates. All rights reserved.
3+
# DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4+
#
5+
# This code is free software; you can redistribute it and/or modify it
6+
# under the terms of the GNU General Public License version 3 only, as
7+
# published by the Free Software Foundation.
8+
#
9+
# This code is distributed in the hope that it will be useful, but WITHOUT
10+
# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
11+
# FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
12+
# version 3 for more details (a copy is included in the LICENSE file that
13+
# accompanied this code).
14+
#
15+
# You should have received a copy of the GNU General Public License version
16+
# 3 along with this work; if not, write to the Free Software Foundation,
17+
# Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
18+
#
19+
# Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
20+
# or visit www.oracle.com if you need additional information or have any
21+
# questions.
22+
#
23+
24+
#' Run your R code faster with FastR!
25+
#'
26+
#' @description
27+
#' FastR is an alternative implementation of the R programming language,
28+
#' which provides superb performance for computation intensive longer
29+
#' running jobs, but takes bit more time to warm-up. The performance
30+
#' of FastR is especially good with pure R code with loops, but it can
31+
#' also deal with C/C++/Fortran code in R packages.
32+
#'
33+
#' Package \emph{fastRCluster} lets you run FastR inside GNU-R via PSOCK cluster.
34+
#' With this package, you can move your performance critical R algorithms to FastR,
35+
#' but keep the rest of your code-base on GNU-R. You can also use this package
36+
#' to gradually move all your code to FastR.
37+
#'
38+
#' We recommend using fastRCluster as a back-end for the \emph{future} package.
39+
#' Keep your configuration of the \emph{future} package isolated from the rest
40+
#' of the system to be able to simply switch between FastR and other back-ends.
41+
#'
42+
#' @details
43+
#' This package does not come with pre-installed FastR. However, FastR can be
44+
#' installed using the \code{installFastR} function. Once FastR is installed,
45+
#' you can create cluster nodes that delegate to FastR using \code{makeFastRCluster}.
46+
#' Note: like with PSOCK cluster, you have to install required packages on the FastR engine.
47+
#' You can use \code{fastRClusterInstallPackages} to install the necessary packages.
48+
#'
49+
#' FastR leverages dynamic just-in-time compilation. R functions are first interpreted
50+
#' and then compiled. The first few executions are much slower. In order to re-use
51+
#' the compiled code as much as possible, it is good idea to first transfer all the
52+
#' necessary R functions to the cluster nodes using \code{clusterExport}.
53+
#' If you send large and computation heavy R function via, e.g., \code{clusterApply},
54+
#' it will be always deserialized to a different function on the other end in FastR and hence
55+
#' no compiled code will be reused.
56+
#'
57+
#' @examples
58+
#' library(fastRCluster)
59+
#'
60+
#' # downloads and installs FastR, note: this may take a while
61+
#' installFastR()
62+
#'
63+
#' # use the cluster package with FastR
64+
#' fastrNode <- makeFastRCluster()
65+
#' print(fastrNode)
66+
#' # prints: FastR socket cluster with 1 nodes on host ‘localhost’
67+
#'
68+
#' # install required packages on FastR
69+
#' fastRClusterInstallPackages('rlang')
70+
#'
71+
#' # use the cluster package with FastR
72+
#' # R.version will show that we are running that code on FastR
73+
#' parallel::clusterApply(fastrNode, 'dummy', function(...) R.version)
74+
#'
75+
#' # use 'fastr' convenient wrapper around clusterApply
76+
#' gg <- fastr(fastrNode, ggplot2::qplot(mpg, data=mtcars, geom="density",
77+
#' main=paste0("Generated by ", R.version$engine, "[", Sys.getpid(), "]")))
78+
#' plot(gg)
79+
#'
80+
#' # transfer data and a helper function to the global environmnet of the cluster nodes
81+
#' largeDataSet <- matrix(runif(1000000), 1000, 1000)
82+
#' myComputation <- function(x) {
83+
#' x <- x/sum(x)
84+
#' res <- 0
85+
#' colsums <- colSums(x)
86+
#' rowsums <- rowSums(x)
87+
#' for(i in seq_along(1:nrow(x))){
88+
#' for(j in seq_along(1:ncol(x))){
89+
#' temp <- log((x[i,j]/(colsums[j]*rowsums[i])))
90+
#' res <- res + x[i,j] * if(is.finite(temp)) temp else 0
91+
#' }
92+
#' }
93+
#' res
94+
#' }
95+
#' parallel::clusterExport(fastrNode, c('largeDataSet', 'myComputation'))
96+
#' # now you can refer to 'largeDataSet' and 'myComputation'
97+
#' fastr(fastrNode, myComputation(largeDataSet))
98+
#'
99+
#' # use the future package with FastR
100+
#' if (require(future)) {
101+
#' future::plan(future::cluster, workers = makeFastRCluster())
102+
#' val %<-% R.version
103+
#' print(val)
104+
#' }
105+
#'
106+
#' @keywords internal
107+
"_PACKAGE"
108+
109+
#' Default GraalVM installation path
110+
#'
111+
#' Gives the path to the default location of GraalVM installation that includes FastR.
112+
#' The default location is inside the directory where the fastRCluster was installed.
113+
#'
114+
#' \code{\link{getGraalVMHome()}} uses this value as the default,
115+
#' if no other value is explicitly configured via R options or an environment variable.
116+
#'
117+
#' @return The default GraalVM installation path
118+
#' @seealso \code{\link{getGraalVMHome}}
119+
#' @export
120+
defaultGraalVMHome <- function() {
121+
fastrPkgHome <- find.package('fastRCluster')
122+
file.path(fastrPkgHome, 'graalvm')
123+
}
124+
125+
#' Currently configured GraalVM path
126+
#'
127+
#' Gives the path that is used as a default value of the \code{graalVMHome} parameter
128+
#' for most of the functions in the fastRCluster package.
129+
#'
130+
#' The value is taken from (in this order)
131+
#' \enumerate{
132+
#' \item R option "graalvm.home"
133+
#' \item environment variable \code{GRAALVM_HOME}
134+
#' \item \code{\link{defaultGraalVMHome}()}
135+
#' }
136+
#'
137+
#' @return The currently configured path to GraalVM installation.
138+
#' @seealso \code{\link{defaultGraalVMHome}}
139+
#' @export
140+
getGraalVMHome <- function() getOption("graalvm.home", Sys.getenv('GRAALVM_HOME', defaultGraalVMHome()));
141+
142+
#' Installs FastR
143+
#'
144+
#' Downloads GraalVM Community Edition and installs the R ("FastR") component for GraalVM.
145+
#'
146+
#' Note: the download is around 300MB. The installation usually takes few seconds.
147+
#' If the given directory already contains GraalVM, this function installs the R ("FastR") component.
148+
#'
149+
#' @param path Path to a directory where GraalVM should be installed. Defaults to \code{\link{defaultGraalVMHome}()}.
150+
#' @return the path where GraalVM was installed if successful (invisible), otherwise this function raises an error.
151+
#' @seealso \code{\link{defaultGraalVMHome}}
152+
#' @export
153+
installFastR <- function(path = defaultGraalVMHome()) {
154+
toRemove <- character(0)
155+
on.exit(unlink(toRemove, recursive=T, force=T)) # note: unlink seems to be OK with non-existing files
156+
if (file.exists(file.path(path, 'bin', 'Rscript'))) {
157+
message(sprintf("The directory '%s' appears to already contain GraalVM installation with FastR. Doing nothing.", path))
158+
return(invisible(path))
159+
} else if (file.exists(file.path(path, 'bin', 'gu'))) {
160+
message(sprintf("The directory '%s' appears to already contain GraalVM installation. FastR will be installed in it.", path))
161+
} else {
162+
if (!file.exists(path)) {
163+
message(sprintf("The path '%s' does not exist. Creating it.", path))
164+
dir.create(path)
165+
} else if (length(list.files(path)) > 0L) {
166+
message(sprintf("The directory '%s' is not empty. Choose different directory or remove its contents.", path))
167+
}
168+
tarFile <- paste0(tempfile(), '.tar.gz')
169+
url <- if (Sys.info()[["sysname"]] == "Darwin")
170+
'https://github.com/oracle/graal/releases/download/vm-19.0.2/graalvm-ce-darwin-amd64-19.0.2.tar.gz' else
171+
'https://github.com/oracle/graal/releases/download/vm-19.0.2/graalvm-ce-linux-amd64-19.0.2.tar.gz';
172+
toRemove <- tarFile
173+
download.file(url, tarFile)
174+
workDir <- dirname(path)
175+
origFiles <- list.files(workDir)
176+
untarRes <- untar(tarFile, exdir = workDir)
177+
if (untarRes != 0L) {
178+
stop(sprintf("An error occurred when extracting GraalVM files to '%s'. Is this directory writeable? Error code: %d.", path, untarRes))
179+
}
180+
graalVMOrigDir <- setdiff(list.files(workDir), origFiles)
181+
renRes <- file.rename(file.path(workDir, graalVMOrigDir), file.path(workDir, basename(path)))
182+
if (!all(renRes)) {
183+
stop(sprintf("An error occurred when moving GraalVM files to '%s'. Is this directory writeable? Error code: %d.", path, renRes))
184+
}
185+
}
186+
guRes <- system2(file.path(path, 'bin', 'gu'), args=c('install', 'R'))
187+
if (guRes != 0) {
188+
stop("An error occurred during installation of FastR. Please report at https://github.com/oracle/fastr.")
189+
}
190+
invisible(path)
191+
}
192+
193+
#' Installs packages on the FastR engine
194+
#'
195+
#' @param ... Parameters passed to the R function \code{install.packages} that is run on the FastR engine.
196+
#' @return Invisible \code{NULL}
197+
#' @export
198+
#' @examples
199+
#' fastRClusterInstallPackages(c('rlang', 'ggplot2'), INSTALL_opts='--no-test-load')
200+
fastRClusterInstallPackages <- function(...) {
201+
cl <- makeFastRCluster(1, metehods=F)
202+
on.exit(stopCluster(cl))
203+
parallel::clusterApply(cl, list(list(...)), function(args) do.call(install.packages, args))
204+
invisible(NULL)
205+
}
206+
207+
#' Creates cluster nodes that delegate to FastR
208+
#'
209+
#' FastR is an alternative implementation of the R programming language,
210+
#' which provides superb performance for computation intensive and longer
211+
#' running jobs, but takes bit more time to warm-up.
212+
#'
213+
#' @param nnodes Number of nodes to be created.
214+
#' @param graalVMHome Path to the installation directory of GraalVM and FastR. Default value is obtained from \code{getGraalVMHome()}.
215+
#' @param mode Mode in which to run FastR. See the FastR documentation on the details on the difference between jvm and native modes.
216+
#' @param polyglot Run FastR in a polyglot mode: other installed GraalVM languages will be available via \code{eval.polyglot}. See \code{installGraalVMLanguage}. Allowed only for mode 'jvm' (the default).
217+
#' @param fastROptions Additional options for the FastR engine.
218+
#' @param ... Additional options forwarded to \code{makePSOCKcluster}
219+
#' @return The cluster object that can be passed to functions like \code{parallel::clusterApply}.
220+
#' @seealso \code{\link{getGraalVMHome}}
221+
#' @export
222+
#' @examples
223+
#' fastrNode <- makeFastRCluster()
224+
#' parallel::clusterApply(fastrNode, 'dummy', function(...) R.version)
225+
#' fastr(fastrNode, R.version)
226+
makeFastRCluster <- function (nnodes = 1L, graalVMHome = getGraalVMHome(), mode = c('jvm', 'native'), polyglot = FALSE, fastROptions = NULL, ...) {
227+
nnodes <- as.integer(nnodes)
228+
if(is.na(nnodes) || nnodes < 1L) {
229+
stop("'nnodes' must be >= 1")
230+
}
231+
parallel:::.check_ncores(nnodes)
232+
233+
if (!dir.exists(graalVMHome)) {
234+
if (graalVMHome == defaultGraalVMHome()) {
235+
stop(sprintf(paste0("It seems that FastR was not installed yet. ",
236+
"Use installFastR() to install GraalVM and FastR to the default location '%s', ",
237+
"or set argument 'graalVMHome' to a directory that contains GraalVM and FastR installation. ",
238+
"See ?getGraalVMHome for more details."), defaultGraalVMHome()))
239+
} else {
240+
stop(sprintf(paste0("The GraalVM directory '%s' does not exist. ",
241+
"Use installFastR('%s') to install GraalVM and FastR to that directory."),
242+
graalVMHome, graalVMHome))
243+
}
244+
}
245+
if (!file.exists(file.path(graalVMHome, 'bin', 'gu'))) {
246+
stop(sprintf("The GraalVM directory '%s' appears to be corrupt. You can remove it and use installFastR('%s') to re-install GraalVM and FastR.", graalVMHome, graalVMHome))
247+
}
248+
if (!file.exists(file.path(graalVMHome, 'bin', 'Rscript'))) {
249+
stop(sprintf("The GraalVM installation '%s' does not contain FastR. Use installFastR('%s') to install FastR.", graalVMHome, graalVMHome))
250+
}
251+
if (any(c('--jvm', '--native') %in% fastROptions)) {
252+
warning("Ignoring --jvm/--native in 'fastROptions' argument. Use the 'mode' argument instead.")
253+
}
254+
if (any(c('--polyglot') %in% fastROptions)) {
255+
warning("Ignoring --polyglot in 'fastROptions' argument. Use the 'polyglot' argument instead.")
256+
}
257+
258+
mode <- match.arg(mode)
259+
options <- fastROptions[grep('--jvm', fastROptions)]
260+
options <- options[grep('--native', fastROptions)]
261+
options <- options[grep('--polyglot', fastROptions)]
262+
if (polyglot) {
263+
if (mode != 'jvm') {
264+
stop("polyglot is only available when mode = 'jvm'")
265+
}
266+
options <- c('--polyglot', options)
267+
}
268+
options <- switch(mode,
269+
jvm = c('--jvm', options),
270+
native = c('--native', options))
271+
272+
result <- parallel::makePSOCKcluster(nnodes, rscript=file.path(graalVMHome, 'bin', 'Rscript'), rscript_args = options, ...)
273+
class(result) <- c("fastRCluster", class(result))
274+
result
275+
}
276+
277+
#' Runs given code in the FastR engine.
278+
#'
279+
#' This is a convenient wrapper around \code{clusterApply} that runs
280+
#' the given code on the first node in the cluster.
281+
#'
282+
#' @param cl FastR cluster object. Use \code{\link{makeFastRCluster}()} to get one.
283+
#' @param code The code that will be run on the FastR node. It will not be evaluated in the current session.
284+
#' @return The result of evaluating the code
285+
#' @export
286+
#' @examples
287+
#' fastrNode <- makeFastRCluster()
288+
#' fastr(fastrNode, R.version)
289+
fastr <- function(cl, code) {
290+
parallel::clusterApply(cl, 'dummy', function(...) code)[[1L]]
291+
}
292+
293+
#' @export
294+
print.fastRCluster <- function(x, ...) {
295+
cat("FastR "); NextMethod(x, ...)
296+
}

com.oracle.truffle.r.pkgs/fastRCluster/man/defaultGraalVMHome.Rd

Lines changed: 22 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

0 commit comments

Comments
 (0)