1+ #
2+ # Copyright (c) 2019, Oracle and/or its affiliates. All rights reserved.
3+ # DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4+ #
5+ # This code is free software; you can redistribute it and/or modify it
6+ # under the terms of the GNU General Public License version 3 only, as
7+ # published by the Free Software Foundation.
8+ #
9+ # This code is distributed in the hope that it will be useful, but WITHOUT
10+ # ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
11+ # FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
12+ # version 3 for more details (a copy is included in the LICENSE file that
13+ # accompanied this code).
14+ #
15+ # You should have received a copy of the GNU General Public License version
16+ # 3 along with this work; if not, write to the Free Software Foundation,
17+ # Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
18+ #
19+ # Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
20+ # or visit www.oracle.com if you need additional information or have any
21+ # questions.
22+ #
23+
24+ # ' Run your R code faster with FastR!
25+ # '
26+ # ' @description
27+ # ' FastR is an alternative implementation of the R programming language,
28+ # ' which provides superb performance for computation intensive longer
29+ # ' running jobs, but takes bit more time to warm-up. The performance
30+ # ' of FastR is especially good with pure R code with loops, but it can
31+ # ' also deal with C/C++/Fortran code in R packages.
32+ # '
33+ # ' Package \emph{fastRCluster} lets you run FastR inside GNU-R via PSOCK cluster.
34+ # ' With this package, you can move your performance critical R algorithms to FastR,
35+ # ' but keep the rest of your code-base on GNU-R. You can also use this package
36+ # ' to gradually move all your code to FastR.
37+ # '
38+ # ' We recommend using fastRCluster as a back-end for the \emph{future} package.
39+ # ' Keep your configuration of the \emph{future} package isolated from the rest
40+ # ' of the system to be able to simply switch between FastR and other back-ends.
41+ # '
42+ # ' @details
43+ # ' This package does not come with pre-installed FastR. However, FastR can be
44+ # ' installed using the \code{installFastR} function. Once FastR is installed,
45+ # ' you can create cluster nodes that delegate to FastR using \code{makeFastRCluster}.
46+ # ' Note: like with PSOCK cluster, you have to install required packages on the FastR engine.
47+ # ' You can use \code{fastRClusterInstallPackages} to install the necessary packages.
48+ # '
49+ # ' FastR leverages dynamic just-in-time compilation. R functions are first interpreted
50+ # ' and then compiled. The first few executions are much slower. In order to re-use
51+ # ' the compiled code as much as possible, it is good idea to first transfer all the
52+ # ' necessary R functions to the cluster nodes using \code{clusterExport}.
53+ # ' If you send large and computation heavy R function via, e.g., \code{clusterApply},
54+ # ' it will be always deserialized to a different function on the other end in FastR and hence
55+ # ' no compiled code will be reused.
56+ # '
57+ # ' @examples
58+ # ' library(fastRCluster)
59+ # '
60+ # ' # downloads and installs FastR, note: this may take a while
61+ # ' installFastR()
62+ # '
63+ # ' # use the cluster package with FastR
64+ # ' fastrNode <- makeFastRCluster()
65+ # ' print(fastrNode)
66+ # ' # prints: FastR socket cluster with 1 nodes on host ‘localhost’
67+ # '
68+ # ' # install required packages on FastR
69+ # ' fastRClusterInstallPackages('rlang')
70+ # '
71+ # ' # use the cluster package with FastR
72+ # ' # R.version will show that we are running that code on FastR
73+ # ' parallel::clusterApply(fastrNode, 'dummy', function(...) R.version)
74+ # '
75+ # ' # use 'fastr' convenient wrapper around clusterApply
76+ # ' gg <- fastr(fastrNode, ggplot2::qplot(mpg, data=mtcars, geom="density",
77+ # ' main=paste0("Generated by ", R.version$engine, "[", Sys.getpid(), "]")))
78+ # ' plot(gg)
79+ # '
80+ # ' # transfer data and a helper function to the global environmnet of the cluster nodes
81+ # ' largeDataSet <- matrix(runif(1000000), 1000, 1000)
82+ # ' myComputation <- function(x) {
83+ # ' x <- x/sum(x)
84+ # ' res <- 0
85+ # ' colsums <- colSums(x)
86+ # ' rowsums <- rowSums(x)
87+ # ' for(i in seq_along(1:nrow(x))){
88+ # ' for(j in seq_along(1:ncol(x))){
89+ # ' temp <- log((x[i,j]/(colsums[j]*rowsums[i])))
90+ # ' res <- res + x[i,j] * if(is.finite(temp)) temp else 0
91+ # ' }
92+ # ' }
93+ # ' res
94+ # ' }
95+ # ' parallel::clusterExport(fastrNode, c('largeDataSet', 'myComputation'))
96+ # ' # now you can refer to 'largeDataSet' and 'myComputation'
97+ # ' fastr(fastrNode, myComputation(largeDataSet))
98+ # '
99+ # ' # use the future package with FastR
100+ # ' if (require(future)) {
101+ # ' future::plan(future::cluster, workers = makeFastRCluster())
102+ # ' val %<-% R.version
103+ # ' print(val)
104+ # ' }
105+ # '
106+ # ' @keywords internal
107+ " _PACKAGE"
108+
109+ # ' Default GraalVM installation path
110+ # '
111+ # ' Gives the path to the default location of GraalVM installation that includes FastR.
112+ # ' The default location is inside the directory where the fastRCluster was installed.
113+ # '
114+ # ' \code{\link{getGraalVMHome()}} uses this value as the default,
115+ # ' if no other value is explicitly configured via R options or an environment variable.
116+ # '
117+ # ' @return The default GraalVM installation path
118+ # ' @seealso \code{\link{getGraalVMHome}}
119+ # ' @export
120+ defaultGraalVMHome <- function () {
121+ fastrPkgHome <- find.package(' fastRCluster' )
122+ file.path(fastrPkgHome , ' graalvm' )
123+ }
124+
125+ # ' Currently configured GraalVM path
126+ # '
127+ # ' Gives the path that is used as a default value of the \code{graalVMHome} parameter
128+ # ' for most of the functions in the fastRCluster package.
129+ # '
130+ # ' The value is taken from (in this order)
131+ # ' \enumerate{
132+ # ' \item R option "graalvm.home"
133+ # ' \item environment variable \code{GRAALVM_HOME}
134+ # ' \item \code{\link{defaultGraalVMHome}()}
135+ # ' }
136+ # '
137+ # ' @return The currently configured path to GraalVM installation.
138+ # ' @seealso \code{\link{defaultGraalVMHome}}
139+ # ' @export
140+ getGraalVMHome <- function () getOption(" graalvm.home" , Sys.getenv(' GRAALVM_HOME' , defaultGraalVMHome()));
141+
142+ # ' Installs FastR
143+ # '
144+ # ' Downloads GraalVM Community Edition and installs the R ("FastR") component for GraalVM.
145+ # '
146+ # ' Note: the download is around 300MB. The installation usually takes few seconds.
147+ # ' If the given directory already contains GraalVM, this function installs the R ("FastR") component.
148+ # '
149+ # ' @param path Path to a directory where GraalVM should be installed. Defaults to \code{\link{defaultGraalVMHome}()}.
150+ # ' @return the path where GraalVM was installed if successful (invisible), otherwise this function raises an error.
151+ # ' @seealso \code{\link{defaultGraalVMHome}}
152+ # ' @export
153+ installFastR <- function (path = defaultGraalVMHome()) {
154+ toRemove <- character (0 )
155+ on.exit(unlink(toRemove , recursive = T , force = T )) # note: unlink seems to be OK with non-existing files
156+ if (file.exists(file.path(path , ' bin' , ' Rscript' ))) {
157+ message(sprintf(" The directory '%s' appears to already contain GraalVM installation with FastR. Doing nothing." , path ))
158+ return (invisible (path ))
159+ } else if (file.exists(file.path(path , ' bin' , ' gu' ))) {
160+ message(sprintf(" The directory '%s' appears to already contain GraalVM installation. FastR will be installed in it." , path ))
161+ } else {
162+ if (! file.exists(path )) {
163+ message(sprintf(" The path '%s' does not exist. Creating it." , path ))
164+ dir.create(path )
165+ } else if (length(list.files(path )) > 0L ) {
166+ message(sprintf(" The directory '%s' is not empty. Choose different directory or remove its contents." , path ))
167+ }
168+ tarFile <- paste0(tempfile(), ' .tar.gz' )
169+ url <- if (Sys.info()[[" sysname" ]] == " Darwin" )
170+ ' https://github.com/oracle/graal/releases/download/vm-19.0.2/graalvm-ce-darwin-amd64-19.0.2.tar.gz' else
171+ ' https://github.com/oracle/graal/releases/download/vm-19.0.2/graalvm-ce-linux-amd64-19.0.2.tar.gz' ;
172+ toRemove <- tarFile
173+ download.file(url , tarFile )
174+ workDir <- dirname(path )
175+ origFiles <- list.files(workDir )
176+ untarRes <- untar(tarFile , exdir = workDir )
177+ if (untarRes != 0L ) {
178+ stop(sprintf(" An error occurred when extracting GraalVM files to '%s'. Is this directory writeable? Error code: %d." , path , untarRes ))
179+ }
180+ graalVMOrigDir <- setdiff(list.files(workDir ), origFiles )
181+ renRes <- file.rename(file.path(workDir , graalVMOrigDir ), file.path(workDir , basename(path )))
182+ if (! all(renRes )) {
183+ stop(sprintf(" An error occurred when moving GraalVM files to '%s'. Is this directory writeable? Error code: %d." , path , renRes ))
184+ }
185+ }
186+ guRes <- system2(file.path(path , ' bin' , ' gu' ), args = c(' install' , ' R' ))
187+ if (guRes != 0 ) {
188+ stop(" An error occurred during installation of FastR. Please report at https://github.com/oracle/fastr." )
189+ }
190+ invisible (path )
191+ }
192+
193+ # ' Installs packages on the FastR engine
194+ # '
195+ # ' @param ... Parameters passed to the R function \code{install.packages} that is run on the FastR engine.
196+ # ' @return Invisible \code{NULL}
197+ # ' @export
198+ # ' @examples
199+ # ' fastRClusterInstallPackages(c('rlang', 'ggplot2'), INSTALL_opts='--no-test-load')
200+ fastRClusterInstallPackages <- function (... ) {
201+ cl <- makeFastRCluster(1 , metehods = F )
202+ on.exit(stopCluster(cl ))
203+ parallel :: clusterApply(cl , list (list (... )), function (args ) do.call(install.packages , args ))
204+ invisible (NULL )
205+ }
206+
207+ # ' Creates cluster nodes that delegate to FastR
208+ # '
209+ # ' FastR is an alternative implementation of the R programming language,
210+ # ' which provides superb performance for computation intensive and longer
211+ # ' running jobs, but takes bit more time to warm-up.
212+ # '
213+ # ' @param nnodes Number of nodes to be created.
214+ # ' @param graalVMHome Path to the installation directory of GraalVM and FastR. Default value is obtained from \code{getGraalVMHome()}.
215+ # ' @param mode Mode in which to run FastR. See the FastR documentation on the details on the difference between jvm and native modes.
216+ # ' @param polyglot Run FastR in a polyglot mode: other installed GraalVM languages will be available via \code{eval.polyglot}. See \code{installGraalVMLanguage}. Allowed only for mode 'jvm' (the default).
217+ # ' @param fastROptions Additional options for the FastR engine.
218+ # ' @param ... Additional options forwarded to \code{makePSOCKcluster}
219+ # ' @return The cluster object that can be passed to functions like \code{parallel::clusterApply}.
220+ # ' @seealso \code{\link{getGraalVMHome}}
221+ # ' @export
222+ # ' @examples
223+ # ' fastrNode <- makeFastRCluster()
224+ # ' parallel::clusterApply(fastrNode, 'dummy', function(...) R.version)
225+ # ' fastr(fastrNode, R.version)
226+ makeFastRCluster <- function (nnodes = 1L , graalVMHome = getGraalVMHome(), mode = c(' jvm' , ' native' ), polyglot = FALSE , fastROptions = NULL , ... ) {
227+ nnodes <- as.integer(nnodes )
228+ if (is.na(nnodes ) || nnodes < 1L ) {
229+ stop(" 'nnodes' must be >= 1" )
230+ }
231+ parallel ::: .check_ncores(nnodes )
232+
233+ if (! dir.exists(graalVMHome )) {
234+ if (graalVMHome == defaultGraalVMHome()) {
235+ stop(sprintf(paste0(" It seems that FastR was not installed yet. " ,
236+ " Use installFastR() to install GraalVM and FastR to the default location '%s', " ,
237+ " or set argument 'graalVMHome' to a directory that contains GraalVM and FastR installation. " ,
238+ " See ?getGraalVMHome for more details." ), defaultGraalVMHome()))
239+ } else {
240+ stop(sprintf(paste0(" The GraalVM directory '%s' does not exist. " ,
241+ " Use installFastR('%s') to install GraalVM and FastR to that directory." ),
242+ graalVMHome , graalVMHome ))
243+ }
244+ }
245+ if (! file.exists(file.path(graalVMHome , ' bin' , ' gu' ))) {
246+ stop(sprintf(" The GraalVM directory '%s' appears to be corrupt. You can remove it and use installFastR('%s') to re-install GraalVM and FastR." , graalVMHome , graalVMHome ))
247+ }
248+ if (! file.exists(file.path(graalVMHome , ' bin' , ' Rscript' ))) {
249+ stop(sprintf(" The GraalVM installation '%s' does not contain FastR. Use installFastR('%s') to install FastR." , graalVMHome , graalVMHome ))
250+ }
251+ if (any(c(' --jvm' , ' --native' ) %in% fastROptions )) {
252+ warning(" Ignoring --jvm/--native in 'fastROptions' argument. Use the 'mode' argument instead." )
253+ }
254+ if (any(c(' --polyglot' ) %in% fastROptions )) {
255+ warning(" Ignoring --polyglot in 'fastROptions' argument. Use the 'polyglot' argument instead." )
256+ }
257+
258+ mode <- match.arg(mode )
259+ options <- fastROptions [grep(' --jvm' , fastROptions )]
260+ options <- options [grep(' --native' , fastROptions )]
261+ options <- options [grep(' --polyglot' , fastROptions )]
262+ if (polyglot ) {
263+ if (mode != ' jvm' ) {
264+ stop(" polyglot is only available when mode = 'jvm'" )
265+ }
266+ options <- c(' --polyglot' , options )
267+ }
268+ options <- switch (mode ,
269+ jvm = c(' --jvm' , options ),
270+ native = c(' --native' , options ))
271+
272+ result <- parallel :: makePSOCKcluster(nnodes , rscript = file.path(graalVMHome , ' bin' , ' Rscript' ), rscript_args = options , ... )
273+ class(result ) <- c(" fastRCluster" , class(result ))
274+ result
275+ }
276+
277+ # ' Runs given code in the FastR engine.
278+ # '
279+ # ' This is a convenient wrapper around \code{clusterApply} that runs
280+ # ' the given code on the first node in the cluster.
281+ # '
282+ # ' @param cl FastR cluster object. Use \code{\link{makeFastRCluster}()} to get one.
283+ # ' @param code The code that will be run on the FastR node. It will not be evaluated in the current session.
284+ # ' @return The result of evaluating the code
285+ # ' @export
286+ # ' @examples
287+ # ' fastrNode <- makeFastRCluster()
288+ # ' fastr(fastrNode, R.version)
289+ fastr <- function (cl , code ) {
290+ parallel :: clusterApply(cl , ' dummy' , function (... ) code )[[1L ]]
291+ }
292+
293+ # ' @export
294+ print.fastRCluster <- function (x , ... ) {
295+ cat(" FastR " ); NextMethod(x , ... )
296+ }
0 commit comments