-- P --
pipeline_attach()
pipeline_detach()
pipeline_run()
pipeline_skeleton()
These functions allow users to attach (and detach) the pipeline functions
and scenario to the R search path, which can be useful during development.
They are attached as local:pipelinedevenv. Whether this is a good idea or
not is to be determined.
pipeline_attach(
scenario = NULL,
scenario_default = getOption("pipeline.scenario_default", "default"),
pipeline_dir = ".",
relative_config_file = getOption("pipeline.config_file", "config.R"),
relative_function_dir = getOption("pipeline.function_dir", "R"),
relative_output_dir = getOption("pipeline.output_dir", "output")
)
pipeline_detach()
scenario |
Configuration you wish to attach. Character input will be treated as the configuration you wish to consider. In
this case the configuration file will first be parsed looking for a
named-list entry corresponding to the value of the `scenario_default` argument. |
scenario_default |
The default scenario to consider. If |
pipeline_dir |
The directory you wish to run the pipeline relative to. |
relative_config_file |
The configuration file. Must be a non-nested path given relative to `pipeline_dir`. |
relative_function_dir |
The directory to look for user-defined pipeline functions. Must be a non-nested path given relative to `pipeline_dir`. |
relative_output_dir |
The directory for output. Must be a non-nested path given relative to `pipeline_dir`. |
NULL (invisibly). Called only for side effects.
# generate a demo pipeline with multiple scenarios (the default, single = FALSE)
dir <- pipeline_skeleton(tempfile())
# If we attach the 'default' scenario then both the CONFIG and the
# defined functions will be available to us.
pipeline_attach(scenario = "default", pipeline_dir = dir)
CONFIG
exists("load_dat") && is.function(load_dat)
pipeline_attach(scenario = "production", pipeline_dir = dir)
CONFIG
pipeline_detach()
exists("load_dat")
unlink(dir)
pipeline_run() manages the running of a user-defined pipeline, simplifying
object caching and configuration management across different scenarios.
pipeline_run(
x,
...,
scenario = NULL,
scenario_default = getOption("pipeline.scenario_default", "default"),
pipeline_dir = ".",
relative_config_file = getOption("pipeline.config_file", "config.R"),
relative_function_dir = getOption("pipeline.function_dir", "R"),
relative_output_dir = getOption("pipeline.output_dir", "output"),
force = FALSE,
saveRDS_args = list(),
readRDS_args = list(),
return = TRUE
)
x |
R expression of pipeline assignments. Normally this will involve multiple assignments, which need to be wrapped in braces (`{ }`) so that they form a single expression. |
... |
Not currently used. |
scenario |
Character input will be treated as the configurations you wish to loop over.
In this case the configuration file will first be parsed looking for a
named-list entry corresponding to the value of the `scenario_default` argument. |
scenario_default |
The default scenario to consider. If |
pipeline_dir |
The directory you wish to run the pipeline relative to. |
relative_config_file |
The configuration file. Must be a non-nested path given relative to `pipeline_dir`. |
relative_function_dir |
The directory to look for user-defined pipeline functions. Must be a non-nested path given relative to `pipeline_dir`. |
relative_output_dir |
The directory for output. Must be a non-nested path given relative to `pipeline_dir`. |
force |
Do you want to force a run of the pipeline? If TRUE, then cached objects are removed and the pipeline is (re)run. If a character vector, then the corresponding object(s) are removed from the cache and the pipeline is rerun. |
saveRDS_args |
List of additional arguments passed to `saveRDS()`. This argument allows you to pass additional arguments to that function
(e.g. `compress`). |
readRDS_args |
List of additional arguments passed to `readRDS()`. |
return |
Should the output be returned? Defaults to TRUE, but when running across multiple scenarios and with outputs
that use a large amount of memory it can be useful to set this to FALSE. |
A named list of outputs for each configuration.
# generate a demo pipeline with a single scenario
dir <- pipeline_skeleton(tempfile(), single = TRUE)
# Note the configuration file and the folder of R functions
list.files(dir, all.files = TRUE, recursive = TRUE, no.. = TRUE)
#> [1] "config.R" "data/mtcars.csv" "pipeline.R" "R/load_dat.R"
#> [5] "R/plot_dat.R" "R/wrangle_dat.R"
# Run the pipeline
out <- pipeline_run(
{
raw <- load_dat(CONFIG$in_csv)
clean <- wrangle_dat(raw, CONFIG$rows)
plot <- plot_dat(clean, CONFIG$out_plot)
},
pipeline_dir = dir
)
# output in a list
str(out$default$raw)
#> 'data.frame': 32 obs. of 12 variables:
#> $ car : chr "Mazda RX4" "Mazda RX4 Wag" "Datsun 710" "Hornet 4 Drive" ...
#> $ mpg : num 21 21 22.8 21.4 18.7 18.1 14.3 24.4 22.8 19.2 ...
#> $ cyl : int 6 6 4 6 8 6 8 4 4 6 ...
#> $ disp: num 160 160 108 258 360 ...
#> $ hp : int 110 110 93 110 175 105 245 62 95 123 ...
#> $ drat: num 3.9 3.9 3.85 3.08 3.15 2.76 3.21 3.69 3.92 3.92 ...
#> $ wt : num 2.62 2.88 2.32 3.21 3.44 ...
#> $ qsec: num 16.5 17 18.6 19.4 17 ...
#> $ vs : int 0 0 1 1 0 1 0 1 1 1 ...
#> $ am : int 1 1 1 0 0 0 0 0 0 0 ...
#> $ gear: int 4 4 4 3 3 3 3 4 4 4 ...
#> $ carb: int 4 4 1 1 2 1 4 2 2 4 ...
str(out$default$clean)
#> 'data.frame': 10 obs. of 12 variables:
#> $ car : chr "Mazda RX4" "Mazda RX4 Wag" "Datsun 710" "Hornet 4 Drive" ...
#> $ mpg : num 21 21 22.8 21.4 18.7 18.1 14.3 24.4 22.8 19.2
#> $ cyl : int 6 6 4 6 8 6 8 4 4 6
#> $ disp: num 160 160 108 258 360 ...
#> $ hp : int 110 110 93 110 175 105 245 62 95 123
#> $ drat: num 3.9 3.9 3.85 3.08 3.15 2.76 3.21 3.69 3.92 3.92
#> $ wt : num 2.62 2.88 2.32 3.21 3.44 ...
#> $ qsec: num 16.5 17 18.6 19.4 17 ...
#> $ vs : int 0 0 1 1 0 1 0 1 1 1
#> $ am : int 1 1 1 0 0 0 0 0 0 0
#> $ gear: int 4 4 4 3 3 3 3 4 4 4
#> $ carb: int 4 4 1 1 2 1 4 2 2 4
out$default$plot
#> [1] "/tmp/Rtmp1xeszi/fileaecf463f6a9c/output/default/plot.png"
#> attr(,"OUTFILE")
#> [1] TRUE
unlink(dir)
Sets up an example pipeline which can be used as the basis for your own.
pipeline_skeleton(dir = "DemoPipeline", single = FALSE)
dir |
Directory you wish to create the pipeline in. |
single |
Whether the demo should use a single scenario (TRUE) or multiple scenarios (FALSE). |
Absolute path of the created pipeline directory (invisibly).
dir <- pipeline_skeleton(tempfile())
# Note the configuration file and the folder of R functions
list.files(dir, all.files = TRUE, recursive = TRUE, no.. = TRUE)
#> [1] "config.R" "data/mtcars.csv" "pipeline.R" "R/load_dat.R"
#> [5] "R/plot_dat.R" "R/wrangle_dat.R"
unlink(dir)