• Demo Module YAML, configure it based on your NF pipeline requirements
name: testname  # Module name 
type: testtype # Module type 
version: v1.0.0
command:
    ## Install Java and Nextflow
    # This section installs the required versions of Java and Nextflow
    - /home/ec2-user/bioinfo/infra/storage/sync_reflib.py -k reflib/tool/install-java-on-worker.sh -d /home/ec2-user/tools/ -f
    - sudo chmod +x /home/ec2-user/tools/install-java-on-worker.sh && sudo sh /home/ec2-user/tools/install-java-on-worker.sh
    - export NXF_VER=23.10.0 # Choose the Nextflow version to use
    - wget -qO- https://get.nextflow.io | bash
    - sudo mv nextflow /usr/bin/
    ## Set Up AWS Variables
    # This section configures the scripts to use your AWS credentials
    - REGION=$(jq '.repository.settings.region' /home/ec2-user/.bpconfig.json | sed 's/"//g')
    - AWS_ACCOUNT_ID=$(jq '.repository.settings.account' /home/ec2-user/.bpconfig.json | sed 's/"//g') 
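    # Note: jq -r emits the raw string directly, e.g.
    # REGION=$(jq -r '.repository.settings.region' /home/ec2-user/.bpconfig.json)
    # which makes the sed step unnecessary; the sed form above works the same way.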
    - /home/ec2-user/bioinfo/infra/repository/ecr_login.py
    # For Private Git Repo
    - /home/ec2-user/bioinfo/infra/storage/sync_reflib.py -k reflib/bucket/nfcode-{{nfcode_version}}.zip -d {{basedir}}/ -f # sync your code to the worker
    - unzip {{basedir}}/nfcode-{{nfcode_version}}.zip -d {{basedir}} && cd {{basedir}}/nfcode-{{nfcode_version}}/ # unzip your code 
    # For Public Git Repo (Note: comment out the private git repo lines above)
    # git clone -b {{nfcode_version}} --single-branch git@github.com:gitaccount/reponame.git {{basedir}}/reponame/ && cd {{basedir}}/reponame/
    ## The script below creates a sample sheet CSV input file; pass the column header names your NF scripts expect via the arguments below:
    # 1. --samplename { column name for samples in the sample sheet }
    # 2. --forcolname { column name for the forward FASTQ in the sample sheet }
    # 3. --revcolname { column name for the reverse FASTQ in the sample sheet }
    - |
       /home/ec2-user/bioinfo/bioinfo/nf_samplesheet.py --sampleids {{all_sample_ids}} --samplename 'sample' --forcolname '' --revcolname '' --outdir {{basedir}}/
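    # For illustration, with --samplename 'sample', --forcolname 'fastq_1' and
    # --revcolname 'fastq_2' (hypothetical column names), the generated
    # {{basedir}}/samplesheet.csv would look like:
    #   sample,fastq_1,fastq_2
    #   sampleA,sampleA_R1.fastq.gz,sampleA_R2.fastq.gz
    #   sampleB,sampleB_R1.fastq.gz,sampleB_R2.fastq.gz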
    # Add required params to the nextflow run command with default values, and add them to the inputs section as well so users can change the values on the analysis page (see the example at the end of the inputs section below).
    - |
       nextflow run main.nf -profile docker --input {{basedir}}/samplesheet.csv \
       --outdir {{basedir}}/results/ --max_cpus {{max_cpus}}

# This section defines the inputs/params (e.g. str, int, float, option types) a user can provide to the command section
inputs:
  nfcode_version:
    val: v1.0.1
    type: option
    show: true # set it to false if you don't want to show this on the analysis page
    label: nf code pipeline version
    options:
      - v1.0.0
      - v1.0.1
      - v1.0.2
      - v1.0.3
    help: "select the NF pipeline version to run"
  max_cpus:
    val: 24
    type: int
    min: 8
    max: 36
    label: Max CPU
    show: false
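  # Example of wiring a new param (hypothetical, not part of this module): add
  # --max_memory {{max_memory}} to the nextflow run command above, then expose
  # it here as an input so it is editable on the analysis page:
  # max_memory:
  #   val: '120.GB'
  #   type: str
  #   label: Max Memory
  #   show: true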
  
## This section defines the outputs (e.g. file, zip, folder) used to save the results and display them on the analysis page
# Add or remove output fields based on pipeline requirements.
outputs:
  input_csv:
    type: file
    val: ''  
    action: template
    formula: _{{basedir}}/samplesheet.csv
  summary_html:
    type: file
    val: ''
    action: template
    formula: _{{basedir}}/results/summary_report_final.html
    tags: [report, html]
  zip_outdir:
    type: file
    val: 'Output.zip'
    action: template
    dir_action: template
    dir_formula: _{{basedir}}/results/
    dir_val: _{{basedir}}/results/
    formula: _{{basedir}}/Output.zip
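  # Note: the intent of zip_outdir above is to pair the results directory
  # (dir_formula/dir_val) with a file template (formula) so the directory is
  # saved as the Output.zip archive.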

Push the Module YAML to the Basepair database

# Module create command: creates a new module in the BP database and returns its id.
basepair module create --file ~/pathtomodule/modulename.yaml

# Module update command: the id returned by create needs to be added to the top of the module YAML file before running the update command, e.g. id: 12345
basepair module update --file ~/pathtomodule/modulename.yaml
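# For illustration, the top of modulename.yaml after adding the returned id (value is hypothetical):
# id: 12345
# name: testname
# type: testtype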

  • Demo Pipeline YAML, configure it based on your NF pipeline requirements
name: 'pipelinename' #name of pipeline
summary: |
         'Summary about pipeline'
description: |
         'Description about pipeline'
datatype: dna-seq # {atac-seq,chip-seq,crispr,cutnrun,cutntag,dna-seq,other,panel,rna-seq,scrna-seq,small-rna-seq,snap-chip,wes,wgs}
visibility: private # choose between public/private
exclude_multiple_triggers: true # This allows multiple samples in a single analysis.
# Choose an instance from the list below based on the max memory and CPUs defined in the Nextflow config for the docker profile.
# Instance types: "c1.medium","c3.2xlarge","c3.4xlarge","c3.8xlarge","c3.large","c3.xlarge","c4.8xlarge","c5d.18xlarge","c5d.24xlarge","c5d.2xlarge","c5d.4xlarge","c5d.9xlarge","c5d.large","c5d.xlarge","c6gd.large","i3.16xlarge","i3.2xlarge","i3.4xlarge","i3.8xlarge","i3en.xlarge","m1.large","m1.medium","m1.small","m1.xlarge","m2.2xlarge","m2.4xlarge","m3.2xlarge","m3.large","m3.medium","m3.xlarge","m5d.12xlarge","m5d.2xlarge","m5d.4xlarge","m5d.8xlarge","m5d.large","m5d.xlarge","m6gd.medium","r3.2xlarge","r3.4xlarge","r3.large","r3.xlarge","t3.micro","t3.nano","t4g.nano","x1e.16xlarge"
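# For illustration, a minimal docker profile in nextflow.config (values are assumptions, not defaults):
#   profiles {
#     docker {
#       docker.enabled = true
#     }
#   }
#   params {
#     max_cpus   = 8
#     max_memory = '60.GB'
#   }
# An i3.2xlarge (8 vCPUs, 61 GiB memory) covers these limits.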
infra:
  instance_type: i3.2xlarge 
save_app_node_id: 'save'
tags: [nf, pipelinename]

validation:
  required:
    filetypes:
      - fastq
    genome: false # Set to true if Basepair genome files will be used
    num_samples: '1+' # '1+' indicates more than one sample; change it to '1' if the pipeline works on a single sample at a time.
    num_controls: '0' # Number of control samples
    paired: true # switch to false for single-end data
    datatype:
      - dna-seq 

edges:
- parent_node_id: '-1'
  app_node_id: 'start'

- parent_node_id: 'start'
  app_node_id: 'modulename'

- parent_node_id: 'modulename'
  app_node_id: 'stop'
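# The edges above define a linear graph for this pipeline: start -> modulename -> stop.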

nodes:
  'save':
    app_id: '9'
    info:
      bucket: bucket

  'start':
    app_id: '5'
    info:
      dirname: compute_basedir

  'modulename':
    app_id: 'id returned by the module create command'
    info:
      num_threads: num_threads #fetched from analysis api
      memory: memory #fetched from analysis api
      bucket: bucket #fetched from analysis api
      all_sample_ids: all_sample_ids #fetched from analysis api
      storage_basedir: storage_basedir #fetched from analysis api
      basedir: compute_basedir #fetched from analysis api
      genome_name: genome_id #fetched from analysis api
      fasta: genome_id #fetched from analysis api
      genome_id: genome_id #fetched from analysis api
      slug: slug #fetched from analysis api
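      # Note: the keys under info are exposed to the module's command section as
      # template variables, e.g. basedir and all_sample_ids are referenced there
      # as {{basedir}} and {{all_sample_ids}}.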

  'stop':
    app_id: '22'
    info:
      compute_basedir: compute_basedir

Push the Pipeline YAML to the Basepair database

# Pipeline create command: creates a new pipeline in the BP database and returns its id.
basepair pipeline create  --file ~/pathtopipeline/pipelinename.yaml

# Pipeline update command: the id returned by create needs to be added to the top of the pipeline YAML file before running the update command, e.g. id: 100001
basepair pipeline update  --file ~/pathtopipeline/pipelinename.yaml -u 100001