- Demo Module YAML: configure it based on your NF pipeline requirements
```yaml
name: testname                                # Module name
type: testtype                                # Module type
version: v1.0.0
path: /home/ec2-user/bioinfo/infra/repository
command:
  ## Install Java and Nextflow
  # This section installs the required versions of Java and Nextflow
  - /home/ec2-user/bioinfo/infra/storage/sync_reflib.py -k reflib/tool/install-java-on-worker.sh -d /home/ec2-user/tools/ -f
  - sudo chmod +x /home/ec2-user/tools/install-java-on-worker.sh && sudo sh /home/ec2-user/tools/install-java-on-worker.sh
  - export NXF_VER=23.10.0                    # Choose the Nextflow version to use
  - wget -qO- https://get.nextflow.io | bash
  - sudo mv nextflow /usr/bin/

  ## Set Up AWS Variables
  # This section configures the scripts to use your AWS credentials
  - REGION=$(jq '.repository.settings.region' /home/ec2-user/.bpconfig.json | sed 's/"//g')

  ## For a Private Git Repo
  # Optional param: --branch [release tag / branch name]
  - python3.8 {{path}}/github_integration.py --url https://github.com/gitaccount/reponame.git --clonedir {{basedir}}/reponame/ --branch {{nfcode_version}} && cd {{basedir}}/reponame/

  ## For a Public Git Repo (note: comment out the private-repo command above)
  # - git clone -b {{nfcode_version}} --single-branch git@github.com:gitaccount/reponame.git {{basedir}}/reponame/ && cd {{basedir}}/reponame/

  ## SampleSheet Creation
  # The script below creates the sample sheet CSV input file. Pass the column
  # header names your NF scripts expect via these arguments:
  #   1. --samplename    column name for samples in the sample sheet
  #   2. --forcolname    column name for the forward FASTQ
  #   3. --revcolname    column name for the reverse FASTQ
  #   4. --extra_columnN value --extra_dataN value
  #                      name and value for an extra column, where N is an integer
  - |
    /home/ec2-user/bioinfo/bioinfo/nf_samplesheet.py --sampleids {{all_sample_ids}} --samplename 'sample' --forcolname '' --revcolname '' --extra_column1 '' --extra_data1 '' --outdir {{basedir}}/

  ## Run Nextflow CMD
  # Add the required params to the nextflow run command with default values,
  # and also add them to the inputs section below so users can change the
  # values on the analysis page.
  - |
    nextflow run main.nf -profile docker --input {{basedir}}/samplesheet.csv --outdir {{basedir}}/results/ --max_cpus {{max_cpus}}
  # NOTE: to run on AWS Batch, add -profile awsbatch,docker --awsregion $REGION --awsqueue job-queue-prod

# This section defines the inputs/params (str, int, float, or option type)
# the user can pass to the command section
inputs:
  nfcode_version:
    val: v1.0.1
    type: option
    show: true                                # set to false to hide this on the analysis page
    label: nf code pipeline version
    options:
      - v1.0.0
      - v1.0.1
      - v1.0.2
      - v1.0.3
    help: "select version to run nf pipeline"
  max_cpus:
    val: 24
    type: int
    min: 8
    max: 36
    label: Max CPU
    show: false

# This section defines the outputs (file, zip, folder) used to save the
# results and display them on the analysis page.
# Add or remove output fields based on pipeline requirements.
outputs:
  input_csv:
    type: file
    val: ''
    action: template
    formula: _{{basedir}}/samplesheet.csv
  summary_html:
    type: file
    val: ''
    action: template
    formula: _{{basedir}}/results/summary_report_final.html
    tags: [report, html]
  zip_outdir:
    type: file
    val: 'Output.zip'
    action: template
    dir_action: template
    dir_formula: _{{basedir}}/results/
    dir_val: _{{basedir}}/results/
    formula: _{{basedir}}/Output.zip
```
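To make the sample-sheet step concrete, here is a hedged sketch of the same nf_samplesheet.py call with the column arguments filled in for a hypothetical nf-core-style pipeline. The header names `sample`, `fastq_1`, `fastq_2`, and `strandedness` are assumptions for illustration only; substitute whatever column names your NF scripts actually expect:

```bash
# Hypothetical example only: sample/fastq_1/fastq_2/strandedness are assumed
# nf-core-style headers, not values required by Basepair.
/home/ec2-user/bioinfo/bioinfo/nf_samplesheet.py \
    --sampleids {{all_sample_ids}} \
    --samplename 'sample' \
    --forcolname 'fastq_1' \
    --revcolname 'fastq_2' \
    --extra_column1 'strandedness' --extra_data1 'auto' \
    --outdir {{basedir}}/

# The resulting {{basedir}}/samplesheet.csv would then look roughly like:
#   sample,fastq_1,fastq_2,strandedness
#   sample1,sample1_R1.fastq.gz,sample1_R2.fastq.gz,auto
```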
Push the module YAML to the Basepair database
```bash
# Module create: creates a new module in the BP database and returns an id,
# which needs to be added to the top of the module YAML, e.g. id: 12345
basepair module create --file ~/pathtomodule/modulename.yaml

# Module update: pushes module-level changes to the BP database
basepair module update --file ~/pathtomodule/modulename.yaml
```
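Put together, the create-then-update flow looks like the sketch below. The id value 12345 is a placeholder for whatever `basepair module create` actually returns, and the sed line is just one way to prepend it; editing the file by hand works equally well:

```bash
# Create the module and note the id it returns (12345 is a placeholder).
basepair module create --file ~/pathtomodule/modulename.yaml

# Record the returned id at the top of the YAML so later updates target
# this module; here we prepend it with sed, but a text editor is fine too.
sed -i '1i id: 12345' ~/pathtomodule/modulename.yaml

# After any further edits to the module YAML, sync them to the BP database.
basepair module update --file ~/pathtomodule/modulename.yaml
```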
- Demo Pipeline YAML: configure it based on your NF pipeline requirements
```yaml
name: 'pipelinename'                  # name of pipeline
summary: |
  'Summary about pipeline'
description: |
  'Description about pipeline'
datatype: dna-seq                     # one of {atac-seq,chip-seq,crispr,cutnrun,cutntag,dna-seq,other,panel,rna-seq,scrna-seq,small-rna-seq,snap-chip,wes,wgs}
visibility: private                   # choose between public/private

# Choose an instance from the list below based on the max memory and CPU
# defined in the nextflow config for the docker profile.
# Instance types: "c1.medium","c3.2xlarge","c3.4xlarge","c3.8xlarge","c3.large","c3.xlarge","c4.8xlarge","c5d.18xlarge","c5d.24xlarge","c5d.2xlarge","c5d.4xlarge","c5d.9xlarge","c5d.large","c5d.xlarge","c6gd.large","i3.16xlarge","i3.2xlarge","i3.4xlarge","i3.8xlarge","i3en.xlarge","m1.large","m1.medium","m1.small","m1.xlarge","m2.2xlarge","m2.4xlarge","m3.2xlarge","m3.large","m3.medium","m3.xlarge","m5d.12xlarge","m5d.2xlarge","m5d.4xlarge","m5d.8xlarge","m5d.large","m5d.xlarge","m6gd.medium","r3.2xlarge","r3.4xlarge","r3.large","r3.xlarge","t3.micro","t3.nano","t4g.nano","x1e.16xlarge"
infra:
  instance_type: c1.medium

save_app_node_id: 'save'
tags: [nf, pipelinename]

validation:
  required:
    filetypes:
      - fastq
    genome: false                     # set to true if Basepair genome files will be used
    num_samples: '1'                  # default '1' runs the analysis on a single sample at a time; '1+' indicates more than one sample
    num_controls: '0'                 # number of control samples
    paired: true                      # switch to false for single-end data
    datatype:
      - dna-seq

edges:
  - parent_node_id: '-1'
    app_node_id: 'start'
  - parent_node_id: 'start'
    app_node_id: 'modulename'
  - parent_node_id: 'modulename'
    app_node_id: 'stop'

nodes:
  'save':
    app_id: '9'
    info:
      bucket: bucket
  'start':
    app_id: '5'
    info:
      dirname: compute_basedir
  'modulename':
    app_id: 'Id used at module creation'
    info:
      num_threads: num_threads        # fetched from the analysis API
      memory: memory                  # fetched from the analysis API
      bucket: bucket                  # fetched from the analysis API
      all_sample_ids: all_sample_ids  # fetched from the analysis API
      storage_basedir: storage_basedir # fetched from the analysis API
      basedir: compute_basedir        # fetched from the analysis API
      genome_name: genome_id          # fetched from the analysis API
      fasta: genome_id                # fetched from the analysis API
      genome_id: genome_id            # fetched from the analysis API
      slug: slug                      # fetched from the analysis API
  'stop':
    app_id: '22'
    info:
      compute_basedir: compute_basedir
```
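To illustrate how the edges list defines the execution graph, the following hedged sketch shows how it would extend if a pipeline chained two modules. The node names qc_module and align_module are hypothetical and would have to match keys defined under the nodes section:

```yaml
# Hypothetical two-module graph: start -> qc_module -> align_module -> stop.
# Each app_node_id must match a key defined in the nodes section.
edges:
  - parent_node_id: '-1'
    app_node_id: 'start'
  - parent_node_id: 'start'
    app_node_id: 'qc_module'
  - parent_node_id: 'qc_module'
    app_node_id: 'align_module'
  - parent_node_id: 'align_module'
    app_node_id: 'stop'
```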
Push the pipeline YAML to the Basepair database
```bash
# Pipeline create: creates a new pipeline in the BP database and returns an id,
# which needs to be added to the top of the pipeline YAML, e.g. id: 10000
basepair pipeline create --file ~/pathtopipeline/pipelinename.yaml

# Pipeline update: pushes pipeline-level changes, referencing the id with -u
basepair pipeline update --file ~/pathtopipeline/pipelinename.yaml -u 10000
```
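After creation, the top of the pipeline YAML would carry the returned id, along the lines of this sketch (10000 is a placeholder value, not a real pipeline id):

```yaml
id: 10000                 # id returned by `basepair pipeline create` (placeholder)
name: 'pipelinename'      # name of pipeline
summary: |
  'Summary about pipeline'
```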