Source code for pydamage.main

#!/usr/bin/env python3
import pysam
from pydamage import utils
import multiprocessing
from functools import partial
from pydamage import damage
from pydamage.plot import damageplot
import pandas as pd
import sys
from tqdm import tqdm
import warnings
from pydamage import __version__


[docs]def analyze(bam, wlen=30, show_al=False, mini=2000, cov=0.5, process=1, outdir="", plot=False, verbose=False, force=False): """Runs the pydamage analysis Args: bam(str): Path to alignment (sam/bam/cram) file wlen(int): window length show_al(bool): print alignments representations mini(int): Minimum numbers of reads aligned to consider contigs cov(float): Minimum coverage to consider contig process(int): Number of processes for parellel computing outdir(str): Path to output directory verbose(bool): verbose mode force(bool): force overwriting of results directory Returns: pd.DataFrame: pandas DataFrame containg pydamage results """ if verbose: print(f"Pydamage version {__version__}\n") utils.makedir(outdir, force=force) if not verbose: warnings.filterwarnings("ignore") mode = utils.check_extension(bam) alf = pysam.AlignmentFile(bam, mode) if not alf.has_index(): print(f"BAM file {bam} has no index. Sort BAM file and provide index " "before running pydamage.") sys.exit(1) refs = list(alf.references) if len(refs) == 0: print(f"No aligned sequences in {bam}") return([]) proc = min(len(refs), process) ########################## # Simple loop for debugging ########################## # filt_res = [] # for ref in refs: # res = damage.test_damage(bam=bam, ref=ref, wlen=wlen, # min_al=mini, min_cov=cov, show_al=show_al, # mode=mode, process=process, verbose=verbose) # if res: # filt_res.append(res) ########################## ########################## test_damage_partial = partial(damage.test_damage, bam=bam, wlen=wlen, min_al=mini, min_cov=cov, show_al=show_al, mode=mode, process=process, verbose=verbose) print("Estimating and testing Damage") with multiprocessing.Pool(proc) as p: res = list(tqdm(p.imap(test_damage_partial, refs), total=len(refs))) filt_res = [i for i in res if i] print(f"{len(filt_res)} contigs were successfully analyzed by Pydamage") if plot and len(filt_res) > 0: print("\nGenerating Pydamage plots") plotdir = f"{outdir}/plots" utils.makedir(plotdir, confirm=False) plot_partial = partial(damageplot, outdir=plotdir) with multiprocessing.Pool(proc) as p: list(tqdm(p.imap(plot_partial, filt_res), total=len(filt_res))) df = utils.pandas_processing(res_dict=filt_res, outdir=outdir) return(df)