Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 3 additions & 2 deletions pygenometracks/makeTracksFile.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,8 +8,9 @@
def parse_arguments(args=None):

parser = argparse.ArgumentParser(formatter_class=argparse.ArgumentDefaultsHelpFormatter,
description='Facilitates the creation of a configuration file for pyGenomeTracks. The program takes a list '
'of files and does the boilerplate for the configuration file.',
description='Facilitates the creation of a configuration file for '
'pyGenomeTracks. The program takes a list of files and does the '
'boilerplate for the configuration file.',
usage="%(prog)s --trackFiles <bigwig file> <bed file> etc. -o tracks.ini")

# define the arguments
Expand Down
11 changes: 7 additions & 4 deletions pygenometracks/plotTracks.py
Original file line number Diff line number Diff line change
Expand Up @@ -248,7 +248,11 @@ def parse_arguments(args=None):


def main(args=None):
"""
Main function to plot the tracks.

:param args: arguments to parse. Default is None.
"""
args = parse_arguments().parse_args(args)

# Identify the regions to plot:
Expand Down Expand Up @@ -277,7 +281,7 @@ def main(args=None):
track_label_width=args.trackLabelFraction,
plot_regions=regions, plot_width=args.plotWidth)

# Create dir if dir does not exists:
# Create dir if dir does not exist:
# Modified from https://stackoverflow.com/questions/12517451/automatically-creating-directories-with-file-output
os.makedirs(os.path.dirname(os.path.abspath(args.outFileName)), exist_ok=True)

Expand All @@ -289,9 +293,8 @@ def main(args=None):
for chrom, start, end in regions:
file_name = f"{file_prefix}_{chrom}-{start}-{end}.{file_suffix}"
if end - start < 200000:
warnings.warn("A region shorter than 200kb has been "
"detected! This can be too small to return "
"a proper TAD plot!\n")
warnings.warn("A region shorter than 200kb has been detected! This can be too small to return a proper "
"TAD plot!\n")
sys.stderr.write(f"saving {file_name}\n")
current_fig = trp.plot(file_name, chrom, start, end, title=args.title,
h_align_titles=args.trackLabelHAlign,
Expand Down
75 changes: 24 additions & 51 deletions pygenometracks/readGtf.py
Original file line number Diff line number Diff line change
Expand Up @@ -42,8 +42,7 @@ class ReadGtf(object):

"""

def __init__(self, file_path, prefered_name="transcript_name",
merge_transcripts=True,
def __init__(self, file_path, prefered_name="transcript_name", merge_transcripts=True,
merge_overlapping_exons=True):
"""
:param file_path: the path of the gtf file
Expand All @@ -53,11 +52,8 @@ def __init__(self, file_path, prefered_name="transcript_name",
self.file_type = 'bed12'

# list of bed fields
self.fields = ['chromosome', 'start', 'end',
'name', 'score', 'strand',
'thick_start', 'thick_end',
'rgb', 'block_count',
'block_sizes', 'block_starts']
self.fields = ['chromosome', 'start', 'end', 'name', 'score', 'strand', 'thick_start', 'thick_end', 'rgb',
'block_count', 'block_sizes', 'block_starts']

self.BedInterval = collections.namedtuple('BedInterval', self.fields)
# I think the name which should be written
Expand All @@ -83,15 +79,13 @@ def __init__(self, file_path, prefered_name="transcript_name",
else:
if self.merge_transcripts:
self.length = self.db.count_features_of_type("gene")
self.all_transcripts = self.db.features_of_type("gene",
order_by='start')
self.all_transcripts = self.db.features_of_type("gene", order_by='start')
else:
self.length = self.db.count_features_of_type("transcript")
if self.length == 0:
# This is unexpected as the database contains things
log.warning("No transcript found consider. If your gtf only have genes, use `merge_transcripts = true`")
self.all_transcripts = self.db.features_of_type("transcript",
order_by='start')
self.all_transcripts = self.db.features_of_type("transcript", order_by='start')

def __iter__(self):
return self
Expand All @@ -106,43 +100,34 @@ def __next__(self):

def get_bed_interval(self):
"""
Process a transcript from the database,
retrieve all the values and return
a namedtuple object
Process a transcript from the database, retrieve all the values and return a namedtuple object.
"""
tr = next(self.all_transcripts)

# The name would be the prefered_name if exists
try:
trName = tr.attributes[self.prefered_name][0]
except KeyError:
# Else try to guess the prefered_name from exons:
try:
trName = set([e.attributes[self.prefered_name][0]
for e in
self.db.children(tr,
featuretype='exon',
order_by='start')]).pop()
trName = set(
[e.attributes[self.prefered_name][0] for e in self.db.children(tr, featuretype='exon', order_by='start')]
).pop()
except KeyError:
# Else take the transcript id
trName = tr.id
# If the cds is defined in the gtf,
# use it to define the thick start and end
# The gtf is 1-based closed intervalls
# and bed are 0-based half-open so:
# I need to remove one from each start

# If the cds is defined in the gtf, use it to define the thick start and end. The gtf is 1-based closed
# intervals and bed are 0-based half-open so: I need to remove one from each start
try:
cds_start = next(self.db.children(tr,
featuretype='CDS',
order_by='start')).start - 1
cds_end = next(self.db.children(tr,
featuretype='CDS',
order_by='-start')).end
cds_start = next(self.db.children(tr, featuretype='CDS', order_by='start')).start - 1
cds_end = next(self.db.children(tr, featuretype='CDS', order_by='-start')).end
except StopIteration:
# If the CDS is not defined, then it is set to the start
# as proposed here:
# If the CDS is not defined, then it is set to the start as proposed here:
# https://genome.ucsc.edu/FAQ/FAQformat.html#format1
cds_start = tr.start - 1
cds_end = tr.start - 1

# Get all exons starts and end to get lengths
exons = [e for e in self.db.children(tr, featuretype='exon', order_by='start')]
if len(exons) > 0:
Expand All @@ -158,44 +143,32 @@ def get_bed_interval(self):
current_end = e.end
else:
if e.start > current_end:
# This is a non-overlapping exon
# We store the previous exon:
# This is a non-overlapping exon. We store the previous exon:
exons_starts.append(current_start)
exons_ends.append(current_end)
# We set the current:
current_start = e.start - 1
current_end = e.end
else:
# This is an overlapping exon
# We update current_end if necessary
# This is an overlapping exon. We update current_end if necessary
current_end = max(current_end, e.end)
if current_start != -1:
# There is a last exon to store:
exons_starts.append(current_start)
exons_ends.append(current_end)
else:
exons_starts = [e.start - 1
for e in
exons]
exons_ends = [e.end
for e in
exons]
exons_starts = [e.start - 1 for e in exons]
exons_ends = [e.end for e in exons]
else:
# This means that the gtf does not have exon info for this gene/transcript:
try:
exons_starts = [[ch.start - 1
for ch in self.db.children(tr,
order_by='start')][0]]
exons_ends = [[ch.end
for ch in self.db.children(tr,
order_by='end',
reverse=True)][0]]
exons_starts = [[ch.start - 1 for ch in self.db.children(tr, order_by='start')][0]]
exons_ends = [[ch.end for ch in self.db.children(tr, order_by='end', reverse=True)][0]]
except IndexError:
exons_starts = [tr.start - 1]
exons_ends = [tr.end]
exons_length = [e - s for s, e in zip(exons_starts, exons_ends)]
relative_exons_starts = [s - (tr.start - 1) for s in exons_starts]
line_values = [tr.chrom, tr.start - 1, tr.end, trName, 0, tr.strand,
cds_start, cds_end, "0", len(exons_starts),
line_values = [tr.chrom, tr.start - 1, tr.end, trName, 0, tr.strand, cds_start, cds_end, "0", len(exons_starts),
exons_length, relative_exons_starts]
return self.BedInterval._make(line_values)
128 changes: 128 additions & 0 deletions pygenometracks/setup.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,128 @@
# -*- coding: utf-8 -*-

import os
import sys
import subprocess
import re

from setuptools import setup, find_packages
from setuptools.command.sdist import sdist as _sdist
from setuptools.command.install import install as _install

# Template for the generated pygenometracks/_version.py file. The single '%s'
# placeholder is filled with the version string by update_version_py(), and the
# resulting "__version__ = '...'" line is the one get_version() looks for.
VERSION_PY = """
# This file is originally generated from Git information by running 'setup.py
# version'. Distribution tarballs contain a pre-generated copy of this file.

__version__ = '%s'
"""


def update_version_py():
    """
    Regenerate ``pygenometracks/_version.py`` from ``git describe``.

    The function is a no-op (apart from printing a message) when the current
    working directory has no ``.git`` directory, when ``git`` cannot be
    started, or when ``git describe --tags --always`` exits with a non-zero
    status. Otherwise the described version is substituted into the
    ``VERSION_PY`` template and written to ``pygenometracks/_version.py``.

    :return: None
    """
    if not os.path.isdir(".git"):
        print("This does not appear to be a Git repository.")
        return
    try:
        p = subprocess.Popen(["git", "describe", "--tags", "--always"],
                             stdout=subprocess.PIPE)
    except EnvironmentError:
        print("unable to run git, leaving pygenometracks/_version.py alone")
        return
    stdout = p.communicate()[0]
    if p.returncode != 0:
        print("unable to run git, leaving pygenometracks/_version.py alone")
        return
    # communicate() yields bytes because the pipe is opened in binary mode.
    # Decode before formatting, otherwise the template would receive the
    # repr of a bytes object ("b'...'") instead of the plain version string.
    ver = stdout.decode("utf-8").strip()
    # The context manager guarantees the file is flushed and closed even if
    # the write fails.
    with open(os.path.join("pygenometracks", "_version.py"), "w") as f:
        f.write(VERSION_PY % ver)
    print(f"set pygenometracks/_version.py to '{ver}'")


def get_version():
    """
    Read the package version from ``pygenometracks/_version.py``.

    The path is resolved relative to the current working directory.

    :return: the text captured from the first line that starts with
             ``__version__ = '...'``, or None when the file cannot be opened
             or contains no such line.
    """
    try:
        f = open(os.path.join("pygenometracks", "_version.py"))
    except EnvironmentError:
        return None
    # Only the open() is guarded above; the context manager closes the handle
    # on every exit path, including the early return below.
    with f:
        for line in f:
            mo = re.match("__version__ = '([^']+)'", line)
            if mo:
                return mo.group(1)
    return None


class sdist(_sdist):
    """Source-distribution command that stamps the version found in _version.py."""

    def run(self):
        """Set the distribution version from get_version(), then run the stock sdist."""
        # NOTE: the update_version_py() call is disabled for this command, so
        # the version is only read from the existing file, never regenerated.
        version = get_version()
        self.distribution.metadata.version = version
        return super().run()

# Install class to check for external dependencies from OS environment


class install(_install):
    """
    Install command that stamps the version found in _version.py and offers a
    helper to probe for external programs.
    """

    def run(self):
        """Set the distribution version from get_version(), then run the stock install."""
        # NOTE: the update_version_py() call is disabled for this command, so
        # the version is only read from the existing file, never regenerated.
        self.distribution.metadata.version = get_version()
        _install.run(self)

    def checkProgramIsInstalled(self, program, args, where_to_download,
                                affected_tools):
        """
        Check that an external program can be launched.

        :param program: name (or path) of the executable to start
        :param args: single command-line argument passed to the program
        :param where_to_download: text shown to the user telling where the
                                  program can be obtained
        :param affected_tools: text listing the tools that need the program
        :return: True if the program could be started, False otherwise (a
                 message is written to stderr in that case)
        """
        try:
            # stdin is detached so a probe that reads its input cannot block
            # the installation waiting on the terminal.
            proc = subprocess.Popen([program, args],
                                    stdin=subprocess.DEVNULL,
                                    stderr=subprocess.PIPE,
                                    stdout=subprocess.PIPE)
        except EnvironmentError:
            # Raised when the executable cannot be found or started.
            msg = (f"\n**{program} not found. This program is needed"
                   " for the following tools to work properly:\n"
                   f"{affected_tools}\n{program} can be downloaded"
                   f" from here:\n{where_to_download}\n")
            sys.stderr.write(msg)
            return False
        except Exception as e:
            sys.stderr.write(f"Error: {e}")
            return False
        # Drain both pipes and reap the child so no process or file
        # descriptor is left behind; the exit status is deliberately ignored
        # because only "can it be launched" matters here.
        proc.communicate()
        return True


# Runtime requirements handed to setuptools through install_requires.
install_requires_py = ["numpy >=1.20",
                       "matplotlib >=3.1.1,<=3.6.2",
                       "intervaltree >=2.1.0",
                       "pyBigWig >=0.3.16",
                       "future >=0.17.0",
                       "hicmatrix >=15",
                       "pysam >=0.14",
                       "pytest",
                       "gffutils >=0.9",
                       "pybedtools >=0.8.1",
                       "tqdm >=4.20",
                       "bx-python >=0.8.13",
                       "pyfaidx >=0.1.3"
                       ]

# Read the README through a context manager so the handle is closed, and with
# an explicit encoding so the result does not depend on the build machine's
# locale. NOTE(review): assumes README.md is stored as UTF-8 - confirm.
with open('README.md', encoding='utf-8') as readme_file:
    readme_text = readme_file.read()

setup(
    name='pyGenomeTracks',
    version=get_version(),
    author='Lucille Lopez-Delisle, Leily Rabbani, Joachim Wolf, Björn Grüning',
    packages=find_packages(exclude=['tests']),
    scripts=['bin/make_tracks_file', 'bin/pgt', 'bin/pyGenomeTracks'],
    include_package_data=True,
    package_dir={'pygenometracks': 'pygenometracks'},
    url='http://pygenometracks.readthedocs.io',
    # NOTE(review): this passes a file name where setuptools documents a
    # license identifier/text - confirm this is intended.
    license='LICENSE.txt',
    description='Command-line tool to make beautiful and reproducible genome browser snapshots',
    # Relative "./docs/" links are rewritten to point at the files of the
    # matching version on GitHub. get_version() returns None when _version.py
    # is missing, in which case this concatenation raises TypeError.
    long_description=readme_text.replace("./docs/", "https://raw.githubusercontent.com/deeptools/pyGenomeTracks/" + get_version() + "/docs/"),
    long_description_content_type="text/markdown",
    classifiers=[
        'Intended Audience :: Science/Research',
        'Topic :: Scientific/Engineering :: Bio-Informatics'],
    install_requires=install_requires_py,
    zip_safe=False,
    python_requires='>=3.7, <4',
    cmdclass={'sdist': sdist, 'install': install}
)
14 changes: 14 additions & 0 deletions pygenometracks/tests/test_gwasTrack.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
import matplotlib as mpl
from matplotlib.testing.compare import compare_images
from tempfile import NamedTemporaryFile
import os.path
import pygenometracks.plotTracks
# Select matplotlib's non-interactive Agg backend so no display is required.
mpl.use('agg')

# Directory named "test_data" located next to this test module.
ROOT = os.path.join(os.path.dirname(os.path.abspath(__file__)),
                    "test_data")

# Track configuration text for this test; only the [x-axis] section is
# declared so far.
browser_tracks = """
[x-axis]
"""
# TODO: this is incomplete?
# NOTE(review): no test function is defined in this module yet, and the
# compare_images / NamedTemporaryFile / pygenometracks.plotTracks imports are
# not used by any visible code.
24 changes: 19 additions & 5 deletions pygenometracks/tracks/BedTrack.py
Original file line number Diff line number Diff line change
Expand Up @@ -294,10 +294,8 @@ def process_bed(self, plot_regions=None):
bed_file_h, total_length = self.get_bed_handler(plot_regions)
self.bed_type = bed_file_h.file_type

if self.properties['color'] == 'bed_rgb' and \
self.bed_type not in ['bed12', 'bed9']:
self.log.warning("*WARNING* Color set to 'bed_rgb', "
"but bed file does not have the rgb field. "
if self.properties['color'] == 'bed_rgb' and self.bed_type not in ['bed12', 'bed9']:
self.log.warning("*WARNING* Color set to 'bed_rgb', but bed file does not have the rgb field. "
f"The color has been set to {DEFAULT_BED_COLOR}.\n")
self.properties['color'] = DEFAULT_BED_COLOR

Expand Down Expand Up @@ -663,6 +661,13 @@ def is_right_to(a, b):
self.log.debug(f"ylim {ax.get_ylim()}")

def plot_label(self, label_ax, width_dpi, h_align='left'):
"""
Plot the label of the track.

:param label_ax: the axis where to plot the label
:param width_dpi: the width of the figure in dpi
:param h_align: the horizontal alignment of the label. Options are 'left', 'right' or 'center'
"""
if h_align == 'left':
label_ax.text(0.05, 1, self.properties['title'],
horizontalalignment='left', size='large',
Expand All @@ -686,7 +691,16 @@ def plot_label(self, label_ax, width_dpi, h_align='left'):
# To be able to wrap to the left:
txt._get_wrap_line_width = lambda: width_dpi

def plot_y_axis(self, ax, plot_axis):
def plot_y_axis(self, ax, plot_axis, transform='no', log_pseudocount=0, y_axis='tranformed', only_at_ticks=False):
"""
Plot the y-axis of the track. Overwrite the GenomeTrack method to have bed specific y-axis plotting.
When a color map is used for the color in the .ini file (e.g. coolwarm, Reds), the bed score column mapped to a
color.

:param ax: the axis where to plot the y axis
:param plot_axis: whether to plot the axis or not
:return: None
"""
if self.colormap is not None:
self.colormap.set_array([])
GenomeTrack.plot_custom_cobar(self, ax, fraction=1)
Expand Down
Loading
Loading