deeptools · AnttonLA · Dec 8, 2023 · Dec 11, 2023 · Oct 18, 2024 · Dec 8, 2023
diff --git a/pygenometracks/makeTracksFile.py b/pygenometracks/makeTracksFile.py
@@ -8,8 +8,9 @@
 def parse_arguments(args=None):
 
     parser = argparse.ArgumentParser(formatter_class=argparse.ArgumentDefaultsHelpFormatter,
-                                     description='Facilitates the creation of a configuration file for pyGenomeTracks. The program takes a list '
-                                                 'of files and does the boilerplate for the configuration file.',
+                                     description='Facilitates the creation of a configuration file for '
+                                                 'pyGenomeTracks. The program takes a list of files and does the '
+                                                 'boilerplate for the configuration file.',
                                      usage="%(prog)s --trackFiles <bigwig file> <bed file> etc. -o tracks.ini")
 
     # define the arguments

diff --git a/pygenometracks/plotTracks.py b/pygenometracks/plotTracks.py
@@ -248,7 +248,11 @@ def parse_arguments(args=None):
 
 
 def main(args=None):
+    """
+    Main function to plot the tracks.
 
+    :param args: arguments to parse. Default is None.
+    """
     args = parse_arguments().parse_args(args)
 
     # Identify the regions to plot:
@@ -277,7 +281,7 @@ def main(args=None):
                      track_label_width=args.trackLabelFraction,
                      plot_regions=regions, plot_width=args.plotWidth)
 
-    # Create dir if dir does not exists:
+    # Create dir if dir does not exist:
     # Modified from https://stackoverflow.com/questions/12517451/automatically-creating-directories-with-file-output
     os.makedirs(os.path.dirname(os.path.abspath(args.outFileName)), exist_ok=True)
 
@@ -289,9 +293,8 @@ def main(args=None):
         for chrom, start, end in regions:
             file_name = f"{file_prefix}_{chrom}-{start}-{end}.{file_suffix}"
             if end - start < 200000:
-                warnings.warn("A region shorter than 200kb has been "
-                              "detected! This can be too small to return "
-                              "a proper TAD plot!\n")
+                warnings.warn("A region shorter than 200kb has been detected! This can be too small to return a proper "
+                              "TAD plot!\n")
             sys.stderr.write(f"saving {file_name}\n")
             current_fig = trp.plot(file_name, chrom, start, end, title=args.title,
                                    h_align_titles=args.trackLabelHAlign,

diff --git a/pygenometracks/readGtf.py b/pygenometracks/readGtf.py
@@ -42,8 +42,7 @@ class ReadGtf(object):
 
     """
 
-    def __init__(self, file_path, prefered_name="transcript_name",
-                 merge_transcripts=True,
+    def __init__(self, file_path, prefered_name="transcript_name", merge_transcripts=True,
                  merge_overlapping_exons=True):
         """
         :param file_path: the path of the gtf file
@@ -53,11 +52,8 @@ def __init__(self, file_path, prefered_name="transcript_name",
         self.file_type = 'bed12'
 
         # list of bed fields
-        self.fields = ['chromosome', 'start', 'end',
-                       'name', 'score', 'strand',
-                       'thick_start', 'thick_end',
-                       'rgb', 'block_count',
-                       'block_sizes', 'block_starts']
+        self.fields = ['chromosome', 'start', 'end', 'name', 'score', 'strand', 'thick_start', 'thick_end', 'rgb',
+                       'block_count', 'block_sizes', 'block_starts']
 
         self.BedInterval = collections.namedtuple('BedInterval', self.fields)
         # I think the name which should be written
@@ -83,15 +79,13 @@ def __init__(self, file_path, prefered_name="transcript_name",
         else:
             if self.merge_transcripts:
                 self.length = self.db.count_features_of_type("gene")
-                self.all_transcripts = self.db.features_of_type("gene",
-                                                                order_by='start')
+                self.all_transcripts = self.db.features_of_type("gene", order_by='start')
             else:
                 self.length = self.db.count_features_of_type("transcript")
                 if self.length == 0:
                     # This is unexpected as the database contains things
                     log.warning("No transcript found consider. If your gtf only have genes, use `merge_transcripts = true`")
-                self.all_transcripts = self.db.features_of_type("transcript",
-                                                                order_by='start')
+                self.all_transcripts = self.db.features_of_type("transcript", order_by='start')
 
     def __iter__(self):
         return self
@@ -106,43 +100,34 @@ def __next__(self):
 
     def get_bed_interval(self):
         """
-        Process a transcript from the database,
-        retrieve all the values and return
-        a namedtuple object
+        Process a transcript from the database, retrieve all the values and return a namedtuple object.
         """
         tr = next(self.all_transcripts)
+
         # The name would be the prefered_name if exists
         try:
             trName = tr.attributes[self.prefered_name][0]
         except KeyError:
             # Else try to guess the prefered_name from exons:
             try:
-                trName = set([e.attributes[self.prefered_name][0]
-                              for e in
-                              self.db.children(tr,
-                                               featuretype='exon',
-                                               order_by='start')]).pop()
+                trName = set(
+                    [e.attributes[self.prefered_name][0] for e in self.db.children(tr, featuretype='exon', order_by='start')]
+                ).pop()
             except KeyError:
                 # Else take the transcript id
                 trName = tr.id
-        # If the cds is defined in the gtf,
-        # use it to define the thick start and end
-        # The gtf is 1-based closed intervalls
-        # and bed are 0-based half-open so:
-        # I need to remove one from each start
+
+        # If the CDS is defined in the gtf, use it to define the thick start and end. GTF uses 1-based closed
+        # intervals and BED uses 0-based half-open intervals, so one is subtracted from each start.
         try:
-            cds_start = next(self.db.children(tr,
-                                              featuretype='CDS',
-                                              order_by='start')).start - 1
-            cds_end = next(self.db.children(tr,
-                                            featuretype='CDS',
-                                            order_by='-start')).end
+            cds_start = next(self.db.children(tr, featuretype='CDS', order_by='start')).start - 1
+            cds_end = next(self.db.children(tr, featuretype='CDS', order_by='-start')).end
         except StopIteration:
-            # If the CDS is not defined, then it is set to the start
-            # as proposed here:
+            # If the CDS is not defined, then it is set to the start as proposed here:
             # https://genome.ucsc.edu/FAQ/FAQformat.html#format1
             cds_start = tr.start - 1
             cds_end = tr.start - 1
+
         # Get all exons starts and end to get lengths
         exons = [e for e in self.db.children(tr, featuretype='exon', order_by='start')]
         if len(exons) > 0:
@@ -158,44 +143,32 @@ def get_bed_interval(self):
                         current_end = e.end
                     else:
                         if e.start > current_end:
-                            # This is a non-overlapping exon
-                            # We store the previous exon:
+                            # This is a non-overlapping exon. We store the previous exon:
                             exons_starts.append(current_start)
                             exons_ends.append(current_end)
                             # We set the current:
                             current_start = e.start - 1
                             current_end = e.end
                         else:
-                            # This is an overlapping exon
-                            # We update current_end if necessary
+                            # This is an overlapping exon. We update current_end if necessary
                             current_end = max(current_end, e.end)
                 if current_start != -1:
                     # There is a last exon to store:
                     exons_starts.append(current_start)
                     exons_ends.append(current_end)
             else:
-                exons_starts = [e.start - 1
-                                for e in
-                                exons]
-                exons_ends = [e.end
-                              for e in
-                              exons]
+                exons_starts = [e.start - 1 for e in exons]
+                exons_ends = [e.end for e in exons]
         else:
             # This means that the gtf does not have exon info for this gene/transcript:
             try:
-                exons_starts = [[ch.start - 1
-                                 for ch in self.db.children(tr,
-                                                            order_by='start')][0]]
-                exons_ends = [[ch.end
-                               for ch in self.db.children(tr,
-                                                          order_by='end',
-                                                          reverse=True)][0]]
+                exons_starts = [[ch.start - 1 for ch in self.db.children(tr, order_by='start')][0]]
+                exons_ends = [[ch.end for ch in self.db.children(tr, order_by='end', reverse=True)][0]]
             except IndexError:
                 exons_starts = [tr.start - 1]
                 exons_ends = [tr.end]
         exons_length = [e - s for s, e in zip(exons_starts, exons_ends)]
         relative_exons_starts = [s - (tr.start - 1) for s in exons_starts]
-        line_values = [tr.chrom, tr.start - 1, tr.end, trName, 0, tr.strand,
-                       cds_start, cds_end, "0", len(exons_starts),
+        line_values = [tr.chrom, tr.start - 1, tr.end, trName, 0, tr.strand, cds_start, cds_end, "0", len(exons_starts),
                        exons_length, relative_exons_starts]
         return self.BedInterval._make(line_values)
diff --git a/pygenometracks/setup.py b/pygenometracks/setup.py
@@ -0,0 +1,128 @@
+# -*- coding: utf-8 -*-
+
+import os
+import sys
+import subprocess
+import re
+
+from setuptools import setup, find_packages
+from setuptools.command.sdist import sdist as _sdist
+from setuptools.command.install import install as _install
+
+VERSION_PY = """
+# This file is originally generated from Git information by running 'setup.py
+# version'. Distribution tarballs contain a pre-generated copy of this file.
+
+__version__ = '%s'
+"""
+
+
+def update_version_py():
+    """Regenerate pygenometracks/_version.py from the output of `git describe --tags --always`."""
+    if not os.path.isdir(".git"):
+        print("This does not appear to be a Git repository.")
+        return
+    try:
+        p = subprocess.Popen(["git", "describe", "--tags", "--always"],
+                             stdout=subprocess.PIPE)
+    except EnvironmentError:
+        print("unable to run git, leaving pygenometracks/_version.py alone")
+        return
+    stdout = p.communicate()[0]
+    if p.returncode != 0:
+        print("unable to run git, leaving pygenometracks/_version.py alone")
+        return
+    # communicate() returns bytes; decode so the file gets '3.8' rather than "b'3.8'".
+    ver = stdout.decode().strip()
+    with open(os.path.join("pygenometracks", "_version.py"), "w") as f:
+        f.write(VERSION_PY % ver)
+    print(f"set pygenometracks/_version.py to '{ver}'")
+
+
+def get_version():
+    """Return the version declared in pygenometracks/_version.py, or None if it cannot be found."""
+    try:
+        f = open(os.path.join("pygenometracks", "_version.py"))
+    except EnvironmentError:
+        # Missing or unreadable file: there is no version to report.
+        return None
+    with f:  # close the handle on every exit path
+        # Lazily match each line; the first `__version__ = '...'` assignment wins.
+        matches = (re.match("__version__ = '([^']+)'", line) for line in f)
+        return next((mo.group(1) for mo in matches if mo), None)
+
+
+class sdist(_sdist):
+
+    def run(self):
+        # update_version_py() is disabled here: the version is read from the existing _version.py.
+        self.distribution.metadata.version = get_version()
+        return _sdist.run(self)
+
+# Install class to check for external dependencies from OS environment
+
+
+class install(_install):
+
+    def run(self):
+        self.distribution.metadata.version = get_version()
+        _install.run(self)
+
+    def checkProgramIsInstalled(self, program, args, where_to_download, affected_tools):
+        """
+        Check that an external program can be launched and report on stderr when it cannot.
+
+        :param program: name of the executable to launch
+        :param args: single argument passed to the program
+        :param where_to_download: location shown to the user when the program is missing
+        :param affected_tools: tools named in the message as needing the program
+        :return: True if the process could be started (it is not waited on), False otherwise
+        """
+        try:
+            subprocess.Popen([program, args], stderr=subprocess.PIPE, stdout=subprocess.PIPE)
+            return True
+        except EnvironmentError:
+            sys.stderr.write(f"\n**{program} not found. This program is needed for the following tools to work "
+                             f"properly:\n{affected_tools}\n{program} can be downloaded from here:\n"
+                             f"{where_to_download}\n")
+        except Exception as e:
+            sys.stderr.write(f"Error: {e}")
+        return False
+
+
+install_requires_py = ["numpy >=1.20",  # requirements handed to setup(install_requires=...) below
+                       "matplotlib >=3.1.1,<=3.6.2",
+                       "intervaltree >=2.1.0",
+                       "pyBigWig >=0.3.16",
+                       "future >=0.17.0",
+                       "hicmatrix >=15",
+                       "pysam >=0.14",
+                       "pytest",  # NOTE(review): test runner listed as an install requirement - confirm intended
+                       "gffutils >=0.9",
+                       "pybedtools >=0.8.1",
+                       "tqdm >=4.20",
+                       "bx-python >=0.8.13",
+                       "pyfaidx >=0.1.3"
+                       ]
+
+setup(
+    name='pyGenomeTracks',
+    version=get_version(),  # None if _version.py is missing/unmatched; long_description below then raises TypeError
+    author='Lucille Lopez-Delisle, Leily Rabbani, Joachim Wolf, Björn Grüning',
+    packages=find_packages(exclude=['tests']),
+    scripts=['bin/make_tracks_file', 'bin/pgt', 'bin/pyGenomeTracks'],
+    include_package_data=True,
+    package_dir={'pygenometracks': 'pygenometracks'},
+    url='http://pygenometracks.readthedocs.io',
+    license='LICENSE.txt',
+    description='Command-line tool to make beautiful and reproducible genome browser snapshots',
+    long_description=open('README.md').read().replace("./docs/", "https://raw.githubusercontent.com/deeptools/pyGenomeTracks/" + get_version() + "/docs/"),
+    long_description_content_type="text/markdown",  # relative ./docs/ links above are rewritten to versioned raw URLs
+    classifiers=[
+        'Intended Audience :: Science/Research',
+        'Topic :: Scientific/Engineering :: Bio-Informatics'],
+    install_requires=install_requires_py,
+    zip_safe=False,
+    python_requires='>=3.7, <4',
+    cmdclass={'sdist': sdist, 'install': install}  # use the version-stamping command classes defined above
+)
diff --git a/pygenometracks/tests/test_gwasTrack.py b/pygenometracks/tests/test_gwasTrack.py
@@ -0,0 +1,14 @@
+import matplotlib as mpl
+from matplotlib.testing.compare import compare_images
+from tempfile import NamedTemporaryFile
+import os.path
+import pygenometracks.plotTracks
+mpl.use('agg')
+
+ROOT = os.path.join(os.path.dirname(os.path.abspath(__file__)),
+                    "test_data")
+
+browser_tracks = """
+[x-axis]
+"""
+# TODO: this test module is incomplete - the track configuration is only a stub and no test function is defined.
diff --git a/pygenometracks/tracks/BedTrack.py b/pygenometracks/tracks/BedTrack.py
@@ -294,10 +294,8 @@ def process_bed(self, plot_regions=None):
         bed_file_h, total_length = self.get_bed_handler(plot_regions)
         self.bed_type = bed_file_h.file_type
 
-        if self.properties['color'] == 'bed_rgb' and \
-           self.bed_type not in ['bed12', 'bed9']:
-            self.log.warning("*WARNING* Color set to 'bed_rgb', "
-                             "but bed file does not have the rgb field. "
+        if self.properties['color'] == 'bed_rgb' and self.bed_type not in ['bed12', 'bed9']:
+            self.log.warning("*WARNING* Color set to 'bed_rgb', but bed file does not have the rgb field. "
                              f"The color has been set to {DEFAULT_BED_COLOR}.\n")
             self.properties['color'] = DEFAULT_BED_COLOR
 
@@ -663,6 +661,13 @@ def is_right_to(a, b):
         self.log.debug(f"ylim {ax.get_ylim()}")
 
     def plot_label(self, label_ax, width_dpi, h_align='left'):
+        """
+        Plot the label of the track.
+
+        :param label_ax: the axis where to plot the label
+        :param width_dpi: the width of the figure in dpi
+        :param h_align: the horizontal alignment of the label. Options are 'left', 'right' or 'center'
+        """
         if h_align == 'left':
             label_ax.text(0.05, 1, self.properties['title'],
                           horizontalalignment='left', size='large',
@@ -686,7 +691,16 @@ def plot_label(self, label_ax, width_dpi, h_align='left'):
             # To be able to wrap to the left:
             txt._get_wrap_line_width = lambda: width_dpi
 
-    def plot_y_axis(self, ax, plot_axis):
+    def plot_y_axis(self, ax, plot_axis, transform='no', log_pseudocount=0, y_axis='tranformed', only_at_ticks=False):
+        """
+        Plot the y-axis of the track. Overrides the GenomeTrack method to have bed-specific y-axis plotting.
+        When a color map is used for the color in the .ini file (e.g. coolwarm, Reds), the bed score column is mapped
+        to a color.
+
+        :param ax: the axis where to plot the y axis
+        :param plot_axis: whether to plot the axis or not
+        :return: None
+        """
         if self.colormap is not None:
             self.colormap.set_array([])
             GenomeTrack.plot_custom_cobar(self, ax, fraction=1)