Skip to content

Commit 3bb56cc

Browse files
LeilyR and dpryan79 authored
release 3.5.0 (#990)
* Fixes for 3.4.2 * disable azure on OSX * Closes #945 * computeMatrixOperations dataRange (#951) * Added dataRange to computeMatrixOperations to return min, max, median and 10th and 90th percentile. * fixed pep8 * more pep8 fix * fixed test_sieve of azure tests * one more fix for test_sieve * imported pysam to test_readFiltering.py * updated hash values for test_sieve * fixed galaxy computeMatrixOperations.xml * More fixes for galaxy wrapper * fixed a little typo in bamCompare (#967) * save the output matrix of plotHeatmap in a format to be compatible with running plotHeatmap on it again (#968) * Plot profile color map (#971) * Now colors can be set by user for any heatmap in plotProfile. A bug in tick index for plotting heatmap is also fixed. * added a small description * pep8 fix * added sortUsingSamples and clusterUsingSamples to the galaxy wrapper * [WIP] added auto to plotHeatmap #908 (#982) * added auto to plotHeatmap * fixed lint, added warning message, updated the help for zmin, zmax * galaxy test plotPCA * lowered the delta for plotPCA galaxy test * fix #969 (#970) * fix #969 * PEP8 * updated changes.txt * fixed a little bug in parserCommon.py which caused flake8 failure. * [WIP] added auto to plotHeatmap #908 (#982) * added auto to plotHeatmap * fixed lint, added warning message, updated the help for zmin, zmax * galaxy test plotPCA * lowered the delta for plotPCA galaxy test * updated version and changes * removed alpha from plotPCA test * removed compare="sim_size" * fixed plotHeatmap.xml by removing size from the params and adding sanitizer for the 2 new params. * updated change.txt * updated the doc html Co-authored-by: Devon Ryan <[email protected]>
1 parent 288311e commit 3bb56cc

13 files changed

+153
-42
lines changed

CHANGES.txt

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,13 @@
1+
3.5.0
2+
3+
* Fixed a small issue in computeGCBias (issue #969)
4+
* Added dataRange to computeMatrixOperations to return min, max, median and 10th and 90th percentile.
5+
* Fixed a small typo in bamCompare. (issue #966)
6+
* Save the output matrix of plotHeatmap in a format to be compatible with running plotHeatmap on it again. (issue #953)
7+
* Different colors can now be set by user for plotProfile --plotType heatmap (issue #956)
8+
* Added the `auto` option to the zMin and zMax of plotHeatmap. (issue #908)
9+
* Added `--sortUsingSamples` and `--clusterUsingSamples` to the plotHeatmap galaxy wrapper. (issue #976)
10+
111
3.4.3
212

313
* Changed iteritems() in estimateScaleFactor to its python3 compatible items().

deeptools/_version.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,4 +2,4 @@
22
# This file is originally generated from Git information by running 'setup.py
33
# version'. Distribution tarballs contain a pre-generated copy of this file.
44

5-
__version__ = '3.4.3'
5+
__version__ = '3.5.0'

deeptools/bamCompare.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -254,7 +254,7 @@ def main(args=None):
254254
if args.normalizeUsing == 'None':
255255
args.normalizeUsing = None # For the sake of sanity
256256
if args.scaleFactorsMethod != 'None' and args.normalizeUsing:
257-
sys.exit("`--normalizeUsing {}` is only valid if you also use `--scaleFactorMethod None`! To prevent erroneous output, I will quit now.\n".format(args.normalizeUsing))
257+
sys.exit("`--normalizeUsing {}` is only valid if you also use `--scaleFactorsMethod None`! To prevent erroneous output, I will quit now.\n".format(args.normalizeUsing))
258258

259259
# Get mapping statistics
260260
bam1, mapped1, unmapped1, stats1 = bamHandler.openBam(args.bamfile1, returnStats=True, nThreads=args.numberOfProcessors)

deeptools/computeGCBias.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -604,7 +604,7 @@ def plotGCbias(file_name, frequencies, reads_per_gc, region_size, image_format=N
604604
plt.setp(bp['whiskers'], color='black', linestyle='dashed')
605605
plt.setp(bp['fliers'], marker='None')
606606
# get the whisker that spans the most
607-
y_max = max([x.get_data()[1][1] for x in bp['whiskers']])
607+
y_max = np.nanmax([x.get_data()[1][1] for x in bp['whiskers']])
608608
ax1.set_ylim(0 - (y_max * 0.05), y_max * 1.05)
609609
ax1.set_ylabel('Number of reads')
610610
ax1.set_xlabel('GC fraction')

deeptools/computeMatrixOperations.py

100644100755
Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -46,6 +46,9 @@ def parse_arguments():
4646
or
4747
computeMatrixOperations sort -h
4848
49+
or
50+
computeMatrixOperations dataRange -h
51+
4952
""",
5053
epilog='example usages:\n'
5154
'computeMatrixOperations subset -m input.mat.gz -o output.mat.gz --group "group 1" "group 2" --samples "sample 3" "sample 10"\n\n'
@@ -126,6 +129,14 @@ def parse_arguments():
126129
help='Sort a matrix file to correspond to the order of entries in the desired input file(s). The groups of regions designated by the files must be present in the order found in the output of computeMatrix (otherwise, use the subset command first). Note that this subcommand can also be used to remove unwanted regions, since regions not present in the input file(s) will be omitted from the output.',
127130
usage='Example usage:\n computeMatrixOperations sort -m input.mat.gz -R regions1.bed regions2.bed regions3.gtf -o input.sorted.mat.gz\n\n')
128131

132+
# dataRange
133+
subparsers.add_parser(
134+
'dataRange',
135+
formatter_class=argparse.ArgumentDefaultsHelpFormatter,
136+
parents=[infoArgs()],
137+
help='Returns the min, max, median, 10th and 90th percentile of the matrix values per sample.',
138+
usage='Example usage:\n computeMatrixOperations dataRange -m input.mat.gz\n\n')
139+
129140
parser.add_argument('--version', action='version',
130141
version='%(prog)s {}'.format(__version__))
131142

@@ -300,6 +311,22 @@ def printInfo(matrix):
300311
print("\t{0}".format(sample))
301312

302313

314+
def printDataRange(matrix):
315+
"""
316+
Prints the min, max, median, 10th and 90th percentile of the matrix values per sample.
317+
"""
318+
print("Samples\tMin\tMax\tMedian\t10th\t90th")
319+
for i, sample in enumerate(matrix.matrix.sample_labels):
320+
start = matrix.matrix.sample_boundaries[i]
321+
end = matrix.matrix.sample_boundaries[i + 1]
322+
sample_matrix = matrix.matrix.matrix[..., start:end]
323+
print("{0}\t{1}\t{2}\t{3}\t{4}\t{5}".format(sample, np.amin(sample_matrix),
324+
np.amax(sample_matrix),
325+
np.ma.median(sample_matrix),
326+
np.percentile(sample_matrix, 10),
327+
np.percentile(sample_matrix, 90)))
328+
329+
303330
def relabelMatrix(matrix, args):
304331
"""
305332
Relabel the samples and groups in a matrix
@@ -771,6 +798,8 @@ def main(args=None):
771798

772799
if args.command == 'info':
773800
printInfo(hm)
801+
if args.command == 'dataRange':
802+
printDataRange(hm)
774803
elif args.command == 'subset':
775804
sIdx = getSampleBounds(args, hm)
776805
gIdx, gBounds = getGroupBounds(args, hm)

deeptools/correctGCBias.py

100644100755
Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -239,10 +239,10 @@ def writeCorrected_worker(chrNameBam, chrNameBit, start, end, step):
239239
try:
240240
if debug:
241241
endTime = time.time()
242-
print("{}, processing {} ({:.1f} per sec) ")
243-
"reads @ {}:{}-{}".format(multiprocessing.current_process().name,
244-
i, i / (endTime - startTime),
245-
chrNameBit, start, end)
242+
print("{}, processing {} ({:.1f} per sec) "
243+
"reads @ {}:{}-{}".format(multiprocessing.current_process().name,
244+
i, i / (endTime - startTime),
245+
chrNameBit, start, end))
246246
except NameError:
247247
pass
248248

deeptools/parserCommon.py

Lines changed: 12 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -358,11 +358,10 @@ def numberOfProcessors(string):
358358
"{} is not a valid number of processors".format(string))
359359

360360
except Exception as e:
361-
raise argparse.ArgumentTypeError("the value given is not valid. "
361+
raise argparse.ArgumentTypeError("the given value {} is not valid. "
362362
"Error message: {}\nThe number of "
363363
"available processors in your "
364-
"computer is {}.".format(string, e,
365-
availProc))
364+
"computer is {}.".format(string, e, availProc))
366365

367366
if numberOfProcessors > availProc:
368367
numberOfProcessors = availProc
@@ -444,7 +443,7 @@ def heatmapperOutputArgs(args=None,
444443
output.add_argument('--outFileNameMatrix',
445444
help='If this option is given, then the matrix '
446445
'of values underlying the heatmap will be saved '
447-
'using this name, e.g. MyMatrix.tab.',
446+
'using this name, e.g. MyMatrix.gz.',
448447
metavar='FILE',
449448
type=writableFile)
450449

@@ -614,9 +613,9 @@ def heatmapperOptionalArgs(mode=['heatmap', 'profile'][0]):
614613

615614
optional.add_argument('--sortUsingSamples',
616615
help='List of sample numbers (order as in matrix), '
617-
'that are used for sorting by --sortUsing, '
618-
'no value uses all samples, '
619-
'example: --sortUsingSamples 1 3',
616+
'which are used by --sortUsing for sorting. '
617+
'If no value is set, it uses all samples. '
618+
'Example: --sortUsingSamples 1 3',
620619
type=int, nargs='+')
621620

622621
optional.add_argument('--linesAtTickMarks',
@@ -704,15 +703,17 @@ def heatmapperOptionalArgs(mode=['heatmap', 'profile'][0]):
704703
default=None,
705704
help='Minimum value for the heatmap intensities. Multiple values, separated by '
706705
'spaces can be set for each heatmap. If the number of zMin values is smaller than'
707-
'the number of heatmaps the values are recycled.',
708-
type=float,
706+
'the number of heatmaps the values are recycled. If a value is set to "auto", it will be set '
707+
' to the first percentile of the matrix values.',
708+
type=str,
709709
nargs='+')
710710
optional.add_argument('--zMax', '-max',
711711
default=None,
712712
help='Maximum value for the heatmap intensities. Multiple values, separated by '
713713
'spaces can be set for each heatmap. If the number of zMax values is smaller than'
714-
'the number of heatmaps the values are recycled.',
715-
type=float,
714+
'the number of heatmaps the values are recycled. If a value is set to "auto", it will be set '
715+
' to the 98th percentile of the matrix values.',
716+
type=str,
716717
nargs='+')
717718
optional.add_argument('--heatmapHeight',
718719
help='Plot height in cm. The default for the heatmap '

deeptools/plotHeatmap.py

Lines changed: 28 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -412,6 +412,16 @@ def plotMatrix(hm, outFileName,
412412
zMin = [None]
413413
else:
414414
zMin = [zMin] # convert to list to support multiple entries
415+
elif 'auto' in zMin:
416+
matrix_flatten = hm.matrix.flatten()
417+
auto_min = np.percentile(matrix_flatten, 1.0)
418+
if np.isnan(auto_min):
419+
auto_min = None
420+
new_mins = [float(x) if x != 'auto' else auto_min for x in zMin]
421+
zMin = new_mins
422+
else:
423+
new_mins = [float(x) for x in zMin]
424+
zMin = new_mins
415425

416426
if zMax is None:
417427
if matrix_flatten is None:
@@ -422,6 +432,23 @@ def plotMatrix(hm, outFileName,
422432
zMax = [None]
423433
else:
424434
zMax = [zMax]
435+
elif 'auto' in zMax:
436+
matrix_flatten = hm.matrix.flatten()
437+
auto_max = np.percentile(matrix_flatten, 98.0)
438+
if np.isnan(auto_max):
439+
auto_max = None
440+
new_maxs = [float(x) if x != 'auto' else auto_max for x in zMax]
441+
zMax = new_maxs
442+
else:
443+
new_maxs = [float(x) for x in zMax]
444+
zMax = new_maxs
445+
if (len(zMin) > 1) & (len(zMax) > 1):
446+
for index, value in enumerate(zMax):
447+
if value <= zMin[index]:
448+
sys.stderr.write("Warnirng: In bigwig {}, the given zmin ({}) is larger than "
449+
"or equal to the given zmax ({}). Thus, it has been set "
450+
"to None. \n".format(index + 1, zMin[index], value))
451+
zMin[index] = None
425452

426453
if yMin is None:
427454
yMin = [None]
@@ -833,7 +860,7 @@ def main(args=None):
833860
hm.matrix.computeSilhouette(args.args.hclust)
834861

835862
if args.outFileNameMatrix:
836-
hm.save_matrix_values(args.outFileNameMatrix)
863+
hm.save_matrix(args.outFileNameMatrix)
837864

838865
if args.outFileSortedRegions:
839866
hm.save_BED(args.outFileSortedRegions)

deeptools/plotProfile.py

Lines changed: 12 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@
66

77
import argparse
88
import numpy as np
9+
from math import ceil
910
import matplotlib
1011
matplotlib.use('Agg')
1112
matplotlib.rcParams['pdf.fonttype'] = 42
@@ -458,6 +459,13 @@ def plotly_hexbin(self):
458459
py.plot(fig, filename=self.out_file_name, auto_open=False)
459460

460461
def plot_heatmap(self):
462+
cmap = ['RdYlBu_r']
463+
if self.color_list is not None: # check the length to be equal to the numebr of plots otherwise multiply it!
464+
cmap = self.color_list
465+
if len(cmap) < self.numplots:
466+
all_colors = cmap
467+
for i in range(ceil(self.numplots / len(cmap))):
468+
cmap.extend(all_colors)
461469
matrix_flatten = None
462470
if self.y_min == [None]:
463471
matrix_flatten = self.hm.matrix.flatten()
@@ -479,7 +487,6 @@ def plot_heatmap(self):
479487

480488
ax_list = []
481489
# turn off y ticks
482-
483490
for plot in range(self.numplots):
484491
labels = []
485492
col = plot % self.plots_per_row
@@ -503,9 +510,10 @@ def plot_heatmap(self):
503510

504511
if self.per_group:
505512
title = self.hm.matrix.group_labels[plot]
513+
tickIdx = plot % self.hm.matrix.get_num_samples()
506514
else:
507515
title = self.hm.matrix.sample_labels[plot]
508-
516+
tickIdx = plot
509517
ax.set_title(title)
510518
mat = [] # when drawing a heatmap (in contrast to drawing lines)
511519
for data_idx in range(self.numlines):
@@ -526,13 +534,12 @@ def plot_heatmap(self):
526534
label = sub_matrix['group']
527535
labels.append(label)
528536
mat.append(np.ma.__getattribute__(self.averagetype)(sub_matrix['matrix'], axis=0))
529-
530537
img = ax.imshow(np.vstack(mat), interpolation='nearest',
531-
cmap='RdYlBu_r', aspect='auto', vmin=localYMin, vmax=localYMax)
538+
cmap=cmap[plot], aspect='auto', vmin=localYMin, vmax=localYMax)
532539
self.fig.colorbar(img, cax=cax)
533540

534541
totalWidth = np.vstack(mat).shape[1]
535-
xticks, xtickslabel = self.getTicks(plot)
542+
xticks, xtickslabel = self.getTicks(tickIdx)
536543
if np.ceil(max(xticks)) != float(totalWidth - 1):
537544
tickscale = float(totalWidth) / max(xticks)
538545
xticks_use = [x * tickscale for x in xticks]

deeptools/test/test_readFiltering.py

Lines changed: 6 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,8 @@
44
import os.path
55
from os import unlink
66
import hashlib
7+
import pysam
8+
79

810
ROOT = os.path.dirname(os.path.abspath(__file__)) + "/test_data/"
911
BAMFILE_FILTER = ROOT + "test_filtering.bam"
@@ -72,12 +74,12 @@ def test_sieve():
7274
'test_filtering\t5\t193\n']
7375
assert_equal(resp, expected)
7476
unlink(outlog)
75-
h = hashlib.md5(open(outfile, "rb").read()).hexdigest()
76-
assert(h == "977bdab227a4dbfa3fc9f27c23a3e0b7")
77+
h = hashlib.md5(pysam.view(outfile).encode('utf-8')).hexdigest()
78+
assert(h == "acbc4443fb0387bfd6c412af9d4fc414")
7779
unlink(outfile)
7880

79-
h = hashlib.md5(open(outfiltered, "rb").read()).hexdigest()
80-
assert(h == "762e79b7a2245ff6b2cea4139a1455de")
81+
h1 = hashlib.md5(pysam.view(outfiltered).encode('utf-8')).hexdigest()
82+
assert(h1 == "b90befdd5f073f14acb9a38661f301ad")
8183
unlink(outfiltered)
8284

8385

0 commit comments

Comments
 (0)