Skip to content
Snippets Groups Projects
Commit d83935ad authored by Till Wesselmann's avatar Till Wesselmann
Browse files

Use coverage for assessing data quality

parent 055157fc
No related branches found
No related tags found
No related merge requests found
......@@ -277,13 +277,14 @@ class PreAlignmentQC(Step):
def get_score(self):
"""Returns score consisting of read count and filtering percentage."""
# We consider more than 200 million reads to be perfect
# Effective coverage
READ_COUNT_SCORING = (
(200, 1),
(100, 0.9),
(50, 0.8),
(10, 0.6),
(0, 0.5),
(5, 1),
(4, 0.9),
(3, 0.8),
(2, 0.7),
(0.5, 0.5),
(0, 0),
)
if self.fastp_report:
......@@ -293,9 +294,9 @@ class PreAlignmentQC(Step):
filter_rate = after / before
# Readcount scoring
bases = self.fastp_report["summary"]["after_filtering"]["total_bases"]
bases = self.fastp_report["summary"]["before_filtering"]["total_bases"]
estimated_coverage = (
bases // self.pipeline.get().assembly.organism.genome_size
bases / self.pipeline.get().assembly.organism.genome_size
)
for v, r in READ_COUNT_SCORING:
if estimated_coverage >= v:
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment