diff --git a/.gitignore b/.gitignore index ae945fa..567ffb9 100644 --- a/.gitignore +++ b/.gitignore @@ -1,2 +1,4 @@ .idea/ venv/ +data/ + diff --git a/README.md b/README.md index 03986a0..9c8d7e2 100644 --- a/README.md +++ b/README.md @@ -63,6 +63,9 @@ NB: ## Coverage To extract the coverage from gVCF files, the following steps are required. +Coverage is measuered in read depth. We use the `MIN_DP` field where +available (non-variant regions) with a fallback to the `DP` field +(variants). - gvcf2coverage: - `gvcf2coverage < {input} > {output}` diff --git a/gvcf2coverage/Makefile b/gvcf2coverage/Makefile index 6d9e6f4..150f3b0 100644 --- a/gvcf2coverage/Makefile +++ b/gvcf2coverage/Makefile @@ -1,8 +1,19 @@ -.PHONY: install +TARGET = gvcf2coverage -gvcf2coverage: src/main.c - $(CC) -o $@ -Wall -Wextra -Wpedantic -std=c99 -O3 -I$(HTSLIB_INCDIR) -L$(HTSLIB_LIBDIR) src/main.c -lhts +CC = gcc +CFLAGS = -std=c99 -march=native -Wall -Wextra -Wpedantic \ + -Wformat=2 -Wshadow -Wwrite-strings -Wstrict-prototypes \ + -Wold-style-definition -Wredundant-decls -Wnested-externs \ + -O3 -DNDEBUG -install: gvcf2coverage +.PHONY: clean install + +$(TARGET): src/main.c + $(CC) -o $@ $(CFLAGS) -I$(HTSLIB_INCDIR) -L$(HTSLIB_LIBDIR) $< -lhts + +clean: + rm $(TARGET) + +install: $(TARGET) mkdir -p $(PREFIX)/bin - install -m 755 gvcf2coverage $(PREFIX)/bin + install -m 755 $< $(PREFIX)/bin diff --git a/gvcf2coverage/src/main.c b/gvcf2coverage/src/main.c index 1df134e..9fe6a65 100644 --- a/gvcf2coverage/src/main.c +++ b/gvcf2coverage/src/main.c @@ -1,4 +1,4 @@ -// VERSION 0.1 +// VERSION 0.4 #include // bool, false, true #include // size_t @@ -120,7 +120,6 @@ main(int argc, char* argv[]) int32_t* gt = NULL; bool first = true; - bool jump = false; int window_start = 0; int window_end = 0; @@ -130,10 +129,14 @@ main(int argc, char* argv[]) while (0 == bcf_read(fh, hdr, rec)) { int32_t depth = 0; - if (1 == bcf_get_format_int32(hdr, rec, "DP", &dp, &(int){0})) + if (1 == bcf_get_format_int32(hdr, rec, "MIN_DP", &dp, &(int){0})) { depth = dp[0]; } // if + else if (1 == bcf_get_format_int32(hdr, rec, "DP", &dp, &(int){0})) + { + depth = dp[0]; + } // if // // If depth is below the threshold, no need to proceed @@ -174,7 +177,7 @@ main(int argc, char* argv[]) } // if // - // We just started + // Open window for first entry // if (first) { @@ -183,18 +186,18 @@ main(int argc, char* argv[]) window_chrom = chrom; window_ploidy = ploidy; - // eprint(f"First! c:{window_chrom} s:{start}, w_s={window_start} e:{end} w_e={window_end}") - first = false; continue; } // if + // + // Detect if we need to close and open a new window + // if (window_chrom != chrom || window_ploidy != ploidy || window_end + distance < start) { - jump = true; - // eprint(f"Chrom changed from {window_chrom} to {chrom}.") + // Close the window (void) fprintf(stdout, "%s\t%d\t%d\t%d\n", window_chrom, window_start, window_end, window_ploidy); window_start = start; @@ -204,17 +207,16 @@ main(int argc, char* argv[]) } // if else { - jump = false; + // Extend the window window_start = imin(window_start, start); window_end = imax(window_end, end); - // eprint(f"No jump! s:{start}, w_s={window_start} e:{end} w_e={window_end}") } // else } // while // - // If the last iteration of the loop was not a jump, we still need to print + // Always print the last entry when merging, except if there were no entries // - if (merge && !jump) + if (merge && !first) { (void) fprintf(stdout, "%s\t%d\t%d\t%d\n", window_chrom, window_start, window_end, window_ploidy); } // if diff --git a/pygvcf2coverage/pygvcf2coverage/__init__.py b/pygvcf2coverage/pygvcf2coverage/__init__.py index c49cf90..60897ef 100644 --- a/pygvcf2coverage/pygvcf2coverage/__init__.py +++ b/pygvcf2coverage/pygvcf2coverage/__init__.py @@ -27,10 +27,14 @@ def gvcf2coverage(threshold, merge, distance): jump = False # Depth - dp = entry.format('DP') + dp = entry.format('MIN_DP') if dp is None: - depth = 0 + dp = entry.format('DP') + if dp is None: + depth = 0 + else: + depth = dp[0][0] else: depth = dp[0][0] @@ -56,51 +60,37 @@ def gvcf2coverage(threshold, merge, distance): continue # - # We just started + # Open window for first entry # if first: - # First entry window_start = start window_end = end window_chrom = chrom window_ploidy = ploidy first = False - - # eprint(f"First! c:{window_chrom} s:{start}, w_s={window_start} e:{end} w_e={window_end}") - continue - if window_chrom != chrom: - # eprint(f"Chrom changed from {window_chrom} to {chrom}.") - jump = True - - elif window_ploidy != ploidy: - # eprint(f"Ploidy changed from {window_ploidy} to {ploidy}") - jump = True - - elif window_end + distance < start: - # eprint("Gap! (window_end:%d < start:%d)" % (window_end + distance, start)) - jump = True - - if jump: - # eprint("Jump!") + # + # Detect if we need to close and open a new window + # + if window_chrom != chrom or window_ploidy != ploidy or window_end + distance < start: + # Close the window print(window_chrom, window_start, window_end, window_ploidy, sep="\t") window_start = start window_end = end window_chrom = chrom window_ploidy = ploidy - else: + # Extend the window window_start = min(window_start, start) window_end = max(window_end, end) - # eprint(f"No jump! s:{start}, w_s={window_start} e:{end} w_e={window_end}") # - # If the last iteration of the loop was not a jump, we still need to print + # Always print the last entry when merging, except if there were no entries # - if merge and not jump: + if merge and not first: print(window_chrom, window_start, window_end, window_ploidy, sep="\t") diff --git a/pygvcf2coverage/setup.py b/pygvcf2coverage/setup.py index 7cce966..ab2867e 100644 --- a/pygvcf2coverage/setup.py +++ b/pygvcf2coverage/setup.py @@ -1,7 +1,7 @@ from setuptools import setup setup(name='pygvcf2coverage', - version='0.3', + version='0.4', description='A python tool to extract coverage from gvcf files.', url='http://github.com/varda/varda2_preprocessing', author='Mark Santcroos',