-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathmssequence.c
166 lines (144 loc) · 4.2 KB
/
mssequence.c
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
/*
COPYRIGHT (c)2016 THE OHIO STATE UNIVERSITY
ALL RIGHTS RESERVED
PERMISSION IS GRANTED TO USE, COPY, CREATE DERIVATIVE WORKS AND
REDISTRIBUTE THIS SOFTWARE AND SUCH DERIVATIVE WORKS FOR NONCOMMERCIAL
EDUCATION AND RESEARCH PURPOSES, SO LONG AS NO FEE IS CHARGED, AND SO
LONG AS THE COPYRIGHT NOTICE ABOVE, THIS GRANT OF PERMISSION, AND THE
DISCLAIMER BELOW APPEAR IN ALL COPIES MADE; AND SO LONG AS THE NAME OF
THE OHIO STATE UNIVERSITY IS NOT USED IN ANY ADVERTISING OR PUBLICITY
PERTAINING TO THE USE OR DISTRIBUTION OF THIS SOFTWARE WITHOUT
SPECIFIC, WRITTEN PRIOR AUTHORIZATION.
THIS SOFTWARE IS PROVIDED AS IS, WITHOUT REPRESENTATION FROM THE OHIO
STATE UNIVERSITY AS TO ITS FITNESS FOR ANY PURPOSE, AND WITHOUT
WARRANTY BY THE OHIO STATE UNIVERSITY OF ANY KIND, EITHER EXPRESS OR
IMPLIED, INCLUDING WITHOUT LIMITATION THE IMPLIED WARRANTIES OF
MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE OHIO STATE
UNIVERSITY SHALL NOT BE LIABLE FOR ANY DAMAGES, INCLUDING SPECIAL,
INDIRECT, INCIDENTAL, OR CONSEQUENTIAL DAMAGES, WITH RESPECT TO ANY
CLAIM ARISING OUT OF OR IN CONNECTION WITH THE USE OF THE SOFTWARE,
EVEN IF IT HAS BEEN OR IS HEREAFTER ADVISED OF THE POSSIBILITY OF SUCH
DAMAGES.
*/
/* Homopolymer repeat calling */
/* implementation of sequence functions */
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <ctype.h>
#include "mssequence.h"
int IUPACToInternal(char c)
{
int r; /* result */
switch(toupper(c)) {
case 'A':
r=MONOSEQ_BASE_A;
break;
case 'C':
r=MONOSEQ_BASE_C;
break;
case 'G':
r=MONOSEQ_BASE_G;
break;
case 'T':
case 'U':
r=MONOSEQ_BASE_T;
break;
case 'R':
r=MONOSEQ_BASE_A | MONOSEQ_BASE_G;
break;
case 'Y':
r=MONOSEQ_BASE_C | MONOSEQ_BASE_T;
break;
case 'S':
r=MONOSEQ_BASE_C | MONOSEQ_BASE_G;
break;
case 'W':
r=MONOSEQ_BASE_A | MONOSEQ_BASE_T;
break;
case 'K':
r=MONOSEQ_BASE_T | MONOSEQ_BASE_G;
break;
case 'M':
r=MONOSEQ_BASE_A | MONOSEQ_BASE_C;
break;
case 'B':
r=MONOSEQ_BASE_C | MONOSEQ_BASE_G | MONOSEQ_BASE_T;
break;
case 'D':
r=MONOSEQ_BASE_A | MONOSEQ_BASE_G | MONOSEQ_BASE_T;
break;
case 'H':
r=MONOSEQ_BASE_A | MONOSEQ_BASE_C | MONOSEQ_BASE_T;
break;
case 'V':
r=MONOSEQ_BASE_A | MONOSEQ_BASE_C | MONOSEQ_BASE_G;
break;
case 'N':
r=MONOSEQ_BASE_A | MONOSEQ_BASE_C | MONOSEQ_BASE_G | MONOSEQ_BASE_T;
break;
default:
r=-1;
break;
}
return(r);
}
/* calculate complementary nucleotide */
int ComplementaryNucleotide(int n)
{
int r; /* result */
r=0;
if (n & MONOSEQ_BASE_A) r|=MONOSEQ_BASE_T;
if (n & MONOSEQ_BASE_T) r|=MONOSEQ_BASE_A;
if (n & MONOSEQ_BASE_G) r|=MONOSEQ_BASE_C;
if (n & MONOSEQ_BASE_C) r|=MONOSEQ_BASE_G;
return(r);
}
int *CreateInternalSequence(const char *IUPACString)
{
const char *p; /* pointer to current IUPAC character */
int *Sequence; /* sequence to be constructed */
int *q; /* pointer to current result */
Sequence=(int*)malloc((strlen(IUPACString)+1)*sizeof(int));
if (!Sequence) return(NULL);
for(p=IUPACString,q=Sequence;*p;p++,q++) {
*q=IUPACToInternal(*p);
if (*q<0) {
free((void*)Sequence);
return(NULL);
}
}
*q=0;
return(Sequence);
}
/* create reverse complement */
int *CreateReverseComplement(const int *forward)
{
int i; /* loop variable */
const int *p; /* pointer to current forward base */
int *Sequence; /* sequence to be constructed */
int *q; /* pointer to current result */
/* count length */
for(p=forward,i=0;*p;p++,i++);
/* get memory */
Sequence=(int*)malloc((i+1)*sizeof(int));
if (!Sequence) return(NULL);
for(p=forward,q=Sequence+i-1;*p;p++,q--) {
*q=ComplementaryNucleotide(*p);
if (*q<0) {
free((void*)Sequence);
return(NULL);
}
}
Sequence[i]=0;
return(Sequence);
}
/* print sequence */
void PrintSequence(const int *sequence, FILE *fp)
{
const int *p; /* pointer to current nucleotide */
char bases[17]=" ATWGRKDCMYHSVBN"; /* IUPAC codes */
for(p=sequence;*p;p++) {
if (*p>0 && *p<16) fprintf(fp,"%c",bases[*p]);
}
}