-
Notifications
You must be signed in to change notification settings - Fork 4
/
03_aepGenomeAnnotation.html
executable file
·1149 lines (1102 loc) · 891 KB
/
03_aepGenomeAnnotation.html
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
<!doctype html>
<!-- lang declares the document language (the content is English) for assistive technology and translation tools. -->
<html lang='en'>
<head>
<meta charset='UTF-8'>
<!-- Viewport directives are a comma-separated list of key=value pairs. -->
<meta name='viewport' content='width=device-width, initial-scale=1'>
<!-- The ampersand in the font URL is written as &amp; so the attribute value contains no ambiguous character reference. -->
<link href='https://fonts.loli.net/css?family=Open+Sans:400italic,700italic,700,400&amp;subset=latin,latin-ext' rel='stylesheet' type='text/css' /><style type='text/css'>html {overflow-x: initial !important;}:root { --bg-color: #ffffff; --text-color: #333333; --select-text-bg-color: #B5D6FC; --select-text-font-color: auto; --monospace: "Lucida Console",Consolas,"Courier",monospace; --title-bar-height: 20px; }
.mac-os-11 { --title-bar-height: 28px; }
html { font-size: 14px; background-color: var(--bg-color); color: var(--text-color); font-family: "Helvetica Neue", Helvetica, Arial, sans-serif; -webkit-font-smoothing: antialiased; }
body { margin: 0px; padding: 0px; height: auto; inset: 0px; font-size: 1rem; line-height: 1.42857143; overflow-x: hidden; background-image: inherit; background-size: inherit; background-attachment: inherit; background-origin: inherit; background-clip: inherit; background-color: inherit; tab-size: 4; background-position: inherit; background-repeat: inherit; }
iframe { margin: auto; }
a.url { word-break: break-all; }
a:active, a:hover { outline: 0px; }
.in-text-selection, ::selection { text-shadow: none; background: var(--select-text-bg-color); color: var(--select-text-font-color); }
#write { margin: 0px auto; height: auto; width: inherit; word-break: normal; word-wrap: break-word; position: relative; white-space: normal; overflow-x: visible; padding-top: 36px; }
#write.first-line-indent p { text-indent: 2em; }
#write.first-line-indent li p, #write.first-line-indent p * { text-indent: 0px; }
#write.first-line-indent li { margin-left: 2em; }
.for-image #write { padding-left: 8px; padding-right: 8px; }
body.typora-export { padding-left: 30px; padding-right: 30px; }
.typora-export .footnote-line, .typora-export li, .typora-export p { white-space: pre-wrap; }
.typora-export .task-list-item input { pointer-events: none; }
@media screen and (max-width: 500px) {
body.typora-export { padding-left: 0px; padding-right: 0px; }
#write { padding-left: 20px; padding-right: 20px; }
.CodeMirror-sizer { margin-left: 0px !important; }
.CodeMirror-gutters { display: none !important; }
}
#write li > figure:last-child { margin-bottom: 0.5rem; }
#write ol, #write ul { position: relative; }
img { max-width: 100%; vertical-align: middle; image-orientation: from-image; }
button, input, select, textarea { color: inherit; font-family: inherit; font-size: inherit; font-style: inherit; font-variant-caps: inherit; font-weight: inherit; font-stretch: inherit; line-height: inherit; }
input[type="checkbox"], input[type="radio"] { line-height: normal; padding: 0px; }
*, ::after, ::before { box-sizing: border-box; }
#write h1, #write h2, #write h3, #write h4, #write h5, #write h6, #write p, #write pre { width: inherit; }
#write h1, #write h2, #write h3, #write h4, #write h5, #write h6, #write p { position: relative; }
p { line-height: inherit; }
h1, h2, h3, h4, h5, h6 { break-after: avoid-page; break-inside: avoid; orphans: 4; }
p { orphans: 4; }
h1 { font-size: 2rem; }
h2 { font-size: 1.8rem; }
h3 { font-size: 1.6rem; }
h4 { font-size: 1.4rem; }
h5 { font-size: 1.2rem; }
h6 { font-size: 1rem; }
.md-math-block, .md-rawblock, h1, h2, h3, h4, h5, h6, p { margin-top: 1rem; margin-bottom: 1rem; }
.hidden { display: none; }
.md-blockmeta { color: rgb(204, 204, 204); font-weight: 700; font-style: italic; }
a { cursor: pointer; }
sup.md-footnote { padding: 2px 4px; background-color: rgba(238, 238, 238, 0.7); color: rgb(85, 85, 85); border-radius: 4px; cursor: pointer; }
sup.md-footnote a, sup.md-footnote a:hover { color: inherit; text-transform: inherit; text-decoration: inherit; }
#write input[type="checkbox"] { cursor: pointer; width: inherit; height: inherit; }
figure { overflow-x: auto; margin: 1.2em 0px; max-width: calc(100% + 16px); padding: 0px; }
figure > table { margin: 0px; }
tr { break-inside: avoid; break-after: auto; }
thead { display: table-header-group; }
table { border-collapse: collapse; border-spacing: 0px; width: 100%; overflow: auto; break-inside: auto; text-align: left; }
table.md-table td { min-width: 32px; }
/* Gutter and editor base alignment. The former empty `.CodeMirror-linenumber { }` rule declared nothing and was dropped. */
.CodeMirror-gutters { border-right-width: 0px; background-color: inherit; }
.CodeMirror { text-align: left; }
.CodeMirror-placeholder { opacity: 0.3; }
.CodeMirror pre { padding: 0px 4px; }
.CodeMirror-lines { padding: 0px; }
div.hr:focus { cursor: none; }
#write pre { white-space: pre-wrap; }
#write.fences-no-line-wrapping pre { white-space: pre; }
#write pre.ty-contain-cm { white-space: normal; }
.CodeMirror-gutters { margin-right: 4px; }
.md-fences { font-size: 0.9rem; display: block; break-inside: avoid; text-align: left; overflow: visible; white-space: pre; background-image: inherit; background-size: inherit; background-attachment: inherit; background-origin: inherit; background-clip: inherit; background-color: inherit; position: relative !important; background-position: inherit; background-repeat: inherit; }
.md-fences-adv-panel { width: 100%; margin-top: 10px; text-align: center; padding-top: 0px; padding-bottom: 8px; overflow-x: auto; }
#write .md-fences.mock-cm { white-space: pre-wrap; }
.md-fences.md-fences-with-lineno { padding-left: 0px; }
#write.fences-no-line-wrapping .md-fences.mock-cm { white-space: pre; overflow-x: auto; }
.md-fences.mock-cm.md-fences-with-lineno { padding-left: 8px; }
.CodeMirror-line, twitterwidget { break-inside: avoid; }
.footnotes { opacity: 0.8; font-size: 0.9rem; margin-top: 1em; margin-bottom: 1em; }
.footnotes + .footnotes { margin-top: 0px; }
.md-reset { margin: 0px; padding: 0px; border: 0px; outline: 0px; vertical-align: top; text-decoration: none; text-shadow: none; float: none; position: static; width: auto; height: auto; white-space: nowrap; cursor: inherit; line-height: normal; font-weight: 400; text-align: left; box-sizing: content-box; direction: ltr; background-position: 0px 0px; }
li div { padding-top: 0px; }
blockquote { margin: 1rem 0px; }
li .mathjax-block, li p { margin: 0.5rem 0px; }
li blockquote { margin: 1rem 0px; }
li { margin: 0px; position: relative; }
blockquote > :last-child { margin-bottom: 0px; }
blockquote > :first-child, li > :first-child { margin-top: 0px; }
.footnotes-area { color: rgb(136, 136, 136); margin-top: 0.714rem; padding-bottom: 0.143rem; white-space: normal; }
#write .footnote-line { white-space: pre-wrap; }
@media print {
body, html { border: 1px solid transparent; height: 99%; break-after: avoid; break-before: avoid; font-variant-ligatures: no-common-ligatures; }
#write { margin-top: 0px; padding-top: 0px; border-color: transparent !important; }
.typora-export * { print-color-adjust: exact; }
.typora-export #write { break-after: avoid; }
.typora-export #write::after { height: 0px; }
.is-mac table { break-inside: avoid; }
.typora-export-show-outline .typora-export-sidebar { display: none; }
}
.footnote-line { margin-top: 0.714em; font-size: 0.7em; }
/* Images inside links show the pointer cursor. `img a` was removed: img is a void element and cannot contain an anchor. */
a img { cursor: pointer; }
pre.md-meta-block { font-size: 0.8rem; min-height: 0.8rem; white-space: pre-wrap; background-color: rgb(204, 204, 204); display: block; overflow-x: hidden; }
p > .md-image:only-child:not(.md-img-error) img, p > img:only-child { display: block; margin: auto; }
#write.first-line-indent p > .md-image:only-child:not(.md-img-error) img { left: -2em; position: relative; }
p > .md-image:only-child { display: inline-block; width: 100%; }
#write .MathJax_Display { margin: 0.8em 0px 0px; }
.md-math-block { width: 100%; }
.md-math-block:not(:empty)::after { display: none; }
.MathJax_ref { fill: currentcolor; }
[contenteditable="true"]:active, [contenteditable="true"]:focus, [contenteditable="false"]:active, [contenteditable="false"]:focus { outline: 0px; box-shadow: none; }
.md-task-list-item { position: relative; list-style-type: none; }
.task-list-item.md-task-list-item { padding-left: 0px; }
.md-task-list-item > input { position: absolute; top: 0px; left: 0px; margin-left: -1.2em; margin-top: calc(1em - 10px); border: none; }
.math { font-size: 1rem; }
.md-toc { min-height: 3.58rem; position: relative; font-size: 0.9rem; border-radius: 10px; }
.md-toc-content { position: relative; margin-left: 0px; }
.md-toc-content::after, .md-toc::after { display: none; }
.md-toc-item { display: block; color: rgb(65, 131, 196); }
.md-toc-item a { text-decoration: none; }
.md-toc-inner:hover { text-decoration: underline; }
.md-toc-inner { display: inline-block; cursor: pointer; }
.md-toc-h1 .md-toc-inner { margin-left: 0px; font-weight: 700; }
.md-toc-h2 .md-toc-inner { margin-left: 2em; }
.md-toc-h3 .md-toc-inner { margin-left: 4em; }
.md-toc-h4 .md-toc-inner { margin-left: 6em; }
.md-toc-h5 .md-toc-inner { margin-left: 8em; }
.md-toc-h6 .md-toc-inner { margin-left: 10em; }
@media screen and (max-width: 48em) {
.md-toc-h3 .md-toc-inner { margin-left: 3.5em; }
.md-toc-h4 .md-toc-inner { margin-left: 5em; }
.md-toc-h5 .md-toc-inner { margin-left: 6.5em; }
.md-toc-h6 .md-toc-inner { margin-left: 8em; }
}
a.md-toc-inner { font-size: inherit; font-style: inherit; font-weight: inherit; line-height: inherit; }
.footnote-line a:not(.reversefootnote) { color: inherit; }
.md-attr { display: none; }
.md-fn-count::after { content: "."; }
code, pre, samp, tt { font-family: var(--monospace); }
kbd { margin: 0px 0.1em; padding: 0.1em 0.6em; font-size: 0.8em; color: rgb(36, 39, 41); background-color: rgb(255, 255, 255); border: 1px solid rgb(173, 179, 185); border-radius: 3px; box-shadow: rgba(12, 13, 14, 0.2) 0px 1px 0px, rgb(255, 255, 255) 0px 0px 0px 2px inset; white-space: nowrap; vertical-align: middle; }
.md-comment { color: rgb(162, 127, 3); opacity: 0.6; font-family: var(--monospace); }
code { text-align: left; }
a.md-print-anchor { white-space: pre !important; border: none !important; display: inline-block !important; position: absolute !important; width: 1px !important; right: 0px !important; outline: 0px !important; text-shadow: initial !important; background-position: 0px 0px !important; }
.os-windows.monocolor-emoji .md-emoji { font-family: "Segoe UI Symbol", sans-serif; }
.md-diagram-panel > svg { max-width: 100%; }
[lang="flow"] svg, [lang="mermaid"] svg { max-width: 100%; height: auto; }
[lang="mermaid"] .node text { font-size: 1rem; }
table tr th { border-bottom-width: 0px; }
video { max-width: 100%; display: block; margin: 0px auto; }
iframe { max-width: 100%; width: 100%; border: none; }
.highlight td, .highlight tr { border: 0px; }
mark { background-color: rgb(255, 255, 0); color: rgb(0, 0, 0); }
.md-html-inline .md-plain, .md-html-inline strong, mark .md-inline-math, mark strong { color: inherit; }
.md-expand mark .md-meta { opacity: 0.3 !important; }
mark .md-meta { color: rgb(0, 0, 0); }
@media print {
.typora-export h1, .typora-export h2, .typora-export h3, .typora-export h4, .typora-export h5, .typora-export h6 { break-inside: avoid; }
}
.md-diagram-panel .messageText { stroke: none !important; }
.md-diagram-panel .start-state { fill: var(--node-fill); }
.md-diagram-panel .edgeLabel rect { opacity: 1 !important; }
.md-fences.md-fences-math { font-size: 1em; }
.md-fences-advanced:not(.md-focus) { padding: 0px; white-space: nowrap; border: 0px; }
.md-fences-advanced:not(.md-focus) { background-image: inherit; background-size: inherit; background-attachment: inherit; background-origin: inherit; background-clip: inherit; background-color: inherit; background-position: inherit; background-repeat: inherit; }
.typora-export-show-outline .typora-export-content { max-width: 1440px; margin: auto; display: flex; flex-direction: row; }
.typora-export-sidebar { width: 300px; font-size: 0.8rem; margin-top: 80px; margin-right: 18px; }
/* Content column grows to fill the flex row. The prefixed form uses a single leading dash; `--webkit-flex` only declared an unused custom property. */
.typora-export-show-outline #write { -webkit-flex: 2; flex: 2 1 0%; }
.typora-export-sidebar .outline-content { position: fixed; top: 0px; max-height: 100%; overflow: hidden auto; padding-bottom: 30px; padding-top: 60px; width: 300px; }
@media screen and (max-width: 1024px) {
.typora-export-sidebar, .typora-export-sidebar .outline-content { width: 240px; }
}
@media screen and (max-width: 800px) {
.typora-export-sidebar { display: none; }
}
.outline-content li, .outline-content ul { margin-left: 0px; margin-right: 0px; padding-left: 0px; padding-right: 0px; list-style: none; }
.outline-content ul { margin-top: 0px; margin-bottom: 0px; }
.outline-content strong { font-weight: 400; }
.outline-expander { width: 1rem; height: 1.428571429rem; position: relative; display: table-cell; vertical-align: middle; cursor: pointer; padding-left: 4px; }
.outline-expander::before { content: ""; position: relative; font-family: Ionicons; display: inline-block; font-size: 8px; vertical-align: middle; }
.outline-item { padding-top: 3px; padding-bottom: 3px; cursor: pointer; }
.outline-expander:hover::before { content: ""; }
.outline-h1 > .outline-item { padding-left: 0px; }
.outline-h2 > .outline-item { padding-left: 1em; }
.outline-h3 > .outline-item { padding-left: 2em; }
.outline-h4 > .outline-item { padding-left: 3em; }
.outline-h5 > .outline-item { padding-left: 4em; }
.outline-h6 > .outline-item { padding-left: 5em; }
.outline-label { cursor: pointer; display: table-cell; vertical-align: middle; text-decoration: none; color: inherit; }
.outline-label:hover { text-decoration: underline; }
.outline-item:hover { border-color: rgb(245, 245, 245); background-color: var(--item-hover-bg-color); }
.outline-item:hover { margin-left: -28px; margin-right: -28px; border-left-width: 28px; border-left-style: solid; border-left-color: transparent; border-right-width: 28px; border-right-style: solid; border-right-color: transparent; }
.outline-item-single .outline-expander::before, .outline-item-single .outline-expander:hover::before { display: none; }
.outline-item-open > .outline-item > .outline-expander::before { content: ""; }
.outline-children { display: none; }
.info-panel-tab-wrapper { display: none; }
.outline-item-open > .outline-children { display: block; }
.typora-export .outline-item { padding-top: 1px; padding-bottom: 1px; }
.typora-export .outline-item:hover { margin-right: -8px; border-right-width: 8px; border-right-style: solid; border-right-color: transparent; }
.typora-export .outline-expander::before { content: "+"; font-family: inherit; top: -1px; }
.typora-export .outline-expander:hover::before, .typora-export .outline-item-open > .outline-item > .outline-expander::before { content: "−"; }
.typora-export-collapse-outline .outline-children { display: none; }
.typora-export-collapse-outline .outline-item-open > .outline-children, .typora-export-no-collapse-outline .outline-children { display: block; }
.typora-export-no-collapse-outline .outline-expander::before { content: "" !important; }
.typora-export-show-outline .outline-item-active > .outline-item .outline-label { font-weight: 700; }
.md-inline-math-container mjx-container { zoom: 0.95; }
.CodeMirror { height: auto; }
.CodeMirror.cm-s-inner { background-image: inherit; background-size: inherit; background-attachment: inherit; background-origin: inherit; background-clip: inherit; background-color: inherit; background-position: inherit; background-repeat: inherit; }
.CodeMirror-scroll { overflow: auto hidden; z-index: 3; }
.CodeMirror-gutter-filler, .CodeMirror-scrollbar-filler { background-color: rgb(255, 255, 255); }
.CodeMirror-gutters { border-right-width: 1px; border-right-style: solid; border-right-color: rgb(221, 221, 221); background-image: inherit; background-size: inherit; background-attachment: inherit; background-origin: inherit; background-clip: inherit; background-color: inherit; white-space: nowrap; background-position: inherit; background-repeat: inherit; }
.CodeMirror-linenumber { padding: 0px 3px 0px 5px; text-align: right; color: rgb(153, 153, 153); }
.cm-s-inner .cm-keyword { color: rgb(119, 0, 136); }
.cm-s-inner .cm-atom, .cm-s-inner.cm-atom { color: rgb(34, 17, 153); }
.cm-s-inner .cm-number { color: rgb(17, 102, 68); }
.cm-s-inner .cm-def { color: rgb(0, 0, 255); }
.cm-s-inner .cm-variable { color: rgb(0, 0, 0); }
.cm-s-inner .cm-variable-2 { color: rgb(0, 85, 170); }
.cm-s-inner .cm-variable-3 { color: rgb(0, 136, 85); }
.cm-s-inner .cm-string { color: rgb(170, 17, 17); }
.cm-s-inner .cm-property { color: rgb(0, 0, 0); }
.cm-s-inner .cm-operator { color: rgb(152, 26, 26); }
.cm-s-inner .cm-comment, .cm-s-inner.cm-comment { color: rgb(170, 85, 0); }
.cm-s-inner .cm-string-2 { color: rgb(255, 85, 0); }
.cm-s-inner .cm-meta { color: rgb(85, 85, 85); }
.cm-s-inner .cm-qualifier { color: rgb(85, 85, 85); }
.cm-s-inner .cm-builtin { color: rgb(51, 0, 170); }
.cm-s-inner .cm-bracket { color: rgb(153, 153, 119); }
.cm-s-inner .cm-tag { color: rgb(17, 119, 0); }
.cm-s-inner .cm-attribute { color: rgb(0, 0, 204); }
.cm-s-inner .cm-header, .cm-s-inner.cm-header { color: rgb(0, 0, 255); }
.cm-s-inner .cm-quote, .cm-s-inner.cm-quote { color: rgb(0, 153, 0); }
.cm-s-inner .cm-hr, .cm-s-inner.cm-hr { color: rgb(153, 153, 153); }
.cm-s-inner .cm-link, .cm-s-inner.cm-link { color: rgb(0, 0, 204); }
.cm-negative { color: rgb(221, 68, 68); }
.cm-positive { color: rgb(34, 153, 34); }
.cm-header, .cm-strong { font-weight: 700; }
.cm-del { text-decoration: line-through; }
.cm-em { font-style: italic; }
.cm-link { text-decoration: underline; }
.cm-error { color: red; }
.cm-invalidchar { color: red; }
.cm-constant { color: rgb(38, 139, 210); }
.cm-defined { color: rgb(181, 137, 0); }
div.CodeMirror span.CodeMirror-matchingbracket { color: rgb(0, 255, 0); }
div.CodeMirror span.CodeMirror-nonmatchingbracket { color: rgb(255, 34, 34); }
.cm-s-inner .CodeMirror-activeline-background { background-image: inherit; background-size: inherit; background-attachment: inherit; background-origin: inherit; background-clip: inherit; background-color: inherit; background-position: inherit; background-repeat: inherit; }
.CodeMirror { position: relative; overflow: hidden; }
.CodeMirror-scroll { height: 100%; outline: 0px; position: relative; box-sizing: content-box; background-image: inherit; background-size: inherit; background-attachment: inherit; background-origin: inherit; background-clip: inherit; background-color: inherit; background-position: inherit; background-repeat: inherit; }
.CodeMirror-sizer { position: relative; }
.CodeMirror-gutter-filler, .CodeMirror-hscrollbar, .CodeMirror-scrollbar-filler, .CodeMirror-vscrollbar { position: absolute; z-index: 6; display: none; outline: 0px; }
.CodeMirror-vscrollbar { right: 0px; top: 0px; overflow: hidden; }
.CodeMirror-hscrollbar { bottom: 0px; left: 0px; overflow: auto hidden; }
.CodeMirror-scrollbar-filler { right: 0px; bottom: 0px; }
.CodeMirror-gutter-filler { left: 0px; bottom: 0px; }
.CodeMirror-gutters { position: absolute; left: 0px; top: 0px; padding-bottom: 10px; z-index: 3; overflow-y: hidden; }
.CodeMirror-gutter { white-space: normal; height: 100%; box-sizing: content-box; padding-bottom: 30px; margin-bottom: -32px; display: inline-block; }
.CodeMirror-gutter-wrapper { position: absolute; z-index: 4; border: none !important; background-position: 0px 0px !important; }
.CodeMirror-gutter-background { position: absolute; top: 0px; bottom: 0px; z-index: 4; }
.CodeMirror-gutter-elt { position: absolute; cursor: default; z-index: 4; }
.CodeMirror-lines { cursor: text; }
.CodeMirror pre { border-radius: 0px; border-width: 0px; font-family: inherit; font-size: inherit; margin: 0px; white-space: pre; word-wrap: normal; color: inherit; z-index: 2; position: relative; overflow: visible; background-position: 0px 0px; }
.CodeMirror-wrap pre { word-wrap: break-word; white-space: pre-wrap; word-break: normal; }
.CodeMirror-code pre { border-right-width: 30px; border-right-style: solid; border-right-color: transparent; width: fit-content; }
.CodeMirror-wrap .CodeMirror-code pre { border-right-style: none; width: auto; }
.CodeMirror-linebackground { position: absolute; inset: 0px; z-index: 0; }
.CodeMirror-linewidget { position: relative; z-index: 2; overflow: auto; }
.CodeMirror-wrap .CodeMirror-scroll { overflow-x: hidden; }
.CodeMirror-measure { position: absolute; width: 100%; height: 0px; overflow: hidden; visibility: hidden; }
.CodeMirror-measure pre { position: static; }
/* Cursor element is hidden by default; the duplicate rule that only repeated `visibility: hidden` was folded into this one. */
.CodeMirror div.CodeMirror-cursor { position: absolute; visibility: hidden; border-right-style: none; width: 0px; }
.CodeMirror-focused div.CodeMirror-cursor { visibility: inherit; }
.cm-searching { background-color: rgba(255, 255, 0, 0.4); }
span.cm-underlined { text-decoration: underline; }
span.cm-strikethrough { text-decoration: line-through; }
.cm-tw-syntaxerror { color: rgb(255, 255, 255); background-color: rgb(153, 0, 0); }
.cm-tw-deleted { text-decoration: line-through; }
.cm-tw-header5 { font-weight: 700; }
.cm-tw-listitem:first-child { padding-left: 10px; }
.cm-tw-box { border-style: solid; border-right-width: 1px; border-bottom-width: 1px; border-left-width: 1px; border-color: inherit; border-top-width: 0px !important; }
.cm-tw-underline { text-decoration: underline; }
@media print {
.CodeMirror div.CodeMirror-cursor { visibility: hidden; }
}
:root {
--side-bar-bg-color: #fafafa;
--control-text-color: #777;
}
@include-when-export url(https://fonts.loli.net/css?family=Open+Sans:400italic,700italic,700,400&subset=latin,latin-ext);
/* open-sans-regular - latin-ext_latin */
/* open-sans-italic - latin-ext_latin */
/* open-sans-700 - latin-ext_latin */
/* open-sans-700italic - latin-ext_latin */
html {
font-size: 16px;
-webkit-font-smoothing: antialiased;
}
body {
font-family: "Open Sans","Clear Sans", "Helvetica Neue", Helvetica, Arial, 'Segoe UI Emoji', sans-serif;
color: rgb(51, 51, 51);
line-height: 1.6;
}
#write {
max-width: 860px;
margin: 0 auto;
padding: 30px;
padding-bottom: 100px;
}
@media only screen and (min-width: 1400px) {
#write {
max-width: 1024px;
}
}
@media only screen and (min-width: 1800px) {
#write {
max-width: 1200px;
}
}
#write > ul:first-child,
#write > ol:first-child{
margin-top: 30px;
}
a {
color: #4183C4;
}
h1,
h2,
h3,
h4,
h5,
h6 {
position: relative;
margin-top: 1rem;
margin-bottom: 1rem;
font-weight: bold;
line-height: 1.4;
cursor: text;
}
h1:hover a.anchor,
h2:hover a.anchor,
h3:hover a.anchor,
h4:hover a.anchor,
h5:hover a.anchor,
h6:hover a.anchor {
text-decoration: none;
}
/* Inline <tt>/<code> inside any heading level inherits the heading's font size. */
h1 tt, h1 code,
h2 tt, h2 code,
h3 tt, h3 code,
h4 tt, h4 code,
h5 tt, h5 code,
h6 tt, h6 code {
  font-size: inherit;
}
h1 {
font-size: 2.25em;
line-height: 1.2;
border-bottom: 1px solid #eee;
}
h2 {
font-size: 1.75em;
line-height: 1.225;
border-bottom: 1px solid #eee;
}
/*@media print {
.typora-export h1,
.typora-export h2 {
border-bottom: none;
padding-bottom: initial;
}
.typora-export h1::after,
.typora-export h2::after {
content: "";
display: block;
height: 100px;
margin-top: -96px;
border-top: 1px solid #eee;
}
}*/
h3 {
font-size: 1.5em;
line-height: 1.43;
}
h4 {
font-size: 1.25em;
}
h5 {
font-size: 1em;
}
h6 {
font-size: 1em;
color: #777;
}
p,
blockquote,
ul,
ol,
dl,
table{
margin: 0.8em 0;
}
li>ol,
li>ul {
margin: 0 0;
}
hr {
height: 2px;
padding: 0;
margin: 16px 0;
background-color: #e7e7e7;
border: 0 none;
overflow: hidden;
box-sizing: content-box;
}
li p.first {
display: inline-block;
}
ul,
ol {
padding-left: 30px;
}
ul:first-child,
ol:first-child {
margin-top: 0;
}
ul:last-child,
ol:last-child {
margin-bottom: 0;
}
blockquote {
border-left: 4px solid #dfe2e5;
padding: 0 15px;
color: #777777;
}
blockquote blockquote {
padding-right: 0;
}
table {
padding: 0;
word-break: initial;
}
table tr {
border: 1px solid #dfe2e5;
margin: 0;
padding: 0;
}
table tr:nth-child(2n),
thead {
background-color: #f8f8f8;
}
table th {
font-weight: bold;
border: 1px solid #dfe2e5;
border-bottom: 0;
margin: 0;
padding: 6px 13px;
}
table td {
border: 1px solid #dfe2e5;
margin: 0;
padding: 6px 13px;
}
table th:first-child,
table td:first-child {
margin-top: 0;
}
table th:last-child,
table td:last-child {
margin-bottom: 0;
}
.CodeMirror-lines {
padding-left: 4px;
}
.code-tooltip {
box-shadow: 0 1px 1px 0 rgba(0,28,36,.3);
border-top: 1px solid #eef2f2;
}
/* Shared box styling for fenced code blocks and inline code.
   The former `padding: 0;` was overridden by the declaration right after it, so only the effective padding is kept. */
.md-fences,
code,
tt {
  border: 1px solid #e7eaed;
  background-color: #f8f8f8;
  border-radius: 3px;
  padding: 2px 4px 0px 4px;
  font-size: 0.9em;
}
code {
background-color: #f3f4f4;
padding: 0 2px 0 2px;
}
.md-fences {
margin-bottom: 15px;
margin-top: 15px;
padding-top: 8px;
padding-bottom: 6px;
}
.md-task-list-item > input {
margin-left: -1.3em;
}
@media print {
html {
font-size: 13px;
}
table,
pre {
page-break-inside: avoid;
}
pre {
word-wrap: break-word;
}
}
.md-fences {
background-color: #f8f8f8;
}
#write pre.md-meta-block {
padding: 1rem;
font-size: 85%;
line-height: 1.45;
background-color: #f7f7f7;
border: 0;
border-radius: 3px;
color: #777777;
margin-top: 0 !important;
}
.mathjax-block>.code-tooltip {
bottom: .375rem;
}
.md-mathjax-midline {
background: #fafafa;
}
/* Offsets for the ::before pseudo-element of focused h3-h6 that are direct children of #write. */
#write > h3.md-focus::before {
  left: -1.5625rem;
  top: .375rem;
}
/* h4, h5 and h6 share the same offsets. */
#write > h4.md-focus::before,
#write > h5.md-focus::before,
#write > h6.md-focus::before {
  left: -1.5625rem;
  top: .285714286rem;
}
.md-image>.md-meta {
/*border: 1px solid #ddd;*/
border-radius: 3px;
padding: 2px 0px 0px 4px;
font-size: 0.9em;
color: inherit;
}
.md-tag {
color: #a7a7a7;
opacity: 1;
}
.md-toc {
margin-top:20px;
padding-bottom:20px;
}
.sidebar-tabs {
border-bottom: none;
}
#typora-quick-open {
border: 1px solid #ddd;
background-color: #f8f8f8;
}
#typora-quick-open-item {
background-color: #FAFAFA;
border-color: #FEFEFE #e5e5e5 #e5e5e5 #eee;
border-style: solid;
border-width: 1px;
}
/** focus mode */
.on-focus-mode blockquote {
border-left-color: rgba(85, 85, 85, 0.12);
}
header, .context-menu, .megamenu-content, footer{
font-family: "Segoe UI", "Arial", sans-serif;
}
.file-node-content:hover .file-node-icon,
.file-node-content:hover .file-node-open-state{
visibility: visible;
}
/* Sidebar background in mac seamless mode. */
.mac-seamless-mode #typora-sidebar {
/* Literal color first: it stays in effect if the var() declaration below is not understood. */
background-color: #fafafa;
/* Otherwise this later declaration wins and takes the value of --side-bar-bg-color. */
background-color: var(--side-bar-bg-color);
}
.md-lang {
color: #b4654d;
}
/*.html-for-mac {
--item-hover-bg-color: #E6F0FE;
}*/
#md-notification .btn {
border: 0;
}
.dropdown-menu .divider {
border-color: #e5e5e5;
opacity: 0.4;
}
.ty-preferences .window-content {
background-color: #fafafa;
}
.ty-preferences .nav-group-item.active {
color: white;
background: #999;
}
.menu-item-container a.menu-style-btn {
background-color: #f5f8fa;
background-image: linear-gradient( 180deg , hsla(0, 0%, 100%, 0.8), hsla(0, 0%, 100%, 0));
}
/* Print layout: no page margins, and no horizontal padding on the exported
   body or any padding on the #write container. */
@media print {
    @page {
        margin: 0;
    }
    body.typora-export {
        padding-left: 0;
        padding-right: 0;
    }
    #write {
        padding: 0;
    }
}
</style><title>03_aepGenomeAnnotation</title>
</head>
<body class='typora-export'><div class='typora-export-content'>
<div id='write' class=''><h1 id='aep-genome-gene-model-generation'><span>AEP Genome Gene Model Generation</span></h1><p><span>This document covers the generation of gene models for the strain AEP </span><em><span>H. vulgaris</span></em><span> genome assembly from repeat annotation to final gene models. It also covers the generation of functional annotations for the AEP gene models. Finally, this document describes the approach used to benchmark ATAC-seq and RNA-seq read mapping efficiency for the 105 and AEP assemblies. The starting point for this document is the finalized, non-masked AEP assembly, the creation of which is described in </span><code>01_aepGenomeAssembly.md</code><span>. </span></p><p><span>This gene model prediction process entailed generating an initial set of annotations with BRAKER2 using protein hints from a custom metazoan proteome database and transcript hints using whole-animal </span><em><span>Hydra</span></em><span> RNA-seq data. We then supplemented these gene models with a second set of predictions generated with exonerate using the </span><em><span>Hydra</span></em><span> LRv2 transcriptome and a custom database of </span><em><span>Hydra</span></em><span> GenBank mRNA sequences. The final set of gene models was generated with the PASA pipeline by using a new transcriptome assembly to augment the splice isoform and UTR annotations. 
We generated functional annotations using OrthoFinder, InterProScan, and BLAST searches.</span></p><div class='md-toc' mdtype='toc'><p class="md-toc-content" role="list"><span role="listitem" class="md-toc-item md-toc-h1" data-ref="n0"><a class="md-toc-inner" style="" href="#aep-genome-gene-model-generation">AEP Genome Gene Model Generation</a></span><span role="listitem" class="md-toc-item md-toc-h2" data-ref="n5"><a class="md-toc-inner" style="" href="#generating-hints-for-gene-predictions">Generating Hints for Gene Predictions</a></span><span role="listitem" class="md-toc-item md-toc-h3" data-ref="n6"><a class="md-toc-inner" style="" href="#transcriptomic-hints">Transcriptomic Hints</a></span><span role="listitem" class="md-toc-item md-toc-h4" data-ref="n7"><a class="md-toc-inner" style="" href="#aligning-whole-animal-rna-seq-data-to-the-aep-assembly">Aligning Whole-Animal RNA-seq Data to the AEP Assembly</a></span><span role="listitem" class="md-toc-item md-toc-h4" data-ref="n25"><a class="md-toc-inner" style="" href="#generating-an-aep-transcriptome-for-use-in-gene-predictions">Generating an AEP Transcriptome for Use in Gene Predictions</a></span><span role="listitem" class="md-toc-item md-toc-h3" data-ref="n33"><a class="md-toc-inner" style="" href="#compiling-a-protein-hints-database">Compiling a Protein Hints Database</a></span><span role="listitem" class="md-toc-item md-toc-h4" data-ref="n36"><a class="md-toc-inner" style="" href="#translating-transcriptome-sources-into-proteomes">Translating transcriptome sources into proteomes</a></span><span role="listitem" class="md-toc-item md-toc-h5" data-ref="n38"><a class="md-toc-inner" style="" href="#creating-a-custom-protein-database-to-guide-orf-selection">Creating a Custom Protein Database to Guide ORF Selection</a></span><span role="listitem" class="md-toc-item md-toc-h5" data-ref="n49"><a class="md-toc-inner" style="" href="#translating-the-h-echinata-transcriptome">Translating the <em>H. 
echinata</em> transcriptome</a></span><span role="listitem" class="md-toc-item md-toc-h5" data-ref="n55"><a class="md-toc-inner" style="" href="#translating-brown-hydra-transcriptomes">Translating Brown <em>Hydra</em> Transcriptomes</a></span><span role="listitem" class="md-toc-item md-toc-h4" data-ref="n65"><a class="md-toc-inner" style="" href="#final-proteome-compilation-and-formating">Final Proteome Compilation and Formating</a></span><span role="listitem" class="md-toc-item md-toc-h2" data-ref="n71"><a class="md-toc-inner" style="" href="#performing-ab-initio-gene-predictions">Performing Ab Initio Gene Predictions</a></span><span role="listitem" class="md-toc-item md-toc-h3" data-ref="n79"><a class="md-toc-inner" style="" href="#reformatingfixing-braker2-gff3-files">Reformating/Fixing Braker2 GFF3 Files</a></span><span role="listitem" class="md-toc-item md-toc-h2" data-ref="n90"><a class="md-toc-inner" style="" href="#supplementing-gene-models-using-exonerate">Supplementing Gene Models Using Exonerate</a></span><span role="listitem" class="md-toc-item md-toc-h3" data-ref="n92"><a class="md-toc-inner" style="" href="#compiling-the-input-sequences-to-be-used-for-alignment">Compiling the Input Sequences to Be Used for Alignment</a></span><span role="listitem" class="md-toc-item md-toc-h3" data-ref="n100"><a class="md-toc-inner" style="" href="#pipeline-for-generating-gene-models-from-exonerate-alignments">Pipeline for Generating Gene Models from Exonerate Alignments</a></span><span role="listitem" class="md-toc-item md-toc-h3" data-ref="n117"><a class="md-toc-inner" style="" href="#running-the-exonerate-pipeline">Running the Exonerate Pipeline</a></span><span role="listitem" class="md-toc-item md-toc-h2" data-ref="n135"><a class="md-toc-inner" style="" href="#curating-and-combining-exonerate-and-braker2-gene-models">Curating and Combining Exonerate and Braker2 Gene Models</a></span><span role="listitem" class="md-toc-item md-toc-h3" data-ref="n136"><a 
class="md-toc-inner" style="" href="#merging-the-braker2-and-exonerate-gene-models">Merging the Braker2 and Exonerate Gene Models</a></span><span role="listitem" class="md-toc-item md-toc-h3" data-ref="n153"><a class="md-toc-inner" style="" href="#removing-tes-and-short-gene-models">Removing TEs and Short Gene Models</a></span><span role="listitem" class="md-toc-item md-toc-h3" data-ref="n165"><a class="md-toc-inner" style="" href="#renaming-gene-models">Renaming Gene Models</a></span><span role="listitem" class="md-toc-item md-toc-h2" data-ref="n172"><a class="md-toc-inner" style="" href="#updating-gene-models-with-pasa">Updating Gene Models with PASA</a></span><span role="listitem" class="md-toc-item md-toc-h2" data-ref="n187"><a class="md-toc-inner" style="" href="#polishing-and-finalizing-gene-models">Polishing and Finalizing Gene Models</a></span><span role="listitem" class="md-toc-item md-toc-h2" data-ref="n203"><a class="md-toc-inner" style="" href="#generating-functional-annotations">Generating Functional Annotations</a></span><span role="listitem" class="md-toc-item md-toc-h3" data-ref="n205"><a class="md-toc-inner" style="" href="#predicting-protein-domains-using-interproscan">Predicting Protein Domains Using InterProScan</a></span><span role="listitem" class="md-toc-item md-toc-h3" data-ref="n210"><a class="md-toc-inner" style="" href="#predicting-orthology-using-orthofinder">Predicting Orthology Using OrthoFinder</a></span><span role="listitem" class="md-toc-item md-toc-h3" data-ref="n242"><a class="md-toc-inner" style="" href="#blasting-against-uniprot-and-genbank">BLASTing Against UniProt and GenBank</a></span><span role="listitem" class="md-toc-item md-toc-h3" data-ref="n254"><a class="md-toc-inner" style="" href="#combining-different-annotation-sources">Combining Different Annotation Sources</a></span><span role="listitem" class="md-toc-item md-toc-h2" data-ref="n260"><a class="md-toc-inner" style="" 
href="#benchmarking-mapping-statistics-for-the-aep-and-105-genome-assemblies">Benchmarking Mapping Statistics for the AEP and 105 Genome Assemblies</a></span><span role="listitem" class="md-toc-item md-toc-h3" data-ref="n262"><a class="md-toc-inner" style="" href="#aligning-whole-animal-rna-seq-data-to-the-aep-and-105-reference-genomes">Aligning Whole-Animal RNA-seq Data to the AEP and 105 Reference Genomes</a></span><span role="listitem" class="md-toc-item md-toc-h3" data-ref="n288"><a class="md-toc-inner" style="" href="#aligning-whole-animal-atac-seq-data-to-the-aep-and-105-reference-genomes">Aligning Whole-Animal ATAC-seq Data to the AEP and 105 Reference Genomes</a></span><span role="listitem" class="md-toc-item md-toc-h3" data-ref="n292"><a class="md-toc-inner" style="" href="#plotting-differences-in-mapping-rates">Plotting Differences in Mapping Rates</a></span><span role="listitem" class="md-toc-item md-toc-h2" data-ref="n297"><a class="md-toc-inner" style="" href="#files-associated-with-this-document">Files Associated with This Document</a></span></p></div><h2 id='generating-hints-for-gene-predictions'><span>Generating Hints for Gene Predictions</span></h2><h3 id='transcriptomic-hints'><span>Transcriptomic Hints</span></h3><h4 id='aligning-whole-animal-rna-seq-data-to-the-aep-assembly'><span>Aligning Whole-Animal RNA-seq Data to the AEP Assembly</span></h4><p><span>To provide transcriptomic data for the gene prediction software, we made use of four paired end whole animal RNA-seq libraries generated from various AEP-derived transgenic lines. 
In the file names below, W indicates data from the watermelon line, O indicates data from the operon line, IW indicates data from the inverse watermelon line, and E indicates data from the enGreen1 line.</span></p><pre class="md-fences md-end-block ty-contain-cm modeLoaded" spellcheck="false" lang="bash"><div class="CodeMirror cm-s-inner cm-s-null-scroll CodeMirror-wrap" lang="bash"><div style="overflow: hidden; position: relative; width: 3px; height: 0px; top: 9px; left: 8px;"><textarea autocorrect="off" autocapitalize="off" spellcheck="false" tabindex="0" style="position: absolute; bottom: -1em; padding: 0px; width: 1000px; height: 1em; outline: none;"></textarea></div><div class="CodeMirror-scrollbar-filler" cm-not-content="true"></div><div class="CodeMirror-gutter-filler" cm-not-content="true"></div><div class="CodeMirror-scroll" tabindex="-1"><div class="CodeMirror-sizer" style="margin-left: 0px; margin-bottom: 0px; border-right-width: 0px; padding-right: 0px; padding-bottom: 0px;"><div style="position: relative; top: 0px;"><div class="CodeMirror-lines" role="presentation"><div role="presentation" style="position: relative; outline: none;"><div class="CodeMirror-measure"><pre><span>xxxxxxxxxx</span></pre></div><div class="CodeMirror-measure"></div><div style="position: relative; z-index: 1;"></div><div class="CodeMirror-code" role="presentation" style=""><div class="CodeMirror-activeline" style="position: relative;"><div class="CodeMirror-activeline-background CodeMirror-linebackground"></div><div class="CodeMirror-gutter-background CodeMirror-activeline-gutter" style="left: 0px; width: 0px;"></div><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;">WHOLE_W_CGATGT_L006_R2_all.fastq.gz</span></pre></div><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;">WHOLE_W_CGATGT_L006_R1_all.fastq.gz</span></pre><pre class=" CodeMirror-line " 
role="presentation"><span role="presentation" style="padding-right: 0.1px;">WHOLE_O_ACAGTG_L006_R2_all.fastq.gz</span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;">WHOLE_O_ACAGTG_L006_R1_all.fastq.gz</span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;">WHOLE_IW_TGACCA_L006_R2_all.fastq.gz</span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;">WHOLE_IW_TGACCA_L006_R1_all.fastq.gz</span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;">WHOLE_E_GCCAAT_L006_R2_all.fastq.gz</span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;">WHOLE_E_GCCAAT_L006_R1_all.fastq.gz</span></pre></div></div></div></div></div><div style="position: absolute; height: 0px; width: 1px; border-bottom-width: 0px; border-bottom-style: solid; border-bottom-color: transparent; top: 176px;"></div><div class="CodeMirror-gutters" style="display: none; height: 176px;"></div></div></div></pre><p><span>In addition we generated two PE RNA-seq libraries from whole male and female Kiel AEP polyps (non-transgenic):</span></p><pre class="md-fences md-end-block ty-contain-cm modeLoaded" spellcheck="false" lang="bash"><div class="CodeMirror cm-s-inner cm-s-null-scroll CodeMirror-wrap" lang="bash"><div style="overflow: hidden; position: relative; width: 3px; height: 0px; top: 9px; left: 8px;"><textarea autocorrect="off" autocapitalize="off" spellcheck="false" tabindex="0" style="position: absolute; bottom: -1em; padding: 0px; width: 1000px; height: 1em; outline: none;"></textarea></div><div class="CodeMirror-scrollbar-filler" cm-not-content="true"></div><div class="CodeMirror-gutter-filler" cm-not-content="true"></div><div class="CodeMirror-scroll" tabindex="-1"><div class="CodeMirror-sizer" 
style="margin-left: 0px; margin-bottom: 0px; border-right-width: 0px; padding-right: 0px; padding-bottom: 0px;"><div style="position: relative; top: 0px;"><div class="CodeMirror-lines" role="presentation"><div role="presentation" style="position: relative; outline: none;"><div class="CodeMirror-measure"><pre><span>xxxxxxxxxx</span></pre></div><div class="CodeMirror-measure"></div><div style="position: relative; z-index: 1;"></div><div class="CodeMirror-code" role="presentation"><div class="CodeMirror-activeline" style="position: relative;"><div class="CodeMirror-activeline-background CodeMirror-linebackground"></div><div class="CodeMirror-gutter-background CodeMirror-activeline-gutter" style="left: 0px; width: 0px;"></div><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;">F3_S21_L001_R1_001.fastq.gz</span></pre></div><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;">F3_S21_L001_R2_001.fastq.gz</span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;">M3_S22_L001_R2_001.fastq.gz</span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;">M3_S22_L001_R1_001.fastq.gz</span></pre></div></div></div></div></div><div style="position: absolute; height: 0px; width: 1px; border-bottom-width: 0px; border-bottom-style: solid; border-bottom-color: transparent; top: 88px;"></div><div class="CodeMirror-gutters" style="display: none; height: 88px;"></div></div></div></pre><p><span>Prior to performing the analysis the file names were simplified as follows:</span></p><pre class="md-fences md-end-block ty-contain-cm modeLoaded" spellcheck="false" lang="bash"><div class="CodeMirror cm-s-inner cm-s-null-scroll CodeMirror-wrap" lang="bash"><div style="overflow: hidden; position: relative; width: 3px; height: 0px; top: 9px; left: 8px;"><textarea 
autocorrect="off" autocapitalize="off" spellcheck="false" tabindex="0" style="position: absolute; bottom: -1em; padding: 0px; width: 1000px; height: 1em; outline: none;"></textarea></div><div class="CodeMirror-scrollbar-filler" cm-not-content="true"></div><div class="CodeMirror-gutter-filler" cm-not-content="true"></div><div class="CodeMirror-scroll" tabindex="-1"><div class="CodeMirror-sizer" style="margin-left: 0px; margin-bottom: 0px; border-right-width: 0px; padding-right: 0px; padding-bottom: 0px;"><div style="position: relative; top: 0px;"><div class="CodeMirror-lines" role="presentation"><div role="presentation" style="position: relative; outline: none;"><div class="CodeMirror-measure"><pre><span>xxxxxxxxxx</span></pre></div><div class="CodeMirror-measure"></div><div style="position: relative; z-index: 1;"></div><div class="CodeMirror-code" role="presentation" style=""><div class="CodeMirror-activeline" style="position: relative;"><div class="CodeMirror-activeline-background CodeMirror-linebackground"></div><div class="CodeMirror-gutter-background CodeMirror-activeline-gutter" style="left: 0px; width: 0px;"></div><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;">W_R1.fastq.gz</span></pre></div><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;">W_R2.fastq.gz</span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;">O_R1.fastq.gz</span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;">O_R2.fastq.gz</span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;">IW_R1.fastq.gz</span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;">IW_R2.fastq.gz</span></pre><pre class=" CodeMirror-line " 
role="presentation"><span role="presentation" style="padding-right: 0.1px;">E_R1.fastq.gz</span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;">E_R2.fastq.gz</span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;">F3_R1.fastq.gz</span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;">F3_R2.fastq.gz</span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;">M3_R1.fastq.gz</span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;">M3_R2.fastq.gz</span></pre></div></div></div></div></div><div style="position: absolute; height: 0px; width: 1px; border-bottom-width: 0px; border-bottom-style: solid; border-bottom-color: transparent; top: 264px;"></div><div class="CodeMirror-gutters" style="display: none; height: 264px;"></div></div></div></pre><p><span>Reads were then processed with trimmomatic (v0.36) to remove low quality base calls and sequencing adapter contamination</span></p><p><span>(</span><em><span>01_prepHints/trim.sh</span></em><span>)</span></p><pre class="md-fences md-end-block ty-contain-cm modeLoaded" spellcheck="false" lang="bash" style="break-inside: unset;"><div class="CodeMirror cm-s-inner cm-s-null-scroll CodeMirror-wrap" lang="bash"><div style="overflow: hidden; position: relative; width: 3px; height: 0px; top: 9px; left: 8px;"><textarea autocorrect="off" autocapitalize="off" spellcheck="false" tabindex="0" style="position: absolute; bottom: -1em; padding: 0px; width: 1000px; height: 1em; outline: none;"></textarea></div><div class="CodeMirror-scrollbar-filler" cm-not-content="true"></div><div class="CodeMirror-gutter-filler" cm-not-content="true"></div><div class="CodeMirror-scroll" tabindex="-1"><div class="CodeMirror-sizer" 
style="margin-left: 0px; margin-bottom: 0px; border-right-width: 0px; padding-right: 0px; padding-bottom: 0px;"><div style="position: relative; top: 0px;"><div class="CodeMirror-lines" role="presentation"><div role="presentation" style="position: relative; outline: none;"><div class="CodeMirror-measure"><pre><span>xxxxxxxxxx</span></pre></div><div class="CodeMirror-measure"></div><div style="position: relative; z-index: 1;"></div><div class="CodeMirror-code" role="presentation" style=""><div class="CodeMirror-activeline" style="position: relative;"><div class="CodeMirror-activeline-background CodeMirror-linebackground"></div><div class="CodeMirror-gutter-background CodeMirror-activeline-gutter" style="left: 0px; width: 0px;"></div><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span class="cm-meta">#! /bin/bash</span></span></pre></div><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span class="cm-comment">#SBATCH --job-name=trim</span></span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span class="cm-comment">#SBATCH -c 32</span></span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span class="cm-comment">#SBATCH -t 60-0</span></span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span class="cm-comment">#SBATCH --mem=36G</span></span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span class="cm-comment">#SBATCH --error=trim.err</span></span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span class="cm-comment">#SBATCH --output=trim.out</span></span></pre><pre class=" CodeMirror-line " role="presentation"><span 
role="presentation" style="padding-right: 0.1px;"><span cm-text="" cm-zwsp="">
</span></span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;">module load trimmomatic</span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span cm-text="" cm-zwsp="">
</span></span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span class="cm-def">prefixes</span><span class="cm-operator">=</span>( E F3 IW M3 O W )</span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span cm-text="" cm-zwsp="">
</span></span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span class="cm-keyword">for</span> arg <span class="cm-keyword">in</span> <span class="cm-string">"</span><span class="cm-def">${prefixes[@]}</span><span class="cm-string">"</span></span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span class="cm-keyword">do</span></span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"> java <span class="cm-attribute">-jar</span> /share/apps/Trimmomatic-0.36//trimmomatic-0.36.jar PE <span class="cm-attribute">-threads</span> <span class="cm-number">32</span> \</span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"> reads/<span class="cm-string">"</span><span class="cm-def">$arg</span><span class="cm-string">"</span>_R1.fastq.gz reads/<span class="cm-string">"</span><span class="cm-def">$arg</span><span class="cm-string">"</span>_R2.fastq.gz \</span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"> reads/<span class="cm-string">"</span><span class="cm-def">$arg</span><span class="cm-string">"</span>_R1_trim_p_fq.gz reads/<span class="cm-string">"</span><span class="cm-def">$arg</span><span class="cm-string">"</span>_R1_trim_up_fq.gz \</span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"> reads/<span class="cm-string">"</span><span class="cm-def">$arg</span><span class="cm-string">"</span>_R2_trim_p_fq.gz reads/<span class="cm-string">"</span><span class="cm-def">$arg</span><span class="cm-string">"</span>_R2_trim_up_fq.gz \</span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"> 
ILLUMINACLIP:./adapters.fa:2:30:10:2:keepBothReads LEADING:3 TRAILING:3 MINLEN:36</span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span class="cm-keyword">done</span></span></pre></div></div></div></div></div><div style="position: absolute; height: 0px; width: 1px; border-bottom-width: 0px; border-bottom-style: solid; border-bottom-color: transparent; top: 440px;"></div><div class="CodeMirror-gutters" style="display: none; height: 440px;"></div></div></div></pre><p><span>Following processing, R1 and R2 files were pooled:</span></p><pre class="md-fences md-end-block ty-contain-cm modeLoaded" spellcheck="false" lang="bash"><div class="CodeMirror cm-s-inner cm-s-null-scroll CodeMirror-wrap" lang="bash"><div style="overflow: hidden; position: relative; width: 3px; height: 0px; top: 9px; left: 8px;"><textarea autocorrect="off" autocapitalize="off" spellcheck="false" tabindex="0" style="position: absolute; bottom: -1em; padding: 0px; width: 1000px; height: 1em; outline: none;"></textarea></div><div class="CodeMirror-scrollbar-filler" cm-not-content="true"></div><div class="CodeMirror-gutter-filler" cm-not-content="true"></div><div class="CodeMirror-scroll" tabindex="-1"><div class="CodeMirror-sizer" style="margin-left: 0px; margin-bottom: 0px; border-right-width: 0px; padding-right: 0px; padding-bottom: 0px;"><div style="position: relative; top: 0px;"><div class="CodeMirror-lines" role="presentation"><div role="presentation" style="position: relative; outline: none;"><div class="CodeMirror-measure"><pre><span>xxxxxxxxxx</span></pre></div><div class="CodeMirror-measure"></div><div style="position: relative; z-index: 1;"></div><div class="CodeMirror-code" role="presentation"><div class="CodeMirror-activeline" style="position: relative;"><div class="CodeMirror-activeline-background CodeMirror-linebackground"></div><div class="CodeMirror-gutter-background CodeMirror-activeline-gutter" style="left: 0px; 
width: 0px;"></div><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span class="cm-builtin">cat</span> *1_trim_p*gz > combined_R1.fq.gz</span></pre></div><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span class="cm-builtin">cat</span> *2_trim_p*gz > combined_R2.fq.gz</span></pre></div></div></div></div></div><div style="position: absolute; height: 0px; width: 1px; border-bottom-width: 0px; border-bottom-style: solid; border-bottom-color: transparent; top: 44px;"></div><div class="CodeMirror-gutters" style="display: none; height: 44px;"></div></div></div></pre><p><span>We then prepped the AEP genome (with interspersed repeats hard-masked) for mapping with STAR (v2.7.5c)</span></p><p><span>(</span><em><span>01_prepHints/makeRef.sh</span></em><span>)</span></p><pre class="md-fences md-end-block ty-contain-cm modeLoaded" spellcheck="false" lang="bash" style="break-inside: unset;"><div class="CodeMirror cm-s-inner cm-s-null-scroll CodeMirror-wrap" lang="bash"><div style="overflow: hidden; position: relative; width: 3px; height: 0px; top: 9px; left: 8px;"><textarea autocorrect="off" autocapitalize="off" spellcheck="false" tabindex="0" style="position: absolute; bottom: -1em; padding: 0px; width: 1000px; height: 1em; outline: none;"></textarea></div><div class="CodeMirror-scrollbar-filler" cm-not-content="true"></div><div class="CodeMirror-gutter-filler" cm-not-content="true"></div><div class="CodeMirror-scroll" tabindex="-1"><div class="CodeMirror-sizer" style="margin-left: 0px; margin-bottom: 0px; border-right-width: 0px; padding-right: 0px; padding-bottom: 0px;"><div style="position: relative; top: 0px;"><div class="CodeMirror-lines" role="presentation"><div role="presentation" style="position: relative; outline: none;"><div class="CodeMirror-measure"><pre><span>xxxxxxxxxx</span></pre></div><div class="CodeMirror-measure"></div><div style="position: 
relative; z-index: 1;"></div><div class="CodeMirror-code" role="presentation" style=""><div class="CodeMirror-activeline" style="position: relative;"><div class="CodeMirror-activeline-background CodeMirror-linebackground"></div><div class="CodeMirror-gutter-background CodeMirror-activeline-gutter" style="left: 0px; width: 0px;"></div><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span class="cm-meta">#! /bin/bash</span></span></pre></div><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span class="cm-comment">#SBATCH --job-name=makeRef</span></span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span class="cm-comment">#SBATCH -c 24</span></span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span class="cm-comment">#SBATCH -t 60-0</span></span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span class="cm-comment">#SBATCH --mem=36G</span></span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span class="cm-comment">#SBATCH --error=makeRef.err</span></span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span class="cm-comment">#SBATCH --output=makeRef.out</span></span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span class="cm-comment">#SBATCH -p med</span></span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span cm-text="" cm-zwsp="">
</span></span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;">STAR <span class="cm-attribute">--version</span></span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span cm-text="" cm-zwsp="">
</span></span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;">STAR <span class="cm-attribute">--runThreadN</span> <span class="cm-number">24</span> \</span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"> <span class="cm-attribute">--runMode</span> genomeGenerate \</span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"> <span class="cm-attribute">--genomeDir</span> ./ref \</span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"> <span class="cm-attribute">--genomeFastaFiles</span> aep.genome.cplxmask.fa \</span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"> <span class="cm-attribute">--genomeSAindexNbases</span> <span class="cm-number">13</span></span></pre></div></div></div></div></div><div style="position: absolute; height: 0px; width: 1px; border-bottom-width: 0px; border-bottom-style: solid; border-bottom-color: transparent; top: 352px;"></div><div class="CodeMirror-gutters" style="display: none; height: 352px;"></div></div></div></pre><p><span>Next, we mapped the reads to the genome:</span></p><p><span>(</span><em><span>01_prepHints/runAlign.sh</span></em><span>)</span></p><pre class="md-fences md-end-block ty-contain-cm modeLoaded" spellcheck="false" lang="bash" style="break-inside: unset;"><div class="CodeMirror cm-s-inner cm-s-null-scroll CodeMirror-wrap" lang="bash"><div style="overflow: hidden; position: relative; width: 3px; height: 0px; top: 9px; left: 8px;"><textarea autocorrect="off" autocapitalize="off" spellcheck="false" tabindex="0" style="position: absolute; bottom: -1em; padding: 0px; width: 1000px; height: 1em; outline: none;"></textarea></div><div class="CodeMirror-scrollbar-filler" cm-not-content="true"></div><div 
class="CodeMirror-gutter-filler" cm-not-content="true"></div><div class="CodeMirror-scroll" tabindex="-1"><div class="CodeMirror-sizer" style="margin-left: 0px; margin-bottom: 0px; border-right-width: 0px; padding-right: 0px; padding-bottom: 0px;"><div style="position: relative; top: 0px;"><div class="CodeMirror-lines" role="presentation"><div role="presentation" style="position: relative; outline: none;"><div class="CodeMirror-measure"><pre><span>xxxxxxxxxx</span></pre></div><div class="CodeMirror-measure"></div><div style="position: relative; z-index: 1;"></div><div class="CodeMirror-code" role="presentation" style=""><div class="CodeMirror-activeline" style="position: relative;"><div class="CodeMirror-activeline-background CodeMirror-linebackground"></div><div class="CodeMirror-gutter-background CodeMirror-activeline-gutter" style="left: 0px; width: 0px;"></div><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span class="cm-meta">#! 
/bin/bash</span></span></pre></div><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span class="cm-comment">#SBATCH -p med </span></span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span class="cm-comment">#SBATCH --job-name=align</span></span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span class="cm-comment">#SBATCH -t 60-0</span></span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span class="cm-comment">#SBATCH --nodes=1</span></span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span class="cm-comment">#SBATCH --mem=0</span></span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span class="cm-comment">#SBATCH --exclusive</span></span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span class="cm-comment">#SBATCH --error=align.err</span></span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span class="cm-comment">#SBATCH --output=align.out</span></span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span cm-text="" cm-zwsp="">
</span></span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;">STAR <span class="cm-attribute">--version</span></span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span cm-text="" cm-zwsp="">
</span></span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;">STAR <span class="cm-attribute">--runThreadN</span> <span class="cm-number">20</span> \</span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"> <span class="cm-attribute">--genomeDir</span> ./ref \</span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"> <span class="cm-attribute">--readFilesIn</span> ../reads/combined_R1.fq.gz ../reads/combined_R2.fq.gz \</span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"> <span class="cm-attribute">--readFilesCommand</span> gunzip <span class="cm-attribute">-c</span> \</span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"> <span class="cm-attribute">--outFileNamePrefix</span> ./out/aep \</span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"> <span class="cm-attribute">--outSAMprimaryFlag</span> AllBestScore \</span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"> <span class="cm-attribute">--outSAMtype</span> BAM SortedByCoordinate \</span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"> <span class="cm-attribute">--twopassMode</span> Basic \</span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"> <span class="cm-attribute">--outFilterScoreMinOverLread</span> <span class="cm-number">0</span>.3 \</span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"> <span class="cm-attribute">--outFilterMatchNminOverLread</span> <span 
class="cm-number">0</span>.3 \</span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"> <span class="cm-attribute">--limitBAMsortRAM</span> <span class="cm-number">12316579964</span></span></pre></div></div></div></div></div><div style="position: absolute; height: 0px; width: 1px; border-bottom-width: 0px; border-bottom-style: solid; border-bottom-color: transparent; top: 506px;"></div><div class="CodeMirror-gutters" style="display: none; height: 506px;"></div></div></div></pre><h4 id='generating-an-aep-transcriptome-for-use-in-gene-predictions'><span>Generating an AEP Transcriptome for Use in Gene Predictions</span></h4><p><span>To aid in later genome annotation steps we also generated a transcriptome using this genome-mapped RNA-seq data. Although we had already generated an AEP transcriptome (LRv2 transcriptome, Siebert et al., 2019), it was not produced using any data from animals undergoing gametogenesis. This could possibly cause us to miss some transcripts specific to male or female polyps. In addition, the transcriptome was designed to have 'low redundancy', and may have omitted some splicing complexity. </span></p><p><span>We therefore sought to generate a new transcriptome that both incorporated reads from polyps producing gametes and included the full possible transcriptomic complexity in adult </span><em><span>Hydra</span></em><span>. 
</span></p><p><span>To generate the transcriptome, the mapped reads from the previous section were provided as input to the Trinity genome-guided transcriptome assembly pipeline (v2.11.0):</span></p><p><span>(</span><em><span>01_prepHints/runTrinity.sh</span></em><span>)</span></p><pre class="md-fences md-end-block ty-contain-cm modeLoaded" spellcheck="false" lang="bash" style="break-inside: unset;"><div class="CodeMirror cm-s-inner cm-s-null-scroll CodeMirror-wrap" lang="bash"><div style="overflow: hidden; position: relative; width: 3px; height: 0px; top: 9px; left: 8px;"><textarea autocorrect="off" autocapitalize="off" spellcheck="false" tabindex="0" style="position: absolute; bottom: -1em; padding: 0px; width: 1000px; height: 1em; outline: none;"></textarea></div><div class="CodeMirror-scrollbar-filler" cm-not-content="true"></div><div class="CodeMirror-gutter-filler" cm-not-content="true"></div><div class="CodeMirror-scroll" tabindex="-1"><div class="CodeMirror-sizer" style="margin-left: 0px; margin-bottom: 0px; border-right-width: 0px; padding-right: 0px; padding-bottom: 0px;"><div style="position: relative; top: 0px;"><div class="CodeMirror-lines" role="presentation"><div role="presentation" style="position: relative; outline: none;"><div class="CodeMirror-measure"><pre><span>xxxxxxxxxx</span></pre></div><div class="CodeMirror-measure"></div><div style="position: relative; z-index: 1;"></div><div class="CodeMirror-code" role="presentation" style=""><div class="CodeMirror-activeline" style="position: relative;"><div class="CodeMirror-activeline-background CodeMirror-linebackground"></div><div class="CodeMirror-gutter-background CodeMirror-activeline-gutter" style="left: 0px; width: 0px;"></div><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span class="cm-meta">#! 
/bin/bash -l</span></span></pre></div><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span class="cm-comment">#SBATCH -p med</span></span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span class="cm-comment">#SBATCH --job-name=trinity</span></span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span class="cm-comment">#SBATCH -c 24</span></span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span class="cm-comment">#SBATCH -t 60-0</span></span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span class="cm-comment">#SBATCH --mem=0</span></span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span class="cm-comment">#SBATCH --error=trinity.err</span></span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span class="cm-comment">#SBATCH --output=trinity.out</span></span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span cm-text="" cm-zwsp="">
</span></span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span class="cm-builtin">source</span> ~/perl5/perlbrew/etc/bashrc</span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span cm-text="" cm-zwsp="">
</span></span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span class="cm-builtin">source</span> venv/bin/activate</span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span cm-text="" cm-zwsp="">
</span></span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;">module load jellyfish</span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span cm-text="" cm-zwsp="">
</span></span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;">which perl</span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span cm-text="" cm-zwsp="">
</span></span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;">which bowtie2</span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span cm-text="" cm-zwsp="">
</span></span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span class="cm-def">$TRINITY_HOME</span>/Trinity <span class="cm-attribute">--genome_guided_bam</span> ../align/out/aepAligned.sortedByCoord.out.bam \</span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"> <span class="cm-attribute">--genome_guided_max_intron</span> <span class="cm-number">20000</span> \</span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"> <span class="cm-attribute">--max_memory</span> 60G <span class="cm-attribute">--CPU</span> <span class="cm-number">24</span> \</span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"> <span class="cm-attribute">--SS_lib_type</span> RF</span></pre></div></div></div></div></div><div style="position: absolute; height: 0px; width: 1px; border-bottom-width: 0px; border-bottom-style: solid; border-bottom-color: transparent; top: 506px;"></div><div class="CodeMirror-gutters" style="display: none; height: 506px;"></div></div></div></pre><p><span>Quantifying BUSCO (v5.beta_cv1) metrics for the transcriptome indicated high levels of redundancy (to be expected in a relatively unprocessed transcriptome), but high levels of overall completeness:</span></p><pre class="md-fences md-end-block ty-contain-cm modeLoaded" spellcheck="false" lang="bash"><div class="CodeMirror cm-s-inner cm-s-null-scroll CodeMirror-wrap" lang="bash"><div style="overflow: hidden; position: relative; width: 3px; height: 0px; top: 9px; left: 8px;"><textarea autocorrect="off" autocapitalize="off" spellcheck="false" tabindex="0" style="position: absolute; bottom: -1em; padding: 0px; width: 1000px; height: 1em; outline: none;"></textarea></div><div class="CodeMirror-scrollbar-filler" cm-not-content="true"></div><div 
class="CodeMirror-gutter-filler" cm-not-content="true"></div><div class="CodeMirror-scroll" tabindex="-1"><div class="CodeMirror-sizer" style="margin-left: 0px; margin-bottom: 0px; border-right-width: 0px; padding-right: 0px; padding-bottom: 0px;"><div style="position: relative; top: 0px;"><div class="CodeMirror-lines" role="presentation"><div role="presentation" style="position: relative; outline: none;"><div class="CodeMirror-measure"><pre><span>xxxxxxxxxx</span></pre></div><div class="CodeMirror-measure"></div><div style="position: relative; z-index: 1;"></div><div class="CodeMirror-code" role="presentation" style=""><div class="CodeMirror-activeline" style="position: relative;"><div class="CodeMirror-activeline-background CodeMirror-linebackground"></div><div class="CodeMirror-gutter-background CodeMirror-activeline-gutter" style="left: 0px; width: 0px;"></div><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;">docker run <span class="cm-attribute">-u</span> <span class="cm-quote">$(id -u)</span> <span class="cm-attribute">-v</span> <span class="cm-quote">$(pwd)</span>:/busco_wd ezlabgva/busco:v5.beta_cv1 busco <span class="cm-attribute">-c</span> <span class="cm-number">4</span> <span class="cm-attribute">-m</span> tran <span class="cm-attribute">-i</span> Trinity-GG.fasta <span class="cm-attribute">-o</span> trinity <span class="cm-attribute">-l</span> metazoa_odb10 <span class="cm-attribute">-f</span> </span></pre></div><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span cm-text="" cm-zwsp="">
</span></span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span class="cm-tab" role="presentation" cm-text=" "> </span>C:95.8%[S:54.1%,D:41.7%],F:0.8%,M:3.4%,n:954<span class="cm-tab" role="presentation" cm-text=" "> </span> </span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span class="cm-tab" role="presentation" cm-text=" "> </span><span class="cm-number">914</span><span class="cm-tab" role="presentation" cm-text=" "> </span>Complete BUSCOs (C)<span class="cm-tab" role="presentation" cm-text=" "> </span><span class="cm-tab" role="presentation" cm-text=" "> </span><span class="cm-tab" role="presentation" cm-text=" "> </span> </span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span class="cm-tab" role="presentation" cm-text=" "> </span><span class="cm-number">516</span><span class="cm-tab" role="presentation" cm-text=" "> </span>Complete and single-copy BUSCOs (S)<span class="cm-tab" role="presentation" cm-text=" "> </span> </span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span class="cm-tab" role="presentation" cm-text=" "> </span><span class="cm-number">398</span><span class="cm-tab" role="presentation" cm-text=" "> </span>Complete and duplicated BUSCOs (D)<span class="cm-tab" role="presentation" cm-text=" "> </span> </span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span class="cm-tab" role="presentation" cm-text=" "> </span><span class="cm-number">8</span><span class="cm-tab" role="presentation" cm-text=" "> </span>Fragmented BUSCOs (F)<span class="cm-tab" role="presentation" cm-text=" "> </span><span class="cm-tab" role="presentation" cm-text=" "> </span><span class="cm-tab" role="presentation" cm-text=" "> </span> 
</span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span class="cm-tab" role="presentation" cm-text=" "> </span><span class="cm-number">32</span><span class="cm-tab" role="presentation" cm-text=" "> </span>Missing BUSCOs (M)<span class="cm-tab" role="presentation" cm-text=" "> </span><span class="cm-tab" role="presentation" cm-text=" "> </span><span class="cm-tab" role="presentation" cm-text=" "> </span> </span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" class="cm-tab-wrap-hack" style="padding-right: 0.1px;"><span class="cm-tab" role="presentation" cm-text=" "> </span><span class="cm-number">954</span><span class="cm-tab" role="presentation" cm-text=" "> </span>Total BUSCO groups searched<span class="cm-tab" role="presentation" cm-text=" "> </span><span class="cm-tab" role="presentation" cm-text=" "> </span></span></pre></div></div></div></div></div><div style="position: absolute; height: 0px; width: 1px; border-bottom-width: 0px; border-bottom-style: solid; border-bottom-color: transparent; top: 220px;"></div><div class="CodeMirror-gutters" style="display: none; height: 220px;"></div></div></div></pre><h3 id='compiling-a-protein-hints-database'><span>Compiling a Protein Hints Database</span></h3><p><span>Protein sequences from both closely and distantly related species can also provide valuable guides for gene prediction software. Our goal was to make use of diverse metazoan proteomes, with a particular focus on cnidarian species. 
We downloaded the following proteomes to serve as hints for gene prediction:</span></p><pre class="md-fences md-end-block ty-contain-cm modeLoaded" spellcheck="false" lang="bash" style="break-inside: unset;"><div class="CodeMirror cm-s-inner cm-s-null-scroll CodeMirror-wrap" lang="bash"><div style="overflow: hidden; position: relative; width: 3px; height: 0px; top: 2363px; left: 480.390625px;"><textarea autocorrect="off" autocapitalize="off" spellcheck="false" tabindex="0" style="position: absolute; bottom: -1em; padding: 0px; width: 1000px; height: 1em; outline: none;"></textarea></div><div class="CodeMirror-scrollbar-filler" cm-not-content="true"></div><div class="CodeMirror-gutter-filler" cm-not-content="true"></div><div class="CodeMirror-scroll" tabindex="-1"><div class="CodeMirror-sizer" style="margin-left: 0px; margin-bottom: 0px; border-right-width: 0px; padding-right: 0px; padding-bottom: 0px;"><div style="position: relative; top: 0px;"><div class="CodeMirror-lines" role="presentation"><div role="presentation" style="position: relative; outline: none;"><div class="CodeMirror-measure"><pre>x</pre></div><div class="CodeMirror-measure"></div><div style="position: relative; z-index: 1;"></div><div class="CodeMirror-code" role="presentation" style=""><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;">A_diaphana.fa </span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;">https://ftp.ncbi.nlm.nih.gov/genomes/refseq/invertebrate/Exaiptasia_diaphana/latest_assembly_versions/GCF_001417965.1_Aiptasia_genome_1.1/</span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span cm-text="" cm-zwsp="">
</span></span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;">A_queenslandica.fa </span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;">https://metazoa.ensembl.org/Amphimedon_queenslandica/Info/Index</span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span cm-text="" cm-zwsp="">
</span></span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;">A_vaga.fa </span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;">https://metazoa.ensembl.org/Adineta_vaga/Info/Index</span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span cm-text="" cm-zwsp="">
</span></span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;">B_lanceolatum.fa </span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;">https://metazoa.ensembl.org/Branchiostoma_lanceolatum/Info/Index</span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span cm-text="" cm-zwsp="">
</span></span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;">B_mori.fa </span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;">https://metazoa.ensembl.org/Bombyx_mori/Info/Index</span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span cm-text="" cm-zwsp="">
</span></span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;">C_elegans.fa </span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;">https://uswest.ensembl.org/Caenorhabditis_elegans/Info/Index</span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span cm-text="" cm-zwsp="">
</span></span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;">C_hemisphaerica.fa </span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;">http://marimba.obs-vlfr.fr/node/237574</span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span cm-text="" cm-zwsp="">
</span></span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;">C_intestinalis.fa </span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;">https://uswest.ensembl.org/Ciona_intestinalis/Info/Index</span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span cm-text="" cm-zwsp="">
</span></span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;">C_milii.fa </span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;">https://uswest.ensembl.org/Callorhinchus_milii/Info/Index</span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span cm-text="" cm-zwsp="">
</span></span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;">D_melanogaster.fa </span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;">https://uswest.ensembl.org/Drosophila_melanogaster/Info/Index</span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span cm-text="" cm-zwsp="">
</span></span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;">D_pulex.fa</span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;">https://metazoa.ensembl.org/Daphnia_pulex/Info/Annotation/</span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span cm-text="" cm-zwsp="">
</span></span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;">E_muelleri.fa </span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;">https://spaces.facsci.ualberta.ca/ephybase/</span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span cm-text="" cm-zwsp="">
</span></span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;">G_gallus.fa </span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;">https://uswest.ensembl.org/Gallus_gallus/Info/Index</span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span cm-text="" cm-zwsp="">
</span></span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;">H_circumcincta.fa (obtained from authors of the study via personal communication)</span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;">https://www.pnas.org/content/116/46/22915</span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span cm-text="" cm-zwsp="">
</span></span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;">H_echinata.fa </span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;">https://research.nhgri.nih.gov/hydractinia/</span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span cm-text="" cm-zwsp="">
</span></span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;">H_miamia.fa</span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;">https://metazoa.ensembl.org/Hofstenia_miamia/Info/Index<span class="cm-def">?db</span><span class="cm-operator">=</span>core</span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span cm-text="" cm-zwsp="">
</span></span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;">H_oligactis.fa (obtained from authors of the study via personal communication)</span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;">https://www.pnas.org/content/116/46/22915</span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span cm-text="" cm-zwsp="">
</span></span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;">H_sapiens.fa </span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;">https://uswest.ensembl.org/Homo_sapiens/Info/Index</span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span cm-text="" cm-zwsp="">
</span></span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;">H_viridissima.fa</span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;">https://marinegenomics.oist.jp/hydra_viridissima_a99/viewer/download<span class="cm-def">?project_id</span><span class="cm-operator">=</span><span class="cm-number">82</span></span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span cm-text="" cm-zwsp="">
</span></span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;">H_vulgaris105.fa</span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;">https://arusha.nhgri.nih.gov/hydra/download/genemodels_proteins/hydra2.0_genemodels.aa.gz</span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span cm-text="" cm-zwsp="">
</span></span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;">H_vulgarisZurich.fa (obtained from authors of the study via personal communication)</span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;">https://www.pnas.org/content/116/46/22915</span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span cm-text="" cm-zwsp="">
</span></span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;">L_chalumnae.fa</span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;">https://uswest.ensembl.org/Latimeria_chalumnae/Info/Index</span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span cm-text="" cm-zwsp="">
</span></span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;">L_oculatus.fa</span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;">https://uswest.ensembl.org/Lepisosteus_oculatus/Info/Index</span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span cm-text="" cm-zwsp="">
</span></span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;">M_leidyi.fa</span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;">https://research.nhgri.nih.gov/mnemiopsis/download/download.cgi<span class="cm-def">?dl</span><span class="cm-operator">=</span>proteome</span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span cm-text="" cm-zwsp="">
</span></span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;">M_virulenta.fa</span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;">https://marinegenomics.oist.jp/morbakka_virulenta/viewer/download<span class="cm-def">?project_id</span><span class="cm-operator">=</span><span class="cm-number">70</span></span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span cm-text="" cm-zwsp="">
</span></span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;">N_vectensis.fa </span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;">https://simrbase.stowers.org/</span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span cm-text="" cm-zwsp="">
</span></span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;">O_bimaculoides.fa</span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;">https://metazoa.ensembl.org/Octopus_bimaculoides/Info/Index</span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span cm-text="" cm-zwsp="">
</span></span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;">P_bachei.fa </span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;">https://neurobase.rc.ufl.edu/Pleurobrachia/download</span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span cm-text="" cm-zwsp="">
</span></span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;">P_marinus.fa </span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;">https://uswest.ensembl.org/Petromyzon_marinus/Info/Index</span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span cm-text="" cm-zwsp="">
</span></span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;">P_miniata.fa </span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;">http://legacy.echinobase.org/Echinobase/PmDownload</span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span cm-text="" cm-zwsp="">
</span></span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;">P_naikaiensis.fa</span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;">http://gigadb.org/dataset/100564</span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span cm-text="" cm-zwsp="">
</span></span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;">S_callimorphus.fa</span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;">https://simrbase.stowers.org/</span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span cm-text="" cm-zwsp="">
</span></span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;">S_mediterranea.fa</span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;">https://planosphere.stowers.org/smedgd</span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span cm-text="" cm-zwsp="">
</span></span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;">S_purpuratus.fa</span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;">https://metazoa.ensembl.org/Strongylocentrotus_purpuratus/Info/Index</span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span cm-text="" cm-zwsp="">
</span></span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;">T_adhaerens.fa</span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;">http://metazoa.ensembl.org/Trichoplax_adhaerens/Info/Index</span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span cm-text="" cm-zwsp="">
</span></span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;">X_tropicalis.fa </span></pre><div class="CodeMirror-activeline" style="position: relative;"><div class="CodeMirror-activeline-background CodeMirror-linebackground"></div><div class="CodeMirror-gutter-background CodeMirror-activeline-gutter" style="left: 0px; width: 0px;"></div><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;">https://uswest.ensembl.org/Xenopus_tropicalis/Info/Index</span></pre></div></div></div></div></div></div><div style="position: absolute; height: 0px; width: 1px; border-bottom-width: 0px; border-bottom-style: solid; border-bottom-color: transparent; top: 2376px;"></div><div class="CodeMirror-gutters" style="display: none; height: 2376px;"></div></div></div></pre><h4 id='translating-transcriptome-sources-into-proteomes'><span>Translating transcriptome sources into proteomes</span></h4><p><span>Notably the files </span><code>H_vulgarisZurich.fa</code><span>, </span><code>H_oligactis.fa</code><span>, </span><code>H_circumcincta.fa</code><span>, and </span><code>H_echinata.fa</code><span> were transcriptomes, and therefore needed to be translated into protein sequences first. We translated these files using Transdecoder. 
</span></p><h5 id='creating-a-custom-protein-database-to-guide-orf-selection'><span>Creating a Custom Protein Database to Guide ORF Selection</span></h5><p><span>To guide Transdecoder's selection of possible reading frames, we used BLAST results from a custom protein database.</span></p><p><span>To generate the BLAST (v2.10.0+) database, we started with the metazoan orthoDB database:</span></p><pre class="md-fences md-end-block ty-contain-cm modeLoaded" spellcheck="false" lang="bash"><div class="CodeMirror cm-s-inner cm-s-null-scroll CodeMirror-wrap" lang="bash"><div style="overflow: hidden; position: relative; width: 3px; height: 0px; top: 9px; left: 8px;"><textarea autocorrect="off" autocapitalize="off" spellcheck="false" tabindex="0" style="position: absolute; bottom: -1em; padding: 0px; width: 1000px; height: 1em; outline: none;"></textarea></div><div class="CodeMirror-scrollbar-filler" cm-not-content="true"></div><div class="CodeMirror-gutter-filler" cm-not-content="true"></div><div class="CodeMirror-scroll" tabindex="-1"><div class="CodeMirror-sizer" style="margin-left: 0px; margin-bottom: 0px; border-right-width: 0px; padding-right: 0px; padding-bottom: 0px;"><div style="position: relative; top: 0px;"><div class="CodeMirror-lines" role="presentation"><div role="presentation" style="position: relative; outline: none;"><div class="CodeMirror-measure"><pre><span>xxxxxxxxxx</span></pre></div><div class="CodeMirror-measure"></div><div style="position: relative; z-index: 1;"></div><div class="CodeMirror-code" role="presentation"><div class="CodeMirror-activeline" style="position: relative;"><div class="CodeMirror-activeline-background CodeMirror-linebackground"></div><div class="CodeMirror-gutter-background CodeMirror-activeline-gutter" style="left: 0px; width: 0px;"></div><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span class="cm-builtin">wget</span> 
https://v100.orthodb.org/download/odb10_metazoa_fasta.tar.gz</span></pre></div><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;">tar xzvf odb10_metazoa_fasta.tar.gz</span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span class="cm-builtin">cat</span> metazoa/Rawdata/* > proteins.fa</span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span class="cm-builtin">rm</span> <span class="cm-attribute">-r</span> odb10_metazoa_fasta.tar.gz metazoa/</span></pre></div></div></div></div></div><div style="position: absolute; height: 0px; width: 1px; border-bottom-width: 0px; border-bottom-style: solid; border-bottom-color: transparent; top: 88px;"></div><div class="CodeMirror-gutters" style="display: none; height: 88px;"></div></div></div></pre><p><span>We then supplemented these sequences with RefSeq mRNA entries from cnidarians (excluding </span><em><span>Hydra</span></em><span>), which were retrieved using the following query on RefSeq:</span></p><p><code>srcdb_refseq[prop] AND ("Cnidaria"[Organism] AND biomol_mrna[PROP]) NOT "Hydra vulgaris"[Organism]</code></p><p><span>Sequences returned by this query were downloaded into the file </span><code>cnido_prot_sequence.fa</code><span>.</span></p><p><span>Before we pooled </span><code>cnido_prot_sequence.fa</code><span> and </span><code>proteins.fa</code><span>, we first removed all proteins in </span><code>cnido_prot_sequence.fa</code><span> that were already present in </span><code>proteins.fa</code><span> using CD-HIT (with a 95% sequence similarity threshold; CD-HIT v4.7):</span></p><p><span>(</span><em><span>01_prepHints/protFilt.sh</span></em><span>)</span></p><pre class="md-fences md-end-block ty-contain-cm modeLoaded" spellcheck="false" lang="bash"><div class="CodeMirror cm-s-inner cm-s-null-scroll CodeMirror-wrap" lang="bash"><div 
style="overflow: hidden; position: relative; width: 3px; height: 0px; top: 9px; left: 8px;"><textarea autocorrect="off" autocapitalize="off" spellcheck="false" tabindex="0" style="position: absolute; bottom: -1em; padding: 0px; width: 1000px; height: 1em; outline: none;"></textarea></div><div class="CodeMirror-scrollbar-filler" cm-not-content="true"></div><div class="CodeMirror-gutter-filler" cm-not-content="true"></div><div class="CodeMirror-scroll" tabindex="-1"><div class="CodeMirror-sizer" style="margin-left: 0px; margin-bottom: 0px; border-right-width: 0px; padding-right: 0px; padding-bottom: 0px;"><div style="position: relative; top: 0px;"><div class="CodeMirror-lines" role="presentation"><div role="presentation" style="position: relative; outline: none;"><div class="CodeMirror-measure"><pre><span>xxxxxxxxxx</span></pre></div><div class="CodeMirror-measure"></div><div style="position: relative; z-index: 1;"></div><div class="CodeMirror-code" role="presentation" style=""><div class="CodeMirror-activeline" style="position: relative;"><div class="CodeMirror-activeline-background CodeMirror-linebackground"></div><div class="CodeMirror-gutter-background CodeMirror-activeline-gutter" style="left: 0px; width: 0px;"></div><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span class="cm-meta">#!/bin/bash</span></span></pre></div><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span class="cm-comment">#SBATCH --job-name=cdhit</span></span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span class="cm-comment">#SBATCH -p bigmemh</span></span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span class="cm-comment">#SBATCH --exclusive</span></span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" 
style="padding-right: 0.1px;"><span class="cm-comment">#SBATCH --nodes=1</span></span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span class="cm-comment">#SBATCH -t 60-0</span></span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span class="cm-comment">#SBATCH --mem=0</span></span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span class="cm-comment">#SBATCH --error=cdhit.err</span></span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span class="cm-comment">#SBATCH --output=cdhit.out</span></span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span cm-text="" cm-zwsp="">
</span></span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;">cd-hit-2d <span class="cm-attribute">-i</span> proteins.fa <span class="cm-attribute">-i2</span> cnido_prot_sequence.fa <span class="cm-attribute">-o</span> cdhit.out <span class="cm-attribute">-c</span> <span class="cm-number">0</span>.95 <span class="cm-attribute">-M</span> <span class="cm-number">360000</span> <span class="cm-attribute">-T</span> <span class="cm-number">0</span> <span class="cm-attribute">-s2</span> <span class="cm-number">0</span>.9</span></pre></div></div></div></div></div><div style="position: absolute; height: 0px; width: 1px; border-bottom-width: 0px; border-bottom-style: solid; border-bottom-color: transparent; top: 242px;"></div><div class="CodeMirror-gutters" style="display: none; height: 242px;"></div></div></div></pre><p><span>Finally, we pooled </span><code>proteins.fa</code><span> and </span><code>cnido_prot_sequence.fa</code><span> to make a final </span><code>proteins.fa</code><span> file (the old </span><code>proteins.fa</code><span> file was removed). </span></p><h5 id='translating-the-h-echinata-transcriptome'><span>Translating the </span><em><span>H. echinata</span></em><span> transcriptome</span></h5><p><span>After compiling </span><code>proteins.fa</code><span>, we used it to generate a BLAST-able protein database:</span></p><p><code>makeblastdb -in proteins.fa -dbtype prot -title proteins -out proteins</code></p><p><span>We then ran TransDecoder (v5.2.0) on the </span><em><span>H. 
echinata</span></em><span> transcriptome and incorporated BLAST results for the candidate peptide sequences when compared to the protein database:</span></p><p><span>(</span><em><span>01_prepHints/transDecoderHech.sh</span></em><span>)</span></p><pre class="md-fences md-end-block ty-contain-cm modeLoaded" spellcheck="false" lang="bash" style="break-inside: unset;"><div class="CodeMirror cm-s-inner cm-s-null-scroll CodeMirror-wrap" lang="bash"><div style="overflow: hidden; position: relative; width: 3px; height: 0px; top: 9px; left: 8px;"><textarea autocorrect="off" autocapitalize="off" spellcheck="false" tabindex="0" style="position: absolute; bottom: -1em; padding: 0px; width: 1000px; height: 1em; outline: none;"></textarea></div><div class="CodeMirror-scrollbar-filler" cm-not-content="true"></div><div class="CodeMirror-gutter-filler" cm-not-content="true"></div><div class="CodeMirror-scroll" tabindex="-1"><div class="CodeMirror-sizer" style="margin-left: 0px; margin-bottom: 0px; border-right-width: 0px; padding-right: 0px; padding-bottom: 0px;"><div style="position: relative; top: 0px;"><div class="CodeMirror-lines" role="presentation"><div role="presentation" style="position: relative; outline: none;"><div class="CodeMirror-measure"><pre><span>xxxxxxxxxx</span></pre></div><div class="CodeMirror-measure"></div><div style="position: relative; z-index: 1;"></div><div class="CodeMirror-code" role="presentation" style=""><div class="CodeMirror-activeline" style="position: relative;"><div class="CodeMirror-activeline-background CodeMirror-linebackground"></div><div class="CodeMirror-gutter-background CodeMirror-activeline-gutter" style="left: 0px; width: 0px;"></div><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span class="cm-meta">#!/bin/bash -l</span></span></pre></div><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span 
class="cm-comment">#SBATCH -p med</span></span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span class="cm-comment">#SBATCH --job-name=TD</span></span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span class="cm-comment">#SBATCH -c 24</span></span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span class="cm-comment">#SBATCH -t 60-0</span></span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span class="cm-comment">#SBATCH --mem=60G</span></span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span class="cm-comment">#SBATCH --error=TD.err</span></span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span class="cm-comment">#SBATCH --output=TD.out</span></span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span cm-text="" cm-zwsp="">
</span></span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;">module load TransDecoder/5.2.0</span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span cm-text="" cm-zwsp="">
</span></span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;">TransDecoder.LongOrfs <span class="cm-attribute">-t</span> Hech-trinity.fa</span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span cm-text="" cm-zwsp="">
</span></span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span class="cm-builtin">echo</span> <span class="cm-string">"blasting"</span></span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span cm-text="" cm-zwsp="">
</span></span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;">~/bin/blastp <span class="cm-attribute">-query</span> Hech-trinity.fa.transdecoder_dir/longest_orfs.pep \</span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"> <span class="cm-attribute">-db</span> proteins <span class="cm-attribute">-max_target_seqs</span> <span class="cm-number">1</span> \</span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"> <span class="cm-attribute">-outfmt</span> <span class="cm-number">6</span> <span class="cm-attribute">-evalue</span> 1e-5 <span class="cm-attribute">-num_threads</span> <span class="cm-number">24</span> > blastpHeck.outfmt6</span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span cm-text="" cm-zwsp="">
</span></span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;">TransDecoder.Predict <span class="cm-attribute">-t</span> Hech-trinity.fa <span class="cm-attribute">--single_best_only</span> <span class="cm-attribute">--retain_blastp_hits</span> blastpHeck.outfmt6</span></pre></div></div></div></div></div><div style="position: absolute; height: 0px; width: 1px; border-bottom-width: 0px; border-bottom-style: solid; border-bottom-color: transparent; top: 440px;"></div><div class="CodeMirror-gutters" style="display: none; height: 440px;"></div></div></div></pre><h5 id='translating-brown-hydra-transcriptomes'><span>Translating Brown </span><em><span>Hydra</span></em><span> Transcriptomes</span></h5><p><span>We used essentially the same approach for the brown </span><em><span>Hydra</span></em><span> transcriptomes, although we used diamond (v2.0.6) instead of BLAST to speed things up.</span></p><p><span>To make the diamond database from </span><code>proteins.fa</code><span>:</span></p><p><code>diamond makedb -d proteins --in proteins.fa</code></p><p><span>We first reduced the redundancy in the </span><em><span>Hydra</span></em><span> transcriptomes using CD-hit:</span></p><pre class="md-fences md-end-block ty-contain-cm modeLoaded" spellcheck="false" lang="bash"><div class="CodeMirror cm-s-inner cm-s-null-scroll CodeMirror-wrap" lang="bash"><div style="overflow: hidden; position: relative; width: 3px; height: 0px; top: 9px; left: 8px;"><textarea autocorrect="off" autocapitalize="off" spellcheck="false" tabindex="0" style="position: absolute; bottom: -1em; padding: 0px; width: 1000px; height: 1em; outline: none;"></textarea></div><div class="CodeMirror-scrollbar-filler" cm-not-content="true"></div><div class="CodeMirror-gutter-filler" cm-not-content="true"></div><div class="CodeMirror-scroll" tabindex="-1"><div class="CodeMirror-sizer" style="margin-left: 0px; margin-bottom: 0px; border-right-width: 0px; 
padding-right: 0px; padding-bottom: 0px;"><div style="position: relative; top: 0px;"><div class="CodeMirror-lines" role="presentation"><div role="presentation" style="position: relative; outline: none;"><div class="CodeMirror-measure"><pre><span>xxxxxxxxxx</span></pre></div><div class="CodeMirror-measure"></div><div style="position: relative; z-index: 1;"></div><div class="CodeMirror-code" role="presentation"><div class="CodeMirror-activeline" style="position: relative;"><div class="CodeMirror-activeline-background CodeMirror-linebackground"></div><div class="CodeMirror-gutter-background CodeMirror-activeline-gutter" style="left: 0px; width: 0px;"></div><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;">cd-hit <span class="cm-attribute">-i</span> H_vulgarisZurich.fa <span class="cm-attribute">-o</span> zu.cdhit.fa <span class="cm-attribute">-c</span> <span class="cm-number">0</span>.95</span></pre></div><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;">cd-hit <span class="cm-attribute">-i</span> H_oligactis.fa <span class="cm-attribute">-o</span> oli.cdhit.fa <span class="cm-attribute">-c</span> <span class="cm-number">0</span>.95</span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;">cd-hit <span class="cm-attribute">-i</span> H_circumcincta.fa <span class="cm-attribute">-o</span> cir.cdhit.fa <span class="cm-attribute">-c</span> <span class="cm-number">0</span>.95</span></pre></div></div></div></div></div><div style="position: absolute; height: 0px; width: 1px; border-bottom-width: 0px; border-bottom-style: solid; border-bottom-color: transparent; top: 66px;"></div><div class="CodeMirror-gutters" style="display: none; height: 66px;"></div></div></div></pre><p><span>We then ran TransDecoder using the following 
script:</span></p><p><span>(</span><em><span>01_prepHints/transDecoderClose.sh</span></em><span>)</span></p><pre class="md-fences md-end-block ty-contain-cm modeLoaded" spellcheck="false" lang="bash" style="break-inside: unset;"><div class="CodeMirror cm-s-inner cm-s-null-scroll CodeMirror-wrap" lang="bash"><div style="overflow: hidden; position: relative; width: 3px; height: 0px; top: 9px; left: 8px;"><textarea autocorrect="off" autocapitalize="off" spellcheck="false" tabindex="0" style="position: absolute; bottom: -1em; padding: 0px; width: 1000px; height: 1em; outline: none;"></textarea></div><div class="CodeMirror-scrollbar-filler" cm-not-content="true"></div><div class="CodeMirror-gutter-filler" cm-not-content="true"></div><div class="CodeMirror-scroll" tabindex="-1"><div class="CodeMirror-sizer" style="margin-left: 0px; margin-bottom: 0px; border-right-width: 0px; padding-right: 0px; padding-bottom: 0px;"><div style="position: relative; top: 0px;"><div class="CodeMirror-lines" role="presentation"><div role="presentation" style="position: relative; outline: none;"><div class="CodeMirror-measure"><pre><span>xxxxxxxxxx</span></pre></div><div class="CodeMirror-measure"></div><div style="position: relative; z-index: 1;"></div><div class="CodeMirror-code" role="presentation" style=""><div class="CodeMirror-activeline" style="position: relative;"><div class="CodeMirror-activeline-background CodeMirror-linebackground"></div><div class="CodeMirror-gutter-background CodeMirror-activeline-gutter" style="left: 0px; width: 0px;"></div><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span class="cm-meta">#!/bin/bash -l</span></span></pre></div><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span class="cm-comment">#SBATCH -p med</span></span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 
0.1px;"><span class="cm-comment">#SBATCH --job-name=TD</span></span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span class="cm-comment">#SBATCH -c 24</span></span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span class="cm-comment">#SBATCH -t 60-0</span></span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span class="cm-comment">#SBATCH --mem=60G</span></span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span class="cm-comment">#SBATCH --error=TD.err</span></span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span class="cm-comment">#SBATCH --output=TD.out</span></span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span cm-text="" cm-zwsp="">
</span></span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;">module load TransDecoder/5.2.0</span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span cm-text="" cm-zwsp="">
</span></span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span class="cm-keyword">for</span> arg <span class="cm-keyword">in</span> *cdhit.fa</span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span class="cm-keyword">do</span></span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"> <span class="cm-builtin">echo</span> <span class="cm-string">"</span><span class="cm-def">$arg</span><span class="cm-string">"</span></span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span cm-text="" cm-zwsp="">
</span></span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"> TransDecoder.LongOrfs <span class="cm-attribute">-t</span> <span class="cm-string">"</span><span class="cm-def">$arg</span><span class="cm-string">"</span></span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span cm-text="" cm-zwsp="">
</span></span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"> diamond blastp <span class="cm-attribute">--query</span> <span class="cm-string">"</span><span class="cm-def">$arg</span><span class="cm-string">"</span>.transdecoder_dir/longest_orfs.pep \</span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"> <span class="cm-attribute">--db</span> ../proteins <span class="cm-attribute">--max-target-seqs</span> <span class="cm-number">1</span> \</span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"> <span class="cm-attribute">--outfmt</span> <span class="cm-number">6</span> <span class="cm-attribute">--evalue</span> 1e-5 <span class="cm-attribute">-p</span> <span class="cm-number">24</span> <span class="cm-attribute">--sensitive</span> > blastp.outfmt6</span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span cm-text="" cm-zwsp="">
</span></span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"> TransDecoder.Predict <span class="cm-attribute">-t</span> <span class="cm-string">"</span><span class="cm-def">$arg</span><span class="cm-string">"</span> <span class="cm-attribute">--single_best_only</span> <span class="cm-attribute">--retain_blastp_hits</span> blastp.outfmt6</span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span class="cm-keyword">done</span></span></pre></div></div></div></div></div><div style="position: absolute; height: 0px; width: 1px; border-bottom-width: 0px; border-bottom-style: solid; border-bottom-color: transparent; top: 506px;"></div><div class="CodeMirror-gutters" style="display: none; height: 506px;"></div></div></div></pre><p><span>For all TransDecoder runs, the resulting </span><code>.pep</code><span> files were used as the proteomes for these species in downstream applications.</span></p><h4 id='final-proteome-compilation-and-formating'><span>Final Proteome Compilation and Formatting</span></h4><p><span>After generating all the individual protein FASTA files, we used a script provided as part of a standard OrthoFinder (v2.5.4) installation (</span><code>primary_transcript.py</code><span>) to extract primary isoforms from each file (this is mainly needed for Ensembl-sourced files). We also removed stop codon symbols from the proteomes. 
The output was directed into a subdirectory called </span><code>primary_transcripts</code><span>.</span></p><pre class="md-fences md-end-block ty-contain-cm modeLoaded" spellcheck="false" lang="bash"><div class="CodeMirror cm-s-inner cm-s-null-scroll CodeMirror-wrap" lang="bash"><div style="overflow: hidden; position: relative; width: 3px; height: 0px; top: 9px; left: 8px;"><textarea autocorrect="off" autocapitalize="off" spellcheck="false" tabindex="0" style="position: absolute; bottom: -1em; padding: 0px; width: 1000px; height: 1em; outline: none;"></textarea></div><div class="CodeMirror-scrollbar-filler" cm-not-content="true"></div><div class="CodeMirror-gutter-filler" cm-not-content="true"></div><div class="CodeMirror-scroll" tabindex="-1"><div class="CodeMirror-sizer" style="margin-left: 0px; margin-bottom: 0px; border-right-width: 0px; padding-right: 0px; padding-bottom: 0px;"><div style="position: relative; top: 0px;"><div class="CodeMirror-lines" role="presentation"><div role="presentation" style="position: relative; outline: none;"><div class="CodeMirror-measure"><pre><span>xxxxxxxxxx</span></pre></div><div class="CodeMirror-measure"></div><div style="position: relative; z-index: 1;"></div><div class="CodeMirror-code" role="presentation" style=""><div class="CodeMirror-activeline" style="position: relative;"><div class="CodeMirror-activeline-background CodeMirror-linebackground"></div><div class="CodeMirror-gutter-background CodeMirror-activeline-gutter" style="left: 0px; width: 0px;"></div><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span class="cm-keyword">for</span> f <span class="cm-keyword">in</span> *fa ; <span class="cm-keyword">do</span> gsed <span class="cm-attribute">-i</span> <span class="cm-string">'s/\(^[^>].*\)\*/\1/g'</span> <span class="cm-def">$f</span> ; <span class="cm-keyword">done</span></span></pre></div><pre class=" CodeMirror-line " role="presentation"><span 
role="presentation" style="padding-right: 0.1px;"><span class="cm-keyword">for</span> f <span class="cm-keyword">in</span> *fa ; <span class="cm-keyword">do</span> gsed <span class="cm-attribute">-i</span> <span class="cm-string">'s/\(^[^>].*\)\./\1/g'</span> <span class="cm-def">$f</span> ; <span class="cm-keyword">done</span></span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span cm-text="" cm-zwsp="">
</span></span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span class="cm-keyword">for</span> f <span class="cm-keyword">in</span> *fa ; <span class="cm-keyword">do</span> python /Users/Jcazet/opt/anaconda3/envs/workingEnv/bin/primary_transcript.py <span class="cm-def">$f</span> ; <span class="cm-keyword">done</span></span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span cm-text="" cm-zwsp="">
</span></span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span class="cm-builtin">cd</span> primary_transcripts</span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span cm-text="" cm-zwsp="">
</span></span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span class="cm-keyword">for</span> f <span class="cm-keyword">in</span> *fa ; <span class="cm-keyword">do</span> gsed <span class="cm-attribute">-i</span> <span class="cm-string">'s/\(^>[^ \|]\+\).*/\1/g'</span> <span class="cm-def">$f</span> ; <span class="cm-keyword">done</span></span></pre></div></div></div></div></div><div style="position: absolute; height: 0px; width: 1px; border-bottom-width: 0px; border-bottom-style: solid; border-bottom-color: transparent; top: 176px;"></div><div class="CodeMirror-gutters" style="display: none; height: 176px;"></div></div></div></pre><p><span>Files in the </span><code>primary_transcripts</code><span> directory were then concatenated into the file </span><code>allPrimProts.fa</code><span>.</span></p><p><span>Finally, atypical non-AA characters were removed to prevent parsing errors later on:</span></p><p><code>sed -i -e '/^[^>]/s/[^AaRrNnDdCcEeQqGgHhIiLlKkMmFfPpSsTtWwYyVvBbZzJjXx]/X/g' allPrimProts.fa</code></p><h2 id='performing-ab-initio-gene-predictions'><span>Performing Ab Initio Gene Predictions</span></h2><p><span>We next used BRAKER2 (v2.1.5) to generate gene models using hints provided by our new transcriptome, proteomic database, and genome-mapped RNA-seq data.</span></p><p><strong><span>Note</span></strong><span>: The file </span><code>aepAligned.sortedByCoord.out.bam</code><span> created by STAR after mapping the RNA-seq data to the genome was renamed to </span><code>rna.bam</code><span> </span></p><p><span>(</span><em><span>02_braker2/brakerScript.sh</span></em><span>)</span></p><pre class="md-fences md-end-block ty-contain-cm modeLoaded" spellcheck="false" lang="bash"><div class="CodeMirror cm-s-inner cm-s-null-scroll CodeMirror-wrap" lang="bash"><div style="overflow: hidden; position: relative; width: 3px; height: 0px; top: 9px; left: 8px;"><textarea autocorrect="off" autocapitalize="off" spellcheck="false" 
tabindex="0" style="position: absolute; bottom: -1em; padding: 0px; width: 1000px; height: 1em; outline: none;"></textarea></div><div class="CodeMirror-scrollbar-filler" cm-not-content="true"></div><div class="CodeMirror-gutter-filler" cm-not-content="true"></div><div class="CodeMirror-scroll" tabindex="-1"><div class="CodeMirror-sizer" style="margin-left: 0px; margin-bottom: 0px; border-right-width: 0px; padding-right: 0px; padding-bottom: 0px;"><div style="position: relative; top: 0px;"><div class="CodeMirror-lines" role="presentation"><div role="presentation" style="position: relative; outline: none;"><div class="CodeMirror-measure"><pre><span>xxxxxxxxxx</span></pre></div><div class="CodeMirror-measure"></div><div style="position: relative; z-index: 1;"></div><div class="CodeMirror-code" role="presentation" style=""><div class="CodeMirror-activeline" style="position: relative;"><div class="CodeMirror-activeline-background CodeMirror-linebackground"></div><div class="CodeMirror-gutter-background CodeMirror-activeline-gutter" style="left: 0px; width: 0px;"></div><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span class="cm-meta">#!/bin/bash</span></span></pre></div><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span cm-text="" cm-zwsp="">
</span></span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;">braker.pl <span class="cm-attribute">--genome</span><span class="cm-operator">=</span>aep.final.genome.full.softmask.fa \</span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"> <span class="cm-attribute">--prot_seq</span><span class="cm-operator">=</span>allPrimProts.fa \</span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"> <span class="cm-attribute">--bam</span><span class="cm-operator">=</span>rna.bam \</span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"> <span class="cm-attribute">--etpmode</span> \</span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"> <span class="cm-attribute">--softmasking</span> \</span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"> <span class="cm-attribute">--cores</span> <span class="cm-number">48</span> \</span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"> <span class="cm-attribute">--species</span><span class="cm-operator">=</span>HyVul \</span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"> <span class="cm-attribute">--AUGUSTUS_CONFIG_PATH</span><span class="cm-operator">=</span>/home/jacazet/reference/makerAnnotations/aepAnnot/maker_braker/braker/config \</span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"> <span class="cm-attribute">--AUGUSTUS_BIN_PATH</span><span class="cm-operator">=</span>/home/Augustus/bin \</span></pre><pre class=" CodeMirror-line " role="presentation"><span 
role="presentation" style="padding-right: 0.1px;"> <span class="cm-attribute">--AUGUSTUS_SCRIPTS_PATH</span><span class="cm-operator">=</span>/home/Augustus/scripts \</span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"> <span class="cm-attribute">--gff3</span></span></pre></div></div></div></div></div><div style="position: absolute; height: 0px; width: 1px; border-bottom-width: 0px; border-bottom-style: solid; border-bottom-color: transparent; top: 286px;"></div><div class="CodeMirror-gutters" style="display: none; height: 286px;"></div></div></div></pre><p><span>The script above was executed on the cluster within a Singularity container on which BRAKER2 was installed</span></p><p><span>(</span><em><span>02_braker2/runBraker.sh</span></em><span>)</span></p><pre class="md-fences md-end-block ty-contain-cm modeLoaded" spellcheck="false" lang="bash"><div class="CodeMirror cm-s-inner cm-s-null-scroll CodeMirror-wrap" lang="bash"><div style="overflow: hidden; position: relative; width: 3px; height: 0px; top: 9px; left: 8px;"><textarea autocorrect="off" autocapitalize="off" spellcheck="false" tabindex="0" style="position: absolute; bottom: -1em; padding: 0px; width: 1000px; height: 1em; outline: none;"></textarea></div><div class="CodeMirror-scrollbar-filler" cm-not-content="true"></div><div class="CodeMirror-gutter-filler" cm-not-content="true"></div><div class="CodeMirror-scroll" tabindex="-1"><div class="CodeMirror-sizer" style="margin-left: 0px; margin-bottom: 0px; border-right-width: 0px; padding-right: 0px; padding-bottom: 0px;"><div style="position: relative; top: 0px;"><div class="CodeMirror-lines" role="presentation"><div role="presentation" style="position: relative; outline: none;"><div class="CodeMirror-measure"><pre><span>xxxxxxxxxx</span></pre></div><div class="CodeMirror-measure"></div><div style="position: relative; z-index: 1;"></div><div class="CodeMirror-code" role="presentation" 
style=""><div class="CodeMirror-activeline" style="position: relative;"><div class="CodeMirror-activeline-background CodeMirror-linebackground"></div><div class="CodeMirror-gutter-background CodeMirror-activeline-gutter" style="left: 0px; width: 0px;"></div><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span class="cm-meta">#!/bin/bash</span></span></pre></div><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span class="cm-comment">#SBATCH --job-name=braker</span></span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span class="cm-comment">#SBATCH -p bigmemh</span></span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span class="cm-comment">#SBATCH -c 48</span></span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span class="cm-comment">#SBATCH -t 60-0</span></span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span class="cm-comment">#SBATCH --mem=360G</span></span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span class="cm-comment">#SBATCH --error=braker.err</span></span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span class="cm-comment">#SBATCH --output=braker.out</span></span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span cm-text="" cm-zwsp="">
</span></span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;">module load singularity</span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span cm-text="" cm-zwsp="">
</span></span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;">singularity exec <span class="cm-attribute">-H</span> <span class="cm-string">"/home/jacazet"</span> <span class="cm-attribute">-B</span> /home/jacazet/reference/makerAnnotations/aepAnnot/maker_braker/braker ~/braker2_2.1.5.sif ./brakerScript.sh</span></pre></div></div></div></div></div><div style="position: absolute; height: 0px; width: 1px; border-bottom-width: 0px; border-bottom-style: solid; border-bottom-color: transparent; top: 286px;"></div><div class="CodeMirror-gutters" style="display: none; height: 286px;"></div></div></div></pre><h3 id='reformatingfixing-braker2-gff3-files'><span>Reformatting/Fixing Braker2 GFF3 Files</span></h3><p><span>BRAKER2 incorporates gene model predictions from both GeneMark and Augustus. Unfortunately, the GeneMark models were improperly formatted in the GFF3 file produced by BRAKER2, in that they all lacked mRNA/transcript and gene rows for the gene models. In addition, some Augustus predictions also lacked mRNA rows, and all Augustus predictions lacked gene rows. 
We used the following R script to fix that issue</span></p><p><span>(</span><em><span>02_braker2/brakerFixGname.R</span></em><span>)</span></p><pre class="md-fences md-end-block ty-contain-cm modeLoaded" spellcheck="false" lang="R" style="break-inside: unset;"><div class="CodeMirror cm-s-inner cm-s-null-scroll CodeMirror-wrap" lang="r"><div style="overflow: hidden; position: relative; width: 3px; height: 0px; top: 2605px; left: 834.68359375px;"><textarea autocorrect="off" autocapitalize="off" spellcheck="false" tabindex="0" style="position: absolute; bottom: -1em; padding: 0px; width: 1000px; height: 1em; outline: none;"></textarea></div><div class="CodeMirror-scrollbar-filler" cm-not-content="true"></div><div class="CodeMirror-gutter-filler" cm-not-content="true"></div><div class="CodeMirror-scroll" tabindex="-1"><div class="CodeMirror-sizer" style="margin-left: 0px; margin-bottom: 0px; border-right-width: 0px; padding-right: 0px; padding-bottom: 0px;"><div style="position: relative; top: 0px;"><div class="CodeMirror-lines" role="presentation"><div role="presentation" style="position: relative; outline: none;"><div class="CodeMirror-measure"><pre>x</pre></div><div class="CodeMirror-measure"></div><div style="position: relative; z-index: 1;"></div><div class="CodeMirror-code" role="presentation" style=""><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span class="cm-variable">library</span>(<span class="cm-variable">rstudioapi</span>)</span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span cm-text="" cm-zwsp="">
</span></span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span class="cm-comment">#set the working directory to be the folder in which this script is located</span></span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span class="cm-variable">setwd</span>(<span class="cm-variable">dirname</span>(<span class="cm-variable">getActiveDocumentContext</span>()<span class="cm-operator cm-dollar">$</span><span class="cm-variable">path</span>))</span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span cm-text="" cm-zwsp="">
</span></span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span class="cm-variable">gffIn</span> <span class="cm-operator cm-arrow"><-</span> <span class="cm-variable">read.delim</span>(<span class="cm-string">"braker.gff3"</span>, <span class="cm-variable">header</span> <span class="cm-operator">=</span> <span class="cm-variable">F</span>)</span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span cm-text="" cm-zwsp="">
</span></span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span class="cm-comment">#pull out only those rows that are from genemark predictions (these are the problem ones)</span></span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span class="cm-variable">gffIn.GM</span> <span class="cm-operator cm-arrow"><-</span> <span class="cm-variable">gffIn</span>[<span class="cm-variable">grepl</span>(<span class="cm-string">'GeneMark.hmm'</span>,<span class="cm-variable">gffIn</span><span class="cm-operator cm-dollar">$</span><span class="cm-variable">V2</span>),]</span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span cm-text="" cm-zwsp="">
</span></span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span class="cm-comment">#id the parent gene for each row</span></span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span class="cm-variable">gffIn.GM</span><span class="cm-operator cm-dollar">$</span><span class="cm-variable">parent</span> <span class="cm-operator cm-arrow"><-</span> <span class="cm-variable">gsub</span>(<span class="cm-string">'.*Parent='</span>,<span class="cm-string">''</span>,<span class="cm-variable">gffIn.GM</span><span class="cm-operator cm-dollar">$</span><span class="cm-variable">V9</span>)</span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span cm-text="" cm-zwsp="">
</span></span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span class="cm-comment">#remove gene models with incomplete ORFs that lack a start codon</span></span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span class="cm-variable">gffIn.GM</span> <span class="cm-operator cm-arrow"><-</span> <span class="cm-variable">gffIn.GM</span>[<span class="cm-variable">gffIn.GM</span><span class="cm-operator cm-dollar">$</span><span class="cm-variable">parent</span> <span class="cm-operator cm-variable-2">%in%</span> <span class="cm-variable">gffIn.GM</span>[<span class="cm-variable">gffIn.GM</span><span class="cm-operator cm-dollar">$</span><span class="cm-variable">V3</span> <span class="cm-operator">==</span> <span class="cm-string">'start_codon'</span>,<span class="cm-string">'parent'</span>],] </span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span cm-text="" cm-zwsp="">
</span></span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span class="cm-comment">#split dataframe by parent ID (groups rows by gene model)</span></span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span class="cm-variable">gffIn.GM</span> <span class="cm-operator cm-arrow"><-</span> <span class="cm-variable">split</span>(<span class="cm-variable">gffIn.GM</span>,<span class="cm-variable">gffIn.GM</span><span class="cm-operator cm-dollar">$</span><span class="cm-variable">parent</span>)</span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span cm-text="" cm-zwsp="">
</span></span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span class="cm-comment">#now we need to add an mRNA row for each gene model</span></span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span class="cm-variable">gffIn.GM</span> <span class="cm-operator cm-arrow"><-</span> <span class="cm-variable">lapply</span>(<span class="cm-variable">gffIn.GM</span>, <span class="cm-keyword">function</span>(<span class="cm-variable">x</span>) {</span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"> <span class="cm-variable">newDF</span> <span class="cm-operator cm-arrow"><-</span> <span class="cm-variable">x</span></span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"> <span class="cm-comment">#pick an arbitrary row that we will remake into an mRNA row</span></span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"> <span class="cm-variable">mRow</span> <span class="cm-operator cm-arrow"><-</span> <span class="cm-variable">newDF</span>[<span class="cm-number">1</span>,]</span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"> <span class="cm-comment">#extract parent ID for gene model in this DF</span></span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"> <span class="cm-comment">#this is the basis for the gene name</span></span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"> <span class="cm-variable">pID</span> <span class="cm-operator cm-arrow"><-</span> <span class="cm-variable">gsub</span>(<span class="cm-string">';'</span>,<span 
class="cm-string">''</span>,<span class="cm-variable">newDF</span>[<span class="cm-number">1</span>,<span class="cm-string">'parent'</span>])</span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"> <span class="cm-comment">#set the start and end to encompass the full span of all rows for this gene prediction</span></span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"> <span class="cm-variable">mRow</span>[,<span class="cm-number">4</span>] <span class="cm-operator cm-arrow"><-</span> <span class="cm-variable">min</span>(<span class="cm-variable">newDF</span>[,<span class="cm-number">4</span>])</span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"> <span class="cm-variable">mRow</span>[,<span class="cm-number">5</span>] <span class="cm-operator cm-arrow"><-</span> <span class="cm-variable">max</span>(<span class="cm-variable">newDF</span>[,<span class="cm-number">5</span>])</span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"> <span class="cm-comment">#rename feature type</span></span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"> <span class="cm-variable">mRow</span>[,<span class="cm-number">3</span>] <span class="cm-operator cm-arrow"><-</span> <span class="cm-string">'mRNA'</span></span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"> <span class="cm-variable">mRow</span>[,<span class="cm-number">6</span>] <span class="cm-operator cm-arrow"><-</span> <span class="cm-string">'.'</span></span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"> <span class="cm-variable">mRow</span>[,<span class="cm-number">8</span>] 
<span class="cm-operator cm-arrow"><-</span> <span class="cm-string">'.'</span></span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"> <span class="cm-comment">#give the mRNA row the proper mRNA ID in the tags column</span></span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"> <span class="cm-variable">mRow</span>[,<span class="cm-number">9</span>] <span class="cm-operator cm-arrow"><-</span> <span class="cm-variable">paste0</span>(<span class="cm-string">'ID='</span>,<span class="cm-variable">pID</span>,<span class="cm-string">'-mRNA-1;Parent='</span>,<span class="cm-variable">pID</span>)</span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"> <span class="cm-comment">#we also need to create a gene row</span></span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"> <span class="cm-comment">#(the GM predictions only have one isoform, so the gene entry is identical to the mRNA entry)</span></span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"> <span class="cm-variable">gRow</span> <span class="cm-operator cm-arrow"><-</span> <span class="cm-variable">mRow</span></span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"> <span class="cm-variable">gRow</span>[,<span class="cm-number">9</span>] <span class="cm-operator cm-arrow"><-</span> <span class="cm-variable">paste0</span>(<span class="cm-string">'ID='</span>,<span class="cm-variable">pID</span>)</span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"> <span class="cm-variable">gRow</span>[,<span class="cm-number">3</span>] <span class="cm-operator cm-arrow"><-</span> <span 
class="cm-string">'gene'</span></span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"> <span class="cm-comment">#make sure all the rows for CDS, exons, etc. have the new transcript ID for their parent tag</span></span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"> <span class="cm-variable">newDF</span>[,<span class="cm-number">9</span>] <span class="cm-operator cm-arrow"><-</span> <span class="cm-variable">gsub</span>(<span class="cm-variable">paste0</span>(<span class="cm-string">'Parent='</span>,<span class="cm-variable">pID</span>),<span class="cm-variable">paste0</span>(<span class="cm-string">'Parent='</span>,<span class="cm-variable">pID</span>,<span class="cm-string">'-mRNA-1'</span>),<span class="cm-variable">newDF</span>[,<span class="cm-number">9</span>])</span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"> <span class="cm-comment">#combine everything</span></span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"> <span class="cm-variable">newDF</span> <span class="cm-operator cm-arrow"><-</span> <span class="cm-variable">rbind</span>(<span class="cm-variable">gRow</span>,<span class="cm-variable">mRow</span>,<span class="cm-variable">newDF</span>)</span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"> <span class="cm-comment">#link all the rows with the gene ID</span></span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"> <span class="cm-variable">newDF</span>[,<span class="cm-number">9</span>] <span class="cm-operator cm-arrow"><-</span> <span class="cm-variable">paste0</span>(<span class="cm-variable">newDF</span>[,<span class="cm-number">9</span>],<span 
class="cm-string">'gene_id='</span>,<span class="cm-variable">pID</span>)</span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"> <span class="cm-comment">#delete temp parent column</span></span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"> <span class="cm-variable">newDF</span><span class="cm-operator cm-dollar">$</span><span class="cm-variable">parent</span> <span class="cm-operator cm-arrow"><-</span> <span class="cm-atom">NULL</span></span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"> <span class="cm-builtin">return</span>(<span class="cm-variable">newDF</span>)</span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;">})</span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span cm-text="" cm-zwsp="">
</span></span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span class="cm-comment">#compile the fixed gene models into a DF</span></span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span class="cm-variable">gffIn.GM</span> <span class="cm-operator cm-arrow"><-</span> <span class="cm-variable">do.call</span>(<span class="cm-variable">rbind</span>,<span class="cm-variable">gffIn.GM</span>)</span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span cm-text="" cm-zwsp="">
</span></span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span class="cm-comment">#drop the old, improperly formatted versions of the GM models</span></span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span class="cm-variable">gffIn</span> <span class="cm-operator cm-arrow"><-</span> <span class="cm-variable">gffIn</span>[<span class="cm-operator">!</span><span class="cm-variable">grepl</span>(<span class="cm-string">'GeneMark.hmm'</span>,<span class="cm-variable">gffIn</span><span class="cm-operator cm-dollar">$</span><span class="cm-variable">V2</span>),]</span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span cm-text="" cm-zwsp="">
</span></span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span class="cm-comment">#all the Augustus models lack a gene row, and some of them also lack a mRNA row</span></span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span class="cm-comment">#so we need to fix that too</span></span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span cm-text="" cm-zwsp="">
</span></span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span class="cm-comment">#extract the gene IDs for all rows</span></span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span class="cm-variable">gffIn</span><span class="cm-operator cm-dollar">$</span><span class="cm-variable">gID</span> <span class="cm-operator cm-arrow"><-</span> <span class="cm-variable">gffIn</span><span class="cm-operator cm-dollar">$</span><span class="cm-variable">V9</span></span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span cm-text="" cm-zwsp="">
</span></span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span class="cm-variable">gffIn</span><span class="cm-operator cm-dollar">$</span><span class="cm-variable">gID</span> <span class="cm-operator cm-arrow"><-</span> <span class="cm-variable">gsub</span>(<span class="cm-string">';.*'</span>,<span class="cm-string">''</span>,<span class="cm-variable">gffIn</span><span class="cm-operator cm-dollar">$</span><span class="cm-variable">gID</span>)</span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span class="cm-variable">gffIn</span><span class="cm-operator cm-dollar">$</span><span class="cm-variable">gID</span> <span class="cm-operator cm-arrow"><-</span> <span class="cm-variable">gsub</span>(<span class="cm-string">'[.]t.*'</span>,<span class="cm-string">''</span>,<span class="cm-variable">gffIn</span><span class="cm-operator cm-dollar">$</span><span class="cm-variable">gID</span>)</span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span class="cm-variable">gffIn</span><span class="cm-operator cm-dollar">$</span><span class="cm-variable">gID</span> <span class="cm-operator cm-arrow"><-</span> <span class="cm-variable">gsub</span>(<span class="cm-string">'ID='</span>,<span class="cm-string">''</span>,<span class="cm-variable">gffIn</span><span class="cm-operator cm-dollar">$</span><span class="cm-variable">gID</span>)</span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span cm-text="" cm-zwsp="">
</span></span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span class="cm-comment">#there are some weird rows that I don't understand, so let's just drop them</span></span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span class="cm-variable">gffIn</span> <span class="cm-operator cm-arrow"><-</span> <span class="cm-variable">gffIn</span>[<span class="cm-operator">!</span>(<span class="cm-variable">gffIn</span><span class="cm-operator cm-dollar">$</span><span class="cm-variable">V3</span> <span class="cm-operator cm-variable-2">%in%</span> <span class="cm-variable">c</span>(<span class="cm-string">'initial'</span>,<span class="cm-string">'terminal'</span>,<span class="cm-string">'internal'</span>)),]</span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span cm-text="" cm-zwsp="">
</span></span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span class="cm-comment">#split augustus models into a list of DFs grouped by gene ID</span></span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span class="cm-variable">gffInList</span> <span class="cm-operator cm-arrow"><-</span> <span class="cm-variable">split</span>(<span class="cm-variable">gffIn</span>, <span class="cm-variable">gffIn</span><span class="cm-operator cm-dollar">$</span><span class="cm-variable">gID</span>)</span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span cm-text="" cm-zwsp="">
</span></span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span class="cm-variable">gffInList</span> <span class="cm-operator cm-arrow"><-</span> <span class="cm-variable">lapply</span>(<span class="cm-variable">gffInList</span>, <span class="cm-keyword">function</span>(<span class="cm-variable">x</span>) {</span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"> <span class="cm-variable">old.df</span> <span class="cm-operator cm-arrow"><-</span> <span class="cm-variable">x</span></span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"> <span class="cm-comment">#check if the augustus models have an mRNA row</span></span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"> <span class="cm-comment">#if they don't, add them (same approach as above)</span></span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"> <span class="cm-keyword">if</span>(<span class="cm-variable">length</span>(<span class="cm-variable">old.df</span>[<span class="cm-variable">old.df</span><span class="cm-operator cm-dollar">$</span><span class="cm-variable">V3</span> <span class="cm-operator">==</span> <span class="cm-string">'mRNA'</span>,<span class="cm-number">1</span>]) <span class="cm-operator">==</span> <span class="cm-number">0</span>) {</span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"> <span class="cm-variable">mRow</span> <span class="cm-operator cm-arrow"><-</span> <span class="cm-variable">old.df</span>[<span class="cm-number">1</span>,]</span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"> <span class="cm-variable">mRow</span>[,<span 
class="cm-number">3</span>] <span class="cm-operator cm-arrow"><-</span> <span class="cm-string">'mRNA'</span></span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"> <span class="cm-variable">mRow</span>[,<span class="cm-number">4</span>] <span class="cm-operator cm-arrow"><-</span> <span class="cm-variable">min</span>(<span class="cm-variable">old.df</span><span class="cm-operator cm-dollar">$</span><span class="cm-variable">V4</span>)</span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"> <span class="cm-variable">mRow</span>[,<span class="cm-number">5</span>] <span class="cm-operator cm-arrow"><-</span> <span class="cm-variable">max</span>(<span class="cm-variable">old.df</span><span class="cm-operator cm-dollar">$</span><span class="cm-variable">V5</span>)</span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"> <span class="cm-variable">mRow</span>[,<span class="cm-number">6</span>] <span class="cm-operator cm-arrow"><-</span> <span class="cm-string">'.'</span></span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"> <span class="cm-variable">mRow</span>[,<span class="cm-number">8</span>] <span class="cm-operator cm-arrow"><-</span> <span class="cm-string">'.'</span></span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"> <span class="cm-variable">mRow</span>[,<span class="cm-number">9</span>] <span class="cm-operator cm-arrow"><-</span> <span class="cm-variable">paste0</span>(<span class="cm-string">'ID='</span>,<span class="cm-variable">old.df</span>[<span class="cm-number">1</span>,<span class="cm-number">10</span>],<span class="cm-string">'.t1;Parent='</span>,<span class="cm-variable">old.df</span>[<span class="cm-number">1</span>,<span 
class="cm-number">10</span>],<span class="cm-string">';'</span>)</span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"> </span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"> <span class="cm-comment">#also add a gene row</span></span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"> <span class="cm-variable">gRow</span> <span class="cm-operator cm-arrow"><-</span> <span class="cm-variable">mRow</span></span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"> <span class="cm-variable">gRow</span>[,<span class="cm-number">9</span>] <span class="cm-operator cm-arrow"><-</span> <span class="cm-variable">paste0</span>(<span class="cm-string">'ID='</span>,<span class="cm-variable">old.df</span>[<span class="cm-number">1</span>,<span class="cm-number">10</span>],<span class="cm-string">';'</span>)</span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"> <span class="cm-variable">gRow</span>[,<span class="cm-number">3</span>] <span class="cm-operator cm-arrow"><-</span> <span class="cm-string">'gene'</span></span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"> </span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"> <span class="cm-variable">new.df</span> <span class="cm-operator cm-arrow"><-</span> <span class="cm-variable">rbind</span>(<span class="cm-variable">gRow</span>,<span class="cm-variable">mRow</span>,<span class="cm-variable">old.df</span>)</span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"> } <span class="cm-keyword">else</span> { <span 
class="cm-comment">#if there's already an mRNA row, then just add the gene row</span></span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"> <span class="cm-variable">gRow</span> <span class="cm-operator cm-arrow"><-</span> <span class="cm-variable">old.df</span>[<span class="cm-variable">old.df</span><span class="cm-operator cm-dollar">$</span><span class="cm-variable">V3</span> <span class="cm-operator">==</span> <span class="cm-string">'mRNA'</span>,][<span class="cm-number">1</span>,]</span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"> <span class="cm-variable">gRow</span>[,<span class="cm-number">9</span>] <span class="cm-operator cm-arrow"><-</span> <span class="cm-variable">paste0</span>(<span class="cm-string">'ID='</span>,<span class="cm-variable">old.df</span>[<span class="cm-number">1</span>,<span class="cm-number">10</span>],<span class="cm-string">';'</span>)</span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"> <span class="cm-variable">gRow</span>[,<span class="cm-number">3</span>] <span class="cm-operator cm-arrow"><-</span> <span class="cm-string">'gene'</span></span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"> <span class="cm-variable">gRow</span>[,<span class="cm-number">4</span>] <span class="cm-operator cm-arrow"><-</span> <span class="cm-variable">min</span>(<span class="cm-variable">old.df</span><span class="cm-operator cm-dollar">$</span><span class="cm-variable">V4</span>)</span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"> <span class="cm-variable">gRow</span>[,<span class="cm-number">5</span>] <span class="cm-operator cm-arrow"><-</span> <span class="cm-variable">max</span>(<span 
class="cm-variable">old.df</span><span class="cm-operator cm-dollar">$</span><span class="cm-variable">V5</span>)</span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"> </span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"> <span class="cm-variable">new.df</span> <span class="cm-operator cm-arrow"><-</span> <span class="cm-variable">rbind</span>(<span class="cm-variable">gRow</span>,<span class="cm-variable">old.df</span>)</span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"> }</span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"> <span class="cm-variable">new.df</span><span class="cm-operator cm-dollar">$</span><span class="cm-variable">V9</span> <span class="cm-operator cm-arrow"><-</span> <span class="cm-variable">paste0</span>(<span class="cm-variable">new.df</span><span class="cm-operator cm-dollar">$</span><span class="cm-variable">V9</span>,<span class="cm-string">'gene_id='</span>,<span class="cm-variable">old.df</span>[<span class="cm-number">1</span>,<span class="cm-number">10</span>],<span class="cm-string">';'</span>)</span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"> <span class="cm-builtin">return</span>(<span class="cm-variable">new.df</span>)</span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;">})</span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span cm-text="" cm-zwsp="">
</span></span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span class="cm-comment">#re-order the gene models so they are consecutive</span></span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span class="cm-variable">gffInList</span> <span class="cm-operator cm-arrow"><-</span> <span class="cm-variable">gffInList</span>[<span class="cm-variable">order</span>(<span class="cm-variable">as.numeric</span>(<span class="cm-variable">gsub</span>(<span class="cm-string">'.*g'</span>,<span class="cm-string">''</span>,<span class="cm-variable">names</span>(<span class="cm-variable">gffInList</span>))))]</span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span cm-text="" cm-zwsp="">
</span></span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span class="cm-comment">#combine list of DFs into single DF</span></span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span class="cm-variable">newGff</span> <span class="cm-operator cm-arrow"><-</span> <span class="cm-variable">do.call</span>(<span class="cm-variable">rbind</span>,<span class="cm-variable">gffInList</span>)</span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span class="cm-comment">#drop temp gene ID row</span></span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span class="cm-variable">newGff</span><span class="cm-operator cm-dollar">$</span><span class="cm-variable">gID</span> <span class="cm-operator cm-arrow"><-</span> <span class="cm-atom">NULL</span></span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span cm-text="" cm-zwsp="">
</span></span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span class="cm-comment">#combine augustus and GM gene models</span></span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span class="cm-variable">newGff</span> <span class="cm-operator cm-arrow"><-</span> <span class="cm-variable">rbind</span>(<span class="cm-variable">newGff</span>,<span class="cm-variable">gffIn.GM</span>)</span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span cm-text="" cm-zwsp="">
</span></span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span class="cm-comment">#export reformatted GFF3</span></span></pre><div class="CodeMirror-activeline" style="position: relative;"><div class="CodeMirror-activeline-background CodeMirror-linebackground"></div><div class="CodeMirror-gutter-background CodeMirror-activeline-gutter" style="left: 0px; width: 0px;"></div><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span class="cm-variable">write.table</span>(<span class="cm-variable">newGff</span>, <span class="cm-variable">file</span> <span class="cm-operator">=</span> <span class="cm-string">'braker.fix.gff3'</span>, <span class="cm-variable">row.names</span> <span class="cm-operator">=</span> <span class="cm-variable">F</span>, <span class="cm-variable">col.names</span> <span class="cm-operator">=</span> <span class="cm-variable">F</span>, <span class="cm-variable">sep</span> <span class="cm-operator">=</span> <span class="cm-string">'</span><span class="cm-string-2">\t</span><span class="cm-string">'</span>, <span class="cm-builtin">quote</span> <span class="cm-operator">=</span> <span class="cm-variable">F</span>)</span></pre></div></div></div></div></div></div><div style="position: absolute; height: 0px; width: 1px; border-bottom-width: 0px; border-bottom-style: solid; border-bottom-color: transparent; top: 2618px;"></div><div class="CodeMirror-gutters" style="display: none; height: 2618px;"></div></div></div></pre><p><span>Some BRAKER2 gene models included incomplete ORFs with internal stop codons, which we had to filter out. For this step we used a Perl script (</span><code>transcript_keeper.pl</code><span>) from a </span><a href='https://github.com/mscampbell/Genome_annotation'><span>very useful repo of genome annotation tools</span></a><span> to retain only the complete gene models. 
We also used several other tools from this repo throughout the annotation process.</span></p><pre class="md-fences md-end-block ty-contain-cm modeLoaded" spellcheck="false" lang="bash"><div class="CodeMirror cm-s-inner cm-s-null-scroll CodeMirror-wrap" lang="bash"><div style="overflow: hidden; position: relative; width: 3px; height: 0px; top: 9px; left: 8px;"><textarea autocorrect="off" autocapitalize="off" spellcheck="false" tabindex="0" style="position: absolute; bottom: -1em; padding: 0px; width: 1000px; height: 1em; outline: none;"></textarea></div><div class="CodeMirror-scrollbar-filler" cm-not-content="true"></div><div class="CodeMirror-gutter-filler" cm-not-content="true"></div><div class="CodeMirror-scroll" tabindex="-1"><div class="CodeMirror-sizer" style="margin-left: 0px; margin-bottom: 0px; border-right-width: 0px; padding-right: 0px; padding-bottom: 0px;"><div style="position: relative; top: 0px;"><div class="CodeMirror-lines" role="presentation"><div role="presentation" style="position: relative; outline: none;"><div class="CodeMirror-measure"><pre><span>xxxxxxxxxx</span></pre></div><div class="CodeMirror-measure"></div><div style="position: relative; z-index: 1;"></div><div class="CodeMirror-code" role="presentation" style=""><div class="CodeMirror-activeline" style="position: relative;"><div class="CodeMirror-activeline-background CodeMirror-linebackground"></div><div class="CodeMirror-gutter-background CodeMirror-activeline-gutter" style="left: 0px; width: 0px;"></div><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span class="cm-comment">#generate AA sequences from the Braker GFF3 file</span></span></pre></div><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;">gffread <span class="cm-attribute">-g</span> aep.final.genome.fa <span class="cm-attribute">-y</span> braker.allprots.fa braker.fix.gff3 </span></pre><pre class=" 
CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span class="cm-comment">#get the IDs of gene models that don't have internal stop codons</span></span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;">seqkit <span class="cm-builtin">grep</span> <span class="cm-attribute">-v</span> <span class="cm-attribute">-s</span> <span class="cm-attribute">-r</span> <span class="cm-attribute">-p</span> <span class="cm-string">"^[^>].*\.[^</span><span class="cm-def">$]</span><span class="cm-string">"</span> braker.allprots.fa | <span class="cm-builtin">grep</span> <span class="cm-string">">"</span> | <span class="cm-builtin">sed</span> <span class="cm-string">'s/>//g'</span> > completeBrakerTranscripts.txt</span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span class="cm-comment">#Generate a new GFF3 file that only includes complete ORFs</span></span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span class="cm-builtin">cd</span> ../ && perl transcript_keeper.pl gm/completeBrakerTranscripts.txt gm/braker.fix.gff3 > gm/braker.fix.gff3.tmp && <span class="cm-builtin">cd</span> gm</span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span class="cm-builtin">mv</span> braker.fix.gff3.tmp braker.fix.gff3</span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;">gffread <span class="cm-attribute">-g</span> aep.final.genome.fa <span class="cm-attribute">-y</span> braker.prots.fa braker.fix.gff3 </span></pre></div></div></div></div></div><div style="position: absolute; height: 0px; width: 1px; border-bottom-width: 0px; border-bottom-style: solid; border-bottom-color: transparent; top: 220px;"></div><div 
class="CodeMirror-gutters" style="display: none; height: 220px;"></div></div></div></pre><p><span>We generated some stats for these initial gene models. First, we looked at the number of genes and transcripts, exon length, etc.</span></p><pre class="md-fences md-end-block ty-contain-cm modeLoaded" spellcheck="false" lang="bash"><div class="CodeMirror cm-s-inner cm-s-null-scroll CodeMirror-wrap" lang="bash"><div style="overflow: hidden; position: relative; width: 3px; height: 0px; top: 9px; left: 8px;"><textarea autocorrect="off" autocapitalize="off" spellcheck="false" tabindex="0" style="position: absolute; bottom: -1em; padding: 0px; width: 1000px; height: 1em; outline: none;"></textarea></div><div class="CodeMirror-scrollbar-filler" cm-not-content="true"></div><div class="CodeMirror-gutter-filler" cm-not-content="true"></div><div class="CodeMirror-scroll" tabindex="-1"><div class="CodeMirror-sizer" style="margin-left: 0px; margin-bottom: 0px; border-right-width: 0px; padding-right: 0px; padding-bottom: 0px;"><div style="position: relative; top: 0px;"><div class="CodeMirror-lines" role="presentation"><div role="presentation" style="position: relative; outline: none;"><div class="CodeMirror-measure"><pre><span>xxxxxxxxxx</span></pre></div><div class="CodeMirror-measure"></div><div style="position: relative; z-index: 1;"></div><div class="CodeMirror-code" role="presentation"><div class="CodeMirror-activeline" style="position: relative;"><div class="CodeMirror-activeline-background CodeMirror-linebackground"></div><div class="CodeMirror-gutter-background CodeMirror-activeline-gutter" style="left: 0px; width: 0px;"></div><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;">perl look_at_transcripts.pl gm/braker.fix.gff3 </span></pre></div></div></div></div></div></div><div style="position: absolute; height: 0px; width: 1px; border-bottom-width: 0px; border-bottom-style: solid; border-bottom-color: transparent; top: 
22px;"></div><div class="CodeMirror-gutters" style="display: none; height: 22px;"></div></div></div></pre><pre class="md-fences md-end-block ty-contain-cm modeLoaded" spellcheck="false" lang="bash" style="break-inside: unset;"><div class="CodeMirror cm-s-inner cm-s-null-scroll CodeMirror-wrap" lang="bash"><div style="overflow: hidden; position: relative; width: 3px; height: 0px; top: 9px; left: 8px;"><textarea autocorrect="off" autocapitalize="off" spellcheck="false" tabindex="0" style="position: absolute; bottom: -1em; padding: 0px; width: 1000px; height: 1em; outline: none;"></textarea></div><div class="CodeMirror-scrollbar-filler" cm-not-content="true"></div><div class="CodeMirror-gutter-filler" cm-not-content="true"></div><div class="CodeMirror-scroll" tabindex="-1"><div class="CodeMirror-sizer" style="margin-left: 0px; margin-bottom: 0px; border-right-width: 0px; padding-right: 0px; padding-bottom: 0px;"><div style="position: relative; top: 0px;"><div class="CodeMirror-lines" role="presentation"><div role="presentation" style="position: relative; outline: none;"><div class="CodeMirror-measure"><pre><span>xxxxxxxxxx</span></pre></div><div class="CodeMirror-measure"></div><div style="position: relative; z-index: 1;"></div><div class="CodeMirror-code" role="presentation" style=""><div class="CodeMirror-activeline" style="position: relative;"><div class="CodeMirror-activeline-background CodeMirror-linebackground"></div><div class="CodeMirror-gutter-background CodeMirror-activeline-gutter" style="left: 0px; width: 0px;"></div><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;">median cds length<span class="cm-tab" role="presentation" cm-text=" "> </span><span class="cm-number">657</span></span></pre></div><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;">median transcript length<span class="cm-tab" role="presentation" cm-text=" "> </span><span 
class="cm-number">663</span></span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;">median five prime UTR length<span class="cm-tab" role="presentation" cm-text=" "> </span><span class="cm-number">0</span></span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;">median three prime UTR length<span class="cm-tab" role="presentation" cm-text=" "> </span><span class="cm-number">0</span></span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;">median exon length<span class="cm-tab" role="presentation" cm-text=" "> </span><span class="cm-number">119</span></span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;">median intron length<span class="cm-tab" role="presentation" cm-text=" "> </span><span class="cm-number">556</span></span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;">total gene count<span class="cm-tab" role="presentation" cm-text=" "> </span><span class="cm-number">33683</span></span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;">total transcripts<span class="cm-tab" role="presentation" cm-text=" "> </span> <span class="cm-number">36084</span></span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;">total unique transcripts<span class="cm-tab" role="presentation" cm-text=" "> </span> <span class="cm-number">36084</span></span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;">total unique cds<span class="cm-tab" role="presentation" cm-text=" "> </span> <span class="cm-number">36084</span></span></pre><pre class=" CodeMirror-line " role="presentation"><span 
role="presentation" style="padding-right: 0.1px;"><span cm-text="" cm-zwsp="">
</span></span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;">number of genes with five prime UTR<span class="cm-tab" role="presentation" cm-text=" "> </span><span class="cm-number">0</span></span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;">number of genes with three prime UTR<span class="cm-tab" role="presentation" cm-text=" "> </span><span class="cm-number">0</span></span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;">number of trans with five prime UTR<span class="cm-tab" role="presentation" cm-text=" "> </span><span class="cm-number">0</span></span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;">number of trans with three prime UTR<span class="cm-tab" role="presentation" cm-text=" "> </span><span class="cm-number">0</span></span></pre></div></div></div></div></div><div style="position: absolute; height: 0px; width: 1px; border-bottom-width: 0px; border-bottom-style: solid; border-bottom-color: transparent; top: 330px;"></div><div class="CodeMirror-gutters" style="display: none; height: 330px;"></div></div></div></pre><p><span>We also looked at BUSCO stats:</span></p><pre class="md-fences md-end-block ty-contain-cm modeLoaded" spellcheck="false" lang="bash"><div class="CodeMirror cm-s-inner cm-s-null-scroll CodeMirror-wrap" lang="bash"><div style="overflow: hidden; position: relative; width: 3px; height: 0px; top: 9px; left: 8px;"><textarea autocorrect="off" autocapitalize="off" spellcheck="false" tabindex="0" style="position: absolute; bottom: -1em; padding: 0px; width: 1000px; height: 1em; outline: none;"></textarea></div><div class="CodeMirror-scrollbar-filler" cm-not-content="true"></div><div class="CodeMirror-gutter-filler" cm-not-content="true"></div><div class="CodeMirror-scroll" 
tabindex="-1"><div class="CodeMirror-sizer" style="margin-left: 0px; margin-bottom: 0px; border-right-width: 0px; padding-right: 0px; padding-bottom: 0px;"><div style="position: relative; top: 0px;"><div class="CodeMirror-lines" role="presentation"><div role="presentation" style="position: relative; outline: none;"><div class="CodeMirror-measure"><pre><span>xxxxxxxxxx</span></pre></div><div class="CodeMirror-measure"></div><div style="position: relative; z-index: 1;"></div><div class="CodeMirror-code" role="presentation" style=""><div class="CodeMirror-activeline" style="position: relative;"><div class="CodeMirror-activeline-background CodeMirror-linebackground"></div><div class="CodeMirror-gutter-background CodeMirror-activeline-gutter" style="left: 0px; width: 0px;"></div><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;">docker run <span class="cm-attribute">-u</span> <span class="cm-quote">$(id -u)</span> <span class="cm-attribute">-v</span> <span class="cm-quote">$(pwd)</span>:/busco_wd ezlabgva/busco:v5.beta_cv1 busco <span class="cm-attribute">-c</span> <span class="cm-number">6</span> <span class="cm-attribute">-m</span> prot <span class="cm-attribute">-i</span> braker.prots.fa <span class="cm-attribute">-o</span> braker <span class="cm-attribute">-l</span> metazoa_odb10 <span class="cm-attribute">-f</span> </span></pre></div><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span cm-text="" cm-zwsp="">
</span></span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span class="cm-tab" role="presentation" cm-text=" "> </span>C:92.9%[S:82.0%,D:10.9%],F:3.0%,M:4.1%,n:954<span class="cm-tab" role="presentation" cm-text=" "> </span> </span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span class="cm-tab" role="presentation" cm-text=" "> </span><span class="cm-number">886</span><span class="cm-tab" role="presentation" cm-text=" "> </span>Complete BUSCOs (C)<span class="cm-tab" role="presentation" cm-text=" "> </span><span class="cm-tab" role="presentation" cm-text=" "> </span><span class="cm-tab" role="presentation" cm-text=" "> </span> </span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span class="cm-tab" role="presentation" cm-text=" "> </span><span class="cm-number">782</span><span class="cm-tab" role="presentation" cm-text=" "> </span>Complete and single-copy BUSCOs (S)<span class="cm-tab" role="presentation" cm-text=" "> </span> </span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span class="cm-tab" role="presentation" cm-text=" "> </span><span class="cm-number">104</span><span class="cm-tab" role="presentation" cm-text=" "> </span>Complete and duplicated BUSCOs (D)<span class="cm-tab" role="presentation" cm-text=" "> </span> </span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span class="cm-tab" role="presentation" cm-text=" "> </span><span class="cm-number">29</span><span class="cm-tab" role="presentation" cm-text=" "> </span>Fragmented BUSCOs (F)<span class="cm-tab" role="presentation" cm-text=" "> </span><span class="cm-tab" role="presentation" cm-text=" "> </span><span class="cm-tab" role="presentation" cm-text=" "> </span> 
</span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span class="cm-tab" role="presentation" cm-text=" "> </span><span class="cm-number">39</span><span class="cm-tab" role="presentation" cm-text=" "> </span>Missing BUSCOs (M)<span class="cm-tab" role="presentation" cm-text=" "> </span><span class="cm-tab" role="presentation" cm-text=" "> </span><span class="cm-tab" role="presentation" cm-text=" "> </span> </span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" class="cm-tab-wrap-hack" style="padding-right: 0.1px;"><span class="cm-tab" role="presentation" cm-text=" "> </span><span class="cm-number">954</span><span class="cm-tab" role="presentation" cm-text=" "> </span>Total BUSCO groups searched<span class="cm-tab" role="presentation" cm-text=" "> </span><span class="cm-tab" role="presentation" cm-text=" "> </span></span></pre></div></div></div></div></div><div style="position: absolute; height: 0px; width: 1px; border-bottom-width: 0px; border-bottom-style: solid; border-bottom-color: transparent; top: 220px;"></div><div class="CodeMirror-gutters" style="display: none; height: 220px;"></div></div></div></pre><h2 id='supplementing-gene-models-using-exonerate'><span>Supplementing Gene Models Using Exonerate</span></h2><p><span>The stats for the Braker2 gene models were quite good; however, the number of complete BUSCOs was somewhat lower than the genome-guided transcriptome we had produced, which suggested that there were additional BUSCOs in our genome that weren't being annotated. We therefore wrote a custom pipeline to produce gene models from nucleotide alignments generated by exonerate (v2.2.0). 
This would allow us to use as input our previous 'gold standard' annotation (the LRv2 transcriptome) as well as any manually deposited </span><em><span>Hydra</span></em><span> GenBank entries to supplement the BRAKER2 annotations, hopefully filling in some of the gaps in the annotation.</span></p><h3 id='compiling-the-input-sequences-to-be-used-for-alignment'><span>Compiling the Input Sequences to Be Used for Alignment</span></h3><p><span>We wanted to include manually deposited </span><em><span>Hydra</span></em><span> GenBank sequences because all of those sequences were experimentally validated in some way, making them high-quality coding sequence predictions. To get these sequences from GenBank, we started with this query on NCBI:</span></p><p><code>"Hydra vulgaris"[porgn] AND (biomol_mrna[PROP] AND ddbj_embl_genbank[filter])</code><span> </span></p><p><span>We downloaded the results as a multi-GenBank file (on November 13, 2020). This file contained a large number of procedurally deposited GenBank entries that had not been experimentally validated. 
We filtered out those entries and exported the remaining nucleotide sequences using the following Python script:</span></p><p><span>(</span><em><span>03_exonerate/hydraAnnotations.py</span></em><span>)</span></p><pre class="md-fences md-end-block ty-contain-cm modeLoaded" spellcheck="false" lang="python" style="break-inside: unset;"><div class="CodeMirror cm-s-inner cm-s-null-scroll CodeMirror-wrap" lang="python"><div style="overflow: hidden; position: relative; width: 3px; height: 0px; top: 9px; left: 8px;"><textarea autocorrect="off" autocapitalize="off" spellcheck="false" tabindex="0" style="position: absolute; bottom: -1em; padding: 0px; width: 1000px; height: 1em; outline: none;"></textarea></div><div class="CodeMirror-scrollbar-filler" cm-not-content="true"></div><div class="CodeMirror-gutter-filler" cm-not-content="true"></div><div class="CodeMirror-scroll" tabindex="-1"><div class="CodeMirror-sizer" style="margin-left: 0px; margin-bottom: 0px; border-right-width: 0px; padding-right: 0px; padding-bottom: 0px;"><div style="position: relative; top: 0px;"><div class="CodeMirror-lines" role="presentation"><div role="presentation" style="position: relative; outline: none;"><div class="CodeMirror-measure"><pre><span>xxxxxxxxxx</span></pre></div><div class="CodeMirror-measure"></div><div style="position: relative; z-index: 1;"></div><div class="CodeMirror-code" role="presentation" style=""><div class="CodeMirror-activeline" style="position: relative;"><div class="CodeMirror-activeline-background CodeMirror-linebackground"></div><div class="CodeMirror-gutter-background CodeMirror-activeline-gutter" style="left: 0px; width: 0px;"></div><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span class="cm-comment">#!/usr/bin/env python3</span></span></pre></div><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span class="cm-comment"># -*- coding: utf-8 
-*-</span></span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span class="cm-string">"""</span></span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span class="cm-string">Created on Sat Nov 14 10:04:21 2020</span></span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span cm-text="" cm-zwsp="">
</span></span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span class="cm-string">@author: Jcazet</span></span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span class="cm-string">"""</span></span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span class="cm-keyword">from</span> <span class="cm-variable">Bio</span> <span class="cm-keyword">import</span> <span class="cm-variable">SeqIO</span></span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span class="cm-keyword">import</span> <span class="cm-variable">re</span></span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span cm-text="" cm-zwsp="">
</span></span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span class="cm-variable">gb</span> = <span class="cm-variable">SeqIO</span>.<span class="cm-property">parse</span>(<span class="cm-string">'/Users/Jcazet/Google_Drive/Juliano_lab/References/genbank/"Hydra vulgaris"[porgn] AND (biomol_mrna[PROP] AND ddbj_embl_genbank[filter]).gb'</span>, <span class="cm-string">"genbank"</span>)</span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span cm-text="" cm-zwsp="">
</span></span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span class="cm-comment"># c = 0</span></span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span cm-text="" cm-zwsp="">
</span></span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span class="cm-variable">gbKeep</span> = []</span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span cm-text="" cm-zwsp="">
</span></span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span class="cm-keyword">while</span> <span class="cm-keyword">True</span>:</span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"> <span class="cm-keyword">try</span>:</span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"> <span class="cm-variable">f</span> = <span class="cm-builtin">next</span>(<span class="cm-variable">gb</span>)</span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"> <span class="cm-keyword">except</span> <span class="cm-variable">StopIteration</span>:</span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"> <span class="cm-keyword">break</span></span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"> <span class="cm-keyword">else</span>:</span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"> <span class="cm-variable">tests</span> = []</span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"> <span class="cm-variable">tests</span>.<span class="cm-property">append</span>(<span class="cm-variable">re</span>.<span class="cm-property">search</span>(<span class="cm-string">'est project'</span>, <span class="cm-variable">f</span>.<span class="cm-property">annotations</span>[<span class="cm-string">'references'</span>][<span class="cm-number">0</span>].<span class="cm-property">title</span>, <span class="cm-variable">re</span>.<span class="cm-property">IGNORECASE</span>))</span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"> 
<span class="cm-variable">tests</span>.<span class="cm-property">append</span>(<span class="cm-variable">re</span>.<span class="cm-property">search</span>(<span class="cm-string">'rna interference'</span>, <span class="cm-variable">f</span>.<span class="cm-property">annotations</span>[<span class="cm-string">'references'</span>][<span class="cm-number">0</span>].<span class="cm-property">title</span>, <span class="cm-variable">re</span>.<span class="cm-property">IGNORECASE</span>))</span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"> <span class="cm-variable">tests</span>.<span class="cm-property">append</span>(<span class="cm-variable">re</span>.<span class="cm-property">search</span>(<span class="cm-string">'Hydra vulgaris cDNAs'</span>, <span class="cm-variable">f</span>.<span class="cm-property">annotations</span>[<span class="cm-string">'references'</span>][<span class="cm-number">0</span>].<span class="cm-property">title</span>, <span class="cm-variable">re</span>.<span class="cm-property">IGNORECASE</span>))</span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"> <span class="cm-variable">tests</span>.<span class="cm-property">append</span>(<span class="cm-variable">re</span>.<span class="cm-property">search</span>(<span class="cm-string">'Comparative analysis of septic'</span>, <span class="cm-variable">f</span>.<span class="cm-property">annotations</span>[<span class="cm-string">'references'</span>][<span class="cm-number">0</span>].<span class="cm-property">title</span>, <span class="cm-variable">re</span>.<span class="cm-property">IGNORECASE</span>))</span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"> </span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"> <span class="cm-keyword">if</span> 
<span class="cm-builtin">all</span>(<span class="cm-variable">v</span> <span class="cm-keyword">is</span> <span class="cm-keyword">None</span> <span class="cm-keyword">for</span> <span class="cm-variable">v</span> <span class="cm-keyword">in</span> <span class="cm-variable">tests</span>):</span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"> <span class="cm-variable">f</span>.<span class="cm-property">description</span> = <span class="cm-variable">re</span>.<span class="cm-property">sub</span>(<span class="cm-string">r'\, .*cds$'</span>,<span class="cm-string">r''</span>,<span class="cm-variable">f</span>.<span class="cm-property">description</span>, <span class="cm-variable">re</span>.<span class="cm-property">IGNORECASE</span>)</span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"> <span class="cm-comment"># print(f.description)</span></span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"> <span class="cm-variable">f</span>.<span class="cm-property">description</span> = <span class="cm-variable">f</span>.<span class="cm-property">description</span> <span class="cm-operator">+</span> <span class="cm-string">'|'</span> <span class="cm-operator">+</span> <span class="cm-variable">f</span>.<span class="cm-property">annotations</span>[<span class="cm-string">'references'</span>][<span class="cm-number">0</span>].<span class="cm-property">title</span></span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"> <span class="cm-variable">f</span>.<span class="cm-property">id</span> = <span class="cm-variable">f</span>.<span class="cm-property">id</span> <span class="cm-operator">+</span> <span class="cm-string">'|'</span></span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" 
style="padding-right: 0.1px;"> <span class="cm-variable">gbKeep</span>.<span class="cm-property">append</span>(<span class="cm-variable">f</span>)</span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span cm-text="" cm-zwsp="">
</span></span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span cm-text="" cm-zwsp="">
</span></span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span class="cm-variable">SeqIO</span>.<span class="cm-property">write</span>(<span class="cm-variable">gbKeep</span>, <span class="cm-string">"/Users/Jcazet/Google_Drive/Juliano_lab/References/genbank/hydraAnnotations.gb"</span>,<span class="cm-string">"gb"</span>)</span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span class="cm-variable">SeqIO</span>.<span class="cm-property">write</span>(<span class="cm-variable">gbKeep</span>, <span class="cm-string">"/Users/Jcazet/Google_Drive/Juliano_lab/References/genbank/hydraAnnotations.fasta"</span>,<span class="cm-string">"fasta"</span>)</span></pre></div></div></div></div></div><div style="position: absolute; height: 0px; width: 1px; border-bottom-width: 0px; border-bottom-style: solid; border-bottom-color: transparent; top: 880px;"></div><div class="CodeMirror-gutters" style="display: none; height: 880px;"></div></div></div></pre><p><span>We then combined these sequences with the AEP LRv2 transcriptome to generate the input for our alignment pipeline.</span></p><p><code>cat hydraAnnotations.fasta aepLRv2.fasta > query.fa</code></p><h3 id='pipeline-for-generating-gene-models-from-exonerate-alignments'><span>Pipeline for Generating Gene Models from Exonerate Alignments</span></h3><p><span>In principle, the pipeline is simple, in that Exonerate can take an mRNA sequence and a genome sequence as input and output gene model coordinates in GFF format; however, there are several complications in actually implementing this approach. First, the specific Exonerate algorithm that produces high-quality alignments (</span><code>cdna2genome</code><span>) is prohibitively slow when given a genome-sized search space. 
Second, Exonerate has finicky input requirements, including a file that specifies the ORF coordinates for a given input transcript. Third, the GFF files produced by Exonerate need extensive formatting fixes.</span></p><p><span>Versions of pipeline software not previously mentioned:</span></p><p><span>BEDTools v2.30.0, EMBOSS v6.6.0.0, AGAT v0.6.1</span></p><p><span>The pipeline script is provided below:</span></p><p><span>(</span><em><span>03_exonerate/gbMap.sh</span></em><span>)</span></p><pre class="md-fences md-end-block ty-contain-cm modeLoaded" spellcheck="false" lang="bash" style="break-inside: unset;"><div class="CodeMirror cm-s-inner cm-s-null-scroll CodeMirror-wrap" lang="bash"><div style="overflow: hidden; position: relative; width: 3px; height: 0px; top: 3529px; left: 41.734375px;"><textarea autocorrect="off" autocapitalize="off" spellcheck="false" tabindex="0" style="position: absolute; bottom: -1em; padding: 0px; width: 1000px; height: 1em; outline: none;"></textarea></div><div class="CodeMirror-scrollbar-filler" cm-not-content="true"></div><div class="CodeMirror-gutter-filler" cm-not-content="true"></div><div class="CodeMirror-scroll" tabindex="-1"><div class="CodeMirror-sizer" style="margin-left: 0px; margin-bottom: 0px; border-right-width: 0px; padding-right: 0px; padding-bottom: 0px;"><div style="position: relative; top: 0px;"><div class="CodeMirror-lines" role="presentation"><div role="presentation" style="position: relative; outline: none;"><div class="CodeMirror-measure"><pre>x</pre></div><div class="CodeMirror-measure"></div><div style="position: relative; z-index: 1;"></div><div class="CodeMirror-code" role="presentation" style=""><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span class="cm-meta">#!/bin/bash -l</span></span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span cm-text="" cm-zwsp="">
</span></span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span class="cm-builtin">source</span> ~/.bash_profile</span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span cm-text="" cm-zwsp="">
</span></span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span class="cm-comment">#The only input for the pipeline script is the name of a file </span></span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span class="cm-comment">#within a subdirectory called query.fa.split/</span></span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span class="cm-def">fileN</span><span class="cm-operator">=</span><span class="cm-string">"</span><span class="cm-def">$1</span><span class="cm-string">"</span></span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span cm-text="" cm-zwsp="">
</span></span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span class="cm-def">fileN</span><span class="cm-operator">=</span><span class="cm-string">"</span><span class="cm-def">${fileN/query.fa.split\//}</span><span class="cm-string">"</span></span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span cm-text="" cm-zwsp="">
</span></span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span class="cm-comment">#here we define a run name, derived from the input file name</span></span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span class="cm-def">runN</span><span class="cm-operator">=</span><span class="cm-string">"</span><span class="cm-def">${fileN/.fa/}</span><span class="cm-string">"</span></span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span cm-text="" cm-zwsp="">
</span></span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span class="cm-comment">#we'll make an output folder named after the run name to put our output</span></span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span class="cm-builtin">mkdir</span> <span class="cm-string">"</span><span class="cm-def">$runN</span><span class="cm-string">"</span></span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span cm-text="" cm-zwsp="">
</span></span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span class="cm-builtin">cd</span> <span class="cm-string">"</span><span class="cm-def">$runN</span><span class="cm-string">"</span></span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span cm-text="" cm-zwsp="">
</span></span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span class="cm-builtin">echo</span> <span class="cm-string">"</span><span class="cm-def">$runN</span><span class="cm-string">"</span></span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span cm-text="" cm-zwsp="">
</span></span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span class="cm-builtin">echo</span> <span class="cm-string">"</span><span class="cm-def">$fileN</span><span class="cm-string">"</span></span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span cm-text="" cm-zwsp="">
</span></span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span class="cm-comment">#many of the fasta sequences in the input sequences have long headers with characters</span></span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span class="cm-comment">#that can cause parsing errors. These long headers have useful information though,</span></span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span class="cm-comment">#so we just pull out the full headers to set aside, then truncate the headers in our actual</span></span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span class="cm-comment">#input files</span></span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span class="cm-builtin">grep</span> <span class="cm-string">">"</span> ../query.fa.split/<span class="cm-string">"</span><span class="cm-def">$fileN</span><span class="cm-string">"</span> > headers.txt</span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span cm-text="" cm-zwsp="">
</span></span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span class="cm-builtin">sed</span> <span class="cm-string">'s/|.*//g'</span> ../query.fa.split/<span class="cm-string">"</span><span class="cm-def">$fileN</span><span class="cm-string">"</span> > inSeqs.r.fa</span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span cm-text="" cm-zwsp="">
</span></span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span class="cm-comment">#using cd-hit to get rid of any possibly redundant sequences in the input</span></span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;">cd-hit <span class="cm-attribute">-i</span> inSeqs.r.fa <span class="cm-attribute">-o</span> inSeqs.fa <span class="cm-attribute">-c</span> <span class="cm-number">0</span>.95 <span class="cm-attribute">-G</span> <span class="cm-number">0</span> <span class="cm-attribute">-aL</span> <span class="cm-number">0</span>.8</span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span cm-text="" cm-zwsp="">
</span></span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span class="cm-comment">#now we split the multi-fasta into individual files</span></span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span class="cm-comment">#which we'll then iterate through one by one</span></span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;">seqkit split <span class="cm-attribute">--quiet</span> <span class="cm-attribute">-i</span> <span class="cm-attribute">-f</span> <span class="cm-attribute">-O</span> subseqs inSeqs.fa</span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span cm-text="" cm-zwsp="">
</span></span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span class="cm-comment">#initialize empty output files</span></span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span class="cm-builtin">echo</span> <span class="cm-attribute">-n</span> > fullRes.gff3</span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span cm-text="" cm-zwsp="">
</span></span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span class="cm-builtin">echo</span> <span class="cm-attribute">-n</span> > rawExo.txt</span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span cm-text="" cm-zwsp="">
</span></span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span class="cm-builtin">echo</span> <span class="cm-attribute">-n</span> > reformatted.gff</span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span cm-text="" cm-zwsp="">
</span></span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span class="cm-comment">#iterate through each sequence from the input file</span></span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span class="cm-keyword">for</span> arg <span class="cm-keyword">in</span> subseqs/*fa</span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span class="cm-keyword">do</span></span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span class="cm-tab" role="presentation" cm-text=" "> </span><span class="cm-comment">#extract the sequence ID from it's file name</span></span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span class="cm-tab" role="presentation" cm-text=" "> </span><span class="cm-def">name</span><span class="cm-operator">=</span><span class="cm-quote">$(echo "</span><span class="cm-def">$arg</span><span class="cm-quote">" | sed "s/subseqs\/inSeqs.id_//g;s/.fa//g")</span></span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span class="cm-tab" role="presentation" cm-text=" "> </span><span class="cm-builtin">echo</span> <span class="cm-def">$name</span></span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" class="cm-tab-wrap-hack" style="padding-right: 0.1px;"><span class="cm-tab" role="presentation" cm-text=" "> </span></span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"> <span class="cm-comment">#Exonerate's cdna2genome algorithm can produce high quality alignments, but</span></span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" 
style="padding-right: 0.1px;"> <span class="cm-comment">#it's also prohibitively slow when it has to deal with a genome-sized search</span></span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"> <span class="cm-comment">#space. We use basically the same solution as the MAKER pipeline, and use an</span></span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"> <span class="cm-comment">#initial BLAST search to produce a rough alignment that we can use to</span></span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"> <span class="cm-comment">#identify the general coordinates for the target gene. Then we can generate a</span></span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"> <span class="cm-comment">#high quality Exonerate alignment based on a much smaller search space.</span></span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span class="cm-tab" role="presentation" cm-text=" "> </span>blastn <span class="cm-attribute">-query</span> <span class="cm-def">$arg</span> <span class="cm-attribute">-db</span> ../AEPgenome <span class="cm-attribute">-outfmt</span> <span class="cm-string">'17 SR'</span> <span class="cm-attribute">-max_target_seqs</span> <span class="cm-number">1</span> > out.sam</span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" class="cm-tab-wrap-hack" style="padding-right: 0.1px;"><span class="cm-tab" role="presentation" cm-text=" "> </span></span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span class="cm-tab" role="presentation" cm-text=" "> </span><span class="cm-comment">#if BLAST produced no alignments, move on to the next 
query sequence</span></span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span class="cm-tab" role="presentation" cm-text=" "> </span><span class="cm-keyword">if</span> [ ! <span class="cm-attribute">-s</span> out.sam ]</span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span class="cm-tab" role="presentation" cm-text=" "> </span><span class="cm-keyword">then</span></span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span class="cm-tab" role="presentation" cm-text=" "> </span><span class="cm-tab" role="presentation" cm-text=" "> </span>continue</span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span class="cm-tab" role="presentation" cm-text=" "> </span><span class="cm-keyword">fi</span></span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" class="cm-tab-wrap-hack" style="padding-right: 0.1px;"><span class="cm-tab" role="presentation" cm-text=" "> </span></span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span class="cm-tab" role="presentation" cm-text=" "> </span><span class="cm-comment">#take the SAM alignments from blast and output the genomic sequence that spans</span></span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span class="cm-tab" role="presentation" cm-text=" "> </span><span class="cm-comment">#the entirety of the alignment</span></span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span class="cm-tab" role="presentation" cm-text=" "> </span>samtools view <span class="cm-attribute">-b</span> out.sam | \</span></pre><pre class=" CodeMirror-line " 
role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span class="cm-tab" role="presentation" cm-text=" "> </span><span class="cm-tab" role="presentation" cm-text=" "> </span>bedtools bamtobed <span class="cm-attribute">-tag</span> AS <span class="cm-attribute">-i</span> <span class="cm-attribute">-</span> | \</span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span class="cm-tab" role="presentation" cm-text=" "> </span><span class="cm-tab" role="presentation" cm-text=" "> </span><span class="cm-builtin">sort</span> <span class="cm-attribute">-k1</span>,1 <span class="cm-attribute">-k2</span>,2n | \</span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span class="cm-tab" role="presentation" cm-text=" "> </span><span class="cm-tab" role="presentation" cm-text=" "> </span><span class="cm-comment">#merge nearby alignments into a single chunk</span></span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span class="cm-tab" role="presentation" cm-text=" "> </span><span class="cm-tab" role="presentation" cm-text=" "> </span><span class="cm-comment">#have to be within 20Kb (max intron size)</span></span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span class="cm-tab" role="presentation" cm-text=" "> </span><span class="cm-tab" role="presentation" cm-text=" "> </span>bedtools merge <span class="cm-attribute">-c</span> <span class="cm-number">5</span> <span class="cm-attribute">-o</span> sum <span class="cm-attribute">-d</span> <span class="cm-number">20000</span> <span class="cm-attribute">-s</span> <span class="cm-attribute">-i</span> <span class="cm-attribute">-</span> | </span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 
0.1px;"><span class="cm-tab" role="presentation" cm-text=" "> </span><span class="cm-tab" role="presentation" cm-text=" "> </span><span class="cm-builtin">sort</span> <span class="cm-attribute">-k</span> <span class="cm-number">4</span> | \</span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span class="cm-tab" role="presentation" cm-text=" "> </span><span class="cm-tab" role="presentation" cm-text=" "> </span>tail <span class="cm-attribute">-n</span> <span class="cm-number">1</span> | \</span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span class="cm-tab" role="presentation" cm-text=" "> </span><span class="cm-tab" role="presentation" cm-text=" "> </span><span class="cm-comment">#add 20 Kb on either side of chunk for good measure</span></span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span class="cm-tab" role="presentation" cm-text=" "> </span><span class="cm-tab" role="presentation" cm-text=" "> </span>bedtools slop <span class="cm-attribute">-i</span> <span class="cm-attribute">-</span> <span class="cm-attribute">-g</span> ../aep.genome <span class="cm-attribute">-b</span> <span class="cm-number">20000</span> |</span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span class="cm-tab" role="presentation" cm-text=" "> </span><span class="cm-tab" role="presentation" cm-text=" "> </span>bedtools getfasta <span class="cm-attribute">-fi</span> ../aep.genome.fullsoft.fa <span class="cm-attribute">-bed</span> <span class="cm-attribute">-</span> \</span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span class="cm-tab" role="presentation" cm-text=" "> </span><span class="cm-tab" role="presentation" cm-text=" "> </span>> 
searchSpace.fa</span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" class="cm-tab-wrap-hack" style="padding-right: 0.1px;"><span class="cm-tab" role="presentation" cm-text=" "> </span></span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"> <span class="cm-comment">#pull all possible ORF coordinates from transcript sequence</span></span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span class="cm-tab" role="presentation" cm-text=" "> </span>getorf <span class="cm-def">$arg</span> orfList.txt <span class="cm-attribute">-find</span> <span class="cm-number">1</span></span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" class="cm-tab-wrap-hack" style="padding-right: 0.1px;"><span class="cm-tab" role="presentation" cm-text=" "> </span></span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span class="cm-tab" role="presentation" cm-text=" "> </span><span class="cm-comment">#pull out coordinates for the longest ORF</span></span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span class="cm-tab" role="presentation" cm-text=" "> </span>seqkit <span class="cm-builtin">sort</span> <span class="cm-attribute">--quiet</span> <span class="cm-attribute">-l</span> <span class="cm-attribute">-r</span> orfList.txt | \</span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span class="cm-tab" role="presentation" cm-text=" "> </span><span class="cm-tab" role="presentation" cm-text=" "> </span>head <span class="cm-attribute">-n</span> <span class="cm-number">1</span> | \</span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span 
class="cm-tab" role="presentation" cm-text=" "> </span><span class="cm-tab" role="presentation" cm-text=" "> </span><span class="cm-builtin">sed</span> <span class="cm-string">'s/.*\[//g;s/\]//g;s/- //g'</span> > coords.txt</span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" class="cm-tab-wrap-hack" style="padding-right: 0.1px;"><span class="cm-tab" role="presentation" cm-text=" "> </span></span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"> <span class="cm-comment">#reformat coordinates to work with Exonerate (need to include stop codon in total length)</span></span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span class="cm-tab" role="presentation" cm-text=" "> </span>/usr/bin/Rscript ../addStopCoord.R coords.txt</span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" class="cm-tab-wrap-hack" style="padding-right: 0.1px;"><span class="cm-tab" role="presentation" cm-text=" "> </span><span class="cm-tab" role="presentation" cm-text=" "> </span></span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span class="cm-tab" role="presentation" cm-text=" "> </span><span class="cm-builtin">rm</span> orfList.txt</span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" class="cm-tab-wrap-hack" style="padding-right: 0.1px;"><span class="cm-tab" role="presentation" cm-text=" "> </span></span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"> <span class="cm-comment">#final ORF coords reformatting</span></span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span class="cm-tab" role="presentation" cm-text=" "> </span><span class="cm-builtin">echo</span> 
<span class="cm-def">$name</span> <span class="cm-string">"+"</span> | \</span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span class="cm-tab" role="presentation" cm-text=" "> </span><span class="cm-tab" role="presentation" cm-text=" "> </span><span class="cm-builtin">cat</span> <span class="cm-attribute">-</span> coords.txt | \</span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span class="cm-tab" role="presentation" cm-text=" "> </span><span class="cm-tab" role="presentation" cm-text=" "> </span>tr <span class="cm-string">'\n'</span> <span class="cm-string">' '</span> > annot.txt</span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" class="cm-tab-wrap-hack" style="padding-right: 0.1px;"><span class="cm-tab" role="presentation" cm-text=" "> </span><span class="cm-tab" role="presentation" cm-text=" "> </span></span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span class="cm-tab" role="presentation" cm-text=" "> </span><span class="cm-builtin">rm</span> coords.txt</span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" class="cm-tab-wrap-hack" style="padding-right: 0.1px;"><span class="cm-tab" role="presentation" cm-text=" "> </span></span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span class="cm-tab" role="presentation" cm-text=" "> </span><span class="cm-comment">#run the actual exonerate algorithm</span></span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span class="cm-tab" role="presentation" cm-text=" "> </span>exonerate <span class="cm-attribute">-q</span> <span class="cm-def">$arg</span> \</span></pre><pre class=" CodeMirror-line " 
role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span class="cm-tab" role="presentation" cm-text=" "> </span><span class="cm-tab" role="presentation" cm-text=" "> </span><span class="cm-attribute">-t</span> searchSpace.fa \</span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span class="cm-tab" role="presentation" cm-text=" "> </span><span class="cm-tab" role="presentation" cm-text=" "> </span><span class="cm-attribute">-E</span> TRUE \</span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span class="cm-tab" role="presentation" cm-text=" "> </span><span class="cm-tab" role="presentation" cm-text=" "> </span><span class="cm-attribute">-m</span> cdna2genome \</span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span class="cm-tab" role="presentation" cm-text=" "> </span><span class="cm-tab" role="presentation" cm-text=" "> </span><span class="cm-attribute">--percent</span> <span class="cm-number">25</span> \</span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span class="cm-tab" role="presentation" cm-text=" "> </span><span class="cm-tab" role="presentation" cm-text=" "> </span><span class="cm-attribute">--showalignment</span> FALSE \</span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span class="cm-tab" role="presentation" cm-text=" "> </span><span class="cm-tab" role="presentation" cm-text=" "> </span><span class="cm-attribute">--showvulgar</span> FALSE \</span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span class="cm-tab" role="presentation" cm-text=" "> </span><span class="cm-tab" role="presentation" cm-text=" "> </span><span 
class="cm-attribute">--showtargetgff</span> TRUE \</span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span class="cm-tab" role="presentation" cm-text=" "> </span><span class="cm-tab" role="presentation" cm-text=" "> </span><span class="cm-attribute">-n</span> <span class="cm-number">1</span> \</span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span class="cm-tab" role="presentation" cm-text=" "> </span><span class="cm-tab" role="presentation" cm-text=" "> </span><span class="cm-attribute">-S</span> FALSE \</span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span class="cm-tab" role="presentation" cm-text=" "> </span><span class="cm-tab" role="presentation" cm-text=" "> </span><span class="cm-attribute">--annotation</span> annot.txt \</span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span class="cm-tab" role="presentation" cm-text=" "> </span><span class="cm-tab" role="presentation" cm-text=" "> </span><span class="cm-attribute">--softmasktarget</span> TRUE \</span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span class="cm-tab" role="presentation" cm-text=" "> </span><span class="cm-tab" role="presentation" cm-text=" "> </span><span class="cm-attribute">--seedrepeat</span> <span class="cm-number">4</span> \</span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span class="cm-tab" role="presentation" cm-text=" "> </span><span class="cm-tab" role="presentation" cm-text=" "> </span><span class="cm-attribute">--geneseed</span> <span class="cm-number">250</span> | \</span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" 
style="padding-right: 0.1px;"><span class="cm-tab" role="presentation" cm-text=" "> </span><span class="cm-tab" role="presentation" cm-text=" "> </span><span class="cm-builtin">sed</span> <span class="cm-string">'s/utr3b/utr3/g;/^#[^#]/d;/^#$/d;/similarity/d'</span> > exo.txt</span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span class="cm-tab" role="presentation" cm-text=" "> </span><span class="cm-comment">#exonerate outputs a lot of additional text beyond just the GFF that we need to get rid of</span></span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span class="cm-tab" role="presentation" cm-text=" "> </span>csplit <span class="cm-attribute">-f</span> hit exo.txt <span class="cm-string">'/gff-version/'</span> <span class="cm-string">'{*}'</span></span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" class="cm-tab-wrap-hack" style="padding-right: 0.1px;"><span class="cm-tab" role="presentation" cm-text=" "> </span><span class="cm-tab" role="presentation" cm-text=" "> </span></span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span class="cm-tab" role="presentation" cm-text=" "> </span><span class="cm-builtin">cat</span> exo.txt >> rawExo.txt</span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" class="cm-tab-wrap-hack" style="padding-right: 0.1px;"><span class="cm-tab" role="presentation" cm-text=" "> </span></span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span class="cm-tab" role="presentation" cm-text=" "> </span><span class="cm-builtin">rm</span> hit00</span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" class="cm-tab-wrap-hack" style="padding-right: 0.1px;"><span class="cm-tab" 
role="presentation" cm-text=" "> </span></span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span class="cm-tab" role="presentation" cm-text=" "> </span><span class="cm-comment">#if exonerate produced no alignments then move on to the next query sequence</span></span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span class="cm-tab" role="presentation" cm-text=" "> </span><span class="cm-keyword">if</span> [ ! <span class="cm-attribute">-f</span> hit01 ]</span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span class="cm-tab" role="presentation" cm-text=" "> </span><span class="cm-keyword">then</span></span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span class="cm-tab" role="presentation" cm-text=" "> </span><span class="cm-tab" role="presentation" cm-text=" "> </span>continue</span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span class="cm-tab" role="presentation" cm-text=" "> </span><span class="cm-keyword">fi</span></span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" class="cm-tab-wrap-hack" style="padding-right: 0.1px;"><span class="cm-tab" role="presentation" cm-text=" "> </span></span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span class="cm-tab" role="presentation" cm-text=" "> </span><span class="cm-comment">#this script fixes all the formatting problems with the exonerate GFF output</span></span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span class="cm-tab" role="presentation" cm-text=" "> </span>/usr/bin/Rscript ../reformatGff.R 
hit01</span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" class="cm-tab-wrap-hack" style="padding-right: 0.1px;"><span class="cm-tab" role="presentation" cm-text=" "> </span></span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span class="cm-tab" role="presentation" cm-text=" "> </span><span class="cm-builtin">sed</span> <span class="cm-attribute">-i</span> <span class="cm-string">'/^##/d'</span> hit01.gff</span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" class="cm-tab-wrap-hack" style="padding-right: 0.1px;"><span class="cm-tab" role="presentation" cm-text=" "> </span></span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span class="cm-tab" role="presentation" cm-text=" "> </span><span class="cm-builtin">cat</span> hit01.gff >> reformatted.gff</span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span cm-text="" cm-zwsp="">
</span></span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span class="cm-tab" role="presentation" cm-text=" "> </span><span class="cm-builtin">echo</span> <span class="cm-string">"##gff-version 2"</span> | <span class="cm-builtin">cat</span> <span class="cm-attribute">-</span> hit01.gff > geneRes.gff</span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" class="cm-tab-wrap-hack" style="padding-right: 0.1px;"><span class="cm-tab" role="presentation" cm-text=" "> </span></span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span class="cm-tab" role="presentation" cm-text=" "> </span><span class="cm-comment">#delete UTR rows, we'll add them back in later using AGAT </span></span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span class="cm-tab" role="presentation" cm-text=" "> </span><span class="cm-comment">#(does a better job with formatting/accuracy)</span></span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span class="cm-tab" role="presentation" cm-text=" "> </span><span class="cm-builtin">sed</span> <span class="cm-string">'/utr5/d;/utr3/d'</span> geneRes.gff > geneRes.utr.gff</span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" class="cm-tab-wrap-hack" style="padding-right: 0.1px;"><span class="cm-tab" role="presentation" cm-text=" "> </span></span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span class="cm-tab" role="presentation" cm-text=" "> </span>conda activate agatEnv</span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span cm-text="" cm-zwsp="">
</span></span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span class="cm-tab" role="presentation" cm-text=" "> </span>agat_convert_sp_gxf2gxf.pl <span class="cm-attribute">-g</span> geneRes.utr.gff \</span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span class="cm-tab" role="presentation" cm-text=" "> </span><span class="cm-tab" role="presentation" cm-text=" "> </span><span class="cm-attribute">-c</span> <span class="cm-string">"gene_id"</span> \</span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span class="cm-tab" role="presentation" cm-text=" "> </span><span class="cm-tab" role="presentation" cm-text=" "> </span><span class="cm-attribute">-gvi</span> <span class="cm-number">2</span> \</span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span class="cm-tab" role="presentation" cm-text=" "> </span><span class="cm-tab" role="presentation" cm-text=" "> </span><span class="cm-attribute">-gvo</span> <span class="cm-number">3</span> \</span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span class="cm-tab" role="presentation" cm-text=" "> </span><span class="cm-tab" role="presentation" cm-text=" "> </span><span class="cm-attribute">-o</span> geneRes.gff3</span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" class="cm-tab-wrap-hack" style="padding-right: 0.1px;"><span class="cm-tab" role="presentation" cm-text=" "> </span></span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span class="cm-tab" role="presentation" cm-text=" "> </span>conda deactivate</span></pre><pre class=" CodeMirror-line " role="presentation"><span 
role="presentation" class="cm-tab-wrap-hack" style="padding-right: 0.1px;"><span class="cm-tab" role="presentation" cm-text=" "> </span></span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"> <span class="cm-comment">#final tweaks to add proper parent ID to gene model</span></span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span class="cm-tab" role="presentation" cm-text=" "> </span>/usr/bin/Rscript ../fixParents.R geneRes.gff3</span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" class="cm-tab-wrap-hack" style="padding-right: 0.1px;"><span class="cm-tab" role="presentation" cm-text=" "> </span><span class="cm-tab" role="presentation" cm-text=" "> </span></span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span class="cm-tab" role="presentation" cm-text=" "> </span><span class="cm-builtin">rm</span> hit0*</span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" class="cm-tab-wrap-hack" style="padding-right: 0.1px;"><span class="cm-tab" role="presentation" cm-text=" "> </span></span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span class="cm-tab" role="presentation" cm-text=" "> </span><span class="cm-comment">#add result to full list of gene models</span></span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span class="cm-tab" role="presentation" cm-text=" "> </span><span class="cm-builtin">cat</span> geneRes.pfix.gff3 >> fullRes.gff3</span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" class="cm-tab-wrap-hack" style="padding-right: 0.1px;"><span class="cm-tab" role="presentation" cm-text=" "> </span><span class="cm-tab" 
role="presentation" cm-text=" "> </span><span class="cm-tab" role="presentation" cm-text=" "> </span></span></pre><div class="CodeMirror-activeline" style="position: relative;"><div class="CodeMirror-activeline-background CodeMirror-linebackground"></div><div class="CodeMirror-gutter-background CodeMirror-activeline-gutter" style="left: 0px; width: 0px;"></div><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span class="cm-keyword">done</span></span></pre></div></div></div></div></div></div><div style="position: absolute; height: 0px; width: 1px; border-bottom-width: 0px; border-bottom-style: solid; border-bottom-color: transparent; top: 3542px;"></div><div class="CodeMirror-gutters" style="display: none; height: 3542px;"></div></div></div></pre><p><span>This script uses several supplemental R scripts, primarily for reformatting text files.</span></p><p><span>The accessory script below fixes formatting issues with Exonerate's GFF output. 
In addition, it converts the coordinates into their proper genomic equivalents (in the initial output, the coordinates are relative to the small stretch of sequence used for the alignment).</span></p><p><span>(</span><em><span>03_exonerate/reformatGff.R</span></em><span>)</span></p><pre class="md-fences md-end-block ty-contain-cm modeLoaded" spellcheck="false" lang="R" style="break-inside: unset;"><div class="CodeMirror cm-s-inner cm-s-null-scroll CodeMirror-wrap" lang="r"><div style="overflow: hidden; position: relative; width: 3px; height: 0px; top: 9px; left: 8px;"><textarea autocorrect="off" autocapitalize="off" spellcheck="false" tabindex="0" style="position: absolute; bottom: -1em; padding: 0px; width: 1000px; height: 1em; outline: none;"></textarea></div><div class="CodeMirror-scrollbar-filler" cm-not-content="true"></div><div class="CodeMirror-gutter-filler" cm-not-content="true"></div><div class="CodeMirror-scroll" tabindex="-1"><div class="CodeMirror-sizer" style="margin-left: 0px; margin-bottom: 0px; border-right-width: 0px; padding-right: 0px; padding-bottom: 0px;"><div style="position: relative; top: 0px;"><div class="CodeMirror-lines" role="presentation"><div role="presentation" style="position: relative; outline: none;"><div class="CodeMirror-measure"><pre><span>xxxxxxxxxx</span></pre></div><div class="CodeMirror-measure"></div><div style="position: relative; z-index: 1;"></div><div class="CodeMirror-code" role="presentation" style=""><div class="CodeMirror-activeline" style="position: relative;"><div class="CodeMirror-activeline-background CodeMirror-linebackground"></div><div class="CodeMirror-gutter-background CodeMirror-activeline-gutter" style="left: 0px; width: 0px;"></div><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span class="cm-variable">args</span> <span class="cm-operator cm-arrow"><-</span> <span class="cm-variable">commandArgs</span>(<span 
class="cm-variable">trailingOnly</span> <span class="cm-operator">=</span> <span class="cm-variable">T</span>)</span></pre></div><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span cm-text="" cm-zwsp="">
</span></span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span class="cm-variable">inGff</span> <span class="cm-operator cm-arrow"><-</span> <span class="cm-variable">read.delim</span>(<span class="cm-variable">file</span> <span class="cm-operator">=</span> <span class="cm-variable">args</span>[<span class="cm-number">1</span>], <span class="cm-variable">sep</span> <span class="cm-operator">=</span> <span class="cm-string">"</span><span class="cm-string-2">\t</span><span class="cm-string">"</span>, <span class="cm-variable">stringsAsFactors</span> <span class="cm-operator">=</span> <span class="cm-variable">F</span>, <span class="cm-variable">header</span> <span class="cm-operator">=</span> <span class="cm-variable">F</span>)</span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span cm-text="" cm-zwsp="">
</span></span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span class="cm-variable">inGff</span> <span class="cm-operator cm-arrow"><-</span> <span class="cm-variable">inGff</span>[<span class="cm-operator">!</span><span class="cm-variable">grepl</span>(<span class="cm-string">"^#"</span>,<span class="cm-variable">inGff</span><span class="cm-operator cm-dollar">$</span><span class="cm-variable">V1</span>),]</span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span class="cm-variable">inGff</span> <span class="cm-operator cm-arrow"><-</span> <span class="cm-variable">inGff</span>[<span class="cm-operator">!</span><span class="cm-variable">grepl</span>(<span class="cm-string">"^-"</span>,<span class="cm-variable">inGff</span><span class="cm-operator cm-dollar">$</span><span class="cm-variable">V1</span>),]</span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span cm-text="" cm-zwsp="">
</span></span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span class="cm-variable">stCoord</span> <span class="cm-operator cm-arrow"><-</span> <span class="cm-variable">gsub</span>(<span class="cm-string">".*:"</span>,<span class="cm-string">""</span>,<span class="cm-variable">inGff</span>[<span class="cm-number">1</span>,<span class="cm-number">1</span>])</span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span class="cm-variable">stCoord</span> <span class="cm-operator cm-arrow"><-</span> <span class="cm-variable">as.numeric</span>(<span class="cm-variable">gsub</span>(<span class="cm-string">"-.*"</span>,<span class="cm-string">""</span>,<span class="cm-variable">stCoord</span>))</span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span cm-text="" cm-zwsp="">
</span></span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span class="cm-variable">inGff</span><span class="cm-operator cm-dollar">$</span><span class="cm-variable">V4</span> <span class="cm-operator cm-arrow"><-</span> <span class="cm-variable">inGff</span><span class="cm-operator cm-dollar">$</span><span class="cm-variable">V4</span> <span class="cm-operator">+</span> <span class="cm-variable">stCoord</span></span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span class="cm-variable">inGff</span><span class="cm-operator cm-dollar">$</span><span class="cm-variable">V5</span> <span class="cm-operator cm-arrow"><-</span> <span class="cm-variable">inGff</span><span class="cm-operator cm-dollar">$</span><span class="cm-variable">V5</span> <span class="cm-operator">+</span> <span class="cm-variable">stCoord</span></span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span cm-text="" cm-zwsp="">
</span></span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span class="cm-variable">inGff</span><span class="cm-operator cm-dollar">$</span><span class="cm-variable">V1</span> <span class="cm-operator cm-arrow"><-</span> <span class="cm-variable">gsub</span>(<span class="cm-string">":.*"</span>,<span class="cm-string">""</span>,<span class="cm-variable">inGff</span><span class="cm-operator cm-dollar">$</span><span class="cm-variable">V1</span>)</span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span cm-text="" cm-zwsp="">
</span></span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span class="cm-variable">gName</span> <span class="cm-operator cm-arrow"><-</span> <span class="cm-variable">inGff</span>[<span class="cm-variable">grepl</span>(<span class="cm-string">"gene"</span>,<span class="cm-variable">inGff</span><span class="cm-operator cm-dollar">$</span><span class="cm-variable">V3</span>),<span class="cm-number">9</span>]</span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span class="cm-variable">gName</span> <span class="cm-operator cm-arrow"><-</span> <span class="cm-variable">gsub</span>(<span class="cm-string">".*sequence "</span>,<span class="cm-string">""</span>,<span class="cm-variable">gName</span>)</span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span class="cm-variable">gName</span> <span class="cm-operator cm-arrow"><-</span> <span class="cm-variable">gsub</span>(<span class="cm-string">"</span> <span class="cm-string">;.*"</span>,<span class="cm-string">""</span>,<span class="cm-variable">gName</span>)</span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span cm-text="" cm-zwsp="">
</span></span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span class="cm-variable">inGff</span><span class="cm-operator cm-dollar">$</span><span class="cm-variable">V9</span> <span class="cm-operator cm-arrow"><-</span> <span class="cm-variable">gsub</span>(<span class="cm-string">"gene_id 0 ; "</span>,<span class="cm-string">""</span>,<span class="cm-variable">inGff</span><span class="cm-operator cm-dollar">$</span><span class="cm-variable">V9</span>)</span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span cm-text="" cm-zwsp="">
</span></span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span class="cm-variable">gName</span> <span class="cm-operator cm-arrow"><-</span> <span class="cm-variable">paste0</span>(<span class="cm-string">"</span> <span class="cm-string">; gene_id "</span>,<span class="cm-variable">gName</span>)</span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span cm-text="" cm-zwsp="">
</span></span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span cm-text="" cm-zwsp="">
</span></span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span class="cm-variable">inGff</span><span class="cm-operator cm-dollar">$</span><span class="cm-variable">V9</span> <span class="cm-operator cm-arrow"><-</span> <span class="cm-variable">paste0</span>(<span class="cm-variable">inGff</span><span class="cm-operator cm-dollar">$</span><span class="cm-variable">V9</span>,<span class="cm-variable">gName</span>)</span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span class="cm-variable">inGff</span><span class="cm-operator cm-dollar">$</span><span class="cm-variable">V9</span> <span class="cm-operator cm-arrow"><-</span> <span class="cm-variable">gsub</span>(<span class="cm-string">"^ ; "</span>,<span class="cm-string">""</span>,<span class="cm-variable">inGff</span><span class="cm-operator cm-dollar">$</span><span class="cm-variable">V9</span>)</span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span cm-text="" cm-zwsp="">
</span></span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span class="cm-variable">end5</span> <span class="cm-operator cm-arrow"><-</span> <span class="cm-variable">inGff</span>[<span class="cm-variable">inGff</span><span class="cm-operator cm-dollar">$</span><span class="cm-variable">V3</span> <span class="cm-operator">==</span> <span class="cm-string">'utr5'</span>,<span class="cm-number">5</span>]</span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span class="cm-variable">end5</span> <span class="cm-operator cm-arrow"><-</span> <span class="cm-variable">max</span>(<span class="cm-variable">end5</span>)</span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span cm-text="" cm-zwsp="">
</span></span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span class="cm-keyword">if</span>(<span class="cm-variable">inGff</span>[<span class="cm-number">1</span>,<span class="cm-number">7</span>] <span class="cm-operator">==</span> <span class="cm-string">'+'</span>) {</span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"> <span class="cm-variable">inGff</span>[<span class="cm-variable">inGff</span><span class="cm-operator cm-dollar">$</span><span class="cm-variable">V3</span> <span class="cm-operator">==</span> <span class="cm-string">'cds'</span> <span class="cm-operator">&</span> <span class="cm-variable">inGff</span><span class="cm-operator cm-dollar">$</span><span class="cm-variable">V4</span> <span class="cm-operator"><=</span> <span class="cm-variable">end5</span>,<span class="cm-number">4</span>] <span class="cm-operator cm-arrow"><-</span> <span class="cm-variable">end5</span> <span class="cm-operator">+</span> <span class="cm-number">1</span></span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;">}</span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span cm-text="" cm-zwsp="">
</span></span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span class="cm-variable">end5</span> <span class="cm-operator cm-arrow"><-</span> <span class="cm-variable">inGff</span>[<span class="cm-variable">inGff</span><span class="cm-operator cm-dollar">$</span><span class="cm-variable">V3</span> <span class="cm-operator">==</span> <span class="cm-string">'utr5'</span>,<span class="cm-number">4</span>]</span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span class="cm-variable">end5</span> <span class="cm-operator cm-arrow"><-</span> <span class="cm-variable">min</span>(<span class="cm-variable">end5</span>)</span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span cm-text="" cm-zwsp="">
</span></span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span class="cm-keyword">if</span>(<span class="cm-variable">inGff</span>[<span class="cm-number">1</span>,<span class="cm-number">7</span>] <span class="cm-operator">==</span> <span class="cm-string">'-'</span>) {</span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"> <span class="cm-variable">inGff</span>[<span class="cm-variable">inGff</span><span class="cm-operator cm-dollar">$</span><span class="cm-variable">V3</span> <span class="cm-operator">==</span> <span class="cm-string">'cds'</span> <span class="cm-operator">&</span> <span class="cm-variable">inGff</span><span class="cm-operator cm-dollar">$</span><span class="cm-variable">V5</span> <span class="cm-operator">>=</span> <span class="cm-variable">end5</span>,<span class="cm-number">5</span>] <span class="cm-operator cm-arrow"><-</span> <span class="cm-variable">end5</span> <span class="cm-operator">-</span> <span class="cm-number">1</span></span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;">}</span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span cm-text="" cm-zwsp="">
</span></span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span class="cm-variable">write.table</span>(<span class="cm-variable">file</span> <span class="cm-operator">=</span> <span class="cm-variable">paste0</span>(<span class="cm-variable">args</span>[<span class="cm-number">1</span>],<span class="cm-string">".gff"</span>), <span class="cm-variable">inGff</span>, <span class="cm-variable">sep</span> <span class="cm-operator">=</span> <span class="cm-string">"</span><span class="cm-string-2">\t</span><span class="cm-string">"</span>, <span class="cm-builtin">quote</span> <span class="cm-operator">=</span> <span class="cm-variable">F</span>, <span class="cm-variable">row.names</span> <span class="cm-operator">=</span> <span class="cm-variable">F</span>, <span class="cm-variable">col.names</span> <span class="cm-operator">=</span> <span class="cm-variable">F</span>)</span></pre></div></div></div></div></div><div style="position: absolute; height: 0px; width: 1px; border-bottom-width: 0px; border-bottom-style: solid; border-bottom-color: transparent; top: 924px;"></div><div class="CodeMirror-gutters" style="display: none; height: 924px;"></div></div></div></pre><p><span>The accessory script below fixes the coordinates for ORFs generated by </span><code>getorf</code><span> in part by including the stop codon in the final ORF coordinates.</span></p><p><span>(</span><em><span>03_exonerate/addStopCoord.R</span></em><span>)</span></p><pre class="md-fences md-end-block ty-contain-cm modeLoaded" spellcheck="false" lang="R"><div class="CodeMirror cm-s-inner cm-s-null-scroll CodeMirror-wrap" lang="r"><div style="overflow: hidden; position: relative; width: 3px; height: 0px; top: 9px; left: 8px;"><textarea autocorrect="off" autocapitalize="off" spellcheck="false" tabindex="0" style="position: absolute; bottom: -1em; padding: 0px; width: 1000px; height: 1em; outline: none;"></textarea></div><div 
class="CodeMirror-scrollbar-filler" cm-not-content="true"></div><div class="CodeMirror-gutter-filler" cm-not-content="true"></div><div class="CodeMirror-scroll" tabindex="-1"><div class="CodeMirror-sizer" style="margin-left: 0px; margin-bottom: 0px; border-right-width: 0px; padding-right: 0px; padding-bottom: 0px;"><div style="position: relative; top: 0px;"><div class="CodeMirror-lines" role="presentation"><div role="presentation" style="position: relative; outline: none;"><div class="CodeMirror-measure"><pre><span>xxxxxxxxxx</span></pre></div><div class="CodeMirror-measure"></div><div style="position: relative; z-index: 1;"></div><div class="CodeMirror-code" role="presentation" style=""><div class="CodeMirror-activeline" style="position: relative;"><div class="CodeMirror-activeline-background CodeMirror-linebackground"></div><div class="CodeMirror-gutter-background CodeMirror-activeline-gutter" style="left: 0px; width: 0px;"></div><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span class="cm-variable">args</span> <span class="cm-operator cm-arrow"><-</span> <span class="cm-variable">commandArgs</span>(<span class="cm-variable">trailingOnly</span> <span class="cm-operator">=</span> <span class="cm-variable">T</span>)</span></pre></div><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span cm-text="" cm-zwsp="">
</span></span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span class="cm-variable">inCoords</span> <span class="cm-operator cm-arrow"><-</span> <span class="cm-variable">read.delim</span>(<span class="cm-variable">args</span>[<span class="cm-number">1</span>], <span class="cm-variable">header</span> <span class="cm-operator">=</span> <span class="cm-variable">F</span>, <span class="cm-variable">sep</span> <span class="cm-operator">=</span> <span class="cm-string">"</span> <span class="cm-string">"</span>, <span class="cm-variable">stringsAsFactors</span> <span class="cm-operator">=</span> <span class="cm-variable">F</span>)</span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span cm-text="" cm-zwsp="">
</span></span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span class="cm-variable">inCoords</span><span class="cm-operator cm-dollar">$</span><span class="cm-variable">V3</span> <span class="cm-operator cm-arrow"><-</span> <span class="cm-atom">NULL</span></span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span cm-text="" cm-zwsp="">
</span></span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span class="cm-variable">inCoords</span><span class="cm-operator cm-dollar">$</span><span class="cm-variable">V2</span> <span class="cm-operator cm-arrow"><-</span> <span class="cm-variable">inCoords</span><span class="cm-operator cm-dollar">$</span><span class="cm-variable">V2</span> <span class="cm-operator">+</span> <span class="cm-number">3</span></span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span cm-text="" cm-zwsp="">
</span></span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span class="cm-variable">inCoords</span><span class="cm-operator cm-dollar">$</span><span class="cm-variable">V2</span> <span class="cm-operator cm-arrow"><-</span> <span class="cm-variable">inCoords</span><span class="cm-operator cm-dollar">$</span><span class="cm-variable">V2</span> <span class="cm-operator">-</span> <span class="cm-variable">inCoords</span><span class="cm-operator cm-dollar">$</span><span class="cm-variable">V1</span></span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span cm-text="" cm-zwsp="">
</span></span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span class="cm-variable">write.table</span>(<span class="cm-variable">inCoords</span>, <span class="cm-variable">file</span> <span class="cm-operator">=</span> <span class="cm-variable">args</span>[<span class="cm-number">1</span>], <span class="cm-variable">sep</span> <span class="cm-operator">=</span> <span class="cm-string">"</span> <span class="cm-string">"</span>, <span class="cm-builtin">quote</span> <span class="cm-operator">=</span> <span class="cm-variable">F</span>, <span class="cm-variable">row.names</span> <span class="cm-operator">=</span> <span class="cm-variable">F</span>, <span class="cm-variable">col.names</span> <span class="cm-operator">=</span> <span class="cm-variable">F</span>)</span></pre></div></div></div></div></div><div style="position: absolute; height: 0px; width: 1px; border-bottom-width: 0px; border-bottom-style: solid; border-bottom-color: transparent; top: 242px;"></div><div class="CodeMirror-gutters" style="display: none; height: 242px;"></div></div></div></pre><p><span>The accessory script below makes sure all of the gene and mRNA IDs are uniformly formatted following the AGAT conversion to GFF3. It also makes sure all rows associated with a gene model have the appropriate parent ID. 
</span></p><p><span>(</span><em><span>03_exonerate/fixParents.R</span></em><span>)</span></p><pre class="md-fences md-end-block ty-contain-cm modeLoaded" spellcheck="false" lang="R" style="break-inside: unset;"><div class="CodeMirror cm-s-inner cm-s-null-scroll CodeMirror-wrap" lang="r"><div style="overflow: hidden; position: relative; width: 3px; height: 0px; top: 9px; left: 8px;"><textarea autocorrect="off" autocapitalize="off" spellcheck="false" tabindex="0" style="position: absolute; bottom: -1em; padding: 0px; width: 1000px; height: 1em; outline: none;"></textarea></div><div class="CodeMirror-scrollbar-filler" cm-not-content="true"></div><div class="CodeMirror-gutter-filler" cm-not-content="true"></div><div class="CodeMirror-scroll" tabindex="-1"><div class="CodeMirror-sizer" style="margin-left: 0px; margin-bottom: 0px; border-right-width: 0px; padding-right: 0px; padding-bottom: 0px;"><div style="position: relative; top: 0px;"><div class="CodeMirror-lines" role="presentation"><div role="presentation" style="position: relative; outline: none;"><div class="CodeMirror-measure"><pre><span>xxxxxxxxxx</span></pre></div><div class="CodeMirror-measure"></div><div style="position: relative; z-index: 1;"></div><div class="CodeMirror-code" role="presentation" style=""><div class="CodeMirror-activeline" style="position: relative;"><div class="CodeMirror-activeline-background CodeMirror-linebackground"></div><div class="CodeMirror-gutter-background CodeMirror-activeline-gutter" style="left: 0px; width: 0px;"></div><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span class="cm-variable">options</span>(<span class="cm-variable">stringsAsFactors</span> <span class="cm-operator">=</span> <span class="cm-variable">F</span>)</span></pre></div><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span class="cm-variable">args</span> <span class="cm-operator 
cm-arrow"><-</span> <span class="cm-variable">commandArgs</span>(<span class="cm-variable">trailingOnly</span> <span class="cm-operator">=</span> <span class="cm-variable">T</span>)</span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span cm-text="" cm-zwsp="">
</span></span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span class="cm-variable">inGff</span> <span class="cm-operator cm-arrow"><-</span> <span class="cm-variable">read.delim</span>(<span class="cm-variable">args</span>[<span class="cm-number">1</span>], <span class="cm-variable">sep</span> <span class="cm-operator">=</span> <span class="cm-string">"</span><span class="cm-string-2">\t</span><span class="cm-string">"</span>, <span class="cm-variable">skip</span> <span class="cm-operator">=</span> <span class="cm-number">1</span>, <span class="cm-variable">header</span> <span class="cm-operator">=</span> <span class="cm-variable">F</span>)</span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span cm-text="" cm-zwsp="">
</span></span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span class="cm-variable">gName</span> <span class="cm-operator cm-arrow"><-</span> <span class="cm-variable">inGff</span>[<span class="cm-variable">inGff</span><span class="cm-operator cm-dollar">$</span><span class="cm-variable">V3</span> <span class="cm-operator">==</span> <span class="cm-string">"gene"</span>[<span class="cm-number">1</span>],<span class="cm-number">9</span>]</span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span cm-text="" cm-zwsp="">
</span></span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span class="cm-variable">gName</span> <span class="cm-operator cm-arrow"><-</span> <span class="cm-variable">gsub</span>(<span class="cm-string">"ID="</span>,<span class="cm-string">""</span>,<span class="cm-variable">gName</span>)</span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span class="cm-variable">gName</span> <span class="cm-operator cm-arrow"><-</span> <span class="cm-variable">gsub</span>(<span class="cm-string">";.*"</span>,<span class="cm-string">""</span>,<span class="cm-variable">gName</span>)</span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span cm-text="" cm-zwsp="">
</span></span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span class="cm-variable">inGff</span><span class="cm-operator cm-dollar">$</span><span class="cm-variable">V3</span> <span class="cm-operator cm-arrow"><-</span> <span class="cm-variable">gsub</span>(<span class="cm-string">"^RNA$"</span>,<span class="cm-string">"mRNA"</span>,<span class="cm-variable">inGff</span><span class="cm-operator cm-dollar">$</span><span class="cm-variable">V3</span>)</span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span class="cm-variable">badRNA</span> <span class="cm-operator cm-arrow"><-</span> <span class="cm-variable">which</span>(<span class="cm-variable">inGff</span><span class="cm-operator cm-dollar">$</span><span class="cm-variable">V3</span> <span class="cm-operator">==</span> <span class="cm-string">"mRNA"</span>)</span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span cm-text="" cm-zwsp="">
</span></span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span class="cm-keyword">for</span>(<span class="cm-variable">i</span> <span class="cm-keyword">in</span> <span class="cm-variable">badRNA</span>) {</span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"> <span class="cm-variable">badFormat</span> <span class="cm-operator cm-arrow"><-</span> <span class="cm-variable">gsub</span>(<span class="cm-string">'.*Parent=([^;]*);.*'</span>,<span class="cm-string">"</span><span class="cm-string-2">\\</span><span class="cm-string">1"</span>,<span class="cm-variable">inGff</span>[<span class="cm-variable">i</span>,<span class="cm-number">9</span>])</span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"> <span class="cm-variable">goodFormat</span> <span class="cm-operator cm-arrow"><-</span> <span class="cm-variable">toupper</span>(<span class="cm-variable">badFormat</span>)</span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"> <span class="cm-variable">goodFormat</span> <span class="cm-operator cm-arrow"><-</span> <span class="cm-variable">paste0</span>(<span class="cm-string">"Parent="</span>,<span class="cm-variable">goodFormat</span>)</span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"> <span class="cm-variable">inGff</span>[<span class="cm-variable">i</span>,<span class="cm-number">9</span>] <span class="cm-operator cm-arrow"><-</span> <span class="cm-variable">gsub</span>(<span class="cm-variable">paste0</span>(<span class="cm-string">"Parent="</span>,<span class="cm-variable">badFormat</span>),<span class="cm-variable">goodFormat</span>,<span class="cm-variable">inGff</span>[<span class="cm-variable">i</span>,<span 
class="cm-number">9</span>])</span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;">}</span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span cm-text="" cm-zwsp="">
</span></span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span class="cm-variable">inGff</span><span class="cm-operator cm-dollar">$</span><span class="cm-variable">V9</span> <span class="cm-operator cm-arrow"><-</span> <span class="cm-variable">gsub</span>(<span class="cm-string">"nbisL</span><span class="cm-string-2">\\</span><span class="cm-string">d-cds"</span>,<span class="cm-variable">paste0</span>(<span class="cm-variable">gName</span>,<span class="cm-string">"-mRNA"</span>),<span class="cm-variable">inGff</span><span class="cm-operator cm-dollar">$</span><span class="cm-variable">V9</span>)</span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span class="cm-variable">inGff</span><span class="cm-operator cm-dollar">$</span><span class="cm-variable">V9</span> <span class="cm-operator cm-arrow"><-</span> <span class="cm-variable">gsub</span>(<span class="cm-string">"nbisL</span><span class="cm-string-2">\\</span><span class="cm-string">d"</span>,<span class="cm-variable">gName</span>,<span class="cm-variable">inGff</span><span class="cm-operator cm-dollar">$</span><span class="cm-variable">V9</span>)</span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span class="cm-variable">inGff</span><span class="cm-operator cm-dollar">$</span><span class="cm-variable">V9</span> <span class="cm-operator cm-arrow"><-</span> <span class="cm-variable">gsub</span>(<span class="cm-string">"nbis"</span>,<span class="cm-variable">gName</span>,<span class="cm-variable">inGff</span><span class="cm-operator cm-dollar">$</span><span class="cm-variable">V9</span>)</span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span class="cm-variable">inGff</span><span class="cm-operator cm-dollar">$</span><span 
class="cm-variable">V9</span> <span class="cm-operator cm-arrow"><-</span> <span class="cm-variable">gsub</span>(<span class="cm-string">"ID=exon"</span>,<span class="cm-variable">paste0</span>(<span class="cm-string">"ID="</span>,<span class="cm-variable">gName</span>,<span class="cm-string">"-exon"</span>),<span class="cm-variable">inGff</span><span class="cm-operator cm-dollar">$</span><span class="cm-variable">V9</span>)</span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span class="cm-variable">inGff</span><span class="cm-operator cm-dollar">$</span><span class="cm-variable">V9</span> <span class="cm-operator cm-arrow"><-</span> <span class="cm-variable">gsub</span>(<span class="cm-string">"ID=cds"</span>,<span class="cm-variable">paste0</span>(<span class="cm-string">"ID="</span>,<span class="cm-variable">gName</span>,<span class="cm-string">"-cds"</span>),<span class="cm-variable">inGff</span><span class="cm-operator cm-dollar">$</span><span class="cm-variable">V9</span>)</span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span class="cm-variable">inGff</span><span class="cm-operator cm-dollar">$</span><span class="cm-variable">V9</span> <span class="cm-operator cm-arrow"><-</span> <span class="cm-variable">gsub</span>(<span class="cm-string">"ID=(</span><span class="cm-string-2">\\</span><span class="cm-string">d+);"</span>,<span class="cm-variable">paste0</span>(<span class="cm-string">"ID="</span>,<span class="cm-variable">gName</span>,<span class="cm-string">"-intron"</span>,<span class="cm-string">"-</span><span class="cm-string-2">\\</span><span class="cm-string">1;"</span>),<span class="cm-variable">inGff</span><span class="cm-operator cm-dollar">$</span><span class="cm-variable">V9</span>)</span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span 
class="cm-variable">inGff</span><span class="cm-operator cm-dollar">$</span><span class="cm-variable">V9</span> <span class="cm-operator cm-arrow"><-</span> <span class="cm-variable">gsub</span>(<span class="cm-string">'T(</span><span class="cm-string-2">\\</span><span class="cm-string">d+)AEP'</span>,<span class="cm-string">'t</span><span class="cm-string-2">\\</span><span class="cm-string">1aep'</span>,<span class="cm-variable">inGff</span><span class="cm-operator cm-dollar">$</span><span class="cm-variable">V9</span>)</span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span cm-text="" cm-zwsp="">
</span></span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span class="cm-variable">inGff</span>[<span class="cm-variable">inGff</span><span class="cm-operator cm-dollar">$</span><span class="cm-variable">V3</span> <span class="cm-operator">==</span> <span class="cm-string">'mRNA'</span>,<span class="cm-number">9</span>] <span class="cm-operator cm-arrow"><-</span> <span class="cm-variable">gsub</span>(<span class="cm-string">'^(ID=[^;]+)exon'</span>,<span class="cm-string">'</span><span class="cm-string-2">\\</span><span class="cm-string">1mRNA'</span>,<span class="cm-variable">inGff</span>[<span class="cm-variable">inGff</span><span class="cm-operator cm-dollar">$</span><span class="cm-variable">V3</span> <span class="cm-operator">==</span> <span class="cm-string">'mRNA'</span>,<span class="cm-number">9</span>])</span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span class="cm-variable">inGff</span>[<span class="cm-variable">inGff</span><span class="cm-operator cm-dollar">$</span><span class="cm-variable">V3</span> <span class="cm-operator cm-variable-2">%in%</span> <span class="cm-variable">c</span>(<span class="cm-string">'intron'</span>,<span class="cm-string">'exon'</span>,<span class="cm-string">'cds'</span>,<span class="cm-string">'three_prime_UTR'</span>,<span class="cm-string">'five_prime_UTR'</span>),<span class="cm-number">9</span>] <span class="cm-operator cm-arrow"><-</span> <span class="cm-variable">gsub</span>(<span class="cm-string">'(Parent=[^-;]+-)[^-;]+'</span>,<span class="cm-string">'</span><span class="cm-string-2">\\</span><span class="cm-string">1mRNA'</span>,<span class="cm-variable">inGff</span>[<span class="cm-variable">inGff</span><span class="cm-operator cm-dollar">$</span><span class="cm-variable">V3</span> <span class="cm-operator cm-variable-2">%in%</span> <span class="cm-variable">c</span>(<span 
class="cm-string">'intron'</span>,<span class="cm-string">'exon'</span>,<span class="cm-string">'cds'</span>,<span class="cm-string">'three_prime_UTR'</span>,<span class="cm-string">'five_prime_UTR'</span>),<span class="cm-number">9</span>])</span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span cm-text="" cm-zwsp="">
</span></span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span cm-text="" cm-zwsp="">
</span></span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span cm-text="" cm-zwsp="">
</span></span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span class="cm-variable">write.table</span>(<span class="cm-variable">inGff</span>, <span class="cm-variable">file</span> <span class="cm-operator">=</span> <span class="cm-variable">gsub</span>(<span class="cm-string">".gff3"</span>,<span class="cm-string">".pfix.gff3"</span>,<span class="cm-variable">args</span>[<span class="cm-number">1</span>]), <span class="cm-builtin">quote</span> <span class="cm-operator">=</span> <span class="cm-variable">F</span>, <span class="cm-variable">sep</span> <span class="cm-operator">=</span> <span class="cm-string">"</span><span class="cm-string-2">\t</span><span class="cm-string">"</span>, <span class="cm-variable">row.names</span> <span class="cm-operator">=</span> <span class="cm-variable">F</span>, <span class="cm-variable">col.names</span> <span class="cm-operator">=</span> <span class="cm-variable">F</span>) </span></pre></div></div></div></div></div><div style="position: absolute; height: 0px; width: 1px; border-bottom-width: 0px; border-bottom-style: solid; border-bottom-color: transparent; top: 792px;"></div><div class="CodeMirror-gutters" style="display: none; height: 792px;"></div></div></div></pre><h3 id='running-the-exonerate-pipeline'><span>Running the Exonerate Pipeline</span></h3><p><span>To prep for the pipeline, we generated a BLAST database from the AEP genome file that had all repeats soft-masked:</span></p><p><code>makeblastdb -in aep.genome.fullsoft.fa -dbtype nucl -title AEPgenome -parse_seqids -out AEPgenome</code></p><p><span>We also prepped a file with chromosome sizes (needed by bedtools):</span></p><pre class="md-fences md-end-block ty-contain-cm modeLoaded" spellcheck="false" lang="bash"><div class="CodeMirror cm-s-inner cm-s-null-scroll CodeMirror-wrap" lang="bash"><div style="overflow: hidden; position: relative; width: 3px; height: 0px; top: 9px; left: 8px;"><textarea 
autocorrect="off" autocapitalize="off" spellcheck="false" tabindex="0" style="position: absolute; bottom: -1em; padding: 0px; width: 1000px; height: 1em; outline: none;"></textarea></div><div class="CodeMirror-scrollbar-filler" cm-not-content="true"></div><div class="CodeMirror-gutter-filler" cm-not-content="true"></div><div class="CodeMirror-scroll" tabindex="-1"><div class="CodeMirror-sizer" style="margin-left: 0px; margin-bottom: 0px; border-right-width: 0px; padding-right: 0px; padding-bottom: 0px;"><div style="position: relative; top: 0px;"><div class="CodeMirror-lines" role="presentation"><div role="presentation" style="position: relative; outline: none;"><div class="CodeMirror-measure"><pre><span>xxxxxxxxxx</span></pre></div><div class="CodeMirror-measure"></div><div style="position: relative; z-index: 1;"></div><div class="CodeMirror-code" role="presentation"><div class="CodeMirror-activeline" style="position: relative;"><div class="CodeMirror-activeline-background CodeMirror-linebackground"></div><div class="CodeMirror-gutter-background CodeMirror-activeline-gutter" style="left: 0px; width: 0px;"></div><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;">samtools faidx aep.genome.fullsoft.fa</span></pre></div><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span class="cm-builtin">cut</span> <span class="cm-attribute">-f</span> <span class="cm-number">1</span>,2 aep.genome.fullsoft.fa.fai > aep.genome</span></pre></div></div></div></div></div><div style="position: absolute; height: 0px; width: 1px; border-bottom-width: 0px; border-bottom-style: solid; border-bottom-color: transparent; top: 44px;"></div><div class="CodeMirror-gutters" style="display: none; height: 44px;"></div></div></div></pre><p><span>Finally, we split our multi-FASTA file of query sequences to run the pipeline in parallel:</span></p><p><code>seqkit split -p 24 
query.fa</code></p><p><span>The pipeline was executed using the following script:</span></p><p><span>(</span><em><span>03_exonerate/slurmRunExo.sh</span></em><span>)</span></p><pre class="md-fences md-end-block ty-contain-cm modeLoaded" spellcheck="false" lang="bash" style="break-inside: unset;"><div class="CodeMirror cm-s-inner cm-s-null-scroll CodeMirror-wrap" lang="bash"><div style="overflow: hidden; position: relative; width: 3px; height: 0px; top: 9px; left: 8px;"><textarea autocorrect="off" autocapitalize="off" spellcheck="false" tabindex="0" style="position: absolute; bottom: -1em; padding: 0px; width: 1000px; height: 1em; outline: none;"></textarea></div><div class="CodeMirror-scrollbar-filler" cm-not-content="true"></div><div class="CodeMirror-gutter-filler" cm-not-content="true"></div><div class="CodeMirror-scroll" tabindex="-1"><div class="CodeMirror-sizer" style="margin-left: 0px; margin-bottom: 0px; border-right-width: 0px; padding-right: 0px; padding-bottom: 0px;"><div style="position: relative; top: 0px;"><div class="CodeMirror-lines" role="presentation"><div role="presentation" style="position: relative; outline: none;"><div class="CodeMirror-measure"><pre><span>xxxxxxxxxx</span></pre></div><div class="CodeMirror-measure"></div><div style="position: relative; z-index: 1;"></div><div class="CodeMirror-code" role="presentation" style=""><div class="CodeMirror-activeline" style="position: relative;"><div class="CodeMirror-activeline-background CodeMirror-linebackground"></div><div class="CodeMirror-gutter-background CodeMirror-activeline-gutter" style="left: 0px; width: 0px;"></div><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span class="cm-meta">#! 
/bin/bash -l</span></span></pre></div><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span class="cm-comment">#SBATCH --job-name=exo</span></span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span class="cm-comment">#SBATCH -p med</span></span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span class="cm-comment">#SBATCH -c 4</span></span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span class="cm-comment">#SBATCH -t 60-0</span></span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span class="cm-comment">#SBATCH --array=0-23</span></span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span class="cm-comment">#SBATCH --mem=8G</span></span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span class="cm-comment">#SBATCH --error=exo_%a.err</span></span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span class="cm-comment">#SBATCH --output=exo_%a.out</span></span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span cm-text="" cm-zwsp="">
</span></span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span class="cm-builtin">source</span> ~/.bash_profile</span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span cm-text="" cm-zwsp="">
</span></span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;">conda activate agatEnv</span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span cm-text="" cm-zwsp="">
</span></span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span class="cm-def">array</span><span class="cm-operator">=</span>(query.fa.split/*)</span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span cm-text="" cm-zwsp="">
</span></span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;">./gbMap.sh <span class="cm-def">${array[$SLURM_ARRAY_TASK_ID]}</span></span></pre></div></div></div></div></div><div style="position: absolute; height: 0px; width: 1px; border-bottom-width: 0px; border-bottom-style: solid; border-bottom-color: transparent; top: 374px;"></div><div class="CodeMirror-gutters" style="display: none; height: 374px;"></div></div></div></pre><p><span>We then concatenated the resulting output files from the 24 separate runs</span></p><p><code>cat query.part_0*/fullRes.gff3 > exoCat.gff3</code></p><p><span>We filtered out short or incomplete ORFs from the resulting gene predictions.</span></p><pre class="md-fences md-end-block ty-contain-cm modeLoaded" spellcheck="false" lang="bash"><div class="CodeMirror cm-s-inner cm-s-null-scroll CodeMirror-wrap" lang="bash"><div style="overflow: hidden; position: relative; width: 3px; height: 0px; top: 9px; left: 8px;"><textarea autocorrect="off" autocapitalize="off" spellcheck="false" tabindex="0" style="position: absolute; bottom: -1em; padding: 0px; width: 1000px; height: 1em; outline: none;"></textarea></div><div class="CodeMirror-scrollbar-filler" cm-not-content="true"></div><div class="CodeMirror-gutter-filler" cm-not-content="true"></div><div class="CodeMirror-scroll" tabindex="-1"><div class="CodeMirror-sizer" style="margin-left: 0px; margin-bottom: 0px; border-right-width: 0px; padding-right: 0px; padding-bottom: 0px;"><div style="position: relative; top: 0px;"><div class="CodeMirror-lines" role="presentation"><div role="presentation" style="position: relative; outline: none;"><div class="CodeMirror-measure"><pre><span>xxxxxxxxxx</span></pre></div><div class="CodeMirror-measure"></div><div style="position: relative; z-index: 1;"></div><div class="CodeMirror-code" role="presentation" style=""><div class="CodeMirror-activeline" style="position: relative;"><div 
class="CodeMirror-activeline-background CodeMirror-linebackground"></div><div class="CodeMirror-gutter-background CodeMirror-activeline-gutter" style="left: 0px; width: 0px;"></div><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span class="cm-comment">#write AA sequences from exonerate GFF</span></span></pre></div><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;">agat_sp_extract_sequences.pl <span class="cm-attribute">-g</span> exoCat.gff3 <span class="cm-attribute">-f</span> aep.final.genome.rfmt.fa <span class="cm-attribute">-p</span> <span class="cm-attribute">-o</span> exoOut.fa</span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span class="cm-comment">#get the IDs of complete gene models (no internal stops and longer than 20 AA)</span></span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;">seqkit <span class="cm-builtin">grep</span> <span class="cm-attribute">-s</span> <span class="cm-attribute">-r</span> <span class="cm-attribute">-v</span> <span class="cm-attribute">-p</span> <span class="cm-string">"\*[A-Z]+"</span> exoPartOut.fa | seqkit <span class="cm-builtin">grep</span> <span class="cm-attribute">-s</span> <span class="cm-attribute">-r</span> <span class="cm-attribute">-v</span> <span class="cm-attribute">-p</span> <span class="cm-string">"^\*"</span> <span class="cm-attribute">-</span> | seqkit seq <span class="cm-attribute">-m</span> <span class="cm-number">20</span> <span class="cm-attribute">-</span> > exoFilt.fa</span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span class="cm-builtin">grep</span> <span class="cm-string">'>'</span> exoFilt.fa > exoHeaders.txt</span></pre></div></div></div></div></div><div style="position: absolute; 
height: 0px; width: 1px; border-bottom-width: 0px; border-bottom-style: solid; border-bottom-color: transparent; top: 132px;"></div><div class="CodeMirror-gutters" style="display: none; height: 132px;"></div></div></div></pre><p><strong><span>Note</span></strong><span>: AGAT can't parse fasta files with excessively large line widths. The initial genome file didn't have linebreaks, so we added them using this command </span><code>seqkit seq -w 60 aep.final.genome.fa > aep.final.genome.rfmt.fa</code></p><p><span>'Bad' gene models (those models not listed in exoHeaders.txt) were removed from the exonerate GFF3 file using this R script:</span></p><p><span>(</span><em><span>03_exonerate/subExoComp.R</span></em><span>)</span></p><pre class="md-fences md-end-block ty-contain-cm modeLoaded" spellcheck="false" lang="R" style="break-inside: unset;"><div class="CodeMirror cm-s-inner cm-s-null-scroll CodeMirror-wrap" lang="r"><div style="overflow: hidden; position: relative; width: 3px; height: 0px; top: 9px; left: 8px;"><textarea autocorrect="off" autocapitalize="off" spellcheck="false" tabindex="0" style="position: absolute; bottom: -1em; padding: 0px; width: 1000px; height: 1em; outline: none;"></textarea></div><div class="CodeMirror-scrollbar-filler" cm-not-content="true"></div><div class="CodeMirror-gutter-filler" cm-not-content="true"></div><div class="CodeMirror-scroll" tabindex="-1"><div class="CodeMirror-sizer" style="margin-left: 0px; margin-bottom: 0px; border-right-width: 0px; padding-right: 0px; padding-bottom: 0px;"><div style="position: relative; top: 0px;"><div class="CodeMirror-lines" role="presentation"><div role="presentation" style="position: relative; outline: none;"><div class="CodeMirror-measure"><pre><span>xxxxxxxxxx</span></pre></div><div class="CodeMirror-measure"></div><div style="position: relative; z-index: 1;"></div><div class="CodeMirror-code" role="presentation" style=""><div class="CodeMirror-activeline" style="position: relative;"><div 
class="CodeMirror-activeline-background CodeMirror-linebackground"></div><div class="CodeMirror-gutter-background CodeMirror-activeline-gutter" style="left: 0px; width: 0px;"></div><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span class="cm-variable">library</span>(<span class="cm-variable">rstudioapi</span>)</span></pre></div><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span cm-text="" cm-zwsp="">
</span></span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span class="cm-comment">#set the working directory to be the folder in which this script is located</span></span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span class="cm-variable">setwd</span>(<span class="cm-variable">dirname</span>(<span class="cm-variable">getActiveDocumentContext</span>()<span class="cm-operator cm-dollar">$</span><span class="cm-variable">path</span>))</span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span cm-text="" cm-zwsp="">
</span></span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span class="cm-variable">e.full</span> <span class="cm-operator cm-arrow"><-</span> <span class="cm-variable">read.delim</span>(<span class="cm-string">"exoCat.gff3"</span>,<span class="cm-variable">header</span> <span class="cm-operator">=</span> <span class="cm-variable">F</span>)</span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span cm-text="" cm-zwsp="">
</span></span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span class="cm-variable">e.keep</span> <span class="cm-operator cm-arrow"><-</span> <span class="cm-variable">read.delim</span>(<span class="cm-string">"exoHeaders.txt"</span>, <span class="cm-variable">header</span> <span class="cm-operator">=</span> <span class="cm-variable">F</span>)</span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span class="cm-variable">e.keep</span><span class="cm-operator cm-dollar">$</span><span class="cm-variable">V1</span> <span class="cm-operator cm-arrow"><-</span> <span class="cm-variable">gsub</span>(<span class="cm-string">'>'</span>,<span class="cm-string">''</span>,<span class="cm-variable">e.keep</span><span class="cm-operator cm-dollar">$</span><span class="cm-variable">V1</span>)</span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span class="cm-variable">e.keep</span><span class="cm-operator cm-dollar">$</span><span class="cm-variable">V1</span> <span class="cm-operator cm-arrow"><-</span> <span class="cm-variable">gsub</span>(<span class="cm-string">'.*gene='</span>,<span class="cm-string">''</span>,<span class="cm-variable">e.keep</span><span class="cm-operator cm-dollar">$</span><span class="cm-variable">V1</span>)</span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span class="cm-variable">e.keep</span><span class="cm-operator cm-dollar">$</span><span class="cm-variable">V1</span> <span class="cm-operator cm-arrow"><-</span> <span class="cm-variable">gsub</span>(<span class="cm-string">'</span> <span class="cm-string">.*'</span>,<span class="cm-string">''</span>,<span class="cm-variable">e.keep</span><span class="cm-operator cm-dollar">$</span><span 
class="cm-variable">V1</span>)</span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span cm-text="" cm-zwsp="">
</span></span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span class="cm-variable">e.full</span><span class="cm-operator cm-dollar">$</span><span class="cm-variable">gID</span> <span class="cm-operator cm-arrow"><-</span> <span class="cm-variable">e.full</span><span class="cm-operator cm-dollar">$</span><span class="cm-variable">V9</span></span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span class="cm-variable">e.full</span><span class="cm-operator cm-dollar">$</span><span class="cm-variable">gID</span> <span class="cm-operator cm-arrow"><-</span> <span class="cm-variable">gsub</span>(<span class="cm-string">'.*gene_id='</span>,<span class="cm-string">''</span>,<span class="cm-variable">e.full</span><span class="cm-operator cm-dollar">$</span><span class="cm-variable">gID</span>)</span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span class="cm-variable">e.full</span><span class="cm-operator cm-dollar">$</span><span class="cm-variable">gID</span> <span class="cm-operator cm-arrow"><-</span> <span class="cm-variable">gsub</span>(<span class="cm-string">';.*'</span>,<span class="cm-string">''</span>,<span class="cm-variable">e.full</span><span class="cm-operator cm-dollar">$</span><span class="cm-variable">gID</span>)</span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span cm-text="" cm-zwsp="">
</span></span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span class="cm-variable">e.sub</span> <span class="cm-operator cm-arrow"><-</span> <span class="cm-variable">e.full</span>[<span class="cm-variable">e.full</span><span class="cm-operator cm-dollar">$</span><span class="cm-variable">gID</span> <span class="cm-operator cm-variable-2">%in%</span> <span class="cm-variable">e.keep</span><span class="cm-operator cm-dollar">$</span><span class="cm-variable">V1</span>,]</span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span class="cm-variable">e.sub</span><span class="cm-operator cm-dollar">$</span><span class="cm-variable">gID</span> <span class="cm-operator cm-arrow"><-</span> <span class="cm-atom">NULL</span></span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span cm-text="" cm-zwsp="">
</span></span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span class="cm-variable">write.table</span>(<span class="cm-variable">e.sub</span>,<span class="cm-variable">file</span> <span class="cm-operator">=</span> <span class="cm-string">'exoCat.complete.gff3'</span>,<span class="cm-variable">row.names</span> <span class="cm-operator">=</span> <span class="cm-variable">F</span>, <span class="cm-variable">col.names</span> <span class="cm-operator">=</span> <span class="cm-variable">F</span>, <span class="cm-variable">sep</span> <span class="cm-operator">=</span> <span class="cm-string">'</span><span class="cm-string-2">\t</span><span class="cm-string">'</span>,<span class="cm-builtin">quote</span> <span class="cm-operator">=</span> <span class="cm-variable">F</span>)</span></pre></div></div></div></div></div><div style="position: absolute; height: 0px; width: 1px; border-bottom-width: 0px; border-bottom-style: solid; border-bottom-color: transparent; top: 440px;"></div><div class="CodeMirror-gutters" style="display: none; height: 440px;"></div></div></div></pre><h2 id='curating-and-combining-exonerate-and-braker2-gene-models'><span>Curating and Combining Exonerate and Braker2 Gene Models</span></h2><h3 id='merging-the-braker2-and-exonerate-gene-models'><span>Merging the Braker2 and Exonerate Gene Models</span></h3><p><span>We next needed a way to merge the Braker2 and Exonerate gene models into a unified set of predictions. In many cases the two predictions identified a gene model at the same locus, meaning we needed a way to pick the better of the two options. 
We did this by BLASTing the gene models against our database of proteins from other species (used initially to provide hints to the BRAKER2 pipeline) and then picking whichever gene model had the best alignment score.</span></p><p><span>First we pooled the Braker2 and Exonerate protein sequences</span></p><p><code>cat exoFilt.fa braker.prots.fa > gmCandidates.fa</code></p><p><span>Then we removed any stop codon symbols (these cause errors when BLASTing)</span></p><p><code>sed -i 's/\.$//g;s/\([A-Z]\)\./\1/g;s/\.\([A-Z]\)/\1/g' gmCandidates.fa</code></p><p><code>sed -i 's/\*//g' gmCandidates.fa</code></p><p><span>Then we used diamond to align the protein models to </span><code>allPrimProts.fa</code></p><p><span>(</span><em><span>04_mergeMods/runBlast.sh</span></em><span>)</span></p><pre class="md-fences md-end-block ty-contain-cm modeLoaded" spellcheck="false" lang="bash"><div class="CodeMirror cm-s-inner cm-s-null-scroll CodeMirror-wrap" lang="bash"><div style="overflow: hidden; position: relative; width: 3px; height: 0px; top: 9px; left: 8px;"><textarea autocorrect="off" autocapitalize="off" spellcheck="false" tabindex="0" style="position: absolute; bottom: -1em; padding: 0px; width: 1000px; height: 1em; outline: none;"></textarea></div><div class="CodeMirror-scrollbar-filler" cm-not-content="true"></div><div class="CodeMirror-gutter-filler" cm-not-content="true"></div><div class="CodeMirror-scroll" tabindex="-1"><div class="CodeMirror-sizer" style="margin-left: 0px; margin-bottom: 0px; border-right-width: 0px; padding-right: 0px; padding-bottom: 0px;"><div style="position: relative; top: 0px;"><div class="CodeMirror-lines" role="presentation"><div role="presentation" style="position: relative; outline: none;"><div class="CodeMirror-measure"><pre><span>xxxxxxxxxx</span></pre></div><div class="CodeMirror-measure"></div><div style="position: relative; z-index: 1;"></div><div class="CodeMirror-code" role="presentation" style=""><div class="CodeMirror-activeline" 
style="position: relative;"><div class="CodeMirror-activeline-background CodeMirror-linebackground"></div><div class="CodeMirror-gutter-background CodeMirror-activeline-gutter" style="left: 0px; width: 0px;"></div><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span class="cm-meta">#!/bin/bash</span></span></pre></div><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span class="cm-comment">#SBATCH -p med</span></span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span class="cm-comment">#SBATCH --job-name=blastp</span></span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span class="cm-comment">#SBATCH -c 24</span></span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span class="cm-comment">#SBATCH -t 60-0</span></span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span class="cm-comment">#SBATCH --mem=0</span></span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span class="cm-comment">#SBATCH --error=blastp.err</span></span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span class="cm-comment">#SBATCH --output=blastp.out</span></span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span cm-text="" cm-zwsp="">
</span></span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;">diamond blastp <span class="cm-attribute">--query</span> gmCandidates.fa \</span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"> <span class="cm-attribute">--db</span> allPrimProts <span class="cm-attribute">--sensitive</span> \</span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"> <span class="cm-attribute">--outfmt</span> <span class="cm-number">6</span> <span class="cm-attribute">--evalue</span> 1e-5 <span class="cm-attribute">-p</span> <span class="cm-number">24</span> > blastpPrimProt.outfmt6</span></pre></div></div></div></div></div><div style="position: absolute; height: 0px; width: 1px; border-bottom-width: 0px; border-bottom-style: solid; border-bottom-color: transparent; top: 264px;"></div><div class="CodeMirror-gutters" style="display: none; height: 264px;"></div></div></div></pre><p><span>Next, we needed to identify which gene models from the two approaches overlapped (indicating a redundancy that needed to be resolved). 
We did this by looking for genes whose coordinates intersected each other in the genome</span></p><pre class="md-fences md-end-block ty-contain-cm modeLoaded" spellcheck="false" lang="bash"><div class="CodeMirror cm-s-inner cm-s-null-scroll CodeMirror-wrap" lang="bash"><div style="overflow: hidden; position: relative; width: 3px; height: 0px; top: 9px; left: 8px;"><textarea autocorrect="off" autocapitalize="off" spellcheck="false" tabindex="0" style="position: absolute; bottom: -1em; padding: 0px; width: 1000px; height: 1em; outline: none;"></textarea></div><div class="CodeMirror-scrollbar-filler" cm-not-content="true"></div><div class="CodeMirror-gutter-filler" cm-not-content="true"></div><div class="CodeMirror-scroll" tabindex="-1"><div class="CodeMirror-sizer" style="margin-left: 0px; margin-bottom: 0px; border-right-width: 0px; padding-right: 0px; padding-bottom: 0px;"><div style="position: relative; top: 0px;"><div class="CodeMirror-lines" role="presentation"><div role="presentation" style="position: relative; outline: none;"><div class="CodeMirror-measure"><pre><span>xxxxxxxxxx</span></pre></div><div class="CodeMirror-measure"></div><div style="position: relative; z-index: 1;"></div><div class="CodeMirror-code" role="presentation"><div class="CodeMirror-activeline" style="position: relative;"><div class="CodeMirror-activeline-background CodeMirror-linebackground"></div><div class="CodeMirror-gutter-background CodeMirror-activeline-gutter" style="left: 0px; width: 0px;"></div><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span class="cm-builtin">grep</span> <span class="cm-attribute">-P</span> <span class="cm-string">'\tgene\t'</span> braker.fix.gff3 | <span class="cm-builtin">awk</span> <span class="cm-string">'BEGIN { OFS = "\t" } { print $1, $4, $5, $9, $8, $7}'</span> > brakerGenes.bed</span></pre></div><pre class=" CodeMirror-line " role="presentation"><span role="presentation" 
style="padding-right: 0.1px;"><span class="cm-builtin">grep</span> <span class="cm-attribute">-P</span> <span class="cm-string">'\tgene\t'</span> exoCat.complete.gff3 | <span class="cm-builtin">awk</span> <span class="cm-string">'BEGIN { OFS = "\t" } { print $1, $4, $5, $9, $8, $7}'</span> > exoGenes.bed</span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;">bedtools intersect <span class="cm-attribute">-wo</span> <span class="cm-attribute">-f</span> <span class="cm-number">0</span>.3 <span class="cm-attribute">-F</span> <span class="cm-number">0</span>.3 <span class="cm-attribute">-e</span> <span class="cm-attribute">-s</span> <span class="cm-attribute">-a</span> brakerGenes.bed <span class="cm-attribute">-b</span> exoGenes.bed > brakerExoOlap.bed</span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;">bedtools intersect <span class="cm-attribute">-wo</span> <span class="cm-attribute">-f</span> <span class="cm-number">0</span>.5 <span class="cm-attribute">-F</span> <span class="cm-number">0</span>.5 <span class="cm-attribute">-e</span> <span class="cm-attribute">-s</span> <span class="cm-attribute">-a</span> exoGenes.bed <span class="cm-attribute">-b</span> exoGenes.bed > exoExoOlap.bed</span></pre></div></div></div></div></div><div style="position: absolute; height: 0px; width: 1px; border-bottom-width: 0px; border-bottom-style: solid; border-bottom-color: transparent; top: 132px;"></div><div class="CodeMirror-gutters" style="display: none; height: 132px;"></div></div></div></pre><p><span>We then used the following R script to reduce the redundancy between the BRAKER2 and exonerate models using the BedTools and BLAST output. 
We kept any gene models that had no intersections, or, if they did have intersections, we kept only the gene model that had the highest alignment score from our protein database BLAST run.</span></p><p><span>(</span><em><span>04_mergeMods/gmFilt.R</span></em><span>)</span></p><pre class="md-fences md-end-block ty-contain-cm modeLoaded" spellcheck="false" lang="R" style="break-inside: unset;"><div class="CodeMirror cm-s-inner cm-s-null-scroll CodeMirror-wrap" lang="r"><div style="overflow: hidden; position: relative; width: 3px; height: 0px; top: 9px; left: 8px;"><textarea autocorrect="off" autocapitalize="off" spellcheck="false" tabindex="0" style="position: absolute; bottom: -1em; padding: 0px; width: 1000px; height: 1em; outline: none;"></textarea></div><div class="CodeMirror-scrollbar-filler" cm-not-content="true"></div><div class="CodeMirror-gutter-filler" cm-not-content="true"></div><div class="CodeMirror-scroll" tabindex="-1"><div class="CodeMirror-sizer" style="margin-left: 0px; margin-bottom: 0px; border-right-width: 0px; padding-right: 0px; padding-bottom: 0px;"><div style="position: relative; top: 0px;"><div class="CodeMirror-lines" role="presentation"><div role="presentation" style="position: relative; outline: none;"><div class="CodeMirror-measure"><pre><span>xxxxxxxxxx</span></pre></div><div class="CodeMirror-measure"></div><div style="position: relative; z-index: 1;"></div><div class="CodeMirror-code" role="presentation" style=""><div class="CodeMirror-activeline" style="position: relative;"><div class="CodeMirror-activeline-background CodeMirror-linebackground"></div><div class="CodeMirror-gutter-background CodeMirror-activeline-gutter" style="left: 0px; width: 0px;"></div><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span class="cm-variable">library</span>(<span class="cm-variable">rstudioapi</span>)</span></pre></div><pre class=" CodeMirror-line " role="presentation"><span 
role="presentation" style="padding-right: 0.1px;"><span class="cm-variable">library</span>(<span class="cm-variable">plyr</span>)</span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span cm-text="" cm-zwsp="">
</span></span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span class="cm-comment">#set the working directory to be the folder in which this script is located</span></span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span class="cm-variable">setwd</span>(<span class="cm-variable">dirname</span>(<span class="cm-variable">getActiveDocumentContext</span>()<span class="cm-operator cm-dollar">$</span><span class="cm-variable">path</span>))</span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span cm-text="" cm-zwsp="">
</span></span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span class="cm-variable">bE.in</span> <span class="cm-operator cm-arrow"><-</span> <span class="cm-variable">read.delim</span>(<span class="cm-string">"brakerExoOlap.bed"</span>, <span class="cm-variable">header</span> <span class="cm-operator">=</span> <span class="cm-variable">F</span>)</span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span cm-text="" cm-zwsp="">
</span></span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span class="cm-comment">#name reformatting</span></span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span class="cm-variable">bE.in</span><span class="cm-operator cm-dollar">$</span><span class="cm-variable">V10</span> <span class="cm-operator cm-arrow"><-</span> <span class="cm-variable">gsub</span>(<span class="cm-string">';.*'</span>,<span class="cm-string">''</span>,<span class="cm-variable">bE.in</span><span class="cm-operator cm-dollar">$</span><span class="cm-variable">V10</span>)</span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span class="cm-variable">bE.in</span><span class="cm-operator cm-dollar">$</span><span class="cm-variable">V10</span> <span class="cm-operator cm-arrow"><-</span> <span class="cm-variable">gsub</span>(<span class="cm-string">'ID='</span>,<span class="cm-string">''</span>,<span class="cm-variable">bE.in</span><span class="cm-operator cm-dollar">$</span><span class="cm-variable">V10</span>)</span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span class="cm-variable">bE.in</span><span class="cm-operator cm-dollar">$</span><span class="cm-variable">V4</span> <span class="cm-operator cm-arrow"><-</span> <span class="cm-variable">gsub</span>(<span class="cm-string">'ID='</span>,<span class="cm-string">''</span>,<span class="cm-variable">bE.in</span><span class="cm-operator cm-dollar">$</span><span class="cm-variable">V4</span>)</span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span class="cm-variable">bE.in</span><span class="cm-operator cm-dollar">$</span><span class="cm-variable">V4</span> <span class="cm-operator cm-arrow"><-</span> <span 
class="cm-variable">gsub</span>(<span class="cm-string">';.*'</span>,<span class="cm-string">''</span>,<span class="cm-variable">bE.in</span><span class="cm-operator cm-dollar">$</span><span class="cm-variable">V4</span>)</span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span class="cm-variable">bE.in</span><span class="cm-operator cm-dollar">$</span><span class="cm-variable">V4</span> <span class="cm-operator cm-arrow"><-</span> <span class="cm-variable">gsub</span>(<span class="cm-string">'[.]t.*'</span>,<span class="cm-string">''</span>,<span class="cm-variable">bE.in</span><span class="cm-operator cm-dollar">$</span><span class="cm-variable">V4</span>)</span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span cm-text="" cm-zwsp="">
</span></span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span class="cm-comment">#drop identical overlap pairs (caused by isoforms)</span></span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span class="cm-variable">bE.in</span> <span class="cm-operator cm-arrow"><-</span> <span class="cm-variable">bE.in</span>[<span class="cm-operator">!</span><span class="cm-variable">duplicated</span>(<span class="cm-variable">paste</span>(<span class="cm-variable">bE.in</span><span class="cm-operator cm-dollar">$</span><span class="cm-variable">V4</span>,<span class="cm-variable">bE.in</span><span class="cm-operator cm-dollar">$</span><span class="cm-variable">V10</span>)),]</span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span cm-text="" cm-zwsp="">
</span></span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span class="cm-comment">#self olap for exonerate </span></span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span class="cm-variable">e.in</span> <span class="cm-operator cm-arrow"><-</span> <span class="cm-variable">read.delim</span>(<span class="cm-string">"exoExoOlap.bed"</span>, <span class="cm-variable">header</span> <span class="cm-operator">=</span> <span class="cm-variable">F</span>)</span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span cm-text="" cm-zwsp="">
</span></span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span class="cm-variable">e.in</span><span class="cm-operator cm-dollar">$</span><span class="cm-variable">V10</span> <span class="cm-operator cm-arrow"><-</span> <span class="cm-variable">gsub</span>(<span class="cm-string">';.*'</span>,<span class="cm-string">''</span>,<span class="cm-variable">e.in</span><span class="cm-operator cm-dollar">$</span><span class="cm-variable">V10</span>)</span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span class="cm-variable">e.in</span><span class="cm-operator cm-dollar">$</span><span class="cm-variable">V10</span> <span class="cm-operator cm-arrow"><-</span> <span class="cm-variable">gsub</span>(<span class="cm-string">'ID='</span>,<span class="cm-string">''</span>,<span class="cm-variable">e.in</span><span class="cm-operator cm-dollar">$</span><span class="cm-variable">V10</span>)</span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span cm-text="" cm-zwsp="">
</span></span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span class="cm-variable">e.in</span><span class="cm-operator cm-dollar">$</span><span class="cm-variable">V4</span> <span class="cm-operator cm-arrow"><-</span> <span class="cm-variable">gsub</span>(<span class="cm-string">';.*'</span>,<span class="cm-string">''</span>,<span class="cm-variable">e.in</span><span class="cm-operator cm-dollar">$</span><span class="cm-variable">V4</span>)</span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span class="cm-variable">e.in</span><span class="cm-operator cm-dollar">$</span><span class="cm-variable">V4</span> <span class="cm-operator cm-arrow"><-</span> <span class="cm-variable">gsub</span>(<span class="cm-string">'ID='</span>,<span class="cm-string">''</span>,<span class="cm-variable">e.in</span><span class="cm-operator cm-dollar">$</span><span class="cm-variable">V4</span>)</span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span cm-text="" cm-zwsp="">
</span></span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span class="cm-variable">e.in</span> <span class="cm-operator cm-arrow"><-</span> <span class="cm-variable">e.in</span>[<span class="cm-variable">e.in</span><span class="cm-operator cm-dollar">$</span><span class="cm-variable">V4</span> <span class="cm-operator">!=</span> <span class="cm-variable">e.in</span><span class="cm-operator cm-dollar">$</span><span class="cm-variable">V10</span>,]</span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span cm-text="" cm-zwsp="">
</span></span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span class="cm-variable">blastHits</span> <span class="cm-operator cm-arrow"><-</span> <span class="cm-variable">read.delim</span>(<span class="cm-string">"blastpPrimProt.outfmt6"</span>, <span class="cm-variable">header</span> <span class="cm-operator">=</span> <span class="cm-variable">F</span>)</span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span class="cm-variable">blastHits</span><span class="cm-operator cm-dollar">$</span><span class="cm-variable">V1</span> <span class="cm-operator cm-arrow"><-</span> <span class="cm-variable">gsub</span>(<span class="cm-string">"-mRNA.*"</span>,<span class="cm-string">""</span>,<span class="cm-variable">blastHits</span><span class="cm-operator cm-dollar">$</span><span class="cm-variable">V1</span>)</span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span cm-text="" cm-zwsp="">
</span></span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span class="cm-comment">#get sum of bit score for all blast hits for a given gene model</span></span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span class="cm-variable">blastHits.score</span> <span class="cm-operator cm-arrow"><-</span> <span class="cm-variable">aggregate</span>(<span class="cm-variable">blastHits</span>[,<span class="cm-number">12</span>], <span class="cm-builtin">list</span>(<span class="cm-variable">blastHits</span><span class="cm-operator cm-dollar">$</span><span class="cm-variable">V1</span>), <span class="cm-variable">FUN</span> <span class="cm-operator">=</span> <span class="cm-variable">sum</span>)</span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span cm-text="" cm-zwsp="">
</span></span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span class="cm-comment">#only keep the isoform with the highest score</span></span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span class="cm-variable">blastHits.score</span><span class="cm-operator cm-dollar">$</span><span class="cm-variable">Group.1</span> <span class="cm-operator cm-arrow"><-</span> <span class="cm-variable">gsub</span>(<span class="cm-string">'[.]t</span><span class="cm-string-2">\\</span><span class="cm-string">d*'</span>,<span class="cm-string">''</span>,<span class="cm-variable">blastHits.score</span><span class="cm-operator cm-dollar">$</span><span class="cm-variable">Group.1</span>)</span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span class="cm-variable">blastHits.score</span> <span class="cm-operator cm-arrow"><-</span> <span class="cm-variable">blastHits.score</span>[<span class="cm-variable">order</span>(<span class="cm-operator">-</span><span class="cm-variable">blastHits.score</span><span class="cm-operator cm-dollar">$</span><span class="cm-variable">x</span>),]</span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span class="cm-variable">blastHits.score</span> <span class="cm-operator cm-arrow"><-</span> <span class="cm-variable">blastHits.score</span>[<span class="cm-operator">!</span><span class="cm-variable">duplicated</span>(<span class="cm-variable">blastHits.score</span><span class="cm-operator cm-dollar">$</span><span class="cm-variable">Group.1</span>),]</span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span cm-text="" cm-zwsp="">
</span></span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span class="cm-comment">#add braker gene model blast score to overlap table</span></span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span class="cm-variable">bE.in</span><span class="cm-operator cm-dollar">$</span><span class="cm-variable">bScore</span> <span class="cm-operator cm-arrow"><-</span> <span class="cm-variable">mapvalues</span>(<span class="cm-variable">bE.in</span><span class="cm-operator cm-dollar">$</span><span class="cm-variable">V4</span>,<span class="cm-variable">from</span> <span class="cm-operator">=</span> <span class="cm-variable">blastHits.score</span><span class="cm-operator cm-dollar">$</span><span class="cm-variable">Group.1</span>, <span class="cm-variable">to</span> <span class="cm-operator">=</span> <span class="cm-variable">blastHits.score</span><span class="cm-operator cm-dollar">$</span><span class="cm-variable">x</span>, <span class="cm-variable">warn_missing</span> <span class="cm-operator">=</span> <span class="cm-variable">F</span>)</span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span cm-text="" cm-zwsp="">
</span></span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span class="cm-variable">bE.in</span>[<span class="cm-variable">grepl</span>(<span class="cm-string">'^file'</span>,<span class="cm-variable">bE.in</span><span class="cm-operator cm-dollar">$</span><span class="cm-variable">bScore</span>),<span class="cm-string">'bScore'</span>] <span class="cm-operator cm-arrow"><-</span> <span class="cm-number">0</span></span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span cm-text="" cm-zwsp="">
</span></span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span class="cm-variable">bE.in</span><span class="cm-operator cm-dollar">$</span><span class="cm-variable">bScore</span> <span class="cm-operator cm-arrow"><-</span> <span class="cm-variable">as.numeric</span>(<span class="cm-variable">bE.in</span><span class="cm-operator cm-dollar">$</span><span class="cm-variable">bScore</span>)</span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span cm-text="" cm-zwsp="">
</span></span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span class="cm-comment">#add exonerate gene model blast score to overlap table</span></span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span class="cm-variable">bE.in</span><span class="cm-operator cm-dollar">$</span><span class="cm-variable">eScore</span> <span class="cm-operator cm-arrow"><-</span> <span class="cm-variable">mapvalues</span>(<span class="cm-variable">bE.in</span><span class="cm-operator cm-dollar">$</span><span class="cm-variable">V10</span>,<span class="cm-variable">from</span> <span class="cm-operator">=</span> <span class="cm-variable">blastHits.score</span><span class="cm-operator cm-dollar">$</span><span class="cm-variable">Group.1</span>, <span class="cm-variable">to</span> <span class="cm-operator">=</span> <span class="cm-variable">blastHits.score</span><span class="cm-operator cm-dollar">$</span><span class="cm-variable">x</span>, <span class="cm-variable">warn_missing</span> <span class="cm-operator">=</span> <span class="cm-variable">F</span>)</span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span cm-text="" cm-zwsp="">
</span></span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span class="cm-variable">bE.in</span>[<span class="cm-operator">!</span><span class="cm-variable">grepl</span>(<span class="cm-string">'^</span><span class="cm-string-2">\\</span><span class="cm-string">d'</span>,<span class="cm-variable">bE.in</span><span class="cm-operator cm-dollar">$</span><span class="cm-variable">eScore</span>),<span class="cm-string">'eScore'</span>] <span class="cm-operator cm-arrow"><-</span> <span class="cm-number">0</span></span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span cm-text="" cm-zwsp="">
</span></span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span class="cm-variable">bE.in</span><span class="cm-operator cm-dollar">$</span><span class="cm-variable">eScore</span> <span class="cm-operator cm-arrow"><-</span> <span class="cm-variable">as.numeric</span>(<span class="cm-variable">bE.in</span><span class="cm-operator cm-dollar">$</span><span class="cm-variable">eScore</span>)</span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span cm-text="" cm-zwsp="">
</span></span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span class="cm-comment">#get the id of the gene model that has the higher score</span></span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span class="cm-variable">bE.in</span><span class="cm-operator cm-dollar">$</span><span class="cm-variable">better</span> <span class="cm-operator cm-arrow"><-</span> <span class="cm-variable">bE.in</span><span class="cm-operator cm-dollar">$</span><span class="cm-variable">bScore</span> <span class="cm-operator">>=</span> <span class="cm-variable">bE.in</span><span class="cm-operator cm-dollar">$</span><span class="cm-variable">eScore</span></span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span class="cm-variable">bE.in</span><span class="cm-operator cm-dollar">$</span><span class="cm-variable">worse</span> <span class="cm-operator cm-arrow"><-</span> <span class="cm-string">''</span></span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span cm-text="" cm-zwsp="">
</span></span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span class="cm-variable">bE.in</span>[<span class="cm-variable">bE.in</span><span class="cm-operator cm-dollar">$</span><span class="cm-variable">better</span> <span class="cm-operator">==</span> <span class="cm-variable">FALSE</span>,<span class="cm-string">'worse'</span>] <span class="cm-operator cm-arrow"><-</span> <span class="cm-variable">bE.in</span>[<span class="cm-variable">bE.in</span><span class="cm-operator cm-dollar">$</span><span class="cm-variable">better</span> <span class="cm-operator">==</span> <span class="cm-variable">FALSE</span>,<span class="cm-number">4</span>]</span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span class="cm-variable">bE.in</span><span class="cm-operator cm-dollar">$</span><span class="cm-variable">better</span>[<span class="cm-variable">bE.in</span><span class="cm-operator cm-dollar">$</span><span class="cm-variable">better</span> <span class="cm-operator">==</span> <span class="cm-variable">FALSE</span>] <span class="cm-operator cm-arrow"><-</span> <span class="cm-variable">bE.in</span>[<span class="cm-variable">bE.in</span><span class="cm-operator cm-dollar">$</span><span class="cm-variable">better</span> <span class="cm-operator">==</span> <span class="cm-variable">FALSE</span>,<span class="cm-number">10</span>]</span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span class="cm-variable">bE.in</span>[<span class="cm-variable">bE.in</span><span class="cm-operator cm-dollar">$</span><span class="cm-variable">better</span> <span class="cm-operator">==</span> <span class="cm-variable">TRUE</span>,<span class="cm-string">'worse'</span>] <span class="cm-operator cm-arrow"><-</span> <span class="cm-variable">bE.in</span>[<span class="cm-variable">bE.in</span><span 
class="cm-operator cm-dollar">$</span><span class="cm-variable">better</span> <span class="cm-operator">==</span> <span class="cm-variable">TRUE</span>,<span class="cm-number">10</span>]</span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span class="cm-variable">bE.in</span><span class="cm-operator cm-dollar">$</span><span class="cm-variable">better</span>[<span class="cm-variable">bE.in</span><span class="cm-operator cm-dollar">$</span><span class="cm-variable">better</span> <span class="cm-operator">==</span> <span class="cm-variable">TRUE</span>] <span class="cm-operator cm-arrow"><-</span> <span class="cm-variable">bE.in</span>[<span class="cm-variable">bE.in</span><span class="cm-operator cm-dollar">$</span><span class="cm-variable">better</span> <span class="cm-operator">==</span> <span class="cm-variable">TRUE</span>,<span class="cm-number">4</span>]</span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span cm-text="" cm-zwsp="">
</span></span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span class="cm-variable">bE.in</span> <span class="cm-operator cm-arrow"><-</span> <span class="cm-variable">bE.in</span>[<span class="cm-operator">!</span>(<span class="cm-variable">bE.in</span><span class="cm-operator cm-dollar">$</span><span class="cm-variable">better</span> <span class="cm-operator cm-variable-2">%in%</span> <span class="cm-variable">bE.in</span><span class="cm-operator cm-dollar">$</span><span class="cm-variable">worse</span>),]</span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span cm-text="" cm-zwsp="">
</span></span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span class="cm-variable">e.in</span><span class="cm-operator cm-dollar">$</span><span class="cm-variable">e1Score</span> <span class="cm-operator cm-arrow"><-</span> <span class="cm-variable">mapvalues</span>(<span class="cm-variable">e.in</span><span class="cm-operator cm-dollar">$</span><span class="cm-variable">V4</span>,<span class="cm-variable">from</span> <span class="cm-operator">=</span> <span class="cm-variable">blastHits.score</span><span class="cm-operator cm-dollar">$</span><span class="cm-variable">Group.1</span>, <span class="cm-variable">to</span> <span class="cm-operator">=</span> <span class="cm-variable">blastHits.score</span><span class="cm-operator cm-dollar">$</span><span class="cm-variable">x</span>, <span class="cm-variable">warn_missing</span> <span class="cm-operator">=</span> <span class="cm-variable">F</span>)</span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span class="cm-variable">e.in</span>[<span class="cm-operator">!</span><span class="cm-variable">grepl</span>(<span class="cm-string">'^</span><span class="cm-string-2">\\</span><span class="cm-string">d'</span>,<span class="cm-variable">e.in</span><span class="cm-operator cm-dollar">$</span><span class="cm-variable">e1Score</span>),<span class="cm-string">'e1Score'</span>] <span class="cm-operator cm-arrow"><-</span> <span class="cm-number">0</span></span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span class="cm-variable">e.in</span><span class="cm-operator cm-dollar">$</span><span class="cm-variable">e1Score</span> <span class="cm-operator cm-arrow"><-</span> <span class="cm-variable">as.numeric</span>(<span class="cm-variable">e.in</span><span class="cm-operator cm-dollar">$</span><span 
class="cm-variable">e1Score</span>)</span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span cm-text="" cm-zwsp="">
</span></span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span class="cm-variable">e.in</span><span class="cm-operator cm-dollar">$</span><span class="cm-variable">e2Score</span> <span class="cm-operator cm-arrow"><-</span> <span class="cm-variable">mapvalues</span>(<span class="cm-variable">e.in</span><span class="cm-operator cm-dollar">$</span><span class="cm-variable">V10</span>,<span class="cm-variable">from</span> <span class="cm-operator">=</span> <span class="cm-variable">blastHits.score</span><span class="cm-operator cm-dollar">$</span><span class="cm-variable">Group.1</span>, <span class="cm-variable">to</span> <span class="cm-operator">=</span> <span class="cm-variable">blastHits.score</span><span class="cm-operator cm-dollar">$</span><span class="cm-variable">x</span>, <span class="cm-variable">warn_missing</span> <span class="cm-operator">=</span> <span class="cm-variable">F</span>)</span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span class="cm-variable">e.in</span>[<span class="cm-operator">!</span><span class="cm-variable">grepl</span>(<span class="cm-string">'^</span><span class="cm-string-2">\\</span><span class="cm-string">d'</span>,<span class="cm-variable">e.in</span><span class="cm-operator cm-dollar">$</span><span class="cm-variable">e2Score</span>),<span class="cm-string">'e2Score'</span>] <span class="cm-operator cm-arrow"><-</span> <span class="cm-number">0</span></span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span class="cm-variable">e.in</span><span class="cm-operator cm-dollar">$</span><span class="cm-variable">e2Score</span> <span class="cm-operator cm-arrow"><-</span> <span class="cm-variable">as.numeric</span>(<span class="cm-variable">e.in</span><span class="cm-operator cm-dollar">$</span><span 
class="cm-variable">e2Score</span>)</span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span cm-text="" cm-zwsp="">
</span></span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span class="cm-comment">#get the id of the gene model that has the higher score</span></span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span class="cm-variable">e.in</span><span class="cm-operator cm-dollar">$</span><span class="cm-variable">better</span> <span class="cm-operator cm-arrow"><-</span> <span class="cm-variable">e.in</span><span class="cm-operator cm-dollar">$</span><span class="cm-variable">e1Score</span> <span class="cm-operator">>=</span> <span class="cm-variable">e.in</span><span class="cm-operator cm-dollar">$</span><span class="cm-variable">e2Score</span></span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span class="cm-variable">e.in</span><span class="cm-operator cm-dollar">$</span><span class="cm-variable">worse</span> <span class="cm-operator cm-arrow"><-</span> <span class="cm-string">''</span></span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span cm-text="" cm-zwsp="">
</span></span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span class="cm-variable">e.in</span>[<span class="cm-variable">e.in</span><span class="cm-operator cm-dollar">$</span><span class="cm-variable">better</span> <span class="cm-operator">==</span> <span class="cm-variable">FALSE</span>,<span class="cm-string">'worse'</span>] <span class="cm-operator cm-arrow"><-</span> <span class="cm-variable">e.in</span>[<span class="cm-variable">e.in</span><span class="cm-operator cm-dollar">$</span><span class="cm-variable">better</span> <span class="cm-operator">==</span> <span class="cm-variable">FALSE</span>,<span class="cm-number">4</span>]</span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span class="cm-variable">e.in</span><span class="cm-operator cm-dollar">$</span><span class="cm-variable">better</span>[<span class="cm-variable">e.in</span><span class="cm-operator cm-dollar">$</span><span class="cm-variable">better</span> <span class="cm-operator">==</span> <span class="cm-variable">FALSE</span>] <span class="cm-operator cm-arrow"><-</span> <span class="cm-variable">e.in</span>[<span class="cm-variable">e.in</span><span class="cm-operator cm-dollar">$</span><span class="cm-variable">better</span> <span class="cm-operator">==</span> <span class="cm-variable">FALSE</span>,<span class="cm-number">10</span>]</span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span class="cm-variable">e.in</span>[<span class="cm-variable">e.in</span><span class="cm-operator cm-dollar">$</span><span class="cm-variable">better</span> <span class="cm-operator">==</span> <span class="cm-variable">TRUE</span>,<span class="cm-string">'worse'</span>] <span class="cm-operator cm-arrow"><-</span> <span class="cm-variable">e.in</span>[<span class="cm-variable">e.in</span><span 
class="cm-operator cm-dollar">$</span><span class="cm-variable">better</span> <span class="cm-operator">==</span> <span class="cm-variable">TRUE</span>,<span class="cm-number">10</span>]</span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span class="cm-variable">e.in</span><span class="cm-operator cm-dollar">$</span><span class="cm-variable">better</span>[<span class="cm-variable">e.in</span><span class="cm-operator cm-dollar">$</span><span class="cm-variable">better</span> <span class="cm-operator">==</span> <span class="cm-variable">TRUE</span>] <span class="cm-operator cm-arrow"><-</span> <span class="cm-variable">e.in</span>[<span class="cm-variable">e.in</span><span class="cm-operator cm-dollar">$</span><span class="cm-variable">better</span> <span class="cm-operator">==</span> <span class="cm-variable">TRUE</span>,<span class="cm-number">4</span>]</span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span cm-text="" cm-zwsp="">
</span></span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span class="cm-variable">e.in</span> <span class="cm-operator cm-arrow"><-</span> <span class="cm-variable">e.in</span>[<span class="cm-operator">!</span>(<span class="cm-variable">e.in</span><span class="cm-operator cm-dollar">$</span><span class="cm-variable">e1Score</span> <span class="cm-operator">==</span> <span class="cm-number">0</span> <span class="cm-operator">&</span> <span class="cm-variable">e.in</span><span class="cm-operator cm-dollar">$</span><span class="cm-variable">e2Score</span> <span class="cm-operator">==</span> <span class="cm-number">0</span>),]</span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span cm-text="" cm-zwsp="">
</span></span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span class="cm-variable">e.in</span><span class="cm-operator cm-dollar">$</span><span class="cm-variable">sPair</span> <span class="cm-operator cm-arrow"><-</span> <span class="cm-variable">apply</span>(<span class="cm-variable">e.in</span>[,<span class="cm-variable">c</span>(<span class="cm-number">4</span>,<span class="cm-number">10</span>)],<span class="cm-number">1</span>, <span class="cm-keyword">function</span>(<span class="cm-variable">x</span>) {</span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"> <span class="cm-variable">vIn</span> <span class="cm-operator cm-arrow"><-</span> <span class="cm-variable">as.vector</span>(<span class="cm-variable">x</span>)</span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"> <span class="cm-variable">vIn</span> <span class="cm-operator cm-arrow"><-</span> <span class="cm-variable">vIn</span>[<span class="cm-variable">order</span>(<span class="cm-variable">vIn</span>)]</span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"> <span class="cm-variable">vOut</span> <span class="cm-operator cm-arrow"><-</span> <span class="cm-variable">paste</span>(<span class="cm-variable">vIn</span>,<span class="cm-variable">collapse</span> <span class="cm-operator">=</span> <span class="cm-string">'_'</span>)</span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"> <span class="cm-builtin">return</span>(<span class="cm-variable">vOut</span>)</span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;">})</span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" 
style="padding-right: 0.1px;"><span cm-text="" cm-zwsp="">
</span></span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span class="cm-variable">e.in</span> <span class="cm-operator cm-arrow"><-</span> <span class="cm-variable">e.in</span>[<span class="cm-operator">!</span><span class="cm-variable">duplicated</span>(<span class="cm-variable">e.in</span><span class="cm-operator cm-dollar">$</span><span class="cm-variable">sPair</span>),]</span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span cm-text="" cm-zwsp="">
</span></span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span class="cm-variable">e.exclude</span> <span class="cm-operator cm-arrow"><-</span> <span class="cm-variable">unique</span>(<span class="cm-variable">e.in</span><span class="cm-operator cm-dollar">$</span><span class="cm-variable">worse</span>)</span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span cm-text="" cm-zwsp="">
</span></span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span class="cm-comment">#pull in all gene ID names</span></span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span class="cm-variable">exoGMs</span> <span class="cm-operator cm-arrow"><-</span> <span class="cm-variable">read.delim</span>(<span class="cm-string">"exoGenes.bed"</span>, <span class="cm-variable">header</span> <span class="cm-operator">=</span> <span class="cm-variable">F</span>)</span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span class="cm-variable">bGMs</span> <span class="cm-operator cm-arrow"><-</span> <span class="cm-variable">read.delim</span>(<span class="cm-string">"brakerGenes.bed"</span>, <span class="cm-variable">header</span> <span class="cm-operator">=</span> <span class="cm-variable">F</span>)</span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span cm-text="" cm-zwsp="">
</span></span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span class="cm-comment">#name reformatting</span></span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span class="cm-variable">exoGMs</span><span class="cm-operator cm-dollar">$</span><span class="cm-variable">V4</span> <span class="cm-operator cm-arrow"><-</span> <span class="cm-variable">gsub</span>(<span class="cm-string">';.*'</span>,<span class="cm-string">''</span>,<span class="cm-variable">exoGMs</span><span class="cm-operator cm-dollar">$</span><span class="cm-variable">V4</span>)</span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span class="cm-variable">exoGMs</span><span class="cm-operator cm-dollar">$</span><span class="cm-variable">V4</span> <span class="cm-operator cm-arrow"><-</span> <span class="cm-variable">gsub</span>(<span class="cm-string">'ID='</span>,<span class="cm-string">''</span>,<span class="cm-variable">exoGMs</span><span class="cm-operator cm-dollar">$</span><span class="cm-variable">V4</span>)</span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span cm-text="" cm-zwsp="">
</span></span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span class="cm-variable">bGMs</span><span class="cm-operator cm-dollar">$</span><span class="cm-variable">V4</span> <span class="cm-operator cm-arrow"><-</span> <span class="cm-variable">gsub</span>(<span class="cm-string">';.*'</span>,<span class="cm-string">''</span>,<span class="cm-variable">bGMs</span><span class="cm-operator cm-dollar">$</span><span class="cm-variable">V4</span>)</span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span class="cm-variable">bGMs</span><span class="cm-operator cm-dollar">$</span><span class="cm-variable">V4</span> <span class="cm-operator cm-arrow"><-</span> <span class="cm-variable">gsub</span>(<span class="cm-string">'ID='</span>,<span class="cm-string">''</span>,<span class="cm-variable">bGMs</span><span class="cm-operator cm-dollar">$</span><span class="cm-variable">V4</span>)</span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span cm-text="" cm-zwsp="">
</span></span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span class="cm-comment">#keep any gene models that had a blast hit and didn't have an overlap</span></span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span class="cm-variable">bGMs</span> <span class="cm-operator cm-arrow"><-</span> <span class="cm-variable">bGMs</span>[<span class="cm-operator">!</span>(<span class="cm-variable">bGMs</span><span class="cm-operator cm-dollar">$</span><span class="cm-variable">V4</span> <span class="cm-operator cm-variable-2">%in%</span> <span class="cm-variable">bE.in</span><span class="cm-operator cm-dollar">$</span><span class="cm-variable">V4</span>),]</span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span class="cm-variable">bGMs</span> <span class="cm-operator cm-arrow"><-</span> <span class="cm-variable">bGMs</span>[<span class="cm-variable">bGMs</span><span class="cm-operator cm-dollar">$</span><span class="cm-variable">V4</span> <span class="cm-operator cm-variable-2">%in%</span> <span class="cm-variable">blastHits.score</span><span class="cm-operator cm-dollar">$</span><span class="cm-variable">Group.1</span>,]</span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span cm-text="" cm-zwsp="">
</span></span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span class="cm-variable">exoGMs</span> <span class="cm-operator cm-arrow"><-</span> <span class="cm-variable">exoGMs</span>[<span class="cm-operator">!</span>(<span class="cm-variable">exoGMs</span><span class="cm-operator cm-dollar">$</span><span class="cm-variable">V4</span> <span class="cm-operator cm-variable-2">%in%</span> <span class="cm-variable">bE.in</span><span class="cm-operator cm-dollar">$</span><span class="cm-variable">V10</span>),]</span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span class="cm-variable">exoGMs</span> <span class="cm-operator cm-arrow"><-</span> <span class="cm-variable">exoGMs</span>[<span class="cm-variable">exoGMs</span><span class="cm-operator cm-dollar">$</span><span class="cm-variable">V4</span> <span class="cm-operator cm-variable-2">%in%</span> <span class="cm-variable">blastHits.score</span><span class="cm-operator cm-dollar">$</span><span class="cm-variable">Group.1</span>,]</span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span cm-text="" cm-zwsp="">
</span></span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span class="cm-variable">bE.in</span> <span class="cm-operator cm-arrow"><-</span> <span class="cm-variable">bE.in</span>[<span class="cm-operator">!</span>(<span class="cm-variable">bE.in</span><span class="cm-operator cm-dollar">$</span><span class="cm-variable">bScore</span> <span class="cm-operator">==</span> <span class="cm-number">0</span> <span class="cm-operator">&</span> <span class="cm-variable">bE.in</span><span class="cm-operator cm-dollar">$</span><span class="cm-variable">eScore</span> <span class="cm-operator">==</span> <span class="cm-number">0</span>),]</span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span cm-text="" cm-zwsp="">
</span></span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span class="cm-comment">#get list of gene models to keep</span></span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span class="cm-variable">gm.keep</span> <span class="cm-operator cm-arrow"><-</span> <span class="cm-variable">c</span>(<span class="cm-variable">bGMs</span><span class="cm-operator cm-dollar">$</span><span class="cm-variable">V4</span>,<span class="cm-variable">exoGMs</span><span class="cm-operator cm-dollar">$</span><span class="cm-variable">V4</span>,<span class="cm-variable">bE.in</span><span class="cm-operator cm-dollar">$</span><span class="cm-variable">better</span>)</span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span class="cm-variable">gm.keep</span> <span class="cm-operator cm-arrow"><-</span> <span class="cm-variable">unique</span>(<span class="cm-variable">gm.keep</span>[<span class="cm-operator">!</span>(<span class="cm-variable">gm.keep</span> <span class="cm-operator cm-variable-2">%in%</span> <span class="cm-variable">e.exclude</span>)])</span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span cm-text="" cm-zwsp="">
</span></span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span class="cm-comment">#filter braker gene models</span></span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span class="cm-variable">bGff</span> <span class="cm-operator cm-arrow"><-</span> <span class="cm-variable">read.delim</span>(<span class="cm-string">"braker.fix.gff3"</span>,<span class="cm-variable">header</span> <span class="cm-operator">=</span> <span class="cm-variable">F</span>)</span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span cm-text="" cm-zwsp="">
</span></span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span class="cm-variable">bGff</span><span class="cm-operator cm-dollar">$</span><span class="cm-variable">gID</span> <span class="cm-operator cm-arrow"><-</span> <span class="cm-variable">gsub</span>(<span class="cm-string">'.*gene_id='</span>,<span class="cm-string">''</span>,<span class="cm-variable">bGff</span><span class="cm-operator cm-dollar">$</span><span class="cm-variable">V9</span>)</span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span class="cm-variable">bGff</span><span class="cm-operator cm-dollar">$</span><span class="cm-variable">gID</span> <span class="cm-operator cm-arrow"><-</span> <span class="cm-variable">gsub</span>(<span class="cm-string">';'</span>,<span class="cm-string">''</span>,<span class="cm-variable">bGff</span><span class="cm-operator cm-dollar">$</span><span class="cm-variable">gID</span>)</span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span cm-text="" cm-zwsp="">
</span></span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span class="cm-variable">bGff</span> <span class="cm-operator cm-arrow"><-</span> <span class="cm-variable">bGff</span>[<span class="cm-variable">bGff</span><span class="cm-operator cm-dollar">$</span><span class="cm-variable">gID</span> <span class="cm-operator cm-variable-2">%in%</span> <span class="cm-variable">gm.keep</span>,]</span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span cm-text="" cm-zwsp="">
</span></span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span class="cm-variable">bGff</span><span class="cm-operator cm-dollar">$</span><span class="cm-variable">gID</span> <span class="cm-operator cm-arrow"><-</span> <span class="cm-atom">NULL</span></span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span cm-text="" cm-zwsp="">
</span></span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span class="cm-variable">write.table</span>(<span class="cm-variable">bGff</span>, <span class="cm-variable">file</span> <span class="cm-operator">=</span> <span class="cm-string">'braker.keep.gff3'</span>, <span class="cm-variable">row.names</span> <span class="cm-operator">=</span> <span class="cm-variable">F</span>, <span class="cm-variable">col.names</span> <span class="cm-operator">=</span> <span class="cm-variable">F</span>, <span class="cm-variable">sep</span> <span class="cm-operator">=</span> <span class="cm-string">'</span><span class="cm-string-2">\t</span><span class="cm-string">'</span>, <span class="cm-builtin">quote</span> <span class="cm-operator">=</span> <span class="cm-variable">F</span>)</span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span cm-text="" cm-zwsp="">
</span></span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span class="cm-comment">#filter exonerate gene models</span></span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span class="cm-variable">eGff</span> <span class="cm-operator cm-arrow"><-</span> <span class="cm-variable">read.delim</span>(<span class="cm-string">'exoCat.complete.gff3'</span>, <span class="cm-variable">header</span> <span class="cm-operator">=</span> <span class="cm-variable">F</span>, <span class="cm-variable">skip</span> <span class="cm-operator">=</span> <span class="cm-number">1</span>)</span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span cm-text="" cm-zwsp="">
</span></span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span class="cm-variable">eGff</span><span class="cm-operator cm-dollar">$</span><span class="cm-variable">gID</span> <span class="cm-operator cm-arrow"><-</span> <span class="cm-variable">gsub</span>(<span class="cm-string">'.*gene_id='</span>,<span class="cm-string">''</span>,<span class="cm-variable">eGff</span><span class="cm-operator cm-dollar">$</span><span class="cm-variable">V9</span>)</span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span class="cm-variable">eGff</span><span class="cm-operator cm-dollar">$</span><span class="cm-variable">gID</span> <span class="cm-operator cm-arrow"><-</span> <span class="cm-variable">gsub</span>(<span class="cm-string">';.*'</span>,<span class="cm-string">''</span>,<span class="cm-variable">eGff</span><span class="cm-operator cm-dollar">$</span><span class="cm-variable">gID</span>)</span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span cm-text="" cm-zwsp="">
</span></span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span class="cm-variable">eGff</span> <span class="cm-operator cm-arrow"><-</span> <span class="cm-variable">eGff</span>[<span class="cm-variable">eGff</span><span class="cm-operator cm-dollar">$</span><span class="cm-variable">gID</span> <span class="cm-operator cm-variable-2">%in%</span> <span class="cm-variable">gm.keep</span>,]</span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span cm-text="" cm-zwsp="">
</span></span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span class="cm-variable">eGff</span><span class="cm-operator cm-dollar">$</span><span class="cm-variable">gID</span> <span class="cm-operator cm-arrow"><-</span> <span class="cm-atom">NULL</span></span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span cm-text="" cm-zwsp="">
</span></span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span class="cm-variable">write.table</span>(<span class="cm-variable">eGff</span>, <span class="cm-variable">file</span> <span class="cm-operator">=</span> <span class="cm-string">'exo.keep.gff3'</span>, <span class="cm-variable">row.names</span> <span class="cm-operator">=</span> <span class="cm-variable">F</span>, <span class="cm-variable">col.names</span> <span class="cm-operator">=</span> <span class="cm-variable">F</span>, <span class="cm-variable">sep</span> <span class="cm-operator">=</span> <span class="cm-string">'</span><span class="cm-string-2">\t</span><span class="cm-string">'</span>, <span class="cm-builtin">quote</span> <span class="cm-operator">=</span> <span class="cm-variable">F</span>)</span></pre></div></div></div></div></div><div style="position: absolute; height: 0px; width: 1px; border-bottom-width: 0px; border-bottom-style: solid; border-bottom-color: transparent; top: 3124px;"></div><div class="CodeMirror-gutters" style="display: none; height: 3124px;"></div></div></div></pre><p><span>We then pooled the filtered gene models to generate the preliminary merged set of predictions</span></p><pre class="md-fences md-end-block ty-contain-cm modeLoaded" spellcheck="false" lang="bash"><div class="CodeMirror cm-s-inner cm-s-null-scroll CodeMirror-wrap" lang="bash"><div style="overflow: hidden; position: relative; width: 3px; height: 0px; top: 9px; left: 8px;"><textarea autocorrect="off" autocapitalize="off" spellcheck="false" tabindex="0" style="position: absolute; bottom: -1em; padding: 0px; width: 1000px; height: 1em; outline: none;"></textarea></div><div class="CodeMirror-scrollbar-filler" cm-not-content="true"></div><div class="CodeMirror-gutter-filler" cm-not-content="true"></div><div class="CodeMirror-scroll" tabindex="-1"><div class="CodeMirror-sizer" style="margin-left: 0px; margin-bottom: 0px; border-right-width: 
0px; padding-right: 0px; padding-bottom: 0px;"><div style="position: relative; top: 0px;"><div class="CodeMirror-lines" role="presentation"><div role="presentation" style="position: relative; outline: none;"><div class="CodeMirror-measure"><pre><span>xxxxxxxxxx</span></pre></div><div class="CodeMirror-measure"></div><div style="position: relative; z-index: 1;"></div><div class="CodeMirror-code" role="presentation"><div class="CodeMirror-activeline" style="position: relative;"><div class="CodeMirror-activeline-background CodeMirror-linebackground"></div><div class="CodeMirror-gutter-background CodeMirror-activeline-gutter" style="left: 0px; width: 0px;"></div><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span class="cm-builtin">cat</span> exo.keep.gff3 braker.keep.gff3 > combined.gm.gff3</span></pre></div><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;">agat_sp_keep_longest_isoform.pl <span class="cm-attribute">--gff</span> combined.gm.gff3 <span class="cm-attribute">-o</span> combined.gm.longest.gff3</span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;">agat_sp_extract_sequences.pl <span class="cm-attribute">-g</span> combined.gm.longest.gff3 <span class="cm-attribute">-f</span> aep.final.genome.rfmt.fa <span class="cm-attribute">-p</span> <span class="cm-attribute">-o</span> combined.prots.fa</span></pre></div></div></div></div></div><div style="position: absolute; height: 0px; width: 1px; border-bottom-width: 0px; border-bottom-style: solid; border-bottom-color: transparent; top: 66px;"></div><div class="CodeMirror-gutters" style="display: none; height: 66px;"></div></div></div></pre><h3 id='removing-tes-and-short-gene-models'><span>Removing TEs and Short Gene Models</span></h3><p><span>Although we performed extensive repeat masking, there were still contaminating TE proteins in 
our gene models. To identify and remove at least some of these TEs, we used InterProScan (v5.51-85.0) to scan our preliminary protein models for transposase domains, and then filtered out the genes that contained them.</span></p><p><span>(</span><em><span>04_mergeMods/comboIPR.sh</span></em><span>)</span></p><pre class="md-fences md-end-block ty-contain-cm modeLoaded" spellcheck="false" lang="bash"><div class="CodeMirror cm-s-inner cm-s-null-scroll CodeMirror-wrap" lang="bash"><div style="overflow: hidden; position: relative; width: 3px; height: 0px; top: 9px; left: 8px;"><textarea autocorrect="off" autocapitalize="off" spellcheck="false" tabindex="0" style="position: absolute; bottom: -1em; padding: 0px; width: 1000px; height: 1em; outline: none;"></textarea></div><div class="CodeMirror-scrollbar-filler" cm-not-content="true"></div><div class="CodeMirror-gutter-filler" cm-not-content="true"></div><div class="CodeMirror-scroll" tabindex="-1"><div class="CodeMirror-sizer" style="margin-left: 0px; margin-bottom: 0px; border-right-width: 0px; padding-right: 0px; padding-bottom: 0px;"><div style="position: relative; top: 0px;"><div class="CodeMirror-lines" role="presentation"><div role="presentation" style="position: relative; outline: none;"><div class="CodeMirror-measure"><pre><span>xxxxxxxxxx</span></pre></div><div class="CodeMirror-measure"></div><div style="position: relative; z-index: 1;"></div><div class="CodeMirror-code" role="presentation" style=""><div class="CodeMirror-activeline" style="position: relative;"><div class="CodeMirror-activeline-background CodeMirror-linebackground"></div><div class="CodeMirror-gutter-background CodeMirror-activeline-gutter" style="left: 0px; width: 0px;"></div><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span class="cm-meta">#!/bin/bash</span></span></pre></div><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 
0.1px;"><span class="cm-comment">#SBATCH --job-name=ipr</span></span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span class="cm-comment">#SBATCH -p med </span></span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span class="cm-comment">#SBATCH -c 8</span></span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span class="cm-comment">#SBATCH -t 60-0</span></span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span class="cm-comment">#SBATCH --mem=0</span></span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span class="cm-comment">#SBATCH --error=ipr.err</span></span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span class="cm-comment">#SBATCH --output=ipr.out</span></span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span cm-text="" cm-zwsp="">
</span></span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span class="cm-builtin">cd</span> interproscan-5.51-85.0</span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span cm-text="" cm-zwsp="">
</span></span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;">./interproscan.sh <span class="cm-attribute">-d</span> comboGM <span class="cm-attribute">-cpu</span> <span class="cm-number">8</span> <span class="cm-attribute">-dp</span> <span class="cm-attribute">-f</span> TSV, GFF3 <span class="cm-attribute">-goterms</span> <span class="cm-attribute">-i</span> ../combined.prots.fa <span class="cm-attribute">-iprlookup</span> <span class="cm-attribute">-pa</span></span></pre></div></div></div></div></div><div style="position: absolute; height: 0px; width: 1px; border-bottom-width: 0px; border-bottom-style: solid; border-bottom-color: transparent; top: 264px;"></div><div class="CodeMirror-gutters" style="display: none; height: 264px;"></div></div></div></pre><p><span>We used the domain prediction TSV to identify gene models with a transposase domain.</span></p><p><code>grep 'transpos' combined.prots.fa.tsv | cut -f 1 | sort | uniq &gt; teIDs.txt</code></p><p><span>We also flagged any proteins that were 50 AAs or shorter.</span></p><p><code>seqkit seq -M 50 -i combined.prots.fa | grep "&gt;" | sed 's/&gt;//g' &gt; shortProts.txt</code></p><p><span>Both lists of flagged IDs were used to filter the merged gene set.</span></p><pre class="md-fences md-end-block ty-contain-cm modeLoaded" spellcheck="false" lang="bash"><div class="CodeMirror cm-s-inner cm-s-null-scroll CodeMirror-wrap" lang="bash"><div style="overflow: hidden; position: relative; width: 3px; height: 0px; top: 9px; left: 8px;"><textarea autocorrect="off" autocapitalize="off" spellcheck="false" tabindex="0" style="position: absolute; bottom: -1em; padding: 0px; width: 1000px; height: 1em; outline: none;"></textarea></div><div class="CodeMirror-scrollbar-filler" cm-not-content="true"></div><div class="CodeMirror-gutter-filler" cm-not-content="true"></div><div class="CodeMirror-scroll" tabindex="-1"><div class="CodeMirror-sizer" style="margin-left: 0px; margin-bottom: 
0px; border-right-width: 0px; padding-right: 0px; padding-bottom: 0px;"><div style="position: relative; top: 0px;"><div class="CodeMirror-lines" role="presentation"><div role="presentation" style="position: relative; outline: none;"><div class="CodeMirror-measure"><pre><span>xxxxxxxxxx</span></pre></div><div class="CodeMirror-measure"></div><div style="position: relative; z-index: 1;"></div><div class="CodeMirror-code" role="presentation" style=""><div class="CodeMirror-activeline" style="position: relative;"><div class="CodeMirror-activeline-background CodeMirror-linebackground"></div><div class="CodeMirror-gutter-background CodeMirror-activeline-gutter" style="left: 0px; width: 0px;"></div><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span class="cm-builtin">cat</span> shortProts.txt teIDs.txt | <span class="cm-builtin">sed</span> <span class="cm-string">'s/-mRNA-1//g;s/\.t.*//g;s/$/;/g'</span>| <span class="cm-builtin">sort</span> | uniq > dropTheseGenes.txt</span></pre></div><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span class="cm-builtin">grep</span> <span class="cm-attribute">-P</span> <span class="cm-string">'\tgene\t'</span> combined.gm.gff3 | <span class="cm-builtin">cut</span> <span class="cm-attribute">-f</span> <span class="cm-number">9</span> | <span class="cm-builtin">sed</span> <span class="cm-string">'s/;.*/;/g;s/ID=//g'</span> | <span class="cm-builtin">grep</span> <span class="cm-attribute">-v</span> <span class="cm-attribute">-f</span> dropTheseGenes.txt | <span class="cm-builtin">sed</span> <span class="cm-string">'s/;$//g'</span> > genesKeep.txt</span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span class="cm-comment">#Needed to fix a minor formatting issue that caused a parsing error in the gene model gff3</span></span></pre><pre class=" 
CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span class="cm-builtin">sed</span> <span class="cm-string">'s/cds/CDS/g;s/\([^;]\)$/\1;/g'</span> combined.gm.gff3 > combined.rfmt.gm.gff3</span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;">perl gene_keeper.pl gm/genesKeep.txt gm/combined.rfmt.gm.gff3 > gm/combined.gm.filt.gff3</span></pre></div></div></div></div></div><div style="position: absolute; height: 0px; width: 1px; border-bottom-width: 0px; border-bottom-style: solid; border-bottom-color: transparent; top: 132px;"></div><div class="CodeMirror-gutters" style="display: none; height: 132px;"></div></div></div></pre><p><span>Below are the updated gene model stats after the TE and short AA filtering:</span></p><pre class="md-fences md-end-block ty-contain-cm modeLoaded" spellcheck="false" lang="bash" style="break-inside: unset;"><div class="CodeMirror cm-s-inner cm-s-null-scroll CodeMirror-wrap" lang="bash"><div style="overflow: hidden; position: relative; width: 3px; height: 0px; top: 9px; left: 8px;"><textarea autocorrect="off" autocapitalize="off" spellcheck="false" tabindex="0" style="position: absolute; bottom: -1em; padding: 0px; width: 1000px; height: 1em; outline: none;"></textarea></div><div class="CodeMirror-scrollbar-filler" cm-not-content="true"></div><div class="CodeMirror-gutter-filler" cm-not-content="true"></div><div class="CodeMirror-scroll" tabindex="-1"><div class="CodeMirror-sizer" style="margin-left: 0px; margin-bottom: 0px; border-right-width: 0px; padding-right: 0px; padding-bottom: 0px;"><div style="position: relative; top: 0px;"><div class="CodeMirror-lines" role="presentation"><div role="presentation" style="position: relative; outline: none;"><div class="CodeMirror-measure"><pre><span>xxxxxxxxxx</span></pre></div><div class="CodeMirror-measure"></div><div style="position: relative; z-index: 1;"></div><div 
class="CodeMirror-code" role="presentation" style=""><div class="CodeMirror-activeline" style="position: relative;"><div class="CodeMirror-activeline-background CodeMirror-linebackground"></div><div class="CodeMirror-gutter-background CodeMirror-activeline-gutter" style="left: 0px; width: 0px;"></div><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;">perl look_at_transcripts.pl gm/combined.gm.filt.gff3</span></pre></div><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span cm-text="" cm-zwsp="">
</span></span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;">median cds length<span class="cm-tab" role="presentation" cm-text=" "> </span><span class="cm-number">660</span></span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;">median transcript length<span class="cm-tab" role="presentation" cm-text=" "> </span><span class="cm-number">732</span></span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;">median five prime UTR length<span class="cm-tab" role="presentation" cm-text=" "> </span><span class="cm-number">61</span></span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;">median three prime UTR length<span class="cm-tab" role="presentation" cm-text=" "> </span><span class="cm-number">124</span></span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;">median exon length<span class="cm-tab" role="presentation" cm-text=" "> </span><span class="cm-number">124</span></span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;">median intron length<span class="cm-tab" role="presentation" cm-text=" "> </span><span class="cm-number">597</span></span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;">total gene count<span class="cm-tab" role="presentation" cm-text=" "> </span><span class="cm-number">29394</span></span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;">total transcripts<span class="cm-tab" role="presentation" cm-text=" "> </span> <span class="cm-number">31159</span></span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" 
style="padding-right: 0.1px;">total unique transcripts<span class="cm-tab" role="presentation" cm-text=" "> </span> <span class="cm-number">31159</span></span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;">total unique cds<span class="cm-tab" role="presentation" cm-text=" "> </span> <span class="cm-number">31156</span></span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span cm-text="" cm-zwsp="">
</span></span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;">number of genes with five prime UTR<span class="cm-tab" role="presentation" cm-text=" "> </span><span class="cm-number">8107</span></span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;">number of genes with three prime UTR<span class="cm-tab" role="presentation" cm-text=" "> </span><span class="cm-number">7348</span></span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;">number of trans with five prime UTR<span class="cm-tab" role="presentation" cm-text=" "> </span><span class="cm-number">8107</span></span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;">number of trans with three prime UTR<span class="cm-tab" role="presentation" cm-text=" "> </span><span class="cm-number">7348</span></span></pre></div></div></div></div></div><div style="position: absolute; height: 0px; width: 1px; border-bottom-width: 0px; border-bottom-style: solid; border-bottom-color: transparent; top: 374px;"></div><div class="CodeMirror-gutters" style="display: none; height: 374px;"></div></div></div></pre><h3 id='renaming-gene-models'><span>Renaming Gene Models</span></h3><p><span>Next we prettied up the gene model names, giving them names that roughly followed Ensembl naming conventions using a utility function from MAKER3 (v3.01.03)</span></p><pre class="md-fences md-end-block ty-contain-cm modeLoaded" spellcheck="false" lang="bash"><div class="CodeMirror cm-s-inner cm-s-null-scroll CodeMirror-wrap" lang="bash"><div style="overflow: hidden; position: relative; width: 3px; height: 0px; top: 9px; left: 8px;"><textarea autocorrect="off" autocapitalize="off" spellcheck="false" tabindex="0" style="position: absolute; bottom: -1em; padding: 0px; width: 1000px; height: 1em; outline: 
none;"></textarea></div><div class="CodeMirror-scrollbar-filler" cm-not-content="true"></div><div class="CodeMirror-gutter-filler" cm-not-content="true"></div><div class="CodeMirror-scroll" tabindex="-1"><div class="CodeMirror-sizer" style="margin-left: 0px; margin-bottom: 0px; border-right-width: 0px; padding-right: 0px; padding-bottom: 0px;"><div style="position: relative; top: 0px;"><div class="CodeMirror-lines" role="presentation"><div role="presentation" style="position: relative; outline: none;"><div class="CodeMirror-measure"><pre><span>xxxxxxxxxx</span></pre></div><div class="CodeMirror-measure"></div><div style="position: relative; z-index: 1;"></div><div class="CodeMirror-code" role="presentation" style=""><div class="CodeMirror-activeline" style="position: relative;"><div class="CodeMirror-activeline-background CodeMirror-linebackground"></div><div class="CodeMirror-gutter-background CodeMirror-activeline-gutter" style="left: 0px; width: 0px;"></div><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;">gff3_sort <span class="cm-attribute">-g</span> combined.gm.filt.gff3 <span class="cm-attribute">-og</span> combined.gm.filt.sort.gff3</span></pre></div><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span cm-text="" cm-zwsp="">
</span></span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span class="cm-keyword">for</span> f <span class="cm-keyword">in</span> {1..15}; <span class="cm-keyword">do</span> printf <span class="cm-string">"chr-</span><span class="cm-def">$f</span><span class="cm-string">\t</span><span class="cm-def">$f</span><span class="cm-string">\n"</span> >> sOrder.txt; <span class="cm-keyword">done</span></span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span cm-text="" cm-zwsp="">
</span></span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;">singularity exec <span class="cm-attribute">-B</span> /home/jacazet/reference/makerAnnotations/aepAnnot/finalize ~/maker-plus_3.01.03.sif maker_map_ids <span class="cm-attribute">--prefix</span> HVAEP1_ <span class="cm-attribute">--justify</span> <span class="cm-number">6</span> <span class="cm-attribute">--suffix</span> . <span class="cm-attribute">--abrv_gene</span> G <span class="cm-attribute">--abrv_tran</span> T <span class="cm-attribute">--iterate</span> <span class="cm-number">1</span> <span class="cm-attribute">--sort_order</span> sOrder.txt combined.gm.filt.sort.gff3 > merged.gm.map.txt</span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span cm-text="" cm-zwsp="">
</span></span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;">singularity exec <span class="cm-attribute">-B</span> /home/jacazet/reference/makerAnnotations/aepAnnot/finalize ~/maker-plus_3.01.03.sif map_gff_ids merged.gm.map.txt combined.gm.filt.sort.gff3</span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span cm-text="" cm-zwsp="">
</span></span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span class="cm-builtin">mv</span> combined.gm.filt.sort.gff3 HVAEP1.baseline.geneModels.gff3</span></pre></div></div></div></div></div><div style="position: absolute; height: 0px; width: 1px; border-bottom-width: 0px; border-bottom-style: solid; border-bottom-color: transparent; top: 264px;"></div><div class="CodeMirror-gutters" style="display: none; height: 264px;"></div></div></div></pre><p><span>To evaluate the completeness, we selected the longest isoform for each gene model, extracted protein sequences, and ran BUSCO</span></p><pre class="md-fences md-end-block ty-contain-cm modeLoaded" spellcheck="false" lang="bash"><div class="CodeMirror cm-s-inner cm-s-null-scroll CodeMirror-wrap" lang="bash"><div style="overflow: hidden; position: relative; width: 3px; height: 0px; top: 9px; left: 8px;"><textarea autocorrect="off" autocapitalize="off" spellcheck="false" tabindex="0" style="position: absolute; bottom: -1em; padding: 0px; width: 1000px; height: 1em; outline: none;"></textarea></div><div class="CodeMirror-scrollbar-filler" cm-not-content="true"></div><div class="CodeMirror-gutter-filler" cm-not-content="true"></div><div class="CodeMirror-scroll" tabindex="-1"><div class="CodeMirror-sizer" style="margin-left: 0px; margin-bottom: 0px; border-right-width: 0px; padding-right: 0px; padding-bottom: 0px;"><div style="position: relative; top: 0px;"><div class="CodeMirror-lines" role="presentation"><div role="presentation" style="position: relative; outline: none;"><div class="CodeMirror-measure"><pre><span>xxxxxxxxxx</span></pre></div><div class="CodeMirror-measure"></div><div style="position: relative; z-index: 1;"></div><div class="CodeMirror-code" role="presentation" style=""><div class="CodeMirror-activeline" style="position: relative;"><div class="CodeMirror-activeline-background CodeMirror-linebackground"></div><div 
class="CodeMirror-gutter-background CodeMirror-activeline-gutter" style="left: 0px; width: 0px;"></div><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;">agat_sp_keep_longest_isoform.pl <span class="cm-attribute">--gff</span> HVAEP1.baseline.geneModels.gff3 <span class="cm-attribute">-o</span> HVAEP1.baseline.geneModels.longest.gff3</span></pre></div><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span cm-text="" cm-zwsp="">
</span></span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;">gffread <span class="cm-attribute">-g</span> ../../aep.final.genome.fa <span class="cm-attribute">-y</span> HVAEP1.longest.prot.fa HVAEP1.baseline.geneModels.longest.gff3</span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span cm-text="" cm-zwsp="">
</span></span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;">docker run <span class="cm-attribute">-u</span> <span class="cm-quote">$(id -u)</span> <span class="cm-attribute">-v</span> <span class="cm-quote">$(pwd)</span>:/busco_wd ezlabgva/busco:v5.beta_cv1 busco <span class="cm-attribute">-c</span> <span class="cm-number">6</span> <span class="cm-attribute">-m</span> prot <span class="cm-attribute">-i</span> HVAEP1.longest.prot.fa <span class="cm-attribute">-o</span> final <span class="cm-attribute">-l</span> metazoa_odb10 <span class="cm-attribute">-f</span> <span class="cm-attribute">--offline</span></span></pre></div></div></div></div></div><div style="position: absolute; height: 0px; width: 1px; border-bottom-width: 0px; border-bottom-style: solid; border-bottom-color: transparent; top: 154px;"></div><div class="CodeMirror-gutters" style="display: none; height: 154px;"></div></div></div></pre><p><span>Including the Exonerate models gave a decent boost to completeness:</span></p><pre class="md-fences md-end-block ty-contain-cm modeLoaded" spellcheck="false" lang="bash"><div class="CodeMirror cm-s-inner cm-s-null-scroll CodeMirror-wrap" lang="bash"><div style="overflow: hidden; position: relative; width: 3px; height: 0px; top: 9px; left: 8px;"><textarea autocorrect="off" autocapitalize="off" spellcheck="false" tabindex="0" style="position: absolute; bottom: -1em; padding: 0px; width: 1000px; height: 1em; outline: none;"></textarea></div><div class="CodeMirror-scrollbar-filler" cm-not-content="true"></div><div class="CodeMirror-gutter-filler" cm-not-content="true"></div><div class="CodeMirror-scroll" tabindex="-1"><div class="CodeMirror-sizer" style="margin-left: 0px; margin-bottom: 0px; border-right-width: 0px; padding-right: 0px; padding-bottom: 0px;"><div style="position: relative; top: 0px;"><div class="CodeMirror-lines" role="presentation"><div role="presentation" style="position: relative; 
outline: none;"><div class="CodeMirror-measure"><pre><span>xxxxxxxxxx</span></pre></div><div class="CodeMirror-measure"></div><div style="position: relative; z-index: 1;"></div><div class="CodeMirror-code" role="presentation" style=""><div class="CodeMirror-activeline" style="position: relative;"><div class="CodeMirror-activeline-background CodeMirror-linebackground"></div><div class="CodeMirror-gutter-background CodeMirror-activeline-gutter" style="left: 0px; width: 0px;"></div><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span class="cm-tab" role="presentation" cm-text=" "> </span><span class="cm-attribute">--------------------------------------------------</span></span></pre></div><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span class="cm-tab" role="presentation" cm-text=" "> </span>|Results from dataset metazoa_odb10 |</span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span class="cm-tab" role="presentation" cm-text=" "> </span><span class="cm-attribute">--------------------------------------------------</span></span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span class="cm-tab" role="presentation" cm-text=" "> </span>|C:95.0%[S:93.8%,D:1.2%],F:1.3%,M:3.7%,n:954 |</span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span class="cm-tab" role="presentation" cm-text=" "> </span>|906<span class="cm-tab" role="presentation" cm-text=" "> </span>Complete BUSCOs (C) |</span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span class="cm-tab" role="presentation" cm-text=" "> </span>|895<span class="cm-tab" role="presentation" cm-text=" "> </span>Complete and single-copy BUSCOs 
(S) |</span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span class="cm-tab" role="presentation" cm-text=" "> </span>|11<span class="cm-tab" role="presentation" cm-text=" "> </span>Complete and duplicated BUSCOs (D) |</span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span class="cm-tab" role="presentation" cm-text=" "> </span>|12<span class="cm-tab" role="presentation" cm-text=" "> </span>Fragmented BUSCOs (F) |</span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span class="cm-tab" role="presentation" cm-text=" "> </span>|36<span class="cm-tab" role="presentation" cm-text=" "> </span>Missing BUSCOs (M) |</span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span class="cm-tab" role="presentation" cm-text=" "> </span>|954<span class="cm-tab" role="presentation" cm-text=" "> </span>Total BUSCO groups searched |</span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span class="cm-tab" role="presentation" cm-text=" "> </span><span class="cm-attribute">--------------------------------------------------</span></span></pre></div></div></div></div></div><div style="position: absolute; height: 0px; width: 1px; border-bottom-width: 0px; border-bottom-style: solid; border-bottom-color: transparent; top: 242px;"></div><div class="CodeMirror-gutters" style="display: none; height: 242px;"></div></div></div></pre><h2 id='updating-gene-models-with-pasa'><span>Updating Gene Models with PASA</span></h2><p><span>The merged exonerate/BRAKER2 gene models were very complete based on the BUSCO metrics; however, these gene models had relatively few isoforms, meaning we were likely underestimating overall transcriptional complexity. 
Also, only the Exonerate models had UTRs. This motivated us to try and incorporate more of the information from our transcriptome into our gene models. We used the PASA pipeline (v2.4.1), which provides this functionality.</span></p><p><span>We first prepped the transcriptome we generated using Trinity for the PASA pipeline:</span></p><p><span>(</span><em><span>05_pasaUpdate/runCleanup.sh</span></em><span>)</span></p><pre class="md-fences md-end-block ty-contain-cm modeLoaded" spellcheck="false" lang="bash"><div class="CodeMirror cm-s-inner cm-s-null-scroll CodeMirror-wrap" lang="bash"><div style="overflow: hidden; position: relative; width: 3px; height: 0px; top: 9px; left: 8px;"><textarea autocorrect="off" autocapitalize="off" spellcheck="false" tabindex="0" style="position: absolute; bottom: -1em; padding: 0px; width: 1000px; height: 1em; outline: none;"></textarea></div><div class="CodeMirror-scrollbar-filler" cm-not-content="true"></div><div class="CodeMirror-gutter-filler" cm-not-content="true"></div><div class="CodeMirror-scroll" tabindex="-1"><div class="CodeMirror-sizer" style="margin-left: 0px; margin-bottom: 0px; border-right-width: 0px; padding-right: 0px; padding-bottom: 0px;"><div style="position: relative; top: 0px;"><div class="CodeMirror-lines" role="presentation"><div role="presentation" style="position: relative; outline: none;"><div class="CodeMirror-measure"><pre><span>xxxxxxxxxx</span></pre></div><div class="CodeMirror-measure"></div><div style="position: relative; z-index: 1;"></div><div class="CodeMirror-code" role="presentation" style=""><div class="CodeMirror-activeline" style="position: relative;"><div class="CodeMirror-activeline-background CodeMirror-linebackground"></div><div class="CodeMirror-gutter-background CodeMirror-activeline-gutter" style="left: 0px; width: 0px;"></div><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span class="cm-meta">#! 
/bin/bash -l</span></span></pre></div><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span class="cm-comment">#SBATCH -p med</span></span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span class="cm-comment">#SBATCH --job-name=pasaC</span></span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span class="cm-comment">#SBATCH -c 4</span></span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span class="cm-comment">#SBATCH -t 60-0</span></span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span class="cm-comment">#SBATCH --mem=16G</span></span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span class="cm-comment">#SBATCH --error=pasaC.err</span></span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span class="cm-comment">#SBATCH --output=pasaC.out</span></span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span cm-text="" cm-zwsp="">
</span></span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;">module load singularity</span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span cm-text="" cm-zwsp="">
</span></span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;">singularity exec <span class="cm-attribute">-B</span> /home/jacazet/reference/makerAnnotations/aepAnnot/pasa \</span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"> ~/pasa.sif /usr/local/src/PASApipeline/bin/seqclean Trinity-GG.fasta</span></pre></div></div></div></div></div><div style="position: absolute; height: 0px; width: 1px; border-bottom-width: 0px; border-bottom-style: solid; border-bottom-color: transparent; top: 286px;"></div><div class="CodeMirror-gutters" style="display: none; height: 286px;"></div></div></div></pre><p><span>We then ran the main PASA pipeline, which aligned the transcriptome to the genome</span></p><p><span>(</span><em><span>05_pasaUpdate/runAlignment.sh</span></em><span>)</span></p><pre class="md-fences md-end-block ty-contain-cm modeLoaded" spellcheck="false" lang="bash"><div class="CodeMirror cm-s-inner cm-s-null-scroll CodeMirror-wrap" lang="bash"><div style="overflow: hidden; position: relative; width: 3px; height: 0px; top: 9px; left: 8px;"><textarea autocorrect="off" autocapitalize="off" spellcheck="false" tabindex="0" style="position: absolute; bottom: -1em; padding: 0px; width: 1000px; height: 1em; outline: none;"></textarea></div><div class="CodeMirror-scrollbar-filler" cm-not-content="true"></div><div class="CodeMirror-gutter-filler" cm-not-content="true"></div><div class="CodeMirror-scroll" tabindex="-1"><div class="CodeMirror-sizer" style="margin-left: 0px; margin-bottom: 0px; border-right-width: 0px; padding-right: 0px; padding-bottom: 0px;"><div style="position: relative; top: 0px;"><div class="CodeMirror-lines" role="presentation"><div role="presentation" style="position: relative; outline: none;"><div class="CodeMirror-measure"><pre><span>xxxxxxxxxx</span></pre></div><div class="CodeMirror-measure"></div><div style="position: 
relative; z-index: 1;"></div><div class="CodeMirror-code" role="presentation" style=""><div class="CodeMirror-activeline" style="position: relative;"><div class="CodeMirror-activeline-background CodeMirror-linebackground"></div><div class="CodeMirror-gutter-background CodeMirror-activeline-gutter" style="left: 0px; width: 0px;"></div><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span class="cm-meta">#! /bin/bash</span></span></pre></div><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span cm-text="" cm-zwsp="">
</span></span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;">/usr/local/src/PASApipeline/Launch_PASA_pipeline.pl \</span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"> <span class="cm-attribute">-c</span> alignAssembly.config <span class="cm-attribute">-C</span> <span class="cm-attribute">-R</span> <span class="cm-attribute">--CPU</span> <span class="cm-number">30</span> \</span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"> <span class="cm-attribute">--ALIGNER</span> gmap,blat <span class="cm-attribute">-g</span> aep.final.genome.fa <span class="cm-attribute">-t</span> Trinity-GG.fasta.clean \</span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"> <span class="cm-attribute">-T</span> <span class="cm-attribute">-u</span> Trinity-GG.fasta <span class="cm-attribute">--TRANSDECODER</span> \</span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"> <span class="cm-attribute">--stringent_alignment_overlap</span> <span class="cm-number">30</span>.0 <span class="cm-attribute">-d</span></span></pre></div></div></div></div></div><div style="position: absolute; height: 0px; width: 1px; border-bottom-width: 0px; border-bottom-style: solid; border-bottom-color: transparent; top: 154px;"></div><div class="CodeMirror-gutters" style="display: none; height: 154px;"></div></div></div></pre><p><span>We executed the above script from within a singularity container on a slurm computing cluster using the script below:</span></p><p><span>(</span><em><span>05_pasaUpdate/slurmRunAlignment.sh</span></em><span>)</span></p><pre class="md-fences md-end-block ty-contain-cm modeLoaded" spellcheck="false" lang="bash"><div class="CodeMirror cm-s-inner cm-s-null-scroll 
CodeMirror-wrap" lang="bash"><div style="overflow: hidden; position: relative; width: 3px; height: 0px; top: 9px; left: 8px;"><textarea autocorrect="off" autocapitalize="off" spellcheck="false" tabindex="0" style="position: absolute; bottom: -1em; padding: 0px; width: 1000px; height: 1em; outline: none;"></textarea></div><div class="CodeMirror-scrollbar-filler" cm-not-content="true"></div><div class="CodeMirror-gutter-filler" cm-not-content="true"></div><div class="CodeMirror-scroll" tabindex="-1"><div class="CodeMirror-sizer" style="margin-left: 0px; margin-bottom: 0px; border-right-width: 0px; padding-right: 0px; padding-bottom: 0px;"><div style="position: relative; top: 0px;"><div class="CodeMirror-lines" role="presentation"><div role="presentation" style="position: relative; outline: none;"><div class="CodeMirror-measure"><pre><span>xxxxxxxxxx</span></pre></div><div class="CodeMirror-measure"></div><div style="position: relative; z-index: 1;"></div><div class="CodeMirror-code" role="presentation" style=""><div class="CodeMirror-activeline" style="position: relative;"><div class="CodeMirror-activeline-background CodeMirror-linebackground"></div><div class="CodeMirror-gutter-background CodeMirror-activeline-gutter" style="left: 0px; width: 0px;"></div><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span class="cm-meta">#! 
/bin/bash</span></span></pre></div><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span class="cm-comment">#SBATCH -p bigmemh</span></span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span class="cm-comment">#SBATCH --job-name=pasaP</span></span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span class="cm-comment">#SBATCH -c 60</span></span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span class="cm-comment">#SBATCH -t 60-0</span></span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span class="cm-comment">#SBATCH --mem=0</span></span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span class="cm-comment">#SBATCH --error=pasaP.err</span></span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span class="cm-comment">#SBATCH --output=pasaP.out</span></span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span cm-text="" cm-zwsp="">
</span></span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;">module load singularity</span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span cm-text="" cm-zwsp="">
</span></span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;">singularity exec <span class="cm-attribute">-B</span> /home/jacazet/reference/makerAnnotations/aepAnnot/pasa \</span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"> ~/pasa.sif ./runAlignment.sh\</span></pre></div></div></div></div></div><div style="position: absolute; height: 0px; width: 1px; border-bottom-width: 0px; border-bottom-style: solid; border-bottom-color: transparent; top: 286px;"></div><div class="CodeMirror-gutters" style="display: none; height: 286px;"></div></div></div></pre><p><span>The PASA pipeline was then run again with the </span><code>-A</code><span> flag, triggering the annotation comparison mode. In this mode, PASA compares the aligned transcripts from the transcriptome to the provided gene annotations, and updates the gene models in cases where the aligned transcripts contained more/better information (e.g., splice sites or UTR coords)</span></p><p><span>(</span><em><span>05_pasaUpdate/runCompare.sh</span></em><span>)</span></p><pre class="md-fences md-end-block ty-contain-cm modeLoaded" spellcheck="false" lang="bash" style="break-inside: unset;"><div class="CodeMirror cm-s-inner cm-s-null-scroll CodeMirror-wrap" lang="bash"><div style="overflow: hidden; position: relative; width: 3px; height: 0px; top: 9px; left: 8px;"><textarea autocorrect="off" autocapitalize="off" spellcheck="false" tabindex="0" style="position: absolute; bottom: -1em; padding: 0px; width: 1000px; height: 1em; outline: none;"></textarea></div><div class="CodeMirror-scrollbar-filler" cm-not-content="true"></div><div class="CodeMirror-gutter-filler" cm-not-content="true"></div><div class="CodeMirror-scroll" tabindex="-1"><div class="CodeMirror-sizer" style="margin-left: 0px; margin-bottom: 0px; border-right-width: 0px; padding-right: 0px; padding-bottom: 0px;"><div style="position: 
relative; top: 0px;"><div class="CodeMirror-lines" role="presentation"><div role="presentation" style="position: relative; outline: none;"><div class="CodeMirror-measure"><pre><span>xxxxxxxxxx</span></pre></div><div class="CodeMirror-measure"></div><div style="position: relative; z-index: 1;"></div><div class="CodeMirror-code" role="presentation" style=""><div class="CodeMirror-activeline" style="position: relative;"><div class="CodeMirror-activeline-background CodeMirror-linebackground"></div><div class="CodeMirror-gutter-background CodeMirror-activeline-gutter" style="left: 0px; width: 0px;"></div><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span class="cm-meta">#! /bin/bash</span></span></pre></div><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span class="cm-comment">#SBATCH -p med </span></span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span class="cm-comment">#SBATCH --job-name=pasaCmp</span></span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span class="cm-comment">#SBATCH -c 24</span></span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span class="cm-comment">#SBATCH -t 60-0</span></span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span class="cm-comment">#SBATCH --mem=60G</span></span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span class="cm-comment">#SBATCH --error=pasaCmp.err</span></span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span class="cm-comment">#SBATCH --output=pasaCmp.out</span></span></pre><pre class=" 
CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span cm-text="" cm-zwsp="">
</span></span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;">module load singularity</span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span cm-text="" cm-zwsp="">
</span></span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;">singularity exec <span class="cm-attribute">-B</span> /home/jacazet/reference/makerAnnotations/aepAnnot/pasa/pasaUpdate ~/pasa.sif /usr/local/src/PASApipeline/Launch_PASA_pipeline.pl \</span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"> <span class="cm-attribute">-c</span> annotationCompare.config <span class="cm-attribute">-A</span> \</span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"> <span class="cm-attribute">-g</span> aep.final.genome.fa \</span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"> <span class="cm-attribute">-t</span> Trinity-GG.fasta.clean \</span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"> <span class="cm-attribute">-L</span> \</span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"> <span class="cm-attribute">--annots</span> HVAEP1.baseline.geneModels.gff3 \</span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"> <span class="cm-attribute">--CPU</span> <span class="cm-number">24</span></span></pre></div></div></div></div></div><div style="position: absolute; height: 0px; width: 1px; border-bottom-width: 0px; border-bottom-style: solid; border-bottom-color: transparent; top: 418px;"></div><div class="CodeMirror-gutters" style="display: none; height: 418px;"></div></div></div></pre><p><span>The resulting GFF3 file was named </span><code>HVAEP1.geneModels.pUpdate1.gff3</code></p><h2 id='polishing-and-finalizing-gene-models'><span>Polishing and Finalizing Gene Models</span></h2><p><span>In some cases PASA ended up breaking ORFs of 
gene models that were previously complete. We dropped the PASA updated versions of those disrupted gene models and restored them to their prior pre-PASA state.</span></p><pre class="md-fences md-end-block ty-contain-cm modeLoaded" spellcheck="false" lang="bash" style="break-inside: unset;"><div class="CodeMirror cm-s-inner cm-s-null-scroll CodeMirror-wrap" lang="bash"><div style="overflow: hidden; position: relative; width: 3px; height: 0px; top: 9px; left: 8px;"><textarea autocorrect="off" autocapitalize="off" spellcheck="false" tabindex="0" style="position: absolute; bottom: -1em; padding: 0px; width: 1000px; height: 1em; outline: none;"></textarea></div><div class="CodeMirror-scrollbar-filler" cm-not-content="true"></div><div class="CodeMirror-gutter-filler" cm-not-content="true"></div><div class="CodeMirror-scroll" tabindex="-1"><div class="CodeMirror-sizer" style="margin-left: 0px; margin-bottom: 0px; border-right-width: 0px; padding-right: 0px; padding-bottom: 0px;"><div style="position: relative; top: 0px;"><div class="CodeMirror-lines" role="presentation"><div role="presentation" style="position: relative; outline: none;"><div class="CodeMirror-measure"><pre><span>xxxxxxxxxx</span></pre></div><div class="CodeMirror-measure"></div><div style="position: relative; z-index: 1;"></div><div class="CodeMirror-code" role="presentation" style=""><div class="CodeMirror-activeline" style="position: relative;"><div class="CodeMirror-activeline-background CodeMirror-linebackground"></div><div class="CodeMirror-gutter-background CodeMirror-activeline-gutter" style="left: 0px; width: 0px;"></div><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span class="cm-comment">#get AA sequence for PASA models</span></span></pre></div><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;">gffread <span class="cm-attribute">-y</span> HVAEP1.geneModels.pUpdate1.prot.fa <span 
class="cm-attribute">-g</span> ../../aep.final.genome.fa HVAEP1.geneModels.pUpdate1.gff3</span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span cm-text="" cm-zwsp="">
</span></span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span class="cm-comment">#Get IDs for complete proteins (no broken ORFs)</span></span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;">seqkit <span class="cm-builtin">grep</span> <span class="cm-attribute">-v</span> <span class="cm-attribute">-p</span> <span class="cm-string">'\.'</span> <span class="cm-attribute">-r</span> <span class="cm-attribute">-s</span> HVAEP1.geneModels.pUpdate1.prot.fa | <span class="cm-builtin">grep</span> <span class="cm-string">'>'</span> | <span class="cm-builtin">sed</span> <span class="cm-string">'s/>//g'</span> > HVAEP1.geneModels.pUpdate1.completeProt.txt</span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span cm-text="" cm-zwsp="">
</span></span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span class="cm-comment">#Drop all incomplete ORFs from the PASA updated models</span></span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;">perl transcript_keeper.pl HVAEP1.geneModels.pUpdate1.completeProt.txt HVAEP1.geneModels.pUpdate1.gff3 > HVAEP1.geneModels.pUpdate1.complete.gff3</span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span cm-text="" cm-zwsp="">
</span></span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span class="cm-comment">#Get the IDs of broken ORFs in the PASA updated models</span></span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;">seqkit <span class="cm-builtin">grep</span> <span class="cm-attribute">-p</span> <span class="cm-string">'\.'</span> <span class="cm-attribute">-r</span> <span class="cm-attribute">-s</span> HVAEP1.geneModels.pUpdate1.prot.fa | <span class="cm-builtin">grep</span> <span class="cm-string">'>'</span> | <span class="cm-builtin">sed</span> <span class="cm-string">'s/>//g'</span> > HVAEP1.geneModels.pUpdate1.incompleteProt.txt</span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span cm-text="" cm-zwsp="">
</span></span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span class="cm-comment">#Pull the pre-PASA versions for the models broken by PASA</span></span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;">perl transcript_keeper.pl HVAEP1.geneModels.pUpdate1.incompleteProt.txt HVAEP1.baseline.geneModels.gff3 > HVAEP1.geneModels.baseline.completeComplement.gff3</span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span cm-text="" cm-zwsp="">
</span></span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span class="cm-comment">#Merge the complete PASA updated models with the restored pre-PASA models</span></span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span class="cm-builtin">cat</span> HVAEP1.geneModels.baseline.completeComplement.gff3 HVAEP1.geneModels.pUpdate1.complete.gff3 > HVAEP1.geneModels.pUpdate1.filt.gff3</span></pre></div></div></div></div></div><div style="position: absolute; height: 0px; width: 1px; border-bottom-width: 0px; border-bottom-style: solid; border-bottom-color: transparent; top: 484px;"></div><div class="CodeMirror-gutters" style="display: none; height: 484px;"></div></div></div></pre><p><span>While reviewing the PASA-updated gene models, we came across a problem in the exonerate predictions where very large introns were inserted in an attempt to align the full 3' UTR sequence that was provided (sometimes these included polyA sequences that weren't removed prior to alignment). 
We addressed this issue by dropping all 3' UTRs shorter than 20nt that were in an exon on their own at the end of genes using the following R script:</span></p><p><span>(06_finalize/uFix.R)</span></p><pre class="md-fences md-end-block ty-contain-cm modeLoaded" spellcheck="false" lang="R" style="break-inside: unset;"><div class="CodeMirror cm-s-inner cm-s-null-scroll CodeMirror-wrap" lang="r"><div style="overflow: hidden; position: relative; width: 3px; height: 0px; top: 9px; left: 8px;"><textarea autocorrect="off" autocapitalize="off" spellcheck="false" tabindex="0" style="position: absolute; bottom: -1em; padding: 0px; width: 1000px; height: 1em; outline: none;"></textarea></div><div class="CodeMirror-scrollbar-filler" cm-not-content="true"></div><div class="CodeMirror-gutter-filler" cm-not-content="true"></div><div class="CodeMirror-scroll" tabindex="-1"><div class="CodeMirror-sizer" style="margin-left: 0px; margin-bottom: 0px; border-right-width: 0px; padding-right: 0px; padding-bottom: 0px;"><div style="position: relative; top: 0px;"><div class="CodeMirror-lines" role="presentation"><div role="presentation" style="position: relative; outline: none;"><div class="CodeMirror-measure"><pre><span>xxxxxxxxxx</span></pre></div><div class="CodeMirror-measure"></div><div style="position: relative; z-index: 1;"></div><div class="CodeMirror-code" role="presentation" style=""><div class="CodeMirror-activeline" style="position: relative;"><div class="CodeMirror-activeline-background CodeMirror-linebackground"></div><div class="CodeMirror-gutter-background CodeMirror-activeline-gutter" style="left: 0px; width: 0px;"></div><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span class="cm-variable">setwd</span>(<span class="cm-variable">dirname</span>(<span class="cm-variable">rstudioapi</span><span class="cm-operator">::</span><span class="cm-variable">getActiveDocumentContext</span>()<span class="cm-operator 
cm-dollar">$</span><span class="cm-variable">path</span>))</span></pre></div><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span cm-text="" cm-zwsp="">
</span></span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span class="cm-variable">inG</span> <span class="cm-operator cm-arrow"><-</span> <span class="cm-variable">read.delim</span>(<span class="cm-string">"HVAEP1.geneModels.pUpdate1.filt.gff3"</span>, <span class="cm-variable">header</span> <span class="cm-operator">=</span> <span class="cm-variable">F</span>, <span class="cm-variable">skip</span> <span class="cm-operator">=</span> <span class="cm-number">1</span>)</span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span cm-text="" cm-zwsp="">
</span></span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span class="cm-variable">inG</span> <span class="cm-operator cm-arrow"><-</span> <span class="cm-variable">inG</span>[<span class="cm-operator">!</span><span class="cm-variable">grepl</span>(<span class="cm-string">"^#"</span>,<span class="cm-variable">inG</span><span class="cm-operator cm-dollar">$</span><span class="cm-variable">V1</span>),]</span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span cm-text="" cm-zwsp="">
</span></span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span class="cm-comment">#PASA added some of its own models, which we don't really want</span></span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span class="cm-comment">#so we just drop them</span></span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span class="cm-variable">inG</span> <span class="cm-operator cm-arrow"><-</span> <span class="cm-variable">inG</span>[<span class="cm-operator">!</span><span class="cm-variable">grepl</span>(<span class="cm-string">'novel_model|temp_model'</span>,<span class="cm-variable">inG</span>[,<span class="cm-number">9</span>]),]</span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span cm-text="" cm-zwsp="">
</span></span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span class="cm-comment">#get the gene IDs for each row</span></span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span class="cm-variable">inG</span><span class="cm-operator cm-dollar">$</span><span class="cm-variable">gID</span> <span class="cm-operator cm-arrow"><-</span> <span class="cm-variable">gsub</span>(<span class="cm-string">".*HVAEP1_[TG](</span><span class="cm-string-2">\\</span><span class="cm-string">d+).*"</span>,<span class="cm-string">"</span><span class="cm-string-2">\\</span><span class="cm-string">1"</span>,<span class="cm-variable">inG</span><span class="cm-operator cm-dollar">$</span><span class="cm-variable">V9</span>)</span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span cm-text="" cm-zwsp="">
</span></span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span class="cm-comment">#partition rows by gene ID</span></span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span class="cm-variable">inG.List</span> <span class="cm-operator cm-arrow"><-</span> <span class="cm-variable">split</span>(<span class="cm-variable">inG</span>, <span class="cm-variable">inG</span><span class="cm-operator cm-dollar">$</span><span class="cm-variable">gID</span>)</span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span cm-text="" cm-zwsp="">
</span></span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span class="cm-comment">#sort rows by coords, from 5' to 3' relative to the gene in question</span></span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span class="cm-variable">inG.List</span> <span class="cm-operator cm-arrow"><-</span> <span class="cm-variable">lapply</span>(<span class="cm-variable">inG.List</span>, <span class="cm-keyword">function</span>(<span class="cm-variable">x</span>) <span class="cm-keyword">if</span>(<span class="cm-variable">x</span><span class="cm-operator cm-dollar">$</span><span class="cm-variable">V7</span>[<span class="cm-number">1</span>] <span class="cm-operator">==</span> <span class="cm-string">'-'</span>){</span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"> <span class="cm-builtin">return</span>(<span class="cm-variable">x</span>[<span class="cm-variable">order</span>(<span class="cm-operator">-</span><span class="cm-variable">x</span><span class="cm-operator cm-dollar">$</span><span class="cm-variable">V4</span>),])} <span class="cm-keyword">else</span> {</span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"> <span class="cm-builtin">return</span>(<span class="cm-variable">x</span>[<span class="cm-variable">order</span>(<span class="cm-variable">x</span><span class="cm-operator cm-dollar">$</span><span class="cm-variable">V4</span>),])</span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"> })</span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span cm-text="" cm-zwsp="">
</span></span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span class="cm-comment">#initialize list of problematic short 3' UTRs</span></span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span class="cm-variable">short3</span> <span class="cm-operator cm-arrow"><-</span> <span class="cm-builtin">list</span>()</span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span cm-text="" cm-zwsp="">
</span></span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span class="cm-comment">#for loop to check all gene models for problematic UTRs</span></span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span class="cm-keyword">for</span>(<span class="cm-variable">i</span> <span class="cm-keyword">in</span> <span class="cm-number">1</span><span class="cm-operator">:</span><span class="cm-variable">length</span>(<span class="cm-variable">inG.List</span>)){</span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"> <span class="cm-variable">subG</span> <span class="cm-operator cm-arrow"><-</span> <span class="cm-variable">inG.List</span>[[<span class="cm-variable">i</span>]]</span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"> <span class="cm-comment">#if the gene doesn't even have 3' UTR we can skip it</span></span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"> <span class="cm-keyword">if</span>(<span class="cm-variable">nrow</span>(<span class="cm-variable">subG</span>[<span class="cm-variable">subG</span><span class="cm-operator cm-dollar">$</span><span class="cm-variable">V3</span> <span class="cm-operator">==</span> <span class="cm-string">'three_prime_UTR'</span>,]) <span class="cm-operator">==</span> <span class="cm-number">0</span>) {</span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"> <span class="cm-keyword">next</span></span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"> }</span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"> <span 
class="cm-comment">#pull 3' UTR rows and calculate each UTR segments length</span></span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"> <span class="cm-variable">prime3</span> <span class="cm-operator cm-arrow"><-</span> <span class="cm-variable">subG</span>[<span class="cm-variable">subG</span><span class="cm-operator cm-dollar">$</span><span class="cm-variable">V3</span> <span class="cm-operator">==</span> <span class="cm-string">'three_prime_UTR'</span>,]</span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"> <span class="cm-variable">prime3L</span> <span class="cm-operator cm-arrow"><-</span> <span class="cm-variable">prime3</span>[<span class="cm-variable">nrow</span>(<span class="cm-variable">prime3</span>),<span class="cm-number">5</span>] <span class="cm-operator">-</span> <span class="cm-variable">prime3</span>[<span class="cm-variable">nrow</span>(<span class="cm-variable">prime3</span>),<span class="cm-number">4</span>]</span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"> </span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"> <span class="cm-comment">#next look for exons at the end of genes that are exclusively made of UTR sequence</span></span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"> <span class="cm-variable">exTest</span> <span class="cm-operator cm-arrow"><-</span> <span class="cm-variable">subG</span>[<span class="cm-variable">subG</span><span class="cm-operator cm-dollar">$</span><span class="cm-variable">V3</span> <span class="cm-operator">==</span> <span class="cm-string">'exon'</span>,]</span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"> <span 
class="cm-variable">exTest</span> <span class="cm-operator cm-arrow"><-</span> (<span class="cm-variable">exTest</span>[<span class="cm-variable">nrow</span>(<span class="cm-variable">exTest</span>),<span class="cm-number">4</span>] <span class="cm-operator">==</span> <span class="cm-variable">prime3</span>[<span class="cm-variable">nrow</span>(<span class="cm-variable">prime3</span>),<span class="cm-number">4</span>]) <span class="cm-operator">&</span> (<span class="cm-variable">exTest</span>[<span class="cm-variable">nrow</span>(<span class="cm-variable">exTest</span>),<span class="cm-number">5</span>] <span class="cm-operator">==</span> <span class="cm-variable">prime3</span>[<span class="cm-variable">nrow</span>(<span class="cm-variable">prime3</span>),<span class="cm-number">5</span>])</span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"> </span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"> <span class="cm-comment">#flag any 3' UTR-only exons if they are shorter than 20 bp</span></span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"> <span class="cm-keyword">if</span>(<span class="cm-variable">prime3L</span> <span class="cm-operator"><=</span> <span class="cm-number">20</span> <span class="cm-operator">&</span> <span class="cm-variable">exTest</span>){</span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"> <span class="cm-variable">short3</span>[[<span class="cm-variable">as.character</span>(<span class="cm-variable">subG</span>[<span class="cm-number">1</span>,<span class="cm-string">'gID'</span>])]] <span class="cm-operator cm-arrow"><-</span> <span class="cm-variable">subG</span></span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"> 
}</span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;">}</span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span cm-text="" cm-zwsp="">
</span></span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span class="cm-comment">#initialize a list of fixed UTRs</span></span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span class="cm-variable">short3.fix</span> <span class="cm-operator cm-arrow"><-</span> <span class="cm-builtin">list</span>()</span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span cm-text="" cm-zwsp="">
</span></span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span class="cm-comment"># go through the list of problematic 3' UTRs and drop them from the gene model</span></span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span class="cm-keyword">for</span>(<span class="cm-variable">i</span> <span class="cm-keyword">in</span> <span class="cm-number">1</span><span class="cm-operator">:</span><span class="cm-variable">length</span>(<span class="cm-variable">short3</span>)) {</span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"> <span class="cm-variable">subG</span> <span class="cm-operator cm-arrow"><-</span> <span class="cm-variable">short3</span>[[<span class="cm-variable">i</span>]]</span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"> <span class="cm-keyword">if</span>(<span class="cm-variable">subG</span><span class="cm-operator cm-dollar">$</span><span class="cm-variable">V7</span>[<span class="cm-number">1</span>] <span class="cm-operator">==</span> <span class="cm-string">'-'</span>) {</span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"> <span class="cm-comment">#get current boundary from faulty utr</span></span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"> <span class="cm-variable">oldB</span> <span class="cm-operator cm-arrow"><-</span> <span class="cm-variable">subG</span>[<span class="cm-variable">nrow</span>(<span class="cm-variable">subG</span>),<span class="cm-number">4</span>]</span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"> </span></pre><pre class=" CodeMirror-line " 
role="presentation"><span role="presentation" style="padding-right: 0.1px;"> <span class="cm-comment">#get the new boundary from the next leftmost thing</span></span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"> <span class="cm-variable">newB</span> <span class="cm-operator cm-arrow"><-</span> <span class="cm-variable">unique</span>(<span class="cm-variable">subG</span><span class="cm-operator cm-dollar">$</span><span class="cm-variable">V4</span>)</span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"> <span class="cm-variable">newB</span> <span class="cm-operator cm-arrow"><-</span> <span class="cm-variable">newB</span>[<span class="cm-variable">length</span>(<span class="cm-variable">newB</span>) <span class="cm-operator">-</span> <span class="cm-number">1</span>]</span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"> </span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"> <span class="cm-comment">#delete the bad UTR</span></span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"> <span class="cm-variable">subG</span> <span class="cm-operator cm-arrow"><-</span> <span class="cm-variable">subG</span>[<span class="cm-operator">-</span><span class="cm-variable">which</span>(<span class="cm-variable">subG</span><span class="cm-operator cm-dollar">$</span><span class="cm-variable">V3</span> <span class="cm-operator">==</span> <span class="cm-string">'three_prime_UTR'</span> <span class="cm-operator">&</span> <span class="cm-variable">subG</span><span class="cm-operator cm-dollar">$</span><span class="cm-variable">V4</span> <span class="cm-operator">==</span> <span class="cm-variable">oldB</span>),]</span></pre><pre class=" CodeMirror-line " 
role="presentation"><span role="presentation" style="padding-right: 0.1px;"> <span class="cm-variable">subG</span> <span class="cm-operator cm-arrow"><-</span> <span class="cm-variable">subG</span>[<span class="cm-operator">-</span><span class="cm-variable">which</span>(<span class="cm-variable">subG</span><span class="cm-operator cm-dollar">$</span><span class="cm-variable">V3</span> <span class="cm-operator">==</span> <span class="cm-string">'exon'</span> <span class="cm-operator">&</span> <span class="cm-variable">subG</span><span class="cm-operator cm-dollar">$</span><span class="cm-variable">V4</span> <span class="cm-operator">==</span> <span class="cm-variable">oldB</span>),]</span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"> </span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"> <span class="cm-comment">#update the new Boundary for other rows</span></span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"> <span class="cm-variable">subG</span>[<span class="cm-variable">subG</span><span class="cm-operator cm-dollar">$</span><span class="cm-variable">V4</span> <span class="cm-operator">==</span> <span class="cm-variable">oldB</span>,<span class="cm-string">'V4'</span>] <span class="cm-operator cm-arrow"><-</span> <span class="cm-variable">newB</span></span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"> </span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"> <span class="cm-variable">short3.fix</span>[[<span class="cm-variable">as.character</span>(<span class="cm-variable">subG</span><span class="cm-operator cm-dollar">$</span><span class="cm-variable">gID</span>[<span class="cm-number">1</span>])]] <span class="cm-operator 
cm-arrow"><-</span> <span class="cm-variable">subG</span></span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"> </span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"> } <span class="cm-keyword">else</span> {</span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"> <span class="cm-comment">#get current boundary from faulty utr</span></span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"> <span class="cm-variable">oldB</span> <span class="cm-operator cm-arrow"><-</span> <span class="cm-variable">subG</span>[<span class="cm-variable">nrow</span>(<span class="cm-variable">subG</span>),<span class="cm-number">5</span>]</span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"> </span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"> <span class="cm-comment">#get the new boundary from the next leftmost thing</span></span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"> <span class="cm-variable">newB</span> <span class="cm-operator cm-arrow"><-</span> <span class="cm-variable">unique</span>(<span class="cm-variable">subG</span><span class="cm-operator cm-dollar">$</span><span class="cm-variable">V5</span>)</span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"> <span class="cm-variable">newB</span> <span class="cm-operator cm-arrow"><-</span> <span class="cm-variable">newB</span>[<span class="cm-variable">length</span>(<span class="cm-variable">newB</span>) <span class="cm-operator">-</span> <span class="cm-number">1</span>]</span></pre><pre class=" CodeMirror-line " 
role="presentation"><span role="presentation" style="padding-right: 0.1px;"> </span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"> <span class="cm-comment">#delete the short UTR and it's exon</span></span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"> <span class="cm-variable">subG</span> <span class="cm-operator cm-arrow"><-</span> <span class="cm-variable">subG</span>[<span class="cm-operator">-</span><span class="cm-variable">which</span>(<span class="cm-variable">subG</span><span class="cm-operator cm-dollar">$</span><span class="cm-variable">V3</span> <span class="cm-operator">==</span> <span class="cm-string">'three_prime_UTR'</span> <span class="cm-operator">&</span> <span class="cm-variable">subG</span><span class="cm-operator cm-dollar">$</span><span class="cm-variable">V5</span> <span class="cm-operator">==</span> <span class="cm-variable">oldB</span>),]</span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"> <span class="cm-variable">subG</span> <span class="cm-operator cm-arrow"><-</span> <span class="cm-variable">subG</span>[<span class="cm-operator">-</span><span class="cm-variable">which</span>(<span class="cm-variable">subG</span><span class="cm-operator cm-dollar">$</span><span class="cm-variable">V3</span> <span class="cm-operator">==</span> <span class="cm-string">'exon'</span> <span class="cm-operator">&</span> <span class="cm-variable">subG</span><span class="cm-operator cm-dollar">$</span><span class="cm-variable">V5</span> <span class="cm-operator">==</span> <span class="cm-variable">oldB</span>),]</span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"> </span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"> <span 
class="cm-comment">#update the new Boundary for other rows</span></span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"> <span class="cm-variable">subG</span>[<span class="cm-variable">subG</span><span class="cm-operator cm-dollar">$</span><span class="cm-variable">V5</span> <span class="cm-operator">==</span> <span class="cm-variable">oldB</span>,<span class="cm-number">5</span>] <span class="cm-operator cm-arrow"><-</span> <span class="cm-variable">newB</span></span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"> </span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"> <span class="cm-variable">short3.fix</span>[[<span class="cm-variable">as.character</span>(<span class="cm-variable">subG</span><span class="cm-operator cm-dollar">$</span><span class="cm-variable">gID</span>[<span class="cm-number">1</span>])]] <span class="cm-operator cm-arrow"><-</span> <span class="cm-variable">subG</span></span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"> }</span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;">}</span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span cm-text="" cm-zwsp="">
</span></span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span class="cm-comment">#merged fixed gene models with remaining entries</span></span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span class="cm-variable">outG</span> <span class="cm-operator cm-arrow"><-</span> <span class="cm-variable">inG.List</span>[<span class="cm-operator">!</span>(<span class="cm-variable">names</span>(<span class="cm-variable">inG.List</span>) <span class="cm-operator cm-variable-2">%in%</span> <span class="cm-variable">names</span>(<span class="cm-variable">short3.fix</span>))]</span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span cm-text="" cm-zwsp="">
</span></span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span class="cm-variable">outG</span> <span class="cm-operator cm-arrow"><-</span> <span class="cm-variable">c</span>(<span class="cm-variable">outG</span>, <span class="cm-variable">short3.fix</span>)</span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span cm-text="" cm-zwsp="">
</span></span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span class="cm-variable">outG</span> <span class="cm-operator cm-arrow"><-</span> <span class="cm-variable">do.call</span>(<span class="cm-variable">rbind</span>,<span class="cm-variable">outG</span>)</span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span cm-text="" cm-zwsp="">
</span></span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span class="cm-comment">#make sure output is coordinate sorted</span></span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span class="cm-variable">chrNum</span> <span class="cm-operator cm-arrow"><-</span> <span class="cm-variable">as.numeric</span>(<span class="cm-variable">gsub</span>(<span class="cm-string">'chr-'</span>,<span class="cm-string">''</span>,<span class="cm-variable">outG</span><span class="cm-operator cm-dollar">$</span><span class="cm-variable">V1</span>))</span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span cm-text="" cm-zwsp="">
</span></span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span class="cm-variable">outG</span> <span class="cm-operator cm-arrow"><-</span> <span class="cm-variable">outG</span>[<span class="cm-variable">order</span>(<span class="cm-variable">chrNum</span>,<span class="cm-variable">outG</span><span class="cm-operator cm-dollar">$</span><span class="cm-variable">V4</span>),]</span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span cm-text="" cm-zwsp="">
</span></span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span class="cm-comment">#give every row the appropriate gene ID tag</span></span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span class="cm-variable">outG</span><span class="cm-operator cm-dollar">$</span><span class="cm-variable">gID</span> <span class="cm-operator cm-arrow"><-</span> <span class="cm-variable">paste0</span>(<span class="cm-string">';gene_id=HVAEP1_G'</span>,<span class="cm-variable">outG</span><span class="cm-operator cm-dollar">$</span><span class="cm-variable">gID</span>)</span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span cm-text="" cm-zwsp="">
</span></span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span class="cm-variable">outG</span><span class="cm-operator cm-dollar">$</span><span class="cm-variable">V9</span> <span class="cm-operator cm-arrow"><-</span> <span class="cm-variable">paste0</span>(<span class="cm-variable">outG</span><span class="cm-operator cm-dollar">$</span><span class="cm-variable">V9</span>,<span class="cm-variable">outG</span><span class="cm-operator cm-dollar">$</span><span class="cm-variable">gID</span>)</span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span cm-text="" cm-zwsp="">
</span></span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span class="cm-variable">outG</span><span class="cm-operator cm-dollar">$</span><span class="cm-variable">V9</span> <span class="cm-operator cm-arrow"><-</span> <span class="cm-variable">gsub</span>(<span class="cm-string">'[.]</span><span class="cm-string-2">\\</span><span class="cm-string">d[.](</span><span class="cm-string-2">\\</span><span class="cm-string">d)[.]'</span>,<span class="cm-string">'_</span><span class="cm-string-2">\\</span><span class="cm-string">1_'</span>,<span class="cm-variable">outG</span><span class="cm-operator cm-dollar">$</span><span class="cm-variable">V9</span>)</span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span cm-text="" cm-zwsp="">
</span></span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span class="cm-variable">outG</span><span class="cm-operator cm-dollar">$</span><span class="cm-variable">V9</span> <span class="cm-operator cm-arrow"><-</span> <span class="cm-variable">gsub</span>(<span class="cm-string">'Name=[^;]+;'</span>,<span class="cm-string">''</span>,<span class="cm-variable">outG</span><span class="cm-operator cm-dollar">$</span><span class="cm-variable">V9</span>)</span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span cm-text="" cm-zwsp="">
</span></span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span class="cm-variable">write.table</span>(<span class="cm-variable">outG</span>[,<span class="cm-number">1</span><span class="cm-operator">:</span><span class="cm-number">9</span>], <span class="cm-variable">file</span> <span class="cm-operator">=</span> <span class="cm-string">"HVAEP1.geneModels.pUpdate1.filt.uFix.gff3"</span>, <span class="cm-builtin">quote</span> <span class="cm-operator">=</span> <span class="cm-variable">F</span>, <span class="cm-variable">row.names</span> <span class="cm-operator">=</span> <span class="cm-variable">F</span>, <span class="cm-variable">col.names</span> <span class="cm-operator">=</span> <span class="cm-variable">F</span>, <span class="cm-variable">sep</span> <span class="cm-operator">=</span> <span class="cm-string">'</span><span class="cm-string-2">\t</span><span class="cm-string">'</span>)</span></pre></div></div></div></div></div><div style="position: absolute; height: 0px; width: 1px; border-bottom-width: 0px; border-bottom-style: solid; border-bottom-color: transparent; top: 2464px;"></div><div class="CodeMirror-gutters" style="display: none; height: 2464px;"></div></div></div></pre><p><span>Because some gene models got merged or otherwise modified since we had used MAKER to reformat the gene names, we had to adjust the gene names so that they were still numbered consecutively according to their order in the genome.</span></p><pre class="md-fences md-end-block ty-contain-cm modeLoaded" spellcheck="false" lang="bash" style="break-inside: unset;"><div class="CodeMirror cm-s-inner cm-s-null-scroll CodeMirror-wrap" lang="bash"><div style="overflow: hidden; position: relative; width: 3px; height: 0px; top: 9px; left: 8px;"><textarea autocorrect="off" autocapitalize="off" spellcheck="false" tabindex="0" style="position: absolute; bottom: -1em; padding: 0px; width: 1000px; height: 1em; outline: 
none;"></textarea></div><div class="CodeMirror-scrollbar-filler" cm-not-content="true"></div><div class="CodeMirror-gutter-filler" cm-not-content="true"></div><div class="CodeMirror-scroll" tabindex="-1"><div class="CodeMirror-sizer" style="margin-left: 0px; margin-bottom: 0px; border-right-width: 0px; padding-right: 0px; padding-bottom: 0px;"><div style="position: relative; top: 0px;"><div class="CodeMirror-lines" role="presentation"><div role="presentation" style="position: relative; outline: none;"><div class="CodeMirror-measure"><pre><span>xxxxxxxxxx</span></pre></div><div class="CodeMirror-measure"></div><div style="position: relative; z-index: 1;"></div><div class="CodeMirror-code" role="presentation" style=""><div class="CodeMirror-activeline" style="position: relative;"><div class="CodeMirror-activeline-background CodeMirror-linebackground"></div><div class="CodeMirror-gutter-background CodeMirror-activeline-gutter" style="left: 0px; width: 0px;"></div><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span class="cm-comment">#agat to try and catch any formatting issues with the GFF3</span></span></pre></div><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;">agat_convert_sp_gxf2gxf.pl <span class="cm-attribute">-g</span> HVAEP1.geneModels.pUpdate1.filt.uFix.gff3 <span class="cm-attribute">-gvi</span> <span class="cm-number">3</span> <span class="cm-attribute">-gvo</span> <span class="cm-number">3</span> <span class="cm-attribute">-c</span> <span class="cm-string">'gene_id'</span> <span class="cm-attribute">-o</span> HVAEP1.geneModels.pUpdate1.filt.uFix.agat.gff3</span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span cm-text="" cm-zwsp="">
</span></span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span class="cm-keyword">for</span> f <span class="cm-keyword">in</span> {1..15}; <span class="cm-keyword">do</span> printf <span class="cm-string">"chr-</span><span class="cm-def">$f</span><span class="cm-string">\t</span><span class="cm-def">$f</span><span class="cm-string">\n"</span> >> sOrder.txt; <span class="cm-keyword">done</span></span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span cm-text="" cm-zwsp="">
</span></span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span class="cm-comment">#need to change naming scheme so as to not trip up the MAKER script</span></span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span class="cm-builtin">sed</span> <span class="cm-attribute">-i</span> <span class="cm-string">'s/HVAEP1_//g'</span> HVAEP1.geneModels.pUpdate1.filt.uFix.agat.gff3 </span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span cm-text="" cm-zwsp="">
</span></span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;">singularity exec <span class="cm-attribute">-B</span> /home/jacazet/reference/makerAnnotations/aepAnnot/finalize ~/maker-plus_3.01.03.sif maker_map_ids <span class="cm-attribute">--prefix</span> HVAEP1_ <span class="cm-attribute">--justify</span> <span class="cm-number">6</span> <span class="cm-attribute">--suffix</span> . <span class="cm-attribute">--abrv_gene</span> G <span class="cm-attribute">--abrv_tran</span> T <span class="cm-attribute">--iterate</span> <span class="cm-number">1</span> <span class="cm-attribute">--sort_order</span> sOrder.txt HVAEP1.geneModels.pUpdate1.filt.uFix.agat.gff3 > merged.gm.map.txt</span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span cm-text="" cm-zwsp="">
</span></span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;">docker run <span class="cm-attribute">-it</span> <span class="cm-attribute">-v</span> <span class="cm-quote">$(pwd)</span>:/usr/local/dockerWD jcazet/maker-plus:3.01.03 maker_map_ids <span class="cm-attribute">--prefix</span> HVAEP1_ <span class="cm-attribute">--justify</span> <span class="cm-number">6</span> <span class="cm-attribute">--suffix</span> . <span class="cm-attribute">--abrv_gene</span> G <span class="cm-attribute">--abrv_tran</span> T <span class="cm-attribute">--iterate</span> <span class="cm-number">1</span> <span class="cm-attribute">--sort_order</span> dockerWD/sOrder.txt dockerWD/HVAEP1.geneModels.pUpdate1.filt.uFix.agat.gff3 > merged.gm.map.txt</span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span cm-text="" cm-zwsp="">
</span></span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span class="cm-builtin">cp</span> HVAEP1.geneModels.pUpdate1.filt.uFix.agat.gff3 HVAEP1.PU.RN.gff3</span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span cm-text="" cm-zwsp="">
</span></span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;">singularity exec <span class="cm-attribute">-B</span> /home/jacazet/reference/makerAnnotations/aepAnnot/finalize ~/maker-plus_3.01.03.sif map_gff_ids merged.gm.map.txt HVAEP1.PU.RN.gff3</span></pre></div></div></div></div></div><div style="position: absolute; height: 0px; width: 1px; border-bottom-width: 0px; border-bottom-style: solid; border-bottom-color: transparent; top: 462px;"></div><div class="CodeMirror-gutters" style="display: none; height: 462px;"></div></div></div></pre><p><span>Finally, because these gene models were passed through many different programs that often added odd/unconventional tags, there were quite a few weird formatting quirks in the 9th column of the GFF3. The following R script tries to catch and correct most of those formatting issues:</span></p><p><span>(</span><em><span>06_finalize/postRnPolish.R</span></em><span>)</span></p><pre class="md-fences md-end-block ty-contain-cm modeLoaded" spellcheck="false" lang="R" style="break-inside: unset;"><div class="CodeMirror cm-s-inner cm-s-null-scroll CodeMirror-wrap" lang="r"><div style="overflow: hidden; position: relative; width: 3px; height: 0px; top: 9px; left: 8px;"><textarea autocorrect="off" autocapitalize="off" spellcheck="false" tabindex="0" style="position: absolute; bottom: -1em; padding: 0px; width: 1000px; height: 1em; outline: none;"></textarea></div><div class="CodeMirror-scrollbar-filler" cm-not-content="true"></div><div class="CodeMirror-gutter-filler" cm-not-content="true"></div><div class="CodeMirror-scroll" tabindex="-1"><div class="CodeMirror-sizer" style="margin-left: 0px; margin-bottom: 0px; border-right-width: 0px; padding-right: 0px; padding-bottom: 0px;"><div style="position: relative; top: 0px;"><div class="CodeMirror-lines" role="presentation"><div role="presentation" style="position: relative; outline: none;"><div 
class="CodeMirror-measure"><pre><span>xxxxxxxxxx</span></pre></div><div class="CodeMirror-measure"></div><div style="position: relative; z-index: 1;"></div><div class="CodeMirror-code" role="presentation" style=""><div class="CodeMirror-activeline" style="position: relative;"><div class="CodeMirror-activeline-background CodeMirror-linebackground"></div><div class="CodeMirror-gutter-background CodeMirror-activeline-gutter" style="left: 0px; width: 0px;"></div><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span class="cm-variable">setwd</span>(<span class="cm-variable">dirname</span>(<span class="cm-variable">rstudioapi</span><span class="cm-operator">::</span><span class="cm-variable">getActiveDocumentContext</span>()<span class="cm-operator cm-dollar">$</span><span class="cm-variable">path</span>))</span></pre></div><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span cm-text="" cm-zwsp="">
</span></span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span class="cm-variable">inG</span> <span class="cm-operator cm-arrow"><-</span> <span class="cm-variable">read.delim</span>(<span class="cm-string">'HVAEP1.PU.RN.gff3'</span>, <span class="cm-variable">skip</span> <span class="cm-operator">=</span> <span class="cm-number">1</span>, <span class="cm-variable">header</span> <span class="cm-operator">=</span> <span class="cm-variable">F</span>)</span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span cm-text="" cm-zwsp="">
</span></span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span class="cm-variable">inG</span><span class="cm-operator cm-dollar">$</span><span class="cm-variable">V9</span> <span class="cm-operator cm-arrow"><-</span> <span class="cm-variable">gsub</span>(<span class="cm-string">'Name=;'</span>,<span class="cm-string">''</span>,<span class="cm-variable">inG</span><span class="cm-operator cm-dollar">$</span><span class="cm-variable">V9</span>)</span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span cm-text="" cm-zwsp="">
</span></span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span class="cm-variable">inG</span><span class="cm-operator cm-dollar">$</span><span class="cm-variable">V9</span> <span class="cm-operator cm-arrow"><-</span> <span class="cm-variable">gsub</span>(<span class="cm-string">'Alias=[^;]+;'</span>,<span class="cm-string">''</span>,<span class="cm-variable">inG</span><span class="cm-operator cm-dollar">$</span><span class="cm-variable">V9</span>)</span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span cm-text="" cm-zwsp="">
</span></span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span class="cm-variable">inG</span><span class="cm-operator cm-dollar">$</span><span class="cm-variable">V9</span> <span class="cm-operator cm-arrow"><-</span> <span class="cm-variable">gsub</span>(<span class="cm-string">'deletions=[^;]+;'</span>,<span class="cm-string">''</span>,<span class="cm-variable">inG</span><span class="cm-operator cm-dollar">$</span><span class="cm-variable">V9</span>)</span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span cm-text="" cm-zwsp="">
</span></span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span class="cm-variable">inG</span><span class="cm-operator cm-dollar">$</span><span class="cm-variable">gID</span> <span class="cm-operator cm-arrow"><-</span> <span class="cm-variable">gsub</span>(<span class="cm-string">'.*HVAEP1_[TG](</span><span class="cm-string-2">\\</span><span class="cm-string">d+).*'</span>,<span class="cm-string">'</span><span class="cm-string-2">\\</span><span class="cm-string">1'</span>,<span class="cm-variable">inG</span><span class="cm-operator cm-dollar">$</span><span class="cm-variable">V9</span>)</span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span cm-text="" cm-zwsp="">
</span></span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span class="cm-variable">inG</span><span class="cm-operator cm-dollar">$</span><span class="cm-variable">V9</span> <span class="cm-operator cm-arrow"><-</span> <span class="cm-variable">gsub</span>(<span class="cm-string">'gene_id=.*$'</span>,<span class="cm-string">''</span>,<span class="cm-variable">inG</span><span class="cm-operator cm-dollar">$</span><span class="cm-variable">V9</span>)</span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span cm-text="" cm-zwsp="">
</span></span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span class="cm-variable">inG</span><span class="cm-operator cm-dollar">$</span><span class="cm-variable">V9</span> <span class="cm-operator cm-arrow"><-</span> <span class="cm-variable">paste0</span>(<span class="cm-variable">inG</span><span class="cm-operator cm-dollar">$</span><span class="cm-variable">V9</span>,<span class="cm-string">'gene_id=HVAEP1_G'</span>,<span class="cm-variable">inG</span><span class="cm-operator cm-dollar">$</span><span class="cm-variable">gID</span>)</span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span cm-text="" cm-zwsp="">
</span></span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span class="cm-variable">inG</span><span class="cm-operator cm-dollar">$</span><span class="cm-variable">tID</span> <span class="cm-operator cm-arrow"><-</span> <span class="cm-variable">gsub</span>(<span class="cm-string">'.*(HVAEP1_T</span><span class="cm-string-2">\\</span><span class="cm-string">d+[.]</span><span class="cm-string-2">\\</span><span class="cm-string">d+);.*'</span>,<span class="cm-string">'</span><span class="cm-string-2">\\</span><span class="cm-string">1'</span>,<span class="cm-variable">inG</span><span class="cm-operator cm-dollar">$</span><span class="cm-variable">V9</span>)</span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span cm-text="" cm-zwsp="">
</span></span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span class="cm-variable">inG</span><span class="cm-operator cm-dollar">$</span><span class="cm-variable">euFix</span> <span class="cm-operator cm-arrow"><-</span> <span class="cm-variable">inG</span><span class="cm-operator cm-dollar">$</span><span class="cm-variable">V9</span></span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span cm-text="" cm-zwsp="">
</span></span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span class="cm-variable">inG</span><span class="cm-operator cm-dollar">$</span><span class="cm-variable">euFix</span> <span class="cm-operator cm-arrow"><-</span> <span class="cm-variable">gsub</span>(<span class="cm-string">';.*'</span>,<span class="cm-string">''</span>,<span class="cm-variable">inG</span><span class="cm-operator cm-dollar">$</span><span class="cm-variable">euFix</span>)</span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span cm-text="" cm-zwsp="">
</span></span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span class="cm-variable">inG</span><span class="cm-operator cm-dollar">$</span><span class="cm-variable">euFix</span> <span class="cm-operator cm-arrow"><-</span> <span class="cm-variable">gsub</span>(<span class="cm-string">'ID=T</span><span class="cm-string-2">\\</span><span class="cm-string">d+[.]</span><span class="cm-string-2">\\</span><span class="cm-string">d+(</span><span class="cm-string-2">\\</span><span class="cm-string">D+)'</span>,<span class="cm-string">'</span><span class="cm-string-2">\\</span><span class="cm-string">1'</span>,<span class="cm-variable">inG</span><span class="cm-operator cm-dollar">$</span><span class="cm-variable">euFix</span>)</span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span class="cm-variable">inG</span><span class="cm-operator cm-dollar">$</span><span class="cm-variable">euFix</span> <span class="cm-operator cm-arrow"><-</span> <span class="cm-variable">gsub</span>(<span class="cm-string">'ID=T</span><span class="cm-string-2">\\</span><span class="cm-string">d+_</span><span class="cm-string-2">\\</span><span class="cm-string">d+_[^.]+[.](</span><span class="cm-string-2">\\</span><span class="cm-string">D+)'</span>,<span class="cm-string">'.</span><span class="cm-string-2">\\</span><span class="cm-string">1'</span>,<span class="cm-variable">inG</span><span class="cm-operator cm-dollar">$</span><span class="cm-variable">euFix</span>)</span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span class="cm-variable">inG</span><span class="cm-operator cm-dollar">$</span><span class="cm-variable">euFix</span> <span class="cm-operator cm-arrow"><-</span> <span class="cm-variable">gsub</span>(<span class="cm-string">'ID=split[^.]+[.][^.]+'</span>,<span 
class="cm-string">''</span>,<span class="cm-variable">inG</span><span class="cm-operator cm-dollar">$</span><span class="cm-variable">euFix</span>)</span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span class="cm-variable">inG</span><span class="cm-operator cm-dollar">$</span><span class="cm-variable">euFix</span> <span class="cm-operator cm-arrow"><-</span> <span class="cm-variable">gsub</span>(<span class="cm-string">'_T</span><span class="cm-string-2">\\</span><span class="cm-string">d+[.]</span><span class="cm-string-2">\\</span><span class="cm-string">d+[.](</span><span class="cm-string-2">\\</span><span class="cm-string">D+)'</span>,<span class="cm-string">'.</span><span class="cm-string-2">\\</span><span class="cm-string">1'</span>,<span class="cm-variable">inG</span><span class="cm-operator cm-dollar">$</span><span class="cm-variable">euFix</span>)</span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span class="cm-variable">inG</span><span class="cm-operator cm-dollar">$</span><span class="cm-variable">euFix</span> <span class="cm-operator cm-arrow"><-</span> <span class="cm-variable">gsub</span>(<span class="cm-string">'ID=file_1_file_1_jg</span><span class="cm-string-2">\\</span><span class="cm-string">d+[.]t</span><span class="cm-string-2">\\</span><span class="cm-string">d+(</span><span class="cm-string-2">\\</span><span class="cm-string">D+)'</span>,<span class="cm-string">'.</span><span class="cm-string-2">\\</span><span class="cm-string">1'</span>,<span class="cm-variable">inG</span><span class="cm-operator cm-dollar">$</span><span class="cm-variable">euFix</span>)</span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span class="cm-variable">inG</span><span class="cm-operator cm-dollar">$</span><span class="cm-variable">euFix</span> <span 
class="cm-operator cm-arrow"><-</span> <span class="cm-variable">gsub</span>(<span class="cm-string">'ID=t</span><span class="cm-string-2">\\</span><span class="cm-string">d+aep-(</span><span class="cm-string-2">\\</span><span class="cm-string">D+)-(</span><span class="cm-string-2">\\</span><span class="cm-string">d+)'</span>,<span class="cm-string">'.</span><span class="cm-string-2">\\</span><span class="cm-string">1.</span><span class="cm-string-2">\\</span><span class="cm-string">2'</span>,<span class="cm-variable">inG</span><span class="cm-operator cm-dollar">$</span><span class="cm-variable">euFix</span>)</span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span class="cm-variable">inG</span><span class="cm-operator cm-dollar">$</span><span class="cm-variable">euFix</span> <span class="cm-operator cm-arrow"><-</span> <span class="cm-variable">gsub</span>(<span class="cm-string">'ID=nbis-'</span>,<span class="cm-string">'.'</span>,<span class="cm-variable">inG</span><span class="cm-operator cm-dollar">$</span><span class="cm-variable">euFix</span>)</span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span class="cm-variable">inG</span><span class="cm-operator cm-dollar">$</span><span class="cm-variable">euFix</span> <span class="cm-operator cm-arrow"><-</span> <span class="cm-variable">gsub</span>(<span class="cm-string">'five_prime_utr[</span><span class="cm-string-2">\\</span><span class="cm-string">.-]'</span>,<span class="cm-string">'utr5p'</span>,<span class="cm-variable">inG</span><span class="cm-operator cm-dollar">$</span><span class="cm-variable">euFix</span>)</span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span class="cm-variable">inG</span><span class="cm-operator cm-dollar">$</span><span class="cm-variable">euFix</span> <span class="cm-operator 
cm-arrow"><-</span> <span class="cm-variable">gsub</span>(<span class="cm-string">'three_prime_utr[</span><span class="cm-string-2">\\</span><span class="cm-string">.-]'</span>,<span class="cm-string">'utr3p'</span>,<span class="cm-variable">inG</span><span class="cm-operator cm-dollar">$</span><span class="cm-variable">euFix</span>)</span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span cm-text="" cm-zwsp="">
</span></span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span class="cm-variable">inG</span><span class="cm-operator cm-dollar">$</span><span class="cm-variable">euFix</span> <span class="cm-operator cm-arrow"><-</span> <span class="cm-variable">paste0</span>(<span class="cm-string">'ID='</span>,<span class="cm-variable">inG</span><span class="cm-operator cm-dollar">$</span><span class="cm-variable">tID</span>,<span class="cm-variable">inG</span><span class="cm-operator cm-dollar">$</span><span class="cm-variable">euFix</span>)</span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span cm-text="" cm-zwsp="">
</span></span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span class="cm-variable">targRow</span> <span class="cm-operator cm-arrow"><-</span> <span class="cm-variable">grepl</span>(<span class="cm-string">'UTR'</span>,<span class="cm-variable">inG</span><span class="cm-operator cm-dollar">$</span><span class="cm-variable">V3</span>) <span class="cm-operator">|</span> <span class="cm-variable">grepl</span>(<span class="cm-string">'exon'</span>,<span class="cm-variable">inG</span><span class="cm-operator cm-dollar">$</span><span class="cm-variable">V3</span>)</span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span cm-text="" cm-zwsp="">
</span></span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span class="cm-variable">inG</span>[<span class="cm-operator">!</span><span class="cm-variable">targRow</span>,<span class="cm-string">'euFix'</span>] <span class="cm-operator cm-arrow"><-</span> <span class="cm-string">''</span></span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span cm-text="" cm-zwsp="">
</span></span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span class="cm-variable">inG</span>[<span class="cm-variable">targRow</span>,<span class="cm-number">9</span>] <span class="cm-operator cm-arrow"><-</span> <span class="cm-variable">gsub</span>(<span class="cm-string">'ID=[^;]+'</span>,<span class="cm-string">''</span>,<span class="cm-variable">inG</span>[<span class="cm-variable">targRow</span>,<span class="cm-number">9</span>])</span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span cm-text="" cm-zwsp="">
</span></span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span class="cm-variable">inG</span>[<span class="cm-variable">targRow</span>,<span class="cm-number">9</span>] <span class="cm-operator cm-arrow"><-</span> <span class="cm-variable">paste0</span>(<span class="cm-variable">inG</span>[<span class="cm-variable">targRow</span>,<span class="cm-string">'euFix'</span>],<span class="cm-variable">inG</span>[<span class="cm-variable">targRow</span>,<span class="cm-number">9</span>])</span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span cm-text="" cm-zwsp="">
</span></span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span class="cm-variable">inG</span><span class="cm-operator cm-dollar">$</span><span class="cm-variable">euFix</span> <span class="cm-operator cm-arrow"><-</span> <span class="cm-atom">NULL</span></span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span cm-text="" cm-zwsp="">
</span></span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span cm-text="" cm-zwsp="">
</span></span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span class="cm-variable">inG</span><span class="cm-operator cm-dollar">$</span><span class="cm-variable">cFix</span> <span class="cm-operator cm-arrow"><-</span> <span class="cm-variable">inG</span><span class="cm-operator cm-dollar">$</span><span class="cm-variable">tID</span></span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span cm-text="" cm-zwsp="">
</span></span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span class="cm-variable">inG</span>[<span class="cm-variable">inG</span><span class="cm-operator cm-dollar">$</span><span class="cm-variable">V3</span> <span class="cm-operator">!=</span> <span class="cm-string">'CDS'</span>,<span class="cm-string">'cFix'</span>] <span class="cm-operator cm-arrow"><-</span> <span class="cm-string">''</span></span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span cm-text="" cm-zwsp="">
</span></span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span class="cm-variable">rleRes</span> <span class="cm-operator cm-arrow"><-</span> <span class="cm-variable">data.frame</span>(<span class="cm-variable">lengths</span> <span class="cm-operator">=</span> <span class="cm-variable">rle</span>(<span class="cm-variable">inG</span><span class="cm-operator cm-dollar">$</span><span class="cm-variable">cFix</span>)<span class="cm-operator cm-dollar">$</span><span class="cm-variable">lengths</span>, <span class="cm-variable">values</span> <span class="cm-operator">=</span> <span class="cm-variable">rle</span>(<span class="cm-variable">inG</span><span class="cm-operator cm-dollar">$</span><span class="cm-variable">cFix</span>)<span class="cm-operator cm-dollar">$</span><span class="cm-variable">values</span>)</span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span cm-text="" cm-zwsp="">
</span></span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span class="cm-variable">rleRes</span> <span class="cm-operator cm-arrow"><-</span> <span class="cm-variable">lapply</span>(<span class="cm-variable">rleRes</span><span class="cm-operator cm-dollar">$</span><span class="cm-variable">lengths</span>, <span class="cm-keyword">function</span>(<span class="cm-variable">x</span>) <span class="cm-variable">seq</span>(<span class="cm-variable">from</span><span class="cm-operator">=</span><span class="cm-number">1</span>, <span class="cm-variable">to</span><span class="cm-operator">=</span><span class="cm-variable">x</span>, <span class="cm-variable">by</span> <span class="cm-operator">=</span> <span class="cm-number">1</span>))</span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span cm-text="" cm-zwsp="">
</span></span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span class="cm-variable">rleRes</span> <span class="cm-operator cm-arrow"><-</span> <span class="cm-variable">do.call</span>(<span class="cm-variable">c</span>,<span class="cm-variable">rleRes</span>)</span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span cm-text="" cm-zwsp="">
</span></span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span class="cm-variable">inG</span><span class="cm-operator cm-dollar">$</span><span class="cm-variable">cFix</span> <span class="cm-operator cm-arrow"><-</span> <span class="cm-variable">paste0</span>(<span class="cm-string">'.'</span>,<span class="cm-variable">rleRes</span>,<span class="cm-string">'.'</span>,<span class="cm-variable">inG</span><span class="cm-operator cm-dollar">$</span><span class="cm-variable">cFix</span>)</span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span cm-text="" cm-zwsp="">
</span></span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span class="cm-variable">inG</span>[<span class="cm-variable">inG</span><span class="cm-operator cm-dollar">$</span><span class="cm-variable">V3</span> <span class="cm-operator">!=</span> <span class="cm-string">'CDS'</span>,<span class="cm-string">'cFix'</span>] <span class="cm-operator cm-arrow"><-</span> <span class="cm-string">''</span></span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span cm-text="" cm-zwsp="">
</span></span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span class="cm-variable">inG</span>[<span class="cm-variable">inG</span><span class="cm-operator cm-dollar">$</span><span class="cm-variable">V3</span> <span class="cm-operator">==</span> <span class="cm-string">'CDS'</span> ,<span class="cm-number">9</span>] <span class="cm-operator cm-arrow"><-</span> <span class="cm-variable">gsub</span>(<span class="cm-string">'ID=[^;]+'</span>,<span class="cm-string">''</span>,<span class="cm-variable">inG</span>[<span class="cm-variable">inG</span><span class="cm-operator cm-dollar">$</span><span class="cm-variable">V3</span> <span class="cm-operator">==</span> <span class="cm-string">'CDS'</span> ,<span class="cm-number">9</span>])</span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span class="cm-variable">inG</span>[<span class="cm-variable">inG</span><span class="cm-operator cm-dollar">$</span><span class="cm-variable">V3</span> <span class="cm-operator">==</span> <span class="cm-string">'CDS'</span> ,<span class="cm-number">9</span>] <span class="cm-operator cm-arrow"><-</span> <span class="cm-variable">paste0</span>(<span class="cm-string">'ID=cds'</span>,<span class="cm-variable">inG</span>[<span class="cm-variable">inG</span><span class="cm-operator cm-dollar">$</span><span class="cm-variable">V3</span> <span class="cm-operator">==</span> <span class="cm-string">'CDS'</span> ,<span class="cm-string">'cFix'</span>],<span class="cm-variable">inG</span>[<span class="cm-variable">inG</span><span class="cm-operator cm-dollar">$</span><span class="cm-variable">V3</span> <span class="cm-operator">==</span> <span class="cm-string">'CDS'</span> ,<span class="cm-number">9</span>])</span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span cm-text="" cm-zwsp="">
</span></span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span class="cm-variable">inG</span><span class="cm-operator cm-dollar">$</span><span class="cm-variable">cFix</span> <span class="cm-operator cm-arrow"><-</span> <span class="cm-atom">NULL</span></span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span cm-text="" cm-zwsp="">
</span></span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span class="cm-variable">inG</span><span class="cm-operator cm-dollar">$</span><span class="cm-variable">tID</span> <span class="cm-operator cm-arrow"><-</span> <span class="cm-variable">paste0</span>(<span class="cm-string">'transcript_id='</span>,<span class="cm-variable">inG</span><span class="cm-operator cm-dollar">$</span><span class="cm-variable">tID</span>)</span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span cm-text="" cm-zwsp="">
</span></span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span class="cm-variable">inG</span>[<span class="cm-variable">inG</span><span class="cm-operator cm-dollar">$</span><span class="cm-variable">V3</span> <span class="cm-operator">==</span> <span class="cm-string">'gene'</span>,<span class="cm-string">'tID'</span>] <span class="cm-operator cm-arrow"><-</span> <span class="cm-string">''</span></span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span cm-text="" cm-zwsp="">
</span></span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span class="cm-variable">inG</span><span class="cm-operator cm-dollar">$</span><span class="cm-variable">V9</span> <span class="cm-operator cm-arrow"><-</span> <span class="cm-variable">paste0</span>(<span class="cm-variable">inG</span><span class="cm-operator cm-dollar">$</span><span class="cm-variable">V9</span>,<span class="cm-string">';'</span>,<span class="cm-variable">inG</span><span class="cm-operator cm-dollar">$</span><span class="cm-variable">tID</span>)</span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span cm-text="" cm-zwsp="">
</span></span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span class="cm-variable">inG</span><span class="cm-operator cm-dollar">$</span><span class="cm-variable">V9</span> <span class="cm-operator cm-arrow"><-</span> <span class="cm-variable">gsub</span>(<span class="cm-string">';$'</span>,<span class="cm-string">''</span>,<span class="cm-variable">inG</span><span class="cm-operator cm-dollar">$</span><span class="cm-variable">V9</span>)</span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span cm-text="" cm-zwsp="">
</span></span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span class="cm-variable">write.table</span>(<span class="cm-variable">inG</span>[,<span class="cm-number">1</span><span class="cm-operator">:</span><span class="cm-number">9</span>],<span class="cm-variable">file</span> <span class="cm-operator">=</span> <span class="cm-string">'HVAEP1.PU.RN.pol.gff3'</span>, <span class="cm-variable">sep</span> <span class="cm-operator">=</span> <span class="cm-string">'</span><span class="cm-string-2">\t</span><span class="cm-string">'</span>, <span class="cm-variable">row.names</span> <span class="cm-operator">=</span> <span class="cm-variable">F</span>, <span class="cm-variable">col.names</span> <span class="cm-operator">=</span> <span class="cm-variable">F</span>, <span class="cm-builtin">quote</span> <span class="cm-operator">=</span> <span class="cm-variable">F</span>)</span></pre></div></div></div></div></div><div style="position: absolute; height: 0px; width: 1px; border-bottom-width: 0px; border-bottom-style: solid; border-bottom-color: transparent; top: 1606px;"></div><div class="CodeMirror-gutters" style="display: none; height: 1606px;"></div></div></div></pre><p><span>Following these modifications, we finalized the gene models and generated the final fasta and GFF files:</span></p><pre class="md-fences md-end-block ty-contain-cm modeLoaded" spellcheck="false" lang="bash"><div class="CodeMirror cm-s-inner cm-s-null-scroll CodeMirror-wrap" lang="bash"><div style="overflow: hidden; position: relative; width: 3px; height: 0px; top: 9px; left: 8px;"><textarea autocorrect="off" autocapitalize="off" spellcheck="false" tabindex="0" style="position: absolute; bottom: -1em; padding: 0px; width: 1000px; height: 1em; outline: none;"></textarea></div><div class="CodeMirror-scrollbar-filler" cm-not-content="true"></div><div class="CodeMirror-gutter-filler" cm-not-content="true"></div><div 
class="CodeMirror-scroll" tabindex="-1"><div class="CodeMirror-sizer" style="margin-left: 0px; margin-bottom: 0px; border-right-width: 0px; padding-right: 0px; padding-bottom: 0px;"><div style="position: relative; top: 0px;"><div class="CodeMirror-lines" role="presentation"><div role="presentation" style="position: relative; outline: none;"><div class="CodeMirror-measure"><pre><span>xxxxxxxxxx</span></pre></div><div class="CodeMirror-measure"></div><div style="position: relative; z-index: 1;"></div><div class="CodeMirror-code" role="presentation" style=""><div class="CodeMirror-activeline" style="position: relative;"><div class="CodeMirror-activeline-background CodeMirror-linebackground"></div><div class="CodeMirror-gutter-background CodeMirror-activeline-gutter" style="left: 0px; width: 0px;"></div><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span class="cm-builtin">cp</span> HVAEP1.PU.RN.pol.gff3 HVAEP1.GeneModels.gff3</span></pre></div><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span cm-text="" cm-zwsp="">
</span></span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;">gffread <span class="cm-attribute">-y</span> HVAEP1.prot.fa <span class="cm-attribute">-g</span> ../../aep.final.genome.fa HVAEP1.GeneModels.gff3</span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span cm-text="" cm-zwsp="">
</span></span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;">gffread <span class="cm-attribute">-w</span> HVAEP1.tran.fa <span class="cm-attribute">-g</span> ../../aep.final.genome.fa HVAEP1.GeneModels.gff3</span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span cm-text="" cm-zwsp="">
</span></span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;">agat_sp_keep_longest_isoform.pl <span class="cm-attribute">--gff</span> HVAEP1.GeneModels.gff3 <span class="cm-attribute">-o</span> HVAEP1.GeneModels.longestIso.gff3</span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span cm-text="" cm-zwsp="">
</span></span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;">gffread <span class="cm-attribute">-w</span> HVAEP1.tran.longestIso.fa <span class="cm-attribute">-g</span> ../../aep.final.genome.fa HVAEP1.GeneModels.longestIso.gff3</span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span cm-text="" cm-zwsp="">
</span></span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;">gffread <span class="cm-attribute">-y</span> HVAEP1.prot.longestIso.fa <span class="cm-attribute">-g</span> ../../aep.final.genome.fa HVAEP1.GeneModels.longestIso.gff3</span></pre></div></div></div></div></div><div style="position: absolute; height: 0px; width: 1px; border-bottom-width: 0px; border-bottom-style: solid; border-bottom-color: transparent; top: 242px;"></div><div class="CodeMirror-gutters" style="display: none; height: 242px;"></div></div></div></pre><p><span>Final gene model stats:</span></p><pre class="md-fences md-end-block ty-contain-cm modeLoaded" spellcheck="false" lang="bash" style="break-inside: unset;"><div class="CodeMirror cm-s-inner cm-s-null-scroll CodeMirror-wrap" lang="bash"><div style="overflow: hidden; position: relative; width: 3px; height: 0px; top: 9px; left: 8px;"><textarea autocorrect="off" autocapitalize="off" spellcheck="false" tabindex="0" style="position: absolute; bottom: -1em; padding: 0px; width: 1000px; height: 1em; outline: none;"></textarea></div><div class="CodeMirror-scrollbar-filler" cm-not-content="true"></div><div class="CodeMirror-gutter-filler" cm-not-content="true"></div><div class="CodeMirror-scroll" tabindex="-1"><div class="CodeMirror-sizer" style="margin-left: 0px; margin-bottom: 0px; border-right-width: 0px; padding-right: 0px; padding-bottom: 0px;"><div style="position: relative; top: 0px;"><div class="CodeMirror-lines" role="presentation"><div role="presentation" style="position: relative; outline: none;"><div class="CodeMirror-measure"><pre><span>xxxxxxxxxx</span></pre></div><div class="CodeMirror-measure"></div><div style="position: relative; z-index: 1;"></div><div class="CodeMirror-code" role="presentation" style=""><div class="CodeMirror-activeline" style="position: relative;"><div class="CodeMirror-activeline-background CodeMirror-linebackground"></div><div 
class="CodeMirror-gutter-background CodeMirror-activeline-gutter" style="left: 0px; width: 0px;"></div><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;">perl look_at_transcripts.pl gm/HVAEP1.GeneModels.gff3 </span></pre></div><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span cm-text="" cm-zwsp="">
</span></span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;">median cds length<span class="cm-tab" role="presentation" cm-text=" "> </span><span class="cm-number">786</span></span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;">median transcript length<span class="cm-tab" role="presentation" cm-text=" "> </span><span class="cm-number">1048</span></span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;">median five prime UTR length<span class="cm-tab" role="presentation" cm-text=" "> </span><span class="cm-number">145</span></span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;">median three prime UTR length<span class="cm-tab" role="presentation" cm-text=" "> </span><span class="cm-number">267</span></span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;">median exon length<span class="cm-tab" role="presentation" cm-text=" "> </span><span class="cm-number">129</span></span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;">median intron length<span class="cm-tab" role="presentation" cm-text=" "> </span><span class="cm-number">689</span></span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;">total gene count<span class="cm-tab" role="presentation" cm-text=" "> </span><span class="cm-number">28917</span></span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;">total transcripts<span class="cm-tab" role="presentation" cm-text=" "> </span> <span class="cm-number">37784</span></span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" 
style="padding-right: 0.1px;">total unique transcripts<span class="cm-tab" role="presentation" cm-text=" "> </span> <span class="cm-number">37762</span></span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;">total unique cds<span class="cm-tab" role="presentation" cm-text=" "> </span> <span class="cm-number">35343</span></span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span cm-text="" cm-zwsp="">
</span></span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;">number of genes with five prime UTR<span class="cm-tab" role="presentation" cm-text=" "> </span><span class="cm-number">13901</span></span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;">number of genes with three prime UTR<span class="cm-tab" role="presentation" cm-text=" "> </span><span class="cm-number">13183</span></span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;">number of trans with five prime UTR<span class="cm-tab" role="presentation" cm-text=" "> </span><span class="cm-number">21156</span></span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;">number of trans with three prime UTR<span class="cm-tab" role="presentation" cm-text=" "> </span><span class="cm-number">20339</span></span></pre></div></div></div></div></div><div style="position: absolute; height: 0px; width: 1px; border-bottom-width: 0px; border-bottom-style: solid; border-bottom-color: transparent; top: 374px;"></div><div class="CodeMirror-gutters" style="display: none; height: 374px;"></div></div></div></pre><pre class="md-fences md-end-block ty-contain-cm modeLoaded" spellcheck="false" lang="bash"><div class="CodeMirror cm-s-inner cm-s-null-scroll CodeMirror-wrap" lang="bash"><div style="overflow: hidden; position: relative; width: 3px; height: 0px; top: 9px; left: 8px;"><textarea autocorrect="off" autocapitalize="off" spellcheck="false" tabindex="0" style="position: absolute; bottom: -1em; padding: 0px; width: 1000px; height: 1em; outline: none;"></textarea></div><div class="CodeMirror-scrollbar-filler" cm-not-content="true"></div><div class="CodeMirror-gutter-filler" cm-not-content="true"></div><div class="CodeMirror-scroll" tabindex="-1"><div class="CodeMirror-sizer" 
style="margin-left: 0px; margin-bottom: 0px; border-right-width: 0px; padding-right: 0px; padding-bottom: 0px;"><div style="position: relative; top: 0px;"><div class="CodeMirror-lines" role="presentation"><div role="presentation" style="position: relative; outline: none;"><div class="CodeMirror-measure"><pre><span>xxxxxxxxxx</span></pre></div><div class="CodeMirror-measure"></div><div style="position: relative; z-index: 1;"></div><div class="CodeMirror-code" role="presentation" style=""><div class="CodeMirror-activeline" style="position: relative;"><div class="CodeMirror-activeline-background CodeMirror-linebackground"></div><div class="CodeMirror-gutter-background CodeMirror-activeline-gutter" style="left: 0px; width: 0px;"></div><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;">docker run <span class="cm-attribute">-u</span> <span class="cm-quote">$(id -u)</span> <span class="cm-attribute">-v</span> <span class="cm-quote">$(pwd)</span>:/busco_wd ezlabgva/busco:v5.beta_cv1 busco <span class="cm-attribute">-c</span> <span class="cm-number">6</span> <span class="cm-attribute">-m</span> prot <span class="cm-attribute">-i</span> maker.longest.prots.fa <span class="cm-attribute">-o</span> maker <span class="cm-attribute">-l</span> metazoa_odb10 <span class="cm-attribute">-f</span></span></pre></div><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span cm-text="" cm-zwsp="">
</span></span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span class="cm-tab" role="presentation" cm-text=" "> </span>***** Results: *****</span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span cm-text="" cm-zwsp="">
</span></span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span class="cm-tab" role="presentation" cm-text=" "> </span>C:94.7%[S:93.3%,D:1.4%],F:1.7%,M:3.6%,n:954<span class="cm-tab" role="presentation" cm-text=" "> </span> </span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span class="cm-tab" role="presentation" cm-text=" "> </span><span class="cm-number">903</span><span class="cm-tab" role="presentation" cm-text=" "> </span>Complete BUSCOs (C)<span class="cm-tab" role="presentation" cm-text=" "> </span><span class="cm-tab" role="presentation" cm-text=" "> </span><span class="cm-tab" role="presentation" cm-text=" "> </span> </span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span class="cm-tab" role="presentation" cm-text=" "> </span><span class="cm-number">890</span><span class="cm-tab" role="presentation" cm-text=" "> </span>Complete and single-copy BUSCOs (S)<span class="cm-tab" role="presentation" cm-text=" "> </span> </span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span class="cm-tab" role="presentation" cm-text=" "> </span><span class="cm-number">13</span><span class="cm-tab" role="presentation" cm-text=" "> </span>Complete and duplicated BUSCOs (D)<span class="cm-tab" role="presentation" cm-text=" "> </span> </span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span class="cm-tab" role="presentation" cm-text=" "> </span><span class="cm-number">16</span><span class="cm-tab" role="presentation" cm-text=" "> </span>Fragmented BUSCOs (F)<span class="cm-tab" role="presentation" cm-text=" "> </span><span class="cm-tab" role="presentation" cm-text=" "> </span><span class="cm-tab" role="presentation" cm-text=" "> </span> 
</span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span class="cm-tab" role="presentation" cm-text=" "> </span><span class="cm-number">35</span><span class="cm-tab" role="presentation" cm-text=" "> </span>Missing BUSCOs (M)<span class="cm-tab" role="presentation" cm-text=" "> </span><span class="cm-tab" role="presentation" cm-text=" "> </span><span class="cm-tab" role="presentation" cm-text=" "> </span> </span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" class="cm-tab-wrap-hack" style="padding-right: 0.1px;"><span class="cm-tab" role="presentation" cm-text=" "> </span><span class="cm-number">954</span><span class="cm-tab" role="presentation" cm-text=" "> </span>Total BUSCO groups searched<span class="cm-tab" role="presentation" cm-text=" "> </span><span class="cm-tab" role="presentation" cm-text=" "> </span></span></pre></div></div></div></div></div><div style="position: absolute; height: 0px; width: 1px; border-bottom-width: 0px; border-bottom-style: solid; border-bottom-color: transparent; top: 264px;"></div><div class="CodeMirror-gutters" style="display: none; height: 264px;"></div></div></div></pre><h2 id='generating-functional-annotations'><span>Generating Functional Annotations</span></h2><p><span>We next set about generating functional annotations for the AEP gene models. To make inferences about gene function, we used protein domain predictions as well as orthology/sequence similarity to genes in better annotated animal models. 
</span></p><h3 id='predicting-protein-domains-using-interproscan'><span>Predicting Protein Domains Using InterProScan</span></h3><p><span>To predict protein domains, we used the InterProScan pipeline (including the optional modules for Phobius, SignalP, and TMHMM):</span></p><p><span>(</span><em><span>07_funAnnot/runFinalIpr.sh</span></em><span>)</span></p><pre class="md-fences md-end-block ty-contain-cm modeLoaded" spellcheck="false" lang="bash"><div class="CodeMirror cm-s-inner cm-s-null-scroll CodeMirror-wrap" lang="bash"><div style="overflow: hidden; position: relative; width: 3px; height: 0px; top: 9px; left: 8px;"><textarea autocorrect="off" autocapitalize="off" spellcheck="false" tabindex="0" style="position: absolute; bottom: -1em; padding: 0px; width: 1000px; height: 1em; outline: none;"></textarea></div><div class="CodeMirror-scrollbar-filler" cm-not-content="true"></div><div class="CodeMirror-gutter-filler" cm-not-content="true"></div><div class="CodeMirror-scroll" tabindex="-1"><div class="CodeMirror-sizer" style="margin-left: 0px; margin-bottom: 0px; border-right-width: 0px; padding-right: 0px; padding-bottom: 0px;"><div style="position: relative; top: 0px;"><div class="CodeMirror-lines" role="presentation"><div role="presentation" style="position: relative; outline: none;"><div class="CodeMirror-measure"><pre><span>xxxxxxxxxx</span></pre></div><div class="CodeMirror-measure"></div><div style="position: relative; z-index: 1;"></div><div class="CodeMirror-code" role="presentation" style=""><div class="CodeMirror-activeline" style="position: relative;"><div class="CodeMirror-activeline-background CodeMirror-linebackground"></div><div class="CodeMirror-gutter-background CodeMirror-activeline-gutter" style="left: 0px; width: 0px;"></div><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span class="cm-meta">#!/bin/bash</span></span></pre></div><pre class=" CodeMirror-line " role="presentation"><span 
role="presentation" style="padding-right: 0.1px;"><span class="cm-comment">#SBATCH --job-name=ipr</span></span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span class="cm-comment">#SBATCH -p med </span></span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span class="cm-comment">#SBATCH -c 8</span></span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span class="cm-comment">#SBATCH -t 60-0</span></span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span class="cm-comment">#SBATCH --mem=0</span></span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span class="cm-comment">#SBATCH --error=ipr.err</span></span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span class="cm-comment">#SBATCH --output=ipr.out</span></span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span cm-text="" cm-zwsp="">
</span></span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span class="cm-builtin">cd</span> interproscan-5.51-85.0</span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span cm-text="" cm-zwsp="">
</span></span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;">./interproscan.sh <span class="cm-attribute">-d</span> final <span class="cm-attribute">-cpu</span> <span class="cm-number">8</span> <span class="cm-attribute">-dp</span> <span class="cm-attribute">-f</span> TSV, GFF3 <span class="cm-attribute">-goterms</span> <span class="cm-attribute">-i</span> ../HVAEP1.prot.longestIso.fa <span class="cm-attribute">-iprlookup</span> <span class="cm-attribute">-pa</span></span></pre></div></div></div></div></div><div style="position: absolute; height: 0px; width: 1px; border-bottom-width: 0px; border-bottom-style: solid; border-bottom-color: transparent; top: 264px;"></div><div class="CodeMirror-gutters" style="display: none; height: 264px;"></div></div></div></pre><p><span>This generated the output file </span><code>HVAEP1.prot.longestIso.fa.tsv</code><span>, which we used as our primary resource for determining the protein domain composition of the AEP gene models.</span></p><h3 id='predicting-orthology-using-orthofinder'><span>Predicting Orthology Using OrthoFinder</span></h3><p><span>Identifying orthologs is critical for any comparative genomics analysis, and can also be a useful way of preliminarily assigning functions to genes of interest. We used OrthoFinder to systematically identify orthologs for all AEP gene models in diverse metazoan species. 
</span></p><p><span>We assembled a total of 45 proteomes for the OrthoFinder analysis.</span></p><pre class="md-fences md-end-block ty-contain-cm modeLoaded" spellcheck="false" lang="bash" style="break-inside: unset;"><div class="CodeMirror cm-s-inner cm-s-null-scroll CodeMirror-wrap" lang="bash"><div style="overflow: hidden; position: relative; width: 3px; height: 0px; top: 9px; left: 8px;"><textarea autocorrect="off" autocapitalize="off" spellcheck="false" tabindex="0" style="position: absolute; bottom: -1em; padding: 0px; width: 1000px; height: 1em; outline: none;"></textarea></div><div class="CodeMirror-scrollbar-filler" cm-not-content="true"></div><div class="CodeMirror-gutter-filler" cm-not-content="true"></div><div class="CodeMirror-scroll" tabindex="-1"><div class="CodeMirror-sizer" style="margin-left: 0px; margin-bottom: 0px; border-right-width: 0px; padding-right: 0px; padding-bottom: 0px;"><div style="position: relative; top: 0px;"><div class="CodeMirror-lines" role="presentation"><div role="presentation" style="position: relative; outline: none;"><div class="CodeMirror-measure"><pre><span>xxxxxxxxxx</span></pre></div><div class="CodeMirror-measure"></div><div style="position: relative; z-index: 1;"></div><div class="CodeMirror-code" role="presentation" style=""><div class="CodeMirror-activeline" style="position: relative;"><div class="CodeMirror-activeline-background CodeMirror-linebackground"></div><div class="CodeMirror-gutter-background CodeMirror-activeline-gutter" style="left: 0px; width: 0px;"></div><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;">A_diaphana.fa</span></pre></div><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;">A_millepora.fa</span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;">A_queenslandica.fa</span></pre><pre class=" CodeMirror-line " 
role="presentation"><span role="presentation" style="padding-right: 0.1px;">A_vaga.fa</span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;">Aurelia.fa</span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;">B_lanceolatum.fa</span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;">B_mori.fa</span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;">C_cruxmelitensis.fa</span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;">C_elegans.fa</span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;">C_hemisphaerica.fa</span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;">C_intestinalis.fa</span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;">C_milii.fa</span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;">C_xamachana.fa</span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;">D_melanogaster.fa</span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;">D_pulex.fa</span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;">E_muelleri.fa</span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;">G_gallus.fa</span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;">H_circumcincta.fa</span></pre><pre class=" CodeMirror-line 
" role="presentation"><span role="presentation" style="padding-right: 0.1px;">H_echinata.fa</span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;">H_miamia.fa</span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;">H_oligactis.fa</span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;">H_sapiens.fa</span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;">H_viridissima.fa</span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;">H_vulgaris105.fa</span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;">H_vulgarisAEP.fa</span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;">H_vulgarisAEPlrv2.fa</span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;">L_chalumnae.fa</span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;">L_oculatus.fa</span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;">M_brevicollis.fa</span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;">M_leidyi.fa</span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;">M_musculus.fa</span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;">M_virulenta.fa</span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;">N_vectensis.fa</span></pre><pre class=" 
CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;">O_bimaculoides.fa</span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;">P_bachei.fa</span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;">P_lutea.fa</span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;">P_marinus.fa</span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;">P_miniata.fa</span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;">P_naikaiensis.fa</span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;">R_esculentum.fa</span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;">S_callimorphus.fa</span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;">S_mediterranea.fa</span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;">S_purpuratus.fa</span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;">T_adhaerens.fa</span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;">X_tropicalis.fa</span></pre></div></div></div></div></div><div style="position: absolute; height: 0px; width: 1px; border-bottom-width: 0px; border-bottom-style: solid; border-bottom-color: transparent; top: 990px;"></div><div class="CodeMirror-gutters" style="display: none; height: 990px;"></div></div></div></pre><p><span>The sources for most of these proteomes were described above. 
Below are the sources for the additional proteomes that we added for the OrthoFinder analysis:</span></p><pre class="md-fences md-end-block ty-contain-cm modeLoaded" spellcheck="false" lang="bash" style="break-inside: unset;"><div class="CodeMirror cm-s-inner cm-s-null-scroll CodeMirror-wrap" lang="bash"><div style="overflow: hidden; position: relative; width: 3px; height: 0px; top: 9px; left: 8px;"><textarea autocorrect="off" autocapitalize="off" spellcheck="false" tabindex="0" style="position: absolute; bottom: -1em; padding: 0px; width: 1000px; height: 1em; outline: none;"></textarea></div><div class="CodeMirror-scrollbar-filler" cm-not-content="true"></div><div class="CodeMirror-gutter-filler" cm-not-content="true"></div><div class="CodeMirror-scroll" tabindex="-1"><div class="CodeMirror-sizer" style="margin-left: 0px; margin-bottom: 0px; border-right-width: 0px; padding-right: 0px; padding-bottom: 0px;"><div style="position: relative; top: 0px;"><div class="CodeMirror-lines" role="presentation"><div role="presentation" style="position: relative; outline: none;"><div class="CodeMirror-measure"><pre><span>xxxxxxxxxx</span></pre></div><div class="CodeMirror-measure"></div><div style="position: relative; z-index: 1;"></div><div class="CodeMirror-code" role="presentation" style=""><div class="CodeMirror-activeline" style="position: relative;"><div class="CodeMirror-activeline-background CodeMirror-linebackground"></div><div class="CodeMirror-gutter-background CodeMirror-activeline-gutter" style="left: 0px; width: 0px;"></div><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;">A_millepora.fa</span></pre></div><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;">ncbi.nlm.nih.gov/genome/2652</span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span cm-text="" cm-zwsp="">
</span></span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;">Aurelia.fa</span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;">davidadlergold.faculty.ucdavis.edu/jellyfish/</span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span cm-text="" cm-zwsp="">
</span></span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;">C_cruxmelitensis.fa</span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;">github.com/josephryan/Ohdera_et_al_2018/tree/master/AA_Files</span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span cm-text="" cm-zwsp="">
</span></span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;">C_hemisphaerica.fa</span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;">This Study</span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span cm-text="" cm-zwsp="">
</span></span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;">C_xamachana.fa</span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;">mycocosm.jgi.doe.gov/Casxa1/Casxa1.home.html</span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span cm-text="" cm-zwsp="">
</span></span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;">H_vulgarisAEPlrv2.fa</span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;">arusha.nhgri.nih.gov/hydra/download/<span class="cm-def">?dl</span><span class="cm-operator">=</span>tr</span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span cm-text="" cm-zwsp="">
</span></span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;">M_brevicollis.fa</span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;">protists.ensembl.org/Monosiga_brevicollis_mx1_gca_000002865/Info/Index</span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span cm-text="" cm-zwsp="">
</span></span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;">M_musculus.fa</span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;">uswest.ensembl.org/Mus_musculus/Info/Index</span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span cm-text="" cm-zwsp="">
</span></span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;">P_lutea.fa</span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;">plut.reefgenomics.org/download/</span></pre></div></div></div></div></div><div style="position: absolute; height: 0px; width: 1px; border-bottom-width: 0px; border-bottom-style: solid; border-bottom-color: transparent; top: 572px;"></div><div class="CodeMirror-gutters" style="display: none; height: 572px;"></div></div></div></pre><p><span>We also dropped the </span><code>H_vulgarisZurich.fa</code><span> that was in our original list of proteomes that we used as hints for gene model prediction. </span></p><p><span>One of the new sources was the AEP LRv2 transcriptome, which we needed to translate into protein sequence. We did this using TransDecoder, similar to what was described above for other transcriptomic sources, although in this case we used the NCBI NR database instead of a custom protein database for generating BLAST hits to prioritize predicted ORFs.</span></p><p><span>(</span><em><span>07_funAnnot/transDecoder.sh</span></em><span>)</span></p><pre class="md-fences md-end-block ty-contain-cm modeLoaded" spellcheck="false" lang="bash" style="break-inside: unset;"><div class="CodeMirror cm-s-inner cm-s-null-scroll CodeMirror-wrap" lang="bash"><div style="overflow: hidden; position: relative; width: 3px; height: 0px; top: 9px; left: 8px;"><textarea autocorrect="off" autocapitalize="off" spellcheck="false" tabindex="0" style="position: absolute; bottom: -1em; padding: 0px; width: 1000px; height: 1em; outline: none;"></textarea></div><div class="CodeMirror-scrollbar-filler" cm-not-content="true"></div><div class="CodeMirror-gutter-filler" cm-not-content="true"></div><div class="CodeMirror-scroll" tabindex="-1"><div class="CodeMirror-sizer" style="margin-left: 0px; margin-bottom: 0px; border-right-width: 0px; 
padding-right: 0px; padding-bottom: 0px;"><div style="position: relative; top: 0px;"><div class="CodeMirror-lines" role="presentation"><div role="presentation" style="position: relative; outline: none;"><div class="CodeMirror-measure"><pre><span>xxxxxxxxxx</span></pre></div><div class="CodeMirror-measure"></div><div style="position: relative; z-index: 1;"></div><div class="CodeMirror-code" role="presentation" style=""><div class="CodeMirror-activeline" style="position: relative;"><div class="CodeMirror-activeline-background CodeMirror-linebackground"></div><div class="CodeMirror-gutter-background CodeMirror-activeline-gutter" style="left: 0px; width: 0px;"></div><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span class="cm-meta">#!/bin/bash -l</span></span></pre></div><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span class="cm-comment">#SBATCH --job-name=TD</span></span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span class="cm-comment">#SBATCH -c 24</span></span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span class="cm-comment">#SBATCH -t 60-0</span></span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span class="cm-comment">#SBATCH --mem=36G</span></span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span class="cm-comment">#SBATCH --error=TD.err</span></span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span class="cm-comment">#SBATCH --output=TD.out</span></span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span cm-text="" cm-zwsp="">
</span></span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;">module load TransDecoder/5.2.0</span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span cm-text="" cm-zwsp="">
</span></span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;">TransDecoder.LongOrfs <span class="cm-attribute">-t</span> aepLRv2.fasta</span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span cm-text="" cm-zwsp="">
</span></span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span class="cm-builtin">echo</span> <span class="cm-string">"blasting"</span></span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span cm-text="" cm-zwsp="">
</span></span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;">~/bin/blastp <span class="cm-attribute">-query</span> aepLRv2.fasta.transdecoder_dir/longest_orfs.pep \</span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"> <span class="cm-attribute">-db</span> ~/blastdb/nr <span class="cm-attribute">-max_target_seqs</span> <span class="cm-number">1</span> \</span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"> <span class="cm-attribute">-outfmt</span> <span class="cm-number">6</span> <span class="cm-attribute">-evalue</span> 1e-5 <span class="cm-attribute">-num_threads</span> <span class="cm-number">24</span> > blastp.outfmt6</span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span cm-text="" cm-zwsp="">
</span></span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;">TransDecoder.Predict <span class="cm-attribute">-t</span> aepLRv2.fasta <span class="cm-attribute">--single_best_only</span> <span class="cm-attribute">--retain_blastp_hits</span> blastp.outfmt6</span></pre></div></div></div></div></div><div style="position: absolute; height: 0px; width: 1px; border-bottom-width: 0px; border-bottom-style: solid; border-bottom-color: transparent; top: 418px;"></div><div class="CodeMirror-gutters" style="display: none; height: 418px;"></div></div></div></pre><p><span>After we had compiled our protein sources, we reformatted the proteomes to be compatible with OrthoFinder (primarily dropping stop codon symbols and spaces in header text) and selected single representative isoforms for each gene (when possible):</span></p><pre class="md-fences md-end-block ty-contain-cm modeLoaded" spellcheck="false" lang="bash"><div class="CodeMirror cm-s-inner cm-s-null-scroll CodeMirror-wrap" lang="bash"><div style="overflow: hidden; position: relative; width: 3px; height: 0px; top: 9px; left: 8px;"><textarea autocorrect="off" autocapitalize="off" spellcheck="false" tabindex="0" style="position: absolute; bottom: -1em; padding: 0px; width: 1000px; height: 1em; outline: none;"></textarea></div><div class="CodeMirror-scrollbar-filler" cm-not-content="true"></div><div class="CodeMirror-gutter-filler" cm-not-content="true"></div><div class="CodeMirror-scroll" tabindex="-1"><div class="CodeMirror-sizer" style="margin-left: 0px; margin-bottom: 0px; border-right-width: 0px; padding-right: 0px; padding-bottom: 0px;"><div style="position: relative; top: 0px;"><div class="CodeMirror-lines" role="presentation"><div role="presentation" style="position: relative; outline: none;"><div class="CodeMirror-measure"><pre><span>xxxxxxxxxx</span></pre></div><div class="CodeMirror-measure"></div><div style="position: relative; z-index: 
1;"></div><div class="CodeMirror-code" role="presentation" style=""><div class="CodeMirror-activeline" style="position: relative;"><div class="CodeMirror-activeline-background CodeMirror-linebackground"></div><div class="CodeMirror-gutter-background CodeMirror-activeline-gutter" style="left: 0px; width: 0px;"></div><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span class="cm-keyword">for</span> f <span class="cm-keyword">in</span> *fa ; <span class="cm-keyword">do</span> gsed <span class="cm-attribute">-i</span> <span class="cm-string">'s/\(^[^>].*\)\*/\1/g'</span> <span class="cm-def">$f</span> ; <span class="cm-keyword">done</span></span></pre></div><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span class="cm-keyword">for</span> f <span class="cm-keyword">in</span> *fa ; <span class="cm-keyword">do</span> gsed <span class="cm-attribute">-i</span> <span class="cm-string">'s/\(^[^>].*\)\./\1/g'</span> <span class="cm-def">$f</span> ; <span class="cm-keyword">done</span></span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span cm-text="" cm-zwsp="">
</span></span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span class="cm-keyword">for</span> f <span class="cm-keyword">in</span> *fa ; <span class="cm-keyword">do</span> python /Users/Jcazet/opt/anaconda3/envs/workingEnv/bin/primary_transcript.py <span class="cm-def">$f</span> ; <span class="cm-keyword">done</span></span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span cm-text="" cm-zwsp="">
</span></span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span class="cm-builtin">cd</span> primary_transcripts</span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span cm-text="" cm-zwsp="">
</span></span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span class="cm-keyword">for</span> f <span class="cm-keyword">in</span> *fa ; <span class="cm-keyword">do</span> gsed <span class="cm-attribute">-i</span> <span class="cm-string">'s/\(^>[^ \|]\+\).*/\1/g'</span> <span class="cm-def">$f</span> ; <span class="cm-keyword">done</span></span></pre></div></div></div></div></div><div style="position: absolute; height: 0px; width: 1px; border-bottom-width: 0px; border-bottom-style: solid; border-bottom-color: transparent; top: 176px;"></div><div class="CodeMirror-gutters" style="display: none; height: 176px;"></div></div></div></pre><p><span>To make interpreting the OrthoFinder results easier, we wanted to incorporate gene names from certain well-studied species (e.g., humans, flies) into the sequence IDs used in the analysis. By doing this, we would be able to discern the identity of at least some genes in the OrthoFinder gene trees without having to first convert a complex gene ID into something more human-readable. </span></p><p><span>We used a custom R script to identify proteomes that were associated with functional annotations/gene names in Ensembl. We then used the gene IDs from those proteomes to download gene names (as well as GO terms) from BioMart. We exported new versions of the proteome FASTA files with modified headers that included the abbreviated gene name. 
We also exported tables that included all the metadata we downloaded (ensembl ID, short gene name, long gene name, GO terms, and uniparc ID) for each proteome as a separate reference.</span></p><p><span>(</span><em><span>07_funAnnot/getSymbols.R</span></em><span>)</span></p><pre class="md-fences md-end-block ty-contain-cm modeLoaded" spellcheck="false" lang="R" style="break-inside: unset;"><div class="CodeMirror cm-s-inner cm-s-null-scroll CodeMirror-wrap" lang="r"><div style="overflow: hidden; position: relative; width: 3px; height: 0px; top: 1307px; left: 286px;"><textarea autocorrect="off" autocapitalize="off" spellcheck="false" tabindex="0" style="position: absolute; bottom: -1em; padding: 0px; width: 1000px; height: 1em; outline: none;"></textarea></div><div class="CodeMirror-scrollbar-filler" cm-not-content="true"></div><div class="CodeMirror-gutter-filler" cm-not-content="true"></div><div class="CodeMirror-scroll" tabindex="-1"><div class="CodeMirror-sizer" style="margin-left: 0px; margin-bottom: 0px; border-right-width: 0px; padding-right: 0px; padding-bottom: 0px;"><div style="position: relative; top: 0px;"><div class="CodeMirror-lines" role="presentation"><div role="presentation" style="position: relative; outline: none;"><div class="CodeMirror-measure"><pre>x</pre></div><div class="CodeMirror-measure"></div><div style="position: relative; z-index: 1;"></div><div class="CodeMirror-code" role="presentation" style=""><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span class="cm-variable">library</span>(<span class="cm-variable">rstudioapi</span>)</span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span class="cm-variable">library</span>(<span class="cm-variable">biomaRt</span>)</span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span 
class="cm-variable">library</span>(<span class="cm-variable">Biostrings</span>)</span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span class="cm-variable">library</span>(<span class="cm-variable">plyr</span>)</span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span cm-text="" cm-zwsp="">
</span></span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span class="cm-variable">setwd</span>(<span class="cm-variable">dirname</span>(<span class="cm-variable">getActiveDocumentContext</span>()<span class="cm-operator cm-dollar">$</span><span class="cm-variable">path</span>))</span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span cm-text="" cm-zwsp="">
</span></span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span class="cm-comment">#get the list of all individual proteomes to be used for orthofinder analysis</span></span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span class="cm-variable">seqList</span> <span class="cm-operator cm-arrow"><-</span> <span class="cm-variable">list.files</span>(<span class="cm-string">"individual/primary_transcripts/"</span>, <span class="cm-variable">full.names</span> <span class="cm-operator">=</span> <span class="cm-variable">T</span>)</span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span cm-text="" cm-zwsp="">
</span></span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span class="cm-comment">#import AA sequences</span></span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span class="cm-variable">seqs</span> <span class="cm-operator cm-arrow"><-</span> <span class="cm-variable">lapply</span>(<span class="cm-variable">seqList</span>, <span class="cm-keyword">function</span>(<span class="cm-variable">x</span>) <span class="cm-variable">readAAStringSet</span>(<span class="cm-variable">x</span>))</span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span cm-text="" cm-zwsp="">
</span></span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span class="cm-comment">#extract species names from filenames</span></span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span class="cm-variable">seqNames</span> <span class="cm-operator cm-arrow"><-</span> <span class="cm-variable">gsub</span>(<span class="cm-string">".*/"</span>,<span class="cm-string">""</span>,<span class="cm-variable">seqList</span>)</span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span class="cm-variable">seqNames</span> <span class="cm-operator cm-arrow"><-</span> <span class="cm-variable">gsub</span>(<span class="cm-string">".fa"</span>,<span class="cm-string">""</span>,<span class="cm-variable">seqNames</span>)</span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span class="cm-variable">seqNames</span> <span class="cm-operator cm-arrow"><-</span> <span class="cm-variable">tolower</span>(<span class="cm-variable">seqNames</span>)</span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span class="cm-variable">seqNames</span> <span class="cm-operator cm-arrow"><-</span> <span class="cm-variable">gsub</span>(<span class="cm-string">"_"</span>,<span class="cm-string">""</span>,<span class="cm-variable">seqNames</span>)</span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span cm-text="" cm-zwsp="">
</span></span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span class="cm-variable">names</span>(<span class="cm-variable">seqs</span>) <span class="cm-operator cm-arrow"><-</span> <span class="cm-variable">seqNames</span></span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span cm-text="" cm-zwsp="">
</span></span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span class="cm-comment">#generate ensembl database name from species name</span></span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span class="cm-variable">setName</span> <span class="cm-operator cm-arrow"><-</span> <span class="cm-variable">vapply</span>(<span class="cm-variable">seqNames</span>, <span class="cm-keyword">function</span>(<span class="cm-variable">x</span>) <span class="cm-variable">paste0</span>(<span class="cm-variable">x</span>,<span class="cm-string">"_gene_ensembl"</span>), <span class="cm-variable">character</span>(<span class="cm-number">1</span>))</span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span cm-text="" cm-zwsp="">
</span></span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span class="cm-comment">#check to find which species have databases available</span></span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span class="cm-variable">ensembList</span> <span class="cm-operator cm-arrow"><-</span> <span class="cm-variable">lapply</span>(<span class="cm-variable">setName</span>, <span class="cm-keyword">function</span>(<span class="cm-variable">x</span>) <span class="cm-variable">try</span>(<span class="cm-variable">useEnsembl</span>(<span class="cm-variable">biomart</span> <span class="cm-operator">=</span> <span class="cm-string">"genes"</span>, <span class="cm-variable">dataset</span> <span class="cm-operator">=</span> <span class="cm-variable">x</span>), <span class="cm-variable">silent</span> <span class="cm-operator">=</span> <span class="cm-variable">T</span>))</span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span cm-text="" cm-zwsp="">
</span></span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span class="cm-comment">#subset to only include species with an ensembl db hit</span></span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span class="cm-variable">enSubset</span> <span class="cm-operator cm-arrow"><-</span> <span class="cm-variable">vapply</span>(<span class="cm-variable">ensembList</span>, <span class="cm-keyword">function</span>(<span class="cm-variable">x</span>) <span class="cm-operator">!</span><span class="cm-variable">is.character</span>(<span class="cm-variable">x</span>), <span class="cm-variable">logical</span>(<span class="cm-number">1</span>))</span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span cm-text="" cm-zwsp="">
</span></span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span class="cm-variable">ensembList</span> <span class="cm-operator cm-arrow"><-</span> <span class="cm-variable">ensembList</span>[<span class="cm-variable">enSubset</span>]</span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span cm-text="" cm-zwsp="">
</span></span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span class="cm-variable">seqs</span> <span class="cm-operator cm-arrow"><-</span> <span class="cm-variable">seqs</span>[<span class="cm-variable">enSubset</span>]</span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span cm-text="" cm-zwsp="">
</span></span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span class="cm-comment">#initialize empty results object</span></span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span class="cm-variable">annots</span> <span class="cm-operator cm-arrow"><-</span> <span class="cm-builtin">list</span>()</span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span cm-text="" cm-zwsp="">
</span></span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span class="cm-comment">#for each proteome, download ensembl annotation data</span></span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span class="cm-comment">#gene name, go terms, description, uniparc ID</span></span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span class="cm-keyword">for</span> (<span class="cm-variable">i</span> <span class="cm-keyword">in</span> <span class="cm-number">1</span><span class="cm-operator">:</span><span class="cm-variable">length</span>(<span class="cm-variable">seqs</span>)){</span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span cm-text="" cm-zwsp="">
</span></span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"> <span class="cm-variable">res</span> <span class="cm-operator cm-arrow"><-</span> <span class="cm-variable">getBM</span>(<span class="cm-variable">attributes</span> <span class="cm-operator">=</span> <span class="cm-variable">c</span>(<span class="cm-string">"ensembl_gene_id"</span>,<span class="cm-string">"external_gene_name"</span>,<span class="cm-string">"external_gene_source"</span>,<span class="cm-string">"go_id"</span>,<span class="cm-string">"entrezgene_description"</span>,<span class="cm-string">"uniparc"</span>),</span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"> <span class="cm-variable">filters</span> <span class="cm-operator">=</span> <span class="cm-string">"ensembl_gene_id"</span>,</span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"> <span class="cm-variable">values</span> <span class="cm-operator">=</span> <span class="cm-variable">substr</span>(<span class="cm-variable">gsub</span>(<span class="cm-string">"[.].*"</span>,<span class="cm-string">""</span>,<span class="cm-variable">seqs</span>[[<span class="cm-variable">i</span>]]@<span class="cm-variable">ranges</span>@<span class="cm-variable">NAMES</span>),<span class="cm-number">1</span>,<span class="cm-number">18</span>),</span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"> <span class="cm-variable">mart</span> <span class="cm-operator">=</span> <span class="cm-variable">ensembList</span>[[<span class="cm-variable">i</span>]])</span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"> </span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"> <span 
class="cm-comment">#drop any characters after a space in gene name</span></span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"> <span class="cm-variable">res</span>[,<span class="cm-number">2</span>] <span class="cm-operator cm-arrow"><-</span> <span class="cm-variable">gsub</span>(<span class="cm-string">"</span> <span class="cm-string">.*$"</span>,<span class="cm-string">""</span>,<span class="cm-variable">res</span>[,<span class="cm-number">2</span>])</span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"> <span class="cm-variable">annots</span>[[<span class="cm-variable">i</span>]] <span class="cm-operator cm-arrow"><-</span> <span class="cm-variable">res</span></span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;">}</span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span cm-text="" cm-zwsp="">
</span></span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span class="cm-comment">#generate ID that combines ensembl ID and gene name</span></span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span class="cm-comment">#this will be the ID that's used to replace the AA fasta header</span></span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span class="cm-variable">annots</span> <span class="cm-operator cm-arrow"><-</span> <span class="cm-variable">lapply</span>(<span class="cm-variable">annots</span>, <span class="cm-keyword">function</span>(<span class="cm-variable">x</span>) <span class="cm-variable">cbind</span>(<span class="cm-variable">x</span>,<span class="cm-variable">finAnnot</span> <span class="cm-operator">=</span> <span class="cm-variable">paste</span>(<span class="cm-variable">x</span>[,<span class="cm-number">1</span>],<span class="cm-variable">x</span><span class="cm-operator cm-dollar">$</span><span class="cm-variable">external_gene_name</span>, <span class="cm-variable">sep</span> <span class="cm-operator">=</span> <span class="cm-string">"_"</span>)))</span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span cm-text="" cm-zwsp="">
</span></span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span class="cm-comment">#subset annots to just be a conversion table from old to new IDs</span></span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span class="cm-variable">finAnnots</span> <span class="cm-operator cm-arrow"><-</span> <span class="cm-variable">lapply</span>(<span class="cm-variable">annots</span>, <span class="cm-keyword">function</span>(<span class="cm-variable">x</span>) <span class="cm-variable">unique</span>(<span class="cm-variable">x</span>[,<span class="cm-variable">c</span>(<span class="cm-number">1</span>,<span class="cm-number">7</span>)]))</span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span cm-text="" cm-zwsp="">
</span></span></pre><div class="CodeMirror-activeline" style="position: relative;"><div class="CodeMirror-activeline-background CodeMirror-linebackground"></div><div class="CodeMirror-gutter-background CodeMirror-activeline-gutter" style="left: 0px; width: 0px;"></div><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span class="cm-comment">#make sure to fix any instances where there was no gene name to append</span></span></pre></div><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span class="cm-variable">finAnnots</span> <span class="cm-operator cm-arrow"><-</span> <span class="cm-variable">lapply</span>(<span class="cm-variable">finAnnots</span>, <span class="cm-keyword">function</span>(<span class="cm-variable">x</span>) {</span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"> <span class="cm-variable">x</span>[,<span class="cm-number">2</span>] <span class="cm-operator cm-arrow"><-</span> <span class="cm-variable">gsub</span>(<span class="cm-string">'_$'</span>,<span class="cm-string">''</span>,<span class="cm-variable">x</span>[,<span class="cm-number">2</span>])</span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"> <span class="cm-builtin">return</span>(<span class="cm-variable">x</span>)</span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;">})</span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span cm-text="" cm-zwsp="">
</span></span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span class="cm-comment">#replace the old names on the AA with the new ones that have the gene name included</span></span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span class="cm-variable">newSeqs</span> <span class="cm-operator cm-arrow"><-</span> <span class="cm-variable">lapply</span>(<span class="cm-number">1</span><span class="cm-operator">:</span><span class="cm-variable">length</span>(<span class="cm-variable">seqs</span>), <span class="cm-keyword">function</span>(<span class="cm-variable">x</span>) {</span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"> <span class="cm-variable">newSeqObj</span> <span class="cm-operator cm-arrow"><-</span> <span class="cm-variable">seqs</span>[[<span class="cm-variable">x</span>]]</span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"> <span class="cm-variable">newSeqObj</span>@<span class="cm-variable">ranges</span>@<span class="cm-variable">NAMES</span> <span class="cm-operator cm-arrow"><-</span> <span class="cm-variable">mapvalues</span>(<span class="cm-variable">substr</span>(<span class="cm-variable">gsub</span>(<span class="cm-string">"[.].*"</span>,<span class="cm-string">""</span>,<span class="cm-variable">newSeqObj</span>@<span class="cm-variable">ranges</span>@<span class="cm-variable">NAMES</span>),<span class="cm-number">1</span>,<span class="cm-number">18</span>), </span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"> <span class="cm-variable">from</span> <span class="cm-operator">=</span> <span class="cm-variable">finAnnots</span>[[<span class="cm-variable">x</span>]][,<span class="cm-number">1</span>],</span></pre><pre 
class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"> <span class="cm-variable">to</span> <span class="cm-operator">=</span> <span class="cm-variable">gsub</span>(<span class="cm-string">"_$"</span>,<span class="cm-string">""</span>,<span class="cm-variable">finAnnots</span>[[<span class="cm-variable">x</span>]][,<span class="cm-number">2</span>]))</span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"> <span class="cm-builtin">return</span>(<span class="cm-variable">newSeqObj</span>)</span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;">})</span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span cm-text="" cm-zwsp="">
</span></span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span class="cm-comment">#generate modified filenames for the output so as not to overwrite the original files</span></span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"><span class="cm-variable">newFileNames</span> <span class="cm-operator cm-arrow"><-</span> <span class="cm-variable">vapply</span>(<span class="cm-variable">names</span>(<span class="cm-variable">seqs</span>), <span class="cm-keyword">function</span>(<span class="cm-variable">x</span>) {</span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"> <span class="cm-variable">fChar</span> <span class="cm-operator cm-arrow"><-</span> <span class="cm-variable">toupper</span>(<span class="cm-variable">substr</span>(<span class="cm-variable">x</span>,<span class="cm-number">1</span>,<span class="cm-number">1</span>))</span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"> <span class="cm-variable">lChar</span> <span class="cm-operator cm-arrow"><-</span> <span class="cm-variable">substr</span>(<span class="cm-variable">x</span>,<span class="cm-number">2</span>,<span class="cm-variable">nchar</span>(<span class="cm-variable">x</span>))</span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;"> <span class="cm-builtin">return</span>(<span class="cm-variable">paste0</span>(<span class="cm-variable">fChar</span>,<span class="cm-string">'_'</span>,<span class="cm-variable">lChar</span>))</span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" style="padding-right: 0.1px;">}, <span class="cm-string">""</span>)</span></pre><pre class=" CodeMirror-line " role="presentation"><span role="presentation" 
style="padding-right: 0.1px;"><span cm-text="" cm-zwsp="">