-
Notifications
You must be signed in to change notification settings - Fork 0
/
Makefile
58 lines (42 loc) · 2.1 KB
/
Makefile
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
# `all` names a command, not a file, so it is declared phony.
.PHONY: all

# With no prerequisites, .SECONDARY treats every target as secondary:
# files built as intermediate steps of a chain are not auto-deleted.
.SECONDARY:

# Delete a target whose recipe exits non-zero. Most recipes below redirect
# their output into the target, so without this a failed command would leave
# an empty-but-newer file that later runs consider up to date.
.DELETE_ON_ERROR:

# Default goal: the two link lists derived from the replayed contributions.
all: data/output/replayed_links_to_restore.csv data/output/replayed_links_to_delete.csv
# Download one export archive into data/tatoeba/.
# The rule has no prerequisites, so it only runs when the archive is missing.
# -N (timestamping) / -P (directory prefix) are the short forms of the wget
# options; the archive file name is taken from the target itself.
data/tatoeba/%.tar.bz2:
	wget -N -P data/tatoeba/ http://downloads.tatoeba.org/exports/$(@F)
# Unpack <name>.tar.bz2 into data/tatoeba/ to obtain <name>.csv.
# -m (touch) stamps extracted files with the extraction time instead of the
# time stored in the archive, so the CSV ends up newer than its archive.
data/tatoeba/%.csv: data/tatoeba/%.tar.bz2
	tar -C data/tatoeba/ -x -j -m -f $<
# Explicit rule: sentence_comments.csv comes from comments.tar.bz2, whose name
# does not share the CSV's stem, so the generic archive pattern rule cannot
# be used for it.
# -m (touch) stamps extracted files with the extraction time.
data/tatoeba/sentence_comments.csv: data/tatoeba/comments.tar.bz2
	tar -C data/tatoeba/ -x -j -m -f $<
# Create the output directory (and any missing parents); succeeds if it
# already exists.
data/output/:
	mkdir -p $@
# Extract "<field 2><TAB><number>" pairs from comment lines whose third
# whitespace-separated field is "Horus" and which contain "Please go to":
#   1. grep keeps the matching comment lines;
#   2. sed rewrites each line to its second field, a tab, and the number that
#      follows "Please go to #" (up to the next period);
#   3. the second grep drops pairs where both numbers are identical;
#   4. sort orders by the first field, ignoring leading blanks, so the file
#      can be fed to join.
# NOTE(review): field 2 is presumably the sentence id the comment is attached
# to, and the number the sentence it redirects to; confirm against the export
# format.
# data/output/ is an order-only prerequisite (after `|`): the directory must
# exist, but its mtime, which changes whenever a file is added to it, must not
# trigger a rebuild.
data/output/horus_redirects.csv: data/tatoeba/sentence_comments.csv | data/output/
	grep -e '^\S*\s\S*\sHorus\s.*Please go to' $< \
	| sed -e 's/^\S*\s\(\S*\)\s\S*\s\S*\s\S*\sPlease go to #\([0-9]*\)\..*$$/\1\t\2/' \
	| grep -ve '^\([0-9]*\)\s\1$$' \
	| sort -k1b,1 \
	> $@
# List the distinct values of the first tab-separated column of
# data/tatoeba/<name>.csv, sorted on that column (leading blanks ignored) so
# the result can be used with comm/join.
# data/output/ is an order-only prerequisite (after `|`): it must exist, but
# its mtime, which changes whenever a file is added to it, must not trigger a
# rebuild.
data/output/ids_in_%.csv: data/tatoeba/%.csv | data/output/
	cut -f1 $< | sort -k1b,1 -u > $@
# Keep the ids that occur in ids_in_links.csv but not in ids_in_sentences.csv:
# comm suppresses column 2 (lines unique to the second file) and column 3
# (lines common to both), leaving only lines unique to the first file.
data/output/linked_ghosts.csv: data/output/ids_in_links.csv data/output/ids_in_sentences.csv
	comm -2 -3 $< $(word 2,$^) > $@
# Join linked_ghosts.csv with horus_redirects.csv on the first field of each
# file, keeping only ids present in both. join writes its output fields
# separated by a single space.
data/output/ghost_redirects.csv: data/output/linked_ghosts.csv data/output/horus_redirects.csv
	join -1 1 -2 1 $< $(word 2,$^) > $@
# Join ghost_redirects.csv with links.csv on the first field of each file.
# links.csv is sorted on that field on the fly and passed to join on stdin
# (`-`). Output fields are space-separated.
# The sort options are placed before the file operand (options after operands
# only work through GNU option permutation), and the links file is taken from
# the prerequisite list rather than repeating its path.
data/output/ghost_link_redirection.csv: data/output/ghost_redirects.csv data/tatoeba/links.csv
	sort -k1b,1 $(word 2,$^) | join -j 1 $< - > $@
# Project space-separated fields 1 and 3 of ghost_link_redirection.csv.
# NOTE(review): presumably field 1 is the original (ghost) id and field 3 the
# id it is linked to; confirm against the join output.
data/output/original_ghost_links.csv: data/output/ghost_link_redirection.csv
	cut --delimiter=' ' --fields=1,3 $< > $@
# Project space-separated fields 2 and 3 of ghost_link_redirection.csv.
# NOTE(review): presumably field 2 is the redirect destination id and field 3
# the id it is linked to; confirm against the join output.
data/output/fixed_ghost_links.csv: data/output/ghost_link_redirection.csv
	cut --delimiter=' ' --fields=2,3 $< > $@
# Run ./replay_contributions.py on the contributions export.
# The five outputs are targets of one explicit rule; GNU Make reads this as
# five independent rules sharing the recipe, so the script may be invoked once
# per out-of-date target (and concurrently under `make -j`).
# NOTE(review): the recipe never references the target name; presumably the
# script writes all five files itself. Confirm against replay_contributions.py.
# data/output/ is an order-only prerequisite (after `|`): it must exist, but
# its mtime, which changes whenever a file is added to it, must not trigger a
# rebuild.
data/output/replay.sqlite \
data/output/replayed_links_present.csv \
data/output/replayed_links_deleted.csv \
data/output/replayed_sentences_present.csv \
data/output/replayed_sentences_deleted.csv: data/tatoeba/contributions_20181127.csv | data/output/
	./replay_contributions.py $<
# Combine replayed_links_deleted.csv with the current links.csv using the
# `and` operation of ./tupleset.py, capturing its stdout as the target.
# NOTE(review): `and` is presumably a set intersection (links the replay says
# were deleted but that still exist in the export); confirm in tupleset.py.
data/output/replayed_links_to_delete.csv: data/output/replayed_links_deleted.csv data/tatoeba/links.csv
	./tupleset.py and $< $(word 2,$^) > $@
# Combine replayed_links_present.csv with the current links.csv using the
# `gt` operation of ./tupleset.py, capturing its stdout as the target.
# NOTE(review): `gt` is presumably "in the first set but not the second"
# (links the replay says should exist but that are missing from the export);
# confirm in tupleset.py.
data/output/replayed_links_to_restore.csv: data/output/replayed_links_present.csv data/tatoeba/links.csv
	./tupleset.py gt $< $(word 2,$^) > $@
# Run ./rehydrate.py once sentences.csv and links.csv are available.
# The script takes no arguments here. NOTE(review): presumably it locates its
# inputs and writes data/output/rehydrated.sqlite by itself; confirm in
# rehydrate.py.
# This target is not a prerequisite of `all`; it is only built when requested.
# data/output/ is an order-only prerequisite (after `|`): it must exist, but
# its mtime, which changes whenever a file is added to it, must not trigger a
# rebuild.
data/output/rehydrated.sqlite: data/tatoeba/sentences.csv data/tatoeba/links.csv | data/output/
	./rehydrate.py