diff --git a/scripts/src/main/bash/oneliners.txt b/scripts/src/main/bash/oneliners.txt
index af315b68df..7f465c222d 100644
--- a/scripts/src/main/bash/oneliners.txt
+++ b/scripts/src/main/bash/oneliners.txt
@@ -57,4 +57,23 @@ find -maxdepth 2 -mindepth 2 -type d | env LC_ALL=C sort | awk -F / '{sub(/wiki/
 gzip -d < enwiki/20120601/enwiki-20120601-interlanguage-links-same-as.ttl.gz | grep -v -E 'resource/(Category|Template):' | wc -l
 
 # count number of abstracts, i.e. non-redirect, non-disambig pages in Wikipedia article namespace, in all extracted languages. Result in 3.8: 20805392
-grep 'short-abstracts\.ttl' lines-bytes-packed.txt | awk '{s+=$3} END {print s}'
\ No newline at end of file
+grep 'short-abstracts\.ttl' lines-bytes-packed.txt | awk '{s+=$3} END {print s}'
+
+# count mappings (per language and total). sed splits "mappings/Mapping_<lang>.xml:<count>" into fields so that $2 is the language and $4 the count. NOTE(review): the grep pattern was empty in this patch (an empty pattern counts every line); '<page>' assumes the Mapping_* files are MediaWiki XML exports with one <page> element per mapping - confirm.
+grep -c '<page>' mappings/Mapping_* | sed 's/[_:.]/ /g' | sort -k 4 -n -r | awk '{s+=$4; print $2 " " $4} END {print "total " s}'
+
+# ontology: count classes. NOTE(review): the grep pattern was empty in this patch; '<owl:Class ' assumes dbpedia_3.* is the ontology serialized as RDF/XML with one '<owl:Class rdf:about=...>' start tag per class - confirm.
+grep -c '<owl:Class ' dbpedia_3.*
+
+# ontology: count datatype properties, not specialized (exactly one path segment after /ontology/). NOTE(review): the grep pattern was empty in this patch; reconstructed from this comment, assuming the http://dbpedia.org/ontology/ namespace - confirm.
+grep -c -E '<owl:DatatypeProperty rdf:about="http://dbpedia\.org/ontology/[^/"]+"' dbpedia_3.*
+
+# ontology: count specialized datatype properties (two path segments after /ontology/, i.e. <Class>/<property>). NOTE(review): the grep pattern was empty in this patch; reconstructed from this comment - confirm.
+grep -c -E '<owl:DatatypeProperty rdf:about="http://dbpedia\.org/ontology/[^/"]+/[^/"]+"' dbpedia_3.*
+