dtr/code/plot.awk

241 lines
5.4 KiB
Awk

#!/usr/bin/awk -f
# This program is a copy of guff, a plot device. https://github.com/silentbicycle/guff
# My copy here is written in awk instead of C, and has no compelling benefit.
# Public domain. @thingskatedid
# Modified for my purposes by sdomi (ja@sdomi.pl)
# Run as awk -v x=xyz ... or env variables for stuff?
# Assumptions: the data is evenly spaced along the x-axis
# TODO: moving average
# TODO: trend lines, or guess at complexities
# TODO: points vs. lines
# TODO: colourblind safe scheme
# TODO: center data around the 0 axis
# TODO: scanning for all float formats input, including -inf, NaN etc
# TODO: guess at whether to use lines or circles, based on delta within a window?
# Report whether the current record looks like a header row.
# Returns 1 if any field contains a character other than a digit, "-" or ".",
# and 0 otherwise.
# A field that is exactly "#" is skipped: "#" is the marker this program uses
# for a missing sample in data rows, so it must not turn a data row into titles.
# i is declared as an extra parameter so that it is local to this function.
function hastitle(    i) {
    for (i = 1; i <= NF; i++) {
        if ($i == "#") {
            continue
        }
        if ($i ~ /[^-0-9.]/) {
            return 1
        }
    }
    return 0
}
# Return the key of the largest value stored in array a, or -1 when a has
# no elements. Only a strictly greater value replaces the current candidate.
# i and max are locals (extra parameters), not arguments callers pass.
function amax(a, i, max) {
    max = -1
    for (i in a) {
        if (max == -1) {
            # first key visited becomes the initial candidate
            max = i
            continue
        }
        if (a[i] > a[max]) {
            max = i
        }
    }
    return max
}
# Rescale every column of the grid a[column, row] in place.
# Cells holding "#" are left untouched throughout.
# Fills the global arrays min[] and max[] with each column's range; both
# bounds start at 0, so the range always includes 0.
# delta is a local array holding max - min per column.
function normalise( delta) {
    # Pass 1: per column, find the range, then shift by min and divide by the
    # range (when it is non-zero) so the values land in 0..1.
    for (i = 1; i <= NF; i++) {
        max[i] = 0
        min[i] = 0
        for (j = 1; j <= NR; j++) {
            if (a[i, j] == "#") {
                continue
            }
            if (a[i, j] > max[i]) {
                max[i] = a[i, j]
            } else if (a[i, j] < min[i]) {
                min[i] = a[i, j]
            }
        }
        delta[i] = max[i] - min[i]
        for (j = 1; j <= NR; j++) {
            if (a[i, j] == "#") {
                continue
            }
            a[i, j] -= min[i]
            if (delta[i] > 0) {
                a[i, j] /= delta[i]
            }
        }
    }

    # TODO: rescale to center around 0
    # Pass 2: squish the columns slightly, in descending order of delta.
    # Columns are scaled independently, so they are never drawn to scale with
    # each other. The squish only hints intuitively at which ranges are
    # smaller, without drawing them to size (very small deltas would then not
    # be visible at all).
    k = 0
    prev = -1
    while (length(delta) > 0) {
        # take the column with the largest remaining delta
        i = amax(delta)

        # Columns whose deltas agree to three decimals share a rank; the
        # %.3f formatting is only used for rounding.
        if (prev != -1 && sprintf("%.3f", prev) != sprintf("%.3f", delta[i])) {
            k++
        }

        # +2 to squish things upwards a bit
        scale = (NF + 2 - k) / (NF + 2)

        # multiplying by 1 would be a no-op, so skip it
        if (scale != 1) {
            for (j = 1; j <= NR; j++) {
                if (a[i, j] == "#") {
                    continue
                }
                a[i, j] *= scale
            }
        }

        prev = delta[i]
        delete delta[i]
    }
}
# internal coordinates to svg coordinates
# Convert a point from the internal coordinate space to SVG coordinates and
# return it as the string "x,y" (both formatted with %u).
# x is stretched over the chart width inside the x margins; y is stretched
# over the height inside the y margins and subtracted from it, so larger y
# values produce smaller SVG y coordinates.
function point(x, y) {
    return sprintf("%u,%u",
        x * (chart_width - 2 * xmargin) + xmargin,
        (height - 2 * ymargin) - y * (height - 2 * ymargin) + ymargin)
}
# Draw column i as one or more <polyline> elements in colour color[i].
# A run of "#" cells ends the current polyline and opens a new one, so the
# drawn line has a gap there. last_was_empty makes a run of several "#"
# cells produce only a single break.
function line(i) {
    printf "<polyline stroke='%s%s' stroke-width='1.5' fill='#0000'", color[i], alpha
    printf " points='"
    last_was_empty = 0
    j = 1
    while (j <= NR) {
        if (a[i, j] != "#") {
            # rows are spread evenly along x by their index
            printf "%s ", point((j - 1) / NR, a[i, j])
            last_was_empty = 0
        } else if (last_was_empty == 0) {
            # first "#" of a run: close this segment and start the next
            printf "'/>"
            printf "<polyline stroke='%s%s' stroke-width='1.5' fill='#0000'", color[i], alpha
            printf " points='"
            last_was_empty = 1
        }
        j++
    }
    printf "'/>\n"
    last_was_empty = 0
}
# Emit one <circle> of radius 5 for every sample of column i, centred on the
# same coordinates line() plots for that sample.
# Cells holding "#" are skipped: "#" would otherwise be coerced to 0 and put
# a circle on the baseline for a sample that does not exist.
# j, p and q are locals (extra parameters).
function circles(i,    j, p, q) {
    for (j = 1; j <= NR; j++) {
        if (a[i, j] == "#") {
            continue
        }
        p = point((j - 1) / NR, a[i, j])
        # point() returns "x,y"; split it back into the two coordinates
        split(p, q, ",")
        printf "<circle cx='%u' cy='%u' r='5'></circle>\n",
            q[1], q[2]
    }
}
# Print the legend entry for column i: a small dot in the column's colour
# followed by the title and the column's [min, max] range.
#   i     - column number; selects color[i], min[i], max[i] and the row offset
#   title - the column's title text (may be empty)
# The title comes from the input, so "&", "<" and ">" are escaped before it
# is written into the SVG; otherwise such a header would produce malformed XML.
# label and style are locals (extra parameters).
function legend_text(i, title,    label, style) {
    # Pad to the common title width first, then escape, so that the longer
    # entity text does not disturb the column alignment.
    label = sprintf("%-*s", (title_width > 0) ? title_width + 1 : 0, title)
    # "&" must be handled first; "\\&" yields a literal ampersand in gsub
    gsub(/&/, "\\&amp;", label)
    gsub(/</, "\\&lt;", label)
    gsub(/>/, "\\&gt;", label)

    style = sprintf("fill: %s; font-size: %upx; font-family: mono", fg, font_size)

    printf " <g transform='translate(%u %u)'>\n", chart_width + gutter, i * line_height
    printf " <circle cx='%d' cy='%d' r='3.5' style='fill:%s;' stroke='%s'/>\n",
        -10, -line_height / 2 + 5, color[i], color[i]
    printf " <text style='%s' xml:space='preserve'>%s[%.3g, %.3g]</text>\n",
        style, label, min[i], max[i]
    printf " </g>\n"
}
# Write the complete SVG document to stdout, in this order: XML prolog and
# <svg> header, stylesheet, a polyline plus circles for every column, the
# legend, the two date placeholders, and a vertical rule at each side of the
# chart.
function display() {
    # Width of the longest title; legend_text() pads titles to this width.
    title_width = 0
    i = 1
    while (i <= NF) {
        if (length(title[i]) > title_width) {
            title_width = length(title[i])
        }
        i++
    }

    print "<?xml version='1.0'?>"
    printf "<svg xmlns='%s' width='%u' height='%u' version='1.1'>\n",
        "http://www.w3.org/2000/svg",
        chart_width + gutter + legend_width, height+line_height
    printf "<style>circle {fill:#0000;} svg {background-color: #222;} text {fill: #eeeeee; font-size: 15px; font-family: mono}</style>"

    # data series
    i = 1
    while (i <= NF) {
        line(i)
        circles(i)
        i++
    }

    # legend
    # NOTE(review): reading title[i] in the width loop above creates those
    # elements in awk, so this test appears to hold whenever NF > 0, even
    # when the input had no header row - confirm whether that is intended.
    if (length(title) > 0) {
        for (i = 1; i <= NF; i++) {
            legend_text(i, title[i])
        }
    }

    # date placeholders, written literally at the bottom left and bottom right
    printf "<g transform='translate(%u %u)'><text>###START_DATE###</text></g>", 5, height+line_height-3
    printf "<g transform='translate(%u %u)'><text>###END_DATE###</text></g>", chart_width-158, height+line_height-3

    # vertical rules at x=1 and x=chart_width
    printf "<polyline stroke='#666f' stroke-width='1.5' fill='#0000' points='1,%s 1,%s'/>", height+line_height/2, 10
    printf "<polyline stroke='#666f' stroke-width='1.5' fill='#0000' points='%s,%s %s,%s'/>", chart_width, height+line_height/2, chart_width, 10
    print "</svg>"
}
# Header detection: if the first record looks like titles, remember them
# and drop the record from the data.
# NR is decremented so that data rows are still numbered from 1. That makes
# the following record see NR == 1 as well, so header_checked ensures the
# test runs only once; without it the first data row after a header would be
# tested again and could be swallowed as a second header.
NR == 1 && !header_checked {
    header_checked = 1
    if (hastitle()) {
        for (i = 1; i <= NF; i++) {
            title[i] = $i
        }
        NR--
        next
    }
}
# Store every field of the current record in the grid, indexed as
# a[column, row] with the row taken from NR.
{
    i = 1
    while (i <= NF) {
        a[i, NR] = $i
        i++
    }
}
# After all input is read: set up colours and layout, check that there is a
# colour for every column, then normalise the data and print the chart.
END {
    # layout, in SVG user units
    chart_width = 600
    height = 120
    legend_width = 256
    gutter = 30
    xmargin = 0
    ymargin = 5

    # legend text metrics
    font_size = 15
    line_height = 20

    # foreground colour and the alpha suffix appended to line colours
    fg = "#eeeeee"
    alpha = "ff"

    # Colours taken from Bang Wong's colour-safe palette,
    # https://www.nature.com/articles/nmeth.1618
    # with the foreground colour as a sixth entry.
    color[1] = "#009E73"
    color[2] = "#F0E442"
    color[3] = "#CC79A7"
    color[4] = "#0072B2"
    color[5] = "#D55E00"
    color[6] = fg

    # one colour per column is required
    if (length(color) < NF) {
        print "too many fields" >> "/dev/stderr"
        exit 1
    }

    # the data is scaled 0..1 for our internal coordinate space
    normalise()
    display()
}