Skip to content
Snippets Groups Projects
Commit fb82da18 authored by Janne Mareike Koschinski's avatar Janne Mareike Koschinski
Browse files

Implement hafas messages API

parent f1eab971
No related branches found
No related tags found
No related merge requests found
<div id="HIM_Text" class="himMessagesHigh">
<div id="HIM_Text__820800">
<span class="bold">
Ahrensburg&nbsp;-&nbsp;
Bad Oldesloe:&nbsp;
Bauarbeiten.&nbsp;
</span>
<span class="">
Der Zug f&#228;llt zwischen Ahrensburg und Bad Oldesloe aus. Ein Ersatzverkehr von Ahrensburg nach Bad Oldesloe ist eingerichtet. Bitte &#252;berpr&#252;fen Sie Ihre Verbindung noch einmal kurz vor der Reise.
</span>
</div>
<div id="HIM_Text__820803">
<span class="bold">
Bargteheide&nbsp;-&nbsp;
Bad Oldesloe:&nbsp;
Bauarbeiten.&nbsp;
</span>
<span class="">
Der Zug f&#228;llt zwischen Bargteheide und Bad Oldesloe aus. Ein Ersatzverkehr von Bargteheide nach Bad Oldesloe ist eingerichtet. Bitte &#252;berpr&#252;fen Sie Ihre Verbindung noch einmal kurz vor der Reise.
</span>
</div>
</div>
<div id="HIM_Text" class="himMessagesHigh">
<div id="HIM_Text__797833">
<span class="bold">
Neum&#252;nster&nbsp;-&nbsp;
Kiel Hbf:&nbsp;
Bauarbeiten.&nbsp;
</span>
<span class="">
Der Zug f&#228;llt zwischen Neum&#252;nster und Kiel Hbf aus. Ein Ersatzverkehr von Neum&#252;nster nach Kiel Hbf ist eingerichtet. Bitte &#252;berpr&#252;fen Sie Ihre Verbindung noch einmal kurz vor der Reise.
</span>
</div>
</div>
<div id="HIM_Text" class="himMessagesMiddle">
<div id="HIM_Text__805562">
<span class="bold">
Rendsburg&nbsp;-&nbsp;
Flensburg:&nbsp;
Bauarbeiten.&nbsp;
</span>
<span class="">
Die Ankunft verz&#246;gert sich bis zu 15 Minuten. Bitte &#252;berpr&#252;fen Sie Ihre Verbindung noch einmal kurz vor der Reise.
</span>
</div>
</div>
<div id="HIM_Text" class="himMessagesLow">
<div id="HIM_Text__819298">
<span class="bold">
Uelzen:&nbsp;
St&#246;rung.&nbsp;
</span>
<span class="">
Aufzug Gl 101 wird umgebaut (Letzte Aktualisierung 13:10)
</span>
</div>
</div>
<div id="HIM_Text" class="himMessagesMiddle">
<div id="HIM_Text__820812">
<span class="bold">
Hamburg Hbf&nbsp;-&nbsp;
L&#252;beck Hbf:&nbsp;
Bauarbeiten.&nbsp;
</span>
<span class="">
Der Zug h&#228;lt zus&#228;tzlich in Ahrensburg und Bargteheide. Bitte &#252;berpr&#252;fen Sie Ihre Verbindung noch einmal kurz vor der Reise.
</span>
</div>
</div>
<div id="HIM_Text" class="himMessagesMiddle">
<div id="HIM_Text__807918">
<span class="bold">
Hamburg Hbf&nbsp;-&nbsp;
Ahrensburg:&nbsp;
Bauarbeiten.&nbsp;
</span>
<span class="">
Die Ankunft verz&#246;gert sich bis zu 10 Minuten.
</span>
</div>
</div>
<div id="HIM_Text" class="himMessagesMiddle">
<div id="HIM_Text__823981">
<span class="bold">
Berlin S&#252;dkreuz&nbsp;-&nbsp;
Nauen:&nbsp;
Information.&nbsp;
</span>
<span class="">
Der Zug verkehrt in umgekehrter Wagenreihung. Die Rollstuhlmitnahme und die 1. Klasse befindet sich im Steuerwagen.
</span>
</div>
</div>
package bahn
import (
"bytes"
"github.com/andybalholm/cascadia"
"golang.org/x/net/html"
"io"
"regexp"
"strings"
)
// HafasMessagesFromBytes parses the HAFAS message HTML held in source.
// It wraps the bytes in a reader and delegates to HafasMessagesFromReader.
func HafasMessagesFromBytes(source []byte) ([]HafasMessage, error) {
	reader := bytes.NewReader(source)
	return HafasMessagesFromReader(reader)
}
// Selectors and patterns used to take a HAFAS message document apart.
var (
	// One selector per priority container class; each matches the <div>
	// elements that are direct children of such a container.
	hafasMessageHighSelector   = cascadia.MustCompile(".himMessagesHigh > div")
	hafasMessageMiddleSelector = cascadia.MustCompile(".himMessagesMiddle > div")
	hafasMessageLowSelector    = cascadia.MustCompile(".himMessagesLow > div")

	// Within a message element: the bold span (validity header) and the
	// first span that is not bold (message body).
	hafasMessageValiditySelector = cascadia.MustCompile("span.bold")
	hafasMessageContentSelector  = cascadia.MustCompile("span:not(.bold)")

	// hafasMessageValidityRegex captures "From", an optional "To" (introduced
	// by separator, dash, separator, newline) and an optional "Subject"
	// (introduced by colon, separator, newline; a trailing period is dropped).
	hafasMessageValidityRegex = regexp.MustCompile("^(?P<From>.+?)(?:\\p{Z}-\\p{Z}\\n(?P<To>.+?))?(?::\\p{Z}\\n(?P<Subject>.+?)\\.?)?$")

	// hafasMessageIdRegex captures the numeric suffix of an element id of the
	// form "HIM_Text__<digits>" as "Id".
	hafasMessageIdRegex = regexp.MustCompile("^HIM_Text__(?P<Id>\\d+)$")
)
// HafasMessagesFromReader parses the HTML read from source and returns one
// HafasMessage for every message element matched by the priority selectors.
//
// Messages are grouped by priority, not by document order: all high-priority
// messages come first, followed by middle and then low.
//
// A message element that lacks the bold header span or the content span
// yields empty From/To/Subject or Content fields instead of a panic.
//
// The returned error is whatever html.Parse reports; in that case the
// returned slice is nil.
func HafasMessagesFromReader(source io.Reader) ([]HafasMessage, error) {
	document, err := html.Parse(source)
	if err != nil {
		return nil, err
	}

	// nodeText returns the whitespace-trimmed text below node. MatchFirst
	// returns nil when nothing matches, so nil is mapped to "".
	nodeText := func(node *html.Node) string {
		if node == nil {
			return ""
		}
		return strings.TrimSpace(parseText(node))
	}

	var messages []HafasMessage
	parseMessage := func(node *html.Node, priority HafasMessagePriority) {
		validity := nodeText(hafasMessageValiditySelector.MatchFirst(node))
		content := nodeText(hafasMessageContentSelector.MatchFirst(node))

		// Pick up the un-namespaced "id" attribute; if it occurs more than
		// once the last occurrence wins.
		var id string
		for _, attr := range node.Attr {
			if attr.Namespace == "" && attr.Key == "id" {
				id = attr.Val
			}
		}

		parsedId := parseRegexGroups(hafasMessageIdRegex, strings.TrimSpace(id))
		parsedValidity := parseRegexGroups(hafasMessageValidityRegex, validity)

		messages = append(messages, HafasMessage{
			Priority: priority,
			Id:       parsedId["Id"],
			From:     parsedValidity["From"],
			To:       parsedValidity["To"],
			Subject:  parsedValidity["Subject"],
			Content:  content,
		})
	}

	// The order of this table defines the order of the result.
	groups := []struct {
		selector cascadia.Selector
		priority HafasMessagePriority
	}{
		{hafasMessageHighSelector, HafasMessagePriorityHigh},
		{hafasMessageMiddleSelector, HafasMessagePriorityMiddle},
		{hafasMessageLowSelector, HafasMessagePriorityLow},
	}
	for _, group := range groups {
		for _, node := range group.selector.MatchAll(document) {
			parseMessage(node, group.priority)
		}
	}

	return messages, nil
}
// parseText returns the concatenation of the data of every text node found
// at or below node, in document order.
func parseText(node *html.Node) string {
	text := ""
	parseTextInternal(node, &text)
	return text
}
// parseTextInternal appends to *out the data of every text node at or below
// node, walking the children depth-first in sibling order.
//
// A nil node contributes nothing; this keeps callers that pass the result of
// a selector lookup (which may be nil) from panicking.
func parseTextInternal(node *html.Node, out *string) {
	if node == nil {
		return
	}
	if node.Type == html.TextNode {
		*out += node.Data
		return
	}
	for child := node.FirstChild; child != nil; child = child.NextSibling {
		parseTextInternal(child, out)
	}
}
// parseRegexGroups matches input against regex and returns the captured
// groups keyed by their subexpression name.
//
// The returned map is never nil. It is empty when input does not match.
// Groups that did not participate in the match map to "". Unnamed capturing
// groups are stored under the key "".
func parseRegexGroups(regex *regexp.Regexp, input string) map[string]string {
	names := regex.SubexpNames()
	groups := make(map[string]string, len(names))

	match := regex.FindStringSubmatch(input)
	if match == nil {
		return groups
	}

	for i, name := range names {
		// Index 0 is the whole match, not a group. The upper bound is
		// strict: match[len(match)] would be out of range.
		if i == 0 || i >= len(match) {
			continue
		}
		groups[name] = match[i]
	}
	return groups
}
...@@ -2,4 +2,8 @@ module git.kuschku.de/justjanne/bahn-api ...@@ -2,4 +2,8 @@ module git.kuschku.de/justjanne/bahn-api
go 1.12 go 1.12
require gopkg.in/yaml.v2 v2.2.2 require (
github.com/andybalholm/cascadia v1.0.0
golang.org/x/net v0.0.0-20180218175443-cbe0f9307d01
gopkg.in/yaml.v2 v2.2.2
)
package bahn
// HafasMessage is a single notice extracted from a HAFAS message document.
type HafasMessage struct {
	// Id is the numeric suffix of the message element's id attribute.
	Id string `json:"id,omitempty" yaml:"id,omitempty"`
	// Priority is derived from the CSS class of the container the message
	// was found in.
	Priority HafasMessagePriority `json:"priority,omitempty" yaml:"priority,omitempty"`
	// From, To and Subject are taken from the bold header line of the
	// message; To and Subject are empty when the header does not carry them.
	From    string `json:"from,omitempty" yaml:"from,omitempty"`
	To      string `json:"to,omitempty" yaml:"to,omitempty"`
	Subject string `json:"subject,omitempty" yaml:"subject,omitempty"`
	// Content is the trimmed body text of the message.
	Content string `json:"content,omitempty" yaml:"content,omitempty"`
}

// HafasMessagePriority is the urgency class of a HafasMessage.
type HafasMessagePriority string

// Known priorities, serialized as upper-case strings.
const (
	HafasMessagePriorityLow    HafasMessagePriority = "LOW"
	HafasMessagePriorityMiddle HafasMessagePriority = "MIDDLE"
	HafasMessagePriorityHigh   HafasMessagePriority = "HIGH"
)
#!/bin/bash #!/bin/bash
function cleanup_xml { function cleanup_xml_roundtrip {
for i in $(seq 0 1 $2); do for i in $(seq 0 1 $2); do
CURRENT_FILE="${1}/${i}" CURRENT_FILE="${1}/${i}"
CURRENT_FILE_ROUNDTRIP="${CURRENT_FILE}.roundtrip.xml" CURRENT_FILE_ROUNDTRIP="${CURRENT_FILE}.roundtrip.xml"
...@@ -15,7 +15,7 @@ function cleanup_xml { ...@@ -15,7 +15,7 @@ function cleanup_xml {
done done
} }
function cleanup_json { function cleanup_json_roundtrip {
for i in $(seq 0 1 $2); do for i in $(seq 0 1 $2); do
CURRENT_FILE="${1}/${i}" CURRENT_FILE="${1}/${i}"
CURRENT_FILE_ROUNDTRIP="${CURRENT_FILE}.roundtrip.json" CURRENT_FILE_ROUNDTRIP="${CURRENT_FILE}.roundtrip.json"
...@@ -29,10 +29,21 @@ function cleanup_json { ...@@ -29,10 +29,21 @@ function cleanup_json {
done done
} }
# Pretty-print the files "${1}/0.json" .. "${1}/${2}.json" in place with jq.
#   $1  directory containing the numbered .json files
#   $2  highest file index (inclusive)
# A file is only replaced when jq succeeds; otherwise it is left untouched,
# the temporary file is removed and a note is written to stderr.
function cleanup_json {
  local i CURRENT_FILE CURRENT_FILE_JSON
  for i in $(seq 0 1 "$2"); do
    CURRENT_FILE="${1}/${i}"
    CURRENT_FILE_JSON="${CURRENT_FILE}.json"
    if jq "." "${CURRENT_FILE_JSON}" > "${CURRENT_FILE}.tmp"; then
      mv "${CURRENT_FILE}.tmp" "${CURRENT_FILE_JSON}"
    else
      # Without this guard a jq failure would overwrite the file with
      # whatever (possibly empty) output jq managed to produce.
      rm -f "${CURRENT_FILE}.tmp"
      echo "cleanup_json: jq failed for ${CURRENT_FILE_JSON}, file left unchanged" >&2
    fi
  done
}
CURRENT_DIR=$(dirname $(realpath "$0")) CURRENT_DIR=$(dirname $(realpath "$0"))
cleanup_xml "${CURRENT_DIR}/iris_station" 2 cleanup_xml_roundtrip "${CURRENT_DIR}/iris_station" 2
cleanup_xml "${CURRENT_DIR}/iris_realtime" 4 cleanup_xml_roundtrip "${CURRENT_DIR}/iris_realtime" 4
cleanup_xml "${CURRENT_DIR}/iris_timetable" 4 cleanup_xml_roundtrip "${CURRENT_DIR}/iris_timetable" 4
cleanup_xml "${CURRENT_DIR}/iris_wingdef" 2 cleanup_xml_roundtrip "${CURRENT_DIR}/iris_wingdef" 2
cleanup_json "${CURRENT_DIR}/apps_wagenreihung" 226 cleanup_json_roundtrip "${CURRENT_DIR}/apps_wagenreihung" 226
cleanup_json "${CURRENT_DIR}/hafas_messages" 24
...@@ -132,7 +132,7 @@ func jsonInput(raw interface{}, filename string) { ...@@ -132,7 +132,7 @@ func jsonInput(raw interface{}, filename string) {
} }
} }
func xmlOutput(raw interface{}, data interface{}, filename string) { func xmlRoundtrip(raw interface{}, data interface{}, filename string) {
var err error var err error
if err = encodeXml(raw, filename+".roundtrip.xml"); err != nil { if err = encodeXml(raw, filename+".roundtrip.xml"); err != nil {
...@@ -148,7 +148,7 @@ func xmlOutput(raw interface{}, data interface{}, filename string) { ...@@ -148,7 +148,7 @@ func xmlOutput(raw interface{}, data interface{}, filename string) {
} }
} }
func jsonOutput(raw interface{}, data interface{}, filename string) { func jsonRoundtrip(raw interface{}, data interface{}, filename string) {
var err error var err error
if err = encodeJson(raw, filename+".roundtrip.json"); err != nil { if err = encodeJson(raw, filename+".roundtrip.json"); err != nil {
...@@ -164,6 +164,18 @@ func jsonOutput(raw interface{}, data interface{}, filename string) { ...@@ -164,6 +164,18 @@ func jsonOutput(raw interface{}, data interface{}, filename string) {
} }
} }
// jsonOutput writes data to filename+".yaml" via encodeYaml and to
// filename+".json" via encodeJson, panicking with the error text if either
// call fails. The raw argument is not used.
func jsonOutput(raw interface{}, data interface{}, filename string) {
	if yamlErr := encodeYaml(data, filename+".yaml"); yamlErr != nil {
		panic(yamlErr.Error())
	}

	if jsonErr := encodeJson(data, filename+".json"); jsonErr != nil {
		panic(jsonErr.Error())
	}
}
const InputFolder = "data" const InputFolder = "data"
const OutputFolder = "out" const OutputFolder = "out"
...@@ -172,6 +184,7 @@ var timetableData []byte ...@@ -172,6 +184,7 @@ var timetableData []byte
var realtimeData []byte var realtimeData []byte
var wingDefinitionData []byte var wingDefinitionData []byte
var coachSequenceData []byte var coachSequenceData []byte
var hafasMessageData []byte
func TestMain(m *testing.M) { func TestMain(m *testing.M) {
var err error var err error
...@@ -191,6 +204,9 @@ func TestMain(m *testing.M) { ...@@ -191,6 +204,9 @@ func TestMain(m *testing.M) {
if coachSequenceData, err = ioutil.ReadFile(fmt.Sprintf("%s/%s/%d.json", InputFolder, "apps_wagenreihung", 0)); err != nil { if coachSequenceData, err = ioutil.ReadFile(fmt.Sprintf("%s/%s/%d.json", InputFolder, "apps_wagenreihung", 0)); err != nil {
panic(err) panic(err)
} }
if hafasMessageData, err = ioutil.ReadFile(fmt.Sprintf("%s/%s/%d.html", InputFolder, "hafas_messages", 0)); err != nil {
panic(err)
}
os.Exit(m.Run()) os.Exit(m.Run())
} }
...@@ -225,7 +241,13 @@ func BenchmarkCoachSequence(b *testing.B) { ...@@ -225,7 +241,13 @@ func BenchmarkCoachSequence(b *testing.B) {
} }
} }
func BenchmarkRoundtrip(b *testing.B) { func BenchmarkHafasMessages(b *testing.B) {
if _, err := HafasMessagesFromBytes(coachSequenceData); err != nil {
b.Error(err.Error())
}
}
func TestRoundtrip(t *testing.T) {
for i := 0; i < 3; i++ { for i := 0; i < 3; i++ {
var raw rawStations var raw rawStations
folderName := "iris_station" folderName := "iris_station"
...@@ -233,7 +255,7 @@ func BenchmarkRoundtrip(b *testing.B) { ...@@ -233,7 +255,7 @@ func BenchmarkRoundtrip(b *testing.B) {
output := fmt.Sprintf("%s/%s/%d", OutputFolder, folderName, i) output := fmt.Sprintf("%s/%s/%d", OutputFolder, folderName, i)
xmlInput(&raw, input) xmlInput(&raw, input)
data := parseStations(raw) data := parseStations(raw)
xmlOutput(&raw, &data, output) xmlRoundtrip(&raw, &data, output)
} }
for i := 0; i < 5; i++ { for i := 0; i < 5; i++ {
...@@ -243,7 +265,7 @@ func BenchmarkRoundtrip(b *testing.B) { ...@@ -243,7 +265,7 @@ func BenchmarkRoundtrip(b *testing.B) {
output := fmt.Sprintf("%s/%s/%d", OutputFolder, folderName, i) output := fmt.Sprintf("%s/%s/%d", OutputFolder, folderName, i)
xmlInput(&raw, input) xmlInput(&raw, input)
data := parseTimetable(raw) data := parseTimetable(raw)
xmlOutput(&raw, &data, output) xmlRoundtrip(&raw, &data, output)
} }
for i := 0; i < 5; i++ { for i := 0; i < 5; i++ {
...@@ -253,7 +275,7 @@ func BenchmarkRoundtrip(b *testing.B) { ...@@ -253,7 +275,7 @@ func BenchmarkRoundtrip(b *testing.B) {
output := fmt.Sprintf("%s/%s/%d", OutputFolder, folderName, i) output := fmt.Sprintf("%s/%s/%d", OutputFolder, folderName, i)
xmlInput(&raw, input) xmlInput(&raw, input)
data := parseTimetable(raw) data := parseTimetable(raw)
xmlOutput(&raw, &data, output) xmlRoundtrip(&raw, &data, output)
} }
for i := 0; i < 3; i++ { for i := 0; i < 3; i++ {
...@@ -263,7 +285,7 @@ func BenchmarkRoundtrip(b *testing.B) { ...@@ -263,7 +285,7 @@ func BenchmarkRoundtrip(b *testing.B) {
output := fmt.Sprintf("%s/%s/%d", OutputFolder, folderName, i) output := fmt.Sprintf("%s/%s/%d", OutputFolder, folderName, i)
xmlInput(&raw, input) xmlInput(&raw, input)
data := parseWingDefinition(raw) data := parseWingDefinition(raw)
xmlOutput(&raw, &data, output) xmlRoundtrip(&raw, &data, output)
} }
for i := 0; i < 227; i++ { for i := 0; i < 227; i++ {
...@@ -273,6 +295,22 @@ func BenchmarkRoundtrip(b *testing.B) { ...@@ -273,6 +295,22 @@ func BenchmarkRoundtrip(b *testing.B) {
output := fmt.Sprintf("%s/%s/%d", OutputFolder, folderName, i) output := fmt.Sprintf("%s/%s/%d", OutputFolder, folderName, i)
jsonInput(&raw, input) jsonInput(&raw, input)
data := parseCoachSequence(raw) data := parseCoachSequence(raw)
jsonRoundtrip(&raw, &data, output)
}
for i := 0; i < 25; i++ {
var raw []HafasMessage
folderName := "hafas_messages"
input := fmt.Sprintf("%s/%s/%d.html", InputFolder, folderName, i)
output := fmt.Sprintf("%s/%s/%d", OutputFolder, folderName, i)
f, err := os.Open(input)
if err != nil {
panic(err)
}
data, err := HafasMessagesFromReader(f)
if err != nil {
panic(err)
}
jsonOutput(&raw, &data, output) jsonOutput(&raw, &data, output)
} }
} }
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment