Skip to content

Commit 008af85

Browse files
Merge pull request #11 from th0ma7/fix-html.unescape
zap2it: Multiple fixes and added tags
2 parents 03684f8 + f845c74 commit 008af85

File tree

1 file changed

+57
-44
lines changed

1 file changed

+57
-44
lines changed

zap2it-GuideScrape.py

Lines changed: 57 additions & 44 deletions
Original file line numberDiff line numberDiff line change
@@ -20,47 +20,45 @@
2020

2121
def sanitizeData(data):
    """Return ``data`` with the five XML-reserved characters turned into entities.

    Replaces ``&``, ``"``, ``'``, ``<`` and ``>`` with ``&amp;``, ``&quot;``,
    ``&apos;``, ``&lt;`` and ``&gt;`` respectively, so the result can be placed
    in XML element text or in a quoted attribute value.

    ``data`` must be a string (``str.replace`` is called on it).
    """
    # https://stackoverflow.com/questions/1091945/what-characters-do-i-need-to-escape-in-xml-documents
    # '&' has to be handled first: every other entity introduces an ampersand,
    # and escaping '&' afterwards would double-encode them (e.g. '&amp;lt;').
    replacements = (
        ('&', '&amp;'),
        ('"', '&quot;'),
        ("'", '&apos;'),
        ('<', '&lt;'),
        ('>', '&gt;'),
    )
    for char, entity in replacements:
        data = data.replace(char, entity)
    return data
29+
2930
def buildXMLChannel(channel):
    """Render one channel mapping as an XMLTV ``<channel>`` element.

    Reads ``channelId``, ``channelNo`` and ``callSign`` (required) plus
    ``affiliateName`` and ``thumbnail`` (skipped when missing or ``None``)
    from ``channel`` and returns the element as a tab-indented string that
    ends with a newline.

    Field values are first HTML-unescaped and then escaped again for XML, so
    a value such as ``A&amp;E`` or ``A&E`` is always written as ``A&amp;E``
    and the generated document stays well-formed.
    """
    def asText(value):
        # Element content: only &, < and > need escaping.
        return html.escape(html.unescape(value), quote=False)

    def asAttr(value):
        # Attribute values are double-quoted, so quotes must be escaped too.
        return html.escape(html.unescape(value), quote=True)

    channelNo = channel["channelNo"]
    callSign = channel["callSign"]

    lines = [
        "\t" + '<channel id="' + asAttr(channel["channelId"]) + '">',
        "\t\t" + '<display-name>' + asText(channelNo + " " + callSign) + '</display-name>',
        "\t\t" + '<display-name>' + asText(channelNo) + '</display-name>',
        "\t\t" + '<display-name>' + asText(callSign) + '</display-name>',
    ]

    affiliateName = channel.get("affiliateName")
    if affiliateName is not None:
        # Decode entities before title-casing; title() would otherwise turn
        # '&amp;' into '&Amp;', which is no longer a recognised entity.
        prettyName = html.unescape(affiliateName).title()
        lines.append("\t\t" + '<display-name>' + html.escape(prettyName, quote=False) + '</display-name>')

    thumbnail = channel.get("thumbnail")
    if thumbnail is not None:
        # The value is prefixed with 'http:' as-is; everything from the first
        # '?' onwards (the query string) is dropped.
        iconPath = thumbnail.partition('?')[0]
        lines.append("\t\t" + '<icon src="http:' + html.escape(iconPath, quote=True) + '" />')

    lines.append("\t" + '</channel>')
    return "\n".join(lines) + "\n"
3740

3841
def buildXMLProgram(event,channelId):
3942
#2018-04-11T21:00:00Z
4043
#20180408120000 +0000
4144
xml = ""
42-
xml = xml + ' <programme start="' + buildXMLDate(event["startTime"]) + '" '
43-
xml = xml + 'stop="' + buildXMLDate(event["endTime"]) + '" channel="' + sanitizeData(channelId) + '">' + "\n"
44-
xml = xml + ' <title lang="' + optLanguage + '">' + sanitizeData(event["program"]["title"]) + '</title>' + "\n"
45+
season = "0"
46+
episode = "0"
47+
48+
xml = xml + "\t" + '<programme start="' + buildXMLDate(event["startTime"]) + '" '
49+
xml = xml + 'stop="' + buildXMLDate(event["endTime"]) + '" channel="' + html.unescape(channelId) + '">' + "\n"
50+
xml = xml + "\t\t" + '<title lang="' + optLanguage + '">' + sanitizeData(event["program"]["title"]) + '</title>' + "\n"
4551
if event["program"]["episodeTitle"] is not None:
46-
xml = xml + ' <sub-title lang="' + optLanguage + '">' + sanitizeData(event["program"]["episodeTitle"]) + ' </sub-title>' + "\n"
52+
xml = xml + "\t\t" + '<sub-title lang="' + optLanguage + '">' + sanitizeData(event["program"]["episodeTitle"]) + '</sub-title>' + "\n"
4753
if event["program"]["shortDesc"] is None:
4854
event["program"]["shortDesc"] = "Unavailable"
49-
xml = xml + ' <desc lang="' + optLanguage + '">' + html.escape(event["program"]["shortDesc"]) + '</desc>' + "\n"
50-
xml = xml + ' <length units="minutes">' + sanitizeData(event["duration"]) + '</length>' + "\n"
51-
for category in event["filter"]:
52-
xml = xml + ' <category>' + sanitizeData(category.replace('filter-','')) + '</category>' + "\n"
55+
xml = xml + "\t\t" + '<desc lang="' + optLanguage + '">' + sanitizeData(event["program"]["shortDesc"]) + '</desc>' + "\n"
56+
xml = xml + "\t\t" + '<length units="minutes">' + html.unescape(event["duration"]) + '</length>' + "\n"
5357
if event["thumbnail"] is not None:
54-
xml = xml + ' <thumbnail>http://zap2it.tmsimg.com/assets/' + event["thumbnail"] + '.jpg</thumbnail>' + "\n"
55-
xml = xml + ' <icon src="http://zap2it.tmsimg.com/assets/' + event["thumbnail"] + '.jpg" />' + "\n"
56-
if event["rating"] is not None:
57-
xml = xml + ' <rating>' + "\n"
58-
xml = xml + ' <value>' + event["rating"] + '</value>' + "\n"
59-
xml = xml + ' </rating>' + "\n"
60-
xml = xml + ' <subtitles type="teletext" />' + "\n"
61-
season = "0"
62-
episode = "0"
63-
episodeid = ""
58+
xml = xml + "\t\t" + '<thumbnail>http://zap2it.tmsimg.com/assets/' + event["thumbnail"] + '.jpg</thumbnail>' + "\n"
59+
xml = xml + "\t\t" + '<icon src="http://zap2it.tmsimg.com/assets/' + event["thumbnail"] + '.jpg" />' + "\n"
60+
61+
xml = xml + "\t\t" + '<url>https://tvlistings.zap2it.com//overview.html?programSeriesId=' + event["seriesId"] + '&amp;tmsId=' + event["program"]["id"] + '</url>' + "\n"
6462

6563
try:
6664
#if "season" in event:
@@ -69,26 +67,41 @@ def buildXMLProgram(event,channelId):
6967
if event["program"]["episode"] is not None:
7068
episode = str(event["program"]["episode"])
7169

72-
#if "id" in event:
73-
if event["program"]["id"] is not None:
74-
episodeid = str(event["program"]["id"])
7570
except KeyError:
7671
print("no season for:" + event["program"]["title"])
72+
73+
for category in event["filter"]:
74+
xml = xml + "\t\t" + '<category lang="en">' + html.unescape(category.replace('filter-','')) + '</category>' + "\n"
7775

7876
#print season + "." + episode
7977
if ((int(season) != 0) and (int(episode) != 0)):
80-
if int(season) < 10:
81-
season = "0" + str(season)
82-
if int(episode) < 10:
83-
episode = "0" + str(episode)
84-
xml = xml + ' <episode-num system="SxxExx">S' + season + "E" + episode + "</episode-num>" + "\n"
85-
xml = xml + ' <episode-num system="common">S' + season + "E" + episode + "</episode-num>" + "\n"
86-
87-
showid = event["seriesId"].replace('SH','')
88-
episodeid = episodeid.replace('EP' + showid,'')
89-
xml = xml + ' <episode-num system="dd_progid">EP' + sanitizeData(showid + '.' + episodeid) + '</episode-num>' + "\n"
78+
xml = xml + "\t\t" + '<category lang="en">Series</category>' + "\n"
79+
xml = xml + "\t\t" + '<episode-num system="common">S' + str(season).zfill(2) + "E" + str(episode).zfill(2) + "</episode-num>" + "\n"
80+
xml = xml + "\t\t" + '<episode-num system="xmltv_ns">' + str(int(season) - 1) + "." + str(int(episode) - 1) + ".</episode-num>" + "\n"
81+
82+
if event["program"]["id"][-4:] == "0000":
83+
xml = xml + "\t\t" + '<episode-num system="dd_progid">' + event["seriesId"] + '.' + event["program"]["id"][-4:] + '</episode-num>' + "\n"
84+
else:
85+
xml = xml + "\t\t" + '<episode-num system="dd_progid">' + event["seriesId"].replace('SH','EP') + '.' + event["program"]["id"][-4:] + '</episode-num>' + "\n"
86+
87+
for flag in event["flag"]:
88+
if (flag == "New"):
89+
xml = xml + "\t\t<new />\n"
90+
elif (flag == "Finale"):
91+
xml = xml + "\t\t<last-chance />\n"
92+
elif (flag == "Premiere"):
93+
xml = xml + "\t\t<premiere />\n"
94+
95+
for tag in event["tags"]:
96+
if (tag == "CC"):
97+
xml = xml + "\t\t" + '<subtitles type="teletext" />' + "\n"
98+
99+
if event["rating"] is not None:
100+
xml = xml + "\t\t" + '<rating>' + "\n"
101+
xml = xml + "\t\t\t" + '<value>' + event["rating"] + '</value>' + "\n"
102+
xml = xml + "\t\t" + '</rating>' + "\n"
90103

91-
xml = xml + ' </programme>'+"\n"
104+
xml = xml + "\t" + '</programme>'+"\n"
92105
return xml
93106

94107
def buildXMLDate(inputDateString):
@@ -197,7 +210,7 @@ def buildXMLDate(inputDateString):
197210
closestTimestamp = closestTimestamp + (60*60*3)
198211

199212
guideXML = '<?xml version="1.0" encoding="UTF-8"?>' + "\n"
200-
guideXML = guideXML + '<!DOCTYPE tv SYSTEM "xmltv.dtd">' + "\n"
213+
guideXML = guideXML + '<!DOCTYPE tv SYSTEM "xmltv.dtd">' + "\n\n"
201214

202215
guideXML = guideXML + '<tv source-info-url="http://tvlistings.zap2it.com/" source-info-name="zap2it.com" generator-info-name="zap2it-GuideScraping" generator-info-url="[email protected]">' + "\n"
203216

0 commit comments

Comments
 (0)