Skip to content
This repository has been archived by the owner on Jan 3, 2019. It is now read-only.

Commit

Permalink
4oD: Improved metadata tagging
Browse files Browse the repository at this point in the history
GiA was retrieving incorrect programme descriptions; this is now fixed. Also
switched to a simpler XML source for programme data.
  • Loading branch information
[email protected] committed Dec 30, 2012
1 parent 1b4ab87 commit ef05411
Show file tree
Hide file tree
Showing 8 changed files with 1,146 additions and 48 deletions.
2 changes: 2 additions & 0 deletions Credits.html
Original file line number Diff line number Diff line change
Expand Up @@ -52,5 +52,7 @@
By <a href="http://code.google.com/p/nibor-xbmc-repo/">Nibor</a>.</span></p>
<p class="Titles"><span class="Secondary_Titles">NSHost+ThreadedAdditions</span><span class="Main"><br />
By <a href="http://www.cocoawithlove.com/2009/11/drop-in-fix-for-problems-with-nshost.html">Matt Gallagher</a>.</span></p>
<p class="Titles"><span class="Secondary_Titles">NSString+HTML (from MWFeedParser)</span><span class="Main"><br />
By <a href="https://github.com/mwaterfall/MWFeedParser">Mike Waterfall</a>.</span></p>
</body>
</html>
181 changes: 134 additions & 47 deletions FourODDownload.m
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
#import "FourODDownload.h"
#import "ASIHTTPRequest.h"
#import "NSHost+ThreadedAdditions.h"
#import "NSString+HTML.h"
#import <Python/Python.h>

@implementation FourODDownload
Expand Down Expand Up @@ -44,7 +45,26 @@ - (void)launchMetaRequest
errorCache = [[NSMutableString alloc] initWithString:@""];
processErrorCache = [NSTimer scheduledTimerWithTimeInterval:.25 target:self selector:@selector(processError) userInfo:nil repeats:YES];

ASIHTTPRequest *dataRequest = [ASIHTTPRequest requestWithURL:[NSURL URLWithString:[show url]]];
NSScanner *scanner = [NSScanner scannerWithString:[show url]];
[scanner scanUpToString:@"#" intoString:nil];
[scanner scanString:@"#" intoString:nil];
NSString *pid = nil;
[scanner scanUpToString:@"lklk" intoString:&pid];

if (!pid)
{
[self addToLog:[NSString stringWithFormat:@"ERROR: GiA cannot interpret the 4oD URL: %@", [show url]]];
[show setReasonForFailure:@"MetadataProcessing"];
[show setComplete:[NSNumber numberWithBool:YES]];
[show setSuccessful:[NSNumber numberWithBool:NO]];
[show setValue:@"Download Failed" forKey:@"status"];
[nc postNotificationName:@"DownloadFinished" object:show];
return;
}

[show setRealPID:pid];
NSURL *requestURL = [NSURL URLWithString:[NSString stringWithFormat:@"http://www.channel4.com/programmes/asset/%@",[show realPID]]];
ASIHTTPRequest *dataRequest = [ASIHTTPRequest requestWithURL:requestURL];
[dataRequest setDidFinishSelector:@selector(dataRequestFinished:)];
[dataRequest setTimeOutSeconds:10];
[dataRequest setNumberOfTimesToRetryOnTimeout:3];
Expand All @@ -56,51 +76,123 @@ -(void)dataRequestFinished:(ASIHTTPRequest *)request
{
if (!running)
return;
NSString *responseString = [[NSString alloc] initWithData:[request responseData] encoding:NSUTF8StringEncoding];
BOOL verbose = [[NSUserDefaults standardUserDefaults] boolForKey:@"Verbose"];
if (verbose)
[self addToLog:[NSString stringWithFormat:@"DEBUG: Programme Data Response: %@", responseString] noTag:YES];

NSScanner *scanner = [NSScanner scannerWithString:responseString];

[scanner scanUpToString:@"og:image" intoString:nil];
[scanner scanString:@"og:image\" content=\"" intoString:nil];
[scanner scanUpToString:@"\"" intoString:&thumbnailURL];

NSString *description = nil, *seriesTitle = nil;
[scanner scanUpToString:@"<meta name=\"description\"" intoString:nil];
[scanner scanUpToString:@"4oD" intoString:nil];
[scanner scanString:@"4oD. " intoString:nil];
[scanner scanUpToString:@"\"/>" intoString:&description];
[show setDesc:description];

[scanner scanUpToString:@"<h1 class=\"brandTitle\" data-wsbrandtitle=" intoString:nil];
[scanner scanString:@"<h1 class=\"brandTitle\" data-wsbrandtitle=" intoString:nil];
[scanner scanUpToString:@"title=\"" intoString:nil];
[scanner scanString:@"title=\"" intoString:nil];
[scanner scanUpToString:@"\">" intoString:&seriesTitle];
[show setSeriesName:seriesTitle];

[show setEpisodeName:[[[show showName] componentsSeparatedByString:@" - "] objectAtIndex:1]];

NSInteger series, episode;
[scanner scanUpToString:@"seriesNo" intoString:nil];
[scanner scanString:@"seriesNo\">Series " intoString:nil];
[scanner scanInteger:&series];
[show setSeason:series];
[scanner scanUpToString:@"episodeNo" intoString:nil];
[scanner scanString:@"episodeNo\">Episode " intoString:nil];
[scanner scanInteger:&episode];
[show setEpisode:episode];
NSLog(@"Response Status Code: %ld",(long)[request responseStatusCode]);
if ([request responseStatusCode] == 200)
{
NSString *responseString = [[NSString alloc] initWithData:[request responseData] encoding:NSUTF8StringEncoding];
BOOL verbose = [[NSUserDefaults standardUserDefaults] boolForKey:@"Verbose"];
if (verbose)
[self addToLog:[NSString stringWithFormat:@"DEBUG: Programme Data Response: %@", responseString] noTag:YES];

NSScanner *scanner = [NSScanner scannerWithString:responseString];

NSString *episodeTitle = nil;
[scanner scanUpToString:@"<episodeTitle>" intoString:nil];
[scanner scanString:@"<episodeTitle>" intoString:nil];
[scanner scanUpToString:@"</" intoString:&episodeTitle];
episodeTitle = [episodeTitle stringByDecodingHTMLEntities];
[show setEpisodeName:episodeTitle];

if (verbose)
[self addToLog:[NSString stringWithFormat:@"DEBUG: Programme Data Processed: thumbnailURL=%@ description=%@ seriesTitle=%@ series=%ld episode=%ld", thumbnailURL, description, seriesTitle, series, episode]];

NSString *seriesTitle = nil;
[scanner scanUpToString:@"<brandTitle>" intoString:nil];
[scanner scanString:@"<brandTitle>" intoString:nil];
[scanner scanUpToString:@"</" intoString:&seriesTitle];
seriesTitle = [seriesTitle stringByDecodingHTMLEntities];
[show setSeriesName:seriesTitle];

NSInteger episodeNumber;
[scanner scanUpToString:@"<episodeNumber>" intoString:nil];
[scanner scanString:@"<episodeNumber>" intoString:nil];
[scanner scanInteger:&episodeNumber];
[show setEpisode:episodeNumber];

NSInteger seriesNumber;
[scanner scanUpToString:@"<seriesNumber>" intoString:nil];
[scanner scanString:@"<seriesNumber>" intoString:nil];
[scanner scanInteger:&seriesNumber];
[show setSeason:seriesNumber];

NSString *imagePath = nil;
[scanner scanUpToString:@"<imagePath>" intoString:nil];
[scanner scanString:@"<imagePath>" intoString:nil];
[scanner scanUpToString:@"</" intoString:&imagePath];
if (imagePath)
thumbnailURL = [NSString stringWithFormat:@"http://www.channel4.com%@",imagePath];

NSString *episodeGuideUrl = nil;
[scanner scanUpToString:@"<episodeGuideUrl>" intoString:nil];
[scanner scanString:@"<episodeGuideUrl>" intoString:nil];
[scanner scanUpToString:@"</" intoString:&episodeGuideUrl];

if (!(episodeTitle && seriesTitle && episodeNumber && seriesNumber && imagePath && episodeGuideUrl))
[self addToLog:[NSString stringWithFormat:@"INFO: Some programme data not found. Tagging will be incomplete."] noTag:YES];

if (verbose)
[self addToLog:[NSString stringWithFormat:@"DEBUG: Programme Data Processed: episodeTitle=%@ seriesTitle=%@ episodeNumber=%ld seriesNumber=%ld imagePath=%@ episodeGuideUrl=%@", episodeTitle, seriesTitle, episodeNumber, seriesNumber, imagePath, episodeGuideUrl]];

if (episodeGuideUrl)
{
NSURL *requestURL = [NSURL URLWithString:[NSString stringWithFormat:@"http://www.channel4.com%@",episodeGuideUrl]];
ASIHTTPRequest *descRequest = [ASIHTTPRequest requestWithURL:requestURL];
[descRequest setDidFinishSelector:@selector(descRequestFinished:)];
[descRequest setTimeOutSeconds:10];
[descRequest setNumberOfTimesToRetryOnTimeout:3];
[descRequest setDelegate:self];
[descRequest startAsynchronous];
}
else
{
[self doHostLookup];
}
}
else
{
[self addToLog:[NSString stringWithFormat:@"WARNING: Programme data request failed. Tagging will be incomplete."] noTag:YES];
[self doHostLookup];
}
}

// Finish callback for the description (episode guide) page request.
// On an HTTP 200 response, extracts the contents of the page's "synopsis" meta tag,
// converts it to plain text and stores it as the show description. Whatever the
// outcome, processing then continues with the host lookup.
-(void)descRequestFinished:(ASIHTTPRequest *)request
{
    if (!running)
        return;

    NSLog(@"Response Status Code: %ld",(long)[request responseStatusCode]);

    // Any status other than 200 means no description is available; carry on without one.
    if ([request responseStatusCode] != 200)
    {
        [self addToLog:@"WARNING: Programme description request failed. Tagging will be incomplete." noTag:YES];
        [self doHostLookup];
        return;
    }

    NSString *pageSource = [[NSString alloc] initWithData:[request responseData] encoding:NSUTF8StringEncoding];
    BOOL debugLogging = [[NSUserDefaults standardUserDefaults] boolForKey:@"Verbose"];
    if (debugLogging)
    {
        [self addToLog:[NSString stringWithFormat:@"DEBUG: Description Data Response: %@", pageSource] noTag:YES];
    }

    // Position the scanner just past the opening of the synopsis meta tag's content attribute.
    NSString *synopsisTagStart = @"<meta name=\"synopsis\" content=\"";
    NSScanner *pageScanner = [NSScanner scannerWithString:pageSource];
    [pageScanner scanUpToString:synopsisTagStart intoString:nil];
    [pageScanner scanString:synopsisTagStart intoString:nil];

    // Everything up to the closing of the tag is the synopsis; it stays nil if nothing was scanned.
    NSString *rawSynopsis = nil;
    [pageScanner scanUpToString:@"\"/>" intoString:&rawSynopsis];
    NSString *plainSynopsis = [rawSynopsis stringByConvertingHTMLToPlainText];
    [show setDesc:plainSynopsis];

    if (plainSynopsis == nil)
    {
        [self addToLog:@"INFO: Programme description not found. Tagging may be incomplete." noTag:YES];
    }

    if (debugLogging)
    {
        [self addToLog:[NSString stringWithFormat:@"DEBUG: Programme Data Processed: synopsis=%@", plainSynopsis]];
    }

    [self doHostLookup];
}

// Starts the host-resolution step that precedes the metadata request.
// If the "<defaultsPrefix>SkipDNSLookup" user default is set, no lookup is performed and
// hostLookupFinished: is called directly with nil. Otherwise a single lookup of
// ais.channel4.com is requested, with hostLookupFinished: as its callback selector.
-(void)doHostLookup
{
    BOOL skipLookup = [[NSUserDefaults standardUserDefaults] boolForKey:[NSString stringWithFormat:@"%@SkipDNSLookup", defaultsPrefix]];
    if (skipLookup)
    {
        [self hostLookupFinished:nil];
    }
    else
    {
        // Exactly one lookup must be issued, and only on this branch: an extra call
        // outside the if/else would invoke hostLookupFinished: again, even when the
        // user asked to skip the lookup.
        [NSHost hostWithName:@"ais.channel4.com" inBackgroundForReceiver:self selector:@selector(hostLookupFinished:)];
    }
}

-(void)hostLookupFinished:(NSHost *)aHost
Expand All @@ -112,12 +204,6 @@ -(void)hostLookupFinished:(NSHost *)aHost
hostAddr = [aHost address];
if (!hostAddr)
hostAddr = @"ais.channel4.com";
NSScanner *scanner = [NSScanner scannerWithString:[show url]];
[scanner scanUpToString:@"#" intoString:nil];
[scanner scanString:@"#" intoString:nil];
NSString *pid;
[scanner scanUpToString:@"lklk" intoString:&pid];
[show setRealPID:pid];
NSURL *requestURL = [NSURL URLWithString:[NSString stringWithFormat:@"http://%@/asset/%@",hostAddr,[show realPID]]];
NSLog(@"Metadata URL: %@",requestURL);
[self addToLog:[NSString stringWithFormat:@"INFO: Metadata URL: %@", requestURL] noTag:YES];
Expand All @@ -128,6 +214,7 @@ -(void)hostLookupFinished:(NSHost *)aHost
[request setNumberOfTimesToRetryOnTimeout:3];
[request setDelegate:self];

NSScanner *scanner = nil;
NSString *proxyOption = [[NSUserDefaults standardUserDefaults] valueForKey:@"Proxy"];
if ([proxyOption isEqualToString:@"Custom"])
{
Expand Down
66 changes: 66 additions & 0 deletions GTMNSString+HTML.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,66 @@
//
// GTMNSString+HTML.h
// Dealing with NSStrings that contain HTML
//
// Copyright 2006-2008 Google Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License"); you may not
// use this file except in compliance with the License. You may obtain a copy
// of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
// WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
// License for the specific language governing permissions and limitations under
// the License.
//

#import <Foundation/Foundation.h>

/// Utilities for NSStrings containing HTML.
@interface NSString (GTMNSStringHTMLAdditions)

/// Get a string where internal characters that need escaping for HTML are escaped.
///
/// For example, '&' becomes '&amp;'. This will only cover characters from table
/// A.2.2 of http://www.w3.org/TR/xhtml1/dtds.html#a_dtd_Special_characters
/// which is what you want for a Unicode-encoded webpage. If you have an ASCII
/// or non-encoded webpage, please use gtm_stringByEscapingForAsciiHTML, which
/// will encode all characters.
///
/// Call this only once per string: escaping text that is already escaped
/// escapes it again (the '&' of '&amp;' would itself be escaped).
///
/// Returns:
///   Autoreleased NSString
///
- (NSString *)gtm_stringByEscapingForHTML;

/// Get a string where internal characters that need escaping for HTML are escaped.
///
/// For example, '&' becomes '&amp;'.
/// All non-mapped characters (Unicode characters that don't have a &keyword;
/// mapping) will be converted to the appropriate &#xxx; value. If your webpage
/// is Unicode encoded (UTF16 or UTF8), use gtm_stringByEscapingForHTML instead,
/// as it is faster and produces less bloated and more readable HTML (as long
/// as you are using a Unicode-compliant HTML reader).
///
/// Call this only once per string: escaping text that is already escaped
/// escapes it again (the '&' of '&amp;' would itself be escaped).
///
/// Returns:
///   Autoreleased NSString
///
- (NSString *)gtm_stringByEscapingForAsciiHTML;

/// Get a string where internal characters that are escaped for HTML are unescaped.
///
/// For example, '&amp;' becomes '&'.
/// Handles numeric forms such as &#32; and &#x32; as well.
///
/// Returns:
///   Autoreleased NSString
///
- (NSString *)gtm_stringByUnescapingFromHTML;

@end
Loading

0 comments on commit ef05411

Please sign in to comment.