tv.com

If you made a script you can offer it to the others here, or ask help to improve it. You can also report here bugs & problems with existing scripts.
Post Reply
crocodilu_q
Posts: 14
Joined: 2005-10-20 10:35:25

tv.com

Post by crocodilu_q »

Hi all,

I've been having problems with the tv.com script for a few weeks now. It does not find any series name or episode. Did tv.com change its site architecture or something?
Or maybe you know another script for another site that imports tv series info ?

thx


Later edit:

I've decided to fix the script myself. It seems that tv.com has changed its page structure, and that is why the script doesn't work anymore. It will be ready soon, and I will post the modifications here in case anyone needs them.
crocodilu_q
Posts: 14
Joined: 2005-10-20 10:35:25

Post by crocodilu_q »

Job done ! (later edit - 3rd version :) )

Here is the script:

Code: Select all

(***************************************************

Ant Movie Catalog importation script
www.antp.be/software/moviecatalog/

[Infos]
Authors=n'alf  (<link>nalf75014@yahoo.fr</link>)
Title=tv.com
Description=TV series import from tv.com
Site=http://www.tv.com
Language=EN
Version=0.6 (06/07/2006)
Requires=3.5.0
Comments=inspired by the tvtome script by Alex Iribarren and modified
for the new tv.com structure by CROCODILU (crocodilu_q@yahoo.com)
License=This program is free software; you can redistribute it and/or modify it under the  terms of the GNU General Public License as published by the Free Software Foundation;  either version 2 of the License, or (at your option) any later version. |
GetInfo=1

[Options]

***************************************************)

program TvCom;

var
  EpName, EpNumber, SeName: string;
  MovieName: string;

// Returns the index of the first line of List, at or after StartAt, that
// contains Pattern; returns -1 when no such line exists.
// A negative StartAt is treated as 0.
function FindLine(Pattern: string; List: TStringList; StartAt: Integer): Integer;
var
  Idx: Integer;
begin
  Result := -1;
  if StartAt < 0 then
    Idx := 0
  else
    Idx := StartAt;
  // Walk forward until a match is recorded or the list is exhausted.
  while (Result < 0) and (Idx < List.Count) do
  begin
    if Pos(Pattern, List.GetString(Idx)) > 0 then
      Result := Idx
    else
      Idx := Idx + 1;
  end;
end;

// Both procedures are defined further down in the script but are called from
// AnalyzeEpListing, so they have to be announced before first use.
procedure AnalyzeEpisodePage(Address: string); forward;
procedure AnalyzeResults(var Search: string); forward;

// Parses one listing line holding a link, adds the link text to the pick tree
// and returns the link target in Address.
// The title is the text between the first '">' and '</a'; the target is the
// text between the first '"' and the first '">'.
procedure AddEpisodeLink(Line: string; var Address: string);
var
  TitleStart, UrlStart: Integer;
  EpTitle: string;
begin
  TitleStart := Pos('">', Line) + 2;
  EpTitle := Copy(Line, TitleStart, Pos('</a', Line) - TitleStart);
  HTMLDecode(EpTitle);
  UrlStart := Pos('"', Line) + 1;
  Address := Copy(Line, UrlStart, Pos('">', Line) - UrlStart);
  PickTreeAdd(EpTitle, Address);
end;

// Downloads the episode listing of a series and imports one episode from it.
//
// Address is the base address of the series; 'episode_listings.html' is
// appended to it. The global EpName is the episode title to look for and the
// global SeName is the series name used to restart the search.
//
// - If the site answers with its "Click here to continue to TV.com" page, the
//   series search is run again.
// - If exactly one line matches EpName, that episode is imported directly.
// - If several lines match, the user picks one.
// - If nothing matches, every link of the listing is offered to the user.
procedure AnalyzeEpListing(Address: string);
var
  Page: TStringList;
  LineNr, EndLine, Res: Integer;
begin
  Page := TStringList.Create;
  Address := Address + 'episode_listings.html';
  Page.Text := PostPage(Address, URLEncode('season=0'));

  // Interstitial page instead of the listing: release the page (it was leaked
  // before) and start over from the series search.
  if FindLine('Click here to continue to TV.com', Page, 0) > -1 then
  begin
    Page.Free;
    AnalyzeResults(SeName);
    Exit;
  end;

  PickTreeClear;
  PickTreeAdd('Results for "' + EpName + '"', '');
  LineNr := FindLine('</th>', Page, 0);
  Address := '';
  Res := 0;
  EndLine := FindLine('</table>', Page, 0);

  // Pass 1: collect the lines that contain the requested episode title.
  repeat
    LineNr := FindLine('<tr class="', Page, LineNr);
    // Only look for the title when another table row exists. Searching
    // unconditionally restarted the scan near the top of the page once the
    // rows ran out, which could add the same match again and never finish.
    if LineNr > -1 then
      LineNr := FindLine(EpName, Page, LineNr + 5);
    if LineNr > -1 then
    begin
      AddEpisodeLink(Page.GetString(LineNr), Address);
      Res := Res + 1;
      LineNr := LineNr + 1;
    end;
  until (LineNr < 0) or (LineNr > EndLine);

  if Res = 0 then
  begin
    // Pass 2: no match, so list every link from the first table row up to the
    // end of that table and let the user choose.
    LineNr := FindLine('<tr class="', Page, LineNr);
    EndLine := FindLine('</table>', Page, LineNr);
    repeat
      LineNr := FindLine('<a href="', Page, LineNr);
      // Stop as soon as there is no further link; reading line -1 would
      // raise an error and the scan would otherwise wrap around to line 0.
      if LineNr > -1 then
      begin
        // Lines carrying class="f-930" or an image are skipped.
        if (Pos('class="f-930"', Page.GetString(LineNr)) = 0) and
           (Pos('img src', Page.GetString(LineNr)) = 0) then
          AddEpisodeLink(Page.GetString(LineNr), Address);
        LineNr := LineNr + 1;
      end;
    until (LineNr < 0) or (LineNr > EndLine);
    if PickTreeExec(Address) then
      AnalyzeEpisodePage(Address);
  end
  else if Res = 1 then
    // Single match: Address already holds its link.
    AnalyzeEpisodePage(Address)
  else
  begin
    if PickTreeExec(Address) then
      AnalyzeEpisodePage(Address);
  end;

  Page.Free;
end;

// CutAfter is defined further down in the script but is used here, so it has
// to be announced before first use.
procedure CutAfter(var Str: string; Pattern: string); forward;

// Returns the index of the line located Offset lines below the first line of
// List that contains Marker. Returns -1 when the marker is missing or when the
// computed index lies beyond the last line.
function LineBelowMarker(Marker: string; List: TStringList; Offset: Integer): Integer;
var
  Idx: Integer;
begin
  Idx := FindLine(Marker, List, 0);
  if Idx > -1 then
  begin
    Idx := Idx + Offset;
    if Idx > List.Count - 1 then
      Idx := -1;
  end;
  Result := Idx;
end;

// Turns one raw HTML line holding a list of names into plain text: trims it,
// strips the tags, collapses runs of blanks and removes blanks before commas.
function TidyNameList(Raw: string): string;
var
  S: string;
  Before: Integer;
begin
  S := Trim(Raw);
  HTMLRemoveTags(S);
  // Repeat until the length stops changing, i.e. no double blank is left.
  repeat
    Before := Length(S);
    S := StringReplace(S, '  ', ' ');
  until Length(S) = Before;
  S := StringReplace(S, ' ,', ',');
  // NOTE(review): as posted, this literal is a single ordinary space, which
  // would strip every remaining blank from the names. It may originally have
  // been a non-breaking space or '&nbsp;' altered by the forum - confirm.
  S := StringReplace(S, ' ', '');
  Result := S;
end;

// Downloads the episode page at Address and fills the catalog fields from it:
// URL, year, comments (production code), rating, producer (writer), director,
// description and actors (stars plus guest stars).
// A field is left untouched when its marker is not found on the page, instead
// of reading an unrelated line or failing on line index -1.
procedure AnalyzeEpisodePage(Address: string);
var
  Line, TempStr, GuestStars: string;
  LineNr, GuestLineNr, P: Integer;
  Page: TStringList;
begin
  Page := TStringList.Create;
  Page.Text := GetPage(Address);

  // URL
  SetField(fieldURL, Address);

  // First Aired + Production Code (both taken from the same line)
  LineNr := FindLine('First Aired:', Page, 0);
  if LineNr > -1 then
  begin
    Line := Page.GetString(LineNr);
    CutAfter(Line, '<span class="f-bold f-666">');
    // The year is the 4 characters following the first ', '.
    P := Pos(', ', Line);
    if P > 0 then
      SetField(fieldYear, Copy(Line, P + 2, 4));
    // The production code is the 4 characters following 'Code: '.
    P := Pos('Code: ', Line);
    if P > 0 then
      SetField(fieldComments, 'Production Code: ' + Copy(Line, P + 6, 4));
  end;

  // Rating: the 3 characters following the first '">' of the rating span
  LineNr := FindLine('<span class="f-28 f-bold mt-10 mb-10 f-FF9 db lh-18">', Page, 0);
  if LineNr > -1 then
  begin
    Line := Page.GetString(LineNr);
    SetField(fieldRating, Copy(Line, Pos('">', Line) + 2, 3));
  end;

  // Writer: read 3 lines below the 'Writer:' label
  LineNr := LineBelowMarker('Writer:', Page, 3);
  if LineNr > -1 then
  begin
    Line := Page.GetString(LineNr);
    CutAfter(Line, '>');
    TempStr := Copy(Line, 1, Pos('</a>', Line) - 1);
    HTMLRemoveTags(TempStr);
    SetField(fieldProducer, 'Writer: ' + TempStr);
  end;

  // Director: read 3 lines below the 'Director:' label
  LineNr := LineBelowMarker('Director:', Page, 3);
  if LineNr > -1 then
  begin
    Line := Page.GetString(LineNr);
    CutAfter(Line, '>');
    TempStr := Copy(Line, 1, Pos('</a>', Line) - 1);
    HTMLRemoveTags(TempStr);
    SetField(fieldDirector, TempStr);
  end;

  // Description: read 3 lines below the main column opening tag
  LineNr := LineBelowMarker('<div id="main-col">', Page, 3);
  if LineNr > -1 then
  begin
    TempStr := Trim(Page.GetString(LineNr));
    HTMLRemoveTags(TempStr);
    SetField(fieldDescription, TempStr);
  end;

  // Cast: guest stars first, then the regular stars.
  GuestStars := '';
  GuestLineNr := LineBelowMarker('Guest Star:', Page, 3);
  if GuestLineNr > -1 then
    GuestStars := TidyNameList(Page.GetString(GuestLineNr));

  // NOTE(review): 'Star:' also matches a 'Guest Star:' line, so this relies on
  // the regular cast label appearing first on the page - confirm.
  TempStr := '';
  LineNr := LineBelowMarker('Star:', Page, 3);
  if LineNr > -1 then
    TempStr := TidyNameList(Page.GetString(LineNr));

  // Only write the actors field when at least one of the two lists was found.
  if (LineNr > -1) or (GuestLineNr > -1) then
    SetField(fieldActors, TempStr + #13#10 + 'Guest stars: ' + GuestStars);

  Page.Free;
end;

// Removes everything up to and including the first occurrence of Pattern from
// Str, keeping only the text that follows it.
// Str is left unchanged when Pattern does not occur in it. Without that check
// Pos returns 0 and the first Length(Pattern) - 1 characters of Str were cut
// off even though nothing had matched.
procedure CutAfter(var Str: string; Pattern: string);
var
  P: Integer;
begin
  P := Pos(Pattern, Str);
  if P > 0 then
    Str := Copy(Str, P + Length(Pattern), Length(Str));
end;

// Replaces Old by New in S and repeats the replacement until S no longer
// contains Old, so that occurrences created by a previous pass are replaced
// as well (e.g. collapsing a run of blanks into a single blank).
//
// When New itself contains Old, repeating could never finish, so only one
// StringReplace pass is made in that case. An empty Old leaves S unchanged.
// NOTE(review): assumes StringReplace matches the same way Pos does (same
// case sensitivity), otherwise the loop condition could stay true - confirm.
function StringReplaceAll(S, Old, New: string): string;
begin
  if Old <> '' then
  begin
    if Pos(Old, New) > 0 then
      S := StringReplace(S, Old, New)
    else
      while Pos(Old, S) > 0 do
        S := StringReplace(S, Old, New);
  end;
  Result := S;
end;

// Searches tv.com for the series named Search, lets the user pick one of the
// results and continues with the episode listing of the chosen series.
//
// Search is passed by reference but is not modified here. Each result name is
// kept in a local variable: the global SeName was previously used as scratch
// space, and because callers pass SeName as Search, the user's query was
// overwritten by the name of the last listed result.
procedure AnalyzeResults(var Search: string);
var
  Page: TStringList;
  LineNr, StartPos, EndLine: Integer;
  Line, Title, Address: string;
begin
  // Defined start value for PickTreeExec when the result list is empty.
  Address := '';

  Page := TStringList.Create;
  Page.Text := GetPage('http://www.tv.com/search.php?qs=' + URLEncode(Search) + '&type=11&stype=all&tag=search%3Bbutton');
  LineNr := FindLine('<table id="search-results"', Page, 0);
  EndLine := FindLine('</table>', Page, LineNr);

  PickTreeClear;
  PickTreeAdd('Results for "' + Search + '"', '');

  // Add every link line from the results table up to its closing tag.
  LineNr := FindLine('<a href="', Page, LineNr);
  repeat
    if LineNr > -1 then
    begin
      Line := Page.GetString(LineNr);
      // Displayed name: the line without tab characters and HTML markup.
      Title := Trim(Line);
      Title := StringReplace(Title, #9, '');
      HTMLRemoveTags(Title);
      HTMLDecode(Title);
      // Link target: from the first '"' up to 12 characters before the '?'.
      // Presumably the 12 characters are a trailing 'summary.html', which
      // leaves the series base address - confirm against the site markup.
      StartPos := Pos('"', Line) + 1;
      Address := Copy(Line, StartPos, Pos('?', Line) - StartPos - 12);
      PickTreeAdd(Title, Address);
      LineNr := FindLine('<a href="', Page, LineNr + 1);
    end;
  until (LineNr < 0) or (LineNr > EndLine);

  // The page is not needed any more; it was never released before.
  Page.Free;

  if PickTreeExec(Address) then
    AnalyzeEpListing(Address);
end;

// Script entry point: asks for the series name, determines the episode title
// (translated title field first, then a prompt) and starts the search.
begin
  if CheckVersion(3,5,0) then
  begin
    // Stop when the series prompt is cancelled or left empty; the result of
    // Input was ignored before, so a search was started anyway.
    if Input('TV.com Import', 'Enter the name of the series:', SeName) and (Trim(SeName) <> '') then
    begin
      //      SeName := GetField(fieldMedia);
      //      EpName := GetField(fieldOriginalTitle);
      //      Delete(EpName,1,7);
      if (EpName = '') then
        EpName := GetField(fieldTranslatedTitle);
      // An empty episode title is allowed: the listing then offers all
      // episodes of the series.
      if (EpName = '') then
        Input('TV.com Import', 'Enter the title of the episode:', EpName);
      AnalyzeResults(SeName);
    end;
  end else
    ShowMessage('This script requires a newer version of Ant Movie Catalog (at least the version 3.5.0)');
end.






If you have any suggestions, comments, bugs and so on, please post them. I have tested it for my series and my database and it works.

Edit (v2.0) :)
I added one extra piece of functionality. In my case this was absolutely necessary: if you select a few episodes and try to update them all at once, and you mistype a title or use a capital letter in the wrong place, the search engine will find nothing and you cannot advance further. This, combined with ANT's problem of mixing up the order of the selected items, was a real problem when trying to import info for an entire series. So now, if the search engine does not find a match for an episode, it will display ALL episodes, and you can easily select the desired one and move on. If it finds a perfect match, it won't display anything, just as it was designed before.

One more thing: I had to give up the category field because I could not find that info on the episode page where it was before.

Edit - v3.0 :)

One more "bad" thing from TV.COM: after a few searches, they kindly present you with an ads page and a "click to continue to tv.com" link, which obviously messes up the results. The script now checks for that and survives.
Last edited by crocodilu_q on 2006-06-07 08:21:16, edited 2 times in total.
antp
Site Admin
Posts: 9629
Joined: 2002-05-30 10:13:07
Location: Brussels
Contact:

Post by antp »

Thanks, I'll put it in on www.antp.be/temp/scripts with the other new scripts :)

By the way, do not forget to update the properties of the script, i.e. Author and Version info ;)
crocodilu_q
Posts: 14
Joined: 2005-10-20 10:35:25

Post by crocodilu_q »

And again, the tv.com pages have changed, and the script does not display the series name correctly :(
I will try to fix it in the next days.
JohnL
Posts: 2
Joined: 2006-09-18 18:10:54

Post by JohnL »

The code to extract the data from the page seems to work OK once you've found the correct page; however, the tv.com search function appears to have problems finding the right page. Unfortunately my programming skills aren't up to doing the coding, but I can see a way around it.

If you had something where you input the series name, then go to epguides and, taking the first letter of the series name, check the relevant menu page for a match.

For example if you wanted 'Stargate' you'd check the page:-

http://www.epguides.com/menus/

On that page there are 3 matches for 'Stargate', so you'd need to select the one you want. Only the entries that are in bold (have the bold tag) will have information on tv.com.

So say you selected SG1 you'd have the page:-

http://www.epguides.com/StargateSG1/

At this point you could enter a partial episode name to get a list of matches to select from, or possibly null to select all. Say you selected 'Children of the Gods (1)' you'd have the address:-

http://www.tv.com/stargate-sg-1/childre ... mmary.html

The code posted can extract the data from this page. If you also wanted the recap to put in the comments, just substitute the word recap for the word summary:-

http://www.tv.com/stargate-sg-1/childre ... recap.html

With my limited programing I got as far as extracting the Series name from the summary page

Code: Select all

// Series name: takes the first line of the page containing '<h1>', strips its
// HTML tags and stores the remaining text in the category field.
// Fragment meant to be placed inside AnalyzeEpisodePage (uses its Page, Line
// and LineNr variables).
// NOTE(review): FindLine returns -1 when '<h1>' is missing, and the result is
// passed to GetString unchecked.
//Series Name
  LineNr :=FindLine('<h1>', Page, 0);
  Line := Page.GetString(LineNr);  
  HTMLRemoveTags(Line);
  Setfield(fieldCategory,Line);
and the Episode Title

Code: Select all

  // Episode title: searches, starting at the current LineNr, for the line
  // containing 'class="pr-10 pl-10', reads the line right after it, trims it,
  // strips its HTML tags and stores the text in the original title field.
  // NOTE(review): if the marker is missing FindLine returns -1, so LineNr+1
  // is 0 and the first line of the page is read instead.
  //Episode Name
  LineNr :=FindLine('class="pr-10 pl-10', Page, LineNr);
  Line := Page.GetString(LineNr+1);  
  TempStr := Trim(Line);
  HTMLRemoveTags(TempStr);
  Setfield(fieldOriginalTitle,TempStr);
but got stuck on how to format the series and episode numbers with a leading zero (goes after the 'First Aired' stuff)

Code: Select all

  // Season/episode numbers: works on the current Line with its HTML tags
  // removed. Takes the 2 characters after 'Number: ' as the episode number
  // and the 2 characters after 'Num: ' as the series number, then stores
  // 'S<series>E<episode> Code: <4 characters after "Code: ">' in the
  // translated title field.
  // NOTE(review): EpisodeNum and SeriesNum are not declared in the posted
  // AnalyzeEpisodePage; they would have to be added to its var section as
  // strings.
  // NOTE(review): no leading zero is added here; a one-digit number is copied
  // together with whatever character follows it - confirm against the page.
  TempStr2 := Trim(Line);
  HTMLRemoveTags(TempStr2);
  EpisodeNum := copy(TempStr2, pos('Number: ',TempStr2)+8, 2);
  SeriesNum := copy(TempStr2, pos('Num: ',TempStr2)+5, 2);
  Setfield(fieldTranslatedTitle,'S' + SeriesNum + 'E' + EpisodeNum + ' Code: ' + copy(Line, 

pos('Code: ', Line) + 6, 4));
Hope this is of help.
Post Reply