my imdb.com script with portuguese title translation and....

If you made a script you can offer it to the others here, or ask help to improve it. You can also report here bugs & problems with existing scripts.
Post Reply
joazito

my imdb.com script with portuguese title translation and....

Post by joazito »

... and a few other tweaks.
Namely, the script picks only one of the descriptions/comments; I find it unnecessary to choose between them. Any will do. And the category isn't simply the first category found... that was pretty shallow... Especially because imdb.com orders categories alphabetically, not by relevance.
Hmm I think that's it.

Code: Select all

// GETINFO SCRIPTING
// IMDB (US) import with small picture

(***************************************************
*  Movie importation script for:                  *
*      IMDB (US), http://akas.imdb.com              *
*                                                 *
*  (c) 2002 Antoine Potten    antoine@buypin.com  *
*  Improvements made by Danny Falkov              *
*  Improvements made by Kai Blankenhorn           *
*  2003-01-04 : Remove duplicates in PickTree     *
*               HTMLDecode in GetDescriptions     *
*               by Ork <ork@everydayangels.net>   *
*                                                 *
*  For use with Ant Movie Catalog 3.4.0           *
*  www.ant.be.tf/moviecatalog ··· www.buypin.com  *
*                                                 *
*  The source code of the script can be used in   *
*  another program only if full credits to        *
*  script author and a link to Ant Movie Catalog  *
*  website are given in the About box or in       *
*  the documentation of the program               *
***************************************************)

program IMDb;
var
  // Title used for the search; taken from the catalog fields and then edited
  // by the user in the Input dialog. Also shown in the pick-list captions.
  MovieName: string;
  // Duplicate detection while listing search results: '' until the first
  // title is seen, then that title, or '*' once a different title shows up.
  TheMovieTitle: string;
  // Address of the only distinct title found so far; reset to '' as soon as
  // a second, different title is seen (the user is then asked to choose).
  TheMovieAddress: string;

// Returns the index of the first line of List, at or after StartAt, that
// contains Pattern; returns -1 when no such line exists.
// A negative StartAt is treated as 0.
function FindLine(Pattern: string; List: TStringList; StartAt: Integer): Integer;
var
  Idx: Integer;
begin
  Result := -1;
  if StartAt < 0 then
    StartAt := 0;
  Idx := StartAt;
  // Stop at the end of the list or as soon as a match has been recorded.
  while (Idx < List.Count) and (Result < 0) do
  begin
    if Pos(Pattern, List.GetString(Idx)) > 0 then
      Result := Idx;
    Idx := Idx + 1;
  end;
end;

// Downloads Address and dispatches on what came back:
//  - a page without '<TITLE>IMDb' is treated as a movie page: the URL field
//    is stored and the page is handed to AnalyzeMoviePage;
//  - otherwise it is a search-result page: every known result section is
//    added to the pick tree, then either the single distinct title is
//    followed directly or the user is asked to pick one.
procedure AnalyzePage(Address: string);
var
  Html: TStringList;
  Cursor: Integer;
begin
  Html := TStringList.Create;
  Html.Text := GetPage(Address);
  if pos('<TITLE>IMDb', Html.Text) <> 0 then
  begin
    // Search results: walk the sections in page order. Cursor is advanced by
    // AddMoviesTitles, so each lookup resumes after the previous section.
    PickTreeClear;
    Cursor := FindLine('<H2><A NAME="top">Most popular searches</A></H2>', Html, 0);
    if Cursor >= 0 then
    begin
      PickTreeAdd('Most popular searches', '');
      AddMoviesTitles(Html, Cursor);
    end;

    Cursor := FindLine('<H2><A NAME="mov">Movies</A></H2>', Html, Cursor);
    if Cursor >= 0 then
    begin
      PickTreeAdd('Movies', '');
      AddMoviesTitles(Html, Cursor);
    end;

    Cursor := FindLine('<H2><A NAME="tvm">TV-Movies</A></H2>', Html, Cursor);
    if Cursor >= 0 then
    begin
      PickTreeAdd('TV-Movies', '');
      AddMoviesTitles(Html, Cursor);
    end;

    Cursor := FindLine('<H2><A NAME="tvs">TV series</A></H2>', Html, Cursor);
    if Cursor >= 0 then
    begin
      PickTreeAdd('TV Series', '');
      AddMoviesTitles(Html, Cursor);
    end;

    Cursor := FindLine('<H2><A NAME="vid">Made for video</A></H2>', Html, Cursor);
    if Cursor >= 0 then
    begin
      PickTreeAdd('Made for video', '');
      AddMoviesTitles(Html, Cursor);
    end;

    // IMDb sometimes lists the same title both under "Most popular searches"
    // and under "Movies". When only one distinct title was seen,
    // TheMovieAddress points at it and the user is not asked.
    if TheMovieAddress <> '' then
      AnalyzePage(TheMovieAddress)
    else if PickTreeExec(Address) then
      AnalyzePage(Address);
  end else
  begin
    // Direct hit on a movie page.
    SetField(fieldURL, 'http://imdb.com/Title' + copy(Address, pos('?',Address), length(Address)));
    AnalyzeMoviePage(Html);
  end;
  Html.Free;
end;

// Extracts the catalog fields from an already downloaded IMDb movie page.
//
// Page holds the HTML of the movie page, one line per entry. Each section
// below locates a marker line with FindLine, slices the value out of that
// line (or a neighbouring one) and stores it with SetField. Sections whose
// marker is missing are skipped. The user is asked to choose a description
// and a translated (Portuguese) title through pick lists.
// DisplayResults is called at the end.
procedure AnalyzeMoviePage(Page: TStringList);
var
  Line, Value, Value2, FullValue: string;
  LineNr: Integer;
  BeginPos, EndPos: Integer;
begin

  // Original Title & Year
  LineNr := FindLine('<title>', Page, 0);
  if LineNr > -1 then
  begin
    // Only read the line once we know it exists (index -1 is invalid).
    Line := Page.GetString(LineNr);
    BeginPos := pos('<title>', Line);
    if BeginPos > 0 then
      BeginPos := BeginPos + 7;
    EndPos := pos('(', Line);
    if EndPos = 0 then
      EndPos := Length(Line);
    // "- 1" drops the blank that separates the title from "(year)".
    Value := copy(Line, BeginPos, EndPos - BeginPos - 1);
    HTMLDecode(Value);
    SetField(fieldOriginalTitle, Value);
    // Test for the parenthesis before stepping past it, so that a title
    // without "(year)" leaves the year field alone.
    BeginPos := pos('(', Line);
    if BeginPos > 0 then
    begin
      BeginPos := BeginPos + 1;
      EndPos := pos(')', Line);
      Value := copy(Line, BeginPos, EndPos - BeginPos);
      SetField(fieldYear, Value);
    end;
  end;

  // Rating
  LineNr := FindLine('User Rating:', Page, 0);
  if LineNr > -1 then
  begin
    // The value is expected four lines below the label.
    Line := Page.GetString(LineNr + 4);
    if Pos('/10', Line) > 0 then
    begin
      BeginPos := pos('<b>', Line) + 3;
      // Combine the integer digit and the first decimal digit, then round.
      Value := IntToStr(Round(StrToInt(StrGet(Line, BeginPos), 0) + (StrToInt(StrGet(Line, BeginPos + 2), 0) / 10)));
      SetField(fieldRating, Value);
    end;
  end;

  // Picture: try the known cover hosts in turn.
  LineNr := FindLine('<img alt="cover" align="left" src="http://ia.imdb.com/media/imdb/', Page, 0);
  if LineNr < 0 then
    LineNr := FindLine('<img alt="cover" align="left" src="http://posters.imdb.com/', Page, 0);
  if LineNr < 0 then
    LineNr := FindLine('<img alt="cover" align="left" src="http://images.amazon.com/', Page, 0);
  if LineNr > -1 then
  begin
    Line := Page.GetString(LineNr);
    BeginPos := pos('src="', Line) + 4;
    Delete(Line, 1, BeginPos);
    EndPos := pos('"', Line);
    Value := copy(Line, 1, EndPos - 1);
    GetPicture(Value, False); // False = do not store picture externally ; store it in the catalog file
  end;

  // Director: collect every link text on the line after the label.
  LineNr := FindLine('Directed by', Page, 0);
  if LineNr > -1 then
  begin
    FullValue := '';
    Line := Page.GetString(LineNr + 1);
    repeat
      BeginPos := pos('">', Line) + 2;
      EndPos := pos('</a>', Line);
      Value := copy(Line, BeginPos, EndPos - BeginPos);
      if (Value <> '(more)') and (Value <> '') then
      begin
        if FullValue <> '' then
          FullValue := FullValue + ', ';
        FullValue := FullValue + Value;
      end;
      Delete(Line, 1, EndPos);
    until Pos('</a>', Line) = 0;
    HTMLDecode(FullValue);
    SetField(fieldDirector, FullValue);
  end;

  // Actors: the whole cast table sits on one line; consume it row by row.
  LineNr := FindLine('ast overview', Page, 0);
  if LineNr = -1 then
    LineNr := FindLine('redited cast', Page, 0);
  if LineNr > -1 then
  begin
    FullValue := '';
    Line := Page.GetString(LineNr);
    repeat
      BeginPos := Pos('<td valign="top">', Line);
      if BeginPos > 0 then
      begin
        Delete(Line, 1, BeginPos);
        Line := copy(Line, 25, Length(Line));
        BeginPos := pos('">', Line) + 2;
        EndPos := pos('</a>', Line);
        if EndPos = 0 then
          EndPos := Pos('</td>', Line);
        Value := copy(Line, BeginPos, EndPos - BeginPos);
        if (Value <> '(more)') and (Value <> '') then
        begin
          // Optional role name, appended as "Actor (as Role)".
          BeginPos := pos('.... </td><td valign="top">', Line);
          if BeginPos > 0 then
          begin
            EndPos := pos('</td></tr>', Line);
            BeginPos := BeginPos + 27;
            Value2 := copy(Line, BeginPos, EndPos - BeginPos);
            if Value2 <> '' then
            begin
              Value := Value + ' (as ' + Value2 + ')';
            end;
          end;
          if FullValue <> '' then
            FullValue := FullValue + ', ';
          FullValue := FullValue + Value;
        end;
        EndPos := Pos('</td></tr>', Line);
        Delete(Line, 1, EndPos);
      end else
      begin
        Line := '';
      end;
    until Line = '';
    HTMLDecode(FullValue);
    SetField(fieldActors, FullValue);
  end;

  //Country
  LineNr := FindLine('Country:', Page, 0);
  if LineNr > -1 then
  begin
    Line := Page.GetString(LineNr + 1);
    BeginPos := pos('/">', Line) + 3;
    EndPos := pos('</a>', Line);
    Value := copy(Line, BeginPos, EndPos - BeginPos);
    HTMLDecode(Value);
    SetField(fieldCountry, Value);
  end;

  //Category: keep every genre listed before the "(more)" link.
  LineNr := FindLine('Genre:', Page, 0);
  if LineNr > -1 then
  begin
    Line := Page.GetString(LineNr + 1);
    BeginPos := pos('<a', Line);
    EndPos := pos('(more)', Line);
    Value := copy(Line, BeginPos, EndPos - BeginPos);
    HTMLRemoveTags(Value);
    HTMLDecode(Value);
    SetField(fieldCategory, Value);
  end;

  //Description
  LineNr := FindLine('Plot Summary:', Page, 0);
  // FindLine reports "not found" as -1, so compare against 0, not 1.
  if LineNr < 0 then
    LineNr := FindLine('Plot Outline:', Page, 0);
  if LineNr > -1 then
  begin
    Line := Page.GetString(LineNr);
    BeginPos := pos('</b>', Line) + 5;
    EndPos := pos('<a href', Line);
    // The text may continue on the next line.
    if EndPos < 1 then
      Line := Line + Page.GetString(LineNr+1);
    EndPos := pos('<a href="/Plot?', Line);
    if EndPos < 1 then
      EndPos := pos('<br><br>', Line);
    if EndPos < 1 then
      EndPos := Length(Line);
    PickListClear;
    Value := copy(Line, BeginPos, EndPos - BeginPos);
    HTMLDecode(Value);
    PickListAdd(Value);
    // Follow the "(more)" link, if any, to fetch the full plot summaries.
    BeginPos := pos('/Plot?', Line);
    EndPos := pos('">(more)', Line);
    if (BeginPos <> 0) and (EndPos <> 0) then
    begin
      Value := copy(Line, BeginPos, EndPos - BeginPos);
      GetDescriptions(Value);
    end;
    Value := '';
    if PickListExec('Select a description for "' + MovieName + '"', Value) then
      SetField(fieldDescription, Value);
  end;

  // Comments: concatenate the lines after the label up to </blockquote>.
  LineNr := FindLine('<b>Summary:</b>', Page, 0);
  // Require at least one line after the label, and never read past the
  // end of the page when the closing </blockquote> is missing.
  if (LineNr > -1) and (LineNr < Page.Count - 1) then
  begin
    Value := '';
    repeat
      LineNr := LineNr + 1;
      Line := Page.GetString(LineNr);
      EndPos := Pos('</blockquote>', Line);
      if EndPos = 0 then
        EndPos := Length(Line)
      else
        EndPos := EndPos - 2;
      Value := Value + Copy(Line, 1, EndPos) + ' ';
    until (Pos('</blockquote>', Line) > 0) or (LineNr >= Page.Count - 1);
    HTMLDecode(Value);
    Value := StringReplace(Value, '<br>', #13#10);
    Value := StringReplace(Value, #13#10+' ', #13#10);
    SetField(fieldComments, Value);
  end;

  // Length
  LineNr := FindLine('Runtime:', Page, 0);
  if LineNr > -1 then
  begin
    Line := Page.GetString(LineNr + 1);
    EndPos := pos(' min', Line);
    if EndPos = 0 then
      EndPos := pos('  /', Line);
    if EndPos = 0 then
      EndPos := Length(Line);
    // Skip a leading "Country:" style prefix when one precedes the number.
    if Pos(':', Line) < EndPos then
      BeginPos := Pos(':', Line) + 1
    else
      BeginPos := 1;
    Value := copy(Line, BeginPos, EndPos - BeginPos);
    SetField(fieldLength, Value);
  end;

  // Alternative (Portuguese) title
  // The "Also Known As" line is consumed one </i>-terminated chunk at a
  // time; every chunk mentioning "(Portugal)" becomes a pick-list entry.
  LineNr := FindLine('Also Known As', Page, 0);
  if LineNr > -1 then
  begin
    PickListClear;
    Line := Page.GetString(LineNr);
    Line := copy(Line, 0, Length(Line) - 1);
    repeat
      // copy() takes a start and a LENGTH, so the chunk length is
      // EndPos - BeginPos. The opening tag is left in and removed by
      // HTMLRemoveTags below.
      BeginPos := pos('<i class="transl">', Line);
      EndPos := pos('</i>', Line);
      Value := copy(Line, BeginPos, EndPos - BeginPos);
      if pos('(Portugal)', Value) > 0 then
      begin
        HTMLRemoveTags(Value);
        HTMLDecode(Value);
        // Cut at the country marker; a title valid for both countries may
        // carry "(Brazil)" before "(Portugal)", so cut there as well.
        Value := copy(Value, 0, pos(' (Portugal)', Value));
        EndPos := pos(' (Brazil)', Value);
        if EndPos > 0 then
          Value := copy(Value, 0, pos(' (Brazil)', Value));
        // Drop the last 8 characters - presumably a trailing " (year) ";
        // TODO confirm against a live page.
        Value := copy(Value, 0, Length(Value) - Length(' (....) '));
        PickListAdd(Value);
      end;
      // Advance past the chunk just examined.
      EndPos := pos('</i>', Line) + 4;
      Line := copy(Line, EndPos, Length(Line) - 1);
    until Length(Line) = 0;
    Value := '';
    if PickListExec('Select a translated title for "' + MovieName + '"', Value) then
      SetField(fieldTranslatedTitle, Value);
  end;


  // Language
  LineNr := FindLine('Language:', Page, 0);
  if LineNr > -1 then
  begin
    Line := Page.GetString(LineNr + 1);
    BeginPos := pos('/">', Line) + 3;
    EndPos := pos('</a>', Line);
    if EndPos = 0 then
      EndPos := Length(Line);
    Value := copy(Line, BeginPos, EndPos - BeginPos);
    SetField(fieldLanguages, Value);
  end;

  DisplayResults;
end;

// Downloads the plot page at 'http://us.imdb.com' + Address and walks every
// <p class="plotpar"> paragraph, joining paragraphs that span several lines
// with single blanks. The pick list is cleared before each paragraph is
// added, so only the last paragraph found remains in it on return.
procedure GetDescriptions(Address: string);
var
  PlotPage: TStringList;
  Row, Plot: string;
  Idx: Integer;
  FromCol, ToCol: Integer;
begin
  PlotPage := TStringList.Create;
  PlotPage.Text := GetPage('http://us.imdb.com' + Address);
  Idx := FindLine('<p class="plotpar">', PlotPage, 0);
  while Idx >= 0 do
  begin
    Plot := '';
    repeat
      Row := PlotPage.GetString(Idx);
      Idx := Idx + 1;

      // Start right after the opening tag on the first line, otherwise at
      // the beginning of the line.
      FromCol := pos('"plotpar">', Row);
      if FromCol > 0 then
        FromCol := FromCol + 10
      else
        FromCol := 1;

      // Stop at the closing tag, or take the whole line when there is none.
      ToCol := pos('</p>', Row);
      if ToCol < 1 then
        ToCol := Length(Row) + 1;

      if Plot <> '' then
        Plot := Plot + ' ';
      Plot := Plot + copy(Row, FromCol, ToCol - FromCol);
    until (Idx = PlotPage.Count) or (pos('</p>', Row) > 0);
    HTMLDecode(Plot);
    PickListClear; //(joazito) only one description is wanted: this clears all but the last
    PickListAdd(Plot);
    Idx := FindLine('<p class="plotpar">', PlotPage, Idx);
  end;
  PlotPage.Free;
end;

// Adds the titles of one search-result section to the pick tree.
//
// Page   : the search-result page, one HTML line per entry.
// LineNr : on entry, the index of the section header; on exit, the index of
//          the line containing '</OL>' (or of the last line of the page when
//          the list is never closed), so the caller can resume from there.
//
// Also maintains TheMovieTitle / TheMovieAddress: while every title seen so
// far is identical they describe that single movie; as soon as a different
// title appears TheMovieTitle becomes '*' and TheMovieAddress ''.
procedure AddMoviesTitles(Page: TStringList; var LineNr: Integer);
var
  Line: string;
  MovieTitle, MovieAddress: string;
  StartPos: Integer;
begin
  Line := '';
  // Bounded scan: stop at '</OL>' or at the last line of the page, so a
  // truncated page cannot make the loop read past the end of the list.
  while (pos('</OL>', Line) = 0) and (LineNr < Page.Count - 1) do
  begin
    LineNr := LineNr + 1;
    Line := Page.GetString(LineNr);
    StartPos := pos('="', Line);
    if StartPos > 0 then
    begin
      // href value: between '="' and '">'; link text: between '">' and '</A>'.
      StartPos := StartPos + 2;
      MovieAddress := copy(Line, StartPos, pos('">', Line) - StartPos);
      StartPos := pos('">', Line) + 2;
      MovieTitle := copy(Line, StartPos, pos('</A>', Line) - StartPos);
      HTMLDecode(MovieTitle);
      //Remove duplicates
      if TheMovieTitle = '' then
      begin
        TheMovieTitle := MovieTitle;
        // Same host as the pick-tree entries below, so the automatically
        // selected page is the one the user would have picked by hand.
        TheMovieAddress := 'http://akas.imdb.com' + MovieAddress;
      end
      else
      begin
        if TheMovieTitle <> '*' then
          if TheMovieTitle <> MovieTitle then
          begin
            TheMovieTitle := '*';
            TheMovieAddress := '';
          end;
      end;
      PickTreeAdd(MovieTitle, 'http://akas.imdb.com' + MovieAddress);
    end;
  end;
end;

// Script entry point: checks the host version, asks for a title (pre-filled
// from the original title, or the translated one when that is empty) and
// starts the IMDb title search.
begin
  if not CheckVersion(3,4,0) then
    ShowMessage('This script requires a newer version of Ant Movie Catalog (at least the version 3.4.0)')
  else
  begin
    // Reset the duplicate-detection state used while listing results.
    TheMovieAddress := '';
    TheMovieTitle := '';
    MovieName := GetField(fieldOriginalTitle);
    if MovieName = '' then
      MovieName := GetField(fieldTranslatedTitle);
    if Input('IMDb Import', 'Enter the title of the movie:', MovieName) then
    begin
      // Variant restricted to movies only, kept for reference:
      //   AnalyzePage('http://us.imdb.com/Tsearch?title='+UrlEncode(MovieName)+'&restrict=Movies+only');
      AnalyzePage('http://us.imdb.com/Tsearch?title=' + UrlEncode(MovieName));
    end;
  end;
end.
If you want to adapt this to your own language, understand that this code is a little messy because a Portuguese title can appear followed by "(Portugal)", "(Brazil)" or both. And I'm only interested in Portuguese or both, so there.
joazito

Post by joazito »

I've just realized imdb.com doesn't sort categories alphabetically... only sometimes... oh well my arguments still hold.
joazito
Posts: 13
Joined: 2003-02-09 10:28:28
Location: Setubal, Portugal
Contact:

oops...

Post by joazito »

ok major flaw in the translation part, sorry. I suspected this thing wasn't working well... now I know it really wasn't. My mistake, I misinterpreted the "copy" function syntax. Here's a corrected version:

Code: Select all

  // Alternative (Portuguese) title
  // Fragment of the movie-page analysis: scans the "Also Known As" line one
  // </i>-terminated chunk at a time and offers every chunk that mentions
  // "(Portugal)" as a candidate for the translated-title field.
  LineNr := FindLine('Also Known As', Page, 0);
  if LineNr > -1 then
  begin
    PickListClear;
    Line := Page.GetString(LineNr);
    // NOTE(review): presumably trims the last character of the line; the
    // exact effect of a 0 start index depends on the script engine - confirm.
    Line := copy(Line, 0, Length(Line) - 1);
    repeat
      // Chunk = text from the next <i class="transl"> tag up to the first
      // </i>. The opening tag is still included; HTMLRemoveTags is called on it below.
      BeginPos := pos('<i class="transl">', Line);
      EndPos := pos('</i>', Line);
      Value := copy(Line, BeginPos, EndPos-BeginPos);
	if pos('(Portugal)',Value) > 0 then
	begin
	      HTMLRemoveTags(Value);
	      HTMLDecode(Value);
	      // Keep only what precedes the " (Portugal)" marker...
	      Value := copy(Value, 0, pos(' (Portugal)',Value));
	      // ...and what precedes " (Brazil)" when the title is shared by both.
	      EndPos := pos(' (Brazil)',Value);
	      if EndPos>0 then Value := copy(Value, 0, pos(' (Brazil)',Value));
	      // Drops the last 8 characters (the length of ' (....) ');
	      // presumably a trailing " (year) " - TODO confirm.
	      Value := copy(Value, 0, Length(Value)-Length(' (....) '));
	      PickListAdd(Value);
	end;
    // Discard everything up to and including the </i> just examined.
    EndPos := pos('</i>', Line) + 4;
    Line := copy(Line, EndPos, Length(Line) - 1);
    until Length(Line) = 0;
    Value := '';
    // Store the user's choice; nothing is written if the dialog is cancelled.
    if PickListExec('Select a translated title for "' + MovieName + '"', Value) then
	setField(fieldTranslatedTitle, Value);
  end;
Post Reply