Page 1 of 1

[UPD ITA] FilmUP(IT) 1.1.2 with special chars and accents corrected

Posted: 2018-05-01 20:23:37
by fulvio53s03

Code: Select all

(***************************************************

Ant Movie Catalog importation script
www.antp.be/software/moviecatalog/

[Infos]
Authors=L. Francisco, Pivello, Zandal, Fulvio53s03
Title=FilmUP
Description=Get movie info from Leonardo.FilmUP.it and shows years in picklist (Fulvio53s03)
Site=http://filmup.leonardo.it
Language=IT
Version=1.1.2 - 01.05.2018
Requires=3.5.1
Comments=29.12.2011 Commenti estratti sia per vecchio che per nuovo layout di pagina
License=
GetInfo=1
RequiresMovies=1

[Options]
AlternateURL=0|0|0=Use the FilmUP web site for FieldURL|1=Try to use ufficial movie web site for FieldURL
MezzoVoto=1|1|0=Integer Ratings|1=Ratings with ,5
ActorsPlusCast=0|0|0=Get Main Actors data (if present) otherwise get Cast field|1=Get Main Actors data (if present) and Cast data
TrailerURL=0|0|0=Normal URL page|1=Trailer URL page (if found)
Poster=1|0|1=Download full-res Poster|0=Download tiny Poster

[Parameters]

***************************************************)

program LeonardoFilmUP;
uses
  StringUtils7552;
const
  debug_search = false;                          // debug mode on/off
  Apice       = #39;
  folder = 'f:\prova\';                                   // directory where to save files

var
  MovieName, MovieToSearch, MovieTitle: string;
  Pagina_Ricerca, TheMovieAddress, Pagestr: string;
  SaveComm, comm, save_translated, Save_original: String;
  accento: string;
  lgth_title: integer;

function DecodePage(s: string): string;
begin
  s := StringReplace(s, '’', '’');
  s := UTF8Decode(s);
  Result := s;
end;

procedure AnalyzePage(Address: string);
var
  Page: TStringList;
  LineNr: integer;
  BeginPos: integer;
  campo_ricerca: string;
begin
  Page := TStringList.Create;
  Page.Text := GetPage(Address);
  Pagestr := Page.Text;
  if debug_search then
     DumpPage(folder + 'FilmUP_page.txt', pagestr);                //2018.04.30fs
  SetField(fieldURL, Address);
  LineNr := FindLine('Ordina risultati per', Page, 0);
  if LineNr = -1 then
  begin
    SetField(fieldURL, Address);
//fulvio
    AnalyzeMoviePage(Page);
  end
  else
  begin
    PickTreeClear;
    campo_ricerca := getfield(fieldFilePath);
    if  campo_ricerca = '' then
        campo_ricerca := getfield(fieldFormattedTitle);
    PickTreeAdd('Risultati ricerca per "' + campo_ricerca + '":', '');                                               // per più pagine di film

    AddMoviesTitles;
    if TheMovieAddress='' then
    begin
      if PickTreeExec(Address) then 
	 AnalyzePage(Address);
    end
    else
    begin
      SetField(fieldURL, TheMovieAddress);
      Page.Text := GetPage(TheMovieAddress);
      Pagestr := Page.Text;
      AnalyzeMoviePage(Page);
    end;
  end;
  Page.Free;
end;

procedure AnalyzeMoviePage(Page: TStringList);
var
  Line, PreviousLine, NomeHtml, sTemp: string;
  AddrImage: string;
  LineNr,PrevLineNr, BeginPos, EndPos, Field: Integer;
  IsMainActors: boolean;
begin
  IsMainActors := false;
  sTemp := '';
//  SetField(fieldURL, Address);
  LineNr := FindLine('<font face="arial, helvetica" size="3"><b>', Page, 0);
  if LineNr > -1 then
  begin
   SetField(fieldDate, DateToStr(Date));
    //Translated Title
    Line := Page.GetString(LineNr);
    HTMLRemoveTags(Line);
//fs    SetField(fieldTranslatedTitle, Line);
    save_translated :=  AnsiMixedCase(AnsiLowerCase(Line), ' ');
    Save_translated := RemoveHtmlClean(Save_translated);
    SetField(fieldTranslatedTitle, save_translated);
    repeat
      //Look for next info
      repeat
        LineNr := LineNr + 1;
        Line := Page.GetString(LineNr);
        HTMLRemoveTags(Line);
      until (Line<>'')  or (LineNr > Page.Count);

      // Test if 'Trama:' missing
      if ((PreviousLine = 'Data di uscita:&nbsp;')
      or  (PreviousLine = 'Uscita prevista:&nbsp;')) and (Line <> 'Trama:') then
        if (copy(Line,1,6)<>'Trama:') then
          Line := 'Trama:' + Line;
      PreviousLine := Line;

      //Look for type of line
      if Line = 'Titolo originale:&nbsp;' then
        Field := fieldOriginalTitle
      else if Line = 'Regia:&nbsp;' then
        Field := fieldDirector
      else if Line = 'Produzione:&nbsp;' then
        Field := fieldProducer
      else if Line = 'Nazione:&nbsp;' then
        Field := fieldCountry
      else if Line = 'Genere:&nbsp;' then
        Field := fieldCategory
      else if Line = 'Anno:&nbsp;' then
        Field := fieldYear
      else if Line = 'Durata:&nbsp;' then
        Field := fieldLength //Special case: get number only
      else if Line = 'Sito ufficiale:&nbsp;' then
        Field := fieldURL
      else if Line = 'Attori protagonisti:&nbsp;' then
      begin
        Field := fieldActors;
        IsMainActors := true;
      end
      else if Line = 'Cast:&nbsp;' then
        Field := fieldActors
      else if Line = 'Trama:' then
        begin
        Field := fieldDescription          //2018.04.30fs
        if debug_search then
            DumpPage(folder + 'FilmTV_line' + IntToStr(ctr_giri) + '.txt', Blocco);                // debug
        end
      else
        Field := 0;

      // I have to add this test, for sometimes the description
      // is on the same line as the tag 'Trama'
      if (copy(Line,1,6)='Trama:') and (length(Line)>6) then
         begin
         if debug_search then
             DumpPage(folder + 'FilmUP_page2.txt', pagestr);                //2018.04.30fs
         Field := fieldDescription;
         Line := Textbetween(Pagestr, 'Trama:<br>', '</font>');
         if debug_search then
             DumpPage(folder + 'FilmUP_line_trama.txt', Line);                // debug
         HTMLRemoveTags(Line);
         HTMLDecode(Line);                    //2018.04.30fs
         end
      else
       begin
        //Get values
        LineNr := LineNr + 1;
        Line := Page.GetString(LineNr);
        HTMLRemoveTags(Line);
        HTMLDecode(Line);

        //Special case: Length
        if Field = fieldLength then Line := copy(Line,1,length(Line)-1);
       end;

       //Alternative URL case
       if (field = fieldURL) then
       begin
         if (Length(Line) = 0) or (GetOption('AlternateURL')=0) then      // if WEB url missing or explicitly requested...
           Line := GetField(fieldURL)  // ...restore FilmUp URL
         else
           Line := 'http://'+Line;
       end;

      if Field<>0 then
      begin
       if Field = fieldActors then
       begin
         if IsMainActors then // Remember Main Actors (if present)
         begin
           sTemp := Line;
           IsMainActors := false;
         end
         else
         begin
           if ((GetOption('ActorsPlusCast')=1) and (sTemp<>'')) then
             Line := sTemp + ' Cast: ' + Line
           else
             if sTemp <> '' then
               Line := sTemp;
           Line := UTF8decode(Line);         //2018.04.30fs
           SetField(Field,Line);
         end;
       end
       else
         Line := UTF8decode(Line);       //2018.04.30fs
         SetField(Field,Line);
      end;

    until (Field=fieldDescription) or (LineNr > Page.Count);
    Save_original := getfield(fieldOriginalTitle);
    Save_original := RemoveHtmlClean(Save_original);
    if AnsiLowerCase(Save_translated) = AnsiLowerCase(Save_original) then
       setfield(FieldOriginalTitle, '');
  end;

  PrevLineNr := LineNr;

   //Trailer
if GetOption('TrailerURL')=1 then begin
LineNr := FindLine('">Trailer</a>', Page, PrevLineNr);
  if LineNr > -1 then
  begin
    Line := Page.GetString(LineNr);
    if Pos('size="2"><a', Line) <> 0 then Delete(Line,1,pos('size="2"><a',Line));
    //if Pos('DVD', Line) <> 0 then Delete(Line,1,pos('DVD',Line));
    Delete(Line,1,pos('href="',Line)+5);
    NomeHtml := Copy(Line,1,pos('"',Line)-1);
    if (copy(NomeHtml,1,1)<>'/') then
    begin
      NomeHtml := '/' + NomeHtml;
    end;
    NomeHtml :='http://filmup.leonardo.it' + NomeHtml;
    comm := textbetween(GetPage(NomeHtml),'<EMBED SRC="','"');
//HTMLRemoveTags(comm);
//Fulvio  SetField(fieldURL,comm);
    PrevLineNr := LineNr;
  end;
end;

  //Comments
  LineNr := FindLine('">Recensione</a>', Page, PrevLineNr);
  if LineNr > -1 then
  begin
    Line := Page.GetString(LineNr);
    if Pos('Scheda', Line) <> 0 then
      Delete(Line,1,pos('Scheda',Line));
    if Pos('DVD', Line) <> 0 then
      Delete(Line,1,pos('DVD',Line));
    Delete(Line,1,pos('href="',Line)+5);
    NomeHtml :=Copy(Line,1,pos('"',Line)-1);
    if (copy(NomeHtml,1,1)<>'/') then
    begin
      NomeHtml := '/' + NomeHtml;
    end;
    SaveComm := '';
    SaveComm := GetPage(NomeHtml);
//    SaveComm := UTF8decode(SaveComm);
    if debug_search then
       DumpPage(folder + 'comments.txt', SaveComm);                //2018.04.30fs
    comm := textbetween(SaveComm,'<font face="arial,helvetica" size="2"><b>','<a class="filmup" href="opinioni.htm">');    //vecchio formato
    if  length(comm) < 1 then
      comm := textbetween(SaveComm,'<td width="100%" valign="top"><font size="3"><b>','<a class="filmup" href="opinioni.htm">');      //nuovo formato
    comm := TextAfter(comm,'</b><br>'+#13#10);
    Comm := RemoveHtmlClean(Comm);
    Comm := stringreplace(Comm, accento, apice);                 //2018.04.30fs
    comm := DecodePage(comm);                //2018.04.30fs      qui da correggere ?  in apostrofi
    SetField(fieldComments,comm);
    PrevLineNr := LineNr;
  end;

  //Rating
  LineNr := FindLine('">Opinioni</a>', Page, PrevLineNr);
  if LineNr > -1 then
  begin
    Line := Page.GetString(LineNr);
    Delete(Line,1,pos('Recensione',Line));
    Delete(Line,1,pos('href="',Line)+5);
    Line := GetLineFromOtherPage(Copy(Line,1,pos('"',Line)-1),'</b> - <img src="');
    if Line <> '' then
    begin
      Line := Copy(Line,1,pos('</b> - <img src="',Line)-1);
      if GetOption('MezzoVoto')=0 then
 if pos('.',Line)>0 then Line := Copy(Line,1,pos('.',Line)-1);
      SetField(fieldRating,Line);
    end;
    PrevLineNr := LineNr;
  end;

  //Picture // start fulvio53s03 Code
    LineNr := FindLine('<img src="locand/', Page, PrevLineNr);
  if LineNr > -1 then
    begin
      Line := Page.GetString(LineNr);
      AddrImage := TextBetween(Line, '<img src="', '" width="');
      AddrImage := 'http://filmup.leonardo.it/' + AddrImage;
      GetPicture(AddrImage);
    end;

  if GetOption('Poster')=1 then
  begin
    LineNr := FindLine('href="posters/locp/', Page, PrevLineNr);  //cerca Poster grande
    if LineNr > -1 then
    begin
      Line := Page.GetString(LineNr);
      AddrImage := TextBetween(Line, 'href="posters/locp/', '" target="_blank"');
      AddrImage := 'http://filmup.leonardo.it/posters/loc/500/' + AddrImage;
      AddrImage := StringReplace(AddrImage, '.htm', '.jpg');
      GetPicture(AddrImage);

//      http://filmup.leonardo.it/posters/locp/toystory2.htm
//      http://filmup.leonardo.it/posters/loc/500/toystory2.jpg

//      Line := GetLineFromOtherPage('http://filmup.leonardo.it/'+Copy(Line,1,pos('"',Line)-1),'<img src="../loc/500/');
//      if Line <> '' then
//      begin
//        Delete(Line,1,pos('<img src="../',Line)+12);
//        GetPicture('http://filmup.leonardo.it/posters/'+Copy(Line,1,pos('"',Line)-1));
//      end;
    end;
  end;
//Picture // start fulvio53s03 Code
end;

function RemoveHtmlClean(str1: string) :string;
begin
  HTMLRemoveTags(str1);
  HTMLDecode(str1);
//  str1 := RemoveTabs(str1);
  result := FullTrim(str1);
end;

function RemoveTabs(Pattern: string): string;
begin
  repeat
  Delete(Pattern, 1, 1);
  until ord(copy(Pattern, 1, 1)) <> 9;
  result := Pattern;
end;

function GetLineFromOtherPage(address: string; hint: string): string;
var
  Page: TStringList;
  LineNr: integer;
begin
  Page := TStringList.Create;
  Page.Text := GetPage(Address);
  LineNr := FindLine(hint, Page, 0);
  if LineNr > -1 then result := Page.GetString(LineNr);
  Page.Free;
end;

procedure GetComments(address: string);
var
  Page: TStringList;
  BeginLine: integer;
  EndLine: integer;
  i: integer;
  Line, Comments: string;
begin
  Page := TStringList.Create;
  Page.Text := GetPage(Address);
  BeginLine := FindLine('RECENSIONI', Page, 0);
  BeginLine := FindLine('<font face="arial,helvetica" size="2"><b>', Page, BeginLine);
  EndLine := FindLine('<a href="opinioni.htm">Scrivi la tua recensione!</a></font><br><br>', Page, BeginLine);
  for i:= BeginLine+1 to EndLine-1 do
  begin
    Line := Page.GetString(i);
    Line := StringReplace(Line, '<br>', #13#10);
    Line := StringReplace(Line, #13#10#32, #13#10);
    HTMLRemoveTags(Line);
    HTMLDecode(Line);
    Comments := Comments + Line;
  end;
  comments := Utf8decode(comments);          //2018.04.30fs
  SetField(fieldComments, Comments);
  Page.Free;
end;

function MyTrim(Value: string):string;
var
  ExitLoop: Boolean;
  NewField, OldField: String;
  OldLgthValue, NewLgthValue: integer;
begin
    NewField := ' ';
    OldField := #9;
    value := StringReplace(Value, OldField, NewField);
    OldField := #10;
    value := StringReplace(Value, OldField, NewField);
    OldField := #13;
    value := StringReplace(Value, OldField, NewField);
    OldField := '  ';
    value := StringReplace(Value, OldField, NewField);
    Newfield := '&';
    OldField := '&';
    value := StringReplace(Value, OldField, NewField);
    ExitLoop := False;
    OldLgthValue := length(Value);
    repeat
      value := StringReplace(Value, OldField, NewField);
      NewLgthValue := length(Value);
    if  OldLgthValue = NewLgthValue then
        ExitLoop := True
    else
        OldLgthValue := NewLgthValue;
    until ExitLoop;

  Result := '';
  HTMLRemoveTags(Value);
  Result := Value;
end;


//
procedure AddMoviesTitles;
var
  LineNr, CharToDelete: Integer;
  Line, Blocco: string;
  MovieAddress, MovieAnno: string;
  BeginPos, EndPos, ctr_giri: Integer;
  begin
//fulvio  LineNr := FindLine('FilmUP - Scheda: ',Page,LineNr);
  ctr_giri := 0;
  Pagestr := '<DL>' + Textbetween(Pagestr, '<DL>', 'Risultati della ricerca:<br>');     //estrae tabella dei risultati
  Blocco := Textbetween(Pagestr, '<DL>', '</DL>');         //estrae elemento della tabella
  if debug_search then
     DumpPage(folder + 'FilmTV_line' + IntToStr(ctr_giri) + '.txt', Blocco);                // debug
  CharToDelete := Length(Blocco) + 9;    //lgth dati estratti + lgth delimiters estrazione
  while CharToDelete > 9 do
    begin
    MovieAddress := TextBetween(Blocco, '<a class="filmup" href="', '" TARGET="_blank">');
//2018.04.27    MovieTitle := TextBetween(Blocco, 'FilmUP - Scheda: ', '</a>');
    MovieTitle := TextBetween(Blocco, 'TARGET="_blank">', '</a>');              //2018.04.27
    MovieTitle := stringReplace (MovieTitle,+ 'FilmUP - Scheda: ', '');         //2018.04.27
    MovieTitle := Mytrim(MovieTitle);
//    HTMLRemoveTags(MovieTitle);
//    HTMLDecode(Movietitle);
//**************    //fs da verificare
    MovieAnno := TextBetween(Blocco, 'Anno: ', ' Genere');
    MovieTitle := MovieTitle + ' [' + MovieAnno + ']';
    HTMLRemoveTags(MovieTitle);
    HTMLDecode(MovieTitle);
    MovieTitle := StringReplace(MovieTitle, ' - FilmUP.com ', '');
    MovieTitle := stringReplace (MovieTitle,+ 'FilmUP - Scheda: ', '');         //2018.04.27
    lgth_title := length(MovieTitle);
//2018.04.27    If (lgth_title > 3) and (pos(MovieAddress, '/soundtrack/') > 0) then
    If (lgth_title > 3) and (pos('/soundtrack/', MovieAddress) = 0) then
       PickTreeAdd(MovieTitle, MovieAddress);
    if TheMovieAddress='*' then
       TheMovieAddress := MovieAddress
    else
       TheMovieAddress := '';
//**************

    delete(Pagestr, 1, CharTodelete)
    Blocco := Textbetween(Pagestr, '<DL>', '</DL>');
    ctr_giri := ctr_giri + 1;
    if debug_search then
       DumpPage(folder + 'FilmTV_line' + IntToStr(ctr_giri) + '.txt', Blocco);                // debug

    CharToDelete := Length(Blocco) + 9;    //lgth dati estratti + lgth delimiters estrazione
//    delete(Pagestr, 1, CharTodelete)
//  end;

  end;
//Fulvio  LineNr := FindLine('Successivo',Page,LineNr);
//Fulvio  Line := Page.GetString(LineNr);
//Fulvio  BeginPos := pos('HREF',Line);
//Fulvio  if BeginPos>0 then
//Fulvio  begin
//Fulvio    Delete(Line,1,BeginPos + 5);
//Fulvio    EndPos := pos('"',Line);
//Fulvio    MovieAddress := copy(Line,1,EndPos-1);
//Fulvio    PickTreeMoreLink(MovieAddress);
//Fulvio  end;
  if TheMovieAddress='*' then TheMovieAddress := '';
end;
//

// -----------------------------
// Questo è il main dello script
// -----------------------------
begin
  if CheckVersion(3,5,0) then
   begin
    TheMovieAddress := '*';
    MovieName := StringReplace(GetField(fieldTranslatedTitle), '.', ' ');
    if MovieName = '' then
      MovieName := StringReplace(GetField(fieldOriginalTitle), '.', ' ');
While pos ('[', MovieName) > 0 Do begin
  MovieName := TextBefore(MovieName, '[', '') + TextAfter(MovieName, ']');
end;
    if Input('FilmUP Import', 'Digita il titolo del film:', MovieName) then
    begin
    MovieToSearch:=StringReplace(MovieName, ' ', '+');
//Fulvio  ricerca per 'Termini contenuti':
// &ul=%25%2Fsc_%25&x=60&y=11&m=all&wf=0020&wm=sub&sy=0
    Pagina_Ricerca := 'http://filmup.leonardo.it/cgi-bin/search.cgi?ps=100&fmt=long&q='+MovieToSearch+'&ul=%25%2Fsc_%25&x=60&y=11&m=all&wf=0020&wm=sub&sy=0';
    AnalyzePage(Pagina_Ricerca);
//Fulvio  ricerca per 'Termini esatti':
//    AnalyzePage('http://filmup.leonardo.it/cgi-bin/search.cgi?ps=100&fmt=long&q='+MovieToSearch+'&ul=%25%2Fsc_%25&x=31&y=12&m=all&wf=0020&wm=wrd&sy=0');
    end;
   end
  else
    ShowMessage('Questo script richiede una versione più nuova di Ant Movie Catalog (almeno la versione 3.5.0)');
end.

Re: [UPD ITA] FilmUP(IT) 1.1.2 with special chars and accents corrected

Posted: 2018-05-01 20:31:48
by otreux
many thanks! :grinking: