Page 1 of 1

[UPD ITA] Filmscoop.it rel. 2.0

Posted: 2018-06-01 17:03:37
by fulvio53s03
verificato con / verified with:
Assassinio sull'orient express Murder on the orient express
Assassinio sull'orient express Murder on the orient express
Inju, la bete dans l'ombre Inju, la bête dans l'ombre
Pronti a morire The quick and the dead
Bride wars - la mia migliore nemica Bride wars
L'uomo dal pugno d'oro El hombre del puño de oro
Alien Outpost
Outpost 37
Blade Runner 2049
Magnolia
Avatar Avatar
La prima cosa bella La prima cosa bella
L'armata degli eroi L'armée des ombres
Doa: due ore ancora D.o.a.: dead on arrival
Blade runner Blade runner
La bestia del castello maledetto La mano de un hombre muerto
Mamma mia! Mamma mia!

Code: Select all

(***************************************************

Ant Movie Catalog importation script
www.antp.be/software/moviecatalog/

[Infos]
Authors=from original Claudio Rinaldi (rinaldiclaudio@gmail.com), Mrobama, Fulvio53s03
Title=FilmScoop.it
Description=Get movie info from FilmScoop.it
Site=www.filmscoop.it
Language=IT
Version=2.0
Requires=4.2.2
Comments=Nuova versione di Fulvio53s03, estrae tutti i commenti. Esecuzione velocizzata!
License=
GetInfo=1
RequiresMovies=1

[Options]

[Parameters]

***************************************************)

program FilmScoopIT;

uses
  StringUtils7552;
// Script-wide constants: debug switches, site URLs and the HTML markers used
// to cut the wanted fields out of the FilmScoop pages.
const
  debug = false;                                          // when true, fetched pages are dumped to files under `folder`
  folder = 'f:\prova\';                                   // directory where the debug dumps are saved
  UrlBase = 'http://www.filmscoop.it';

  UrlRicerca = '&to=&r=&i=&n=&g=0&a=&o=Titolo&Submit=Cerca'; // fixed query-string tail appended after the title
  QueryBase = UrlBase + '/ricerca/risultati.asp?t=';         // search URL prefix; the title is appended to it
  QueryFilm = UrlBase + '/film_al_cinema/';                  // movie page URL prefix; the film id is appended to it
  ImagePath = UrlBase + '/locandine/';                       // poster URL prefix

  cStartNumRis = 'Numero di risultati: <strong>'; // Result count start marker
  cEndNumRis = '</strong> - Pagin';               // Result count end marker
  cStartId = 'href="/film_al_cinema/';      // ID start marker
  cEndId = '"';                             // ID end marker
  cStartTitle = '<h1 class="TitoloFilmUpper"'; // Title start marker (stored as the translated title)
  cEndTitle = '</a>';                          // Title end marker
  cStartTranslTitle = 'Titolo Originale</strong>: ';        // Original title start marker (despite the constant's name)
  cEndTranslTitle = '</h2>';                                // Original title end marker (despite the constant's name)
  cStartImg = '<img src="http://www.filmscoop.it/locandine/';               // Image start marker
  cEndImg = '" alt';                                                        // Image end marker
  cStartDirector = 'Regia</strong>:';               // Director start marker
  cStartCast = 'Interpreti</strong>:';              // Actors start marker (also used as the director end marker)
  cStartCategory = 'Genere</strong>:';       // Category start marker
  cEndCategory = '</a>';                    // Category end marker
  cStartDuration = 'Durata</strong>:';       // Duration start marker (also used as the actors end marker)
  cEndDuration = '<br />';                    // Duration end marker
  cStartCountry = '<strong>Nazionalit&agrave;</strong>:';     // Country start marker (the same span also carries the year)
  cEndCountry = '<strong>Genere';                             // Country end marker
  cstartrecensione = '<div id="TestoRecensione">';            // Review text start marker
  cendrecensione   = '</div>';                                // Review text end marker
  cStartYear = ' ';                     // Year start marker (not referenced in the visible code)
  cEndYear = '<br />';                 // Year end marker (not referenced in the visible code)
  cStartDesc = '<h2 style="margin:25px 0 0 0;padding:0;font-weight:bold;">Trama del film';         // Description start marker
  cEndDesc = '</p>';                                                                               // Description end marker
  cStartComm = '<div align="center" class="comtext" style="margin-top:8px;">Commenti:';       // Comments start marker
  cEndComm = '<script type="text/javascript">';                                               // Comments end marker
  cStartTitleList = '<strong>';            // Title list start marker
  cEndTitleList = '</strong>';            // Title list end marker
// Globals shared by all procedures of the script.
var
  MovieUrl, MovieName, TranslatedStr, PageStr:  string;   // current URL, searched title, catalog translated title, current page HTML
  cValue, extrvalue, AllComments: string;                 // scratch value, accumulated formatted comments, raw comments HTML
  link_recensione, Allrecensione: string;                 // review page URL, extracted review text
// -----------------------
// ANALYZE MOVIE DATA PAGE
// IN:  none
// OUT: set Ant fields
// -----------------------
procedure AnalyzeMoviePage;
// Downloads the movie page at MovieUrl and fills the catalog fields:
// translated/original title, picture, director, actors, country, year,
// category, comments, description (plus the review when one is linked),
// URL and length.
// Reads the global MovieUrl; overwrites the globals PageStr, cValue,
// Allrecensione, link_recensione, AllComments and extrvalue.
// NOTE(review): estrai_recensione and formatta_commenti are declared further
// down in the file; confirm that the script engine accepts calls to routines
// declared later.
var
  start_link_recensione, cImage, SaveValue: string;
  ore, minuti, Minu: integer;
begin
  // Fetch the movie page and keep only the part before the left box
  Allrecensione := '';
  PageStr := GetPage(MovieUrl);
  Pagestr := UTF8decode(Pagestr);
  Pagestr := textbefore(Pagestr, '<div class="BoxLeft">', '');                  //2018.05.29
  if debug then
     DumpPage(folder + 'filmscoop_movie_page.html', Pagestr);

  // REVIEW: when the page links to a review, download it now
  start_link_recensione := '<a href="/cgi-bin/recensioni/';
  if pos(start_link_recensione, Pagestr) > 0 then
     begin
     link_recensione := start_link_recensione + textbetween(Pagestr, start_link_recensione, '"');
     // strip the leading '<a href="' so that only the site-relative path remains
     link_recensione := textafter(link_recensione, '"');
     link_recensione := Urlbase + link_recensione;
     estrai_recensione;
     end;

  // TRANSLATED TITLE
  // the markers are put back around the text so that HTMLRemoveTags sees complete tags
  cValue := CStartTitle + textbetween(PageStr, cStartTitle, cEndTitle) + cEndTitle;
  HTMLRemoveTags(cValue);
  HTMLDecode(cValue);
  cValue := fulltrim(cValue);
  SetField(fieldTranslatedTitle, AnsiUpFirstLetter(AnsiLowerCase(cValue)));

  // FILM IMAGE
  cImage := textbetween(PageStr, cStartImg, cEndImg);
  if cImage <> '' then
    GetPicture(ImagePath + cImage);

  // ORIGINAL TITLE
  cValue := textbetween(PageStr, cStartTranslTitle, cEndTranslTitle);
  HTMLRemoveTags(cValue);
  HTMLDecode(cValue);
  SetField(fieldOriginalTitle, AnsiUpFirstLetter(AnsiLowerCase(cValue)));

  // DIRECTOR (everything between the director label and the cast label)
  cValue := textbetween(PageStr, cStartDirector, cStartCast);
  HTMLRemoveTags(cValue);
  HTMLDecode(cValue);
  cValue := fulltrim(cValue);
  SetField(fieldDirector, cValue);

  // ACTORS (everything between the cast label and the duration label)
  cValue := textbetween(PageStr, cStartCast, cStartDuration);
  HTMLRemoveTags(cValue);
  HTMLDecode(cValue);
  cValue := fulltrim(cValue);
  SetField(fieldActors, cValue);

  // COUNTRY and YEAR share one span ("<country> <year>"): extract it once
  cValue := textbetween(PageStr, cStartCountry, cEndCountry);
  HTMLRemoveTags(cValue);
  HTMLDecode(cValue);
  saveValue := fulltrim(cValue);

  // COUNTRY: the text before the year, which starts with ' 2' or ' 1'
  cValue := textbefore(saveValue, ' 2', '');
  if cValue = '' then
    cValue := textbefore(saveValue, ' 1', '');
  SetField(fieldCountry, cValue);

  // YEAR: the text from the leading '2' or '1' onwards
  cValue := textafter(saveValue, ' 2');
  if cValue <> '' then
     cValue := '2' + cValue
  else
     begin
     cValue := textafter(saveValue, ' 1');
     if cValue <> '' then
        cValue := '1' + cValue;
     end;
  // only store a year that was really found (a lone '1' used to be written otherwise)
  if cValue <> '' then
     SetField(fieldYear, cValue);

  // CATEGORY
  cValue := textbetween(PageStr, cStartCategory, cEndCategory);
  HTMLRemoveTags(cValue);
  HTMLDecode(cValue);
  cValue := fulltrim(cValue);
  SetField(fieldCategory, AnsiUpFirstLetter(AnsiLowerCase(cValue)));

  // COMMENTS
  Allcomments := cStartComm + textbetween(PageStr, cStartComm, cEndComm) + cEndComm;
  if debug then
     DumpPage(folder + 'filmscoop_comments.html', AllComments);
  extrvalue := '';                                           // formatta_commenti appends to this global
  formatta_commenti;
  extrvalue := fulltrim(extrvalue);
  if length(extrvalue) > 4 then                              // longer than the leading '--- ' of the first comment
     setField(fieldComments, extrvalue);

  // DESCRIPTION: first paragraph after the plot heading, followed by the review if any
  cValue := cStartDesc + textbetween(PageStr, cStartDesc, cEndDesc) + cEndDesc;
  cValue := '<p' + textBetween(cValue, '<p', '</p>') + '</p>';
  HTMLRemoveTags(cValue);
  HTMLDecode(cValue);
  cValue := fulltrim(cValue);
  if allrecensione <> '' then
     cValue := cValue + CRLF + CRLF + 'Recensione' + CRLF + allrecensione;
  SetField(fieldDescription, cValue);

  // URL
  SetField(fieldURL, MovieUrl);

  // LENGTH: the parsing below expects "h <hours>.<minutes>" and stores total minutes
  cValue := textbetween(Pagestr, cStartDuration, cEndDuration);
  cValue := fulltrim(textafter(cValue, 'h '));
  Minu := 0;
  if pos('.', cValue) > 0 then
     begin
     ore    := strtoint(fulltrim(TextBefore(cValue, '.', '')), 0);
     minuti := strtoint(fulltrim(TextAfter(cValue, '.')), 0);
     // always convert the hours: "2.00" is 120 minutes, not 2
     Minu := ore * 60 + minuti;
     end;
  // leave the field untouched when no duration could be parsed
  if Minu > 0 then
     SetField(fieldLength, inttostr(Minu));
end;

procedure estrai_recensione;
// Downloads the review page at link_recensione and leaves its plain text
// (tags removed, entities decoded, trimmed) in the global Allrecensione.
// The global scratch variable cValue ends up holding the same text.
begin
  // fetch and decode the review page in one step
  Allrecensione := UTF8decode(GetPage(link_recensione));
  if debug then
     DumpPage(folder + 'filmscoop_recensione.html', Allrecensione);

  // isolate the review body, then strip the markup
  cValue := textbetween(Allrecensione, cstartrecensione, cendrecensione);
  HTMLRemoveTags(cValue);
  HTMLDecode(cValue);

  Allrecensione := fulltrim(cValue);
  cValue := Allrecensione;
end;

procedure formatta_commenti;
// Splits the raw comments HTML held in the global AllComments into single
// user comments and stores them, as plain text, in the global extrValue.
// Each comment is appended as CRLF + CRLF + '--- ' + text.
//
// A comment starts at a '<div class="divCommentoLeft"' marker and runs up to
// the next marker (or to the end of AllComments for the last one).  The
// previous version removed "marker + comment + marker" from AllComments on
// every pass, which also consumed the opening marker of the following
// comment (so that one was skipped), and it never emitted the last comment.
// Here the text is scanned by position, so every comment is handled once.
//
// Uses the global cValue as scratch; AllComments is left unchanged.
var
  marker, replyMarker: string;
  startspoil, endspoil, delstr: string;
  rest: string;
  p, ctr_giri: integer;
  more: boolean;
begin
  marker      := '<div class="divCommentoLeft"';
  replyMarker := '<div class="risposte">';
  startspoil  := '<p class="spoiler">';
  endspoil    := '</p>';
  extrValue   := '';
  ctr_giri    := 0;

  p := pos(marker, AllComments);
  if p = 0 then
    exit;                                       // no comment on this page
  // "rest" always starts right after the opening marker of the next comment
  rest := copy(AllComments, p + length(marker), length(AllComments));

  repeat
    ctr_giri := ctr_giri + 1;

    // cut the current comment; the marker is put back so that the opening tag is complete
    p := pos(marker, rest);
    more := p > 0;
    if more then
      begin
      cValue := marker + copy(rest, 1, p - 1);
      rest := copy(rest, p + length(marker), length(rest));
      end
    else
      cValue := marker + rest;                  // last comment: runs to the end of the block
      // NOTE(review): if the last comment has no replies block, trailing page text
      // up to the end marker is kept with it -- confirm against the live page.

    // drop the replies block, when the comment has one
    p := pos(replyMarker, cValue);
    if p > 0 then
      cValue := copy(cValue, 1, p - 1);
    if debug then
       DumpPage(folder + 'filmscoop_opinione ' + IntToStr(ctr_giri) + '.html', cValue);

    // remove the (first) spoiler paragraph
    delstr := startspoil + textbetween(cValue, startspoil, endspoil) + endspoil;
    cValue := stringreplace(cValue, delstr, '');

    // separate the vote from the text that follows it
    cValue := stringreplace(cValue, ' / 10', ' / 10. ---  ');
    HTMLRemoveTags(cValue);
    HTMLDecode(cValue);
    cValue := fulltrim(cValue);
    if cValue <> '' then
      extrValue := extrValue + CRLF + CRLF + '--- ' + cValue;
  until not more;
end;
// ------------------------------------------------------------------
// FILL PICKTREE CONTROL WITH LINKS & TITLES or RETURN ONE PAGE LINK
// if OneFilm flag true return Film Id else populate PickTree
// IN:  OneFilm flag (bool)
// OUT: one page ID  (string)
// ------------------------------------------------------------------
function PopulatePickTree(OneFilm: boolean): string;
// Works on the search result page held in the global PageStr.
//   OneFilm = true : returns the URL of the first movie link found.
//   OneFilm = false: clears the pick tree, adds one entry (title, URL) per
//                    movie link and returns ''.  PageStr is consumed while
//                    scanning.
var
  filmId, filmTitle, filmUrl, lastUrl: string;
  hit: integer;
begin
  if OneFilm then
  begin
    filmId := textbetween(PageStr, cStartId, cEndId);
    result := stringreplace(QueryFilm + filmId, '" rel="nofollow', '');         //2018.05.25
    exit;
  end;

  PickTreeClear;
  hit := pos(cStartId, PageStr);
  while hit > 0 do
  begin
    // move to the link, then read its id and the first title that follows it
    Delete(PageStr, 1, hit - 1);
    filmId := textbetween(PageStr, cStartId, cEndId);
    HTMLRemoveTags(filmId);
    filmUrl := QueryFilm + filmId;                                              //2018.05.25
    filmTitle := textbetween(PageStr, cStartTitleList, cEndTitleList);
    HTMLRemoveTags(filmTitle);
    HTMLDecode(filmTitle);

    // skip consecutive links to the same page and trailer anchors
    if (pos('#trailer', filmUrl) = 0) and (filmUrl <> lastUrl) then
      PickTreeAdd(filmTitle, filmUrl);
    lastUrl := filmUrl;

    // step past the current link so that the next search finds the following one
    Delete(PageStr, 1, pos(cStartId, PageStr));
    hit := pos(cStartId, PageStr);
  end;
  result := '';
end;

// ---------------------------------
// ANALYZE FIRST SEARCH RESULT PAGE:
// IN:  page Url (string)
// OUT: none
// ---------------------------------
procedure AnalyzeSearchPage(Url: string);
// Downloads the search result page at Url and dispatches on the number of
// results reported by the page:
//   none or unknown -> shows a message and returns;
//   exactly one     -> takes that movie's URL directly;
//   several         -> lets the user choose one in the pick tree
//                      (returns if the dialog is cancelled).
// The chosen URL is stored in the global MovieUrl and AnalyzeMoviePage is run.
var
  NumRisultati : string;
begin
//  PageStr := RemoveextraChars(Url);       // to be redone: far too slow
  PageStr := GetPage(Url);
  Pagestr := UTF8decode(Pagestr);
  if debug then
     DumpPage(folder + 'filmscoop_ricerca.txt', Pagestr);
  NumRisultati := textbetween(PageStr, cStartNumRis, cEndNumRis);

  if ( (NumRisultati = '0') or (NumRisultati = '')) then
    begin
      ShowMessage('Title not found / Nessun film trovato.');
      exit;
    end;                                   // the ';' was missing here, before the next statement

  if NumRisultati = '1' then
    MovieUrl := PopulatePickTree(true)
  else
    begin
      PopulatePickTree(false);
      if not PickTreeExec(MovieUrl) then // ..select one
        exit;
    end;

  AnalyzeMoviePage;
end;

// ----------
// MAIN:
// IN:  none
// OUT: none
// ----------
begin
  // Entry point: ask for a title (the translated title is proposed when the
  // catalog has one, the original title otherwise), build the search URL and
  // analyse the result page.
  if CheckVersion(4,2,2) then
    begin
      TranslatedStr := GetField(fieldTranslatedTitle);
      MovieName := GetField(fieldOriginalTitle);
      if (TranslatedStr <> '') then
        MovieName := TranslatedStr;

      // the caption used to read 'MyMovies.It', left over from another script
      if(Input('FilmScoop.it', 'Enter the title of the movie', MovieName)) then
        begin
          // blanks become '+' in the query string
          MovieUrl := QueryBase + StringReplace(MovieName,' ','+') + UrlRicerca;
          AnalyzeSearchPage(MovieUrl);
        end;
    end
  else
    ShowMessage('This script requires a newer version of Ant Movie Catalog (at least the version 4.2.2)');
end.

Re: [UPD ITA] Filmscoop.it rel. 2.0

Posted: 2018-06-01 17:53:38
by antp
Thanks :)

Re: [UPD ITA] Filmscoop.it rel. 2.0

Posted: 2018-06-02 16:48:04
by otreux
Grazie! :clapping: