Extracts all comments, optimized code.
Code: Select all
(***************************************************
Ant Movie Catalog importation script
www.antp.be/software/moviecatalog/
[Infos]
Authors=Fulvio53s03 from original Claudio Rinaldi (rinaldiclaudio@gmail.com) & Mrobama
Title=FilmScoop.it
Description=Get movie info from FilmScoop.it
Site=www.filmscoop.it
Language=IT
Version=2.1
Requires=4.2.2
Comments=Nuova versione di Fulvio53s03, estrae tutti i commenti. Esecuzione velocizzata!
License=
GetInfo=1
RequiresMovies=1
[Options]
[Parameters]
***************************************************)
program FilmScoopIT;
uses
StringUtils7552;
const
debug = false; // when true, fetched pages and intermediate fragments are dumped to "folder"
folder = 'f:\prova\'; // directory receiving the debug dumps (only used when debug is true)
UrlBase = 'http://www.filmscoop.it';
UrlRicerca = '&to=&r=&i=&n=&g=0&a=&o=Titolo&Submit=Cerca'; // fixed query-string tail appended after the searched title
QueryBase = UrlBase + '/ricerca/risultati.asp?t='; // search URL prefix; the title is appended to it
QueryFilm = UrlBase + '/film_al_cinema/'; // movie page URL prefix; the film ID is appended to it
ImagePath = UrlBase + '/locandine/'; // poster URL prefix; the image file name is appended to it
cStartNumRis = 'Numero di risultati: <strong>'; // Result count start marker
cEndNumRis = '</strong> - Pagin'; // Result count end marker
cStartId = 'href="/film_al_cinema/'; // ID start marker
cEndId = '"'; // ID end marker
cStartTitle = '<h1 class="TitoloFilmUpper"'; // Title start marker (stored in the translated-title field)
cEndTitle = '</a>'; // Title end marker
cStartTranslTitle = 'Titolo Originale</strong>: '; // Original title start marker (despite the constant name, it feeds the original-title field)
cEndTranslTitle = '</h2>'; // Original title end marker
cStartImg = '<img src="http://www.filmscoop.it/locandine/'; // Image start marker
cEndImg = '" alt'; // Image end marker
cStartDirector = 'Regia</strong>:'; // Director start marker
cStartCast = 'Interpreti</strong>:'; // Actors start marker (also used as the director end marker)
cStartCategory = 'Genere</strong>:'; // Category start marker
cEndCategory = '</a>'; // Category end marker
cStartDuration = 'Durata</strong>:'; // Duration start marker (also used as the actors end marker)
cEndDuration = '<br />'; // Duration end marker
cStartCountry = '<strong>Nazionalità</strong>:'; // Country start marker (the same span also carries the year)
cEndCountry = '<strong>Genere'; // Country end marker
cstartrecensione = '<div id="TestoRecensione">'; // Review text start marker (on the separate review page)
cendrecensione = '</div>'; // Review text end marker
cStartYear = ' '; // Year start marker (not referenced by the code visible in this script)
cEndYear = '<br />'; // Year end marker (not referenced by the code visible in this script)
cStartDesc = '<h2 style="margin:25px 0 0 0;padding:0;font-weight:bold;">Trama del film'; // Description start marker
cEndDesc = '</p>'; // Description end marker
cStartComm = '<div align="center" class="comtext" style="margin-top:8px;">Commenti:'; // Comments start marker
cMidComm = '<div class="divCommentoLeft"'; // Marker that opens each single comment; appended as a sentinel after the last one
cEndComm = '<script type="text/javascript">'; // Comments end marker
cStartTitleList = '<strong>'; // Title list start marker
cEndTitleList = '</strong>'; // Title list end marker
var
// Shared state: URL of the page being processed, title typed by the user, and the current page text.
MovieUrl, MovieName, TranslatedStr, PageStr: string;
// cValue is a general scratch value; extrvalue accumulates the formatted comments; AllComments holds the raw comments HTML.
cValue, extrvalue, AllComments: string;
// URL of the review page and the plain-text review extracted from it.
link_recensione, Allrecensione: string;
// -----------------------
// ANALYZE MOVIE DATA PAGE
// IN: none
// OUT: set Ant fields
// -----------------------
// Forward declarations: AnalyzeMoviePage calls these two helpers, whose
// bodies are located further down in the script.
procedure estrai_recensione; forward;
procedure formatta_commenti; forward;

// Downloads the movie page addressed by MovieUrl and fills the catalog
// fields: titles, picture, director, actors, country, year, category,
// comments, description (plus review when available), URL and length.
// Uses the globals PageStr, cValue, extrvalue, AllComments,
// link_recensione and Allrecensione as working storage.
procedure AnalyzeMoviePage;
var
  start_link_recensione, cImage, saveValue, cutMarker: string;
  ore, minuti, minu: integer;
begin
  // Start from a clean state so nothing left by a previous call leaks in.
  Allrecensione := '';
  extrvalue := '';

  // Fetch the movie page.
  PageStr := GetPage(MovieUrl);
  PageStr := UTF8decode(PageStr);

  // Keep only the part preceding the left box. textbefore yields an empty
  // string when the marker is missing, so truncate only if it is present;
  // otherwise the whole page would be thrown away.
  cutMarker := '<div class="BoxLeft">';
  if pos(cutMarker, PageStr) > 0 then
    PageStr := textbefore(PageStr, cutMarker, '');
  if debug then
    DumpPage(folder + 'filmscoop_movie_page.html', PageStr);

  // REVIEW: follow the first link to a review page, when there is one.
  start_link_recensione := '<a href="/cgi-bin/recensioni/';
  if pos(start_link_recensione, PageStr) > 0 then
  begin
    link_recensione := start_link_recensione + textbetween(PageStr, start_link_recensione, '"');
    // Drop everything up to the opening quote: what remains is the site-relative path.
    link_recensione := textafter(link_recensione, '"');
    link_recensione := UrlBase + link_recensione;
    estrai_recensione;
  end;

  // TRANSLATED TITLE (the markers are re-added so the tag stripper sees complete tags)
  cValue := cStartTitle + textbetween(PageStr, cStartTitle, cEndTitle) + cEndTitle;
  HTMLRemoveTags(cValue);
  HTMLDecode(cValue);
  cValue := fulltrim(cValue);
  SetField(fieldTranslatedTitle, AnsiUpFirstLetter(AnsiLowerCase(cValue)));

  // FILM IMAGE
  cImage := textbetween(PageStr, cStartImg, cEndImg);
  if cImage <> '' then
    GetPicture(ImagePath + cImage);

  // ORIGINAL TITLE
  cValue := textbetween(PageStr, cStartTranslTitle, cEndTranslTitle);
  HTMLRemoveTags(cValue);
  HTMLDecode(cValue);
  SetField(fieldOriginalTitle, AnsiUpFirstLetter(AnsiLowerCase(cValue)));

  // DIRECTOR (ends where the cast section begins)
  cValue := textbetween(PageStr, cStartDirector, cStartCast);
  HTMLRemoveTags(cValue);
  HTMLDecode(cValue);
  cValue := fulltrim(cValue);
  SetField(fieldDirector, cValue);

  // ACTORS (ends where the duration section begins)
  cValue := textbetween(PageStr, cStartCast, cStartDuration);
  HTMLRemoveTags(cValue);
  HTMLDecode(cValue);
  cValue := fulltrim(cValue);
  SetField(fieldActors, cValue);

  // COUNTRY and YEAR come from the same span ("<country> <year>"),
  // so it is extracted and cleaned only once.
  saveValue := textbetween(PageStr, cStartCountry, cEndCountry);
  HTMLRemoveTags(saveValue);
  HTMLDecode(saveValue);
  saveValue := fulltrim(saveValue);

  // Country: the text preceding a year starting with 2 or, failing that, with 1.
  cValue := textbefore(saveValue, ' 2', '');
  if cValue = '' then
    cValue := textbefore(saveValue, ' 1', '');
  if cValue = '' then
    cValue := saveValue; // no year found: the whole span is the country
  SetField(fieldCountry, cValue);

  // Year: rebuild it from the text following ' 2' or ' 1'. The field is left
  // untouched when neither is found (previously a bare '1' was stored).
  cValue := textafter(saveValue, ' 2');
  if cValue <> '' then
    cValue := '2' + cValue
  else
  begin
    cValue := textafter(saveValue, ' 1');
    if cValue <> '' then
      cValue := '1' + cValue;
  end;
  if cValue <> '' then
    SetField(fieldYear, cValue);

  // CATEGORY
  cValue := textbetween(PageStr, cStartCategory, cEndCategory);
  HTMLRemoveTags(cValue);
  HTMLDecode(cValue);
  cValue := fulltrim(cValue);
  SetField(fieldCategory, AnsiUpFirstLetter(AnsiLowerCase(cValue)));

  // COMMENTS: cMidComm is appended as a sentinel so that the last comment
  // is also enclosed between two comment markers.
  AllComments := cStartComm + textbetween(PageStr, cStartComm, cEndComm) + cMidComm + cEndComm;
  if debug then
    DumpPage(folder + 'filmscoop_comments.html', AllComments);
  formatta_commenti;
  extrvalue := fulltrim(extrvalue);
  if length(extrvalue) > 4 then // longer than the '--- ' prefix that opens each comment
    SetField(fieldComments, extrvalue);

  // DESCRIPTION: first paragraph following the plot heading, then the review if any.
  cValue := cStartDesc + textbetween(PageStr, cStartDesc, cEndDesc) + cEndDesc;
  cValue := '<p' + textbetween(cValue, '<p', '</p>') + '</p>';
  HTMLRemoveTags(cValue);
  HTMLDecode(cValue);
  cValue := fulltrim(cValue);
  if Allrecensione <> '' then
    cValue := cValue + CRLF + CRLF + 'Recensione' + CRLF + Allrecensione;
  SetField(fieldDescription, cValue);

  // URL
  SetField(fieldURL, MovieUrl);

  // LENGTH: the value after 'h ' is parsed as hours.minutes and converted
  // to minutes. Whole hours (zero minutes) are multiplied by 60 as well;
  // previously "2.00" was stored as 2 minutes.
  cValue := textbetween(PageStr, '<strong>Durata</strong>:', '<br />');
  cValue := fulltrim(textafter(cValue, 'h '));
  ore := StrToInt(fulltrim(textbefore(cValue, '.', '')), 0);
  minuti := StrToInt(fulltrim(textafter(cValue, '.')), 0);
  minu := ore * 60 + minuti;
  if minu > 0 then // nothing parsed: leave the field as it is
    SetField(fieldLength, IntToStr(minu));
end;
// Downloads the review page addressed by link_recensione and leaves its
// plain text (tags removed, entities decoded, trimmed) in Allrecensione.
// The global cValue is used as scratch storage and ends up holding the
// same text.
procedure estrai_recensione;
begin
  // Fetch and decode the review page in one step.
  Allrecensione := UTF8decode(GetPage(link_recensione));

  if debug then
  begin
    DumpPage(folder + 'filmscoop_recensione.html', Allrecensione);
  end;

  // Isolate the review container and reduce it to plain text.
  cValue := textbetween(Allrecensione, cstartrecensione, cendrecensione);
  HTMLRemoveTags(cValue);
  HTMLDecode(cValue);
  cValue := fulltrim(cValue);

  Allrecensione := cValue;
end;
// Converts the raw comments HTML held in AllComments into plain text and
// leaves the result in extrvalue: every comment is prefixed with '--- '
// and separated from the previous one by an empty line.
// AllComments is consumed while processing; cValue is used as scratch.
// The caller is expected to have appended one extra comment marker after
// the last comment, so that each comment sits between two markers.
procedure formatta_commenti;
var
  marker, replyMarker, spoilStart, spoilEnd, delstr: string;
  ctr_giri: integer;
begin
  marker := '<div class="divCommentoLeft"'; // opens every comment block
  replyMarker := '<div class="risposte">';  // replies section closing a comment
  spoilStart := '<p class="spoiler">';
  spoilEnd := '</p>';

  extrvalue := ''; // start from an empty accumulator
  ctr_giri := 1;

  // A chunk is the text between two consecutive markers plus the second
  // marker. A chunk equal to the bare marker means nothing is left.
  cValue := textbetween(AllComments, marker, marker) + marker;
  if debug then
    DumpPage(folder + 'filmscoop_opinione ' + IntToStr(ctr_giri) + '.html', cValue);

  while cValue <> marker do
  begin
    ctr_giri := ctr_giri + 1;

    // Remove the chunk from the source; the marker preceding it stays in
    // place and becomes the opening marker of the next chunk.
    AllComments := '<div>' + StringReplace(AllComments, cValue, '');

    // Drop the replies section. textbefore yields an empty string when the
    // marker is missing, so in that case only the trailing comment marker
    // is cut off and the comment text is kept.
    if pos(replyMarker, cValue) > 0 then
      cValue := textbefore(cValue, replyMarker, '')
    else
      cValue := copy(cValue, 1, length(cValue) - length(marker));
    if debug then
      DumpPage(folder + 'filmscoop_opinione ' + IntToStr(ctr_giri) + '.html', cValue);

    // Remove the spoiler paragraph and its label.
    delstr := spoilStart + textbetween(cValue, spoilStart, spoilEnd) + spoilEnd;
    cValue := StringReplace(cValue, delstr, '');
    cValue := StringReplace(cValue, '<strong>SPOILER</strong>', '');

    // Separate the rating from the comment text.
    cValue := StringReplace(cValue, ' / 10', ' / 10. --- ');

    // The chunk starts with the tail of the opening tag (id attribute and
    // closing bracket); remove it, since the tag stripper needs whole tags.
    delstr := 'id="divCommentoLeft' + textbetween(cValue, 'id="divCommentoLeft', '>') + '>';
    cValue := StringReplace(cValue, delstr, '');

    HTMLRemoveTags(cValue);
    HTMLDecode(cValue);
    cValue := fulltrim(cValue);

    // Skip chunks that turned out empty instead of adding a bare '--- '.
    if cValue <> '' then
      extrvalue := extrvalue + CRLF + CRLF + '--- ' + cValue;

    // Next chunk.
    cValue := textbetween(AllComments, marker, marker) + marker;
  end;
end;
// ------------------------------------------------------------------
// FILL PICKTREE CONTROL WITH LINKS & TITLES or RETURN ONE PAGE LINK
// if OneFilm flag true return Film Id else populate PickTree
// IN: OneFilm flag (bool)
// OUT: one page ID (string)
// ------------------------------------------------------------------
// Reads movie links from the search result page held in PageStr.
// OneFilm = true : returns the URL of the first movie link found.
// OneFilm = false: fills the pick tree with one entry (title, URL) per
//                  link and returns an empty string. PageStr is consumed
//                  while scanning.
function PopulatePickTree(OneFilm: boolean): string;
var
  filmId, filmTitle, link, prevLink: string;
  markerPos: integer;
begin
  result := '';

  if not OneFilm then
  begin
    PickTreeClear;
    markerPos := pos(cStartId, PageStr);
    while markerPos > 0 do
    begin
      // Move the next link marker to the very beginning of the page text.
      Delete(PageStr, 1, markerPos - 1);

      // Movie URL
      filmId := textbetween(PageStr, cStartId, cEndId);
      HTMLRemoveTags(filmId);
      link := QueryFilm + filmId;

      // Movie title
      filmTitle := textbetween(PageStr, cStartTitleList, cEndTitleList);
      HTMLRemoveTags(filmTitle);
      HTMLDecode(filmTitle);

      // Skip a link equal to the previous one and links to the trailer anchor.
      if (pos('#trailer', link) = 0) and (link <> prevLink) then
        PickTreeAdd(filmTitle, link);
      prevLink := link;

      // Step past the marker just handled so that the next search finds the following link.
      Delete(PageStr, 1, pos(cStartId, PageStr));
      markerPos := pos(cStartId, PageStr);
    end;
    exit;
  end;

  // Single result: build the URL straight from the first link on the page.
  filmId := textbetween(PageStr, cStartId, cEndId);
  result := stringreplace(QueryFilm + filmId, '" rel="nofollow', '');
end;
// ---------------------------------
// ANALYZE FIRST SEARCH RESULT PAGE:
// IN: page Url (string)
// OUT: none
// ---------------------------------
// Downloads the search result page at Url and decides what to do next:
//  - no result (or result counter not found): shows a message and returns;
//  - exactly one result: takes its URL directly;
//  - several results: lets the user choose one from the pick tree.
// When a movie has been selected, its page is analyzed through
// AnalyzeMoviePage. The chosen URL is stored in the global MovieUrl.
procedure AnalyzeSearchPage(Url: string);
var
  NumRisultati: string;
begin
  PageStr := GetPage(Url);
  PageStr := UTF8decode(PageStr);
  if debug then
    DumpPage(folder + 'filmscoop_ricerca.txt', PageStr);

  // Number of results as printed on the page.
  NumRisultati := textbetween(PageStr, cStartNumRis, cEndNumRis);
  if (NumRisultati = '0') or (NumRisultati = '') then
  begin
    ShowMessage('Title not found / Nessun film trovato.');
    exit;
  end; // statement separator was missing here

  if NumRisultati = '1' then
    MovieUrl := PopulatePickTree(true)
  else
  begin
    PopulatePickTree(false);
    if not PickTreeExec(MovieUrl) then // user cancelled the selection
      exit;
  end;

  AnalyzeMoviePage;
end;
// ----------
// MAIN:
// IN: none
// OUT: none
// ----------
// Main program: asks for the title to search (proposing the translated
// title when present, otherwise the original one), builds the search URL
// and starts the analysis of the result page.
begin
  if CheckVersion(4,2,2) then
  begin
    TranslatedStr := GetField(fieldTranslatedTitle);
    MovieName := GetField(fieldOriginalTitle);
    if TranslatedStr <> '' then
      MovieName := TranslatedStr;
    // The dialog caption names the site actually queried by this script
    // (it previously showed the name of a different site).
    if Input('FilmScoop.it', 'Enter the title of the movie', MovieName) then
    begin
      // Spaces become '+' in the query string.
      MovieUrl := QueryBase + StringReplace(MovieName, ' ', '+') + UrlRicerca;
      AnalyzeSearchPage(MovieUrl);
    end;
  end
  else
    ShowMessage('This script requires a newer version of Ant Movie Catalog (at least the version 4.2.2)');
end.