I'm writing a new script to extract information from www.kultvideo.it, an Italian website specializing in B-movies.
I'm about to finish my work, but I have a problem extracting images from the site. I'm sure the URL I use to fetch the image is right, but I think the image is generated dynamically, and I don't know by what or how.
Would you help me understand how to extract the image? (Use the movie 'django' to verify; the script stores the image's web address in the audio format field so you can see it.)
Thanks in advance.
Code: Select all
(***************************************************
Ant Movie Catalog importation script
www.antp.be/software/moviecatalog/
[Infos]
Authors=Fulvio53s03 based on original by Penanders (2006)
Title=Kultvideo
Description=Script per Kultvideo.it
Site=http://www.kultvideo.it
Language=IT
Version=2 - 2. 9.2010
Requires=3.5.1
Comments=Kultvideo offre poche informazioni, su film davvero introvabili
License=This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 2 of the License, or (at your option) any later version. |
GetInfo=1
[Options]
***************************************************)
// Import script for www.kultvideo.it: program header and script-wide declarations.
program KultVideo;
uses
StringUtils7552;
var
// MovieName: title typed by the user (URL-encoded by the main block before the search).
// Pagestr: text of the page currently being parsed; the parsing procedures consume it in place.
MovieName, Pagestr: string;
//TheMovieAddress: string;
const
// Base addresses of the site. Both constants hold the same value; SITE1 is the one
// the procedures below prepend to relative links.
SITE = 'http://www.kultvideo.it/';
SITE1 = 'http://www.kultvideo.it/';
// Earlier value of SITE1 and sample movie-sheet addresses, kept for reference:
// SITE1 = 'http://www.kultvideo.it/scheda.asp';
// http://www.kultvideo.it/articles/ArticleSheet.aspx?__langG=it-IT&aid=7244
// ../articles/ArticleSheet.aspx?__langG=it-IT&aid=7244
// -- Normalises the capitalisation of a string: the text is first lower-cased and
//    then passed to AnsiMixedCase with ' -/' as the set of word delimiters, which
//    (per the original author's note) turns the first letter of each word into upper case.
function PrimeMaiu(str: string): string;
begin
  Result := AnsiMixedCase(AnsiLowerCase(str), ' -/');
end;
// ---
// Returns Url with every blank replaced by the percent-encoded form '%20'.
function Space2html(Url: string): string;
var
  BlankAt: Integer;
begin
  BlankAt := Pos(' ', Url);
  while BlankAt <> 0 do
  begin
    // Rebuild the string around the blank: head + '%20' + tail.
    Url := Copy(Url, 1, BlankAt - 1) + '%20' + Copy(Url, BlankAt + 1, Length(Url) - BlankAt);
    BlankAt := Pos(' ', Url);
  end;
  Result := Url;
end;
// Deletes every pair of consecutive spaces from Line and then drops a single
// leading space if one is left. Returns the cleaned string.
//
// Fix: the previous version searched for a ONE-character ' ' but compared it with
// a TWO-character Copy, so the Delete only fired when the space was the last
// character; any other string containing a space made the loop spin forever.
// The two-character Copy/Delete show the pattern was meant to be a double space.
// NOTE(review): a matched pair is removed entirely (Delete count 2, as in the
// original), so 'a  b' becomes 'ab' - confirm this is wanted rather than
// collapsing the pair to a single space.
Function RemoveSpace(Line : String): String;
Var
  SpacePos : Integer;
Begin
  SpacePos := Pos('  ', Line);
  While SpacePos > 0 Do
  Begin
    Delete(Line, SpacePos, 2);
    SpacePos := Pos('  ', Line);
  End;
  If Pos(' ', Line) = 1 Then
    Delete(Line, 1, 1);
  Result := Line;
End;
// ---
// Downloads the search-result page at Address. When the page contains the
// result header, the pick list is filled and shown; if the user confirms a
// choice, the chosen movie page is downloaded, its address is stored in the URL
// field and the page is handed to AnalyzeMoviePage. Otherwise an error is shown.
procedure AnalyzePage(Address: string);
var
  Page: TStringList;
  HeaderLine: integer;
begin
  Page := TStringList.Create;
  Page.Text := GetPage(Address);
  Pagestr := UTF8Decode(Page.Text);
  HeaderLine := FindLine('Risultati della ricerca per i termini', Page, 0);
  if HeaderLine <> -1 then
  begin
    // Movies found. Note: results may span several pages - not handled yet.
    PickTreeClear;
    PickTreeAdd('Risultati ricerca per "' + UrlDecode(MovieName) + '":', '');
    AddMoviesTitles(Page);
    // PickTreeExec receives Address and the code below then uses it as the movie page address.
    if PickTreeExec(Address) then
    begin
      Page.Text := GetPage(Address);
      Pagestr := Page.Text;
      SetField(FieldUrl, Address);
      AnalyzeMoviePage(Page);
    end;
  end
  else
    ShowError('Spiacente, nessun film trovato');
  Page.Free;
end;
// ---
// Helper: strips HTML tags from Value, decodes its HTML entities and trims it.
function KvCleanHtml(Value: string): string;
begin
  HTMLRemoveTags(Value);
  HTMLDecode(Value);
  Result := FullTrim(Value);
end;

// Helper: returns the text between the first "article_sheet_datalabel" span in
// Source and the following '</td>', then deletes Source up to and including the
// first character of that span so the next call finds the next label.
// When no label is left, Source is not changed.
function KvTakeDataLabel(var Source: string): string;
var
  Marker: string;
  MarkerPos: Integer;
begin
  Marker := '<span class="article_sheet_datalabel">';
  Result := TextBetween(Source, Marker, '</td>');
  MarkerPos := Pos(Marker, Source);
  Delete(Source, 1, MarkerPos);
end;

// Helper: reads the next data label from Source, removes the caption text
// (e.g. 'Regia:') and returns the cleaned value.
function KvLabelledValue(var Source: string; Caption: string): string;
var
  Raw: string;
begin
  Raw := KvTakeDataLabel(Source);
  Raw := StringReplace(Raw, Caption, '');
  Result := KvCleanHtml(Raw);
end;

// Parses the movie page and fills the catalog fields.
//   Page    - the movie page split into lines (used for picture and titles).
//   Pagestr - global copy of the page text; it is consumed progressively, so the
//             data labels are read strictly in the order they appear on the page.
// Changes compared with the previous version:
//   * FindLine results are checked before Page.GetString, so a missing title
//     line skips that field instead of raising an index error;
//   * the picture address is HTML-decoded before use (attribute values may
//     contain entities such as '&amp;');
//   * the description is cleaned of tags/entities like every other field;
//   * the repeated "next data label" code is factored into helpers.
procedure AnalyzeMoviePage(Page: TStringList);
var
  Line, Comm, Marker, SaveNationYear: string;
  LineNr, BeginPos, Skipped: Integer;
begin
  // Date on which the data was imported
  SetField(fieldDate, DateToStr(Date));

  // Picture: the <img> tag is on the line after the picture container
  LineNr := FindLine('id="article_sheet_picture"', Page, 0);
  if (LineNr > -1) and (LineNr + 1 < Page.Count) then
  begin
    Line := Page.GetString(LineNr + 1);
    Line := TextBetween(Line, '<img src="../', '" width="');
    if Length(Line) > 0 then
    begin
      HTMLDecode(Line);
      Line := SITE1 + Line;
      // Debug aid: exposes the picture address in the audio format field.
      SetField(fieldAudioFormat, Line);
      // NOTE(review): if the picture still does not load, check how GetPicture
      // detects the image type - a dynamically generated address may not end
      // with an image extension. TODO confirm against the script documentation.
      GetPicture(Line);
    end;
  end;

  // Translated title
  LineNr := FindLine('<td id="article_sheet_title" align="left" valign="top"', Page, 0);
  if LineNr > -1 then
  begin
    Line := Page.GetString(LineNr);
    Line := TextBetween(Line, '<td id="article_sheet_title" align="left"', '<') + '<';
    Line := TextBetween(Line, '>', '<');
    SetField(fieldTranslatedTitle, PrimeMaiu(KvCleanHtml(Line)));
  end;

  // Original title; Pagestr is cut so that it starts at the subtitle span
  Marker := '<span class="article_sheet_filmsubtitle">';
  BeginPos := Pos(Marker, Pagestr);
  Delete(Pagestr, 1, BeginPos - 1);
  LineNr := FindLine(Marker, Page, 0);
  if LineNr > -1 then
  begin
    Line := Page.GetString(LineNr);
    Line := '>' + TextBetween(Line, Marker, '<') + '<';
    Line := TextBetween(Line, '>', '<');
    Line := UTF8Decode(KvCleanHtml(Line));
    SetField(fieldOriginalTitle, PrimeMaiu(Line));
  end;
  BeginPos := Pos('>', Pagestr);
  Delete(Pagestr, 1, BeginPos - 1);

  // Country and year, e.g. 'Francia (1987) - Colore'
  Line := '<td' + TextBetween(Pagestr, '<td', '</td>') + '</td>';
  SaveNationYear := KvCleanHtml(Line);
  SetField(fieldCountry, PrimeMaiu(Trim(TextBefore(SaveNationYear, '(', ''))));
  SetField(fieldYear, TextBetween(SaveNationYear, '(', ')'));

  // Data labels 1-3: genre, director, cast
  SetField(fieldCategory, PrimeMaiu(KvLabelledValue(Pagestr, 'Genere:')));
  SetField(fieldDirector, PrimeMaiu(KvLabelledValue(Pagestr, 'Regia:')));
  SetField(fieldActors, PrimeMaiu(KvLabelledValue(Pagestr, 'Cast:')));

  // Data label 4: distributor (stored in the comments instead of the producer)
  Line := KvTakeDataLabel(Pagestr);
  HTMLRemoveTags(Line);
  HTMLDecode(Line);
  Line := StringReplace(Line, crlf, '');
  Line := StringReplace(Line, #09, '');
  Comm := FullTrim(Line) + crlf;

  // Data labels 5-11 are read and ignored
  for Skipped := 1 to 7 do
    Line := KvTakeDataLabel(Pagestr);

  // Data label 12: description
  Line := KvTakeDataLabel(Pagestr);
  SetField(fieldComments, Comm);
  SetField(fieldDescription, KvCleanHtml(Line));

  // Data label 13 is read and ignored
  Line := KvTakeDataLabel(Pagestr);

  // Data label 14: running time
  SetField(fieldLength, KvLabelledValue(Pagestr, 'Durata:'));
end;
// ---
// Fills the pick list with the movies found on the search-result page.
// For each '<div class="artlist_artpicture">' block the link on the following
// line gives the movie address and the line after the next
// '<td class="artlist_arttitle">' gives the title.
// Fix: the scan now stops when a title cell (or the line after it) is missing.
// Previously FindLine's -1 became line 0 and the search restarted from the top
// of the page, adding the same entries without end.
// TODO: follow the "next page" link; only the first result page is read.
procedure AddMoviesTitles(Page: TStringList);
var
  LineNr, TitleNr: Integer;
  MovieTitle, MovieAddress: string;
begin
  LineNr := FindLine('<div class="artlist_artpicture">', Page, 0);
  while LineNr <> -1 do
  begin
    // The link to the full movie page is on the line after the picture div
    LineNr := LineNr + 1;
    if LineNr >= Page.Count then
      Break;
    MovieAddress := SITE1 + TextBetween(Page.GetString(LineNr), '<a href="../', '"><img src="');
    // The link line is blanked once read (kept from the original code)
    Page.SetString(LineNr, ' ');

    // The title text is on the line after the title cell
    TitleNr := FindLine('<td class="artlist_arttitle">', Page, LineNr);
    if (TitleNr = -1) or (TitleNr + 1 >= Page.Count) then
      Break;
    LineNr := TitleNr + 1;
    MovieTitle := Page.GetString(LineNr);
    HTMLRemoveTags(MovieTitle);
    HTMLDecode(MovieTitle);
    MovieTitle := FullTrim(MovieTitle);

    PickTreeAdd(MovieTitle, MovieAddress);
    LineNr := FindLine('<div class="artlist_artpicture">', Page, LineNr);
  end;
end;
// ----- main()
// Entry point: asks for a movie title (pre-filled from the current record),
// builds the search address and starts the analysis.
// Fix: when the translated title is empty the script now falls back to the
// original title; the previous fallback read the translated title a second time.
Var
  TempVar: String;
begin
  if CheckVersion(3,5,1) then
  begin
    MovieName := GetField(fieldTranslatedTitle);
    if MovieName = '' then
      MovieName := GetField(fieldOriginalTitle);
    if Input('KultVideo.it', 'Inserire il nome del film:', MovieName) then
    begin
      MovieName := UrlEncode(MovieName);
      // Old search address, kept for reference:
      // TempVar:='http://www.kultvideo.it/cerca.asp?page=1&Genre=&key=&cerca='+UrlEncode(MovieName)+'&tipocerca=titolo&radiobutton=ALL';
      TempVar := 'http://www.kultvideo.it/search/Search.aspx?st=' + MovieName;
      SetField(fieldURL, TempVar); // Stores the search address in the URL field
      AnalyzePage(TempVar);
    end;
  end else
    ShowMessage('This script requires a newer version of Ant Movie Catalog (at least the version 3.5.1)');
end.