@@ -10,19 +10,19 @@ image_db_lock = threading.Lock()
video_db_lock = threading . Lock ( )
image_db_path = " ./app/Database/Msg/HardLinkImage.db "
video_db_path = " ./app/Database/Msg/HardLinkVideo.db "
root_path = ' FileStorage/MsgAttach/'
video_root_path = ' FileStorage/Video/'
root_path = " FileStorage/MsgAttach/"
video_root_path = " FileStorage/Video/"
@log
def get_md5_from_xml ( content , type_ = ' img' ) :
def get_md5_from_xml ( content , type_ = " img" ) :
try :
# 解析XML
root = ET . fromstring ( content )
if type_ == ' img' :
if type_ == " img" :
# 提取md5的值
md5_value = root . find ( " .//img " ) . get ( " md5 " )
elif type_ == ' video' :
elif type_ == " video" :
md5_value = root . find ( " .//videomsg " ) . get ( " md5 " )
# print(md5_value)
return md5_value
@@ -30,7 +30,6 @@ def get_md5_from_xml(content, type_='img'):
return None
class tencent_struct :
def __setVals__ ( self , data , off ) :
if data :
@@ -141,6 +140,12 @@ class tencent_struct:
3 : ( " msg_info_struct " , __msgInfo__ ) ,
}
__struct2__ = { 1 : ( " " , " s " ) , 2 : ( " " , " s " ) }
__extraBuf__ = {
1 : ( " " , __struct2__ ) ,
}
def get_bytesExta_Content ( self , data = None , off = 0 ) :
self . __setVals__ ( data , off )
try :
@@ -148,6 +153,13 @@ class tencent_struct:
except :
raise
def get_extraBuf_Content ( self , data = None , off = 0 ) :
self . __setVals__ ( data , off )
try :
return self . readStruct ( " __extraBuf__ " )
except :
raise
__contenttype__ = {
" s " : __readString ,
" I " : __readUleb ,
@@ -163,6 +175,74 @@ def parseBytes(content: bytes):
pass
def parseExtraBuf ( content : bytes ) :
try :
extraBuf = tencent_struct ( ) . get_extraBuf_Content ( content )
return extraBuf
except :
pass
def decodeExtraBuf ( extra_buf_content : bytes ) :
off = 0
types = [ b " \x04 " , b " \x18 " , b " \x17 " , b " \x02 " , b " \x05 " ]
trunkName = {
" 46CF10C4 " : " 个性签名 " ,
" A4D9024A " : " 国家 " ,
" E2EAA8D1 " : " 省份 " ,
" 1D025BBF " : " 市 " ,
" 81AE19B4 " : " 朋友圈背景url " ,
" F917BCC0 " : " 公司名称 " ,
" 4EB96D85 " : " 企业微信属性 " ,
" 0E719F13 " : " 备注图片 " ,
" 759378AD " : " 手机号 " ,
}
res = { }
while off < len ( extra_buf_content ) :
length = 4 # 块头
trunk_head = extra_buf_content [ off : off + length ]
off + = length
trunk_head = binascii . hexlify ( trunk_head ) . decode ( ) . upper ( )
if trunk_head in trunkName :
trunk_head = trunkName [ trunk_head ]
res [ trunk_head ] = { }
char = extra_buf_content [ off : off + 1 ]
off + = 1
field = binascii . hexlify ( char ) . decode ( )
if char == b " \x04 " : # 四个字节的int, 小端序
length = 4
intContent = extra_buf_content [ off : off + length ]
off + = 4
intContent = int . from_bytes ( intContent , " little " )
res [ trunk_head ] [ field ] = intContent
elif char == b " \x18 " : # utf-16字符串
length = 4
lengthContent = extra_buf_content [ off : off + length ]
off + = 4
lengthContent = int . from_bytes ( lengthContent , " little " )
strContent = extra_buf_content [ off : off + lengthContent ]
off + = lengthContent
res [ trunk_head ] [ field ] = strContent . decode ( " utf-16 " ) . rstrip ( " \x00 " )
elif char == b " \x17 " : # utf-8 protobuf
length = 4
lengthContent = extra_buf_content [ off : off + length ]
off + = 4
lengthContent = int . from_bytes ( lengthContent , " little " )
strContent = extra_buf_content [ off : off + lengthContent ]
off + = lengthContent
res [ trunk_head ] [ field ] = parseExtraBuf ( strContent )
elif char == b " \x02 " : # 一个字节的int
content = extra_buf_content [ off : off + 1 ]
off + = 1
res [ trunk_head ] [ field ] = int . from_bytes ( content , " little " )
elif char == b " \x05 " : # 暂时不知道有啥用, 固定8个字节, 先当int处理
length = 8
content = extra_buf_content [ off : off + length ]
off + = length
res [ trunk_head ] [ field ] = int . from_bytes ( content , " little " )
return res
def singleton ( cls ) :
_instance = { }
@@ -206,13 +286,13 @@ class HardLink:
return None
if not self . open_flag :
return None
sql = '''
sql = """
select Md5Hash,MD5,FileName,HardLinkImageID.Dir as DirName1,HardLinkImageID2.Dir as DirName2
from HardLinkImageAttribute
join HardLinkImageID on HardLinkImageAttribute.DirID1 = HardLinkImageID.DirID
join HardLinkImageID as HardLinkImageID2 on HardLinkImageAttribute.DirID2 = HardLinkImageID2.DirID
where MD5 = ?;
'''
"""
try :
image_db_lock . acquire ( True )
try :
@@ -230,12 +310,12 @@ class HardLink:
return None
if not self . open_flag :
return None
sql = '''
sql = """
select Md5Hash,MD5,FileName,HardLinkVideoID2.Dir as DirName2
from HardLinkVideoAttribute
join HardLinkVideoID as HardLinkVideoID2 on HardLinkVideoAttribute.DirID2 = HardLinkVideoID2.DirID
where MD5 = ?;
'''
"""
try :
video_db_lock . acquire ( True )
try :
@@ -252,8 +332,8 @@ class HardLink:
bytesDict = parseBytes ( bytesExtra )
for msginfo in bytesDict [ 3 ] :
if msginfo [ 1 ] [ 1 ] [ 1 ] == ( 3 if thumb else 4 ) :
pathh = msginfo [ 1 ] [ 2 ] [ 1 ] # wxid\FileStorage\...
pathh = " \\ " . join ( pathh . split ( ' \\ ' ) [ 1 : ] )
pathh = msginfo [ 1 ] [ 2 ] [ 1 ] # wxid\FileStorage\...
pathh = " \\ " . join ( pathh . split ( " \\ " ) [ 1 : ] )
return pathh
md5 = get_md5_from_xml ( content )
if not md5 :
@@ -264,7 +344,7 @@ class HardLink:
dir1 = result [ 3 ]
dir2 = result [ 4 ]
data_image = result [ 2 ]
dir0 = ' Thumb' if thumb else ' Image'
dir0 = " Thumb" if thumb else " Image"
dat_image = os . path . join ( root_path , dir1 , dir0 , dir2 , data_image )
return dat_image
@@ -272,16 +352,16 @@ class HardLink:
bytesDict = parseBytes ( bytesExtra )
for msginfo in bytesDict [ 3 ] :
if msginfo [ 1 ] [ 1 ] [ 1 ] == ( 3 if thumb else 4 ) :
pathh = msginfo [ 1 ] [ 2 ] [ 1 ] # wxid\FileStorage\...
pathh = " \\ " . join ( pathh . split ( ' \\ ' ) [ 1 : ] )
pathh = msginfo [ 1 ] [ 2 ] [ 1 ] # wxid\FileStorage\...
pathh = " \\ " . join ( pathh . split ( " \\ " ) [ 1 : ] )
return pathh
md5 = get_md5_from_xml ( content , type_ = ' video' )
md5 = get_md5_from_xml ( content , type_ = " video" )
if not md5 :
return None
result = self . get_video_by_md5 ( binascii . unhexlify ( md5 ) )
if result :
dir2 = result [ 3 ]
data_image = result [ 2 ] . split ( ' . ' ) [ 0 ] + ' .jpg' if thumb else result [ 2 ]
data_image = result [ 2 ] . split ( " . " ) [ 0 ] + " .jpg" if thumb else result [ 2 ]
# dir0 = 'Thumb' if thumb else 'Image'
dat_image = os . path . join ( video_root_path , dir2 , data_image )
return dat_image
@@ -304,8 +384,8 @@ class HardLink:
# 6b02292eecea118f06be3a5b20075afc_t
if __name__ == ' __main__' :
msg_root_path = ' ./Msg/'
if __name__ == " __main__" :
msg_root_path = " ./Msg/"
image_db_path = " ./Msg/HardLinkImage.db "
video_db_path = " ./Msg/HardLinkVideo.db "
hard_link_db = HardLink ( )
@@ -315,10 +395,10 @@ if __name__ == '__main__':
# print(hard_link_db.get_image(content, thumb=False))
# result = get_md5_from_xml(content)
# print(result)
content = ''' <?xml version= " 1.0 " ?>
content = """ <?xml version= " 1.0 " ?>
<msg>
<videomsg aeskey= " d635d2013d221dbd05a4eab3a8185f5a " cdnvideourl= " 3057020100044b304902010002040297cead02032f540502042ba7b4de020465673b74042438316562356530652d653764352d343263632d613531642d6464383661313330623965330204052400040201000405004c537500 " cdnthumbaeskey= " d635d2013d221dbd05a4eab3a8185f5a " cdnthumburl= " 3057020100044b304902010002040297cead02032f540502042ba7b4de020465673b74042438316562356530652d653764352d343263632d613531642d6464383661313330623965330204052400040201000405004c537500 " length= " 25164270 " playlength= " 60 " cdnthumblength= " 7419 " cdnthumbwidth= " 1920 " cdnthumbheight= " 1080 " fromusername= " wxid_yt67eeoo4blm22 " md5= " 95558f0e503651375b475636519d2285 " newmd5= " 4ece19bcd92dc5b93b83f397461a1310 " isplaceholder= " 0 " rawmd5= " d660ba186bb31126d94fa568144face8 " rawlength= " 143850007 " cdnrawvideourl= " 3052020100044b30490201000204d8cd585302032f540502040f6a42b7020465673b85042464666462306634342d653339342d343232302d613534392d3930633030646236306266610204059400040201000400 " cdnrawvideoaeskey= " 5915b14ac8d121e0944d9e444aebb7ed " overwritenewmsgid= " 0 " originsourcemd5= " a1a567d8c170bca33d075b787a60dd3f " isad= " 0 " />
</msg>
'''
"""
print ( hard_link_db . get_video ( content ) )
print ( hard_link_db . get_video ( content , thumb = True ) )